Module parser
[hide private]
[frames] | [no frames]

Source Code for Module parser

   1  # -*- coding: utf-8 -*- 
   2  """ 
   3  This is part of Yappy 
   4   
   5  parser.py -- Yet another  parser for python... 
   6   
   7  A LR parser generator, based on Aho and al. 1986, C{Compilers} 
   8  (aho86:_compil). 
   9   
  10  It currently builds C{SLR}, C{LR(1)} and  C{LALR(1)} parsing tables. 
  11   
  12  Copyright (C) 2000-2003 Rogério Reis & Nelma Moreira {rvr,nam}@ncc.up.pt 
  13  Version: $Id: parser.py,v 1.18 2006-07-19 09:52:06 rvr Exp $ 
  14   
  15  This program is free software; you can redistribute it and/or modify 
  16  it under the terms of the GNU General Public License as published by 
  17  the Free Software Foundation; either version 2 of the License, or 
  18  (at your option) any later version. 
  19   
  20  This program is distributed in the hope that it will be useful, 
  21  but WITHOUT ANY WARRANTY; without even the implied warranty of 
  22  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  23  GNU General Public License for more details. 
  24   
  25  You should have received a copy of the GNU General Public License 
  26  along with this program; if not, write to the Free Software 
  27  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.    
  28   
  29  @author: Rogério Reis & Nelma Moreira {rvr,nam}@ncc.up.pt 
  30   
  31  @var _DEBUG: if nonzero, display information during parser generation 
  32   or parsing. 
  33   
  34  @type _DEBUG: integer 
  35  """ 
  36   
  37  from types import * 
  38  import re, exceptions, string 
  39  import sys, string, copy, time, operator 
  40   
  41  import os.path 
  42   
  43  import shelve 
  44   
  45  # set elements are mutable objects; we cannot use sets 
  46  import osets 
  47   
  48  #Globals 
  49   
  50  _DEBUG=0 
  51   
  52  _Version = 1.7 
  53   
  54  NIL = "" 
  55   
56 -class Lexer:
57 """Class for lexical analyser to use with the parser 58 59 @ivar rules: lexical rules 60 @ivar operators: precedence and associativity for operators 61 @type operators: dictionary 62 63 """ 64
65 - def __init__(self,rules_list):
66 """ 67 By now lexer is kept as simple as possible, so order is really 68 essential: i.e. if a keyword is substring of another its rule 69 must appear after the larger keyword for the obvious 70 reasons... 71 72 @param rules_list: contains pairs C{(re,funct,op?)} where: 73 74 C{re}: is an uncompiled python regular expression 75 76 C{funct}: the name of 77 a funcion that returns the pair C{(TOKEN, SPECIAL_VALUE)}, where C{TOKEN} 78 is the token to be used by the parser and C{SPECIAL_VALUE} an eventual 79 associated value. The argument is the matched string. If 80 C{funct} equals C{""} the token is ignored. This can be 81 used for delimiters. 82 83 C{op}: if present, is a tuple with operador 84 information: C{(TOKEN,PRECEDENCE,ASSOC)} where C{PRECEDENCE} is an 85 integer and C{ASSOC} the string 'left' or 'right'. 86 87 """ 88 self.rules = [] 89 rnumber = 1 90 for r in rules_list: 91 try: 92 rex = r[0] 93 funct = r[1] 94 except IndexError: 95 raise LexicalError(rnumber,r) 96 try: rec = re.compile(rex) 97 except TypeError: 98 raise LexicalRulesErrorRE(rex,rnumber) 99 try: 100 op,prec,assoc = r[2] 101 if not self.__dict__.has_key("operators"): 102 self.operators = {} 103 if not self.operators.has_key(op): 104 self.operators[op] = (prec,assoc) 105 except IndexError: 106 pass 107 self.rules.append((rec,funct)) 108 109 rnumber = rnumber + 1 110 if _DEBUG and self.__dict__.has_key("operators"): 111 print "operators %s" %self.operators
112
113 - def scan(self,string):
114 """Performs the lexical analysis on C{string} 115 116 @return: a list of tokens (pairs C{(TOKEN , SPEcial_VALUE )}), for 117 recognized elements and C{("@UNK", string )} for the others""" 118 st = [string] 119 for r in self.rules: 120 st = self.scanOneRule(r,st) 121 return self.scanUnknown(st)
122
123 - def scanOneRule(self,rule,st):
124 """Scans space C{st} according only one rule 125 126 @param rule: one rule C{(re,fun,op)} 127 128 @param st: is a list of strings and already matched structures 129 """ 130 re = rule[0] 131 fun = rule[1] 132 st1 = [] 133 for s in st: 134 if not isinstance(s, StringType): 135 st1.append(s) 136 else: 137 s1 = s 138 while True: 139 m = re.search(s1) 140 if not m: 141 st1.append(s1) 142 break 143 else: 144 if m.start() != 0: 145 st1.append(s1[0:m.start()]) 146 # if fun == "": 147 # st1.append(("",s1[m.start():m.end()])) 148 # else: 149 if fun != "": 150 st1.append(apply(fun,[s1[m.start():m.end()]])) 151 if m.end() == len(s1): 152 break 153 else: 154 s1 = s1[m.end():] 155 return st1
156
157 - def scanUnknown(self,st):
158 """Scans the resulting structure making Unknown strings 159 160 Unknown parts will be of the form ("@UNK", string ) """ 161 st1 = [] 162 for s in st: 163 if isinstance(s, StringType): 164 st1.append(("@UNK",s)) 165 else: 166 st1.append(s) 167 return st1
168
169 - def readscan(self):
170 """Scans a string read from stdin """ 171 st = raw_input() 172 if not st: 173 raise IOError 174 if isinstance(st, StringType): 175 s = self.scan(st) 176 return s
177
178 -class YappyError(Exception):
179 """Class for all Yappy exceptions""" 180 pass
181 182
183 -class LexicalError(YappyError):
184 """Class for all Yappy Lexical analyser exceptions"""
185 - def __init__(self,r,rule):
186 self.message = 'Error in rule number %s: %s'%(r,rule)
187
188 - def __str__(self):
189 return "%s" % (self.message)
190
191 -class LexicalRulesErrorRE(YappyError):
192 """An error occured parsing the RE part of a lexical rule"""
193 - def __init__(self,re,no=0):
194 self.message = 'Error in RE "%s" at rule n.%d'%(re,no) 195 self.rule = no 196 self.re = re
197
198 - def __str__(self):
199 return "%s" % (self.message)
200
201 -class GrammarError(YappyError):
202 """Class for input grammar errors """
203 - def __init__(self,rule):
204 self.message = 'Error in rule "%s" '%rule
205 206
207 -class SLRConflictError(YappyError):
208 """Confliting actions in building SLR parsing table. Grammar 209 is not SLR(0)"""
210 - def __init__(self,i,a):
211 self.message = 'Confliting action[%d,%s] in SLR parsing table ' %(i,a) 212 self.item = i 213 self.symbol = a
214
215 -class LRConflictError(YappyError):
216 """Conflicting actions in building LR parsing table. Grammar 217 is not LR(1)"""
218 - def __init__(self,i,a):
219 self.message = 'Confliting action[%d,%s] in LR(1) parsing table ' %(i,a) 220 self.item = i 221 self.symbol = a
222 - def __str__(self):
223 return "%s" % (self.message)
224
225 -class LRConflicts(YappyError):
226 """Confliting actions in building LR parsing table. Grammar 227 is not LR(1)"""
228 - def __init__(self):
229 self.message = """Warning>>> Several confliting actions. Please 230 consult self.Log for details"""
231
232 - def __str__(self):
233 return "%s" % (self.message)
234
235 -class LRParserError(YappyError):
236 """An error occured in LR parsing program"""
237 - def __init__(self,s,a):
238 self.item = s 239 self.symbol = a 240 self.message = 'Error in LR: (%s,%s) not found' %(self.item,self.symbol)
241
242 - def __str__(self):
243 return "%s" % (self.message)
244
245 -class SemanticError(YappyError):
246 """An error occured in the application of a semantic action"""
247 - def __init__(self,m,n=0,r=None):
248 self.message = m 249 self.nrule = n 250 self.rule = r
251 - def __str__(self):
252 return "%s in semantic rule %d: %s" % (self.message,self.nrule,self.rule)
253
254 -class TableError(YappyError):
255 """Mismatch table version """
256 - def __init__(self,t):
257 self.message = """A new table must be built. 258 Please remove table shelve %s or set no_table to 0""" %t
259
260 - def __str__(self):
261 return "%s" % (self.message)
262
263 -class CFGrammar:
264 """ Class for context-free grammars 265 266 @ivar rules: grammar rules 267 @ivar terminals: terminals symbols 268 @ivar nonterminals: nonterminals symbols 269 @ivar start: start symbol 270 @type start: string 271 @ivar ntr: dictionary of rules for each nonterminal 272 273 """
274 - def __init__(self,grammar):
275 """ 276 277 @param grammar: is a list for productions; 278 each production is a tuple C{(LeftHandside,RightHandside,SemFunc,Prec?)} 279 with C{LeftHandside} nonterminal, C{RightHandside} list of symbols, 280 C{SemFunc} syntax-direct semantics, if present 281 C{Prec (PRECEDENCE,ASSOC)} for ambiguous rules 282 283 First production is for start symbol 284 285 Special symbols: C{@S}, C{$}, C{#} 286 """ 287 """ MUST BE IN THIS ORDER""" 288 self.rules = grammar 289 self.makenonterminals() 290 self.maketerminals() 291 self.start = self.rules[0][0] 292 self.aug_start = "@S" 293 self.rules.append((self.aug_start,[self.start],DefaultSemRule)) 294 self.endmark = '$' 295 self.dummy = '#' 296 self.terminals.append(self.endmark) 297 self.terminals.append(self.dummy) 298 self.nonterminals.append(self.aug_start) 299 """ ritems are only for control ... not needed """ 300 self.ritems = [] 301 """ ntr[A] is the set of rules which has A as left side""" 302 self.ntr = {} 303 i = 0 304 for r in self.rules: 305 if not self.ntr.has_key(r[0]): 306 self.ntr[r[0]] = [i] 307 else: 308 self.ntr[r[0]].append(i) 309 for j in range(len(r[1]) + 1): 310 self.ritems.append((i,j)) 311 i = i + 1
312
313 - def __str__(self):
314 """Grammar rules 315 316 @return: a string representing the grammar rules 317 """ 318 s = "" 319 for n in range(len(self.rules)): 320 lhs = self.rules[n][0] 321 rhs = self.rules[n][1] 322 s = s + "%s | %s -> %s \n" %(n, lhs, string.join(rhs," ")) 323 return "Grammar Rules:\n\n%s" % s
324
325 - def makeFFN(self):
326 self.NULLABLE() 327 self.FIRST_ONE() 328 self.FOLLOW()
329
330 - def maketerminals(self):
331 """Extracts C{terminals} from the rules. 332 C{nonterminals} must already exist""" 333 self.terminals = [] 334 for r in self.rules: 335 for s in r[1]: 336 if s not in self.nonterminals and s not in self.terminals: 337 self.terminals.append(s)
338
339 - def makenonterminals(self):
340 """Extracts C{nonterminals} from grammar rules.""" 341 self.nonterminals = [] 342 for r in self.rules: 343 if r[0] not in self.nonterminals: 344 self.nonterminals.append(r[0])
345
346 - def NULLABLE(self):
347 """Determines which nonterminals C{X ->* []} """ 348 self.nullable = {} 349 for s in self.terminals: 350 self.nullable[s] = 0 351 for s in self.nonterminals: 352 self.nullable[s] = 0 353 if self.ntr.has_key(s): 354 for i in self.ntr[s]: 355 if not self.rules[i][1]: 356 self.nullable[s] = 1 357 break 358 k = 1 359 while k == 1: 360 k = 0 361 for r in self.rules: 362 e = 0 363 for i in r[1]: 364 if not self.nullable[i]: 365 e = 1 366 break 367 if e == 0 and not self.nullable[r[0]]: 368 self.nullable[r[0]] = 1 369 k = 1
370 371
372 - def FIRST(self,s):
373 """C{FIRST(s)} is the set of terminals that begin the strings 374 derived from s """ 375 first = osets.Set([]) 376 e = 0 377 for i in range(len(s)): 378 first.s_extend(self.first[s[i]]) 379 if not self.nullable[s[i]]: 380 e = 1 381 break 382 if e == 0: 383 self.nullable[string.join(s)] = 1 384 else: 385 self.nullable[string.join(s)] = 0 386 return first
387
388 - def FIRST_ONE(self):
389 """Determines C{FIRST(s)}, for every symbol s, that is the set of 390 terminals that begin the strings derived from s """ 391 self.first = {} 392 self.nd = {} 393 self.ms =Stack() 394 for s in self.terminals: 395 self.first[s] = osets.Set([s]) 396 for s in self.nonterminals: 397 if self.ntr.has_key(s) and not self.first.has_key(s): 398 # self.FIRST_NT(s) 399 self.FIRST_TRA(s,1)
400
401 - def FIRST_TRA(self,s,d):
402 """Transitiv closure of C{FIRST(X)} """ 403 self.ms.push(s) 404 self.nd[s] = d 405 """ calculating F1(s)""" 406 self.first[s] = osets.Set([]) 407 for i in self.ntr[s]: 408 for y in self.rules[i][1]: 409 if self.nullable[y]: 410 continue 411 else: 412 if y in self.terminals: 413 self.first[s].append(y) 414 break 415 """transitive closure""" 416 for i in self.ntr[s]: 417 for y in self.rules[i][1]: 418 if y in self.nonterminals: 419 if not self.first.has_key(y): 420 self.FIRST_TRA(y,d+1) 421 if self.nd.has_key(y) and self.nd[y] != -1: 422 self.nd[s] = min(self.nd[s],self.nd[y]) 423 self.first[s].s_extend(self.first[y]) 424 if self.nullable[y]: 425 continue 426 else: 427 break 428 else: 429 break 430 if self.nd[s] == d: 431 while 1: 432 y = self.ms.pop() 433 if y == s: 434 break 435 self.first[y] = self.first[s].copy() 436 self.nd[y] = -1
437
438 - def FIRST_NT(self,s):
439 """ Recursivelly computes C{FIRST(X)} for a nonterminal X""" 440 if not self.ntr.has_key(s): 441 return 442 self.first[s] = osets.Set([]) 443 for i in self.ntr[s]: 444 r = self.rules[i][1] 445 if r == []: 446 self.nullable[s] = 1 447 else: 448 e = 1 449 for y in r: 450 if not self.first.has_key(y): 451 self.FIRST_NT(y) 452 self.first[s].s_extend(self.first[y]) 453 if not self.nullable[y]: 454 e = 0 455 break 456 if e == 1: 457 self.nullable[s] = 1
458
459 - def FOLLOW(self):
460 """computes C{FOLLOW(A)} for all nonterminals: the set of terminals a 461 that can appear immediately to the right of A in some sentential form.""" 462 self.follow = {} 463 self.follow[self.start] = osets.Set([self.endmark]) 464 for rule in self.rules: 465 r = rule[1] 466 for i in range(len(r)): 467 if r[i] in self.nonterminals: 468 if not self.follow.has_key(r[i]): 469 self.follow[r[i]] = osets.Set([]) 470 j = i + 1 471 self.follow[r[i]].s_extend(self.FIRST(r[j:])) 472 e = 1 473 while e: 474 e = 0 475 for s in self.nonterminals: 476 for i in self.ntr[s]: 477 r = self.rules[i][1] 478 try: 479 b = r[len(r)-1] 480 if b in self.nonterminals and self.follow[b].s_extend(self.follow[s]): 481 e = 1 482 except IndexError: pass 483 except KeyError: pass 484 for k in range(len(r)-1): 485 j = k + 1 486 if r[k] in self.nonterminals and self.nullable[string.join(r[j:])]: 487 if self.follow[r[k]].s_extend(self.follow[s]): 488 e = 1 489 break
490
491 - def TransClose(self):
492 """For each nonterminal C{s} determines the set of nonterminals 493 a such that C{s ->* ar}, for some C{r}""" 494 self.close_nt = {} 495 self.nd = {} 496 self.ms =Stack() 497 for s in self.nonterminals: 498 if self.ntr.has_key(s) and not self.close_nt.has_key(s): 499 self.TRAVERSE(s,1)
500
501 - def TRAVERSE(self,s,d):
502 """ """ 503 self.ms.push(s) 504 self.nd[s] = d 505 """ calculating F1(s)""" 506 self.close_nt[s] = {s:osets.Set([[]])} 507 for i in self.ntr[s]: 508 if not self.rules[i][1]: 509 continue 510 else: 511 r = self.rules[i][1] 512 for j in range(len(r)): 513 if r[j+1:]: 514 f = self.FIRST(r[j+1:]) 515 ns = self.nullable[string.join(r[j+1:])] 516 else: 517 f = [] 518 ns = 1 519 if r[j] in self.nonterminals: 520 if not self.close_nt[s].has_key(r[j]): 521 self.close_nt[s][r[j]] = osets.Set([[]]) 522 if r[j+1:]: 523 self.close_nt[s][r[j]].append((f,ns)) 524 if not self.nullable[r[j]]: 525 break 526 else: 527 break 528 """reflexive tansitive closure""" 529 for i in self.ntr[s]: 530 if not self.rules[i][1]: 531 continue 532 else: 533 r = self.rules[i][1] 534 for j in range(len(r)): 535 f = self.FIRST(r[j+1:]) 536 ns = self.nullable[string.join(r[j+1:])] 537 if r[j] in self.nonterminals: 538 if not self.close_nt.has_key(r[j]): 539 self.TRAVERSE(r[j],d+1) 540 if self.nd.has_key(r[j]) and self.nd[r[j]] != -1: 541 self.nd[s] = min(self.nd[s],self.nd[r[j]]) 542 for k in self.close_nt[r[j]].keys(): 543 if not self.close_nt[s].has_key(k): 544 self.close_nt[s][k] = osets.Set([[]]) 545 else: 546 for v in self.close_nt[s][k]: 547 if not v: 548 self.close_nt[s][k].append((f,ns)) 549 else: 550 p, n = v 551 if n: 552 self.close_nt[s][k].append((p+f,ns)) 553 else: 554 self.close_nt[s][k].append((p,n)) 555 if not self.nullable[r[j]]: 556 break 557 else: 558 break 559 if self.nd[s] == d: 560 while 1: 561 y = self.ms.pop() 562 if y == s: 563 break 564 self.close_nt[y] = self.close_nt[s].copy() 565 self.nd[y] = -1
566
567 - def DERIVE_NT(self):
568 """For each nonterminal C{s} determines the set of nonterminals 569 a such that C{s ->* ar}, for some C{r}""" 570 self.derive_nt = {} 571 for s in self.nonterminals: 572 if self.ntr.has_key(s) and not self.derive_nt.has_key(s): 573 self.DERIVE_ONE_NT(s)
574
575 - def DERIVE_ONE_NT(self,s):
576 """For nonterminal C{s} determines the set of nonterminals 577 a such that C{s -> ar}, for some C{r} """ 578 if not self.ntr.has_key(s): 579 return 580 self.derive_nt[s] = {s:osets.Set([None])} 581 for i in self.ntr[s]: 582 if not self.rules[i][1]: 583 continue 584 else: 585 r = self.rules[i][1] 586 for j in range(len(r)): 587 if r[j] in self.nonterminals: 588 if not self.derive_nt.has_key(r[j]): 589 self.DERIVE_ONE_NT(r[j]) 590 for k in self.derive_nt[r[j]].keys(): 591 if not self.derive_nt[s].has_key(k): 592 self.derive_nt[s][k] = osets.Set([]) 593 for p in self.derive_nt[r[j]][k]: 594 if not p : 595 self.derive_nt[s][k].append(r[j+1:]) 596 else: 597 self.derive_nt[s][k].append(r[j+1:].append(p)) 598 if not self.nullable[r[j]]: 599 break 600 else: 601 break
602
603 - def DERIVE_T(self):
604 """ """ 605 self.derive_ter = {} 606 for s in self.terminals: 607 self.derive_ter[s] = osets.Set([s]) 608 e = 1 609 while e: 610 e = 0 611 for s in self.nonterminals: 612 for i in self.ntr[s]: 613 r = self.rules[i][1] 614 if r == []: 615 continue 616 for i in range(len(r)): 617 if r[i] in self.terminals: 618 if i < len(r) -1: 619 if self.derive_ter.has_key(r[i+1]): 620 if not self.derive_ter.has_key(s): 621 self.derive_ter[s] = osets.Set([]) 622 if self.derive_ter[s].s_append(r[i]): 623 e = 1 624 break 625 else: 626 if not self.derive_ter.has_key(s): 627 self.derive_ter[s] = osets.Set([]) 628 if self.derive_ter[s].s_append(r[i]): 629 e = 1 630 631 break 632 else: 633 """ non-terminal""" 634 if self.derive_ter.has_key(r[i]): 635 if not self.derive_ter.has_key(s): 636 self.derive_ter[s] = osets.Set([]) 637 if self.derive_ter[s].s_extend(self.derive_ter[r[i]]) == 1: 638 e = 1 639 if i > 0 and self.nullable[r[i]]: 640 continue 641 else: 642 break
643
644 -class LRtable:
645 """Class for construction of a C{LR} table 646 647 @ivar gr: a context-free grammar 648 @ivar operators: operators 649 @ivar Log: Log report for LR table construction 650 651 652 """
653 - def __init__(self,cfgr,operators=None,noconflicts=1,expect=0):
654 """ 655 @param cfgr: a context-free grammar 656 @param operators: operators 657 @param noconflicts: if 0 LRtable conflicts are not resolved, 658 unless for spcecial operator rules 659 @type noconflicts: integer 660 @param expect: exact number of expected LR shift/reduce conflicts 661 @type expect: integer 662 """ 663 self.gr = cfgr 664 self.gr.makeFFN() 665 self.operators = operators 666 self.precedence = None 667 # if self.operators: 668 self.rules_precedence() 669 self.Log=LogLR(noconflicts,expect) 670 self.make_action_goto()
671 672
673 - def make_action_goto(self):
674 """ make C{action[i,X]} and C{goto[i,X]} 675 All pairs C{(i,s)} not in action and goto dictionaries are 'error' """ 676 c = self.items() 677 if _DEBUG: 678 print self.print_items(c) 679 self.ACTION = {} 680 self.GOTO = {} 681 #shelve not working with osets 682 #self.Log.items = c 683 for i in range(len(c)): 684 for item in c[i]: 685 a = self.NextToDot(item) 686 if a in self.gr.terminals: 687 state = self.goto(c[i],a) 688 try: 689 j = c.index(state) 690 self.add_action(i,a,'shift',j) 691 except IndexError: 692 if _DEBUG: print "no state" 693 elif a == "": 694 """ Dot at right end """ 695 l = self.gr.rules[item[0]][0] 696 if l != self.gr.aug_start : 697 self.dotatend(item,i) 698 else: 699 """ last rule """ 700 self.add_action(i,self.gr.endmark,'accept',[]) 701 for s in self.gr.nonterminals: 702 state = self.goto(c[i],s) 703 try: 704 j = c.index(state) 705 self.GOTO[(i,s)] = j 706 except ValueError: 707 pass
708
709 - def rules_precedence(self):
710 """Rule precedence obtained as the precedence of the right 711 most terminal. """ 712 self.precedence={} 713 for i in range(len(self.gr.rules)): 714 if len(self.gr.rules[i]) == 4: 715 self.precedence[i] = self.gr.rules[i][3] 716 else: 717 self.precedence[i] = None 718 if self.operators: 719 self.gr.rules[i][1].reverse() 720 for s in self.gr.rules[i][1]: 721 if self.operators.has_key(s): 722 self.precedence[i] = self.operators[s] 723 break 724 self.gr.rules[i][1].reverse() 725 726 if _DEBUG: 727 print "Precedence %s" %self.precedence
728
729 - def add_action(self,i,a,action,j):
730 """Set C{(action,j)} for state C{i} and symbol C{a} or raise 731 conflict error. Conficts are resolved using the following 732 rules: 733 - shift/reduce: if precedence/assoc information is available 734 try to use it; otherwise conflict is resolved in favor of shift 735 - reduce/reduce: choosing the production rule listed first 736 """ 737 if self.ACTION.has_key((i,a)) and self.ACTION[(i,a)] != (action,j): 738 action1 , j1 = self.ACTION[(i,a)] 739 if _DEBUG: 740 print "LRconflit %s %s %s %s %s %s" %(action,j,action1,j1, i,a) 741 if action1 == 'shift' and action == 'reduce': 742 self.resolve_shift_reduce(i,a,j1,j) 743 elif action == 'shift' and action1 == 'reduce': 744 self.resolve_shift_reduce(i,a,j,j1) 745 elif action == 'reduce' and action1 == 'reduce': 746 if self.Log.noconflicts: 747 # RESOLVED by choosing first rule 748 if j > j1: 749 self.ACTION[(i,a)] = (action,j1) 750 else: 751 self.ACTION[(i,a)] = (action,j) 752 self.Log.add_conflict('rr',i,a,j1,j) 753 else: 754 raise LRConflictError(i,a) 755 else: 756 self.ACTION[(i,a)] = (action,j)
757
758 - def resolve_shift_reduce(self,i,a,s,r):
759 """Operators precedence resolution or standard option: shift 760 761 C{s}: rule for shift 762 C{r}: rule for reduce 763 764 """ 765 try: 766 if self.operators and self.operators.has_key(a) and self.precedence.has_key(r) and self.precedence[r]: 767 prec_op, assoc_op = self.operators[a] 768 if (self.precedence[r][0] > prec_op) or (self.precedence[r][0] == prec_op and self.precedence[r][1] =='left'): 769 self.ACTION[(i,a)] = ('reduce',r) 770 if _DEBUG: print "solved reduce %s" %r 771 else: 772 self.ACTION[(i,a)] = ('shift',s) 773 if _DEBUG: print "solved shift %s" %s 774 else: 775 self.ACTION[(i,a)] = ('shift',s) 776 if _DEBUG: print "solved shift %s" %s 777 except (AttributeError, TypeError, KeyError,NameError): 778 if self.Log.noconflicts: 779 # choose to shift 780 self.ACTION[(i,a)] = ('shift',s) 781 if _DEBUG: print "choose shift %s for action (%s,%s)" %(s,i,a) 782 self.Log.add_conflict('sr',i,a,s,r) 783 if _DEBUG: print " %s for action (%s,%s)" %(self.Log.conflicts,i,a) 784 785 else: 786 raise LRConflictError(i,a)
787
788 -class SLRtable(LRtable):
789 """Class for construction of a C{SLR} table 790 791 C{SLR} items represented by a pair of integers C{(number of 792 rule,position of dot)} 793 794 (aho86:_compil page 221) 795 """ 796
797 - def dotatend(self,item,i):
798 n, k = item 799 l = self.gr.rules[item[0]][0] 800 for a in self.gr.follow[l]: 801 self.add_action(i,a,'reduce',n)
802
803 - def closure(self,items):
804 """The closure of a set of C{LR(0)} items C{I} is the set of 805 items constructed from C{I} by the two rules: 806 - every item of I is in closure(I) 807 - If A -> s.Bt in closure(I) and B -> r, then add B ->.r to closure(I) 808 (aho86:_compil page 223) 809 """ 810 added = {} 811 for l in self.gr.nonterminals: 812 added[l] = 0 813 close = items[:] 814 e = 1 815 while e: 816 e = 0 817 for i in close: 818 s = self.NextToDot(i) 819 if s in self.gr.nonterminals and added[s]==0 and self.gr.ntr.has_key(s): 820 for n in self.gr.ntr[s]: 821 close.append((n,0)) 822 added[s] = 1 823 e = 1 824 return close
825
826 - def goto(self,items,s):
827 """ goto(I,X) where I is a set of items and X a grammar symbol 828 is the closure of the set of all items A -> sX.r such that 829 A -> s.Xr is in I""" 830 valid = osets.Set([]) 831 for item in items: 832 if self.NextToDot(item) == s: 833 n, i = item 834 valid.append((n, i + 1)) 835 return self.closure(valid)
836
837 - def items(self):
838 """ An LR(0) item of a grammar G is a production of G with a dot at 839 some position on the right hand side. 840 It is represented by the rule number and the position of 841 the dot 842 843 @return: a set of sets of items 844 """ 845 c = osets.Set([self.closure(osets.Set([(len(self.gr.rules) - 1,0)]))]) 846 symbols = self.gr.terminals + self.gr.nonterminals 847 e = 1 848 while e: 849 e = 0 850 for i in c: 851 for s in symbols: 852 valid = self.goto(i,s) 853 if valid != [] and valid not in c: 854 c.append(valid) 855 e = 1 856 return c
857
858 - def print_items(self,c):
859 """Print SLR items """ 860 s = "" 861 j = 0 862 for i in c: 863 s = s+ "I_%d: \n" %j 864 for item in i: 865 r, p = item 866 lhs = self.gr.rules[r][0] 867 rhs = self.gr.rules[r][1] 868 s = s + "\t %s -> %s . %s \n" %(lhs, 869 string.join(rhs[:p]," "), string.join(rhs[p:]," ")) 870 j += 1 871 return s
872
873 - def NextToDot(self,item):
874 """ returns symbol next to te dot or empty string""" 875 n, i = item 876 try: 877 s = self.gr.rules[n][1][i] 878 except IndexError: 879 s = "" 880 return s
881 882 883
884 -class LR1table(LRtable):
885 """ 886 Class for construction of a LR1 table 887 888 Items are represented by a pair of integers (number of rule, position of dot) 889 """ 890
891 - def closure(self,items):
892 """The closure of a set of C{LR(1)} items C{I} is the set of items construted 893 from I by the two rules: 894 - every item of C{I} is in C{closure(I)} 895 896 - If C{[A -> s.Bt,a]} in C{closure(I)},for C{B ->r} and 897 each terminal C{b} in C{first(ta)}, add C{[B ->.r,b]} 898 to C{closure(I)} 899 """ 900 close = items 901 e = 1 902 while e: 903 e = 0 904 for i in close: 905 s = self.NextToDot(i) 906 sa = self.gr.FIRST(self.AfterDot(i)) 907 if s in self.gr.nonterminals and self.gr.ntr.has_key(s): 908 for n in self.gr.ntr[s]: 909 for b in sa: 910 e = close.append((n,0,b)) 911 return close
912
913 - def goto(self,items,s):
914 """ goto(I,X) where I is a set of items and X a grammar symbol 915 is the closure of the set of all items (A -> sX.r,a) such that 916 (A -> s.Xr,a) in I""" 917 valid = osets.Set([]) 918 for item in items: 919 if self.NextToDot(item) == s: 920 n, i, t = item 921 valid.append((n, i + 1,t)) 922 return self.closure(valid)
923
924 - def items(self):
925 """ An LR(1) item of a grammar G is a production of G with a dot at 926 some position of the right hand side and a terminal: 927 (rule_number,dot_position,terminal) 928 (aho86:_compil page 231) 929 """ 930 c = osets.Set([ self.closure(osets.Set([(len(self.gr.rules) - 1,0,self.gr.endmark)]))]) 931 symbols = self.gr.terminals + self.gr.nonterminals 932 e = 1 933 while e: 934 e = 0 935 for i in c: 936 for s in symbols: 937 valid=self.goto(i,s) 938 if valid != [] : 939 if c.s_append(valid): e = 1 940 return c
941
942 - def print_items(self,c):
943 """Print C{LR(1)} items """ 944 s = "" 945 j = 0 946 for i in c: 947 s = s+ "I_%d: \n" %j 948 for item in i: 949 r, p, t = item 950 lhs = self.gr.rules[r][0] 951 rhs = self.gr.rules[r][1] 952 s = s + "\t %s -> %s . %s , %s\n" %(lhs, 953 string.join(rhs[:p]," "), string.join(rhs[p:]," "),t) 954 j += 1 955 print s 956 return s
957
958 - def NextToDot(self,item):
959 """ returns symbol next to the dot or empty string""" 960 n, i, t = item 961 try: 962 s = self.gr.rules[n][1][i] 963 except IndexError: 964 s = "" 965 return s
966
967 - def AfterDot(self,item):
968 """ returns symbol next to the dot or empty string""" 969 n, i, t = item 970 try: 971 s = self.gr.rules[n][1][i+1:] 972 except IndexError: 973 s = [] 974 s.append(t) 975 return s
976
977 - def dotatend(self,item,i):
978 n, k, t = item 979 self.add_action(i,t,'reduce',n)
980 981
982 -class LALRtable1(LRtable):
983 """Class for construction of C{LALR(1)} tables""" 984
985 - def make_action_goto(self):
986 """ Make C{action[i,X]} and C{goto[i,X]} 987 all pairs C{(i,s)} not in action and goto dictionaries are 'error' """ 988 self.gr.DERIVE_NT() 989 c = self.items() 990 if _DEBUG: 991 print self.print_items(c) 992 self.ACTION = {} 993 self.GOTO = {} 994 #shelve not working with osets 995 #self.Log.items = c 996 for i in range(len(c)): 997 for item in c[i].keys(): 998 a = self.NextToDot(item) 999 if a in self.gr.terminals: 1000 state =self.goto(c[i],a) 1001 j = self.get_union(c,state) 1002 if j != -1: 1003 self.add_action(i,a,'shift',j) 1004 elif a == "": 1005 """ Dot at right end """ 1006 l = self.gr.rules[item[0]][0] 1007 if l != self.gr.aug_start : 1008 self.dotatend(item,c,i) 1009 else: 1010 """ last rule """ 1011 self.add_action(i,self.gr.endmark,'accept',[]) 1012 for s in self.gr.nonterminals: 1013 state = self.goto(c[i],s) 1014 j = self.get_union(c,state) 1015 if j != -1: 1016 self.GOTO[(i,s)] = j
1017
1018 - def items(self):
1019 """ An C{LALR(1)} item of a grammar C{G} is a production of 1020 C{G}with a dot at some position of the right hand side and a 1021 list of terminals: is coded as a dictonary with key 1022 C{(rule_number,dot_position)} and value a set of terminals 1023 """ 1024 i0 = {} 1025 i0[(len(self.gr.rules) - 1,0)] = osets.Set([self.gr.endmark]) 1026 c = osets.Set([self.closure(i0)]) 1027 symbols = self.gr.terminals + self.gr.nonterminals 1028 e = 1 1029 while e: 1030 e = 0 1031 for i in c: 1032 for s in symbols: 1033 if self.core_merge(c,self.goto(i,s)) == 1: 1034 e = 1 1035 return c
1036 - def print_items(self,c):
1037 """Print C{LALR(1)} items """ 1038 s = "" 1039 j = 0 1040 for i in range(len(c)): 1041 s = s+ "I_%d: \n" %i 1042 for item in c[i].keys(): 1043 r, p = item 1044 lhs = self.gr.rules[r][0] 1045 rhs = self.gr.rules[r][1] 1046 s = s + "\t %s -> %s . %s, %s \n" %(lhs, 1047 string.join(rhs[:p]," "), string.join(rhs[p:]," "),c[i][item]) 1048 print s 1049 return s
1050
1051 - def goto(self,items,s):
1052 """ C{goto(I,X)} where C{I} is a set of items and C{X} a grammar symbol 1053 is the closure of the set of all items C{(A -> sX.r,a)} such that 1054 C{(A -> s.Xr,a)} in C{I}""" 1055 valid = {} 1056 for (n,i) in items.keys(): 1057 if self.NextToDot((n,i)) == s: 1058 if not valid.has_key((n,i+1)): 1059 valid[(n,i + 1)] = osets.Set([]) 1060 for t in items[(n,i)]: 1061 valid[(n, i + 1)].append(t) 1062 return self.closure(valid)
1063 1064
1065 - def closure(self,items):
1066 """The closure of a set of C{LR(1)} items I is the set of items construted 1067 from I by the two rules: 1068 - every item of I is in closure(I) 1069 1070 - If [A -> s.Bt,a] in closure(I),for B ->r and each terminal b in 1071 first(ta), add [B ->.r,b] to closure(I) 1072 """ 1073 e = 1 1074 while e: 1075 e = 0 1076 for i in items.keys(): 1077 s = self.NextToDot(i) 1078 if s in self.gr.nonterminals and self.gr.ntr.has_key(s): 1079 l = self.AfterDot(i,items) 1080 for n in self.gr.ntr[s]: 1081 if not items.has_key((n,0)): 1082 items[(n,0)] = osets.Set([]) 1083 if items[(n,0)].s_extend(l) == 1 : 1084 e = 1 1085 return items
1086
1087 - def get_union(self,c,j):
1088 """ """ 1089 for i in c: 1090 if i.keys() == j.keys(): 1091 return c.index(i) 1092 return -1
1093
1094 - def core_merge(self,c,j):
1095 """ """ 1096 if j == {} or j in c : return 0 1097 e = 2 1098 for i in c: 1099 if i.keys() == j.keys(): 1100 e = 0 1101 for k in j.keys(): 1102 if i[k].s_extend(j[k]) == 1: 1103 e = 1 1104 break 1105 if e == 2: 1106 e = c.s_append(j) 1107 return e
1108
1109 - def NextToDot(self,item):
1110 """ returns symbol next to the dot or empty string""" 1111 n, i = item 1112 try: 1113 s = self.gr.rules[n][1][i] 1114 except IndexError: 1115 s = "" 1116 return s
1117
1118 - def AfterDot(self,item,items):
1119 """ returns FIRST of strings after the dot concatenated with lookahead""" 1120 n, i = item 1121 try: 1122 s = self.gr.rules[n][1][i+1:] 1123 except IndexError: 1124 s = [] 1125 sa = osets.Set([]) 1126 for a in items[item]: 1127 s.append(a) 1128 sa.s_extend(self.gr.FIRST(s)) 1129 del s[len(s)-1] 1130 return sa
1131 1132
1133 - def dotatend(self,item,c,i):
1134 n, k = item 1135 for a in c[i][item]: 1136 self.add_action(i,a,'reduce',n)
1137
class LALRtable(LALRtable1):
    """Class for construction of LALR tables.

    Builds kernel LR(0) item sets, computes spontaneous and propagated
    lookaheads, and fills in the ACTION/GOTO tables (aho86:_compil,
    efficient LALR(1) construction).
    """

    def make_action_goto(self):
        """Build C{self.ACTION} and C{self.GOTO} from the collection of
        kernel LALR(1) items returned by C{self.items()}.

        Any pair C{(state,symbol)} absent from the dictionaries means 'error'.
        """
        # Precompute terminal derivability and the transitive closure of the
        # nonterminal derivation relation used below.
        self.gr.DERIVE_T()
        self.gr.TransClose()
        c = self.items()
        if _DEBUG:
            print self.print_items(c)
        """ make action[i,X] and goto[i,X]
        all pairs (i,s) not in action and goto dictionaries are 'error' """
        self.ACTION = {}
        self.GOTO = {}
        #shelve not working with osets
        #self.Log.items = c
        for i in range(len(c)):
            for item in c[i].keys():
                C = self.NextToDot(item)
                if C in self.gr.nonterminals:
                    # Shift on every terminal that can begin a derivation of C.
                    if self.gr.derive_ter.has_key(C):
                        for a in self.gr.derive_ter[C]:
                            if self.goto_ref.has_key((i,a)):
                                j = self.goto_ref[(i,a)]
                                self.add_action(i,a,'shift',j)
                    # Reduce by epsilon-rules of nonterminals reachable from C.
                    if self.gr.close_nt.has_key(C):
                        for A in self.gr.close_nt[C].keys():
                            """Error: ignores end string s in C->*As"""
                            for p in self.gr.close_nt[C][A]:
                                r = self.AfterDotTer(item,c[i],p)
                                if self.gr.ntr.has_key(A):
                                    for k in self.gr.ntr[A]:
                                        if self.gr.rules[k][1] == []:
                                            for a in r:
                                                self.add_action(i,a,'reduce',k)
                elif C in self.gr.terminals:
                    if self.goto_ref.has_key((i,C)):
                        j = self.goto_ref[(i,C)]
                        self.add_action(i,C,'shift',j)
                else:
                    """ Dot at right end """
                    l = self.gr.rules[item[0]][0]
                    if l != self.gr.aug_start:
                        self.dotatend(item,c,i)
                    else:
                        """ last rule """
                        self.add_action(i,self.gr.endmark,'accept',[])
            # GOTO entries for state i on each nonterminal.
            # NOTE(review): reconstructed placement (inside the state loop,
            # after the item loop) — confirm against upstream sources.
            for s in self.gr.nonterminals:
                state = self.goto(c[i],s)
                j = self.get_union(c,state)
                if j != -1:
                    self.GOTO[(i,s)] = j

    def items(self):
        """Compute the collection of kernel LALR(1) items.

        An C{LALR(1)} kernel item of a grammar C{G} is a production of C{G}
        with a dot at some position of the right hand side (except the first)
        and a list of terminals: coded as a dictionary with key
        C{(rule_number,dot_position)} and value a set of terminals.

        Three phases: (1) kernel LR(0) item sets plus the C{goto_ref}
        transition map; (2) discover spontaneous lookaheads and record
        propagation links using the dummy symbol trick; (3) propagate
        lookaheads to a fixed point.
        """
        i0 = {}
        # Kernel of the initial state: augmented start rule with the endmarker.
        i0[(len(self.gr.rules) - 1,0)] = osets.Set([self.gr.endmark])
        c = osets.Set([i0])
        symbols = self.gr.terminals + self.gr.nonterminals
        """ kernel LR(0) items """
        self.goto_ref = {}
        e = 1
        while e:
            e = 0
            for i in c:
                for s in symbols:
                    valid = self.goto(i,s)
                    if valid != {}:
                        if c.s_append(valid): e = 1
                        self.goto_ref[(c.index(i),s)] = c.index(valid)
        """ Discovering propagated and spontaneous lookaheads for
        kernel items k and grammar symbol s"""
        lh = {}
        for k in c:
            nk = c.index(k)
            lh[nk] = {} #osets.Set([])
            for (n,i) in k.keys():
                lh[nk][(n,i)] = osets.Set([])
                # Close {item: {dummy}}; lookaheads equal to the dummy are
                # propagated from (n,i), the others are spontaneous.
                j = {}
                j[(n,i)] = osets.Set([(self.gr.dummy)])
                j = self.closure(j)
                for s in symbols:
                    for (m1,j1) in j.keys():
                        if self.NextToDot((m1,j1)) == s:
                            for a in j[(m1,j1)]:
                                if a == self.gr.dummy:
                                    # propagation link: state goto(nk,s), item (m1,j1+1)
                                    lh[nk][(n,i)].append((self.goto_ref[(nk,s)],m1,j1+1))
                                else:
                                    # spontaneous lookahead
                                    c[self.goto_ref[(nk,s)]][(m1,j1+1)].append(a)
                del j
        """ Propagate lookaheads """
        # c[0][(len(self.gr.rules) - 1,0)].s_append(self.gr.endmark)
        e = 1
        while e:
            e = 0
            for k in c:
                nk = c.index(k)
                for (n,i) in k.keys():
                    for (m,n1,i1) in lh[nk][(n,i)]:
                        if c[m][(n1,i1)].s_extend(k[(n,i)]) == 1:
                            e = 1
        return c

    def goto(self,items,s):
        """C{goto(I,X)} where I is a set of kernel items and X a grammar
        symbol: the kernel of the closure of the set of all items
        (A -> sX.r,a) such that (A -> s.Xr,a) is in I.

        Lookahead sets of the result are left empty; they are filled in
        later by the lookahead discovery/propagation phases.
        """
        valid = {}
        for (n,i) in items.keys():
            x = self.NextToDot((n,i))
            if x == s:
                if not valid.has_key((n,i+1)):
                    valid[(n,i + 1)] = osets.Set([])
            # Kernel items contributed by the closure: rules of nonterminals
            # derivable from x whose RHS starts with s.
            # NOTE(review): reconstructed indentation — this applies to every
            # item's next symbol x, not only when x == s; confirm.
            if self.gr.close_nt.has_key(x):
                for a in self.gr.close_nt[x].keys():
                    if self.gr.ntr.has_key(a):
                        for k in self.gr.ntr[a]:
                            if self.gr.rules[k][1] != [] and self.gr.rules[k][1][0] == s:
                                valid[(k,1)] = osets.Set([])
        return valid

    def NextToDot(self,item):
        """ returns symbol next to the dot or empty string"""
        n, i = item
        try:
            s = self.gr.rules[n][1][i]
        except IndexError:
            s = ""
        return s

    def AfterDotTer(self,item,items,path):
        """Return FIRST of the string after the dot concatenated with the
        lookahead, guided by the derivation C{path}.

        @param path: pair C{(p,n)} — when C{n} is false, C{p} is returned
            directly as the lookahead set.
        """
        if path:
            p, n = path
            if not n:
                return p
        l, i = item
        try:
            f = self.gr.FIRST(self.gr.rules[l][1][i+1:])
            ns = self.gr.nullable[string.join(self.gr.rules[l][1][i+1:])]
        except IndexError:
            f = []
            ns = 1
        # When the tail is nullable the item's own lookaheads apply,
        # otherwise FIRST of the tail.
        if ns:
            return items[item]
        else:
            return f
1298 1299
class LogLR:
    """Report of an LR table construction.

    @ivar noconflicts: whether conflicts are to be resolved
    @ivar expect: number of shift/reduce conflicts expected
    @type expect: integer
    @ivar items: set of LR items (C{None} until filled in)
    @ivar conflicts: conflicts found during LR table construction,
        keyed by kind: 'rr' and 'sr'
    @type conflicts: dictionary
    """

    def __init__(self, noconflicts, expect):
        self.noconflicts = noconflicts
        self.expect = expect
        self.conflicts = {}
        self.items = None

    def add_conflict(self, type, i, a, value1, value2):
        """Record one conflict of kind C{type} at state C{i} on symbol C{a}."""
        self.conflicts.setdefault(type, []).append((i, a, value1, value2))
1319
class LRparser:
    """Class for LR parser

    @ivar cfgr: context free grammar
    @ivar rules: grammar rules
    @ivar terminals: grammar terminals
    @ivar nonterminals: grammar nonterminals
    @ivar table: LR parsing table
    @ivar ACTION: Action function
    @ivar GOTO: Goto function

    @ivar tokens: tokens to be parsed
    @ivar context: computational context
    @ivar output: list of grammar rules used for parsing C{tokens}
      (right derivation in reverse)
    @ivar stack: LR stack with pairs C{(state,token)}
    """

    def __init__(self,grammar,table_shelve,no_table=1,tabletype=LALRtable,operators=None,noconflicts=1,expect=0,**args):
        """
        @param grammar: is a list for productions;
          each production is a tuple C{(LeftHandside,RightHandside,SemFunc,Prec?)}
          with C{LeftHandside} nonterminal, C{RightHandside} list of symbols,
          C{SemFunc} syntax-direct semantics, if present
          C{Prec (PRECEDENCE,ASSOC)} for ambiguous rules

          First production is for start symbol

        @param table_shelve: file where parser is saved
        @type table_shelve: string
        @param tabletype: type of LR table: C{SLR}, C{LR1}, C{LALR}
        @type tabletype: LRtable class
        @param no_table: if 0 table_shelve is created anyway
        @type no_table: integer
        @param operators: precedence and associativity for operators
        @type operators: dictionary
        @param noconflicts: if 0 LRtable conflicts are not resolved,
          unless special operator rules
        @type noconflicts: integer
        @param expect: exact number of expected LR shift/reduce conflicts
        @type expect: integer
        @param args: extra arguments; key C{nosemrules} if 1 no
          semantic rules are applied
        @type args: dictionary
        @raise TableError: when the shelved table is stale (older version)
          or incomplete (no 'log' entry).
        """
        self.cfgr = CFGrammar(grammar)
        self.rules = self.cfgr.rules
        self.terminals = self.cfgr.terminals
        self.nonterminals = self.cfgr.nonterminals
        self.endmark = self.cfgr.endmark
        if args.has_key('nosemrules'):
            self.nosemrules = args['nosemrules']
        else:
            self.nosemrules = 0

        d = shelve.open(table_shelve)

        if d and no_table:
            # Reuse the previously shelved tables.
            self.ACTION = d['action']
            self.GOTO = d['goto']
            if d.has_key('version'):
                if d['version'] < _Version:
                    raise TableError(table_shelve)
            try:
                self.Log = d['log']
            except KeyError:
                raise TableError(table_shelve)
        else:
            # Build the table from scratch and shelve it.
            self.table = tabletype(self.cfgr,operators,noconflicts,expect)
            d['version'] = _Version
            d['action'] = self.ACTION = self.table.ACTION
            d['goto'] = self.GOTO = self.table.GOTO
            d['log'] = self.Log = self.table.Log
        d.close()

    def __str__(self):
        """@return: the LR parsing table showing for each state the
        action and goto function """
        # Tabular layout for few terminals, one-entry-per-line otherwise.
        l = (map(lambda x: x[0],self.ACTION.keys()))
        l.sort()
        a1 = "\nState\n"
        if len(self.terminals) < 20:
            for a in self.terminals:
                a1 = a1+" \t%s" %a
            for i in osets.Set(l):
                a3 = "\n%s" % i
                for a in self.terminals:
                    if self.ACTION.has_key((i,a)):
                        if self.ACTION[i,a][0] == "shift": x = "s"
                        else: x = "r"
                        a2 = "\t%s%s" %(x,self.ACTION[i,a][1])
                    else:
                        a2 = "\t"
                    a3 = a3+a2
                a1 = "%s%s" %(a1,a3)
            ac = a1
        else:
            for i in osets.Set(l):
                a3 = "%s\n" % i
                for a in self.terminals:
                    if self.ACTION.has_key((i,a)):
                        if self.ACTION[i,a][0] == "shift": x = "s"
                        else: x = "r"
                        a3 = a3+"%s = %s%s\n" %(a,x,self.ACTION[i,a][1])
                a1 = "%s%s" %(a1,a3)
            ac = a1

        l = (map(lambda x: x[0],self.GOTO.keys()))
        l.sort()
        a1 = "\nState\n"
        if len(self.nonterminals) < 20:
            for a in self.nonterminals:
                a1 = a1 + " \t%s" %a
            for i in osets.Set(l):
                a3 = "\n%s" % i
                for a in self.nonterminals:
                    if self.GOTO.has_key((i,a)):
                        a2 = "\t%s" %self.GOTO[(i,a)]
                    else:
                        a2 = "\t"
                    a3 = a3 + a2
                a1 = "%s%s" %(a1,a3)
        else:
            for i in osets.Set(l):
                a3 = "%s\n" % i
                for a in self.nonterminals:
                    if self.GOTO.has_key((i,a)):
                        a3 = a3 + "%s = %s\n" %(a,self.GOTO[(i,a)])
                a1 = "%s%s" %(a1,a3)
        go = a1
        return "Action table:\n %s\n Goto table:%s\n" % (ac,go)

    def parsing(self,tokens,context = None):
        """LR Parsing Algorithm (aho86:_compil, page 218)
        @param tokens: pairs (TOKEN, SPECIAL_VALUE)
        @param context: a computational context for semantic actions

        @return: parsed result (semantic value left on top of the stack)
        @raise LRParserError: on a state/token pair with no action
        @raise SemanticError: when a semantic rule fails
        """
        self.stack = Stack()
        self.stack.push((0,[]))
        self.tokens = tokens
        # NOTE: mutates the caller's token list by appending the endmarker.
        self.tokens.append((self.endmark,self.endmark))
        self.context = context
        self.output = []
        self.ip = 0
        while 1:
            s = self.stack.top()[0]
            a = self.tokens[self.ip][0]
            if _DEBUG:
                print "Input: %s\nState: %s" %(map(lambda x:x[0],self.tokens[self.ip:]),s)
                print "Stack: %s" %self.stack
            try:
                if self.ACTION[s,a][0] == 'shift':
                    if _DEBUG: print "Action: shift\n"
                    self.stack.push((self.ACTION[s,a][1], self.tokens[self.ip][1]))
                    self.ip = self.ip + 1
                elif self.ACTION[s,a][0] == 'reduce':
                    n = self.ACTION[s,a][1]
                    if _DEBUG: print "Action: reduce %s %s\n" %(n,str(self.rules[n]))
                    # Pop one (state,value) pair per RHS symbol; values are
                    # collected in reverse, hence the reverse() below.
                    semargs = [self.stack.pop()[1] for i in range(len(self.rules[n][1]))]
                    semargs.reverse()
                    if self.nosemrules:
                        reduce = []
                    else:
                        reduce = Reduction(self.rules[n][2],semargs,self.context)
                    del semargs
                    s1 = self.stack.top()[0]
                    a = self.rules[n][0]
                    self.stack.push((self.GOTO[s1,a],reduce))
                    self.output.append(n)
                elif self.ACTION[s,a] == ('accept', []):
                    break
                else:
                    raise LRParserError(s,a)
            except KeyError:
                if _DEBUG: print "Error in action: %s" %self.ACTION
                raise LRParserError(s,a)
            except SemanticError, m:
                if _DEBUG: print "Semantic Rule %d %s" %(n,self.rules[n][2])
                raise SemanticError(m,n,self.rules[n][2])
        return self.stack.top()[1]

    def parse_grammar(self,st,context,args):
        """
        Transforms a string into a grammar description

        @param st: is a string representing the grammar rules, with
        default symbols as below. First rule for start.

        I{Example}::
          reg -> reg + reg E{lb}E{lb} self.OrSemRule E{rb}E{rb}
          // priority 'left'|
          ( reg ) E{lb}E{lb}self.ParSemRuleE{rb}E{rb} ;
        where:

          - rulesym="->" production symbol
          - rhssep='' RHS symbols separator
          - opsym='//' operator definition separator
          - semsym=E{lb}E{lb} semantic rule start marker
          - csemsym=E{rb}E{rb} semantic rule end marker
          - rulesep='|' separator for multiple rules for a LHS
          - ruleend=';' end marker for one LHS rule"""
        self.pg = Yappy_grammar(**args)
        self.pg.input(st,context)
        return self.pg.context['rules']

    def gsrules(self,rulestr, **sym):
        """
        Transforms a string in a grammar description

        @param rulestr: is a string representing the grammar rules, with
        default symbols as below.

        @param sym: Dictionary with symbols used. Default ones:
          - rulesym="->" production symbol
          - rhssep='' RHS symbols separator
          - opsym='//' operator definition separator
          - semsym=E{lb}E{lb} semantic rule start marker
          - csemsym=E{rb}E{rb} semantic rule end marker
          - rulesep='|' separator for multiple rules for a LHS
          - ruleend=';' end marker for one LHS rule
        Example:
          reg -> reg + reg E{lb}E{lb} self.OrSemRule // (priority,'left') E{rb}E{rb} |
          ( reg ) E{lb}E{lb}self.ParSemRuleE{rb}E{rb} ;
        """
        if not sym:
            sym = Dict(rulesym="->",
                       rhssep='',
                       opsym='//',
                       semsym='{{',
                       csemsym='}}',
                       rulesep='|',
                       ruleend=';')
        gr = []
        rl = string.split(rulestr,sym['ruleend'])
        for l in rl:
            m = re.compile(sym['rulesym']).search(l)
            if not m: continue
            else:
                if m.start() == 0:
                    raise GrammarError(l)
                else: lhs = l[0:m.start()].strip()
                if m.end() == len(l):
                    raise GrammarError(l)
                else:
                    rhss = string.strip(l[m.end():])
                    if rhss == "[]":
                        # NOTE(review): for an empty RHS nothing seems to be
                        # appended to gr (the append below sits in the loop of
                        # the other branch) — looks buggy; confirm intent.
                        rhs = []
                        sem = EmptySemRule
                        op = None
                    else:
                        rhss = string.split(l[m.end():],sym['rulesep'])
                        for rest in rhss:
                            rest = string.strip(rest)
                            if rhss == "[]":
                                # NOTE(review): rhss is a list here, so this
                                # comparison with "[]" is always false — dead
                                # branch; confirm.
                                rhs = []
                                sem = EmptySemRule
                                op = None
                            else:
                                m = re.search(sym['semsym']+'(?P<opsem>.*)'+sym['csemsym'],rest)
                                if not m:
                                    rhs = string.split(rest,None)
                                    sem = DefaultSemRule
                                    op = None
                                else:
                                    if m.start() == 0:
                                        raise GrammarError(rest)
                                    else: rhs = string.split(rest[0:m.start()].strip())
                                    if m.group('opsem'):
                                        opsem = string.split(m.group('opsem'),sym['opsym'])
                                        if len(opsem) == 1:
                                            sem = string.strip(opsem[0])
                                            op = None
                                        elif len(opsem) == 2:
                                            sem = string.strip(opsem[0])
                                            op = string.strip(opsem[1])
                                        else:
                                            raise GrammarError(rest)
                                    else:
                                        # NOTE(review): eval(sem) below on a
                                        # function object (DefaultSemRule)
                                        # would raise TypeError — presumably
                                        # this path is never taken; confirm.
                                        sem = DefaultSemRule
                                        op = None
                            if op == None:
                                gr.append((lhs,rhs,eval(sem)))
                            else:
                                gr.append((lhs,rhs,eval(sem),eval(op)))
        return gr
1614 1615
class LRBuildparser:
    """Class for LR parser: without shelve and semantic rules (obsolete)."""

    def __init__(self,grammar):
        """Builds (only) the LALR table for C{grammar}."""
        self.table = LALRtable(grammar)

    def parsing(self,tokens):
        """LR Parsing Algorithm: recognition only, no semantic values.

        @param tokens: list of tokens; mutated by appending the endmarker.
        @raise LRParserError: when no action/goto entry exists for the
            current state/symbol pair.
        """
        self.stack = Stack()
        self.stack.push(0)
        self.input = tokens
        self.input.append(self.table.gr.endmark)
        self.output = []        # right derivation in reverse (rule numbers)
        self.ip = 0
        while 1:
            s = self.stack.top()
            a = self.input[self.ip]
            if not self.table.ACTION.has_key((s,a)):
                raise LRParserError(s,a)
            elif self.table.ACTION[s,a][0] == 'shift':
                # self.stack.push(a)
                self.stack.push(self.table.ACTION[s,a][1])
                self.ip = self.ip + 1
            elif self.table.ACTION[s,a][0] == 'reduce':
                n = self.table.ACTION[s,a][1]
                # Pop one state per RHS symbol of rule n.
                for i in range(len(self.table.gr.rules[n][1])):
                    self.stack.pop()
                s1 = self.stack.top()
                a = self.table.gr.rules[n][0]
                # self.stack.push(a)
                if not self.table.GOTO.has_key((s1,a)):
                    raise LRParserError(s1,a)
                else:
                    self.stack.push(self.table.GOTO[s1,a])
                self.output.append(n)
            elif self.table.ACTION[s,a] == ('accept', []):
                break
            else:
                raise LRParserError()
1659 1660 ############# Auxiliares ##################
def Dict(**entries):
    """Builds a dictionary out of C{argument=value} keyword arguments.

    C{Dict(a=1, b=2)} == C{{'a': 1, 'b': 2}}.
    """
    return dict(entries)
def grules(rules_list,rulesym="->",rhssep=None):
    """
    Transforms a list of rules in a grammar description. If a rule has
    no semantic rules, C{DefaultSemRule} is assumed.

    @param rules_list: is a list of bare strings and/or pairs/triples
        C{(rule,sem[,prec])} where rule is a string of the form:
          - Word rulesym Word1 ... Word2
          - Word rulesym []
    @param rulesym: LHS and RHS rule separator
    @param rhssep: RHS values separator (None for white chars)
    @return: a grammar description (list of C{(lhs,rhs,sem[,prec])} tuples)
    @raise GrammarError: on a rule with a missing or empty LHS/RHS, or an
        entry tuple of unexpected length
    """
    # Uses str methods instead of the Python-2-only `string` module
    # functions (string.strip/string.split) — identical behaviour, and
    # isinstance(r, str) instead of `type(r) is StringType`.
    gr = []
    sep = re.compile(rulesym)
    for r in rules_list:
        # A bare string means "use DefaultSemRule"; otherwise r = (rule, sem[, prec]).
        if isinstance(r, str):
            rule = r
        else:
            rule = r[0]
        m = sep.search(rule)
        if not m:
            continue
        if m.start() == 0:
            raise GrammarError(rule)
        lhs = rule[0:m.start()].strip()
        if m.end() == len(rule):
            raise GrammarError(rule)
        rest = rule[m.end():].strip()
        if rest == "[]":
            rhs = []                # "[]" denotes an epsilon rule
        else:
            rhs = rest.split(rhssep)
        if isinstance(r, str):
            gr.append((lhs,rhs,DefaultSemRule))
        elif len(r) == 3:
            gr.append((lhs,rhs,r[1],r[2]))
        elif len(r) == 2:
            gr.append((lhs,rhs,r[1]))
        else:
            raise GrammarError(r)
    return gr
1709 1710 ####################################################### 1711
1712 -class Yappy(LRparser):
1713 """ A basic class for parsing. 1714 1715 @ivar lex: a Lexer object 1716 """ 1717
1718 - def __init__(self,tokenize,grammar, table='YappyTab',no_table=1, 1719 tabletype=LALRtable,noconflicts=1,expect=0,**args):
1720 """ 1721 @param tokenize: same as for L{Lexer} 1722 @param grammar: if a string C{parse_grammar} is called 1723 1724 @param table: and no_table, tabletype same as for L{LRparser} 1725 1726 @param **args: dictionary where: 1727 - key C{tmpdir} is the directory 1728 where the parse table used by the Yappy Grammar is stored; 1729 - key C{usrdir} is the directory where the user tables are stored 1730 - key C{nosemrules} if 1 semantic actions are not applied 1731 """ 1732 self.lex = Lexer(tokenize) 1733 operators = None 1734 if self.lex.__dict__.has_key("operators"): 1735 operators = self.lex.operators 1736 if type(grammar) is StringType: 1737 grammar = self.parse_grammar(grammar,{'locals':locals()},args) 1738 if args.has_key('usrdir') and os.path.isdir(args['usrdir']): 1739 table = string.rstrip(args['usrdir']) + '/' + table 1740 if os.path.dirname(table)=="" or os.path.exists(os.path.dirname(table)): 1741 LRparser.__init__(self,grammar,table,no_table,tabletype,operators,noconflicts,expect,**args) 1742 else: 1743 sys.stderr.write("Directory %s do not exist\n" %table) 1744 sys.exit() 1745 if (self.Log.noconflicts and ((self.Log.conflicts.has_key('sr') and 1746 len(self.Log.conflicts['sr'])!= 1747 self.Log.expect) or self.Log.conflicts.has_key('rr'))): 1748 print "LR conflicts: number %s value %s" %(len(self.Log.conflicts['sr']),self.Log.conflicts) 1749 print """If it is Ok, set expect to the number of conflicts and build table again"""
1750
1751 - def input(self,str=None,context={},lexer=0):
1752 """ Reads from stdin or string and retuns parsed result 1753 1754 @param str: String to be parsed. If not given, reads from 1755 C{stdin}. 1756 @param context: some initial computational context 1757 @param lexer: if 1 only lexical analisys is performed 1758 1759 @return: a tuple C{(parsed result,context)} or 1760 only the C{parsed result} 1761 1762 """ 1763 if str: 1764 self.tokens = self.lex.scan(str) 1765 else: 1766 print "Input: ", 1767 self.tokens = self.lex.readscan() 1768 if lexer: 1769 return self.tokens 1770 self.context = context 1771 return self.parsing(self.tokens,self.context)
1772
1773 - def inputfile(self,FileName,context={}):
1774 """Reads input from file """ 1775 try: file = open(FileName,"r") 1776 except IOError: raise YappyError() 1777 return self.input(file.read(),context)
1778 1779
1780 - def parse_tree(self):
1781 """To be defined using output""" 1782 pass
1783
1784 - def test(self):
1785 """A test for each class""" 1786 pass
1787 1788 ######### Semantic Grammar Rules ##############
def expandSemRule(strargs,strfun):
    """Expands positional arguments C{$n} in the semantic-rule string
    C{strfun} to C{strargs + n + "]"}, e.g. with C{strargs="sargs["},
    C{"$1"} becomes C{"sargs[1]"}.

    Bug fix: the old implementation did a plain string replace per match,
    so when both C{$1} and C{$12} occurred, replacing C{$1} corrupted
    C{$12} into C{sargs[1]2}. A single C{re.sub} with a replacement
    function has no overlap problem.
    """
    return re.sub(r'\$(\d+)', lambda m: strargs + m.group(1) + "]", strfun)
1795
1796 -def Reduction(fun,sargs,context={}):
1797 """Reduction function for semantic rules: 1798 - C{fun} can be: 1799 -- a function 1800 -- or a string with positional arguments C{$n} that is expanded 1801 and evaluated with C{eval} 1802 1803 """ 1804 if callable(fun): 1805 return apply(fun,[sargs, context]) 1806 elif type(fun) is StringType: 1807 a = expandSemRule("sargs[",fun) 1808 l = context.get('locals',{}) 1809 l.update(locals()) 1810 return eval(a,context.get('globals',{}),l) 1811 else: 1812 raise SemanticError,'Wrong type: %s' %fun
1813 1814
def DefaultSemRule(sargs,context={}):
    """Default semantic rule: propagate the value of the first RHS symbol."""
    first = sargs[0]
    return first
1818
def EmptySemRule(sargs,context={}):
    """Semantic rule for an empty RHS: always yields a fresh empty list."""
    return list()
1821 1822 1823 1824 ######Parser f,grammars ##################
class Yappy_grammar(Yappy):
    """ A parser for grammar rules. See C{test()} for an example.

    Bootstraps Yappy: the grammar-description language itself is parsed
    with a Yappy parser whose semantic actions (the C{*Rule} methods
    below) build the user's grammar description.
    """

    def __init__(self,no_table=1, table='yappypar.tab',tabletype=LR1table,**args):
        """Builds the meta-grammar and its tokenizer, then initialises the
        underlying L{Yappy} parser (table cached in C{table})."""
        grammar = grules([
            ("G -> RULE G",self.GRule),
            ("G -> []",EmptySemRule),
            ("RULE -> ID rulesym MULTI ruleend",self.RULERule) ,
            ("MULTI -> RHS rulesep MULTI",self.MULTIRule),
            ("MULTI -> RHS",self.MULTIRule),
            ("RHS -> []",EmptySemRule), #RHS->OPSEM not allowed; epsilon-rule
            ("RHS -> RH OPSEM",self.RHSRule),
            ("RH -> ID RH",self.RHRule),
            ("RH -> ID",self.RHRule),
            ("OPSEM -> []",self.OPSEMRule),
            # ("OPSEM -> semsym ID csemsym",self.OPSEMRule),#OPSEM->OP not allowed
            # ("OPSEM -> semsym ID OP csemsym",self.OPSEMRule),
            ("OPSEM -> IDS",self.OPSEMRule1),
            ("OPSEM -> IDS OP",self.OPSEMRule1),
            ("OP -> opsym OPV",self.OPRule),
            ("OPV -> ID ID ", self.OPVRule)
            ])

        # The "{{...}}" token carries the semantic-rule text verbatim
        # (markers stripped); the catch-all ".*" yields plain IDs.
        tokenize = [
            ("\{\{.*\}\}",lambda x: ("IDS",string.strip(x[2:-2]))),
            ("\s+",""),
            ("->",lambda x: ("rulesym",x)),
            ("\|",lambda x: ("rulesep",x)),
            (";",lambda x: ("ruleend",x)),
            # ("}}",lambda x: ("csemsym",x)),
            # ("{{",lambda x: ("semsym",x)),
            ("//",lambda x: ("opsym",x)),
            (".*",lambda x: ("ID",x))]
        if args.has_key('tmpdir'):
            args1 = {'usrdir':string.rstrip(args['tmpdir'],'/')}
        else:
            args1 = {}
        Yappy.__init__(self,tokenize,grammar,table,no_table,**args1)

    def OPVRule(self,arg,context):
        """OPV -> ID ID : validates (precedence, associativity).
        @raise SemanticError: on a non-integer precedence or an unknown
            associativity."""
        try:
            int(arg[0])
        except ValueError:
            raise SemanticError("Precedence must be an integer: %s given" %arg[0])
        if arg[1] != 'left' and arg[1] != 'right' and arg[1] != 'noassoc':
            raise SemanticError("Associativity must be 'left' or 'right' or 'noassoc': %s\
 given" %arg[1])
        return (int(arg[0]),arg[1])

    def OPRule(self,arg,context):
        """OP -> opsym OPV : passes the (precedence, associativity) pair up."""
        return arg[1]

    def OPSEMRule(self,arg,context):
        """OPSEM (marker form / empty) : semantic-rule name, optionally with
        operator info; 'DefaultSemRule' when absent."""
        if len(arg) == 4:
            return (arg[1],arg[2])
        if len(arg) == 3:
            return arg[1]
        if len(arg) == 0:
            return 'DefaultSemRule'

    def OPSEMRule1(self,arg,context):
        """OPSEM -> IDS [OP] : semantic-rule text, optionally paired with
        operator info; 'DefaultSemRule' when absent."""
        if len(arg) == 2:
            return (arg[0],arg[1])
        if len(arg) == 1:
            return arg[0]
        if len(arg) == 0:
            return 'DefaultSemRule'

    def RHRule(self,arg,context):
        """RH -> ID [RH] : collects the RHS symbols into a list."""
        if len(arg) == 1:
            return [arg[0]]
        if len(arg) == 2:
            return [arg[0]]+arg[1]

    def RHSRule(self,arg,context):
        """RHS -> RH OPSEM : pairs the RHS symbol list with its semantics."""
        return (arg[0],arg[1])

    def MULTIRule(self,arg,context):
        """MULTI -> RHS [rulesep MULTI] : collects the alternatives of one
        LHS into a list."""
        if len(arg) == 1:
            return [arg[0]]
        else:
            return [arg[0]]+arg[2]

    def RULERule(self,arg,context):
        """RULE -> ID rulesym MULTI ruleend : one grammar tuple per
        alternative; semantic-rule strings are evaluated in the caller's
        namespace (C{context['locals']})."""
        lhs = arg[0]

        def grule(self,l):
            # l == [] encodes an epsilon alternative.
            if l == []: return (lhs,[],EmptySemRule)
            if type(l[1]) is TupleType:
                # l[1] is (sem, (precedence, associativity))
                return (lhs,l[0],eval(l[1][0],globals(),context['locals']),l[1][1])
            else:
                return (lhs,l[0],eval(l[1],globals(),context['locals']))

        return map(lambda l:grule(self,l) ,arg[2])

    def GRule(self,args,context):
        """G -> RULE G : accumulates all rules in C{context['rules']}
        (prepending, so the textual order is preserved)."""
        if context.has_key('rules'):
            context['rules'] = args[0]+context['rules']
        else:
            context['rules'] = args[0]
        return []

    def test(self):
        """A small self-test: parses two example grammar descriptions and
        returns the resulting rule list."""
        st = """
        reg -> reg + reg {{DefaultSemRule}} // 200 left |
               reg reg {{DefaultSemRule}} // 200 left |
               reg * {{DefaultSemRule}} |
               ( reg ) {{DefaultSemRule}} |
               id {{lambda l,c:l[0]}};
        reg -> ;
        a -> reg | reg ;
        """
        st1 = """
        reg -> reg + reg {{DefaultSemRule // 200 left}} |
               reg reg {{DefaultSemRule // 200 left}} |
               reg * {{DefaultSemRule}} |
               ( reg ) {{DefaultSemRule}} |
               id {{DefaultSemRule}};
        reg -> ;
        a -> reg | reg ;
        """
        self.input(st,{'locals':locals()})
        return self.context['rules']
1953 1954
class Stack:
    """A simple LIFO stack built on a list; the top of the stack is
    C{self.stack[0]}.

    Unknown attributes (e.g. C{count}, C{index}) are delegated to the
    underlying list via C{__getattr__}.
    """

    def __init__(self, start=()):
        """Fills the stack with the elements of C{start}; C{start[0]} ends
        up on top (push everything, then reverse)."""
        # Immutable () default instead of the original mutable [] (it was
        # never mutated, but an immutable default is safer).
        self.stack = []
        for x in start: self.push(x)
        self.stack.reverse()

    def push(self, object):
        """Puts C{object} on top of the stack."""
        self.stack = [object] + self.stack

    def pop(self):
        """Removes and returns the top of the stack.
        @raise IndexError: if the stack is empty."""
        if not self.stack:
            # Bug fix: the original raised the plain string 'stack underflow',
            # which is a TypeError on Python >= 2.6; raise a real exception.
            raise IndexError('stack underflow')
        top, self.stack = self.stack[0], self.stack[1:]
        return top

    def top(self):
        """ Returns top of stack (not poping it)
        @raise IndexError: if the stack is empty."""
        if not self.stack:
            raise IndexError('stack underflow')
        return self.stack[0]

    def empty(self):
        """ Tests if stack is empty"""
        return not self.stack

    def popall(self):
        """ Empties stack"""
        self.stack = []

    def __repr__(self):
        return '[Stack:%s]' % self.stack

    def __cmp__(self, other):
        # Python 2 ordering: compare the underlying lists.
        return cmp(self.stack, other.stack)

    def __len__(self):
        return len(self.stack)

    def __add__(self, other):
        return Stack(self.stack + other.stack)

    def __mul__(self, reps):
        return Stack(self.stack * reps)

    def __getitem__(self, offset):
        return self.stack[offset]

    def __getslice__(self, low, high):
        # Python 2 slicing support.
        return Stack(self.stack[low : high])

    def __getattr__(self, name):
        # Delegate everything else to the underlying list.
        return getattr(self.stack, name)
2010