1
2 """
3 This is part of Yappy
4
5 parser.py -- Yet another parser for python...
6
7 A LR parser generator, based on Aho and al. 1986, C{Compilers}
8 (aho86:_compil).
9
10 It currently builds C{SLR}, C{LR(1)} and C{LALR(1)} parsing tables.
11
12 Copyright (C) 2000-2003 Rogério Reis & Nelma Moreira {rvr,nam}@ncc.up.pt
13 Version: $Id: parser.py,v 1.18 2006-07-19 09:52:06 rvr Exp $
14
15 This program is free software; you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation; either version 2 of the License, or
18 (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28
29 @author: Rogério Reis & Nelma Moreira {rvr,nam}@ncc.up.pt
30
31 @var _DEBUG: if nonzero, display information during parser generation
32 or parsing.
33
34 @type _DEBUG: integer
35 """
36
37 from types import *
38 import re, exceptions, string
39 import sys, string, copy, time, operator
40
41 import os.path
42
43 import shelve
44
45
46 import osets
47
48
49
50 _DEBUG=0
51
52 _Version = 1.7
53
54 NIL = ""
55
57 """Class for lexical analyser to use with the parser
58
59 @ivar rules: lexical rules
60 @ivar operators: precedence and associativity for operators
61 @type operators: dictionary
62
63 """
64
66 """
67 By now lexer is kept as simple as possible, so order is really
68 essential: i.e. if a keyword is substring of another its rule
69 must appear after the larger keyword for the obvious
70 reasons...
71
72 @param rules_list: contains pairs C{(re,funct,op?)} where:
73
74 C{re}: is an uncompiled python regular expression
75
76 C{funct}: the name of
77 a function that returns the pair C{(TOKEN, SPECIAL_VALUE)}, where C{TOKEN}
78 is the token to be used by the parser and C{SPECIAL_VALUE} an eventual
79 associated value. The argument is the matched string. If
80 C{funct} equals C{""} the token is ignored. This can be
81 used for delimiters.
82
83 C{op}: if present, is a tuple with operator
84 information: C{(TOKEN,PRECEDENCE,ASSOC)} where C{PRECEDENCE} is an
85 integer and C{ASSOC} the string 'left' or 'right'.
86
87 """
88 self.rules = []
89 rnumber = 1
90 for r in rules_list:
91 try:
92 rex = r[0]
93 funct = r[1]
94 except IndexError:
95 raise LexicalError(rnumber,r)
96 try: rec = re.compile(rex)
97 except TypeError:
98 raise LexicalRulesErrorRE(rex,rnumber)
99 try:
100 op,prec,assoc = r[2]
101 if not self.__dict__.has_key("operators"):
102 self.operators = {}
103 if not self.operators.has_key(op):
104 self.operators[op] = (prec,assoc)
105 except IndexError:
106 pass
107 self.rules.append((rec,funct))
108
109 rnumber = rnumber + 1
110 if _DEBUG and self.__dict__.has_key("operators"):
111 print "operators %s" %self.operators
112
def scan(self, string):
    """Perform the lexical analysis of C{string}.

    The input starts out as a one-element list and every lexical rule
    is applied, in declaration order, over the parts that are still
    plain (unmatched) strings.

    @param string: the text to tokenize
    @return: a list of tokens (pairs C{(TOKEN, SPECIAL_VALUE)}) for
        recognized elements and C{("@UNK", string)} for the rest
    """
    pending = [string]
    for rule in self.rules:
        pending = self.scanOneRule(rule, pending)
    return self.scanUnknown(pending)
122
124 """Scans space C{st} according only one rule
125
126 @param rule: one rule C{(re,fun,op)}
127
128 @param st: is a list of strings and already matched structures
129 """
130 re = rule[0]
131 fun = rule[1]
132 st1 = []
133 for s in st:
134 if not isinstance(s, StringType):
135 st1.append(s)
136 else:
137 s1 = s
138 while True:
139 m = re.search(s1)
140 if not m:
141 st1.append(s1)
142 break
143 else:
144 if m.start() != 0:
145 st1.append(s1[0:m.start()])
146
147
148
149 if fun != "":
150 st1.append(apply(fun,[s1[m.start():m.end()]]))
151 if m.end() == len(s1):
152 break
153 else:
154 s1 = s1[m.end():]
155 return st1
156
158 """Scans the resulting structure making Unknown strings
159
160 Unknown parts will be of the form ("@UNK", string ) """
161 st1 = []
162 for s in st:
163 if isinstance(s, StringType):
164 st1.append(("@UNK",s))
165 else:
166 st1.append(s)
167 return st1
168
170 """Scans a string read from stdin """
171 st = raw_input()
172 if not st:
173 raise IOError
174 if isinstance(st, StringType):
175 s = self.scan(st)
176 return s
177
179 """Class for all Yappy exceptions"""
180 pass
181
182
184 """Class for all Yappy Lexical analyser exceptions"""
186 self.message = 'Error in rule number %s: %s'%(r,rule)
187
189 return "%s" % (self.message)
190
192 """An error occured parsing the RE part of a lexical rule"""
194 self.message = 'Error in RE "%s" at rule n.%d'%(re,no)
195 self.rule = no
196 self.re = re
197
199 return "%s" % (self.message)
200
202 """Class for input grammar errors """
204 self.message = 'Error in rule "%s" '%rule
205
206
208 """Confliting actions in building SLR parsing table. Grammar
209 is not SLR(0)"""
211 self.message = 'Confliting action[%d,%s] in SLR parsing table ' %(i,a)
212 self.item = i
213 self.symbol = a
214
216 """Conflicting actions in building LR parsing table. Grammar
217 is not LR(1)"""
219 self.message = 'Confliting action[%d,%s] in LR(1) parsing table ' %(i,a)
220 self.item = i
221 self.symbol = a
223 return "%s" % (self.message)
224
226 """Confliting actions in building LR parsing table. Grammar
227 is not LR(1)"""
229 self.message = """Warning>>> Several confliting actions. Please
230 consult self.Log for details"""
231
233 return "%s" % (self.message)
234
236 """An error occured in LR parsing program"""
238 self.item = s
239 self.symbol = a
240 self.message = 'Error in LR: (%s,%s) not found' %(self.item,self.symbol)
241
243 return "%s" % (self.message)
244
246 """An error occured in the application of a semantic action"""
248 self.message = m
249 self.nrule = n
250 self.rule = r
252 return "%s in semantic rule %d: %s" % (self.message,self.nrule,self.rule)
253
255 """Mismatch table version """
257 self.message = """A new table must be built.
258 Please remove table shelve %s or set no_table to 0""" %t
259
261 return "%s" % (self.message)
262
264 """ Class for context-free grammars
265
266 @ivar rules: grammar rules
267 @ivar terminals: terminals symbols
268 @ivar nonterminals: nonterminals symbols
269 @ivar start: start symbol
270 @type start: string
271 @ivar ntr: dictionary of rules for each nonterminal
272
273 """
275 """
276
277 @param grammar: is a list for productions;
278 each production is a tuple C{(LeftHandside,RightHandside,SemFunc,Prec?)}
279 with C{LeftHandside} nonterminal, C{RightHandside} list of symbols,
280 C{SemFunc} syntax-direct semantics, if present
281 C{Prec (PRECEDENCE,ASSOC)} for ambiguous rules
282
283 First production is for start symbol
284
285 Special symbols: C{@S}, C{$}, C{#}
286 """
287 """ MUST BE IN THIS ORDER"""
288 self.rules = grammar
289 self.makenonterminals()
290 self.maketerminals()
291 self.start = self.rules[0][0]
292 self.aug_start = "@S"
293 self.rules.append((self.aug_start,[self.start],DefaultSemRule))
294 self.endmark = '$'
295 self.dummy = '#'
296 self.terminals.append(self.endmark)
297 self.terminals.append(self.dummy)
298 self.nonterminals.append(self.aug_start)
299 """ ritems are only for control ... not needed """
300 self.ritems = []
301 """ ntr[A] is the set of rules which has A as left side"""
302 self.ntr = {}
303 i = 0
304 for r in self.rules:
305 if not self.ntr.has_key(r[0]):
306 self.ntr[r[0]] = [i]
307 else:
308 self.ntr[r[0]].append(i)
309 for j in range(len(r[1]) + 1):
310 self.ritems.append((i,j))
311 i = i + 1
312
314 """Grammar rules
315
316 @return: a string representing the grammar rules
317 """
318 s = ""
319 for n in range(len(self.rules)):
320 lhs = self.rules[n][0]
321 rhs = self.rules[n][1]
322 s = s + "%s | %s -> %s \n" %(n, lhs, string.join(rhs," "))
323 return "Grammar Rules:\n\n%s" % s
324
329
331 """Extracts C{terminals} from the rules.
332 C{nonterminals} must already exist"""
333 self.terminals = []
334 for r in self.rules:
335 for s in r[1]:
336 if s not in self.nonterminals and s not in self.terminals:
337 self.terminals.append(s)
338
340 """Extracts C{nonterminals} from grammar rules."""
341 self.nonterminals = []
342 for r in self.rules:
343 if r[0] not in self.nonterminals:
344 self.nonterminals.append(r[0])
345
347 """Determines which nonterminals C{X ->* []} """
348 self.nullable = {}
349 for s in self.terminals:
350 self.nullable[s] = 0
351 for s in self.nonterminals:
352 self.nullable[s] = 0
353 if self.ntr.has_key(s):
354 for i in self.ntr[s]:
355 if not self.rules[i][1]:
356 self.nullable[s] = 1
357 break
358 k = 1
359 while k == 1:
360 k = 0
361 for r in self.rules:
362 e = 0
363 for i in r[1]:
364 if not self.nullable[i]:
365 e = 1
366 break
367 if e == 0 and not self.nullable[r[0]]:
368 self.nullable[r[0]] = 1
369 k = 1
370
371
373 """C{FIRST(s)} is the set of terminals that begin the strings
374 derived from s """
375 first = osets.Set([])
376 e = 0
377 for i in range(len(s)):
378 first.s_extend(self.first[s[i]])
379 if not self.nullable[s[i]]:
380 e = 1
381 break
382 if e == 0:
383 self.nullable[string.join(s)] = 1
384 else:
385 self.nullable[string.join(s)] = 0
386 return first
387
389 """Determines C{FIRST(s)}, for every symbol s, that is the set of
390 terminals that begin the strings derived from s """
391 self.first = {}
392 self.nd = {}
393 self.ms =Stack()
394 for s in self.terminals:
395 self.first[s] = osets.Set([s])
396 for s in self.nonterminals:
397 if self.ntr.has_key(s) and not self.first.has_key(s):
398
399 self.FIRST_TRA(s,1)
400
402 """Transitiv closure of C{FIRST(X)} """
403 self.ms.push(s)
404 self.nd[s] = d
405 """ calculating F1(s)"""
406 self.first[s] = osets.Set([])
407 for i in self.ntr[s]:
408 for y in self.rules[i][1]:
409 if self.nullable[y]:
410 continue
411 else:
412 if y in self.terminals:
413 self.first[s].append(y)
414 break
415 """transitive closure"""
416 for i in self.ntr[s]:
417 for y in self.rules[i][1]:
418 if y in self.nonterminals:
419 if not self.first.has_key(y):
420 self.FIRST_TRA(y,d+1)
421 if self.nd.has_key(y) and self.nd[y] != -1:
422 self.nd[s] = min(self.nd[s],self.nd[y])
423 self.first[s].s_extend(self.first[y])
424 if self.nullable[y]:
425 continue
426 else:
427 break
428 else:
429 break
430 if self.nd[s] == d:
431 while 1:
432 y = self.ms.pop()
433 if y == s:
434 break
435 self.first[y] = self.first[s].copy()
436 self.nd[y] = -1
437
439 """ Recursivelly computes C{FIRST(X)} for a nonterminal X"""
440 if not self.ntr.has_key(s):
441 return
442 self.first[s] = osets.Set([])
443 for i in self.ntr[s]:
444 r = self.rules[i][1]
445 if r == []:
446 self.nullable[s] = 1
447 else:
448 e = 1
449 for y in r:
450 if not self.first.has_key(y):
451 self.FIRST_NT(y)
452 self.first[s].s_extend(self.first[y])
453 if not self.nullable[y]:
454 e = 0
455 break
456 if e == 1:
457 self.nullable[s] = 1
458
460 """computes C{FOLLOW(A)} for all nonterminals: the set of terminals a
461 that can appear immediately to the right of A in some sentential form."""
462 self.follow = {}
463 self.follow[self.start] = osets.Set([self.endmark])
464 for rule in self.rules:
465 r = rule[1]
466 for i in range(len(r)):
467 if r[i] in self.nonterminals:
468 if not self.follow.has_key(r[i]):
469 self.follow[r[i]] = osets.Set([])
470 j = i + 1
471 self.follow[r[i]].s_extend(self.FIRST(r[j:]))
472 e = 1
473 while e:
474 e = 0
475 for s in self.nonterminals:
476 for i in self.ntr[s]:
477 r = self.rules[i][1]
478 try:
479 b = r[len(r)-1]
480 if b in self.nonterminals and self.follow[b].s_extend(self.follow[s]):
481 e = 1
482 except IndexError: pass
483 except KeyError: pass
484 for k in range(len(r)-1):
485 j = k + 1
486 if r[k] in self.nonterminals and self.nullable[string.join(r[j:])]:
487 if self.follow[r[k]].s_extend(self.follow[s]):
488 e = 1
489 break
490
492 """For each nonterminal C{s} determines the set of nonterminals
493 a such that C{s ->* ar}, for some C{r}"""
494 self.close_nt = {}
495 self.nd = {}
496 self.ms =Stack()
497 for s in self.nonterminals:
498 if self.ntr.has_key(s) and not self.close_nt.has_key(s):
499 self.TRAVERSE(s,1)
500
502 """ """
503 self.ms.push(s)
504 self.nd[s] = d
505 """ calculating F1(s)"""
506 self.close_nt[s] = {s:osets.Set([[]])}
507 for i in self.ntr[s]:
508 if not self.rules[i][1]:
509 continue
510 else:
511 r = self.rules[i][1]
512 for j in range(len(r)):
513 if r[j+1:]:
514 f = self.FIRST(r[j+1:])
515 ns = self.nullable[string.join(r[j+1:])]
516 else:
517 f = []
518 ns = 1
519 if r[j] in self.nonterminals:
520 if not self.close_nt[s].has_key(r[j]):
521 self.close_nt[s][r[j]] = osets.Set([[]])
522 if r[j+1:]:
523 self.close_nt[s][r[j]].append((f,ns))
524 if not self.nullable[r[j]]:
525 break
526 else:
527 break
528 """reflexive tansitive closure"""
529 for i in self.ntr[s]:
530 if not self.rules[i][1]:
531 continue
532 else:
533 r = self.rules[i][1]
534 for j in range(len(r)):
535 f = self.FIRST(r[j+1:])
536 ns = self.nullable[string.join(r[j+1:])]
537 if r[j] in self.nonterminals:
538 if not self.close_nt.has_key(r[j]):
539 self.TRAVERSE(r[j],d+1)
540 if self.nd.has_key(r[j]) and self.nd[r[j]] != -1:
541 self.nd[s] = min(self.nd[s],self.nd[r[j]])
542 for k in self.close_nt[r[j]].keys():
543 if not self.close_nt[s].has_key(k):
544 self.close_nt[s][k] = osets.Set([[]])
545 else:
546 for v in self.close_nt[s][k]:
547 if not v:
548 self.close_nt[s][k].append((f,ns))
549 else:
550 p, n = v
551 if n:
552 self.close_nt[s][k].append((p+f,ns))
553 else:
554 self.close_nt[s][k].append((p,n))
555 if not self.nullable[r[j]]:
556 break
557 else:
558 break
559 if self.nd[s] == d:
560 while 1:
561 y = self.ms.pop()
562 if y == s:
563 break
564 self.close_nt[y] = self.close_nt[s].copy()
565 self.nd[y] = -1
566
568 """For each nonterminal C{s} determines the set of nonterminals
569 a such that C{s ->* ar}, for some C{r}"""
570 self.derive_nt = {}
571 for s in self.nonterminals:
572 if self.ntr.has_key(s) and not self.derive_nt.has_key(s):
573 self.DERIVE_ONE_NT(s)
574
576 """For nonterminal C{s} determines the set of nonterminals
577 a such that C{s -> ar}, for some C{r} """
578 if not self.ntr.has_key(s):
579 return
580 self.derive_nt[s] = {s:osets.Set([None])}
581 for i in self.ntr[s]:
582 if not self.rules[i][1]:
583 continue
584 else:
585 r = self.rules[i][1]
586 for j in range(len(r)):
587 if r[j] in self.nonterminals:
588 if not self.derive_nt.has_key(r[j]):
589 self.DERIVE_ONE_NT(r[j])
590 for k in self.derive_nt[r[j]].keys():
591 if not self.derive_nt[s].has_key(k):
592 self.derive_nt[s][k] = osets.Set([])
593 for p in self.derive_nt[r[j]][k]:
594 if not p :
595 self.derive_nt[s][k].append(r[j+1:])
596 else:
597 self.derive_nt[s][k].append(r[j+1:].append(p))
598 if not self.nullable[r[j]]:
599 break
600 else:
601 break
602
604 """ """
605 self.derive_ter = {}
606 for s in self.terminals:
607 self.derive_ter[s] = osets.Set([s])
608 e = 1
609 while e:
610 e = 0
611 for s in self.nonterminals:
612 for i in self.ntr[s]:
613 r = self.rules[i][1]
614 if r == []:
615 continue
616 for i in range(len(r)):
617 if r[i] in self.terminals:
618 if i < len(r) -1:
619 if self.derive_ter.has_key(r[i+1]):
620 if not self.derive_ter.has_key(s):
621 self.derive_ter[s] = osets.Set([])
622 if self.derive_ter[s].s_append(r[i]):
623 e = 1
624 break
625 else:
626 if not self.derive_ter.has_key(s):
627 self.derive_ter[s] = osets.Set([])
628 if self.derive_ter[s].s_append(r[i]):
629 e = 1
630
631 break
632 else:
633 """ non-terminal"""
634 if self.derive_ter.has_key(r[i]):
635 if not self.derive_ter.has_key(s):
636 self.derive_ter[s] = osets.Set([])
637 if self.derive_ter[s].s_extend(self.derive_ter[r[i]]) == 1:
638 e = 1
639 if i > 0 and self.nullable[r[i]]:
640 continue
641 else:
642 break
643
645 """Class for construction of a C{LR} table
646
647 @ivar gr: a context-free grammar
648 @ivar operators: operators
649 @ivar Log: Log report for LR table construction
650
651
652 """
def __init__(self, cfgr, operators=None, noconflicts=1, expect=0):
    """Build the LR table for grammar C{cfgr}.

    @param cfgr: a context-free grammar
    @param operators: operator precedence/associativity information
    @param noconflicts: if 0, LR-table conflicts are not resolved
        (except through special operator rules)
    @type noconflicts: integer
    @param expect: exact number of expected LR shift/reduce conflicts
    @type expect: integer
    """
    self.gr = cfgr
    self.gr.makeFFN()              # FIRST / FOLLOW / NULLABLE sets
    self.operators = operators
    self.precedence = None
    self.rules_precedence()        # per-rule precedence from operators
    self.Log = LogLR(noconflicts, expect)
    self.make_action_goto()        # fills self.ACTION and self.GOTO
672
674 """ make C{action[i,X]} and C{goto[i,X]}
675 All pairs C{(i,s)} not in action and goto dictionaries are 'error' """
676 c = self.items()
677 if _DEBUG:
678 print self.print_items(c)
679 self.ACTION = {}
680 self.GOTO = {}
681
682
683 for i in range(len(c)):
684 for item in c[i]:
685 a = self.NextToDot(item)
686 if a in self.gr.terminals:
687 state = self.goto(c[i],a)
688 try:
689 j = c.index(state)
690 self.add_action(i,a,'shift',j)
691 except IndexError:
692 if _DEBUG: print "no state"
693 elif a == "":
694 """ Dot at right end """
695 l = self.gr.rules[item[0]][0]
696 if l != self.gr.aug_start :
697 self.dotatend(item,i)
698 else:
699 """ last rule """
700 self.add_action(i,self.gr.endmark,'accept',[])
701 for s in self.gr.nonterminals:
702 state = self.goto(c[i],s)
703 try:
704 j = c.index(state)
705 self.GOTO[(i,s)] = j
706 except ValueError:
707 pass
708
710 """Rule precedence obtained as the precedence of the right
711 most terminal. """
712 self.precedence={}
713 for i in range(len(self.gr.rules)):
714 if len(self.gr.rules[i]) == 4:
715 self.precedence[i] = self.gr.rules[i][3]
716 else:
717 self.precedence[i] = None
718 if self.operators:
719 self.gr.rules[i][1].reverse()
720 for s in self.gr.rules[i][1]:
721 if self.operators.has_key(s):
722 self.precedence[i] = self.operators[s]
723 break
724 self.gr.rules[i][1].reverse()
725
726 if _DEBUG:
727 print "Precedence %s" %self.precedence
728
730 """Set C{(action,j)} for state C{i} and symbol C{a} or raise
731 conflict error. Conficts are resolved using the following
732 rules:
733 - shift/reduce: if precedence/assoc information is available
734 try to use it; otherwise conflict is resolved in favor of shift
735 - reduce/reduce: choosing the production rule listed first
736 """
737 if self.ACTION.has_key((i,a)) and self.ACTION[(i,a)] != (action,j):
738 action1 , j1 = self.ACTION[(i,a)]
739 if _DEBUG:
740 print "LRconflit %s %s %s %s %s %s" %(action,j,action1,j1, i,a)
741 if action1 == 'shift' and action == 'reduce':
742 self.resolve_shift_reduce(i,a,j1,j)
743 elif action == 'shift' and action1 == 'reduce':
744 self.resolve_shift_reduce(i,a,j,j1)
745 elif action == 'reduce' and action1 == 'reduce':
746 if self.Log.noconflicts:
747
748 if j > j1:
749 self.ACTION[(i,a)] = (action,j1)
750 else:
751 self.ACTION[(i,a)] = (action,j)
752 self.Log.add_conflict('rr',i,a,j1,j)
753 else:
754 raise LRConflictError(i,a)
755 else:
756 self.ACTION[(i,a)] = (action,j)
757
759 """Operators precedence resolution or standard option: shift
760
761 C{s}: rule for shift
762 C{r}: rule for reduce
763
764 """
765 try:
766 if self.operators and self.operators.has_key(a) and self.precedence.has_key(r) and self.precedence[r]:
767 prec_op, assoc_op = self.operators[a]
768 if (self.precedence[r][0] > prec_op) or (self.precedence[r][0] == prec_op and self.precedence[r][1] =='left'):
769 self.ACTION[(i,a)] = ('reduce',r)
770 if _DEBUG: print "solved reduce %s" %r
771 else:
772 self.ACTION[(i,a)] = ('shift',s)
773 if _DEBUG: print "solved shift %s" %s
774 else:
775 self.ACTION[(i,a)] = ('shift',s)
776 if _DEBUG: print "solved shift %s" %s
777 except (AttributeError, TypeError, KeyError,NameError):
778 if self.Log.noconflicts:
779
780 self.ACTION[(i,a)] = ('shift',s)
781 if _DEBUG: print "choose shift %s for action (%s,%s)" %(s,i,a)
782 self.Log.add_conflict('sr',i,a,s,r)
783 if _DEBUG: print " %s for action (%s,%s)" %(self.Log.conflicts,i,a)
784
785 else:
786 raise LRConflictError(i,a)
787
789 """Class for construction of a C{SLR} table
790
791 C{SLR} items represented by a pair of integers C{(number of
792 rule,position of dot)}
793
794 (aho86:_compil page 221)
795 """
796
798 n, k = item
799 l = self.gr.rules[item[0]][0]
800 for a in self.gr.follow[l]:
801 self.add_action(i,a,'reduce',n)
802
804 """The closure of a set of C{LR(0)} items C{I} is the set of
805 items constructed from C{I} by the two rules:
806 - every item of I is in closure(I)
807 - If A -> s.Bt in closure(I) and B -> r, then add B ->.r to closure(I)
808 (aho86:_compil page 223)
809 """
810 added = {}
811 for l in self.gr.nonterminals:
812 added[l] = 0
813 close = items[:]
814 e = 1
815 while e:
816 e = 0
817 for i in close:
818 s = self.NextToDot(i)
819 if s in self.gr.nonterminals and added[s]==0 and self.gr.ntr.has_key(s):
820 for n in self.gr.ntr[s]:
821 close.append((n,0))
822 added[s] = 1
823 e = 1
824 return close
825
def goto(self, items, s):
    """C{goto(I,X)}, for an item set C{I} and a grammar symbol C{X},
    is the closure of the set of all items C{A -> sX.r} such that
    C{A -> s.Xr} is in C{I}."""
    moved = osets.Set([])
    for (n, i) in items:
        # Advance the dot over s wherever s is the next symbol.
        if self.NextToDot((n, i)) == s:
            moved.append((n, i + 1))
    return self.closure(moved)
836
838 """ An LR(0) item of a grammar G is a production of G with a dot at
839 some position on the right hand side.
840 It is represented by the rule number and the position of
841 the dot
842
843 @return: a set of sets of items
844 """
845 c = osets.Set([self.closure(osets.Set([(len(self.gr.rules) - 1,0)]))])
846 symbols = self.gr.terminals + self.gr.nonterminals
847 e = 1
848 while e:
849 e = 0
850 for i in c:
851 for s in symbols:
852 valid = self.goto(i,s)
853 if valid != [] and valid not in c:
854 c.append(valid)
855 e = 1
856 return c
857
859 """Print SLR items """
860 s = ""
861 j = 0
862 for i in c:
863 s = s+ "I_%d: \n" %j
864 for item in i:
865 r, p = item
866 lhs = self.gr.rules[r][0]
867 rhs = self.gr.rules[r][1]
868 s = s + "\t %s -> %s . %s \n" %(lhs,
869 string.join(rhs[:p]," "), string.join(rhs[p:]," "))
870 j += 1
871 return s
872
874 """ returns symbol next to te dot or empty string"""
875 n, i = item
876 try:
877 s = self.gr.rules[n][1][i]
878 except IndexError:
879 s = ""
880 return s
881
882
883
885 """
886 Class for construction of a LR1 table
887
888 Items are represented by a pair of integers (number of rule, position of dot)
889 """
890
892 """The closure of a set of C{LR(1)} items C{I} is the set of items construted
893 from I by the two rules:
894 - every item of C{I} is in C{closure(I)}
895
896 - If C{[A -> s.Bt,a]} in C{closure(I)},for C{B ->r} and
897 each terminal C{b} in C{first(ta)}, add C{[B ->.r,b]}
898 to C{closure(I)}
899 """
900 close = items
901 e = 1
902 while e:
903 e = 0
904 for i in close:
905 s = self.NextToDot(i)
906 sa = self.gr.FIRST(self.AfterDot(i))
907 if s in self.gr.nonterminals and self.gr.ntr.has_key(s):
908 for n in self.gr.ntr[s]:
909 for b in sa:
910 e = close.append((n,0,b))
911 return close
912
def goto(self, items, s):
    """C{goto(I,X)}, for a set of LR(1) items C{I} and a grammar
    symbol C{X}, is the closure of all items C{(A -> sX.r, a)} such
    that C{(A -> s.Xr, a)} is in C{I}."""
    moved = osets.Set([])
    for item in items:
        if self.NextToDot(item) != s:
            continue
        n, i, t = item
        # Advance the dot, keeping the lookahead terminal t.
        moved.append((n, i + 1, t))
    return self.closure(moved)
923
925 """ An LR(1) item of a grammar G is a production of G with a dot at
926 some position of the right hand side and a terminal:
927 (rule_number,dot_position,terminal)
928 (aho86:_compil page 231)
929 """
930 c = osets.Set([ self.closure(osets.Set([(len(self.gr.rules) - 1,0,self.gr.endmark)]))])
931 symbols = self.gr.terminals + self.gr.nonterminals
932 e = 1
933 while e:
934 e = 0
935 for i in c:
936 for s in symbols:
937 valid=self.goto(i,s)
938 if valid != [] :
939 if c.s_append(valid): e = 1
940 return c
941
943 """Print C{LR(1)} items """
944 s = ""
945 j = 0
946 for i in c:
947 s = s+ "I_%d: \n" %j
948 for item in i:
949 r, p, t = item
950 lhs = self.gr.rules[r][0]
951 rhs = self.gr.rules[r][1]
952 s = s + "\t %s -> %s . %s , %s\n" %(lhs,
953 string.join(rhs[:p]," "), string.join(rhs[p:]," "),t)
954 j += 1
955 print s
956 return s
957
959 """ returns symbol next to the dot or empty string"""
960 n, i, t = item
961 try:
962 s = self.gr.rules[n][1][i]
963 except IndexError:
964 s = ""
965 return s
966
968 """ returns symbol next to the dot or empty string"""
969 n, i, t = item
970 try:
971 s = self.gr.rules[n][1][i+1:]
972 except IndexError:
973 s = []
974 s.append(t)
975 return s
976
978 n, k, t = item
979 self.add_action(i,t,'reduce',n)
980
981
983 """Class for construction of C{LALR(1)} tables"""
984
986 """ Make C{action[i,X]} and C{goto[i,X]}
987 all pairs C{(i,s)} not in action and goto dictionaries are 'error' """
988 self.gr.DERIVE_NT()
989 c = self.items()
990 if _DEBUG:
991 print self.print_items(c)
992 self.ACTION = {}
993 self.GOTO = {}
994
995
996 for i in range(len(c)):
997 for item in c[i].keys():
998 a = self.NextToDot(item)
999 if a in self.gr.terminals:
1000 state =self.goto(c[i],a)
1001 j = self.get_union(c,state)
1002 if j != -1:
1003 self.add_action(i,a,'shift',j)
1004 elif a == "":
1005 """ Dot at right end """
1006 l = self.gr.rules[item[0]][0]
1007 if l != self.gr.aug_start :
1008 self.dotatend(item,c,i)
1009 else:
1010 """ last rule """
1011 self.add_action(i,self.gr.endmark,'accept',[])
1012 for s in self.gr.nonterminals:
1013 state = self.goto(c[i],s)
1014 j = self.get_union(c,state)
1015 if j != -1:
1016 self.GOTO[(i,s)] = j
1017
1019 """ An C{LALR(1)} item of a grammar C{G} is a production of
1020 C{G}with a dot at some position of the right hand side and a
1021 list of terminals: is coded as a dictonary with key
1022 C{(rule_number,dot_position)} and value a set of terminals
1023 """
1024 i0 = {}
1025 i0[(len(self.gr.rules) - 1,0)] = osets.Set([self.gr.endmark])
1026 c = osets.Set([self.closure(i0)])
1027 symbols = self.gr.terminals + self.gr.nonterminals
1028 e = 1
1029 while e:
1030 e = 0
1031 for i in c:
1032 for s in symbols:
1033 if self.core_merge(c,self.goto(i,s)) == 1:
1034 e = 1
1035 return c
1037 """Print C{LALR(1)} items """
1038 s = ""
1039 j = 0
1040 for i in range(len(c)):
1041 s = s+ "I_%d: \n" %i
1042 for item in c[i].keys():
1043 r, p = item
1044 lhs = self.gr.rules[r][0]
1045 rhs = self.gr.rules[r][1]
1046 s = s + "\t %s -> %s . %s, %s \n" %(lhs,
1047 string.join(rhs[:p]," "), string.join(rhs[p:]," "),c[i][item])
1048 print s
1049 return s
1050
def goto(self, items, s):
    """C{goto(I,X)}, for a set of LALR(1) items C{I} (a dict keyed by
    C{(rule,dot)} with lookahead sets as values) and a grammar symbol
    C{X}, is the closure of all items C{(A -> sX.r, a)} such that
    C{(A -> s.Xr, a)} is in C{I}."""
    moved = {}
    for key in items.keys():
        if self.NextToDot(key) != s:
            continue
        n, i = key
        target = (n, i + 1)
        if not moved.has_key(target):
            moved[target] = osets.Set([])
        # Carry over every lookahead of the source item.
        for t in items[key]:
            moved[target].append(t)
    return self.closure(moved)
1063
1064
1066 """The closure of a set of C{LR(1)} items I is the set of items construted
1067 from I by the two rules:
1068 - every item of I is in closure(I)
1069
1070 - If [A -> s.Bt,a] in closure(I),for B ->r and each terminal b in
1071 first(ta), add [B ->.r,b] to closure(I)
1072 """
1073 e = 1
1074 while e:
1075 e = 0
1076 for i in items.keys():
1077 s = self.NextToDot(i)
1078 if s in self.gr.nonterminals and self.gr.ntr.has_key(s):
1079 l = self.AfterDot(i,items)
1080 for n in self.gr.ntr[s]:
1081 if not items.has_key((n,0)):
1082 items[(n,0)] = osets.Set([])
1083 if items[(n,0)].s_extend(l) == 1 :
1084 e = 1
1085 return items
1086
1088 """ """
1089 for i in c:
1090 if i.keys() == j.keys():
1091 return c.index(i)
1092 return -1
1093
1095 """ """
1096 if j == {} or j in c : return 0
1097 e = 2
1098 for i in c:
1099 if i.keys() == j.keys():
1100 e = 0
1101 for k in j.keys():
1102 if i[k].s_extend(j[k]) == 1:
1103 e = 1
1104 break
1105 if e == 2:
1106 e = c.s_append(j)
1107 return e
1108
1110 """ returns symbol next to the dot or empty string"""
1111 n, i = item
1112 try:
1113 s = self.gr.rules[n][1][i]
1114 except IndexError:
1115 s = ""
1116 return s
1117
1119 """ returns FIRST of strings after the dot concatenated with lookahead"""
1120 n, i = item
1121 try:
1122 s = self.gr.rules[n][1][i+1:]
1123 except IndexError:
1124 s = []
1125 sa = osets.Set([])
1126 for a in items[item]:
1127 s.append(a)
1128 sa.s_extend(self.gr.FIRST(s))
1129 del s[len(s)-1]
1130 return sa
1131
1132
1134 n, k = item
1135 for a in c[i][item]:
1136 self.add_action(i,a,'reduce',n)
1137
1139 """Class for construction of LALR tables """
1140
1142 """ collection of LR(0) items """
1143 self.gr.DERIVE_T()
1144 self.gr.TransClose()
1145 c = self.items()
1146 if _DEBUG:
1147 print self.print_items(c)
1148 """ make action[i,X] and goto[i,X]
1149 all pairs (i,s) not in action and goto dictionaries are 'error' """
1150 self.ACTION = {}
1151 self.GOTO = {}
1152
1153
1154 for i in range(len(c)):
1155 for item in c[i].keys():
1156 C = self.NextToDot(item)
1157 if C in self.gr.nonterminals:
1158 if self.gr.derive_ter.has_key(C):
1159 for a in self.gr.derive_ter[C]:
1160 if self.goto_ref.has_key((i,a)):
1161 j = self.goto_ref[(i,a)]
1162 self.add_action(i,a,'shift',j)
1163 if self.gr.close_nt.has_key(C):
1164 for A in self.gr.close_nt[C].keys():
1165 """Error: ignores end string s in C->*As"""
1166 for p in self.gr.close_nt[C][A]:
1167 r = self.AfterDotTer(item,c[i],p)
1168 if self.gr.ntr.has_key(A):
1169 for k in self.gr.ntr[A]:
1170 if self.gr.rules[k][1] == []:
1171 for a in r:
1172 self.add_action(i,a,'reduce',k)
1173
1174 elif C in self.gr.terminals:
1175 if self.goto_ref.has_key((i,C)):
1176 j = self.goto_ref[(i,C)]
1177 self.add_action(i,C,'shift',j)
1178 else:
1179 """ Dot at right end """
1180 l = self.gr.rules[item[0]][0]
1181 if l != self.gr.aug_start:
1182 self.dotatend(item,c,i)
1183 else:
1184 """ last rule """
1185 self.add_action(i,self.gr.endmark,'accept',[])
1186 for s in self.gr.nonterminals:
1187 state = self.goto(c[i],s)
1188 j = self.get_union(c,state)
1189 if j != -1:
1190 self.GOTO[(i,s)] = j
1191
1193 """ An C{LALR(1)} kernel item of a grammar C{G} is a
1194 production of C{G} with a
1195 dot at some position of the right hand side (except the first) and a list
1196 of terminals: is coded as a dictionary with key
1197 C{(rule_number,dot_position)} and value a set of terminals.
1198 """
1199 i0 = {}
1200 i0[(len(self.gr.rules) - 1,0)] = osets.Set([self.gr.endmark])
1201 c= osets.Set([i0])
1202 symbols = self.gr.terminals + self.gr.nonterminals
1203 """ kernel LR(0) items """
1204 self.goto_ref = {}
1205 e = 1
1206 while e:
1207 e = 0
1208 for i in c:
1209 for s in symbols:
1210 valid = self.goto(i,s)
1211 if valid != {}:
1212 if c.s_append(valid): e = 1
1213
1214 self.goto_ref[(c.index(i),s)] = c.index(valid)
1215
1216 """ Discovering propagated and spontaneous lookaheads for
1217 kernel items k and grammar symbol s"""
1218 lh={}
1219 for k in c:
1220 nk = c.index(k)
1221 lh[nk] = {}
1222 for (n,i) in k.keys():
1223 lh[nk][(n,i)] = osets.Set([])
1224 j = {}
1225 j[(n,i)]=osets.Set([(self.gr.dummy)])
1226 j = self.closure(j)
1227 for s in symbols:
1228 for (m1,j1) in j.keys():
1229 if self.NextToDot((m1,j1)) == s:
1230 for a in j[(m1,j1)]:
1231 if a == self.gr.dummy:
1232 lh[nk][(n,i)].append((self.goto_ref[(nk,s)],m1,j1+1))
1233 else:
1234 c[self.goto_ref[(nk,s)]][(m1,j1+1)].append(a)
1235 del j
1236 """ Propagate lookaheads """
1237
1238 e = 1
1239 while e:
1240 e = 0
1241 for k in c:
1242 nk = c.index(k)
1243 for (n,i) in k.keys():
1244 for (m,n1,i1) in lh[nk][(n,i)]:
1245 if c[m][(n1,i1)].s_extend(k[(n,i)]) == 1:
1246 e = 1
1247
1248 return c
1249
1250
1251
def goto(self, items, s):
    """C{goto(I,X)} where C{I} is a set of kernel items and C{X} a
    grammar symbol: all kernel items C{(A -> sX.r, a)} such that
    C{(A -> s.Xr, a)} is in C{I} (lookahead sets are left empty here;
    they are filled in later by lookahead propagation)."""
    kernel = {}
    for key in items.keys():
        x = self.NextToDot(key)
        if x == s:
            n, i = key
            if not kernel.has_key((n, i + 1)):
                kernel[(n, i + 1)] = osets.Set([])
        # If the symbol after the dot can derive a nonterminal a
        # whose rule starts with s, the item (a-rule, dot after s)
        # also belongs to the kernel.
        if self.gr.close_nt.has_key(x):
            for a in self.gr.close_nt[x].keys():
                if self.gr.ntr.has_key(a):
                    for k in self.gr.ntr[a]:
                        if self.gr.rules[k][1] != [] and self.gr.rules[k][1][0] == s:
                            kernel[(k, 1)] = osets.Set([])
    return kernel
1269
1271 """ returns symbol next to the dot or empty string"""
1272 n, i = item
1273 try:
1274 s = self.gr.rules[n][1][i]
1275 except IndexError:
1276 s = ""
1277 return s
1278
1280 """ returns FIRST of strings after the dot
1281 concatenated with lookahead"""
1282
1283 if path:
1284 p, n = path
1285 if not n:
1286 return p
1287 l, i = item
1288 try:
1289 f= self.gr.FIRST(self.gr.rules[l][1][i+1:])
1290 ns = self.gr.nullable[string.join(self.gr.rules[l][1][i+1:])]
1291 except IndexError:
1292 f = []
1293 ns = 1
1294 if ns:
1295 return items[item]
1296 else:
1297 return f
1298
1299
1301 """Class for LR table construction report:
1302 @ivar expect: number of shit/reduce conflicts expected
1303 @type expect: integer
1304 @ivar items: set of LR items
1305 @ivar conflicts: dictionary of conflicts occurred in LR table
1306 construction: 'rr' and 'sr'
1307 """
1308 - def __init__(self,noconflicts,expect):
1309 self.noconflicts = noconflicts
1310 self.expect = expect
1311 self.conflicts = {}
1312 self.items = None
1313
# NOTE(review): header lost in extraction; this records one conflict of
# kind 'type' ('sr' or 'rr') at state i on symbol a, with the two
# competing table entries.
try:
    self.conflicts[type].append((i,a,value1,value2))
except KeyError:
    # first conflict of this kind
    self.conflicts[type] = [(i,a,value1,value2)]
1319
1321 """Class for LR parser
1322
1323 @ivar cfgr: context free grammar
1324 @ivar rules: grammar rules
1325 @ivar terminals: grammar terminals
1326 @ivar nonterminals: grammar nonterminals
1327 @ivar table: LR parsing table
1328 @ivar ACTION: Action function
1329 @ivar GOTO: Goto function
1330
1331 @ivar tokens: tokens to be parsed
1332 @ivar context: computational context
1333 @ivar output: list of grammar rules used for parsing C{tokens}
1334 (right derivation in reverse)
1335 @ivar stack: LR stack with pairs C{(state,token)}
1336
1337 """
1338
1339 - def __init__(self,grammar,table_shelve,no_table=1,tabletype=LALRtable,operators=None,noconflicts=1,expect=0,**args):
1340 """
1341 @param grammar: is a list for productions;
1342 each production is a tuple C{(LeftHandside,RightHandside,SemFunc,Prec?)}
1343 with C{LeftHandside} nonterminal, C{RightHandside} list of symbols,
1344 C{SemFunc} syntax-direct semantics, if present
1345 C{Prec (PRECEDENCE,ASSOC)} for ambiguous rules
1346
1347 First production is for start symbol
1348
1349 @param table_shelve: file where parser is saved
1350 @type table_shelve: string
1351 @param tabletype: type of LR table: C{SLR}, C{LR1}, C{LALR}
1352 @type tabletype: LRtable class
1353 @param no_table: if 0 table_shelve is created anyway
1354 @type no_table: integer
1355 @param operators: precedence and associativity for operators
1356 @type operators: dictionary
1357 @param noconflicts: if 0 LRtable conflicts are not resolved,
1358 unless spcecial operator rules
1359 @type noconflicts: integer
1360 @param expect: exact number of expected LR shift/reduce conflicts
1361 @type expect: integer
1362 @param args: extra arguments; key C{nosemrules} if 1 no
1363 semantic rules are applied
1364 @type args: dictionary
1365
1366 """
1367
1368 self.cfgr = CFGrammar(grammar)
1369 self.rules = self.cfgr.rules
1370 self.terminals = self.cfgr.terminals
1371 self.nonterminals = self.cfgr.nonterminals
1372 self.endmark = self.cfgr.endmark
1373 if args.has_key('nosemrules'):
1374 self.nosemrules=args['nosemrules']
1375 else:
1376 self.nosemrules = 0
1377
1378 d = shelve.open(table_shelve)
1379
1380 if d and no_table:
1381 self.ACTION = d['action']
1382 self.GOTO = d['goto']
1383 if d.has_key('version'):
1384 if d['version'] < _Version:
1385 raise TableError(table_shelve)
1386 try:
1387 self.Log = d['log']
1388 except KeyError:
1389 raise TableError(table_shelve)
1390 else:
1391 self.table = tabletype(self.cfgr,operators,noconflicts,expect)
1392 d['version'] = _Version
1393 d['action'] = self. ACTION = self.table.ACTION
1394 d['goto'] = self.GOTO = self.table.GOTO
1395 d['log'] = self.Log = self.table.Log
1396 d.close()
1397
1399 """@return: the LR parsing table showing for each state the
1400 action and goto function """
1401
1402 l = (map(lambda x: x[0],self.ACTION.keys()))
1403 l.sort()
1404 a1="\nState\n"
1405 if len(self.terminals) < 20:
1406 for a in self.terminals:
1407 a1=a1+" \t%s" %a
1408 for i in osets.Set(l):
1409 a3="\n%s" % i
1410 for a in self.terminals:
1411 if self.ACTION.has_key((i,a)):
1412 if self.ACTION[i,a][0]=="shift": x="s"
1413 else: x="r"
1414 a2="\t%s%s" %(x,self.ACTION[i,a][1])
1415 else:
1416 a2="\t"
1417 a3=a3+a2
1418 a1="%s%s" %(a1,a3)
1419 ac=a1
1420 else:
1421 for i in osets.Set(l):
1422 a3="%s\n" % i
1423 for a in self.terminals:
1424 if self.ACTION.has_key((i,a)):
1425 if self.ACTION[i,a][0]=="shift": x="s"
1426 else: x="r"
1427 a3= a3+"%s = %s%s\n" %(a,x,self.ACTION[i,a][1])
1428 a1="%s%s" %(a1,a3)
1429 ac=a1
1430
1431 l = (map(lambda x: x[0],self.GOTO.keys()))
1432 l.sort()
1433 a1 = "\nState\n"
1434 if len(self.nonterminals) < 20:
1435 for a in self.nonterminals:
1436 a1 = a1 + " \t%s" %a
1437 for i in osets.Set(l):
1438 a3 = "\n%s" % i
1439 for a in self.nonterminals:
1440 if self.GOTO.has_key((i,a)):
1441 a2 = "\t%s" %self.GOTO[(i,a)]
1442 else:
1443 a2 = "\t"
1444 a3 = a3 + a2
1445 a1 = "%s%s" %(a1,a3)
1446 else:
1447 for i in osets.Set(l):
1448 a3 = "%s\n" % i
1449 for a in self.nonterminals:
1450 if self.GOTO.has_key((i,a)):
1451 a3 = a3 + "%s = %s\n" %(a,self.GOTO[(i,a)])
1452 a1 = "%s%s" %(a1,a3)
1453 go = a1
1454 return "Action table:\n %s\n Goto table:%s\n" % (ac,go)
1455
1456
1457
def parsing(self,tokens,context = None):
    """LR Parsing Algorithm (aho86:_compil, page 218)
    @param tokens: pairs (TOKEN, SPECIAL_VALUE)
    @param context: a computational context for semantic actions

    @return: parsed result
    """

    self.stack = Stack()
    # stack holds (state, semantic value) pairs; state 0 is the start state
    self.stack.push((0,[]))
    self.tokens = tokens
    # append the end-of-input marker (NOTE: mutates the caller's list)
    self.tokens.append((self.endmark,self.endmark))
    self.context = context
    self.output = []
    self.ip = 0
    while 1:
        # s: current state (top of stack); a: lookahead token name
        s = self.stack.top()[0]
        a = self.tokens[self.ip][0]
        if _DEBUG:
            print "Input: %s\nState: %s" %(map(lambda x:x[0],self.tokens[self.ip:]),s)
            print "Stack: %s" %self.stack
        try:
            if self.ACTION[s,a][0] == 'shift':
                if _DEBUG: print "Action: shift\n"
                # push (next state, token's semantic value) and advance
                self.stack.push((self.ACTION[s,a][1], self.tokens[self.ip][1]))
                self.ip = self.ip + 1
            elif self.ACTION[s,a][0] == 'reduce':
                n = self.ACTION[s,a][1]
                if _DEBUG: print "Action: reduce %s %s\n" %(n,str(self.rules[n]))
                # pop |RHS| entries; reverse so semargs is in RHS order
                semargs = [self.stack.pop()[1] for i in range(len(self.rules[n][1]))]
                semargs.reverse()
                if self.nosemrules:
                    reduce = []  # NOTE(review): shadows the builtin 'reduce'
                else:
                    reduce = Reduction(self.rules[n][2],semargs,self.context)
                del semargs
                # goto on the LHS nonterminal from the uncovered state
                s1 = self.stack.top()[0]
                a = self.rules[n][0]
                self.stack.push((self.GOTO[s1,a],reduce))
                self.output.append(n)
            elif self.ACTION[s,a] == ('accept', []):
                break
            else:
                raise LRParserError(s,a)
        except KeyError:
            # no (state, token) entry in the table: syntax error
            if _DEBUG: print "Error in action: %s" %self.ACTION
            raise LRParserError(s,a)
        except SemanticError, m:
            if _DEBUG: print "Semantic Rule %d %s" %(n,self.rules[n][2])
            raise SemanticError(m,n,self.rules[n][2])
    # semantic value left for the start symbol
    return self.stack.top()[1]
1509
1511 """
1512 Transforms a string into a grammar description
1513
1514 @param st: is a string representing the grammar rules, with
1515 default symbols as below. Fisrt rule for start.
1516
1517 I{Example}::
1518 reg -> reg + reg E{lb}E{lb} self.OrSemRule E{rb}E{rb}
1519 // priority 'left'|
1520 ( reg ) E{lb}E{lb}self.ParSemRuleE{rb}E{rb} ;
1521 where:
1522
1523 - rulesym="->" production symbol
1524 - rhssep='' RHS symbols separator
1525 - opsym='//' operator definition separator
1526 - semsym=E{lb}E{lb} semantic rule start marker
1527 - csemsym=E{rb}E{rb} semantic rule end marker
1528 - rulesep='|' separator for multiple rules for a LHS
1529 - ruleend=';' end marker for one LHS rule"""
1530 self.pg=Yappy_grammar(**args)
1531 self.pg.input(st,context)
1532 return self.pg.context['rules']
1533
def gsrules(self,rulestr, **sym):
    """
    Transforms a string in a grammar description

    @param rulestr: is a string representing the grammar rules, with
    default symbols as below.

    @param sym: Dictionary with symbols used. Default ones:
     - rulesym="->" production symbol
     - rhssep='' RHS symbols separator
     - opsym='//' operator definition separator
     - semsym=E{lb}E{lb} semantic rule start marker
     - csemsym=E{rb}E{rb} semantic rule end marker
     - rulesep='|' separator for multiple rules for a LHS
     - ruleend=';' end marker for one LHS rule
    Example:
      reg -> reg + reg E{lb}E{lb} self.OrSemRule // (priority,'left') E{rb}E{rb} |
      ( reg ) E{lb}E{lb}self.ParSemRuleE{rb}E{rb} ;

    @return: a list of grammar tuples C{(lhs, rhs, sem[, op])}
    @raise GrammarError: on a rule with a missing or empty LHS/RHS
    """
    if not sym:
        sym = Dict(rulesym="->",
                   rhssep='',
                   opsym='//',
                   semsym='{{',
                   csemsym='}}',
                   rulesep='|',
                   ruleend=';')
    gr = []
    rl = string.split(rulestr,sym['ruleend'])
    for l in rl:
        m = re.compile(sym['rulesym']).search(l)
        if not m: continue
        if m.start() == 0:
            raise GrammarError(l)
        lhs = l[0:m.start()].strip()
        if m.end() == len(l):
            raise GrammarError(l)
        rhss = string.strip(l[m.end():])
        if rhss == "[]":
            # BUGFIX: the whole RHS is empty; the original set rhs/sem/op
            # here but never appended the production, silently dropping
            # rules of the form "X -> [] ;"
            gr.append((lhs, [], EmptySemRule))
            continue
        for rest in string.split(l[m.end():],sym['rulesep']):
            rest=string.strip(rest)
            # BUGFIX: compare the alternative 'rest', not the list 'rhss'
            # (the original tested 'rhss == "[]"', which is always false)
            if rest == "[]":
                rhs = []
                sem = EmptySemRule
                op = None
            else:
                m1=re.search(sym['semsym']+'(?P<opsem>.*)'+sym['csemsym'],rest)
                if not m1:
                    # no {{...}} part: plain RHS with the default semantics
                    rhs = string.split(rest,None)
                    sem = DefaultSemRule
                    op = None
                else:
                    if m1.start() == 0:
                        raise GrammarError(rest)
                    rhs = string.split(rest[0:m1.start()].strip())
                    if m1.group('opsem'):
                        # {{ sem }} or {{ sem // op }}
                        opsem = string.split(m1.group('opsem'),sym['opsym'])
                        if len(opsem) == 1:
                            sem = string.strip(opsem[0])
                            op = None
                        elif len(opsem) == 2:
                            sem = string.strip(opsem[0])
                            op = string.strip(opsem[1])
                        else:
                            raise GrammarError(rest)
                    else:
                        sem = DefaultSemRule
                        op = None
            # BUGFIX: only evaluate textual semantic-rule / operator
            # specifications; the original eval()'d the DefaultSemRule /
            # EmptySemRule function objects themselves -> TypeError
            if not callable(sem):
                sem = eval(sem)
            if op is None:
                gr.append((lhs,rhs,sem))
            else:
                gr.append((lhs,rhs,sem,eval(op)))
    return gr
1614
1615
1617 """Class for LR parser: without shelve and semantic rules(obsolete)
1618 """
1619
1621 """
1622 """
1623 self.table = LALRtable(grammar)
1624
1626 """LR Parsing Algorithm
1627 """
1628 self.stack = Stack()
1629 self.stack.push(0)
1630 self.input = tokens
1631 self.input.append(self.table.gr.endmark)
1632 self.output = []
1633 self.ip = 0
1634 while 1:
1635 s = self.stack.top()
1636 a = self.input[self.ip]
1637 if not self.table.ACTION.has_key((s,a)):
1638 raise LRParserError(s,a)
1639 elif self.table.ACTION[s,a][0] == 'shift':
1640
1641 self.stack.push(self.table.ACTION[s,a][1])
1642 self.ip = self.ip + 1
1643 elif self.table.ACTION[s,a][0] == 'reduce':
1644 n = self.table.ACTION[s,a][1]
1645 for i in range(len(self.table.gr.rules[n][1])):
1646 self.stack.pop()
1647 s1 = self.stack.top()
1648 a = self.table.gr.rules[n][0]
1649
1650 if not self.table.GOTO.has_key((s1,a)):
1651 raise LRParserError(s1,a)
1652 else:
1653 self.stack.push(self.table.GOTO[s1,a])
1654 self.output.append(n)
1655 elif self.table.ACTION[s,a] == ('accept', []):
1656 break
1657 else:
1658 raise LRParserError()
1659
1660
def Dict(**entries):
    """Build a dictionary from keyword arguments.

    Each C{name=value} argument becomes one key/value entry.
    """
    return dict(entries)
1664
def grules(rules_list,rulesym="->",rhssep=None):
    """
    Transforms a list of rules in a grammar description. If a rule has
    no semantic rules, C{DefaultSemRule} is assumed.

    @param rules_list: is a list of pairs (rule,sem)
    where rule is a string of the form:
      - Word rulesym Word1 ... Word2
      - Word rulesym []
    @param rulesym: LHS and RHS rule separator
    @param rhssep: RHS values separator (None for white chars)
    @return: a grammar description
    """
    description = []
    splitter = re.compile(rulesym)
    for entry in rules_list:
        # an entry is either the bare rule string or (rule, sem[, op])
        if type(entry) is StringType:
            rule = entry
        else:
            rule = entry[0]
        hit = splitter.search(rule)
        if not hit:
            continue
        # the separator must have text on both sides
        if hit.start() == 0:
            raise GrammarError(rule)
        lhs = rule[0:hit.start()].strip()
        if hit.end() == len(rule):
            raise GrammarError(rule)
        tail = string.strip(rule[hit.end():])
        # "[]" denotes an empty right hand side
        if tail == "[]":
            rhs = []
        else:
            rhs = string.split(tail,rhssep)
        if type(entry) is StringType:
            description.append((lhs,rhs,DefaultSemRule))
        elif len(entry)==3:
            description.append((lhs,rhs,entry[1],entry[2]))
        elif len(entry)==2:
            description.append((lhs,rhs,entry[1]))
        else:
            raise GrammarError(entry)

    return description
1709
1710
1711
1713 """ A basic class for parsing.
1714
1715 @ivar lex: a Lexer object
1716 """
1717
1718 - def __init__(self,tokenize,grammar, table='YappyTab',no_table=1,
1719 tabletype=LALRtable,noconflicts=1,expect=0,**args):
1720 """
1721 @param tokenize: same as for L{Lexer}
1722 @param grammar: if a string C{parse_grammar} is called
1723
1724 @param table: and no_table, tabletype same as for L{LRparser}
1725
1726 @param **args: dictionary where:
1727 - key C{tmpdir} is the directory
1728 where the parse table used by the Yappy Grammar is stored;
1729 - key C{usrdir} is the directory where the user tables are stored
1730 - key C{nosemrules} if 1 semantic actions are not applied
1731 """
1732 self.lex = Lexer(tokenize)
1733 operators = None
1734 if self.lex.__dict__.has_key("operators"):
1735 operators = self.lex.operators
1736 if type(grammar) is StringType:
1737 grammar = self.parse_grammar(grammar,{'locals':locals()},args)
1738 if args.has_key('usrdir') and os.path.isdir(args['usrdir']):
1739 table = string.rstrip(args['usrdir']) + '/' + table
1740 if os.path.dirname(table)=="" or os.path.exists(os.path.dirname(table)):
1741 LRparser.__init__(self,grammar,table,no_table,tabletype,operators,noconflicts,expect,**args)
1742 else:
1743 sys.stderr.write("Directory %s do not exist\n" %table)
1744 sys.exit()
1745 if (self.Log.noconflicts and ((self.Log.conflicts.has_key('sr') and
1746 len(self.Log.conflicts['sr'])!=
1747 self.Log.expect) or self.Log.conflicts.has_key('rr'))):
1748 print "LR conflicts: number %s value %s" %(len(self.Log.conflicts['sr']),self.Log.conflicts)
1749 print """If it is Ok, set expect to the number of conflicts and build table again"""
1750
1772
1778
1779
1781 """To be defined using output"""
1782 pass
1783
1785 """A test for each class"""
1786 pass
1787
1788
# NOTE(review): 'def expandSemRule(strargs, strfun)' header lost in
# extraction. Rewrites positional markers $n in a semantic-rule string
# into subscript expressions, e.g. with strargs="sargs[": $1 -> sargs[1].
regargs = re.compile(r'\$(\d+)')
matchargs = regargs.finditer(strfun)
for i in [(x.group(0),strargs+x.group(1)+"]") for x in matchargs]:
    strfun = string.replace(strfun,i[0],i[1])
return strfun
1795
1797 """Reduction function for semantic rules:
1798 - C{fun} can be:
1799 -- a function
1800 -- or a string with positional arguments C{$n} that is expanded
1801 and evaluated with C{eval}
1802
1803 """
1804 if callable(fun):
1805 return apply(fun,[sargs, context])
1806 elif type(fun) is StringType:
1807 a = expandSemRule("sargs[",fun)
1808 l = context.get('locals',{})
1809 l.update(locals())
1810 return eval(a,context.get('globals',{}),l)
1811 else:
1812 raise SemanticError,'Wrong type: %s' %fun
1813
1814
1816 """Default semantic rule"""
1817 return sargs[0]
1818
1821
1822
1823
1824
1826 """ A parser for grammar rules. See C{test()} for an example. """
1827
1828 - def __init__(self,no_table=1, table='yappypar.tab',tabletype=LR1table,**args):
1829 grammar= grules([
1830 ("G -> RULE G",self.GRule),
1831 ("G -> []",EmptySemRule),
1832 ("RULE -> ID rulesym MULTI ruleend",self.RULERule) ,
1833 ("MULTI -> RHS rulesep MULTI",self.MULTIRule),
1834 ("MULTI -> RHS",self.MULTIRule),
1835 ("RHS -> []",EmptySemRule),
1836 ("RHS -> RH OPSEM",self.RHSRule),
1837 ("RH -> ID RH",self.RHRule),
1838 ("RH -> ID",self.RHRule),
1839 ("OPSEM -> []",self.OPSEMRule),
1840
1841
1842 ("OPSEM -> IDS",self.OPSEMRule1),
1843 ("OPSEM -> IDS OP",self.OPSEMRule1),
1844 ("OP -> opsym OPV",self.OPRule),
1845 ("OPV -> ID ID ", self.OPVRule)
1846 ])
1847
1848
1849 tokenize = [
1850 ("\{\{.*\}\}",lambda x: ("IDS",string.strip(x[2:-2]))),
1851 ("\s+",""),
1852 ("->",lambda x: ("rulesym",x)),
1853 ("\|",lambda x: ("rulesep",x)),
1854 (";",lambda x: ("ruleend",x)),
1855
1856
1857 ("//",lambda x: ("opsym",x)),
1858 (".*",lambda x: ("ID",x))]
1859 if args.has_key('tmpdir'):
1860 args1 = {'usrdir':string.rstrip(args['tmpdir'],'/')}
1861 else:
1862 args1 = {}
1863 Yappy.__init__(self,tokenize,grammar,table,no_table,**args1)
1864
1865
1866
1868 """ """
1869 try:
1870 int(arg[0])
1871 except ValueError:
1872 raise SemanticError("Precedence must be an integer: %s given" %arg[0])
1873 if arg[1]!= 'left' and arg[1]!= 'right' and arg[1]!= 'noassoc':
1874 raise SemanticError("Associativity must be 'left' or 'right' or 'noassoc': %s\
1875 given" %arg[1])
1876 return (int(arg[0]),arg[1])
1877
def OPRule(self,arg,context):
    # NOTE(review): two interior source lines (1879, 1881) were elided by
    # extraction; the arity dispatch below is kept verbatim.

    if len(arg) == 4:
        return (arg[1],arg[2])
    if len(arg) == 3:
        return arg[1]
    if len(arg) == 0:
        return 'DefaultSemRule'
1888
# NOTE(review): header lost in extraction; presumably OPSEMRule1, pairing
# a semantic-rule name with its optional operator info — TODO confirm.
if len(arg) == 2:
    # (semantic rule string, operator pair)
    return (arg[0],arg[1])
if len(arg) == 1:
    return arg[0]
if len(arg) == 0:
    return 'DefaultSemRule'
1896
1897
def RHRule(self,arg,context):
    """Semantic action for C{RH -> ID} / C{RH -> ID RH}: collect the RHS
    symbols into a list, consing the first symbol onto the rest."""
    n = len(arg)
    if n == 2:
        return [arg[0]] + arg[1]
    if n == 1:
        return [arg[0]]
1903
# NOTE(review): header lost in extraction; by the grammar rule
# "RHS -> RH OPSEM" this is RHSRule: (symbol list, sem/op info).
return (arg[0],arg[1])
1906
# NOTE(review): header lost in extraction; MULTIRule collects the
# alternative right hand sides of one LHS into a list.
if len(arg) == 1:
    return [arg[0]]
else:
    # arg[1] is the 'rulesep' token and is skipped
    return [arg[0]]+arg[2]
1912
# NOTE(review): header lost in extraction; RULERule builds one grammar
# tuple per alternative RHS for the LHS in arg[0].
lhs=arg[0]

def grule(self,l):
    # l == [] encodes an empty right hand side
    if l == []: return (lhs,[],EmptySemRule)
    if type(l[1]) is TupleType:
        # (sem name, op pair): resolve the name in the caller's locals
        return (lhs,l[0],eval(l[1][0],globals(),context['locals']),l[1][1])
    else:
        return (lhs,l[0],eval(l[1],globals(),context['locals']))

return map(lambda l:grule(self,l) ,arg[2])
1924
def GRule(self,args,context):
    """Semantic action for C{G -> RULE G}: prepend the grammar tuples
    built for RULE to the accumulated C{context['rules']} list.

    @param args: C{args[0]} is the list of tuples produced by RULERule
    @param context: shared parsing context dictionary
    @return: the empty list (G itself carries no value)
    """
    # EAFP instead of the Python-2-only dict.has_key(): behavior is
    # identical under Python 2 and the method also runs under Python 3.
    try:
        context['rules'] = args[0] + context['rules']
    except KeyError:
        # first chunk of rules seen
        context['rules'] = args[0]
    return []
1931
1933 st = """
1934 reg -> reg + reg {{DefaultSemRule}} // 200 left |
1935 reg reg {{DefaultSemRule}} // 200 left |
1936 reg * {{DefaultSemRule}} |
1937 ( reg ) {{DefaultSemRule}} |
1938 id {{lambda l,c:l[0]}};
1939 reg -> ;
1940 a -> reg | reg ;
1941 """
1942 st1 = """
1943 reg -> reg + reg {{DefaultSemRule // 200 left}} |
1944 reg reg {{DefaultSemRule // 200 left}} |
1945 reg * {{DefaultSemRule}} |
1946 ( reg ) {{DefaultSemRule}} |
1947 id {{DefaultSemRule}};
1948 reg -> ;
1949 a -> reg | reg ;
1950 """
1951 self.input(st,{'locals':locals()})
1952 return self.context['rules']
1953
1954
1956 """ A simple class to implement stacks"""
1957
1959 """Reverse initial stack objects"""
1960 self.stack = []
1961 for x in start: self.push(x)
1962 self.stack.reverse()
1963
def push(self, object):
    """Place *object* on top of the stack (the top is index 0)."""
    self.stack.insert(0, object)
1966
# NOTE(review): the 'def' headers of the following Stack methods were lost
# in extraction (pop, top, empty, clear, __repr__, __cmp__, __len__,
# __add__, __mul__, __getitem__, __getslice__, __getattr__, judging by the
# bodies); each body is kept verbatim with comments added.

# pop(): remove and return the top element
if not self.stack:
    # HACK: Python 2 string exception, kept verbatim; on a modern
    # interpreter raising a plain string is itself a TypeError
    raise 'stack underflow'
top, self.stack = self.stack[0], self.stack[1:]
return top

# top(): peek at the top element
""" Returns top of stack (not poping it)"""
if not self.stack:
    raise 'stack underflow'
return self.stack[0]

# empty()
""" Tests if stack is empty"""
return not self.stack

# clear()
""" Empties stack"""
self.stack=[]

# __repr__()
return '[Stack:%s]' % self.stack

# __cmp__(): Python 2 three-way comparison on the underlying lists
return cmp(self.stack, other.stack)

# __len__()
return len(self.stack)

# __add__(): concatenation yields a new Stack
return Stack(self.stack + other.stack)

# __mul__(): repetition yields a new Stack
return Stack(self.stack * reps)

# __getitem__(): index 0 is the top
return self.stack[offset]

# __getslice__(): Python 2 slicing protocol, returns a new Stack
return Stack(self.stack[low : high])

# __getattr__(): delegate everything else to the underlying list
return getattr(self.stack, name)
2010