1 %{# (c) Copyright Francois Desarmenien 1998-2001, all rights reserved.
2 # (see COPYRIGHT in Parse::Yapp.pm pod section for use and distribution rights)
3 #
4 # Parse/Yapp/Parser.yp: Parse::Yapp::Parser.pm source file
5 #
6 # Use: yapp -m 'Parse::Yapp::Parse' -o Parse/Yapp/Parse.pm YappParse.yp
7 #
8 # to generate the Parser module.
9 #
10 %}
11
12 %{
13 require 5.004;
14
15 use Carp;
16
17 my($input,$lexlevel,@lineno,$nberr,$prec,$labelno);
18 my($syms,$head,$tail,$token,$term,$nterm,$rules,$precterm,$start,$nullable);
19 my($expect);
20
21 %}
22
23 %%
24
25 # Main rule
26 yapp: head body tail ;
27
28 #Common rules:
29
30 symbol: LITERAL {
31 exists($$syms{$_[1][0]})
32 or do {
33 $$syms{$_[1][0]} = $_[1][1];
34 $$term{$_[1][0]} = undef;
35 };
36 $_[1]
37 }
38 | ident #default action
39 ;
40
41 ident: IDENT {
42 exists($$syms{$_[1][0]})
43 or do {
44 $$syms{$_[1][0]} = $_[1][1];
45 $$term{$_[1][0]} = undef;
46 };
47 $_[1]
48 }
49 ;
50
51
52 # Head section:
53 head: headsec '%%'
54 ;
55
56 headsec: #empty #default action
57 | decls #default action
58 ;
59
60 decls: decls decl #default action
61 | decl #default action
62 ;
63
64 decl: '\n' #default action
65 | TOKEN typedecl symlist '\n'
66 {
67 for (@{$_[3]}) {
68 my($symbol,$lineno)=@$_;
69
70 exists($$token{$symbol})
71 and do {
72 _SyntaxError(0,
73 "Token $symbol redefined: ".
74 "Previously defined line $$syms{$symbol}",
75 $lineno);
76 next;
77 };
78 $$token{$symbol}=$lineno;
79 $$term{$symbol} = [ ];
80 }
81 undef
82 }
83 | ASSOC typedecl symlist '\n'
84 {
85 for (@{$_[3]}) {
86 my($symbol,$lineno)=@$_;
87
88 defined($$term{$symbol}[0])
89 and do {
90 _SyntaxError(1,
91 "Precedence for symbol $symbol redefined: ".
92 "Previously defined line $$syms{$symbol}",
93 $lineno);
94 next;
95 };
96 $$token{$symbol}=$lineno;
97 $$term{$symbol} = [ $_[1][0], $prec ];
98 }
99 ++$prec;
100 undef
101 }
102 | START ident '\n' { $start=$_[2][0]; undef }
103 | HEADCODE '\n' { push(@$head,$_[1]); undef }
104 | UNION CODE '\n' { undef } #ignore
105 | TYPE typedecl identlist '\n'
106 {
107 for ( @{$_[3]} ) {
108 my($symbol,$lineno)=@$_;
109
110 exists($$nterm{$symbol})
111 and do {
112 _SyntaxError(0,
113 "Non-terminal $symbol redefined: ".
114 "Previously defined line $$syms{$symbol}",
115 $lineno);
116 next;
117 };
118 delete($$term{$symbol}); #not a terminal
119 $$nterm{$symbol}=undef; #is a non-terminal
120 }
121 }
122 | EXPECT NUMBER '\n' { $expect=$_[2][0]; undef }
123 | error '\n' { $_[0]->YYErrok }
124 ;
125
126 typedecl: #empty
127 | '<' IDENT '>'
128 ;
129
130 symlist: symlist symbol { push(@{$_[1]},$_[2]); $_[1] }
131 | symbol { [ $_[1] ] }
132 ;
133
134 identlist: identlist ident { push(@{$_[1]},$_[2]); $_[1] }
135 | ident { [ $_[1] ] }
136 ;
137
138 # Rule section
139 body: rulesec '%%'
140 {
141 $start
142 or $start=$$rules[1][0];
143
144 ref($$nterm{$start})
145 or _SyntaxError(2,"Start symbol $start not found ".
146 "in rules section",$_[2][1]);
147
148 $$rules[0]=[ '$start', [ $start, chr(0) ], undef, undef ];
149 }
150 | '%%' { _SyntaxError(2,"No rules in input grammar",$_[1][1]); }
151 ;
152
153 rulesec: rulesec rules #default action
154 | rules #default action
155 ;
156
157 rules: IDENT ':' rhss ';' { _AddRules($_[1],$_[3]); undef }
158 | error ';' { $_[0]->YYErrok }
159 ;
160
161 rhss: rhss '|' rule { push(@{$_[1]},$_[3]); $_[1] }
162 | rule { [ $_[1] ] }
163 ;
164
165 rule: rhs prec epscode { push(@{$_[1]}, $_[2], $_[3]); $_[1] }
166 | rhs {
167 my($code)=undef;
168
169 defined($_[1])
170 and $_[1][-1][0] eq 'CODE'
171 and $code = ${pop(@{$_[1]})}[1];
172
173 push(@{$_[1]}, undef, $code);
174
175 $_[1]
176 }
177 ;
178
179 rhs: #empty #default action (will return undef)
180 | rhselts #default action
181 ;
182
183 rhselts: rhselts rhselt { push(@{$_[1]},$_[2]); $_[1] }
184 | rhselt { [ $_[1] ] }
185 ;
186
187 rhselt: symbol { [ 'SYMB', $_[1] ] }
188 | code { [ 'CODE', $_[1] ] }
189 ;
190
191 prec: PREC symbol
192 {
193 defined($$term{$_[2][0]})
194 or do {
195 _SyntaxError(1,"No precedence for symbol $_[2][0]",
196 $_[2][1]);
197 return undef;
198 };
199
200 ++$$precterm{$_[2][0]};
201 $$term{$_[2][0]}[1];
202 }
203 ;
204
205 epscode: { undef }
206 | code { $_[1] }
207 ;
208
209 code: CODE { $_[1] }
210 ;
211
212 # Tail section:
213
214 tail: /*empty*/
215 | TAILCODE { $tail=$_[1] }
216 ;
217
218 %%
219 sub _Error {
220 my($value)=$_[0]->YYCurval;
221
222 my($what)= $token ? "input: '$$value[0]'" : "end of input";
223
224 _SyntaxError(1,"Unexpected $what",$$value[1]);
225 }
226
227 sub _Lexer {
228
229 #At EOF
230 pos($$input) >= length($$input)
231 and return('',[ undef, -1 ]);
232
233 #In TAIL section
234 $lexlevel > 1
235 and do {
236 my($pos)=pos($$input);
237
238 $lineno[0]=$lineno[1];
239 $lineno[1]=-1;
240 pos($$input)=length($$input);
241 return('TAILCODE',[ substr($$input,$pos), $lineno[0] ]);
242 };
243
244 #Skip blanks
245 $lexlevel == 0
246 ? $$input=~m{\G((?:
247 [\t\ ]+ # Any white space char but \n
248 | \#[^\n]* # Perl like comments
249 | /\*.*?\*/ # C like comments
250 )+)}xsgc
251 : $$input=~m{\G((?:
252 \s+ # any white space char
253 | \#[^\n]* # Perl like comments
254 | /\*.*?\*/ # C like comments
255 )+)}xsgc
256 and do {
257 my($blanks)=$1;
258
259 #Maybe At EOF
260 pos($$input) >= length($$input)
261 and return('',[ undef, -1 ]);
262
263 $lineno[1]+= $blanks=~tr/\n//;
264 };
265
266 $lineno[0]=$lineno[1];
267
268 $$input=~/\G([A-Za-z_][A-Za-z0-9_]*)/gc
269 and return('IDENT',[ $1, $lineno[0] ]);
270
271 $$input=~/\G('(?:[^'\\]|\\\\|\\'|\\)+?')/gc
272 and do {
273 $1 eq "'error'"
274 and do {
275 _SyntaxError(0,"Literal 'error' ".
276 "will be treated as error token",$lineno[0]);
277 return('IDENT',[ 'error', $lineno[0] ]);
278 };
279 return('LITERAL',[ $1, $lineno[0] ]);
280 };
281
282 $$input=~/\G(%%)/gc
283 and do {
284 ++$lexlevel;
285 return($1, [ $1, $lineno[0] ]);
286 };
287
288 $$input=~/\G{/gc
289 and do {
290 my($level,$from,$code);
291
292 $from=pos($$input);
293
294 $level=1;
295 while($$input=~/([{}])/gc) {
296 substr($$input,pos($$input)-1,1) eq '\\' #Quoted
297 and next;
298 $level += ($1 eq '{' ? 1 : -1)
299 or last;
300 }
301 $level
302 and _SyntaxError(2,"Unmatched { opened line $lineno[0]",-1);
303 $code = substr($$input,$from,pos($$input)-$from-1);
304 $lineno[1]+= $code=~tr/\n//;
305 return('CODE',[ $code, $lineno[0] ]);
306 };
307
308 if($lexlevel == 0) {# In head section
309 $$input=~/\G%(left|right|nonassoc)/gc
310 and return('ASSOC',[ uc($1), $lineno[0] ]);
311 $$input=~/\G%(start)/gc
312 and return('START',[ undef, $lineno[0] ]);
313 $$input=~/\G%(expect)/gc
314 and return('EXPECT',[ undef, $lineno[0] ]);
315 $$input=~/\G%{/gc
316 and do {
317 my($code);
318
319 $$input=~/\G(.*?)%}/sgc
320 or _SyntaxError(2,"Unmatched %{ opened line $lineno[0]",-1);
321
322 $code=$1;
323 $lineno[1]+= $code=~tr/\n//;
324 return('HEADCODE',[ $code, $lineno[0] ]);
325 };
326 $$input=~/\G%(token)/gc
327 and return('TOKEN',[ undef, $lineno[0] ]);
328 $$input=~/\G%(type)/gc
329 and return('TYPE',[ undef, $lineno[0] ]);
330 $$input=~/\G%(union)/gc
331 and return('UNION',[ undef, $lineno[0] ]);
332 $$input=~/\G([0-9]+)/gc
333 and return('NUMBER',[ $1, $lineno[0] ]);
334
335 }
336 else {# In rule section
337 $$input=~/\G%(prec)/gc
338 and return('PREC',[ undef, $lineno[0] ]);
339 }
340
341 #Always return something
342 $$input=~/\G(.)/sg
343 or die "Parse::Yapp::Grammar::Parse: Match (.) failed: report as a BUG";
344
345 $1 eq "\n"
346 and ++$lineno[1];
347
348 ( $1 ,[ $1, $lineno[0] ]);
349
350 }
351
352 sub _SyntaxError {
353 my($level,$message,$lineno)=@_;
354
355 $message= "*".
356 [ 'Warning', 'Error', 'Fatal' ]->[$level].
357 "* $message, at ".
358 ($lineno < 0 ? "eof" : "line $lineno").
359 ".\n";
360
361 $level > 1
362 and die $message;
363
364 warn $message;
365
366 $level > 0
367 and ++$nberr;
368
369 $nberr == 20
370 and die "*Fatal* Too many errors detected.\n"
371 }
372
373 sub _AddRules {
374 my($lhs,$lineno)=@{$_[0]};
375 my($rhss)=$_[1];
376
377 ref($$nterm{$lhs})
378 and do {
379 _SyntaxError(1,"Non-terminal $lhs redefined: ".
380 "Previously declared line $$syms{$lhs}",$lineno);
381 return;
382 };
383
384 ref($$term{$lhs})
385 and do {
386 my($where) = exists($$token{$lhs}) ? $$token{$lhs} : $$syms{$lhs};
387 _SyntaxError(1,"Non-terminal $lhs previously ".
388 "declared as token line $where",$lineno);
389 return;
390 };
391
392 ref($$nterm{$lhs}) #declared through %type
393 or do {
394 $$syms{$lhs}=$lineno; #Say it's declared here
395 delete($$term{$lhs}); #No more a terminal
396 };
397 $$nterm{$lhs}=[]; #It's a non-terminal now
398
399 my($epsrules)=0; #To issue a warning if more than one epsilon rule
400
401 for my $rhs (@$rhss) {
402 my($tmprule)=[ $lhs, [ ], splice(@$rhs,-2) ]; #Init rule
403
404 @$rhs
405 or do {
406 ++$$nullable{$lhs};
407 ++$epsrules;
408 };
409
410 for (0..$#$rhs) {
411 my($what,$value)=@{$$rhs[$_]};
412
413 $what eq 'CODE'
414 and do {
415 my($name)='@'.++$labelno."-$_";
416 push(@$rules,[ $name, [], undef, $value ]);
417 push(@{$$tmprule[1]},$name);
418 next;
419 };
420 push(@{$$tmprule[1]},$$value[0]);
421 }
422 push(@$rules,$tmprule);
423 push(@{$$nterm{$lhs}},$#$rules);
424 }
425
426 $epsrules > 1
427 and _SyntaxError(0,"More than one empty rule for symbol $lhs",$lineno);
428 }
429
430 sub Parse {
431 my($self)=shift;
432
433 @_ > 0
434 or croak("No input grammar\n");
435
436 my($parsed)={};
437
438 $input=\$_[0];
439
440 $lexlevel=0;
441 @lineno=(1,1);
442 $nberr=0;
443 $prec=0;
444 $labelno=0;
445
446 $head=();
447 $tail="";
448
449 $syms={};
450 $token={};
451 $term={};
452 $nterm={};
453 $rules=[ undef ]; #reserve slot 0 for start rule
454 $precterm={};
455
456 $start="";
457 $nullable={};
458 $expect=0;
459
460 pos($$input)=0;
461
462
463 $self->YYParse(yylex => \&_Lexer, yyerror => \&_Error);
464
465 $nberr
466 and _SyntaxError(2,"Errors detected: No output",-1);
467
468 @$parsed{ 'HEAD', 'TAIL', 'RULES', 'NTERM', 'TERM',
469 'NULL', 'PREC', 'SYMS', 'START', 'EXPECT' }
470 = ( $head, $tail, $rules, $nterm, $term,
471 $nullable, $precterm, $syms, $start, $expect);
472
473 undef($input);
474 undef($lexlevel);
475 undef(@lineno);
476 undef($nberr);
477 undef($prec);
478 undef($labelno);
479
480 undef($head);
481 undef($tail);
482
483 undef($syms);
484 undef($token);
485 undef($term);
486 undef($nterm);
487 undef($rules);
488 undef($precterm);
489
490 undef($start);
491 undef($nullable);
492 undef($expect);
493
494 $parsed
495 }
Casiano Rodríguez León