[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

SableCC hangs when 'compiling' a grammar



Hello everybody,

I'm trying to generate a SableCC parser for a grammar that defines the Dynamic
Constraint Language (DCL) for XForms.

I've been using the ecmascript.grammar as an example and simplified and
changed it into a dcl.grammar.

Also, I've removed all the Unicode character sets and replaced them
with ASCII counterparts (sablecc-ing the Unicode version did not even finish
after one hour....).
In addition, I removed all the 'comment' fragments... removing them seemed to
help a bit.

I start 'sablecc'-ing the dcl.grammar and after 6 or 7 minutes, the
compilation hangs in what seems to be an infinite loop. At that moment, the
Java VM uses about 178 MBytes of memory and stays there.

Is there something wrong with the grammar, and if so, why does SableCC hang
and not report an error?

I use JDK 1.3 and SableCC-2.16.2 on an NT 4.0 machine (SP6).

Your help is very much appreciated.
Thank you very much.

-- Anton


This is the grammar: dcl.grammar
  Begin =======================================================

Package org.xforms.dcl;

Helpers
   // Base character set used by the comment and string helpers:
   // code points 0x0000 through 0x00FF.
   source_character = [0x0000..0x00FF];

   // Individual white space characters (line terminators are separate).
   tab  = 0x0009;
   vt   = 0x000B;
   ff   = 0x000C;
   sp   = 0x0020;
   nbsp = 0x00A0;
   usp  = 0x3000;   // lies outside the source_character range

   simple_white_space =
        tab
      | vt
      | ff
      | sp
      | nbsp
      | usp
      ;

   // Individual line terminator characters.
   lf = 0x000A;
   cr = 0x000D;
   ls = 0x2028;     // lies outside the source_character range
   ps = 0x2029;     // lies outside the source_character range

   // Union of the four line terminator characters.
   line_terminator = [[lf + cr] + [ls + ps]];

   // Any source character that is not a line terminator.
   non_terminator = [source_character - line_terminator];

   // Block comment over source_character: '/*', any run of '*', then zero
   // or more groups that open with a character other than '/' or '*',
   // continue with non-'*' characters and end with a run of '*'; finally '*/'.
   multi_line_comment =
        '/*' '*'*
        ([source_character - ['/' + '*']] [source_character - '*']* '*'*)*
        '*/';

   // Same shape as multi_line_comment, but built from non_terminator, so the
   // comment body cannot contain a line terminator.
   multi_line_comment_no_lt =
        '/*' '*'*
        ([non_terminator - ['/' + '*']] [non_terminator - '*']* '*'*)*
        '*/';

   // '//' followed by every non-terminator character that follows it.
   single_line_comment = '//' non_terminator*;

   // Any of the three comment forms.
   comment =
        multi_line_comment_no_lt
      | multi_line_comment
      | single_line_comment
      ;
   
   // Character codes treated as letters.
   // NOTE(review): 0x00F8 is listed as a single code, so 0x00F9..0x00FF are
   // not letters here - confirm that this is intended.
   unicode_letter =
        [0x0041..0x005A]
      | [0x0061..0x007A]
      | 0x00AA
      | 0x00B5
      | 0x00BA
      | [0x00C0..0x00D6]
      | [0x00D8..0x00F6]
      | 0x00F8
      ;

   unicode_combining_mark = 0x0300;

   unicode_digit = [0x0030..0x0039];

   unicode_connector_punctuation = 0x005F;

   // 0-9, a-f, A-F.
   hex_digit = [['0'..'9'] + [['a'..'f'] + ['A'..'F']]];

   // 'u' followed by exactly four hex digits (the leading '\' is added by
   // the helpers that use this one).
   unicode_escape_sequence = 'u' hex_digit hex_digit hex_digit hex_digit;

   // Characters allowed after the first character of an ncname.
   ncname_char =
        unicode_letter
      | unicode_digit
      | '.'
      | '-'
      | '_'
      | unicode_combining_mark
      ;

   // First character of an identifier or ncname.
   identifier_start =
        unicode_letter
      | '_'
      ;

   // Any later character of an identifier, including a '\uXXXX' escape.
   identifier_part =
        identifier_start
      | unicode_combining_mark
      | unicode_digit
      | unicode_connector_punctuation
      | '\' unicode_escape_sequence
      ;

   // Digit classes.
   decimal_digit  = ['0'..'9'];
   non_zero_digit = ['1'..'9'];

   // 'e' or 'E', an optional sign, then at least one digit.
   exponent_part = ('e' | 'E') ('+' | '-')? decimal_digit+;

   // Either a lone '0' or a digit string that does not start with '0'.
   decimal_integer_literal = '0' | non_zero_digit decimal_digit*;
    
   // Characters that form a one-character escape after a backslash:
   // ' " \ b f n r t v
   single_escape_character =
        [''' + ['"' + ['\' + ['b' + ['f' + ['n' + ['r' + ['t' + 'v']]]]]]]];

   // Every character that starts some escape form: the single escapes,
   // the decimal digits, 'x' and 'u'.
   escape_character =
        [single_escape_character + [decimal_digit + ['x' + 'u']]];

   // Source characters that are neither escape starters nor line terminators.
   non_escape_character =
        [source_character - [escape_character + line_terminator]];

   character_escape_sequence =
        single_escape_character
      | non_escape_character
      ;

   // What may follow a backslash inside a string.
   escape_sequence =
        character_escape_sequence
      | '0' // TODO: [lookahead not a DecimalDigit]
      | 'x' hex_digit hex_digit
      | unicode_escape_sequence
      ;

   // One character of a "..." string: anything except '"', '\' and line
   // terminators, or a backslash escape.
   double_string_character =
        [source_character - ['"' + ['\' + line_terminator]]]
      | '\' escape_sequence
      ;

   // One character of a '...' string: anything except ''', '\' and line
   // terminators, or a backslash escape.
   single_string_character =
        [source_character - [''' + ['\' + line_terminator]]]
      | '\' escape_sequence
      ;

   // Like the two helpers above, but with no quote character excluded.
   any_string_character =
        [source_character - ['\' + line_terminator]]
      | '\' escape_sequence
      ;

   // One or more white space items that never contain a line terminator.
   white_space_no_lt =
        (simple_white_space | single_line_comment | multi_line_comment_no_lt)+;

   // One or more white space items; comments and line terminators included.
   white_space =
        (simple_white_space | comment | line_terminator)+;

   // XPath path separators.
   root     = '/';
   rootroot = '//';

   // A double-quoted or single-quoted string.
   string_literal =
        '"' double_string_character* '"'
      | ''' single_string_character* '''
      ;

   // Names: ncname, and qname with an optional 'prefix:' in front.
   ncname = identifier_start ncname_char*;
   prefix = ncname;
   qname  = (prefix ':')? ncname;

   // Axis names accepted in front of '::'.
   axisname =
        'ancestor'
      | 'ancestor-or-self'
      | 'attribute'
      | 'child'
      | 'descendant'
      | 'descendant-or-self'
      | 'following'
      | 'following-sibling'
      | 'namespace'
      | 'parent'
      | 'preceding'
      | 'preceding-sibling'
      | 'self'
      ;

   abbreviatedaxisspecifier = '@';

   // '*', 'name:*' or a (possibly prefixed) name.
   nametest =
        '*'
      | ncname ':' '*'
      | qname
      ;

   nodetype =
        'comment'
      | 'text'
      | 'processing-instruction'
      | 'node'
      ;

   xpliteral = string_literal;

   // Predicate content is any run of string characters.
   // NOTE(review): any_string_character does not exclude ']', so with
   // longest-match lexing a predicate presumably runs to the last ']' that
   // still completes a match - confirm this is acceptable.
   subxpathexpr  = any_string_character*;
   predicateexpr = subxpathexpr;

   // NOTE(review): an axis name and '::' are mandatory here and '@' is an
   // optional suffix. XPath 1.0 appears to define the axis specifier as
   // either "AxisName '::'" or an optional '@' - confirm the difference is
   // deliberate.
   axisspecifier = axisname '::' abbreviatedaxisspecifier?;

   nodetest =
        nametest
      | nodetype '(' ')'
      | 'processing-instruction' '(' xpliteral ')'
      ;

   predicate = '[' predicateexpr ']';

   abbreviatedstep = '.' | '..';

   // A single step: axis, node test and any number of predicates, or one
   // of the abbreviated steps.
   step =
        axisspecifier nodetest predicate*
      | abbreviatedstep
      ;

   // A further step introduced by '/' or '//'.
   extrarootstep     = root step;
   extrarootrootstep = rootroot step;
   extrastep         = extrarootstep | extrarootrootstep;

   relativelocationpath = step extrastep*;

   abbreviatedabsolutelocationpath = rootroot relativelocationpath;

   // '/' alone, '/' plus a relative path, or '//' plus a relative path.
   absolutelocationpath =
        root relativelocationpath?
      | abbreviatedabsolutelocationpath
      ;


// Lexer states. Only 'normal' is named in a token's state list in this
// grammar; 'nolt' is declared but not referenced by any token.
States
   normal, nolt;


Tokens
   // White space without line terminators. It carries no state list.
   // The white_space_no_lt helper is the same expression.
   blank_no_lt = white_space_no_lt;

   // White space including line terminators, listed for the 'normal' state.
   // The white_space helper is the same expression.
   {normal} blank = white_space;

   // Keywords. They are declared before 'identifier'.
   // NOTE(review): presumably declaration order is what lets them win a tie
   // of equal length against 'identifier' - confirm.
   above  = 'above';
   below  = 'below';
   before = 'before';
   after  = 'after';
   within = 'within';

   and   = 'and';
   or    = 'or';
   xor   = 'xor';
   plus  = 'plus';
   minus = 'minus';
   times = 'times';
   over  = 'over';
   is    = 'is';

   not  = 'not';
   dash = '-';

   perc = '%';

   if   = 'if';
   then = 'then';
   else = 'else';

   null = 'null';

   true  = 'true';
   false = 'false';

   // Decimal number: integer part with optional fraction, fraction only,
   // or integer only; each form may carry an exponent.
   decimal_literal =
        decimal_integer_literal '.' decimal_digit* exponent_part?
      | '.' decimal_digit+ exponent_part?
      | decimal_integer_literal exponent_part?
      ;

   hex_integer_literal = ('0x' | '0X') hex_digit+;

   // Punctuation.
   lbrace    = '{';
   rbrace    = '}';
   lparen    = '(';
   rparen    = ')';
   lbracket  = '[';
   rbracket  = ']';
   dot       = '.';
   semicolon = ';';
   comma     = ',';
   assign    = '=';

   // Token of the same name as the helper it is built from.
   string_literal = string_literal;

   identifier = identifier_start identifier_part*;

   // An XPath location path, lexed as a single token.
   // NOTE(review): a path that starts with '//' is also matched by
   // single_line_comment (through blank_no_lt and blank), and a lone '.' is
   // also matched by 'dot'; check which token the lexer actually produces
   // for such input.
   locationpath = absolutelocationpath | relativelocationpath;


// Both white space tokens are listed as ignored.
Ignored Tokens
   blank_no_lt, blank;


Productions
   // First production of the grammar: a program is a list of source elements.
   program =
        {def} source_elements
      ;

   // Atoms of an expression: a name, an XPath location path, a literal,
   // an array literal or a parenthesised expression.
   primary_expression =
        {identifier}    identifier
      | {xpath}         locationpath
      | {literal}       literal
      | {array_literal} array_literal
      | {expression}    lparen expression rparen
      ;

   // '[' ... ']' with optional elisions (runs of commas) and elements.
   array_literal =
        {elision}              lbracket elision? rbracket
      | {element_list}         lbracket element_list rbracket
      | {element_list_elision} lbracket element_list comma elision? rbracket
      ;

   // Comma-separated assignment expressions; each may follow an elision.
   element_list =
        {assign} elision? assignment_expression
      | {list}   element_list comma elision? assignment_expression
      ;

   // One or more consecutive commas.
   elision =
        {comma}   comma
      | {elision} elision comma
      ;

   // A primary expression followed by any number of '[expression]' indexes.
   member_expression =
        {primary}        primary_expression
      | {member_bracket} member_expression lbracket expression rbracket
      ;

   // A call, optionally followed by further calls or '[expression]' indexes.
   call_expression =
        {member}     member_expression arguments
      | {args}       call_expression arguments
      | {expression} call_expression lbracket expression rbracket
      ;

   // Parenthesised, possibly empty, argument list.
   arguments =
        {empty} lparen rparen
      | {list}  lparen argument_list rparen
      ;

   argument_list =
        {assign}      assignment_expression
      | {assign_list} argument_list comma assignment_expression
      ;

   left_hand_side_expression =
        {member} member_expression
      | {call}   call_expression
      ;

   // A left-hand-side expression with an optional trailing '%'.
   postfix_expression =
        {left} left_hand_side_expression
      | {perc} left_hand_side_expression perc
      ;
      
   // Prefix operators: '-' (token dash) and the keyword 'not'.
   unary_expression =
        {postfix} postfix_expression
      | {minus}   dash unary_expression
      | {not}     not unary_expression
      ;

   // Left-recursive 'times' / 'over' level.
   multiplicative_expression =
        {unary} unary_expression
      | {mul}   multiplicative_expression times unary_expression
      | {div}   multiplicative_expression over unary_expression
      ;

   // Left-recursive 'plus' / 'minus' level.
   additive_expression =
        {mul}   multiplicative_expression
      | {plus}  additive_expression plus multiplicative_expression
      | {minus} additive_expression minus multiplicative_expression
      ;

   // within ( range1 , range2 )
   within_expression =
        {def} within lparen [range1]:assignment_expression comma [range2]:assignment_expression rparen
      ;

   // The constraint that follows 'is' / 'is not' in an is_expression.
   boolean_expression =
        {above}  above additive_expression
      | {below}  below additive_expression
      | {before} before additive_expression
      | {after}  after additive_expression
      | {within} within_expression
      ;

   is_expression =
        {is}    is boolean_expression
      | {isnot} is not boolean_expression
      ;

   // NOTE(review): 'is' starts an is_expression here and also introduces
   // the {eq}/{ne} alternatives of equality_expression below. After a
   // relational_expression, a parser with one token of lookahead sees only
   // 'is', so a shift/reduce conflict looks likely. In addition, 'is not x'
   // can be read as {ne}, or as {eq} applied to the unary 'not x'. Confirm
   // against the conflict report of the parser generator.
   relational_expression =
        {add} additive_expression
      | {is}  relational_expression is_expression
      ;

   equality_expression =
        {relational} relational_expression
      | {eq}         equality_expression is relational_expression
      | {ne}         equality_expression is not relational_expression
      ;
      
   // Left-recursive 'and' level, built on equality expressions.
   logical_and_expression =
        {equality}    equality_expression
      | {logical_and} logical_and_expression and equality_expression
      ;

   // Left-recursive 'xor' level, between the 'and' level below it and the
   // 'or' level above it.
   // The {logical_xor} alternative must recurse on logical_xor_expression
   // itself. Recursing on logical_or_expression would make this rule and
   // logical_or_expression derive each other on the left, so an input such
   // as 'a or b xor c' would have two parses.
   logical_xor_expression =
      {logical_and} logical_and_expression |
      {logical_xor} logical_xor_expression xor logical_and_expression;

   // Left-recursive 'or' level, built on xor expressions.
   // The alternative labels are kept exactly as they are because they name
   // the generated alternative classes.
   logical_or_expression =
        {logical_and} logical_xor_expression
      | {logical_or}  logical_or_expression or logical_xor_expression
      ;

   // if <condition> then <true> else <false>, or a plain 'or' expression.
   conditional_expression =
        {logical_or} logical_or_expression
      | {cond}       if logical_or_expression then [true]:assignment_expression else [false]:assignment_expression
      ;

   // Right-recursive assignment: target, operator, value.
   assignment_expression =
        {cond} conditional_expression
      | {left} left_hand_side_expression assignment_operator assignment_expression
      ;

   assignment_operator =
        {assign} assign
      ;

   // One or more assignment expressions separated by commas.
   expression =
        {assign} assignment_expression
      | {list}   expression comma assignment_expression
      ;

   // Literal values: null, booleans, numbers and strings.
   literal =
        {null} null |
        {boolean} boolean_literal |
        {numeric} numeric_literal |
        {string} string_literal;

   // A decimal or hexadecimal number token.
   numeric_literal =
        {decimal} decimal_literal |
        {hex} hex_integer_literal;

   boolean_literal =
        {true} true |
        {false} false;

   // A statement is a lone ';' or an expression terminated by ';'.
   statement =
        {empty}      empty_statement
      | {expression} expression_statement
      ;

   // One or more statements.
   statement_list =
        {statement} statement
      | {list}      statement_list statement
      ;

   empty_statement =
        {def} semicolon
      ;

   expression_statement =
        {def} expression semicolon
      ;

   // One or more source elements; this is what 'program' expands to.
   source_elements =
        {one}  source_element
      | {list} source_elements source_element
      ;

   source_element =
        {statement} statement
      ;

  End ==========================================================