//TODO-AC: many of the catchall errors are recoverable - either return a bad token which will be discarded or just discard and make a note.
//TODO-AC: meta-pattern macros?
//based on http://jflex.de/manual.html

package metalexer;

import static metalexer.LayoutParser.Terminals.*;

import beaver.Symbol;
import beaver.Scanner;

%%

//general header info
//the generated scanner is a public final class named LayoutScanner
%public
%final
%class LayoutScanner

//required for beaver compatibility
//the scanner extends beaver's Scanner and its scanning method is
//Symbol nextToken() throws Scanner.Exception
%extends Scanner
%unicode
%function nextToken
%type Symbol
%yylexthrow Scanner.Exception

//track line and column (yyline/yycolumn)
//not just for debugging: the symbol(...) and error(...) helpers below
//read them to attach one-indexed positions to tokens and exceptions
%line
%column

//scanner start-up code (JFlex places %init code in the generated constructor)
//scanning begins inside the local header section with an empty blob
%init{
  //pushes the current state and switches to the local header section
  saveStateAndTransition(INSIDE_LOCAL_HEADER_SECTION);
  //the header text is accumulated here until the first section separator
  blobBuf = new StringBuffer();
  //the header symbol starts at the current JFlex position
  markStartPosition();
%init}

%{
  //// Returning symbols ///////////////////////////////////////////////////////

  //Create a symbol using the current line and column number, as computed by JFlex
  //No attached value
  //Symbol is assumed to start and end on the same line
  //e.g. symbol(SEMICOLON)
  private Symbol symbol(short type) {
    //a token without content: delegate with no attached value
    final Object noValue = null;
    return symbol(type, noValue);
  }
  
  //Create a symbol using the current line and column number, as computed by JFlex
  //Attached value gives content information
  //Symbol is assumed to start and end on the same line
  //e.g. symbol(IDENTIFIER, "x")
  private Symbol symbol(short type, Object value) {
    //JFlex counts lines and columns from zero; the positions handed out are one-indexed
    final int line = yyline + 1;
    final int firstCol = yycolumn + 1;
    //the token is assumed not to span lines, so its last column is the
    //first column advanced by (length - 1)
    final int lastCol = firstCol + yylength() - 1;
    return symbol(type, value, line, firstCol, line, lastCol);
  }
  
  //Create a symbol using explicit position information (one-indexed)
  private Symbol symbol(short type, Object value, int startLine, int startCol, int endLine, int endCol) {
    //each (line, column) pair is packed into a single int by Symbol.makePosition
    return new Symbol(
        type,
        Symbol.makePosition(startLine, startCol),
        Symbol.makePosition(endLine, endCol),
        value);
  }
  
  //// Position ////////////////////////////////////////////////////////////////
  
  //records the position of a symbol
  private static class PositionRecord {
      //every coordinate starts out as -1 and keeps that value until
      //markStartPosition/markEndPosition overwrite it
      int startLine = -1, startCol = -1;
      int endLine = -1, endCol = -1;
  }
  
  //the position of the current symbol
  //saveStateAndTransition swaps in a fresh record for the new state and
  //restoreState puts the saved one back
  private PositionRecord pos = new PositionRecord();
  
  //populate the start line and column fields of the Position record with
  //values from JFlex
  private void markStartPosition() {
    //JFlex reports zero-indexed values; they are stored one-indexed
    final int oneBasedLine = yyline + 1;
    final int oneBasedCol = yycolumn + 1;
    pos.startCol = oneBasedCol;
    pos.startLine = oneBasedLine;
  }
  
  //populate the end line and column fields of the Position record with
  //values from JFlex (the end column is the last character of the current match)
  private void markEndPosition() {
    //JFlex's zero-indexed line, stored one-indexed
    pos.endLine = 1 + yyline;
    //one-indexed column of the first character of the current match ...
    final int firstCol = yycolumn + 1;
    //... advanced to the last character of the match
    pos.endCol = firstCol + yylength() - 1;
  }
  
  //like symbol(type), but uses the position stored in pos rather than
  //the position computed by JFlex
  private Symbol symbolFromMarkedPositions(short type) {
    //a token without content: delegate with no attached value
    final Object noValue = null;
    return symbolFromMarkedPositions(type, noValue);
  }
  
  //like symbol(type, value), but uses the position stored in pos rather than
  //the position computed by JFlex
  private Symbol symbolFromMarkedPositions(short type, Object value) {
    //all four coordinates come from the record filled in by
    //markStartPosition and markEndPosition
    final PositionRecord marked = pos;
    return symbol(type, value,
        marked.startLine, marked.startCol,
        marked.endLine, marked.endCol);
  }
  
  //like symbol(type), but uses the start position stored in pos rather than
  //the start position computed by JFlex and an explicit length param rather
  //than yylength
  private Symbol symbolFromMarkedStart(short type, int length) {
    //a token without content: delegate with no attached value
    final Object noValue = null;
    return symbolFromMarkedStart(type, noValue, length);
  }
  
  //like symbol(type, value), but uses the start position stored in pos rather than
  //the start position computed by JFlex and an explicit length param rather
  //than yylength
  private Symbol symbolFromMarkedStart(short type, Object value, int length) {
    //the symbol starts at the marked position and stays on that line
    final int line = pos.startLine;
    final int firstCol = pos.startCol;
    //last column covered by a symbol of the given length
    final int lastCol = firstCol + length - 1;
    return symbol(type, value, line, firstCol, line, lastCol);
  }
  
  //// Errors //////////////////////////////////////////////////////////////////
  
  //throw an exception with position information from JFlex
  private void error(String msg) throws Scanner.Exception {
    //JFlex positions are zero-indexed; report them one-indexed
    final int line = yyline + 1;
    final int column = yycolumn + 1;
    throw new Scanner.Exception(line, column, msg);
  }
  
  //throw an exception with position information from JFlex
  //columnOffset is added to the column
  private void error(String msg, int columnOffset) throws Scanner.Exception {
    //JFlex positions are zero-indexed; report them one-indexed
    final int line = yyline + 1;
    //the reported column is shifted right by the caller-supplied offset
    final int column = yycolumn + 1 + columnOffset;
    throw new Scanner.Exception(line, column, msg);
  }
  
  //// State transitions ///////////////////////////////////////////////////////
  
  //stack entry: state identifier + symbol position
  private static class StateRecord {
    //lexical state that was active when the record was created
    int stateNum;
    //position record that was current at that moment
    PositionRecord pos;
    
    StateRecord(int savedState, PositionRecord savedPos) {
        stateNum = savedState;
        pos = savedPos;
    }
  }
  
  //most of our states are used for bracketing
  //this gives us a way to nest bracketing states
  //saveStateAndTransition pushes onto this stack, restoreState pops from it
  private java.util.Stack<StateRecord> stateStack = new java.util.Stack<StateRecord>();
  
  void saveStateAndTransition(int newState) {
    //remember where we came from: the current lexical state plus its position record
    final StateRecord saved = new StateRecord(yystate(), pos);
    stateStack.push(saved);
    //the new state gets a blank position record of its own
    pos = new PositionRecord();
    yybegin(newState);
  }
  
  void restoreState() {
    //undo the most recent saveStateAndTransition: reinstate both the
    //position record and the lexical state that were saved by it
    final StateRecord saved = stateStack.pop();
    pos = saved.pos;
    yybegin(saved.stateNum);
  }
  
  //// Comment nesting /////////////////////////////////////////////////////////
  
  //number of '*/'s or '}'s expected
  //incremented on every "/*" and decremented on every "*/" by the comment rules;
  //NOTE(review): no rule visible in this file counts '}' - confirm whether that part is stale
  private int nestingDepth = 0;
  
  //// Blob accumulation ///////////////////////////////////////////////////////
  
  //for accumulating the contents of a string literal, comment, action, etc
  //the rules replace it with a fresh buffer whenever a new blob starts
  private StringBuffer blobBuf = new StringBuffer();
  
  //// Other ///////////////////////////////////////////////////////////////////
  
  //for deleting a single leading line break (\r\n, \n or \r) from the front of a string
  private static String frontChomp(String oldString) {
    //length of the line break (if any) at the very start of the string
    final int breakLength;
    if(oldString.startsWith("\r\n")) {
        breakLength = 2;
    } else if(oldString.startsWith("\n") || oldString.startsWith("\r")) {
        breakLength = 1;
    } else {
        //nothing to strip - hand back the string as it is
        return oldString;
    }
    return oldString.substring(breakLength);
  }
%}

//a line break: CR, LF or CRLF
LineTerminator = \r|\n|\r\n
OtherWhiteSpace = [ \t\f]

Letter = [a-zA-Z]
Digit = [0-9]
//a letter followed by any mix of underscores, letters and digits
Identifier = {Letter} (_ | {Letter} | {Digit})*

SectionSeparator = "%%"

Quote = \"

//TODO-AC: "a backslash followed by any other unicode character that stands for this character."
HexDigit = {Digit} | [a-fA-F]
//a backslash followed by one of: a control-character letter, x + 2 hex digits,
//u + 1 to 4 hex digits, an octal code, or one of the listed punctuation characters
EscapeSequence = \\ ( [nrtfb] | x {HexDigit}{2} | u {HexDigit}{1,4} | [0-3]? [0-7]? [0-7] | [\|\(\)\{\}\[\]\<\>\\\.\*\+\?\^\$\/\.\"\~\!])

Comment = "//"[^\r\n]*
OpenBracketComment = "/*"
CloseBracketComment = "*/"

OpenDeclRegion = "%{"
CloseDeclRegion = "%}"

OpenInitRegion = "%init{"
CloseInitRegion = "%init}"

//any single character, line breaks included
//NB: spelled with an explicit class instead of . because the set of characters
//excluded by . depends on the JFlex version (only \n in 1.4; all Unicode line
//terminators, e.g. \r and \f, from 1.5 on) - with . the catch-all rules of the
//blob states would have no match for those characters
Any = [^\n] | \n

//lookahead placed after directive keywords
//NOTE(review): ! negates a set of strings, not of characters, so this expression
//also matches the empty string and words of two or more characters - confirm
//that it really rejects input such as %startfoo
DirectiveLookahead = !({Letter} | {Digit} | _) //NB: use not (instead of a class) because it also catches EOF

//within the local header section (serves as start state)
%xstate INSIDE_LOCAL_HEADER_SECTION
//within the inherited header section (serves as start state)
%xstate INSIDE_INHERITED_HEADER_SECTION
//within the option section (follows the header section)
%xstate INSIDE_OPTION_SECTION
//within the lexical rule section (follows the option section)
%xstate INSIDE_RULE_SECTION
//within a bracket comment (i.e. /*) - nestable
%xstate INSIDE_BRACKET_COMMENT
//within a string literal
%xstate INSIDE_STRING
//within a declaration region (i.e. %{)
%xstate INSIDE_DECL_REGION
//within a init region (i.e. %init{)
%xstate INSIDE_INIT_REGION
//within a directive with identifier and/or string arguments
%xstate INSIDE_STRING_IDENTIFIER_DIRECTIVE
//within a directive with a meta-pattern argument
%xstate INSIDE_META_PATTERN_DIRECTIVE

%%

//scanning does not start here (%init switches to the local header section);
//the other states switch to YYINITIAL when they stop, typically at end of input
<YYINITIAL> {
    //any leftover input is rejected
    {Any} {
        error("Unexpected character: " + yytext());
    }
    //the only place where the EOF symbol is produced
    <<EOF>> {
        return symbol(EOF);
    }
}

//TODO-AC: might want to allow comments inside code blobs
//definitely not: INSIDE_STRING, INSIDE_LOCAL_HEADER_SECTION
//maybe: INSIDE_DECL_REGION, INSIDE_INIT_REGION
//comments are recognised in these four states only
//they are skipped: no token is returned for them
<INSIDE_OPTION_SECTION, INSIDE_RULE_SECTION, INSIDE_STRING_IDENTIFIER_DIRECTIVE, INSIDE_META_PATTERN_DIRECTIVE> {
    //single-line comments
    //(the line break itself is not part of the match)
    {Comment} { 
        /* ignore */
        //return symbol(COMMENT, yytext());
    }
    
    //start multiline comment
    //the current state is saved so that the closing "*/" can return to it;
    //nestingDepth counts the closes that are still outstanding
    {OpenBracketComment} {
        saveStateAndTransition(INSIDE_BRACKET_COMMENT);
        //blobBuf = new StringBuffer(yytext());
        //markStartPosition();
        nestingDepth++;
    }
}

//the local header is an uninterpreted blob: everything up to the first "%%"
<INSIDE_LOCAL_HEADER_SECTION> {
    //end of section
    {SectionSeparator} { 
        //build the header symbol first: its end position is the end of this "%%"
        markEndPosition();
        Symbol sym = symbolFromMarkedPositions(LOCAL_HEADER, blobBuf.toString());
        restoreState();
        
        //then start accumulating the inherited header in a fresh buffer;
        //its start position is the start of this "%%"
        saveStateAndTransition(INSIDE_INHERITED_HEADER_SECTION);
        blobBuf = new StringBuffer();
        markStartPosition();
        
        return sym;
        //TODO-AC: pushback separator and return it later?
    }
    
    //everything else is header text
    {Any} {
        blobBuf.append(yytext());
    }
    //switch to YYINITIAL before throwing so that a later call reaches its EOF rule
    <<EOF>> {
        yybegin(YYINITIAL); 
        error("Unterminated local header section.");
    }
}

//the inherited header is an uninterpreted blob: everything between the first
//and the second "%%"
<INSIDE_INHERITED_HEADER_SECTION> {
    //end of section
    {SectionSeparator} { 
        markEndPosition();
        //frontChomp drops a line break at the very start of the blob, i.e. the
        //one directly following the "%%" that opened this section
        Symbol sym = symbolFromMarkedPositions(INHERITED_HEADER, frontChomp(blobBuf.toString()));
        restoreState();
        //the option section comes next
        saveStateAndTransition(INSIDE_OPTION_SECTION);
        return sym;
        //TODO-AC: pushback separator and return it later?
    }
    
    //everything else is header text
    {Any} {
        blobBuf.append(yytext());
    }
    //switch to YYINITIAL before throwing so that a later call reaches its EOF rule
    <<EOF>> {
        yybegin(YYINITIAL); 
        error("Unterminated inherited header section.");
    }
}

//option section: declaration/init regions and %-directives, up to the next "%%"
//directives that take arguments switch to INSIDE_STRING_IDENTIFIER_DIRECTIVE,
//which scans the rest of the line
<INSIDE_OPTION_SECTION> {
    //whitespace
    {LineTerminator} { /* ignore */ }
    {OtherWhiteSpace} { /* ignore */ }
    
    //start declaration region
    //the opening marker is kept as the first part of the blob
    {OpenDeclRegion} {
        saveStateAndTransition(INSIDE_DECL_REGION);
        blobBuf = new StringBuffer(yytext());
        markStartPosition();
    }
    
    //start init region
    //the opening marker is kept as the first part of the blob
    {OpenInitRegion} {
        saveStateAndTransition(INSIDE_INIT_REGION);
        blobBuf = new StringBuffer(yytext());
        markStartPosition();
    }
    
    //no-arg directives
    "%helper" / {DirectiveLookahead} {
        return symbol(HELPER_DIRECTIVE);
    }
    
    //identifier directives
    "%layout" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(LAYOUT_DIRECTIVE);
    }
    "%start" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(START_DIRECTIVE);
    }
    "%component" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(COMPONENT_DIRECTIVE);
    }
    
    //string directives
    "%declare" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(DECLARE_DIRECTIVE);
    }
    "%initthrow" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(INITTHROW_DIRECTIVE);
    }
    "%lexthrow" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(LEXTHROW_DIRECTIVE);
    }
    
    //mixed directives
    "%option" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(OPTION_DIRECTIVE);
    }
    
    //invalid directives
    "%" { error("Invalid directive"); }
    
    //end of section
    //no token is returned for the separator; scanning continues in the rule section
    {SectionSeparator} { 
        restoreState();
        saveStateAndTransition(INSIDE_RULE_SECTION);
        //TODO-AC: return symbol(SECTION_SEPARATOR);?
    }
    
    //catchall - error
    //error(...) always throws, so nothing placed after the call would run;
    //the lexical state is left as it is
    {Any} {
        error("Unexpected character in option section: " + yytext());
    }
    <<EOF>> {
        yybegin(YYINITIAL); //central handling of EOF
    }
}

//rule section: embedding groups (%%embed) and inheritance groups (%%inherit)
//%start and %end take a meta-pattern; the other directives with arguments
//take identifiers and/or strings
<INSIDE_RULE_SECTION> {
    //whitespace
    {LineTerminator} { /* ignore */ }
    {OtherWhiteSpace} { /* ignore */ }
    
    //start of an embedding group (takes no arguments)
    "%%embed" / {DirectiveLookahead} {
        return symbol(START_EMBED_GROUP);
    }
    
    "%name" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(EMBEDDING_NAME);
    }
    "%host" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(EMBEDDING_HOST);
    }
    "%guest" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(EMBEDDING_GUEST);
    }
    "%pair" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(EMBEDDING_PAIR);
    }
    
    //meta-pattern directives
    "%start" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_META_PATTERN_DIRECTIVE);
        return symbol(EMBEDDING_START);
    }
    "%end" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_META_PATTERN_DIRECTIVE);
        return symbol(EMBEDDING_END);
    }
    
    //start of an inheritance group (its arguments follow on the same line)
    "%%inherit" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(START_INHERIT_GROUP);
    }
    "%unembed" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(INHERIT_UNEMBED);
    }
    "%replace" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(INHERIT_REPLACE);
    }
    "%unoption" / {DirectiveLookahead} {
        saveStateAndTransition(INSIDE_STRING_IDENTIFIER_DIRECTIVE);
        return symbol(INHERIT_UNOPTION);
    }
    
    //catchall - error
    //error(...) always throws, so nothing placed after the call would run;
    //the lexical state is left as it is
    {Any} {
        error("Unexpected character in rule section: " + yytext());
    }
    <<EOF>> {
        yybegin(YYINITIAL); //central handling of EOF
    }
}

//remainder of declaration region
<INSIDE_DECL_REGION> {
    //escaped close marker: a '%' directly in front of the close marker keeps the
    //region open and contributes the marker (without that '%') to the blob
    %{CloseDeclRegion} { blobBuf.append(yytext().substring(1)); }
    //real close marker: it becomes part of the blob (just like the opening
    //marker did) and the whole region is returned as one symbol
    {CloseDeclRegion} {
        blobBuf.append(yytext());
        markEndPosition();
        Symbol sym = symbolFromMarkedPositions(DECL_REGION, blobBuf.toString());
        restoreState();
        return sym;
    }
    //everything else is region content
    {Any} { blobBuf.append(yytext()); }
    //switch to YYINITIAL before throwing so that a later call reaches its EOF rule
    <<EOF>> {
        yybegin(YYINITIAL); 
        error("Unterminated declaration region: '" + blobBuf + "'");
    }
}

//remainder of init region
<INSIDE_INIT_REGION> {
    //escaped close marker: a '%' directly in front of the close marker keeps the
    //region open and contributes the marker (without that '%') to the blob
    %{CloseInitRegion} { blobBuf.append(yytext().substring(1)); }
    //real close marker: it becomes part of the blob (just like the opening
    //marker did) and the whole region is returned as one symbol
    {CloseInitRegion} {
        blobBuf.append(yytext());
        markEndPosition();
        Symbol sym = symbolFromMarkedPositions(INIT_REGION, blobBuf.toString());
        restoreState();
        return sym;
    }
    //everything else is region content
    {Any} { blobBuf.append(yytext()); }
    //switch to YYINITIAL before throwing so that a later call reaches its EOF rule
    <<EOF>> {
        yybegin(YYINITIAL); 
        error("Unterminated init region: '" + blobBuf + "'");
    }
}

//remainder of string literal (i.e. after initial double quote)
<INSIDE_STRING> {
    //closing quote: it becomes part of the blob (just like the opening quote did)
    //and the whole literal is returned as one symbol
    {Quote} {
        blobBuf.append(yytext());
        markEndPosition();
        Symbol sym = symbolFromMarkedPositions(STRING, blobBuf.toString());
        restoreState();
        return sym;
    }
    //recognised escape sequences are kept verbatim, backslash included
    {EscapeSequence} { blobBuf.append(yytext()); }
    //any other escaped character stands for itself: the backslash is dropped
    \\{Any} { blobBuf.append(yytext().substring(1)); }
    //a backslash with nothing after it
    \\ {
        yybegin(YYINITIAL);
        error("Incomplete escape sequence");
    }
    //string literals must end on the line they started on
    {LineTerminator} {
        yybegin(YYINITIAL); 
        error("Unterminated string literal: '" + blobBuf + "'");
    }
    //ordinary string content
    //NB: an explicit class instead of . because newer JFlex versions exclude more
    //than \n from . (e.g. \f), which would leave those characters without a rule;
    //\r and \n never get here since the line terminator rule above takes them
    [^\r\n] { blobBuf.append(yytext()); }
    <<EOF>> {
        yybegin(YYINITIAL); 
        error("Unterminated string literal: '" + blobBuf + "'");
    }
}

//continue multiline comment
//bracket comments nest: every "/*" must be matched by its own "*/"
//the comment text is discarded (the blob bookkeeping is commented out)
<INSIDE_BRACKET_COMMENT> {
    //one more level to close
    {OpenBracketComment} {
        //blobBuf.append(yytext());
        nestingDepth++;
    }
    //one level closed; the state saved at the outermost "/*" is restored
    //once the count is back to zero
    {CloseBracketComment} { 
        //blobBuf.append(yytext());
        nestingDepth--;
        if(nestingDepth == 0) {
            //markEndPosition();
            //Symbol sym = symbolFromMarkedPositions(BRACKET_COMMENT, blobBuf.toString());
            restoreState();
            //return sym;
        }
    }
    //comment content is skipped
    {Any} {
        //blobBuf.append(yytext());
    }
    //switch to YYINITIAL before throwing so that a later call reaches its EOF rule
    <<EOF>> {
        yybegin(YYINITIAL);
        //don't finish scanning if there's an unclosed comment
        if(nestingDepth != 0) {
            error(nestingDepth + " levels of comments not closed");
        }
    }
}

//arguments of a directive: identifiers, string literals and commas
//the argument list ends at the end of the line
<INSIDE_STRING_IDENTIFIER_DIRECTIVE> {
    //whitespace
    //a line break ends the directive: go back to the state that started it
    {LineTerminator} { restoreState(); /* ignore */ }
    {OtherWhiteSpace} { /* ignore */ }
    
    //for strings
    //the opening quote is kept as the first part of the blob
    {Quote} {
        saveStateAndTransition(INSIDE_STRING);
        blobBuf = new StringBuffer(yytext());
        markStartPosition();
    }
    
    {Identifier} { return symbol(IDENTIFIER, yytext()); }
    
    , { return symbol(COMMA); }
    
    //catchall - error
    //error(...) always throws, so nothing placed after the call would run;
    //the lexical state is left as it is
    {Any} {
        error("Unexpected character in directive option list: " + yytext());
    }
    <<EOF>> {
        yybegin(YYINITIAL); //central handling of EOF
    }
}

//meta-pattern argument of a %start or %end directive
//the pattern ends at the end of the line
<INSIDE_META_PATTERN_DIRECTIVE> {
    //whitespace
    //a line break ends the directive: go back to the state that started it
    {LineTerminator} { restoreState(); /* ignore */ }
    {OtherWhiteSpace} { /* ignore */ }
    
    "<ANY>" { return symbol(MP_ANY); }
    "<BOF>" { return symbol(MP_BOF); }
    //the symbol value is the identifier without the surrounding '%' characters
    "%"{Identifier}"%" { return symbol(MP_REGION, yytext().substring(1, yylength() - 1)); } //TODO-AC: dropping punctuation isn't really consistent
    {Identifier} { return symbol(MP_SYM, yytext()); }
    "(" { return symbol(MP_LPAREN); }
    ")" { return symbol(MP_RPAREN); }
    "[" { return symbol(MP_LSQUARE); }
    "]" { return symbol(MP_RSQUARE); }
    "^" { return symbol(MP_CARET); }
    "*" { return symbol(MP_STAR); }
    "+" { return symbol(MP_PLUS); }
    "?" { return symbol(MP_OPT); }
    "|" { return symbol(MP_OR); }
    
    //catchall - error
    //error(...) always throws, so nothing placed after the call would run;
    //the lexical state is left as it is
    {Any} {
        error("Unexpected character in meta-pattern: " + yytext());
    }
    <<EOF>> {
        yybegin(YYINITIAL); //central handling of EOF
    }
}
