1.20. Lexing

Felix provides a mechanism for constructing lexers. The reglex construction matches a prefix of the string. Of all possible matches, reglex chooses the longest. As with regmatch, if more than one regexp produces that match, the one written first is used.

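The handler expression to the right of each regexp's => has access to three values of type iterator: lexeme_start, lexeme_end and buffer_end. In the example below, string_between(lexeme_start,lexeme_end) is used to extract the text of the matched lexeme.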

Start C++ section to tut/examples/tut121a.flx[1 /1 ]
     1: include "std";
     2: open Lexer;
     3: 
     4: regexp lower = ["abcdefghijklmnopqrstuvwxyz"]; // one character from the bracketed set
     5: regexp upper = ["ABCDEFGHIJKLMNOPQRSTUVWXYZ"];
     6: regexp digit = ["0123456789"];
     7: regexp alpha = lower | upper | "_"; // alternation: a letter or an underscore
     8: regexp space = " ";
     9: regexp white = space +; // a run of spaces; used by the "White" arm of lexit
    10: 
    11: fun lexit(start:iterator, finish:iterator): // lex one token from the text between start and finish
    12:   iterator * (string * string) // result: an iterator paired with (token kind, token text)
    13: =
    14: {
    15:   return
    16:     reglex start to finish with // longest prefix match; on a tie the arm written first wins
    17:     | digit+ => "Number",
    18:       string_between(lexeme_start,lexeme_end) // text of the lexeme just matched
    19: 
    20:     | alpha+ =>  "Identifier",
    21:       string_between(lexeme_start,lexeme_end)
    22: 
    23:     | white =>  "White",
    24:       string_between(lexeme_start,lexeme_end)
    25:     endmatch // NOTE(review): no arm covers other characters; behaviour on such input is not shown here
    26:   ;
    27: }
    28: 
    29: 
    30: var s = "A string 2 lex"; // sample input: only letters, digits and spaces, all covered by lexit
    31: val first = start_iterator s;
    32: val finish = end_iterator s;
    33: var current = first; // current lexing position, advanced after every token
    34: 
    35: while { current != finish } // keep lexing until the position reaches the end iterator
    36: {
    37:     match lexit(current, finish) with
    38:     | ?next,(?kind,?lexeme) => // destructure the result: iterator, token kind, token text
    39:     {
    40:       current = next; // the returned iterator becomes the start of the next lexit call
    41:       print kind; print ": "; print lexeme; endl;
    42:     }
    43:     endmatch
    44:   ;
    45: };
    46: print "Done.\n";
    47: 
End C++ section to tut/examples/tut121a.flx[1]
Start data section to tut/examples/tut121b.flx[1 /1 ]
     1: #!/bin/env flx
     2: include "std";
     3: include "flx_lex";
     4: use Lexer::sub;
     5: 
     6: print "Lexer here"; endl;
     7: 
     8: /* some /* commented */ stuffs */
     9: 
    10: val xx = 1214; // not referenced elsewhere in this listing; presumably sample input for the lexer
    11: 
    12: //val s = "A string is here == != @@ ";
    13: var s = Text_file::load("tut/examples/tut121b.flx"); // the text being lexed is this program's own source file
    14: 
    15: //print s; endl;
    16: 
    17: i2 := Lexer::end_iterator s; // end of the loaded text
    18: var i1 = Lexer::start_iterator s; // start of the next token; updated by print_token
    19: 
    20: proc print_token() // print the token that starts at i1, then move i1 past it
    21: {
    22:   open Flx_lex;
    23:   def var j, var des = pre_flx_lex (i1, i2); // j: iterator used below as the token's end, des: token kind
    24:   match des with // opening quotes, comments and preprocessor lines need a further scan that updates j
    25:   | qQuote =>        { j,des = parse_q_string (j,i2); }
    26:   | qqqQuote =>      { j,des = parse_qqq_string (j,i2); }
    27:   | dQuote =>        { j,des = parse_d_string (j,i2); }
    28:   | dddQuote =>      { j,des = parse_ddd_string (j,i2); }
    29:   | rqQuote =>       { j,des = parse_rq_string (j,i2); }
    30:   | rqqqQuote =>     { j,des = parse_rqqq_string (j,i2); }
    31:   | rdQuote =>       { j,des = parse_rd_string (j,i2); }
    32:   | rdddQuote =>     { j,des = parse_rddd_string (j,i2); }
    33:   | Preprocessor =>  { j = to_eol(j,i2) - 1; } // NOTE(review): the - 1 presumably leaves the end of line as its own token
    34:   | Cpp_comment =>   { j = to_eol(j,i2) - 1; }
    35:   | C_comment =>     { j = to_end_c_comment (j,i2); }
    36:   | _ => {}
    37:   endmatch;
    38:   dess := // printable label for the token kind; kinds not listed fall through to "Other"
    39:     match des with
    40:     | Eol => "Eol"
    41:     | Ident => "Id"
    42:     | DOLLAR => "DOLLAR"
    43:     | QUEST => "QUEST"
    44:     | EXCLAMATION => "EXCLAMATION"
    45:     | LPAR => "LPAR"
    46:     | RPAR => "RPAR"
    47:     | LSQB => "LSQB"
    48:     | RSQB => "RSQB"
    49:     | LBRACE => "LBRACE"
    50:     | RBRACE => "RBRACE"
    51:     | COLON => "COLON"
    52:     | COMMA => "COMMA"
    53:     | SEMI => "SEMI"
    54:     | PLUS => "PLUS"
    55:     | MINUS => "MINUS"
    56:     | STAR => "STAR"
    57:     | SLASH => "SLASH"
    58:     | VBAR => "VBAR"
    59:     | AMPER => "AMPER"
    60:     | LESS => "LESS"
    61:     | GREATER => "GREATER"
    62:     | EQUAL => "EQUAL"
    63:     | DOT => "DOT"
    64:     | PERCENT => "PERCENT"
    65:     | BACKQUOTE => "BACKQUOTE"
    66:     | TILDE => "TILDE"
    67:     | CIRCUMFLEX => "CIRCUMFLEX"
    68:     | ANDLESS => "&<"
    69:     | ANDGREATER => "&>"
    70:     | EQEQUAL => "=="
    71:     | NOTEQUAL => "!="
    72:     | LESSEQUAL => "<="
    73:     | GREATEREQUAL => ">="
    74:     | LEFTSHIFT => "<<"
    75:     | RIGHTSHIFT => ">>"
    76:     | STARSTAR => "**"
    77:     | LESSCOLON => "<:"
    78:     | COLONGREATER => ":>"
    79:     | DOTDOT => ".."
    80:     | COLONCOLON => "::"
    81:     | PLUSPLUS => "++"
    82:     | MINUSMINUS => "--"
    83:     | PLUSEQUAL => "+="
    84:     | MINUSEQUAL => "-="
    85:     | STAREQUAL => "*="
    86:     | SLASHEQUAL => "/="
    87:     | PERCENTEQUAL => "%="
    88:     | CARETEQUAL => "^="
    89:     | VBAREQUAL => "|="
    90:     | AMPEREQUAL => "&="
    91:     | TILDEEQUAL => "~="
    92:     | COLONEQUAL => ":="
    93:     | RIGHTARROW => "->"
    94:     | EQRIGHTARROW => "=>"
    95:     | LEFTARROW => "<-"
    96:     | LSQANGLE => "[<"
    97:     | RSQANGLE => ">]"
    98:     | LSQBAR => "[|"
    99:     | RSQBAR => "|]"
   100:     | AMPERAMPER => "&&"
   101:     | VBARVBAR => "||"
   102:     | SLOSHAMPER => "\\&"
   103:     | SLOSHVBAR => "\\|"
   104:     | SLOSHCIRCUMFLEX => "\\^"
   105:     | LEFTSHIFTEQUAL => "<<="
   106:     | RIGHTSHIFTEQUAL => ">>="
   107:     | LEFTRIGHTARROW => "<->"
   108:     | ANDEQEQUAL => "&=="
   109:     | ANDNOTEQUAL => "&!="
   110:     | ANDLESSEQUAL => "&<="
   111:     | ANDGREATEREQUAL => "&>="
   112:     | DOTDOTDOT => "..."
   113:     | Preprocessor =>  "Pre"
   114:     | Cpp_comment =>   "Cppc"
   115:     | C_comment =>     "Cc"
   116:     | White => "White"
   117:     | Int => "Int"
   118:     | Float => "Float"
   119:     | _ => "Other"
   120:     endmatch
   121:   ;
   122:   print (dess ":       ").[0 to 9]; // label plus padding, sliced with .[0 to 9], presumably for a fixed-width column
   123:   print ('"' (Lexer::string_between(i1,j)) '"'); // token text from i1 to j, wrapped in double quotes
   124:   endl;
   125:   i1 = j; // the next call starts where this token ended
   126: }
   127: 
   128: use Lexer::ne; // presumably brings the iterator != used below into scope — TODO confirm
   129: 
   130: while { i1 != i2 } { print_token; }; // each print_token call advances i1; stop when it equals i2
   131: 
End data section to tut/examples/tut121b.flx[1]