The expression for each regexp has access to three values of type iterator: lexeme_start, lexeme_end and buffer_end.
// Tutorial lexer: cuts a string into numbers, identifiers and runs of spaces,
// printing one "kind: lexeme" line per token.
include "std";
open Lexer;

// Character classes, and the token shapes built from them.
regexp lower = ["abcdefghijklmnopqrstuvwxyz"];
regexp upper = ["ABCDEFGHIJKLMNOPQRSTUVWXYZ"];
regexp digit = ["0123456789"];
regexp alpha = lower | upper | "_";
regexp space = " ";
regexp white = space +;

// Run reglex once over start..finish.
// The declared result is an iterator paired with (kind, lexeme text); the
// driver loop below takes that iterator as the position to resume from.
fun lexit(start:iterator, finish:iterator):
  iterator * (string * string)
=
{
  val token =
    reglex start to finish with
    | digit+ => "Number", string_between(lexeme_start,lexeme_end)
    | alpha+ => "Identifier", string_between(lexeme_start,lexeme_end)
    | white => "White", string_between(lexeme_start,lexeme_end)
    endmatch
  ;
  return token;
}


// Driver: walk the sample text token by token until the cursor reaches the end.
var text = "A string 2 lex";
val stop = end_iterator text;
var pos = start_iterator text;

while { pos != stop }
{
  match lexit(pos, stop) with
  | ?resume,(?kind,?lexeme) =>
    {
      // Advance the cursor first, then report the token just consumed.
      pos = resume;
      print kind;
      print ": ";
      print lexeme;
      endl;
    }
  endmatch
  ;
};
print "Done.\n";
#!/bin/env flx
// Token dump: runs the Flx_lex pre-lexer over a Felix source file and prints
// one line per token: a short label followed by the quoted lexeme text.
include "std";
include "flx_lex";
use Lexer::sub;

print "Lexer here"; endl;

// NOTE(review): the path loaded below looks like it may be this very file.
// If so, the nested comment, the unused xx and the commented-out lines are
// deliberate lexer input and should not be tidied away -- confirm.

/* some /* commented */ stuffs */

val xx = 1214;

//val s = "A string is here == != @@ ";
var s = Text_file::load("tut/examples/tut121b.flx");

//print s; endl;

// i2 is the fixed end of the buffer; i1 is the cursor that print_token advances.
i2 := Lexer::end_iterator s;
var i1 = Lexer::start_iterator s;

// Lex one token starting at i1, print its label and text, then move i1 past it.
proc print_token()
{
  open Flx_lex;

  // First pass: pre_flx_lex yields the position after the token start (j)
  // and a token descriptor (des).
  def var j, var des = pre_flx_lex (i1, i2);

  // Tokens whose extent is not known after the first pass (string literals,
  // comments, preprocessor lines) are finished by a dedicated scanner.
  match des with
  | qQuote       => { j,des = parse_q_string (j,i2); }
  | qqqQuote     => { j,des = parse_qqq_string (j,i2); }
  | dQuote       => { j,des = parse_d_string (j,i2); }
  | dddQuote     => { j,des = parse_ddd_string (j,i2); }
  | rqQuote      => { j,des = parse_rq_string (j,i2); }
  | rqqqQuote    => { j,des = parse_rqqq_string (j,i2); }
  | rdQuote      => { j,des = parse_rd_string (j,i2); }
  | rdddQuote    => { j,des = parse_rddd_string (j,i2); }
  // Step back one from where to_eol stops; presumably so the end-of-line
  // is lexed as its own token on the next call -- confirm.
  | Preprocessor => { j = to_eol(j,i2) - 1; }
  | Cpp_comment  => { j = to_eol(j,i2) - 1; }
  | C_comment    => { j = to_end_c_comment (j,i2); }
  | _            => {}
  endmatch;

  // Printable label for the (possibly updated) descriptor.
  dess :=
    match des with
    | Eol => "Eol"
    | Ident => "Id"
    | DOLLAR => "DOLLAR"
    | QUEST => "QUEST"
    | EXCLAMATION => "EXCLAMATION"
    | LPAR => "LPAR"
    | RPAR => "RPAR"
    | LSQB => "LSQB"
    | RSQB => "RSQB"
    | LBRACE => "LBRACE"
    | RBRACE => "RBRACE"
    | COLON => "COLON"
    | COMMA => "COMMA"
    | SEMI => "SEMI"
    | PLUS => "PLUS"
    | MINUS => "MINUS"
    | STAR => "STAR"
    | SLASH => "SLASH"
    | VBAR => "VBAR"
    | AMPER => "AMPER"
    | LESS => "LESS"
    | GREATER => "GREATER"
    | EQUAL => "EQUAL"
    | DOT => "DOT"
    | PERCENT => "PERCENT"
    | BACKQUOTE => "BACKQUOTE"
    | TILDE => "TILDE"
    | CIRCUMFLEX => "CIRCUMFLEX"
    | ANDLESS => "&<"
    | ANDGREATER => "&>"
    | EQEQUAL => "=="
    | NOTEQUAL => "!="
    | LESSEQUAL => "<="
    | GREATEREQUAL => ">="
    | LEFTSHIFT => "<<"
    | RIGHTSHIFT => ">>"
    | STARSTAR => "**"
    | LESSCOLON => "<:"
    | COLONGREATER => ":>"
    | DOTDOT => ".."
    | COLONCOLON => "::"
    | PLUSPLUS => "++"
    | MINUSMINUS => "--"
    | PLUSEQUAL => "+="
    | MINUSEQUAL => "-="
    | STAREQUAL => "*="
    | SLASHEQUAL => "/="
    | PERCENTEQUAL => "%="
    | CARETEQUAL => "^="
    | VBAREQUAL => "|="
    | AMPEREQUAL => "&="
    | TILDEEQUAL => "~="
    | COLONEQUAL => ":="
    | RIGHTARROW => "->"
    | EQRIGHTARROW => "=>"
    | LEFTARROW => "<-"
    | LSQANGLE => "[<"
    | RSQANGLE => ">]"
    | LSQBAR => "[|"
    | RSQBAR => "|]"
    | AMPERAMPER => "&&"
    | VBARVBAR => "||"
    | SLOSHAMPER => "\\&"
    | SLOSHVBAR => "\\|"
    | SLOSHCIRCUMFLEX => "\\^"
    | LEFTSHIFTEQUAL => "<<="
    | RIGHTSHIFTEQUAL => ">>="
    | LEFTRIGHTARROW => "<->"
    | ANDEQEQUAL => "&=="
    | ANDNOTEQUAL => "&!="
    | ANDLESSEQUAL => "&<="
    | ANDGREATEREQUAL => "&>="
    | DOTDOTDOT => "..."
    | Preprocessor => "Pre"
    | Cpp_comment => "Cppc"
    | C_comment => "Cc"
    | White => "White"
    | Int => "Int"
    | Float => "Float"
    | _ => "Other"
    endmatch
  ;

  // Label column: the slice [0 to 9] of the label followed by ": ".
  print (dess ": ").[0 to 9];
  // Lexeme text between the old cursor and the token end, wrapped in quotes.
  print ('"' (Lexer::string_between(i1,j)) '"');
  endl;

  // Move the shared cursor past the token just printed.
  i1 = j;
}

// Bring iterator inequality into scope for the loop condition below.
use Lexer::ne;

// Driver: keep printing tokens until the cursor reaches the end of the buffer.
while { i1 != i2 } { print_token; };