Lezer grammar support

// Precedence names for the `!name` markers used in the rules below:
// `!assign` (AssignStatement), `!traceParams` (TraceStatement) and the
// binary-operator levels in BinaryExpr. `statement` and `member` are
// declared but not referenced by any rule in this listing.
@precedence {
    statement,
    assign,
    traceParams @cut,
    member @left,
    pow @right,
    times @left,
    plus @left,
    compare @left,
    equal @left,
    and @left,
    or @left
}

// Brace-delimited list of one or more expressions; a trailing comma is
// allowed, an empty `{}` is not.
ArrayLiteral {
    "{" Expr ("," Expr)* ","? "}"
}
// Any literal value: number, string, char, boolean, null or array literal.
Literal {
    IntegerLit | RealLit | StringLit | CharLit |
    BooleanLit | KNull | ArrayLiteral
}
BooleanLit { KTrue | KFalse }
// The built-in scalar type keywords.
PrimitiveType { KInteger | KBoolean | KReal | KString | KChar }
// Two integer literals separated by a colon (presumably the lower and upper
// bound of one array dimension).
ArrayRange { IntegerLit ":" IntegerLit } 
// ARRAY[range] OF type, with at most two ranges; the element type must be
// a primitive type.
ArrayType { KArray "[" ArrayRange ("," ArrayRange)? "]" KOf PrimitiveType }
Type { ArrayType | PrimitiveType }

// Exprs
// Unary minus and logical NOT; neither carries a precedence marker.
Negation { "-" Expr }
Not { KNot Expr }
Grouping { "(" Expr ")" }
// A primitive type keyword applied to a parenthesised expression, e.g. INTEGER(x).
Typecast { PrimitiveType "(" Expr ")" }
FunctionCall { Ident "(" ArgList? ")" }
// Indexing applies only to an identifier or a parenthesised expression and
// takes one or two index expressions.
ArrayIndex { (Ident | Grouping) "[" Expr ("," Expr)? "]" }
// Every expression form that is neither an index nor a binary operation.
Unary { 
    Ident | Negation | Literal | Typecast | Grouping
    | Not | FunctionCall
}
Expr {
    Unary |
    ArrayIndex |
    BinaryExpr
}

// Binary operators. Each alternative is tagged with one of the precedence
// names declared in the @precedence block.
BinaryExpr {
    Expr !or KOr Expr |
    Expr !and KAnd Expr |
    Expr !equal ("=" | "<>") Expr |
    Expr !compare (">" | "<" | "<=" | ">=") Expr |
    Expr !plus ("+" | "-") Expr |
    Expr !times ("*" | "/") Expr |
    Expr !pow "^" Expr
}

// Blocks, Statements
// Target of InputStatement and ReadfileStatement: a plain identifier or an
// array element.
Lvalue { Ident | ArrayIndex }
// NOTE(review): Statement already ends with its own Newline, so a branch as
// written needs a second Newline after the statement — confirm this is intended.
CaseofBranch { Expr ":" Statement Newline }
Parameter { Ident ":" Type }
ParameterList { Parameter ("," Parameter)* }
ArgList { Expr ("," Expr)* }
// A file may be named by identifier, string literal or parenthesised expression.
FileIdent { Ident | StringLit | Grouping }
FileMode { KRead | KWrite | KAppend }
// One or more modes joined with AND, e.g. READ AND WRITE.
FileModeList { FileMode (KAnd FileMode)* }

// One rule per statement form. Declarations, constants, procedures and
// functions accept an optional leading EXPORT.
DeclareStatement { KExport? KDeclare Ident ("," Ident)* ":" Type }
ConstantStatement { KExport? KConstant Ident "<-" Expr }
OutputStatement { (KOutput | KPrint) Expr ("," Expr)* }
InputStatement { KInput Lvalue }
// The target is a bare Ident (not Lvalue), and only the ASCII "<-" spelling
// of the arrow is accepted by this rule.
AssignStatement { Ident !assign "<-" Expr }
IfStatement { KIf Expr KThen Block (KElse Block)? KEndif }
CaseofStatement { KCase KOf Expr Newline (CaseofBranch)* (KOtherwise Statement)? KEndcase }
WhileStatement { KWhile Expr KDo Block KEndwhile }
RepeatUntilStatement { KRepeat Block KUntil Expr }
// A Newline is required between the loop header and the body; the rule ends
// with NEXT followed by an identifier.
ForStatement { KFor Ident "<-" Expr KTo Expr (KStep Expr)? Newline Block KNext Ident }
ProcedureStatement { KExport? KProcedure Ident ("(" ParameterList ")")? Block KEndprocedure }
FunctionStatement { KExport? KFunction Ident ("(" ParameterList ")")? KReturns Type Block KEndfunction }
// Unlike FunctionCall, the parentheses here require at least one argument;
// a call without arguments omits them entirely.
CallStatement { KCall Ident ("(" ArgList ")")? }
ReturnStatement { KReturn Expr? }
ScopeStatement { KScope Block KEndscope }
IncludeStatement { KInclude StringLit }
// Optional parenthesised identifier list and optional `TO "file"` before the body.
TraceStatement { KTrace !traceParams ("(" Ident ("," Ident)* ")")? (KTo StringLit)? Block KEndtrace}
OpenfileStatement { KOpenfile FileIdent KFor FileModeList }
WritefileStatement { KWritefile FileIdent "," Expr }
ReadfileStatement { KReadfile FileIdent "," Lvalue }
ClosefileStatement { KClosefile FileIdent }

// A statement is an optional body followed by a mandatory Newline, so an
// empty line is itself a Statement. A bare Expr is also accepted as a body.
Statement {
    (
        DeclareStatement | ConstantStatement | OutputStatement | InputStatement
        | AssignStatement | IfStatement | CaseofStatement | WhileStatement
        | RepeatUntilStatement | ForStatement | ProcedureStatement
        | FunctionStatement | CallStatement | ReturnStatement | ScopeStatement
        | IncludeStatement | TraceStatement | OpenfileStatement
        | WritefileStatement | ReadfileStatement | ClosefileStatement | Expr
    )? Newline
}
// Zero or more statements.
Block { Statement* }

// Intended to list the tokens ignored between other tokens (whitespace and
// both comment kinds).
// NOTE(review): the entries are juxtaposed with no `|` between them, which in
// this grammar notation is a sequence, not a choice between alternatives.
@skip {
    space
    LineComment
    BlockComment
}

// Keyword tokens. Each one specializes Ident for exactly two spellings,
// all-lowercase and all-uppercase; mixed-case spellings such as `True` are
// not listed and so remain ordinary Ident tokens.
KTrue { @specialize<Ident, "true" | "TRUE"> }
KFalse { @specialize<Ident, "false" | "FALSE"> }
KDeclare { @specialize<Ident, "declare" | "DECLARE"> }
KOutput { @specialize<Ident, "output" | "OUTPUT"> }
KInput { @specialize<Ident, "input" | "INPUT"> }
KAnd { @specialize<Ident, "and" | "AND"> }
KOr { @specialize<Ident, "or" | "OR"> }
KNot { @specialize<Ident, "not" | "NOT"> }
KIf { @specialize<Ident, "if" | "IF"> }
KThen { @specialize<Ident, "then" | "THEN"> }
KElse { @specialize<Ident, "else" | "ELSE"> }
KEndif { @specialize<Ident, "endif" | "ENDIF"> }
KCase { @specialize<Ident, "case" | "CASE"> }
KOf { @specialize<Ident, "of" | "OF"> }
KOtherwise { @specialize<Ident, "otherwise" | "OTHERWISE"> }
KEndcase { @specialize<Ident, "endcase" | "ENDCASE"> }
KWhile { @specialize<Ident, "while" | "WHILE"> }
KDo { @specialize<Ident, "do" | "DO"> }
KEndwhile { @specialize<Ident, "endwhile" | "ENDWHILE"> }
KRepeat { @specialize<Ident, "repeat" | "REPEAT"> }
KUntil { @specialize<Ident, "until" | "UNTIL"> }
KFor { @specialize<Ident, "for" | "FOR"> }
KTo { @specialize<Ident, "to" | "TO"> }
KStep { @specialize<Ident, "step" | "STEP"> }
KNext { @specialize<Ident, "next" | "NEXT"> }
KProcedure { @specialize<Ident, "procedure" | "PROCEDURE"> }
KEndprocedure { @specialize<Ident, "endprocedure" | "ENDPROCEDURE"> }
KCall { @specialize<Ident, "call" | "CALL"> }
KFunction { @specialize<Ident, "function" | "FUNCTION"> }
// RETURNS (function signature) and RETURN (statement) are distinct keywords.
KReturns { @specialize<Ident, "returns" | "RETURNS"> }
KReturn { @specialize<Ident, "return" | "RETURN"> }
KEndfunction { @specialize<Ident, "endfunction" | "ENDFUNCTION"> }
KInclude { @specialize<Ident, "include" | "INCLUDE"> }
KExport { @specialize<Ident, "export" | "EXPORT"> }
KScope { @specialize<Ident, "scope" | "SCOPE"> }
KEndscope { @specialize<Ident, "endscope" | "ENDSCOPE"> }
KPrint { @specialize<Ident, "print" | "PRINT"> }
KConstant { @specialize<Ident, "constant" | "CONSTANT"> }
KArray { @specialize<Ident, "array" | "ARRAY"> }
KTrace { @specialize<Ident, "trace" | "TRACE"> }
KEndtrace { @specialize<Ident, "endtrace" | "ENDTRACE"> }
KOpenfile { @specialize<Ident, "openfile" | "OPENFILE"> }
KReadfile { @specialize<Ident, "readfile" | "READFILE"> }
KWritefile { @specialize<Ident, "writefile" | "WRITEFILE"> }
KClosefile { @specialize<Ident, "closefile" | "CLOSEFILE"> }
KRead { @specialize<Ident, "read" | "READ"> }
KWrite { @specialize<Ident, "write" | "WRITE"> }
KAppend { @specialize<Ident, "append" | "APPEND"> }
KInteger { @specialize<Ident, "integer" | "INTEGER"> }
KReal { @specialize<Ident, "real" | "REAL"> }
KString { @specialize<Ident, "string" | "STRING"> }
KChar { @specialize<Ident, "char" | "CHAR"> }
KBoolean { @specialize<Ident, "boolean" | "BOOLEAN"> }
KNull { @specialize<Ident, "null" | "NULL"> }

// Lexical tokens.
@tokens {
    // Comment delimited by /* and */.
    BlockComment { "/*" (![\*] | "*" ![/])* "*/" }
    // `//` followed by everything up to (not including) the next "\n".
    LineComment[isolate] { "//" ![\n]* }
    // String body characters: anything except backslash, newline or double quote.
    stringContentDouble { ![\\\n"]+ }
    // A backslash followed by one ASCII letter; sequences such as \" or \\
    // are not covered by this token.
    Escape { "\\" @asciiLetter } 
    space { $[ \t]+ }
    Newline { "\n" | "\r\n" }
    // RealLit is listed ahead of IntegerLit for input both could start on.
    @precedence { RealLit IntegerLit }
    // Digits are required on both sides of the decimal point.
    RealLit { $[0-9]+ "." $[0-9]+ }
    IntegerLit { $[0-9]+ }
    StringLit {
        '"' (stringContentDouble | Escape)* '"'
    }
    // Exactly one plain character or one Escape between single quotes.
    CharLit {
        "'" ( ![\\\n'] | Escape ) "'"
    }
    Ident { $[a-zA-Z_] $[a-zA-Z0-9_]* }

    // Catch-all operator and separator tokens. No rule in this listing refers
    // to `Operators` or `Separators`; the rules use inline string literals
    // instead, and "←" appears only here.
    Operators { "<>" | "<-" | "+" | "-" | "*" | "/" | "<" | ">" | "=" | "^" | "←" }
    Separators { "{" | "}" | "[" | "]" | "(" | ")" | ";" | ":" | "," }

}

That grammar up there has been haunting me for the past day… I have no idea how this parser generator even works. Despite this perfectly OK-looking grammar, why can’t it even parse a simple assignment, X ← 4? The output given is Program(Ident,⚠,⚠(IntegerLit)), with the test script I’m using being as follows:


// Minimal driver: parse one snippet with the generated parser and print the
// resulting syntax tree in its string form.
import { parser } from "./parser";

// Input under test; the commented-out line is an alternative arithmetic sample.
let code = "x ← 4";
//code = "1+2*3-4"

const tree = parser.parse(code);
console.log(tree.toString());

Could somebody please assist me in getting some basic parsing up and running? I have only written recursive descent parsers by hand before, and only have some experience writing shift-reduce parsers. This is really confusing; I can make sacrifices to this grammar as long as I can get syntax highlighting working in some way.

Thank you for reading thus far!

That is not a complete grammar. It has no top rule, and nothing in it refers to the ← arrow operator except an unused token.

Wait, sorry, I don’t think I pasted the entire file correctly… I do have a top rule that is just set to Block.

I think I have to investigate; I did try setting each symbol to a token and referring to it in the actual parser/grammar rule section, but is it normal for the tokens to appear directly in the parsed tree nodes? like

Assign(Lvalue(Ident),Assign,Expr(Unary(Literal(IntegerLit))))

EDIT: I just tried out actually spelling out each string operator’s token name and using it in the tree. The rule for assignment now looks like this:

AssignStatement { Ident !assign Assign Expr }

And the token in the @tokens section: Assign { "<-" | "←" }

Now the parser outputs

Program(Ident,⚠(Assign),⚠(IntegerLit))

Is this a precedence related issue? There is another rule ConstantStatement which also uses the assignment operator:

ConstantStatement { KConstant Ident Assign Expr }

Which also doesn’t in fact parse: Program(KConstant,⚠(Ident),⚠(Assign),⚠(IntegerLit))

In fact, none of my statements except very simple arithmetic ones actually parse correctly. The full corrected grammar is as follows:


// Root of the parse tree: a Program is a single Block.
@top Program { Block }

// Precedence names for the `!name` markers used in the rules below:
// `!assign` (AssignStatement), `!traceParams` (TraceStatement) and the
// binary-operator levels in BinaryExpr. `statement` and `member` are
// declared but not referenced by any rule in this listing.
@precedence {
    statement,
    assign,
    traceParams @cut,
    member @left,
    pow @right,
    times @left,
    plus @left,
    compare @left,
    equal @left,
    and @left,
    or @left
}

// Brace-delimited list of one or more expressions; a trailing comma is
// allowed, an empty pair of braces is not.
ArrayLiteral {
    LCurly Expr (Comma Expr)* Comma? RCurly
}
// Any literal value: number, string, char, boolean, null or array literal.
Literal {
    IntegerLit | RealLit | StringLit | CharLit |
    BooleanLit | KNull | ArrayLiteral
}
BooleanLit { KTrue | KFalse }
// The built-in scalar type keywords.
PrimitiveType { KInteger | KBoolean | KReal | KString | KChar }
// Two integer literals separated by a colon (presumably the lower and upper
// bound of one array dimension).
ArrayRange { IntegerLit Colon IntegerLit } 
// ARRAY[range] OF type, with at most two ranges; the element type must be
// a primitive type.
ArrayType { KArray LBracket ArrayRange (Comma ArrayRange)? RBracket KOf PrimitiveType }
Type { ArrayType | PrimitiveType }

// Exprs
// Unary minus and logical NOT; neither carries a precedence marker.
Negation { Sub Expr }
Not { KNot Expr }
Grouping { LParen Expr RParen }
// A primitive type keyword applied to a parenthesised expression, e.g. INTEGER(x).
Typecast { PrimitiveType LParen Expr RParen }
FunctionCall { Ident LParen ArgList? RParen }
// Indexing applies only to an identifier or a parenthesised expression and
// takes one or two index expressions.
ArrayIndex { (Ident | Grouping) LBracket Expr (Comma Expr)? RBracket }
// Every expression form that is neither an index nor a binary operation.
Unary { 
    Ident | Negation | Literal | Typecast | Grouping
    | Not | FunctionCall
}
Expr {
    Unary |
    ArrayIndex |
    BinaryExpr
}

// Binary operators, written with the named operator tokens from @tokens.
// Each alternative is tagged with one of the precedence names declared in
// the @precedence block.
BinaryExpr {
    Expr !or KOr Expr |
    Expr !and KAnd Expr |
    Expr !equal (Eq | Neq) Expr |
    Expr !compare (Gt | Lt | Geq | Leq) Expr |
    Expr !plus (Add | Sub) Expr |
    Expr !times (Mul | Div) Expr |
    Expr !pow Pow Expr
}

// Blocks, Statements
// Target of InputStatement and ReadfileStatement: a plain identifier or an
// array element.
Lvalue { Ident | ArrayIndex }
// NOTE(review): Statement already ends with its own Newline, so a branch as
// written needs a second Newline after the statement — confirm this is intended.
CaseofBranch { Expr Colon Statement Newline }
Parameter { Ident Colon Type }
ParameterList { Parameter (Comma Parameter)* }
ArgList { Expr (Comma Expr)* }
// A file may be named by identifier, string literal or parenthesised expression.
FileIdent { Ident | StringLit | Grouping }
FileMode { KRead | KWrite | KAppend }
// One or more modes joined with AND, e.g. READ AND WRITE.
FileModeList { FileMode (KAnd FileMode)* }

// One rule per statement form. Declarations, constants, procedures and
// functions accept an optional leading EXPORT.
DeclareStatement { KExport? KDeclare Ident (Comma Ident)* Colon Type }
ConstantStatement { KExport? KConstant Ident Assign Expr }
OutputStatement { (KOutput | KPrint) Expr (Comma Expr)* }
InputStatement { KInput Lvalue }
// The target is a bare Ident (not Lvalue). `Assign` is the named token from
// @tokens, which matches both "<-" and "←".
AssignStatement { Ident !assign Assign Expr }
IfStatement { KIf Expr KThen Block (KElse Block)? KEndif }
CaseofStatement { KCase KOf Expr Newline (CaseofBranch)* (KOtherwise Statement)? KEndcase }
WhileStatement { KWhile Expr KDo Block KEndwhile }
RepeatUntilStatement { KRepeat Block KUntil Expr }
// A Newline is required between the loop header and the body; the rule ends
// with NEXT followed by an identifier.
ForStatement { KFor Ident Assign Expr KTo Expr (KStep Expr)? Newline Block KNext Ident }
ProcedureStatement { KExport? KProcedure Ident (LParen ParameterList RParen)? Block KEndprocedure }
FunctionStatement { KExport? KFunction Ident (LParen ParameterList RParen)? KReturns Type Block KEndfunction }
// Unlike FunctionCall, the parentheses here require at least one argument;
// a call without arguments omits them entirely.
CallStatement { KCall Ident (LParen ArgList RParen)? }
ReturnStatement { KReturn Expr? }
ScopeStatement { KScope Block KEndscope }
IncludeStatement { KInclude StringLit }
// Optional parenthesised identifier list and optional `TO "file"` before the body.
TraceStatement { KTrace !traceParams (LParen Ident (Comma Ident)* RParen)? (KTo StringLit)? Block KEndtrace}
OpenfileStatement { KOpenfile FileIdent KFor FileModeList }
WritefileStatement { KWritefile FileIdent Comma Expr }
ReadfileStatement { KReadfile FileIdent Comma Lvalue }
ClosefileStatement { KClosefile FileIdent }

// A statement is an optional body followed by a mandatory Newline, so an
// empty line is itself a Statement. A bare Expr is also accepted as a body.
Statement {
    (
        DeclareStatement | ConstantStatement | OutputStatement | InputStatement
        | AssignStatement | IfStatement | CaseofStatement | WhileStatement
        | RepeatUntilStatement | ForStatement | ProcedureStatement
        | FunctionStatement | CallStatement | ReturnStatement | ScopeStatement
        | IncludeStatement | TraceStatement | OpenfileStatement
        | WritefileStatement | ReadfileStatement | ClosefileStatement | Expr
    )? Newline
}
// Zero or more statements.
Block { Statement* }

// Keyword tokens. Each one specializes Ident for exactly two spellings,
// all-lowercase and all-uppercase; mixed-case spellings such as `True` are
// not listed and so remain ordinary Ident tokens.
KTrue { @specialize<Ident, "true" | "TRUE"> }
KFalse { @specialize<Ident, "false" | "FALSE"> }
KDeclare { @specialize<Ident, "declare" | "DECLARE"> }
KOutput { @specialize<Ident, "output" | "OUTPUT"> }
KInput { @specialize<Ident, "input" | "INPUT"> }
KAnd { @specialize<Ident, "and" | "AND"> }
KOr { @specialize<Ident, "or" | "OR"> }
KNot { @specialize<Ident, "not" | "NOT"> }
KIf { @specialize<Ident, "if" | "IF"> }
KThen { @specialize<Ident, "then" | "THEN"> }
KElse { @specialize<Ident, "else" | "ELSE"> }
KEndif { @specialize<Ident, "endif" | "ENDIF"> }
KCase { @specialize<Ident, "case" | "CASE"> }
KOf { @specialize<Ident, "of" | "OF"> }
KOtherwise { @specialize<Ident, "otherwise" | "OTHERWISE"> }
KEndcase { @specialize<Ident, "endcase" | "ENDCASE"> }
KWhile { @specialize<Ident, "while" | "WHILE"> }
KDo { @specialize<Ident, "do" | "DO"> }
KEndwhile { @specialize<Ident, "endwhile" | "ENDWHILE"> }
KRepeat { @specialize<Ident, "repeat" | "REPEAT"> }
KUntil { @specialize<Ident, "until" | "UNTIL"> }
KFor { @specialize<Ident, "for" | "FOR"> }
KTo { @specialize<Ident, "to" | "TO"> }
KStep { @specialize<Ident, "step" | "STEP"> }
KNext { @specialize<Ident, "next" | "NEXT"> }
KProcedure { @specialize<Ident, "procedure" | "PROCEDURE"> }
KEndprocedure { @specialize<Ident, "endprocedure" | "ENDPROCEDURE"> }
KCall { @specialize<Ident, "call" | "CALL"> }
KFunction { @specialize<Ident, "function" | "FUNCTION"> }
// RETURNS (function signature) and RETURN (statement) are distinct keywords.
KReturns { @specialize<Ident, "returns" | "RETURNS"> }
KReturn { @specialize<Ident, "return" | "RETURN"> }
KEndfunction { @specialize<Ident, "endfunction" | "ENDFUNCTION"> }
KInclude { @specialize<Ident, "include" | "INCLUDE"> }
KExport { @specialize<Ident, "export" | "EXPORT"> }
KScope { @specialize<Ident, "scope" | "SCOPE"> }
KEndscope { @specialize<Ident, "endscope" | "ENDSCOPE"> }
KPrint { @specialize<Ident, "print" | "PRINT"> }
KConstant { @specialize<Ident, "constant" | "CONSTANT"> }
KArray { @specialize<Ident, "array" | "ARRAY"> }
KTrace { @specialize<Ident, "trace" | "TRACE"> }
KEndtrace { @specialize<Ident, "endtrace" | "ENDTRACE"> }
KOpenfile { @specialize<Ident, "openfile" | "OPENFILE"> }
KReadfile { @specialize<Ident, "readfile" | "READFILE"> }
KWritefile { @specialize<Ident, "writefile" | "WRITEFILE"> }
KClosefile { @specialize<Ident, "closefile" | "CLOSEFILE"> }
KRead { @specialize<Ident, "read" | "READ"> }
KWrite { @specialize<Ident, "write" | "WRITE"> }
KAppend { @specialize<Ident, "append" | "APPEND"> }
KInteger { @specialize<Ident, "integer" | "INTEGER"> }
KReal { @specialize<Ident, "real" | "REAL"> }
KString { @specialize<Ident, "string" | "STRING"> }
KChar { @specialize<Ident, "char" | "CHAR"> }
KBoolean { @specialize<Ident, "boolean" | "BOOLEAN"> }
KNull { @specialize<Ident, "null" | "NULL"> }

// Intended to list the tokens ignored between other tokens (whitespace and
// both comment kinds).
// NOTE(review): the entries are juxtaposed with no `|` between them, which in
// this grammar notation is a sequence, not a choice between alternatives.
@skip {
    space
    LineComment
    BlockComment
}

// Lexical tokens. Operators and separators each get their own named token,
// which the grammar rules refer to by name.
@tokens {
    // Comment delimited by /* and */.
    BlockComment { "/*" (![\*] | "*" ![/])* "*/" }
    // `//` followed by everything up to (not including) the next "\n".
    LineComment[isolate] { "//" ![\n]* }
    // String body characters: anything except backslash, newline or double quote.
    stringContentDouble { ![\\\n"]+ }
    // A backslash followed by one ASCII letter; sequences such as \" or \\
    // are not covered by this token.
    Escape { "\\" @asciiLetter } 
    space { $[ \t]+ }
    // A line break, or ";" by way of the Semicolon token defined further down.
    Newline { "\n" | "\r\n" | Semicolon }
    // RealLit is listed ahead of IntegerLit for input both could start on.
    @precedence { RealLit IntegerLit }
    // Digits are required on both sides of the decimal point.
    RealLit { $[0-9]+ "." $[0-9]+ }
    IntegerLit { $[0-9]+ }
    StringLit {
        '"' (stringContentDouble | Escape)* '"'
    }
    // Exactly one plain character or one Escape between single quotes.
    CharLit {
        "'" ( ![\\\n'] | Escape ) "'"
    }
    Ident { $[a-zA-Z_] $[a-zA-Z0-9_]* }

    // Operator tokens.
    Neq { "<>" }
    // Assignment arrow in either its ASCII or its Unicode spelling.
    Assign { "<-" | "←" }
    Leq { "<=" }
    Geq { ">=" }
    Add { "+" }
    Sub { "-" }
    Mul { "*" }
    Div { "/" }
    Lt { "<" }
    Gt { ">" }
    Eq { "=" }
    Pow { "^" }
    // Bracket and separator tokens.
    LCurly { "{" }
    RCurly { "}" }
    LParen { "(" }
    RParen { ")" }
    LBracket { "[" }
    RBracket { "]" }
    // Referenced only from Newline above; no grammar rule uses it directly.
    Semicolon { ";" }
    Colon { ":" }
    Comma { "," }

}

Your problem is your @skip rule

// The rule as posted: three names juxtaposed, with no `|` between them.
@skip {
    space
    LineComment
    BlockComment
}

This is saying that it expects a line comment and then a block comment after every space. You’ll want | operators between these.

Yes, if the tokens use a capitalized name they appear in the tree.

> Yes, if the tokens use a capitalized name they appear in the tree.

Since I ultimately want to use Lezer to do syntax highlighting with CodeMirror, would it be sensible to leave the tokens in? I’m not exactly sure as to how CodeMirror highlights stuff.

Merry Christmas, by the way!

Yes, you’ll want to make sure the tokens that need to be highlighted are present in the tree. Lower-case tokens and productions are mostly used for stuff like whitespace and structural nonterminals (generic expression rules and such), that don’t add much information.