Parse EBNF

(Redirected from EBNF parser)

Write a program that can parse a grammar in   Extended Backus–Naur Form   (EBNF),   and then parse something else according to the grammar.

Parse EBNF is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
This task has been clarified. Its programming examples are in need of review to ensure that they still fit the requirements of the task.


Task

The program is only required to decide whether or not the something else belongs to the language described by the grammar,  but for extra credit,   it can output a syntax tree.

See the tests.

The source of the Algol 68 sample is somewhat largish, so is on a separate page on Rosetta Code here: Parse EBNF/ALGOL 68.

Output:
Valid EBNF: a/z: 'a' { a = 'a1' ( 'a2' | 'a3' ) { 'a4' } [ 'a5' ] 'a6' ; } 'z'
 a: seq 
   literal  "a1"
   oneof 
     seq 
       literal  "a2"
     seq 
       literal  "a3"
   list 
     literal  "a4"
   opt 
     literal  "a5"
   literal  "a6"


"a1a3a4a4a5a6" is valid according to a

"a1 a2a6" is valid according to a

"a1 a3 a4 a6" is valid according to a

**** Syntax error near "a1 a4 a5 a6"
"a1 a4 a5 a6" is not valid according to a

**** Syntax error near "a5 a5 a6"
"a1 a2 a4 a5 a5 a6" is not valid according to a

**** Unexpected text: "a7" at the end of source
"a1 a2 a4 a5 a6 a7" is not valid according to a

**** Syntax error near "your ad here"
"your ad here" is not valid according to a


Valid EBNF: Arithmetic expressions: 
'Arithmetic expressions'
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
    plus = '+' | '-' .
    times = '*' | '/' .
    number = digit { digit } .
    digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' .
}
 expr: seq 
   rule  term
   list 
     rule  plus
     rule  term
 term: seq 
   rule  factor
   list 
     rule  times
     rule  factor
 factor: oneof 
   seq 
     rule  number
   seq 
     literal  "("
     rule  expr
     literal  ")"
 plus: oneof 
   seq 
     literal  "+"
   seq 
     literal  "-"
 times: oneof 
   seq 
     literal  "*"
   seq 
     literal  "/"
 number: seq 
   rule  digit
   list 
     rule  digit
 digit: oneof 
   seq 
     literal  "0"
   seq 
     literal  "1"
   seq 
     literal  "2"
   seq 
     literal  "3"
   seq 
     literal  "4"
   seq 
     literal  "5"
   seq 
     literal  "6"
   seq 
     literal  "7"
   seq 
     literal  "8"
   seq 
     literal  "9"


"2" is valid according to Arithmetic expressions

"2*3 + 4/23 - 7" is valid according to Arithmetic expressions

"(3 + 4) * 6-2+(4*(4))" is valid according to Arithmetic expressions

**** Syntax error near "-2"
"-2" is not valid according to Arithmetic expressions

**** Unexpected text: "+" at the end of source
"3 +" is not valid according to Arithmetic expressions

**** Syntax error near " 3"
"(4 + 3" is not valid according to Arithmetic expressions


**** Expected "{", not "a"
Invalid EBNF: a = '1';

**** Expected "}", not "(eof)"
Invalid EBNF: { a = '1' ;

**** Expected "=", not "world"
Invalid EBNF: { hello world = '1'; }

**** Rule "bar" not defined
Invalid EBNF: { foo = bar . }


Works with: C# version .NET 9
Translation of: Java
using System;
using System.Collections.Generic;
using System.Linq;

public class EBNFParser
{
    // Token class equivalent
    private class Token
    {
        public object Value { get; set; }
        public bool IsSequence { get; set; }
        
        public Token(object value, bool isSequence)
        {
            Value = value;
            IsSequence = isSequence;
        }
    }
    
    // Sequence class equivalent to ArrayList with constructor
    private class Sequence : List<object>
    {
        public Sequence(params object[] items)
        {
            AddRange(items);
        }
    }
    
    private string src;
    private char ch;
    private int sdx;
    private Token token;
    private bool err = false;
    private List<string> idents = new List<string>();
    private List<int> ididx = new List<int>();
    private List<Sequence> productions = new List<Sequence>();
    private Sequence extras = new Sequence();
    private string[] results = { "pass", "fail" };
    
    private int BoolToInt(bool b)
    {
        return b ? 1 : 0;
    }
    
    private int Invalid(string msg)
    {
        err = true;
        Console.WriteLine(msg);
        sdx = src.Length; // set to eof
        return -1;
    }
    
    private void SkipSpaces()
    {
        while (sdx < src.Length)
        {
            ch = src[sdx];
            if (" \t\r\n".IndexOf(ch) == -1)
            {
                break;
            }
            sdx++;
        }
    }
    
    private void GetToken()
    {
        // Yields a single character token, one of {}()[]|=.;
        // or {"terminal",string} or {"ident", string} or -1.
        SkipSpaces();
        if (sdx >= src.Length)
        {
            token = new Token(-1, false);
            return;
        }
        int tokstart = sdx;
        if ("{}()[]|=.;".IndexOf(ch) >= 0)
        {
            sdx++;
            token = new Token(ch, false);
        }
        else if (ch == '"' || ch == '\'')
        {
            char closech = ch;
            int tokend = tokstart + 1;
            while (tokend < src.Length && src[tokend] != closech)
            {
                tokend++;
            }
            if (tokend >= src.Length)
            {
                token = new Token(Invalid("no closing quote"), false);
            }
            else
            {
                sdx = tokend + 1;
                token = new Token(new Sequence("terminal", src.Substring(tokstart + 1, tokend - tokstart - 1)), true);
            }
        }
        else if (ch >= 'a' && ch <= 'z')
        {
            // To simplify things for the purposes of this task,
            // identifiers are strictly a-z only, not A-Z or 1-9.
            while (sdx < src.Length && ch >= 'a' && ch <= 'z')
            {
                sdx++;
                if (sdx < src.Length)
                {
                    ch = src[sdx];
                }
            }
            token = new Token(new Sequence("ident", src.Substring(tokstart, sdx - tokstart)), true);
        }
        else
        {
            token = new Token(Invalid("invalid ebnf"), false);
        }
    }
    
    private void MatchToken(char expectedCh)
    {
        if (!token.Value.Equals(expectedCh))
        {
            token = new Token(Invalid($"invalid ebnf ({expectedCh} expected)"), false);
        }
        else
        {
            GetToken();
        }
    }
    
    private int AddIdent(string ident)
    {
        int k = idents.IndexOf(ident);
        if (k == -1)
        {
            idents.Add(ident);
            k = idents.Count - 1;
            ididx.Add(-1);
        }
        return k;
    }
    
    private object Factor()
    {
        object res;
        if (token.IsSequence)
        {
            Sequence t = (Sequence)token.Value;
            if (t[0].Equals("ident"))
            {
                int idx = AddIdent((string)t[1]);
                t.Add(idx);
                token.Value = t;
            }
            res = token.Value;
            GetToken();
        }
        else if (token.Value.Equals('['))
        {
            GetToken();
            res = new Sequence("optional", Expression());
            MatchToken(']');
        }
        else if (token.Value.Equals('('))
        {
            GetToken();
            res = Expression();
            MatchToken(')');
        }
        else if (token.Value.Equals('{'))
        {
            GetToken();
            res = new Sequence("repeat", Expression());
            MatchToken('}');
        }
        else
        {
            throw new InvalidOperationException("invalid token in Factor() function");
        }
        if (res is Sequence seq && seq.Count == 1)
        {
            return seq[0];
        }
        return res;
    }
    
    private object Term()
    {
        Sequence res = new Sequence(Factor());
        object[] tokens = { -1, '|', '.', ';', ')', ']', '}' };
        
        while (true)
        {
            bool found = false;
            foreach (object t in tokens)
            {
                if (t.Equals(token.Value))
                {
                    found = true;
                    break;
                }
            }
            if (found) break;
            
            res.Add(Factor());
        }
        
        if (res.Count == 1)
        {
            return res[0];
        }
        return res;
    }
    
    private object Expression()
    {
        Sequence res = new Sequence(Term());
        if (token.Value.Equals('|'))
        {
            res = new Sequence("or", res[0]);
            while (token.Value.Equals('|'))
            {
                GetToken();
                res.Add(Term());
            }
        }
        if (res.Count == 1)
        {
            return res[0];
        }
        return res;
    }
    
    private object Production()
    {
        // Returns a token or -1; the real result is left in 'productions' etc,
        GetToken();
        if (!token.Value.Equals('}'))
        {
            if (token.Value.Equals(-1))
            {
                return Invalid("invalid ebnf (missing closing })");
            }
            if (!token.IsSequence)
            {
                return -1;
            }
            Sequence t = (Sequence)token.Value;
            if (!t[0].Equals("ident"))
            {
                return -1;
            }
            string ident = (string)t[1];
            int idx = AddIdent(ident);
            GetToken();
            MatchToken('=');
            if (token.Value.Equals(-1))
            {
                return -1;
            }
            productions.Add(new Sequence(ident, idx, Expression()));
            ididx[idx] = productions.Count - 1;
        }
        return token.Value;
    }
    
    private int Parse(string ebnf)
    {
        // Returns +1 if ok, -1 if bad.
        Console.WriteLine($"parse:\n{ebnf} ===>");
        err = false;
        src = ebnf;
        sdx = 0;
        idents.Clear();
        ididx.Clear();
        productions.Clear();
        extras.Clear();
        
        GetToken();
        if (token.IsSequence)
        {
            Sequence t = (Sequence)token.Value;
            t[0] = "title";
            extras.Add(token.Value);
            GetToken();
        }
        if (!token.Value.Equals('{'))
        {
            return Invalid("invalid ebnf (missing opening {)");
        }
        
        while (true)
        {
            object tokenResult = Production();
            if (tokenResult.Equals('}') || tokenResult.Equals(-1))
            {
                break;
            }
        }
        
        GetToken();
        if (token.IsSequence)
        {
            Sequence t = (Sequence)token.Value;
            t[0] = "comment";
            extras.Add(token.Value);
            GetToken();
        }
        if (!token.Value.Equals(-1))
        {
            return Invalid("invalid ebnf (missing eof?)");
        }
        if (err)
        {
            return -1;
        }
        
        int k = -1;
        for (int i = 0; i < ididx.Count; i++)
        {
            if (ididx[i] == -1)
            {
                k = i;
                break;
            }
        }
        if (k != -1)
        {
            return Invalid($"invalid ebnf (undefined:{idents[k]})");
        }
        
        PPrint(productions, "productions");
        PPrint(idents, "idents");
        PPrint(ididx, "ididx");
        PPrint(extras, "extras");
        return 1;
    }
    
    // Adjusts C#'s normal printing to look more like Phix output.
    private void PPrint(object ob, string header)
    {
        Console.WriteLine($"\n{header}:");
        string pp = ob.ToString();
        pp = pp.Replace("[", "{");
        pp = pp.Replace("]", "}");
        pp = pp.Replace(" ", ", ");
        for (int i = 0; i < idents.Count; i++)
        {
            pp = pp.Replace(i.ToString(), i.ToString());
        }
        Console.WriteLine(pp);
    }
    
    // The rules that Applies() has to deal with are:
    // {factors} - if rule[0] is not string,
    // just apply one after the other recursively.
    // {"terminal", "a1"}       -- literal constants
    // {"or", <e1>, <e2>, ...}  -- (any) one of n
    // {"repeat", <e1>}         -- as per "{}" in ebnf
    // {"optional", <e1>}       -- as per "[]" in ebnf
    // {"ident", <name>, idx}   -- apply the sub-rule
    
    private bool Applies(Sequence rule)
    {
        int wasSdx = sdx; // in case of failure
        object r1 = rule[0];
        
        if (!(r1 is string))
        {
            for (int i = 0; i < rule.Count; i++)
            {
                if (!Applies((Sequence)rule[i]))
                {
                    sdx = wasSdx;
                    return false;
                }
            }
        }
        else if (r1.Equals("terminal"))
        {
            SkipSpaces();
            string r2 = (string)rule[1];
            for (int i = 0; i < r2.Length; i++)
            {
                if (sdx >= src.Length || src[sdx] != r2[i])
                {
                    sdx = wasSdx;
                    return false;
                }
                sdx++;
            }
        }
        else if (r1.Equals("or"))
        {
            for (int i = 1; i < rule.Count; i++)
            {
                if (Applies((Sequence)rule[i]))
                {
                    return true;
                }
            }
            sdx = wasSdx;
            return false;
        }
        else if (r1.Equals("repeat"))
        {
            while (Applies((Sequence)rule[1]))
            {
                // continue repeating
            }
        }
        else if (r1.Equals("optional"))
        {
            Applies((Sequence)rule[1]);
        }
        else if (r1.Equals("ident"))
        {
            int i = (int)rule[2];
            int ii = ididx[i];
            if (!Applies((Sequence)productions[ii][2]))
            {
                sdx = wasSdx;
                return false;
            }
        }
        else
        {
            throw new InvalidOperationException("invalid rule in Applies() function");
        }
        return true;
    }
    
    private void CheckValid(string test)
    {
        src = test;
        sdx = 0;
        bool res = Applies((Sequence)productions[0][2]);
        SkipSpaces();
        if (sdx < src.Length)
        {
            res = false;
        }
        Console.WriteLine($"\"{test}\", {results[1 - BoolToInt(res)]}");
    }
    
    public static void Main(string[] args)
    {
        EBNFParser parser = new EBNFParser();
        
        string[] ebnfs = {
            "\"a\" {\n" +
            "    a = \"a1\" ( \"a2\" | \"a3\" ) { \"a4\" } [ \"a5\" ] \"a6\" ;\n" +
            "} \"z\" ",
            
            "{\n" +
            "    expr = term { plus term } .\n" +
            "    term = factor { times factor } .\n" +
            "    factor = number | '(' expr ')' .\n" +
            " \n" +
            "    plus = \"+\" | \"-\" .\n" +
            "    times = \"*\" | \"/\" .\n" +
            " \n" +
            "    number = digit { digit } .\n" +
            "    digit = \"0\" | \"1\" | \"2\" | \"3\" | \"4\" | \"5\" | \"6\" | \"7\" | \"8\" | \"9\" .\n" +
            "}",
            
            "a = \"1\"",
            "{ a = \"1\" ;",
            "{ hello world = \"1\"; }",
            "{ foo = bar . }"
        };
        
        string[][] tests = {
            new string[] {
                "a1a3a4a4a5a6",
                "a1 a2a6",
                "a1 a3 a4 a6",
                "a1 a4 a5 a6",
                "a1 a2 a4 a5 a5 a6",
                "a1 a2 a4 a5 a6 a7",
                "your ad here"
            },
            new string[] {
                "2",
                "2*3 + 4/23 - 7",
                "(3 + 4) * 6-2+(4*(4))",
                "-2",
                "3 +",
                "(4 + 3"
            }
        };
        
        for (int i = 0; i < ebnfs.Length; i++)
        {
            if (parser.Parse(ebnfs[i]) == 1)
            {
                Console.WriteLine("\ntests:");
                if (i < tests.Length)
                {
                    foreach (string test in tests[i])
                    {
                        parser.CheckValid(test);
                    }
                }
            }
            Console.WriteLine();
        }
    }
}
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>

productions:
System.Collections.Generic.List`1{EBNFParser+Sequence}

idents:
System.Collections.Generic.List`1{System.String}

ididx:
System.Collections.Generic.List`1{System.Int32}

extras:
EBNFParser+Sequence

tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>

productions:
System.Collections.Generic.List`1{EBNFParser+Sequence}

idents:
System.Collections.Generic.List`1{System.String}

ididx:
System.Collections.Generic.List`1{System.Int32}

extras:
EBNFParser+Sequence

tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail

parse:
a = "1" ===>
invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>
invalid ebnf (missing closing })

parse:
{ hello world = "1"; } ===>
invalid ebnf (= expected)

parse:
{ foo = bar . } ===>
invalid ebnf (undefined:bar)


Works with: C++ version 17
Translation of: Rust
#include <iostream>
#include <string>
#include <vector>
#include <memory>
#include <variant>
#include <algorithm>
#include <optional>

class Token {
public:
    enum Type { CHAR, SEQUENCE, EOF_TOKEN };
    Type type;
    char ch;
    std::vector<std::string> seq;
    
    Token() : type(EOF_TOKEN), ch('\0') {}
    Token(char c) : type(CHAR), ch(c) {}
    Token(const std::vector<std::string>& s) : type(SEQUENCE), ch('\0'), seq(s) {}
    
    bool isChar(char c) const {
        return type == CHAR && ch == c;
    }
    
    bool isEOF() const {
        return type == EOF_TOKEN;
    }
};

class Rule {
public:
    enum Type { TERMINAL, IDENT, OR, REPEAT, OPTIONAL, SEQUENCE };
    Type type;
    std::string value;
    size_t idx;
    std::vector<std::unique_ptr<Rule>> rules;
    std::unique_ptr<Rule> rule;
    
    Rule(Type t, const std::string& v) : type(t), value(v), idx(0) {}
    Rule(Type t, const std::string& v, size_t i) : type(t), value(v), idx(i) {}
    Rule(Type t, std::vector<std::unique_ptr<Rule>> r) : type(t), idx(0), rules(std::move(r)) {}
    Rule(Type t, std::unique_ptr<Rule> r) : type(t), idx(0), rule(std::move(r)) {}
    
    // Copy constructor
    Rule(const Rule& other) : type(other.type), value(other.value), idx(other.idx) {
        if (other.rule) {
            rule = std::make_unique<Rule>(*other.rule);
        }
        for (const auto& r : other.rules) {
            rules.push_back(std::make_unique<Rule>(*r));
        }
    }
    
    // Assignment operator
    Rule& operator=(const Rule& other) {
        if (this != &other) {
            type = other.type;
            value = other.value;
            idx = other.idx;
            rule.reset();
            rules.clear();
            
            if (other.rule) {
                rule = std::make_unique<Rule>(*other.rule);
            }
            for (const auto& r : other.rules) {
                rules.push_back(std::make_unique<Rule>(*r));
            }
        }
        return *this;
    }
    
    static std::unique_ptr<Rule> createTerminal(const std::string& v) {
        return std::make_unique<Rule>(TERMINAL, v);
    }
    
    static std::unique_ptr<Rule> createIdent(const std::string& v, size_t i) {
        return std::make_unique<Rule>(IDENT, v, i);
    }
    
    static std::unique_ptr<Rule> createOr(std::vector<std::unique_ptr<Rule>> r) {
        return std::make_unique<Rule>(OR, std::move(r));
    }
    
    static std::unique_ptr<Rule> createRepeat(std::unique_ptr<Rule> r) {
        return std::make_unique<Rule>(REPEAT, std::move(r));
    }
    
    static std::unique_ptr<Rule> createOptional(std::unique_ptr<Rule> r) {
        return std::make_unique<Rule>(OPTIONAL, std::move(r));
    }
    
    static std::unique_ptr<Rule> createSequence(std::vector<std::unique_ptr<Rule>> r) {
        return std::make_unique<Rule>(SEQUENCE, std::move(r));
    }
};

class EBNFParser {
private:
    std::string src;
    char ch;
    size_t sdx;
    Token token;
    bool err;
    std::vector<std::string> idents;
    std::vector<std::optional<size_t>> ididx;
    std::vector<std::tuple<std::string, size_t, std::unique_ptr<Rule>>> productions;
    std::vector<std::vector<std::string>> extras;
    
    int btoi(bool b) {
        return b ? 1 : 0;
    }
    
    int invalid(const std::string& msg) {
        err = true;
        std::cout << msg << std::endl;
        sdx = src.length();
        return -1;
    }
    
    void skipSpaces() {
        while (sdx < src.length()) {
            ch = src[sdx];
            if (ch != ' ' && ch != '\t' && ch != '\r' && ch != '\n') {
                break;
            }
            sdx++;
        }
    }
    
    void getToken() {
        skipSpaces();
        if (sdx >= src.length()) {
            token = Token();
            return;
        }
        
        size_t tokstart = sdx;
        
        if (std::string("{}()[]|=.;").find(ch) != std::string::npos) {
            sdx++;
            token = Token(ch);
        } else if (ch == '"' || ch == '\'') {
            char closech = ch;
            size_t tokend = tokstart + 1;
            while (tokend < src.length() && src[tokend] != closech) {
                tokend++;
            }
            if (tokend >= src.length()) {
                invalid("no closing quote");
                token = Token();
            } else {
                sdx = tokend + 1;
                std::string content = src.substr(tokstart + 1, tokend - tokstart - 1);
                token = Token(std::vector<std::string>{"terminal", content});
            }
        } else if (std::islower(ch)) {
            while (sdx < src.length() && std::islower(src[sdx])) {
                sdx++;
            }
            std::string ident = src.substr(tokstart, sdx - tokstart);
            token = Token(std::vector<std::string>{"ident", ident});
        } else {
            invalid("invalid ebnf");
            token = Token();
        }
    }
    
    void matchToken(char expected_ch) {
        if (!token.isChar(expected_ch)) {
            invalid("invalid ebnf (" + std::string(1, expected_ch) + " expected)");
        } else {
            getToken();
        }
    }
    
    size_t addIdent(const std::string& ident) {
        auto it = std::find(idents.begin(), idents.end(), ident);
        if (it != idents.end()) {
            return std::distance(idents.begin(), it);
        } else {
            idents.push_back(ident);
            size_t k = idents.size() - 1;
            ididx.push_back(std::nullopt);
            return k;
        }
    }
    
    std::unique_ptr<Rule> factor() {
        if (token.type == Token::SEQUENCE) {
            if (token.seq[0] == "ident") {
                size_t idx = addIdent(token.seq[1]);
                auto rule = Rule::createIdent(token.seq[1], idx);
                getToken();
                return rule;
            } else if (token.seq[0] == "terminal") {
                auto rule = Rule::createTerminal(token.seq[1]);
                getToken();
                return rule;
            }
        } else if (token.isChar('[')) {
            getToken();
            auto expr = expression();
            matchToken(']');
            return Rule::createOptional(std::move(expr));
        } else if (token.isChar('(')) {
            getToken();
            auto expr = expression();
            matchToken(')');
            return expr;
        } else if (token.isChar('{')) {
            getToken();
            auto expr = expression();
            matchToken('}');
            return Rule::createRepeat(std::move(expr));
        }
        
        throw std::runtime_error("invalid token in factor() function");
    }
    
    std::unique_ptr<Rule> term() {
        std::vector<std::unique_ptr<Rule>> factors;
        factors.push_back(factor());
        
        while (!token.isEOF() && !token.isChar('|') && !token.isChar('.') && 
               !token.isChar(';') && !token.isChar(')') && !token.isChar(']') && 
               !token.isChar('}')) {
            factors.push_back(factor());
        }
        
        if (factors.size() == 1) {
            return std::move(factors[0]);
        } else {
            return Rule::createSequence(std::move(factors));
        }
    }
    
    std::unique_ptr<Rule> expression() {
        auto first_term = term();
        
        if (token.isChar('|')) {
            std::vector<std::unique_ptr<Rule>> terms;
            terms.push_back(std::move(first_term));
            while (token.isChar('|')) {
                getToken();
                terms.push_back(term());
            }
            return Rule::createOr(std::move(terms));
        } else {
            return first_term;
        }
    }
    
    Token production() {
        getToken();
        if (token.isChar('}')) {
            return token;
        }
        if (token.isEOF()) {
            invalid("invalid ebnf (missing closing })");
            return Token();
        }
        
        if (token.type == Token::SEQUENCE && token.seq[0] == "ident") {
            std::string ident = token.seq[1];
            size_t idx = addIdent(ident);
            getToken();
            matchToken('=');
            if (token.isEOF()) {
                return Token();
            }
            auto expr = expression();
            productions.push_back(std::make_tuple(ident, idx, std::move(expr)));
            ididx[idx] = productions.size() - 1;
            return token;
        }
        
        return Token();
    }
    
    void skipSpacesAt(const std::string& s, size_t& sdx) const {
        while (sdx < s.length()) {
            char ch = s[sdx];
            if (ch != ' ' && ch != '\t' && ch != '\r' && ch != '\n') {
                break;
            }
            sdx++;
        }
    }
    
    bool applies(const Rule* rule, const std::string& s, size_t& sdx) const {
        size_t was_sdx = sdx;
        
        switch (rule->type) {
            case Rule::SEQUENCE:
                for (const auto& r : rule->rules) {
                    if (!applies(r.get(), s, sdx)) {
                        sdx = was_sdx;
                        return false;
                    }
                }
                return true;
                
            case Rule::TERMINAL:
                skipSpacesAt(s, sdx);
                for (char ch : rule->value) {
                    if (sdx >= s.length() || s[sdx] != ch) {
                        sdx = was_sdx;
                        return false;
                    }
                    sdx++;
                }
                return true;
                
            case Rule::OR:
                for (const auto& r : rule->rules) {
                    if (applies(r.get(), s, sdx)) {
                        return true;
                    }
                }
                sdx = was_sdx;
                return false;
                
            case Rule::REPEAT:
                while (applies(rule->rule.get(), s, sdx)) {
                    // continue repeating
                }
                return true;
                
            case Rule::OPTIONAL:
                applies(rule->rule.get(), s, sdx);
                return true;
                
            case Rule::IDENT:
                if (ididx[rule->idx].has_value()) {
                    size_t prod_idx = ididx[rule->idx].value();
                    if (!applies(std::get<2>(productions[prod_idx]).get(), s, sdx)) {
                        sdx = was_sdx;
                        return false;
                    }
                    return true;
                } else {
                    return false;
                }
        }
        return false;
    }
    
    void pprintProductions() const {
        std::cout << "\nproductions:" << std::endl;
        for (const auto& prod : productions) {
            std::cout << "(" << std::get<0>(prod) << ", " << std::get<1>(prod) << ", [Rule])" << std::endl;
        }
    }
    
    void pprintIdents() const {
        std::cout << "\nidents:" << std::endl;
        for (const auto& ident : idents) {
            std::cout << ident << " ";
        }
        std::cout << std::endl;
    }
    
    void pprintIdidx() const {
        std::cout << "\nididx:" << std::endl;
        for (const auto& idx : ididx) {
            if (idx.has_value()) {
                std::cout << idx.value() << " ";
            } else {
                std::cout << "None ";
            }
        }
        std::cout << std::endl;
    }
    
    void pprintExtras() const {
        std::cout << "\nextras:" << std::endl;
        for (const auto& extra : extras) {
            std::cout << "[";
            for (size_t i = 0; i < extra.size(); ++i) {
                std::cout << extra[i];
                if (i < extra.size() - 1) std::cout << ", ";
            }
            std::cout << "]" << std::endl;
        }
    }
    
public:
    EBNFParser() : ch('\0'), sdx(0), err(false) {}
    
    int parse(const std::string& ebnf) {
        std::cout << "parse:\n" << ebnf << " ===>\n" << std::endl;
        err = false;
        src = ebnf;
        sdx = 0;
        idents.clear();
        ididx.clear();
        productions.clear();
        extras.clear();
        
        try {
            getToken();
            if (token.type == Token::SEQUENCE) {
                auto t = token.seq;
                t[0] = "title";
                extras.push_back(t);
                getToken();
            }
            
            if (!token.isChar('{')) {
                return invalid("invalid ebnf (missing opening {)");
            }
            
            while (true) {
                Token result = production();
                if (result.isChar('}') || result.isEOF()) {
                    break;
                }
            }
            
            getToken();
            if (token.type == Token::SEQUENCE) {
                auto t = token.seq;
                t[0] = "comment";
                extras.push_back(t);
                getToken();
            }
            
            if (!token.isEOF()) {
                return invalid("invalid ebnf (missing eof?)");
            }
            if (err) {
                return -1;
            }
            
            // Check for undefined identifiers
            for (size_t i = 0; i < ididx.size(); ++i) {
                if (!ididx[i].has_value()) {
                    return invalid("invalid ebnf (undefined:" + idents[i] + ")");
                }
            }
            
            pprintProductions();
            pprintIdents();
            pprintIdidx();
            pprintExtras();
            return 1;
        } catch (const std::exception& e) {
            return -1;
        }
    }
    
    void checkValid(const std::string& test) const {
        size_t sdx = 0;
        bool res = applies(std::get<2>(productions[0]).get(), test, sdx);
        skipSpacesAt(test, sdx);
        bool final_res = res && sdx >= test.length();
        std::string result = final_res ? "pass" : "fail";
        std::cout << "\"" << test << "\", " << result << std::endl;
    }
};

int main() {
    EBNFParser parser;
    
    std::vector<std::string> ebnfs = {
        R"("a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z" )",
        R"({
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
})",
        R"(a = "1")",
        R"({ a = "1" ;)",
        R"({ hello world = "1"; })",
        R"({ foo = bar . })"
    };
    
    std::vector<std::vector<std::string>> tests = {
        {
            "a1a3a4a4a5a6",
            "a1 a2a6",
            "a1 a3 a4 a6",
            "a1 a4 a5 a6",
            "a1 a2 a4 a5 a5 a6",
            "a1 a2 a4 a5 a6 a7",
            "your ad here"
        },
        {
            "2",
            "2*3 + 4/23 - 7",
            "(3 + 4) * 6-2+(4*(4))",
            "-2",
            "3 +",
            "(4 + 3"
        }
    };
    
    for (size_t i = 0; i < ebnfs.size(); ++i) {
        if (parser.parse(ebnfs[i]) == 1) {
            std::cout << "\ntests:" << std::endl;
            if (i < tests.size()) {
                for (const auto& test : tests[i]) {
                    parser.checkValid(test);
                }
            }
        }
        std::cout << std::endl;
    }
    
    return 0;
}
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>


productions:
(a, 0, [Rule])

idents:
a 

ididx:
0 

extras:
[title, a]
[comment, z]

tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>


productions:
(expr, 0, [Rule])
(term, 1, [Rule])
(factor, 3, [Rule])
(plus, 2, [Rule])
(times, 4, [Rule])
(number, 5, [Rule])
(digit, 6, [Rule])

idents:
expr term plus factor times number digit 

ididx:
0 1 3 2 4 5 6 

extras:

tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail

parse:
a = "1" ===>

invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>

invalid ebnf (missing closing })

parse:
{ hello world = "1"; } ===>

invalid ebnf (= expected)

parse:
{ foo = bar . } ===>

invalid ebnf (undefined:bar)


The validators created by this parser allow white space before literals (only way to pass the tests). This means that while the parser rejects the "invalid" EBNF { hello world = "1"; }, an EBNF grammar parsed by it would deem it valid.

module EBNF
  class Syntax
    getter prods, start
    getter title, comment
    def initialize (@prods : Hash(String, Prod), @start : Prod, title, comment)
      @comment = comment || ""
      @title = title || ""
    end

    def matches (s)
      sc = Scanner.new s
      start.expr.matches sc
      sc.eof?
    end

    def dump (io)
      io.puts "title:   #{title}"
      io.puts "comment: #{comment}"
      io.puts "productions:"
      prods.keys.sort.each do |prod|
        head = "  #{prod} = "
        io << head
        prods[prod].expr.dump io, head.size
        io.puts
      end
      io.puts "start: #{start.name}"
    end

    def self.parse (source)
      syntax = Parser.new(source).parse_syntax
      syntax.prods.each_value do |prod|
        prod.fix_refs syntax.prods
      end
      syntax
    end
  end

  class Scanner
    getter ch : Char?, io

    def initialize (@io : IO)
      get
    end

    def initialize (s : String)
      @io = IO::Memory.new(s.to_slice, false)
      get
    end

    def get
      @ch = io.read_char
    end

    def save
      { @ch, io.pos }
    end

    def restore (tuple)
      @ch, @io.pos = tuple
    end

    def skip_spaces
      while (c = ch) && c.whitespace?; get; end
    end

    def over (s : String)
      s.each_char do |char|
        raise "<#{s}> expected at #{io.pos}" unless ch == char
        get
      end
    end

    def over (char : Char)
      over char.to_s
    end

    def over_one_of (s, msg = nil)
      msg ||= "one of <#{s}> expected"
      raise "#{msg} at #{io.pos}" unless (c = ch) && c.in_set? s
      get
    end

    def string
      quote = ch.not_nil!
      over quote
      result = String.build do |s|
        while (c = ch) && c != quote
          s << c; get
        end
      end
      over quote
      result
    end

    def ident
      raise "letter expected at #{io.pos}" unless (c = ch) && c.letter?
      String.build do |s|
        s << c; get
        while (c = ch) && c.alphanumeric?
          s << c; get
        end
      end
    end

    def eof?; ch == nil; end
  end

  class Parser
    getter sc
    def initialize (io)
      @sc = Scanner.new io
    end

    def skip; sc.skip_spaces; end

    def parse_syntax
      skip; title = if (c = sc.ch) && c.in_set? "'\""
                      sc.string
                    end
      prods = Hash(String, Prod).new
      first_prod = nil
      skip; sc.over '{'; skip
      while (c = sc.ch) && c.letter?
        prod = parse_prod
        first_prod ||= prod.name
        prods[prod.name] = prod
        skip
      end
      raise "production expected at #{sc.io.pos}" unless first_prod
      sc.over '}'
      skip; comment = if (c = sc.ch) && c.in_set? "\"'"
                        sc.string
                      end
      EBNF::Syntax.new prods, prods[first_prod], title, comment
    end

    def parse_prod
      skip; name = sc.ident
      skip; sc.over '='
      expr = parse_or
      skip; sc.over_one_of ".;", "terminator expected"
      EBNF::Prod.new name, expr
    end

    def parse_or
      exprs = [] of Expr
      loop do
        exprs << parse_seq
        skip; break unless sc.ch == '|'
        sc.over '|'
      end
      return exprs[0] if exprs.size == 1
      OrExpr.new exprs
    end

    def parse_seq
      exprs = [] of Expr
      loop do
        skip; break unless (c = sc.ch)
        case c
        when .letter?       then exprs << RefExpr.new sc.ident
        when .in_set? "\"'" then exprs << LitExpr.new sc.string
        when '[' then sc.over '['; exprs << OptExpr.new parse_or; sc.over ']'
        when '{' then sc.over '{'; exprs << RepExpr.new parse_or; sc.over '}'
        when '(' then sc.over '('; exprs << parse_or;             sc.over ')'
        else
          break
        end
      end
      raise "this parser doesn't support empty productions, at #{sc.io.pos}" unless exprs.size > 0
      return exprs[0] if exprs.size == 1
      SeqExpr.new exprs
    end
  end

  class Prod
    getter name, expr
    def initialize (@name : String, @expr : Expr)
    end

    def fix_refs (prods)
      @expr.fix_refs prods
    end
  end

  abstract class Expr
    getter subexprs = [] of Expr
    getter op = ""

    def dump (io, indent)
      io << "(" << op << " "
      newindent = indent + op.size + 2
      first = true
      subexprs.each do |expr|
        unless first
          io << "\n" << " " * newindent
        end
        first = false
        expr.dump io, newindent
      end
      io << ")"
    end

    def fix_refs (prods)
      subexprs.each &.fix_refs(prods)
    end
  end

  class OrExpr < Expr
    def initialize (@subexprs)
      @op = "or"
    end

    def matches (sc)
      pos = sc.save
      subexprs.each do |alt|
        return true if alt.matches sc
        sc.restore pos
      end
      false
    end
  end

  class SeqExpr < Expr
    def initialize (@subexprs)
      @op = "and"
    end

    def matches (sc)
      subexprs.each do |expr|
        return false unless expr.matches sc
      end
      true
    end
  end

  class RefExpr < Expr
    getter name, ref : Expr?
    def initialize (@name : String)
      @op = "ref"
    end

    def fix_refs (prods)
      @ref = prods[name]?.try &.expr
      raise "undefined production <#{name}>" unless @ref
    end

    def dump (io, indent=0)
      io << "<" << name << ">"
    end

    def matches (sc)
      raise "undefined production <#{name}>" unless ref
      ref.not_nil!.matches sc
    end
  end

  class LitExpr < Expr
    getter string

    def initialize (@string : String)
    end

    def dump (io, indent=0)
      string.inspect io
    end

    def matches (sc)
      sc.skip_spaces # necessary to pass tests (!?)
      begin
        sc.over string
      rescue
        return false
      end
      true
    end
  end

  class OptExpr < Expr
    def initialize (expr)
      @op = "opt"
      @subexprs = [expr]
    end

    def matches (sc)
      pos = sc.save
      unless subexprs.first.matches sc
        sc.restore pos
      end
      true
    end
  end

  class RepExpr < Expr
    def initialize (expr)
      @op = "rep"
      @subexprs = [expr]
    end

    def matches (sc)
      pos = sc.save
      while subexprs.first.matches sc
        pos = sc.save
      end
      sc.restore pos
      true
    end
  end
end

def test (syntax, tests)
  puts "SOURCE:"
  puts syntax
  syn = EBNF::Syntax.parse syntax
  puts "PARSED:"
  syn.dump STDOUT
  puts "TESTS:"
  tests.each do |test|
    result = syn.matches(test) ? "PASS" : "fail"
    puts "  #{result}  #{test}"
  end
end

syntax = <<-EOT
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"
EOT

test syntax, ["a1a3a4a4a5a6", "a1 a2a6", "a1 a3 a4 a6", "a1 a4 a5 a6",
              "a1 a2 a4 a5 a5 a6", "a1 a2 a4 a5 a6 a7", "your ad here"]

puts
syntax = <<-EOT
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .

    plus = "+" | "-" .
    times = "*" | "/" .

    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}
EOT

test syntax, ["2", "2*3 + 4/23 - 7", "(3 + 4) * 6-2+(4*(4))", "-2", "3 +", "(4 + 3"]

puts
puts "INVALID SYNTAXES:"
[%<a = "1";>, %<{ a = "1" ;>, %<{ hello world = "1"; }>, %<{ foo = bar . }>].each do |syn|
  puts "Parsing syntax: #{syn}"
  print "  -> "
  begin
    EBNF::Syntax.parse syn
    puts "parsed OK (!!)"
  rescue e
    puts "Error: #{e.message}"
  end
end
Output:
SOURCE:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"
PARSED:
title:   a
comment: z
productions:
  a = (and "a1"
           (or "a2"
               "a3")
           (rep "a4")
           (opt "a5")
           "a6")
start: a
TESTS:
  PASS  a1a3a4a4a5a6
  PASS  a1 a2a6
  PASS  a1 a3 a4 a6
  fail  a1 a4 a5 a6
  fail  a1 a2 a4 a5 a5 a6
  fail  a1 a2 a4 a5 a6 a7
  fail  your ad here

SOURCE:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .

    plus = "+" | "-" .
    times = "*" | "/" .

    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}
PARSED:
title:
comment:
productions:
  digit = (or "0"
              "1"
              "2"
              "3"
              "4"
              "5"
              "6"
              "7"
              "8"
              "9")
  expr = (and <term>
              (rep (and <plus>
                        <term>)))
  factor = (or <number>
               (and "("
                    <expr>
                    ")"))
  number = (and <digit>
                (rep <digit>))
  plus = (or "+"
             "-")
  term = (and <factor>
              (rep (and <times>
                        <factor>)))
  times = (or "*"
              "/")
start: expr
TESTS:
  PASS  2
  PASS  2*3 + 4/23 - 7
  PASS  (3 + 4) * 6-2+(4*(4))
  fail  -2
  fail  3 +
  fail  (4 + 3

INVALID SYNTAXES:
Parsing syntax: a = "1";
  -> Error: <{> expected at 1
Parsing syntax: { a = "1" ;
  -> Error: <}> expected at 11
Parsing syntax: { hello world = "1"; }
  -> Error: <=> expected at 9
Parsing syntax: { foo = bar . }
  -> Error: undefined production <bar>


Works with: Dart version 3.6.1
Translation of: Java
import 'dart:io';

class Token {
  dynamic value;
  bool isSequence;
  
  Token(this.value, this.isSequence);
}

class Sequence {
  List<dynamic> _items = [];
  
  Sequence([List<dynamic>? items]) {
    if (items != null) {
      _items.addAll(items);
    }
  }
  
  Sequence.from(List<dynamic> items) {
    _items.addAll(items);
  }
  
  // Delegate to the internal list
  void add(dynamic item) => _items.add(item);
  void addAll(Iterable<dynamic> items) => _items.addAll(items);
  dynamic operator [](int index) => _items[index];
  void operator []=(int index, dynamic value) => _items[index] = value;
  int get length => _items.length;
  void clear() => _items.clear();
  bool get isEmpty => _items.isEmpty;
  bool get isNotEmpty => _items.isNotEmpty;
  
  @override
  String toString() => _items.toString();
}

class EBNFParser {
  late String src;
  String ch = '';
  int sdx = 0;
  Token? token;
  bool err = false;
  List<String> idents = [];
  List<int> ididx = [];
  List<Sequence> productions = [];
  Sequence extras = Sequence();
  List<String> results = ["pass", "fail"];
  
  int btoi(bool b) {
    return b ? 1 : 0;
  }
  
  int invalid(String msg) {
    err = true;
    print(msg);
    sdx = src.length; // set to eof
    return -1;
  }
  
  void skipSpaces() {
    while (sdx < src.length) {
      ch = src[sdx];
      if (!" \t\r\n".contains(ch)) {
        break;
      }
      sdx++;
    }
  }
  
  void getToken() {
    // Yields a single character token, one of {}()[]|=.;
    // or {"terminal",string} or {"ident", string} or -1.
    skipSpaces();
    if (sdx >= src.length) {
      token = Token(-1, false);
      return;
    }
    int tokstart = sdx;
    if ("{}()[]|=.;".contains(ch)) {
      sdx++;
      token = Token(ch, false);
    } else if (ch == '"' || ch == "'") {
      String closech = ch;
      int tokend = tokstart + 1;
      while (tokend < src.length && src[tokend] != closech) {
        tokend++;
      }
      if (tokend >= src.length) {
        token = Token(invalid("no closing quote"), false);
      } else {
        sdx = tokend + 1;
        token = Token(Sequence(["terminal", src.substring(tokstart + 1, tokend)]), true);
      }
    } else if (ch.codeUnitAt(0) >= 'a'.codeUnitAt(0) && ch.codeUnitAt(0) <= 'z'.codeUnitAt(0)) {
      // To simplify things for the purposes of this task,
      // identifiers are strictly a-z only, not A-Z or 1-9.
      while (sdx < src.length && ch.codeUnitAt(0) >= 'a'.codeUnitAt(0) && ch.codeUnitAt(0) <= 'z'.codeUnitAt(0)) {
        sdx++;
        if (sdx < src.length) {
          ch = src[sdx];
        }
      }
      token = Token(Sequence(["ident", src.substring(tokstart, sdx)]), true);
    } else {
      token = Token(invalid("invalid ebnf"), false);
    }
  }
  
  void matchToken(String expectedCh) {
    if (token!.value != expectedCh) {
      token = Token(invalid("invalid ebnf ($expectedCh expected)"), false);
    } else {
      getToken();
    }
  }
  
  int addIdent(String ident) {
    int k = idents.indexOf(ident);
    if (k == -1) {
      idents.add(ident);
      k = idents.length - 1;
      ididx.add(-1);
    }
    return k;
  }
  
  dynamic factor() {
    dynamic res;
    if (token!.isSequence) {
      Sequence t = token!.value as Sequence;
      if (t[0] == "ident") {
        int idx = addIdent(t[1] as String);
        t.add(idx);
        token!.value = t;
      }
      res = token!.value;
      getToken();
    } else if (token!.value == '[') {
      getToken();
      res = Sequence(["optional", expression()]);
      matchToken(']');
    } else if (token!.value == '(') {
      getToken();
      res = expression();
      matchToken(')');
    } else if (token!.value == '{') {
      getToken();
      res = Sequence(["repeat", expression()]);
      matchToken('}');
    } else {
      throw Exception("invalid token in factor() function");
    }
    if (res is Sequence && res.length == 1) {
      return res[0];
    }
    return res;
  }
  
  dynamic term() {
    Sequence res = Sequence([factor()]);
    List<dynamic> tokens = [-1, '|', '.', ';', ')', ']', '}'];
    
    outer:
    while (true) {
      for (dynamic t in tokens) {
        if (t == token!.value) {
          break outer;
        }
      }
      res.add(factor());
    }
    
    if (res.length == 1) {
      return res[0];
    }
    return res;
  }
  
  dynamic expression() {
    Sequence res = Sequence([term()]);
    if (token!.value == '|') {
      res = Sequence(["or", res[0]]);
      while (token!.value == '|') {
        getToken();
        res.add(term());
      }
    }
    if (res.length == 1) {
      return res[0];
    }
    return res;
  }
  
  dynamic production() {
    // Returns a token or -1; the real result is left in 'productions' etc,
    getToken();
    if (token!.value != '}') {
      if (token!.value == -1) {
        return invalid("invalid ebnf (missing closing })");
      }
      if (!token!.isSequence) {
        return -1;
      }
      Sequence t = token!.value as Sequence;
      if (t[0] != "ident") {
        return -1;
      }
      String ident = t[1] as String;
      int idx = addIdent(ident);
      getToken();
      matchToken('=');
      if (token!.value == -1) {
        return -1;
      }
      productions.add(Sequence([ident, idx, expression()]));
      ididx[idx] = productions.length - 1;
    }
    return token!.value;
  }
  
  int parse(String ebnf) {
    // Returns +1 if ok, -1 if bad.
    print("parse:");
    print("$ebnf ===>");
    err = false;
    src = ebnf;
    sdx = 0;
    idents.clear();
    ididx.clear();
    productions.clear();
    extras.clear();
    
    getToken();
    if (token!.isSequence) {
      Sequence t = token!.value as Sequence;
      t[0] = "title";
      extras.add(token!.value);
      getToken();
    }
    if (token!.value != '{') {
      return invalid("invalid ebnf (missing opening {)");
    }
    
    while (true) {
      dynamic tokenResult = production();
      if (tokenResult == '}' || tokenResult == -1) {
        break;
      }
    }
    
    getToken();
    if (token!.isSequence) {
      Sequence t = token!.value as Sequence;
      t[0] = "comment";
      extras.add(token!.value);
      getToken();
    }
    if (token!.value != -1) {
      return invalid("invalid ebnf (missing eof?)");
    }
    if (err) {
      return -1;
    }
    
    int k = -1;
    for (int i = 0; i < ididx.length; i++) {
      if (ididx[i] == -1) {
        k = i;
        break;
      }
    }
    if (k != -1) {
      return invalid("invalid ebnf (undefined:${idents[k]})");
    }
    
    pprint(productions, "productions");
    pprint(idents, "idents");
    pprint(ididx, "ididx");
    pprint(extras, "extras");
    return 1;
  }
  
  // Adjusts Dart's normal printing to look more like Phix output.
  void pprint(dynamic ob, String header) {
    print("\n$header:");
    String pp = ob.toString();
    pp = pp.replaceAll("[", "{");
    pp = pp.replaceAll("]", "}");
    pp = pp.replaceAll(" ", ", ");
    for (int i = 0; i < idents.length; i++) {
      pp = pp.replaceAll(i.toString(), i.toString());
    }
    print(pp);
  }
  
  // The rules that applies() has to deal with are:
  // {factors} - if rule[0] is not string,
  // just apply one after the other recursively.
  // {"terminal", "a1"}       -- literal constants
  // {"or", <e1>, <e2>, ...}  -- (any) one of n
  // {"repeat", <e1>}         -- as per "{}" in ebnf
  // {"optional", <e1>}       -- as per "[]" in ebnf
  // {"ident", <name>, idx}   -- apply the sub-rule
  
  bool applies(Sequence rule) {
    int wasSdx = sdx; // in case of failure
    dynamic r1 = rule[0];
    
    if (r1 is! String) {
      for (int i = 0; i < rule.length; i++) {
        if (!applies(rule[i] as Sequence)) {
          sdx = wasSdx;
          return false;
        }
      }
    } else if (r1 == "terminal") {
      skipSpaces();
      String r2 = rule[1] as String;
      for (int i = 0; i < r2.length; i++) {
        if (sdx >= src.length || src[sdx] != r2[i]) {
          sdx = wasSdx;
          return false;
        }
        sdx++;
      }
    } else if (r1 == "or") {
      for (int i = 1; i < rule.length; i++) {
        if (applies(rule[i] as Sequence)) {
          return true;
        }
      }
      sdx = wasSdx;
      return false;
    } else if (r1 == "repeat") {
      while (applies(rule[1] as Sequence)) {
        // continue repeating
      }
    } else if (r1 == "optional") {
      applies(rule[1] as Sequence);
    } else if (r1 == "ident") {
      int i = rule[2] as int;
      int ii = ididx[i];
      if (!applies(productions[ii][2] as Sequence)) {
        sdx = wasSdx;
        return false;
      }
    } else {
      throw Exception("invalid rule in applies() function");
    }
    return true;
  }
  
  void checkValid(String test) {
    src = test;
    sdx = 0;
    bool res = applies(productions[0][2] as Sequence);
    skipSpaces();
    if (sdx < src.length) {
      res = false;
    }
    print('"$test", ${results[1 - btoi(res)]}');
  }
}

void main() {
  EBNFParser parser = EBNFParser();
  
  List<String> ebnfs = [
    '"a" {\n'
    '    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;\n'
    '} "z" ',
    
    '{\n'
    '    expr = term { plus term } .\n'
    '    term = factor { times factor } .\n'
    '    factor = number | \'(\' expr \')\' .\n'
    ' \n'
    '    plus = "+" | "-" .\n'
    '    times = "*" | "/" .\n'
    ' \n'
    '    number = digit { digit } .\n'
    '    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .\n'
    '}',
    
    'a = "1"',
    '{ a = "1" ;',
    '{ hello world = "1"; }',
    '{ foo = bar . }'
  ];
  
  List<List<String>> tests = [
    [
      "a1a3a4a4a5a6",
      "a1 a2a6",
      "a1 a3 a4 a6",
      "a1 a4 a5 a6",
      "a1 a2 a4 a5 a5 a6",
      "a1 a2 a4 a5 a6 a7",
      "your ad here"
    ],
    [
      "2",
      "2*3 + 4/23 - 7",
      "(3 + 4) * 6-2+(4*(4))",
      "-2",
      "3 +",
      "(4 + 3"
    ]
  ];
  
  for (int i = 0; i < ebnfs.length; i++) {
    if (parser.parse(ebnfs[i]) == 1) {
      print("\ntests:");
      if (i < tests.length) {
        for (String test in tests[i]) {
          parser.checkValid(test);
        }
      }
    }
    print("");
  }
}
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>

productions:
{{a,, 0,, {{terminal,, a1},, {or,, {terminal,, a2},, {terminal,, a3}},, {repeat,, {terminal,, a4}},, {optional,, {terminal,, a5}},, {terminal,, a6}}}}

idents:
{a}

ididx:
{0}

extras:
{{title,, a},, {comment,, z}}

tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>

productions:
{{expr,, 0,, {{ident,, term,, 1},, {repeat,, {{ident,, plus,, 2},, {ident,, term,, 1}}}}},, {term,, 1,, {{ident,, factor,, 3},, {repeat,, {{ident,, times,, 4},, {ident,, factor,, 3}}}}},, {factor,, 3,, {or,, {ident,, number,, 5},, {{terminal,, (},, {ident,, expr,, 0},, {terminal,, )}}}},, {plus,, 2,, {or,, {terminal,, +},, {terminal,, -}}},, {times,, 4,, {or,, {terminal,, *},, {terminal,, /}}},, {number,, 5,, {{ident,, digit,, 6},, {repeat,, {ident,, digit,, 6}}}},, {digit,, 6,, {or,, {terminal,, 0},, {terminal,, 1},, {terminal,, 2},, {terminal,, 3},, {terminal,, 4},, {terminal,, 5},, {terminal,, 6},, {terminal,, 7},, {terminal,, 8},, {terminal,, 9}}}}

idents:
{expr,, term,, plus,, factor,, times,, number,, digit}

ididx:
{0,, 1,, 3,, 2,, 4,, 5,, 6}

extras:
{}

tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail

parse:
a = "1" ===>
invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>
invalid ebnf (missing closing })

parse:
{ hello world = "1"; } ===>
invalid ebnf (= expected)

parse:
{ foo = bar . } ===>
invalid ebnf (undefined:bar)



Translation of: Phix


A more or less faithful translation except that indices are 0-based rather than 1-based and so 1 less than in the Phix results.

package main

import (
    "fmt"
    "strings"
)

// type aliases for Phix types
type object = interface{}
type sequence = []object

var (
    src         []rune
    ch          rune
    sdx         int
    token       object
    isSeq       bool
    err         = false
    idents      []string
    ididx       []int
    productions []sequence
    extras      sequence
    results     = [2]string{"pass", "fail"}
)

func btoi(b bool) int {
    if b {
        return 1
    }
    return 0
}

func invalid(msg string) int {
    err = true
    fmt.Println(msg)
    sdx = len(src) // set to eof
    return -1
}

func skipSpaces() {
    for {
        if sdx >= len(src) {
            break
        }
        ch = src[sdx]
        if strings.IndexRune(" \t\r\n", ch) == -1 {
            break
        }
        sdx++
    }
}

func getToken() {
    // Yields a single character token, one of {}()[]|=.;
    // or {"terminal",string} or {"ident", string} or -1.
    skipSpaces()
    if sdx >= len(src) {
        token = -1
        isSeq = false
        return
    }
    tokstart := sdx
    if strings.IndexRune("{}()[]|=.;", ch) >= 0 {
        sdx++
        token = ch
        isSeq = false
    } else if ch == '"' || ch == '\'' {
        closech := ch
        for tokend := sdx + 1; tokend < len(src); tokend++ {
            if src[tokend] == closech {
                sdx = tokend + 1
                token = sequence{"terminal", string(src[tokstart+1 : tokend])}
                isSeq = true
                return
            }
        }
        token = invalid("no closing quote")
        isSeq = false
    } else if ch >= 'a' && ch <= 'z' {
        // To simplify things for the purposes of this task,
        // identifiers are strictly a-z only, not A-Z or 1-9.
        for {
            sdx++
            if sdx >= len(src) {
                break
            }
            ch = src[sdx]
            if ch < 'a' || ch > 'z' {
                break
            }
        }
        token = sequence{"ident", string(src[tokstart:sdx])}
        isSeq = true
    } else {
        token = invalid("invalid ebnf")
        isSeq = false
    }
}

func matchToken(ch rune) {
    if token != ch {
        token = invalid(fmt.Sprintf("invalid ebnf (%c expected)", ch))
        isSeq = false
    } else {
        getToken()
    }
}

func addIdent(ident string) int {
    k := -1
    for i, id := range idents {
        if id == ident {
            k = i
            break
        }
    }
    if k == -1 {
        idents = append(idents, ident)
        k = len(idents) - 1
        ididx = append(ididx, -1)
    }
    return k
}

func factor() object {
    var res object
    if isSeq {
        t := token.([]object)
        if t[0] == "ident" {
            idx := addIdent(t[1].(string))
            t = append(t, idx)
            token = t
        }
        res = token
        getToken()
    } else if token == '[' {
        getToken()
        res = sequence{"optional", expression()}
        matchToken(']')
    } else if token == '(' {
        getToken()
        res = expression()
        matchToken(')')
    } else if token == '{' {
        getToken()
        res = sequence{"repeat", expression()}
        matchToken('}')
    } else {
        panic("invalid token in factor() function")
    }
    if s, ok := res.(sequence); ok && len(s) == 1 {
        return s[0]
    }
    return res
}

func term() object {
    res := sequence{factor()}
    tokens := []object{-1, '|', '.', ';', ')', ']', '}'}
outer:
    for {
        for _, t := range tokens {
            if t == token {
                break outer
            }
        }
        res = append(res, factor())
    }
    if len(res) == 1 {
        return res[0]
    }
    return res
}

func expression() object {
    res := sequence{term()}
    if token == '|' {
        res = sequence{"or", res[0]}
        for token == '|' {
            getToken()
            res = append(res, term())
        }
    }
    if len(res) == 1 {
        return res[0]
    }
    return res
}

func production() object {
    // Returns a token or -1; the real result is left in 'productions' etc,
    getToken()
    if token != '}' {
        if token == -1 {
            return invalid("invalid ebnf (missing closing })")
        }
        if !isSeq {
            return -1
        }
        t := token.(sequence)
        if t[0] != "ident" {
            return -1
        }
        ident := t[1].(string)
        idx := addIdent(ident)
        getToken()
        matchToken('=')
        if token == -1 {
            return -1
        }
        productions = append(productions, sequence{ident, idx, expression()})
        ididx[idx] = len(productions) - 1
    }
    return token
}

func parse(ebnf string) int {
    // Returns +1 if ok, -1 if bad.
    fmt.Printf("parse:\n%s ===>\n", ebnf)
    err = false
    src = []rune(ebnf)
    sdx = 0
    idents = idents[:0]
    ididx = ididx[:0]
    productions = productions[:0]
    extras = extras[:0]
    getToken()
    if isSeq {
        t := token.(sequence)
        t[0] = "title"
        extras = append(extras, token)
        getToken()
    }
    if token != '{' {
        return invalid("invalid ebnf (missing opening {)")
    }
    for {
        token = production()
        if token == '}' || token == -1 {
            break
        }
    }
    getToken()
    if isSeq {
        t := token.(sequence)
        t[0] = "comment"
        extras = append(extras, token)
        getToken()
    }
    if token != -1 {
        return invalid("invalid ebnf (missing eof?)")
    }
    if err {
        return -1
    }
    k := -1
    for i, idx := range ididx {
        if idx == -1 {
            k = i
            break
        }
    }
    if k != -1 {
        return invalid(fmt.Sprintf("invalid ebnf (undefined:%s)", idents[k]))
    }
    pprint(productions, "productions")
    pprint(idents, "idents")
    pprint(ididx, "ididx")
    pprint(extras, "extras")
    return 1
}

// Adjusts Go's normal printing of slices to look more like Phix output.
func pprint(ob object, header string) {
    fmt.Printf("\n%s:\n", header)
    pp := fmt.Sprintf("%q", ob)
    pp = strings.Replace(pp, "[", "{", -1)
    pp = strings.Replace(pp, "]", "}", -1)
    pp = strings.Replace(pp, " ", ", ", -1)
    for i := 0; i < len(idents); i++ {
        xs := fmt.Sprintf(`'\x%02d'`, i)
        is := fmt.Sprintf("%d", i)
        pp = strings.Replace(pp, xs, is, -1)
    }
    fmt.Println(pp)
}

// The rules that applies() has to deal with are:
// {factors} - if rule[0] is not string,
// just apply one after the other recursively.
// {"terminal", "a1"}       -- literal constants
// {"or", <e1>, <e2>, ...}  -- (any) one of n
// {"repeat", <e1>}         -- as per "{}" in ebnf
// {"optional", <e1>}       -- as per "[]" in ebnf
// {"ident", <name>, idx}   -- apply the sub-rule

func applies(rule sequence) bool {
    wasSdx := sdx // in case of failure
    r1 := rule[0]
    if _, ok := r1.(string); !ok {
        for i := 0; i < len(rule); i++ {
            if !applies(rule[i].(sequence)) {
                sdx = wasSdx
                return false
            }
        }
    } else if r1 == "terminal" {
        skipSpaces()
        r2 := []rune(rule[1].(string))
        for i := 0; i < len(r2); i++ {
            if sdx >= len(src) || src[sdx] != r2[i] {
                sdx = wasSdx
                return false
            }
            sdx++
        }
    } else if r1 == "or" {
        for i := 1; i < len(rule); i++ {
            if applies(rule[i].(sequence)) {
                return true
            }
        }
        sdx = wasSdx
        return false
    } else if r1 == "repeat" {
        for applies(rule[1].(sequence)) {
        }
    } else if r1 == "optional" {
        applies(rule[1].(sequence))
    } else if r1 == "ident" {
        i := rule[2].(int)
        ii := ididx[i]
        if !applies(productions[ii][2].(sequence)) {
            sdx = wasSdx
            return false
        }
    } else {
        panic("invalid rule in applies() function")
    }
    return true
}

func checkValid(test string) {
    src = []rune(test)
    sdx = 0
    res := applies(productions[0][2].(sequence))
    skipSpaces()
    if sdx < len(src) {
        res = false
    }
    fmt.Printf("%q, %s\n", test, results[1-btoi(res)])
}

func main() {
    ebnfs := []string{
        `"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z" `,
        `{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}`,
        `a = "1"`,
        `{ a = "1" ;`,
        `{ hello world = "1"; }`,
        `{ foo = bar . }`,
    }

    tests := [][]string{
        {
            "a1a3a4a4a5a6",
            "a1 a2a6",
            "a1 a3 a4 a6",
            "a1 a4 a5 a6",
            "a1 a2 a4 a5 a5 a6",
            "a1 a2 a4 a5 a6 a7",
            "your ad here",
        },
        {
            "2",
            "2*3 + 4/23 - 7",
            "(3 + 4) * 6-2+(4*(4))",
            "-2",
            "3 +",
            "(4 + 3",
        },
    }

    for i, ebnf := range ebnfs {
        if parse(ebnf) == +1 {
            fmt.Println("\ntests:")
            for _, test := range tests[i] {
                checkValid(test)
            }
        }
        fmt.Println()
    }
}
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>

productions:
{{"a", 0, {{"terminal", "a1"}, {"or", {"terminal", "a2"}, {"terminal", "a3"}}, {"repeat", {"terminal", "a4"}}, {"optional", {"terminal", "a5"}}, {"terminal", "a6"}}}}

idents:
{"a"}

ididx:
{0}

extras:
{{"title", "a"}, {"comment", "z"}}

tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>

productions:
{{"expr", 0, {{"ident", "term", 1}, {"repeat", {{"ident", "plus", 2}, {"ident", "term", 1}}}}}, {"term", 1, {{"ident", "factor", 3}, {"repeat", {{"ident", "times", 4}, {"ident", "factor", 3}}}}}, {"factor", 3, {"or", {"ident", "number", 5}, {{"terminal", "("}, {"ident", "expr", 0}, {"terminal", ")"}}}}, {"plus", 2, {"or", {"terminal", "+"}, {"terminal", "-"}}}, {"times", 4, {"or", {"terminal", "*"}, {"terminal", "/"}}}, {"number", 5, {{"ident", "digit", 6}, {"repeat", {"ident", "digit", 6}}}}, {"digit", 6, {"or", {"terminal", "0"}, {"terminal", "1"}, {"terminal", "2"}, {"terminal", "3"}, {"terminal", "4"}, {"terminal", "5"}, {"terminal", "6"}, {"terminal", "7"}, {"terminal", "8"}, {"terminal", "9"}}}}

idents:
{"expr", "term", "plus", "factor", "times", "number", "digit"}

ididx:
{0, 1, 3, 2, 4, 5, 6}

extras:
{}

tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail

parse:
a = "1" ===>
invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>
invalid ebnf (missing closing })

parse:
{ hello world = "1"; } ===>
invalid ebnf (= expected)

parse:
{ foo = bar . } ===>
invalid ebnf (undefined:bar)

We use Parsec to generate Parsec.

import Control.Applicative
import Control.Monad
import Data.Maybe
import qualified Data.Map as M
import System.Environment (getArgs)
import Text.Parsec hiding (many, optional, (<|>))
import Text.Parsec.String
import Text.Parsec.Error

-----------------------------------------------------------------
-- Main
-----------------------------------------------------------------

main = do
{- Uses the EBNF grammar contained in the first file to parse
the second file, then prints a parse tree. -}
    [grammar_file, other_file] <- getArgs
    ebnf_text <- readFile grammar_file
    case parseGrammar grammar_file ebnf_text of
        Left  err ->
            putStrLn $ "Failed to parse EBNF grammar: " ++ show err
        Right g   -> do
            putStrLn "Successfully parsed EBNF grammar."
            o <- readFile other_file
            case parseWithGrammar g other_file o of
                Left err ->
                    putStrLn $ "Failed to parse second file: " ++ show err
                Right tree ->
                    print tree

-----------------------------------------------------------------
-- Types and user functions
-----------------------------------------------------------------

parseGrammar :: FilePath -> String -> Either ParseError Grammar
parseGrammar fp s = 
    case runParser ebnf M.empty fp s of
        Left e ->
            Left e
        Right (Grammar g, usedNames) ->
            let undefinedRules = foldl (flip M.delete) usedNames $ map fst g
                (undefName, undefNamePos) = M.findMin undefinedRules
            in if   M.null undefinedRules
               then Right $ Grammar g
               else Left $ newErrorMessage
                        (Message $ "Undefined rule: " ++ undefName)
                        undefNamePos

parseWithGrammar :: Grammar -> FilePath -> String -> Either ParseError ParseTree
parseWithGrammar g@(Grammar ((_, firstR) : _)) fp s =
    runParser (liftA cleanTree $ firstR <* eof) g fp s

type GParser = Parsec String UsedNames
type UsedNames = M.Map String SourcePos

type Rule = Parsec String Grammar ParseTree
 -- We need to keep the Grammar around as a Parsec user state
 -- to look up named rules.
data Grammar = Grammar [(String, Rule)]
 -- Grammar would be a type synonym instead of an ADT, but
 -- infinite types aren't allowed in Haskell.

data ParseTree =
    ParseBranch String [ParseTree] |
    ParseLeaf String

instance Show ParseTree where
      show = showIndented 0
--    show (ParseBranch "" t) = '[' : concatMap ((' ' :) . show) t ++ "]"
--    show (ParseBranch s  t) = '(' : s ++ concatMap ((' ' :) . show) t ++ ")"
--    show (ParseLeaf s)      = show s

showIndented :: Int -> ParseTree -> String
showIndented i (ParseBranch "" []) =
    indent i "[]"
showIndented i (ParseBranch "" t) =
    indent i "[" ++
    concatMap (showIndented (i + 2)) t ++
    "]"
showIndented i (ParseBranch s  t) =
    indent i ("(" ++ s) ++
    concatMap (showIndented (i + 2)) t ++
    ")"
showIndented i (ParseLeaf s)      =
    indent i $ show s

indent :: Int -> String -> String
indent i s = "\n" ++ replicate i ' ' ++ s

cleanTree :: ParseTree -> ParseTree
-- Removes empty anonymous branches.
cleanTree (ParseBranch i ts) =
    ParseBranch i $ map cleanTree $ filter p ts
  where p (ParseBranch "" []) = False
        p _                   = True
cleanTree x                 = x

-----------------------------------------------------------------
-- GParser definitions
-----------------------------------------------------------------

ebnf :: GParser (Grammar, UsedNames)
ebnf = liftA2 (,) (ws *> syntax <* eof) getState

syntax :: GParser Grammar
syntax = liftA Grammar $
    optional title *>
    lcbtw '{' '}' (many production) <*
    optional comment

production :: GParser (String, Rule)
production = do
    i <- identifier
    lc '='
    r <- expression
    oneOf ".;"
    ws
    return (i, liftM (nameBranch i) r)
  where nameBranch i (ParseBranch _ l) = ParseBranch i l

expression, term :: GParser Rule
expression = liftA (foldl1 (<|>)) $ term `sepBy1` (lc '|')
term = liftA (branch . sequence) $ many1 factor

factor :: GParser Rule
factor = liftA try $
    liftA ruleByName rememberName <|>
    liftA (leaf . (<* ws) . string) literal <|>
    liftA perhaps (lcbtw '[' ']' expression) <|>
    lcbtw '(' ')' expression <|>
    liftA (branch . many) (lcbtw '{' '}' expression)
  where rememberName :: GParser String
        rememberName = do
            i <- identifier
            p <- getPosition
            modifyState $ M.insertWith (flip const) i p
              {- Adds i → p to the state only if i doesn't
              already have an entry. This ensures we report the
              *first* usage of each unknown identifier. -}
            return i

        ruleByName :: String -> Rule
        ruleByName name = do
            Grammar g <- getState
            fromJust (lookup name g) <?> name

        perhaps = option $ ParseLeaf ""

identifier :: GParser String
identifier = many1 (noneOf " \t\n=|(){}[].;\"'") <* ws

title = literal

comment = literal

literal =
       (lc '\'' *> manyTill anyChar (lc '\'')) <|>
       (lc '"'  *> manyTill anyChar (lc '"'))
    <* ws

-----------------------------------------------------------------
-- Miscellany
-----------------------------------------------------------------

leaf = liftA ParseLeaf
branch = liftA $ ParseBranch ""

lcbtw c1 c2 = between (lc c1) (lc c2)

lc :: Char -> GParser Char
lc c = char c <* ws

ws = many $ oneOf " \n\t"



Works with: Java version LTS version 17
Translation of: Go
import java.util.*;

public class EBNFParser {
    // Type aliases equivalent
    private static class Token {
        Object value;
        boolean isSequence;
        
        Token(Object value, boolean isSequence) {
            this.value = value;
            this.isSequence = isSequence;
        }
    }
    
    private static class Sequence extends ArrayList<Object> {
        public Sequence(Object... items) {
            Collections.addAll(this, items);
        }
    }
    
    private String src;
    private char ch;
    private int sdx;
    private Token token;
    private boolean err = false;
    private List<String> idents = new ArrayList<>();
    private List<Integer> ididx = new ArrayList<>();
    private List<Sequence> productions = new ArrayList<>();
    private Sequence extras = new Sequence();
    private String[] results = {"pass", "fail"};
    
    private int btoi(boolean b) {
        return b ? 1 : 0;
    }
    
    private int invalid(String msg) {
        err = true;
        System.out.println(msg);
        sdx = src.length(); // set to eof
        return -1;
    }
    
    private void skipSpaces() {
        while (sdx < src.length()) {
            ch = src.charAt(sdx);
            if (" \t\r\n".indexOf(ch) == -1) {
                break;
            }
            sdx++;
        }
    }
    
    private void getToken() {
        // Yields a single character token, one of {}()[]|=.;
        // or {"terminal",string} or {"ident", string} or -1.
        skipSpaces();
        if (sdx >= src.length()) {
            token = new Token(-1, false);
            return;
        }
        int tokstart = sdx;
        if ("{}()[]|=.;".indexOf(ch) >= 0) {
            sdx++;
            token = new Token(ch, false);
        } else if (ch == '"' || ch == '\'') {
            char closech = ch;
            int tokend = tokstart + 1;
            while (tokend < src.length() && src.charAt(tokend) != closech) {
                tokend++;
            }
            if (tokend >= src.length()) {
                token = new Token(invalid("no closing quote"), false);
            } else {
                sdx = tokend + 1;
                token = new Token(new Sequence("terminal", src.substring(tokstart + 1, tokend)), true);
            }
        } else if (ch >= 'a' && ch <= 'z') {
            // To simplify things for the purposes of this task,
            // identifiers are strictly a-z only, not A-Z or 1-9.
            while (sdx < src.length() && ch >= 'a' && ch <= 'z') {
                sdx++;
                if (sdx < src.length()) {
                    ch = src.charAt(sdx);
                }
            }
            token = new Token(new Sequence("ident", src.substring(tokstart, sdx)), true);
        } else {
            token = new Token(invalid("invalid ebnf"), false);
        }
    }
    
    private void matchToken(char expectedCh) {
        if (!token.value.equals(expectedCh)) {
            token = new Token(invalid(String.format("invalid ebnf (%c expected)", expectedCh)), false);
        } else {
            getToken();
        }
    }
    
    private int addIdent(String ident) {
        int k = idents.indexOf(ident);
        if (k == -1) {
            idents.add(ident);
            k = idents.size() - 1;
            ididx.add(-1);
        }
        return k;
    }
    
    private Object factor() {
        Object res;
        if (token.isSequence) {
            Sequence t = (Sequence) token.value;
            if (t.get(0).equals("ident")) {
                int idx = addIdent((String) t.get(1));
                t.add(idx);
                token.value = t;
            }
            res = token.value;
            getToken();
        } else if (token.value.equals('[')) {
            getToken();
            res = new Sequence("optional", expression());
            matchToken(']');
        } else if (token.value.equals('(')) {
            getToken();
            res = expression();
            matchToken(')');
        } else if (token.value.equals('{')) {
            getToken();
            res = new Sequence("repeat", expression());
            matchToken('}');
        } else {
            throw new RuntimeException("invalid token in factor() function");
        }
        if (res instanceof Sequence && ((Sequence) res).size() == 1) {
            return ((Sequence) res).get(0);
        }
        return res;
    }
    
    private Object term() {
        Sequence res = new Sequence(factor());
        Object[] tokens = {-1, '|', '.', ';', ')', ']', '}'};
        
        outer:
        while (true) {
            for (Object t : tokens) {
                if (t.equals(token.value)) {
                    break outer;
                }
            }
            res.add(factor());
        }
        
        if (res.size() == 1) {
            return res.get(0);
        }
        return res;
    }
    
    private Object expression() {
        Sequence res = new Sequence(term());
        if (token.value.equals('|')) {
            res = new Sequence("or", res.get(0));
            while (token.value.equals('|')) {
                getToken();
                res.add(term());
            }
        }
        if (res.size() == 1) {
            return res.get(0);
        }
        return res;
    }
    
    private Object production() {
        // Returns a token or -1; the real result is left in 'productions' etc,
        getToken();
        if (!token.value.equals('}')) {
            if (token.value.equals(-1)) {
                return invalid("invalid ebnf (missing closing })");
            }
            if (!token.isSequence) {
                return -1;
            }
            Sequence t = (Sequence) token.value;
            if (!t.get(0).equals("ident")) {
                return -1;
            }
            String ident = (String) t.get(1);
            int idx = addIdent(ident);
            getToken();
            matchToken('=');
            if (token.value.equals(-1)) {
                return -1;
            }
            productions.add(new Sequence(ident, idx, expression()));
            ididx.set(idx, productions.size() - 1);
        }
        return token.value;
    }
    
    private int parse(String ebnf) {
        // Returns +1 if ok, -1 if bad.
        System.out.printf("parse:\n%s ===>\n", ebnf);
        err = false;
        src = ebnf;
        sdx = 0;
        idents.clear();
        ididx.clear();
        productions.clear();
        extras.clear();
        
        getToken();
        if (token.isSequence) {
            Sequence t = (Sequence) token.value;
            t.set(0, "title");
            extras.add(token.value);
            getToken();
        }
        if (!token.value.equals('{')) {
            return invalid("invalid ebnf (missing opening {)");
        }
        
        while (true) {
            Object tokenResult = production();
            if (tokenResult.equals('}') || tokenResult.equals(-1)) {
                break;
            }
        }
        
        getToken();
        if (token.isSequence) {
            Sequence t = (Sequence) token.value;
            t.set(0, "comment");
            extras.add(token.value);
            getToken();
        }
        if (!token.value.equals(-1)) {
            return invalid("invalid ebnf (missing eof?)");
        }
        if (err) {
            return -1;
        }
        
        int k = -1;
        for (int i = 0; i < ididx.size(); i++) {
            if (ididx.get(i) == -1) {
                k = i;
                break;
            }
        }
        if (k != -1) {
            return invalid(String.format("invalid ebnf (undefined:%s)", idents.get(k)));
        }
        
        pprint(productions, "productions");
        pprint(idents, "idents");
        pprint(ididx, "ididx");
        pprint(extras, "extras");
        return 1;
    }
    
    // Adjusts Java's normal printing to look more like Phix output.
    private void pprint(Object ob, String header) {
        System.out.printf("\n%s:\n", header);
        String pp = ob.toString();
        pp = pp.replace("[", "{");
        pp = pp.replace("]", "}");
        pp = pp.replace(" ", ", ");
        for (int i = 0; i < idents.size(); i++) {
            pp = pp.replace(String.valueOf(i), String.valueOf(i));
        }
        System.out.println(pp);
    }
    
    // The rules that applies() has to deal with are:
    // {factors} - if rule[0] is not string,
    // just apply one after the other recursively.
    // {"terminal", "a1"}       -- literal constants
    // {"or", <e1>, <e2>, ...}  -- (any) one of n
    // {"repeat", <e1>}         -- as per "{}" in ebnf
    // {"optional", <e1>}       -- as per "[]" in ebnf
    // {"ident", <name>, idx}   -- apply the sub-rule
    
    private boolean applies(Sequence rule) {
        int wasSdx = sdx; // in case of failure
        Object r1 = rule.get(0);
        
        if (!(r1 instanceof String)) {
            for (int i = 0; i < rule.size(); i++) {
                if (!applies((Sequence) rule.get(i))) {
                    sdx = wasSdx;
                    return false;
                }
            }
        } else if (r1.equals("terminal")) {
            skipSpaces();
            String r2 = (String) rule.get(1);
            for (int i = 0; i < r2.length(); i++) {
                if (sdx >= src.length() || src.charAt(sdx) != r2.charAt(i)) {
                    sdx = wasSdx;
                    return false;
                }
                sdx++;
            }
        } else if (r1.equals("or")) {
            for (int i = 1; i < rule.size(); i++) {
                if (applies((Sequence) rule.get(i))) {
                    return true;
                }
            }
            sdx = wasSdx;
            return false;
        } else if (r1.equals("repeat")) {
            while (applies((Sequence) rule.get(1))) {
                // continue repeating
            }
        } else if (r1.equals("optional")) {
            applies((Sequence) rule.get(1));
        } else if (r1.equals("ident")) {
            int i = (Integer) rule.get(2);
            int ii = ididx.get(i);
            if (!applies((Sequence) productions.get(ii).get(2))) {
                sdx = wasSdx;
                return false;
            }
        } else {
            throw new RuntimeException("invalid rule in applies() function");
        }
        return true;
    }
    
    private void checkValid(String test) {
        src = test;
        sdx = 0;
        boolean res = applies((Sequence) productions.get(0).get(2));
        skipSpaces();
        if (sdx < src.length()) {
            res = false;
        }
        System.out.printf("\"%s\", %s\n", test, results[1 - btoi(res)]);
    }
    
    public static void main(String[] args) {
        EBNFParser parser = new EBNFParser();
        
        String[] ebnfs = {
            "\"a\" {\n" +
            "    a = \"a1\" ( \"a2\" | \"a3\" ) { \"a4\" } [ \"a5\" ] \"a6\" ;\n" +
            "} \"z\" ",
            
            "{\n" +
            "    expr = term { plus term } .\n" +
            "    term = factor { times factor } .\n" +
            "    factor = number | '(' expr ')' .\n" +
            " \n" +
            "    plus = \"+\" | \"-\" .\n" +
            "    times = \"*\" | \"/\" .\n" +
            " \n" +
            "    number = digit { digit } .\n" +
            "    digit = \"0\" | \"1\" | \"2\" | \"3\" | \"4\" | \"5\" | \"6\" | \"7\" | \"8\" | \"9\" .\n" +
            "}",
            
            "a = \"1\"",
            "{ a = \"1\" ;",
            "{ hello world = \"1\"; }",
            "{ foo = bar . }"
        };
        
        String[][] tests = {
            {
                "a1a3a4a4a5a6",
                "a1 a2a6",
                "a1 a3 a4 a6",
                "a1 a4 a5 a6",
                "a1 a2 a4 a5 a5 a6",
                "a1 a2 a4 a5 a6 a7",
                "your ad here"
            },
            {
                "2",
                "2*3 + 4/23 - 7",
                "(3 + 4) * 6-2+(4*(4))",
                "-2",
                "3 +",
                "(4 + 3"
            }
        };
        
        for (int i = 0; i < ebnfs.length; i++) {
            if (parser.parse(ebnfs[i]) == 1) {
                System.out.println("\ntests:");
                if (i < tests.length) {
                    for (String test : tests[i]) {
                        parser.checkValid(test);
                    }
                }
            }
            System.out.println();
        }
    }
}
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>

productions:
{{a,, 0,, {{terminal,, a1},, {or,, {terminal,, a2},, {terminal,, a3}},, {repeat,, {terminal,, a4}},, {optional,, {terminal,, a5}},, {terminal,, a6}}}}

idents:
{a}

ididx:
{0}

extras:
{{title,, a},, {comment,, z}}

tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>

productions:
{{expr,, 0,, {{ident,, term,, 1},, {repeat,, {{ident,, plus,, 2},, {ident,, term,, 1}}}}},, {term,, 1,, {{ident,, factor,, 3},, {repeat,, {{ident,, times,, 4},, {ident,, factor,, 3}}}}},, {factor,, 3,, {or,, {ident,, number,, 5},, {{terminal,, (},, {ident,, expr,, 0},, {terminal,, )}}}},, {plus,, 2,, {or,, {terminal,, +},, {terminal,, -}}},, {times,, 4,, {or,, {terminal,, *},, {terminal,, /}}},, {number,, 5,, {{ident,, digit,, 6},, {repeat,, {ident,, digit,, 6}}}},, {digit,, 6,, {or,, {terminal,, 0},, {terminal,, 1},, {terminal,, 2},, {terminal,, 3},, {terminal,, 4},, {terminal,, 5},, {terminal,, 6},, {terminal,, 7},, {terminal,, 8},, {terminal,, 9}}}}

idents:
{expr,, term,, plus,, factor,, times,, number,, digit}

ididx:
{0,, 1,, 3,, 2,, 4,, 5,, 6}

extras:
{}

tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail

parse:
a = "1" ===>
invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>
invalid ebnf (missing closing })

parse:
{ hello world = "1"; } ===>
invalid ebnf (= expected)

parse:
{ foo = bar . } ===>
invalid ebnf (undefined:bar)


Works with: NodeJS 16.14.2
Translation of: Java
class EBNFParser {
    constructor() {
        this.src = '';
        this.ch = '';
        this.sdx = 0;
        this.token = null;
        this.err = false;
        this.idents = [];
        this.ididx = [];
        this.productions = [];
        this.extras = [];
        this.results = ['pass', 'fail'];
    }

    btoi(b) {
        return b ? 1 : 0;
    }

    invalid(msg) {
        this.err = true;
        console.log(msg);
        this.sdx = this.src.length; // set to eof
        return -1;
    }

    skipSpaces() {
        while (this.sdx < this.src.length) {
            this.ch = this.src.charAt(this.sdx);
            if (' \t\r\n'.indexOf(this.ch) === -1) {
                break;
            }
            this.sdx++;
        }
    }

    getToken() {
        // Yields a single character token, one of {}()[]|=.;
        // or ["terminal",string] or ["ident", string] or -1.
        this.skipSpaces();
        if (this.sdx >= this.src.length) {
            this.token = { value: -1, isSequence: false };
            return;
        }
        const tokstart = this.sdx;
        if ('{}()[]|=.;'.indexOf(this.ch) >= 0) {
            this.sdx++;
            this.token = { value: this.ch, isSequence: false };
        } else if (this.ch === '"' || this.ch === "'") {
            const closech = this.ch;
            let tokend = tokstart + 1;
            while (tokend < this.src.length && this.src.charAt(tokend) !== closech) {
                tokend++;
            }
            if (tokend >= this.src.length) {
                this.token = { value: this.invalid('no closing quote'), isSequence: false };
            } else {
                this.sdx = tokend + 1;
                this.token = { 
                    value: ['terminal', this.src.substring(tokstart + 1, tokend)], 
                    isSequence: true 
                };
            }
        } else if (this.ch >= 'a' && this.ch <= 'z') {
            // To simplify things for the purposes of this task,
            // identifiers are strictly a-z only, not A-Z or 1-9.
            while (this.sdx < this.src.length && this.ch >= 'a' && this.ch <= 'z') {
                this.sdx++;
                if (this.sdx < this.src.length) {
                    this.ch = this.src.charAt(this.sdx);
                }
            }
            this.token = { 
                value: ['ident', this.src.substring(tokstart, this.sdx)], 
                isSequence: true 
            };
        } else {
            this.token = { value: this.invalid('invalid ebnf'), isSequence: false };
        }
    }

    matchToken(expectedCh) {
        if (this.token.value !== expectedCh) {
            this.token = { 
                value: this.invalid(`invalid ebnf (${expectedCh} expected)`), 
                isSequence: false 
            };
        } else {
            this.getToken();
        }
    }

    addIdent(ident) {
        let k = this.idents.indexOf(ident);
        if (k === -1) {
            this.idents.push(ident);
            k = this.idents.length - 1;
            this.ididx.push(-1);
        }
        return k;
    }

    factor() {
        let res;
        if (this.token.isSequence) {
            const t = this.token.value;
            if (t[0] === 'ident') {
                const idx = this.addIdent(t[1]);
                t.push(idx);
                this.token.value = t;
            }
            res = this.token.value;
            this.getToken();
        } else if (this.token.value === '[') {
            this.getToken();
            res = ['optional', this.expression()];
            this.matchToken(']');
        } else if (this.token.value === '(') {
            this.getToken();
            res = this.expression();
            this.matchToken(')');
        } else if (this.token.value === '{') {
            this.getToken();
            res = ['repeat', this.expression()];
            this.matchToken('}');
        } else {
            throw new Error('invalid token in factor() function');
        }
        if (Array.isArray(res) && res.length === 1) {
            return res[0];
        }
        return res;
    }

    term() {
        const res = [this.factor()];
        const tokens = [-1, '|', '.', ';', ')', ']', '}'];
        
        while (true) {
            if (tokens.includes(this.token.value)) {
                break;
            }
            res.push(this.factor());
        }
        
        if (res.length === 1) {
            return res[0];
        }
        return res;
    }

    expression() {
        let res = [this.term()];
        if (this.token.value === '|') {
            res = ['or', res[0]];
            while (this.token.value === '|') {
                this.getToken();
                res.push(this.term());
            }
        }
        if (res.length === 1) {
            return res[0];
        }
        return res;
    }

    production() {
        // Returns a token or -1; the real result is left in 'productions' etc,
        this.getToken();
        if (this.token.value !== '}') {
            if (this.token.value === -1) {
                return this.invalid('invalid ebnf (missing closing })');
            }
            if (!this.token.isSequence) {
                return -1;
            }
            const t = this.token.value;
            if (t[0] !== 'ident') {
                return -1;
            }
            const ident = t[1];
            const idx = this.addIdent(ident);
            this.getToken();
            this.matchToken('=');
            if (this.token.value === -1) {
                return -1;
            }
            this.productions.push([ident, idx, this.expression()]);
            this.ididx[idx] = this.productions.length - 1;
        }
        return this.token.value;
    }

    parse(ebnf) {
        // Returns +1 if ok, -1 if bad.
        console.log(`parse:\n${ebnf} ===>`);
        this.err = false;
        this.src = ebnf;
        this.sdx = 0;
        this.idents = [];
        this.ididx = [];
        this.productions = [];
        this.extras = [];
        
        this.getToken();
        if (this.token.isSequence) {
            const t = this.token.value;
            t[0] = 'title';
            this.extras.push(this.token.value);
            this.getToken();
        }
        if (this.token.value !== '{') {
            return this.invalid('invalid ebnf (missing opening {)');
        }
        
        while (true) {
            const tokenResult = this.production();
            if (tokenResult === '}' || tokenResult === -1) {
                break;
            }
        }
        
        this.getToken();
        if (this.token.isSequence) {
            const t = this.token.value;
            t[0] = 'comment';
            this.extras.push(this.token.value);
            this.getToken();
        }
        if (this.token.value !== -1) {
            return this.invalid('invalid ebnf (missing eof?)');
        }
        if (this.err) {
            return -1;
        }
        
        let k = -1;
        for (let i = 0; i < this.ididx.length; i++) {
            if (this.ididx[i] === -1) {
                k = i;
                break;
            }
        }
        if (k !== -1) {
            return this.invalid(`invalid ebnf (undefined:${this.idents[k]})`);
        }
        
        this.pprint(this.productions, 'productions');
        this.pprint(this.idents, 'idents');
        this.pprint(this.ididx, 'ididx');
        this.pprint(this.extras, 'extras');
        return 1;
    }

    // Adjusts JavaScript's normal printing to look more like Phix output.
    pprint(ob, header) {
        console.log(`\n${header}:`);
        let pp = JSON.stringify(ob);
        pp = pp.replace(/\[/g, '{');
        pp = pp.replace(/\]/g, '}');
        pp = pp.replace(/,/g, ', ');
        for (let i = 0; i < this.idents.length; i++) {
            pp = pp.replace(new RegExp(`\\b${i}\\b`, 'g'), String(i));
        }
        console.log(pp);
    }

    // The rules that applies() has to deal with are:
    // [factors] - if rule[0] is not string,
    // just apply one after the other recursively.
    // ["terminal", "a1"]       -- literal constants
    // ["or", <e1>, <e2>, ...}  -- (any) one of n
    // ["repeat", <e1>]         -- as per "{}" in ebnf
    // ["optional", <e1>]       -- as per "[]" in ebnf
    // ["ident", <name>, idx]   -- apply the sub-rule

    applies(rule) {
        const wasSdx = this.sdx; // in case of failure
        const r1 = rule[0];
        
        if (typeof r1 !== 'string') {
            for (let i = 0; i < rule.length; i++) {
                if (!this.applies(rule[i])) {
                    this.sdx = wasSdx;
                    return false;
                }
            }
        } else if (r1 === 'terminal') {
            this.skipSpaces();
            const r2 = rule[1];
            for (let i = 0; i < r2.length; i++) {
                if (this.sdx >= this.src.length || this.src.charAt(this.sdx) !== r2.charAt(i)) {
                    this.sdx = wasSdx;
                    return false;
                }
                this.sdx++;
            }
        } else if (r1 === 'or') {
            for (let i = 1; i < rule.length; i++) {
                if (this.applies(rule[i])) {
                    return true;
                }
            }
            this.sdx = wasSdx;
            return false;
        } else if (r1 === 'repeat') {
            while (this.applies(rule[1])) {
                // continue repeating
            }
        } else if (r1 === 'optional') {
            this.applies(rule[1]);
        } else if (r1 === 'ident') {
            const i = rule[2];
            const ii = this.ididx[i];
            if (!this.applies(this.productions[ii][2])) {
                this.sdx = wasSdx;
                return false;
            }
        } else {
            throw new Error('invalid rule in applies() function');
        }
        return true;
    }

    checkValid(test) {
        this.src = test;
        this.sdx = 0;
        let res = this.applies(this.productions[0][2]);
        this.skipSpaces();
        if (this.sdx < this.src.length) {
            res = false;
        }
        console.log(`"${test}", ${this.results[1 - this.btoi(res)]}`);
    }
}

// Main execution
const parser = new EBNFParser();

const ebnfs = [
    '"a" {\n' +
    '    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;\n' +
    '} "z" ',
    
    '{\n' +
    '    expr = term { plus term } .\n' +
    '    term = factor { times factor } .\n' +
    '    factor = number | \'(\' expr \')\' .\n' +
    ' \n' +
    '    plus = "+" | "-" .\n' +
    '    times = "*" | "/" .\n' +
    ' \n' +
    '    number = digit { digit } .\n' +
    '    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .\n' +
    '}',
    
    'a = "1"',
    '{ a = "1" ;',
    '{ hello world = "1"; }',
    '{ foo = bar . }'
];

const tests = [
    [
        'a1a3a4a4a5a6',
        'a1 a2a6',
        'a1 a3 a4 a6',
        'a1 a4 a5 a6',
        'a1 a2 a4 a5 a5 a6',
        'a1 a2 a4 a5 a6 a7',
        'your ad here'
    ],
    [
        '2',
        '2*3 + 4/23 - 7',
        '(3 + 4) * 6-2+(4*(4))',
        '-2',
        '3 +',
        '(4 + 3'
    ]
];

for (let i = 0; i < ebnfs.length; i++) {
    if (parser.parse(ebnfs[i]) === 1) {
        console.log('\ntests:');
        if (i < tests.length) {
            for (const test of tests[i]) {
                parser.checkValid(test);
            }
        }
    }
    console.log('');
}
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>

productions:
{{"a", 0, {{"terminal", "a1"}, {"or", {"terminal", "a2"}, {"terminal", "a3"}}, {"repeat", {"terminal", "a4"}}, {"optional", {"terminal", "a5"}}, {"terminal", "a6"}}}}

idents:
{"a"}

ididx:
{0}

extras:
{{"title", "a"}, {"comment", "z"}}

tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>

productions:
{{"expr", 0, {{"ident", "term", 1}, {"repeat", {{"ident", "plus", 2}, {"ident", "term", 1}}}}}, {"term", 1, {{"ident", "factor", 3}, {"repeat", {{"ident", "times", 4}, {"ident", "factor", 3}}}}}, {"factor", 3, {"or", {"ident", "number", 5}, {{"terminal", "("}, {"ident", "expr", 0}, {"terminal", ")"}}}}, {"plus", 2, {"or", {"terminal", "+"}, {"terminal", "-"}}}, {"times", 4, {"or", {"terminal", "*"}, {"terminal", "/"}}}, {"number", 5, {{"ident", "digit", 6}, {"repeat", {"ident", "digit", 6}}}}, {"digit", 6, {"or", {"terminal", "0"}, {"terminal", "1"}, {"terminal", "2"}, {"terminal", "3"}, {"terminal", "4"}, {"terminal", "5"}, {"terminal", "6"}, {"terminal", "7"}, {"terminal", "8"}, {"terminal", "9"}}}}

idents:
{"expr", "term", "plus", "factor", "times", "number", "digit"}

ididx:
{0, 1, 3, 2, 4, 5, 6}

extras:
{}

tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail

parse:
a = "1" ===>
invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>
invalid ebnf (missing closing })

parse:
{ hello world = "1"; } ===>
invalid ebnf (= expected)

parse:
{ foo = bar . } ===>
invalid ebnf (undefined:bar)

Creates a Grammar struct from an EBNF grammar, which has a matching Regex for each production of the EBNF grammar.

Tested with Julia v1.7.2

struct Grammar
    regex::Regex
    rules::Dict{Symbol,Regex}
    root::Regex
    function Grammar(regex::Regex)
        names = keys(match(regex, ""))
        new(regex,
            Dict{Symbol,Regex}(
                Symbol(name) => Regex(
                    "$(regex.pattern)(?&$(name))",
                    regex.compile_options,
                    regex.match_options
                )
                for name in names if name isa String),
            Regex(
                "$(regex.pattern)^\\s*(?&$(first(names)))\\s*\$",
                regex.compile_options,
                regex.match_options))
    end
end

Base.getindex(grammar::Grammar, key) = grammar.rules[key]
Base.occursin(grammar::Grammar, str::AbstractString) = occursin(grammar.root, str)
Base.show(io::IO, grammar::Grammar) = print(io, "Grammar(", grammar.regex, ")")

const ebnfgrammar = Grammar(r"""
(?(DEFINE) # EBNF syntax
(?<syntax>
    (?&title)? \s*+ { \s*+ ( (?&production) \s*+ )* } \s*+ (?&comment)? )
(?<production>
    (?&identifier) \s*+ = \s*+ (?&expression) \s*+ [.;] )
(?<expression>
    (?&term) ( \s*+ \| \s*+ (?&term) )* )
(?<term>
    (?&factor) ( \s*+ (?&factor) )* )
(?<factor>
    (?&identifier)
|   (?&literal)
|   \[ \s*+ (?&expression) \s*+ \]
|   \( \s*+ (?&expression) \s*+ \)
|    { \s*+ (?&expression) \s*+  } )
(?<identifier>
    [^'"\s=|(){}[\].;]+ )
(?<title>
    (?&literal) )
(?<comment>
    (?&literal) )
(?<literal>
    ' [^']+ '
|   " [^"]+ " )
)"""x)

function syntax(grammar::Grammar, str::AbstractString)
    if occursin(grammar[:syntax], str)
        """(?(DEFINE)$(join(production(grammar, eachmatch(grammar[:production], str)))))"""
    else
        throw(ErrorException("Invalid EBNF syntax."))
    end
end

production(grammar::Grammar, iter) = (production(grammar, m.match) for m in iter)
function production(grammar::Grammar, str::AbstractString)
    rstrip(c -> isspace(c) || c == '.' || c == ';', str)
    id, expr = split(str, r"\s*=\s*", limit=2)
    "(?<$(id)>" * join(
        expression(grammar, eachmatch(grammar[:expression], expr)),
        "\\s*+|\\s*+") * "\\s*+)"
end

expression(grammar::Grammar, iter) = (expression(grammar, m.match) for m in iter)
function expression(grammar::Grammar, str::AbstractString)
    join(term(grammar, eachmatch(grammar[:term], str)), "\\s*+|\\s*+")
end

term(grammar::Grammar, iter) = (term(grammar, m.match) for m in iter)
function term(grammar::Grammar, str::AbstractString)
    join(factor(grammar, eachmatch(grammar[:factor], str)), "\\s*+")
end

factor(grammar::Grammar, iter) = (factor(grammar, m.match) for m in iter)
function factor(grammar::Grammar, str::AbstractString)
    str = strip(str)
    if startswith(str, r"['\"]")
        literal(grammar, str)
    elseif startswith(str, r"[[({]")
        "(\\s*+" * expression(grammar, str[begin+1:end-1]) * (
            if startswith(str, '[')
                "\\s*+)?"
            elseif startswith(str, '{')
                "\\s*+)*"
            else
                "\\s*+)"
            end
        )
    else
        "(?&$(str))"
    end
end

literal(::Grammar, str::AbstractString) = "\\Q$(str[begin+1:end-1])\\E"

grammar(ebnf::AbstractString) = Grammar(Regex(syntax(ebnfgrammar, ebnf)))

using Test

oneliner = grammar("""
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"
""")

@testset "One liner" begin
    @test occursin(oneliner, "a1a3a4a4a5a6")
    @test occursin(oneliner, "a1 a2a6")
    @test occursin(oneliner, "a1 a3 a4 a6")
    @test !occursin(oneliner, "a1 a4 a5 a6")
    @test !occursin(oneliner, "a1 a2 a4 a5 a5 a6")
    @test !occursin(oneliner, "a1 a2 a4 a5 a6 a7")
    @test !occursin(oneliner, "your ad here")
end

arithmetic = grammar("""
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .

    plus = "+" | "-" .
    times = "*" | "/" .

    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}
""")

@testset "Arithmetic expressions" begin
    @test occursin(arithmetic, "2")
    @test occursin(arithmetic, "2*3 + 4/23 - 7")
    @test occursin(arithmetic, "(3 + 4) * 6-2+(4*(4))")
    @test !occursin(arithmetic, "-2")
    @test !occursin(arithmetic, "3 +")
    @test !occursin(arithmetic, "(4 + 3")
end

@testset "Invalid EBNF" begin
    @test_throws ErrorException grammar("""a = "1";""")
    @test_throws ErrorException grammar("""{ a = "1";""")
    @test_throws ErrorException grammar("""{ hello world = "1"; }""")
    @test_throws ErrorException grammar("{ foo = bar . }")
end
Works with: Kotlin version 1.3.31
Translation of: Java
class MainKt {
    // Data classes for better structure
    private data class Token(val value: Any, val isSequence: Boolean)
    
    private class Sequence(vararg items: Any) : ArrayList<Any>() {
        init {
            addAll(items)
        }
    }
    
    private var src: String = ""
    private var ch: Char = ' '
    private var sdx: Int = 0
    private var token: Token = Token(-1, false)
    private var err: Boolean = false
    private val idents = mutableListOf<String>()
    private val ididx = mutableListOf<Int>()
    private val productions = mutableListOf<Sequence>()
    private val extras = Sequence()
    private val results = arrayOf("pass", "fail")
    
    private fun Boolean.toInt() = if (this) 1 else 0
    
    private fun invalid(msg: String): Int {
        err = true
        println(msg)
        sdx = src.length // set to eof
        return -1
    }
    
    private fun skipSpaces() {
        while (sdx < src.length) {
            ch = src[sdx]
            if (ch !in " \t\r\n") {
                break
            }
            sdx++
        }
    }
    
    private fun getToken() {
        // Yields a single character token, one of {}()[]|=.;
        // or {"terminal",string} or {"ident", string} or -1.
        skipSpaces()
        if (sdx >= src.length) {
            token = Token(-1, false)
            return
        }
        val tokstart = sdx
        when {
            ch in "{}()[]|=.;" -> {
                sdx++
                token = Token(ch, false)
            }
            ch == '"' || ch == '\'' -> {
                val closech = ch
                var tokend = tokstart + 1
                while (tokend < src.length && src[tokend] != closech) {
                    tokend++
                }
                if (tokend >= src.length) {
                    token = Token(invalid("no closing quote"), false)
                } else {
                    sdx = tokend + 1
                    token = Token(Sequence("terminal", src.substring(tokstart + 1, tokend)), true)
                }
            }
            ch in 'a'..'z' -> {
                // To simplify things for the purposes of this task,
                // identifiers are strictly a-z only, not A-Z or 1-9.
                while (sdx < src.length && ch in 'a'..'z') {
                    sdx++
                    if (sdx < src.length) {
                        ch = src[sdx]
                    }
                }
                token = Token(Sequence("ident", src.substring(tokstart, sdx)), true)
            }
            else -> {
                token = Token(invalid("invalid ebnf"), false)
            }
        }
    }
    
    private fun matchToken(expectedCh: Char) {
        if (token.value != expectedCh) {
            token = Token(invalid("invalid ebnf ($expectedCh expected)"), false)
        } else {
            getToken()
        }
    }
    
    private fun addIdent(ident: String): Int {
        var k = idents.indexOf(ident)
        if (k == -1) {
            idents.add(ident)
            k = idents.size - 1
            ididx.add(-1)
        }
        return k
    }
    
    private fun factor(): Any {
        val res: Any = when {
            token.isSequence -> {
                val t = token.value as Sequence
                if (t[0] == "ident") {
                    val idx = addIdent(t[1] as String)
                    t.add(idx)
                    token = Token(t, true)
                }
                val result = token.value
                getToken()
                result
            }
            token.value == '[' -> {
                getToken()
                val result = Sequence("optional", expression())
                matchToken(']')
                result
            }
            token.value == '(' -> {
                getToken()
                val result = expression()
                matchToken(')')
                result
            }
            token.value == '{' -> {
                getToken()
                val result = Sequence("repeat", expression())
                matchToken('}')
                result
            }
            else -> throw RuntimeException("invalid token in factor() function")
        }
        return if (res is Sequence && res.size == 1) res[0] else res
    }
    
    private fun term(): Any {
        val res = Sequence(factor())
        val tokens = setOf(-1, '|', '.', ';', ')', ']', '}')
        
        while (token.value !in tokens) {
            res.add(factor())
        }
        
        return if (res.size == 1) res[0] else res
    }
    
    private fun expression(): Any {
        var res = Sequence(term())
        if (token.value == '|') {
            res = Sequence("or", res[0])
            while (token.value == '|') {
                getToken()
                res.add(term())
            }
        }
        return if (res.size == 1) res[0] else res
    }
    
    private fun production(): Any {
        // Returns a token or -1; the real result is left in 'productions' etc,
        getToken()
        if (token.value != '}') {
            if (token.value == -1) {
                return invalid("invalid ebnf (missing closing })")
            }
            if (!token.isSequence) {
                return -1
            }
            val t = token.value as Sequence
            if (t[0] != "ident") {
                return -1
            }
            val ident = t[1] as String
            val idx = addIdent(ident)
            getToken()
            matchToken('=')
            if (token.value == -1) {
                return -1
            }
            productions.add(Sequence(ident, idx, expression()))
            ididx[idx] = productions.size - 1
        }
        return token.value
    }
    
    private fun parse(ebnf: String): Int {
        // Returns +1 if ok, -1 if bad.
        println("parse:\n$ebnf ===>\n")
        err = false
        src = ebnf
        sdx = 0
        idents.clear()
        ididx.clear()
        productions.clear()
        extras.clear()
        
        getToken()
        if (token.isSequence) {
            val t = token.value as Sequence
            t[0] = "title"
            extras.add(token.value)
            getToken()
        }
        if (token.value != '{') {
            return invalid("invalid ebnf (missing opening {)")
        }
        
        while (true) {
            val tokenResult = production()
            if (tokenResult == '}' || tokenResult == -1) {
                break
            }
        }
        
        getToken()
        if (token.isSequence) {
            val t = token.value as Sequence
            t[0] = "comment"
            extras.add(token.value)
            getToken()
        }
        if (token.value != -1) {
            return invalid("invalid ebnf (missing eof?)")
        }
        if (err) {
            return -1
        }
        
        val k = ididx.indexOfFirst { it == -1 }
        if (k != -1) {
            return invalid("invalid ebnf (undefined:${idents[k]})")
        }
        
        pprint(productions, "productions")
        pprint(idents, "idents")
        pprint(ididx, "ididx")
        pprint(extras, "extras")
        return 1
    }
    
    // Adjusts Kotlin's normal printing to look more like Phix output.
    private fun pprint(ob: Any, header: String) {
        println("\n$header:")
        var pp = ob.toString()
        pp = pp.replace("[", "{")
        pp = pp.replace("]", "}")
        pp = pp.replace(" ", ", ")
        for (i in idents.indices) {
            pp = pp.replace(i.toString(), i.toString())
        }
        println(pp)
    }
    
    // The rules that applies() has to deal with are:
    // {factors} - if rule[0] is not string,
    // just apply one after the other recursively.
    // {"terminal", "a1"}       -- literal constants
    // {"or", <e1>, <e2>, ...}  -- (any) one of n
    // {"repeat", <e1>}         -- as per "{}" in ebnf
    // {"optional", <e1>}       -- as per "[]" in ebnf
    // {"ident", <name>, idx}   -- apply the sub-rule
    
    private fun applies(rule: Sequence): Boolean {
        val wasSdx = sdx // in case of failure
        val r1 = rule[0]
        
        when {
            r1 !is String -> {
                for (i in rule.indices) {
                    if (!applies(rule[i] as Sequence)) {
                        sdx = wasSdx
                        return false
                    }
                }
            }
            r1 == "terminal" -> {
                skipSpaces()
                val r2 = rule[1] as String
                for (i in r2.indices) {
                    if (sdx >= src.length || src[sdx] != r2[i]) {
                        sdx = wasSdx
                        return false
                    }
                    sdx++
                }
            }
            r1 == "or" -> {
                for (i in 1 until rule.size) {
                    if (applies(rule[i] as Sequence)) {
                        return true
                    }
                }
                sdx = wasSdx
                return false
            }
            r1 == "repeat" -> {
                while (applies(rule[1] as Sequence)) {
                    // continue repeating
                }
            }
            r1 == "optional" -> {
                applies(rule[1] as Sequence)
            }
            r1 == "ident" -> {
                val i = rule[2] as Int
                val ii = ididx[i]
                if (!applies(productions[ii][2] as Sequence)) {
                    sdx = wasSdx
                    return false
                }
            }
            else -> throw RuntimeException("invalid rule in applies() function")
        }
        return true
    }
    
    private fun checkValid(test: String) {
        src = test
        sdx = 0
        var res = applies(productions[0][2] as Sequence)
        skipSpaces()
        if (sdx < src.length) {
            res = false
        }
        println("\"$test\", ${results[1 - res.toInt()]}")
    }
    
    companion object {
        @JvmStatic
        fun main(args: Array<String>) {
            val parser = MainKt()
            
            val ebnfs = arrayOf(
                "\"a\" {\n" +
                "    a = \"a1\" ( \"a2\" | \"a3\" ) { \"a4\" } [ \"a5\" ] \"a6\" ;\n" +
                "} \"z\" ",
                
                "{\n" +
                "    expr = term { plus term } .\n" +
                "    term = factor { times factor } .\n" +
                "    factor = number | '(' expr ')' .\n" +
                " \n" +
                "    plus = \"+\" | \"-\" .\n" +
                "    times = \"*\" | \"/\" .\n" +
                " \n" +
                "    number = digit { digit } .\n" +
                "    digit = \"0\" | \"1\" | \"2\" | \"3\" | \"4\" | \"5\" | \"6\" | \"7\" | \"8\" | \"9\" .\n" +
                "}",
                
                "a = \"1\"",
                "{ a = \"1\" ;",
                "{ hello world = \"1\"; }",
                "{ foo = bar . }"
            )
            
            val tests = arrayOf(
                arrayOf(
                    "a1a3a4a4a5a6",
                    "a1 a2a6",
                    "a1 a3 a4 a6",
                    "a1 a4 a5 a6",
                    "a1 a2 a4 a5 a5 a6",
                    "a1 a2 a4 a5 a6 a7",
                    "your ad here"
                ),
                arrayOf(
                    "2",
                    "2*3 + 4/23 - 7",
                    "(3 + 4) * 6-2+(4*(4))",
                    "-2",
                    "3 +",
                    "(4 + 3"
                )
            )
            
            for (i in ebnfs.indices) {
                if (parser.parse(ebnfs[i]) == 1) {
                    println("\ntests:")
                    if (i < tests.size) {
                        for (test in tests[i]) {
                            parser.checkValid(test)
                        }
                    }
                }
                println()
            }
        }
    }
}
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>


productions:
{{a,, 0,, {{terminal,, a1},, {or,, {terminal,, a2},, {terminal,, a3}},, {repeat,, {terminal,, a4}},, {optional,, {terminal,, a5}},, {terminal,, a6}}}}

idents:
{a}

ididx:
{0}

extras:
{{title,, a},, {comment,, z}}

tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>


productions:
{{expr,, 0,, {{ident,, term,, 1},, {repeat,, {{ident,, plus,, 2},, {ident,, term,, 1}}}}},, {term,, 1,, {{ident,, factor,, 3},, {repeat,, {{ident,, times,, 4},, {ident,, factor,, 3}}}}},, {factor,, 3,, {or,, {ident,, number,, 5},, {{terminal,, (},, {ident,, expr,, 0},, {terminal,, )}}}},, {plus,, 2,, {or,, {terminal,, +},, {terminal,, -}}},, {times,, 4,, {or,, {terminal,, *},, {terminal,, /}}},, {number,, 5,, {{ident,, digit,, 6},, {repeat,, {ident,, digit,, 6}}}},, {digit,, 6,, {or,, {terminal,, 0},, {terminal,, 1},, {terminal,, 2},, {terminal,, 3},, {terminal,, 4},, {terminal,, 5},, {terminal,, 6},, {terminal,, 7},, {terminal,, 8},, {terminal,, 9}}}}

idents:
{expr,, term,, plus,, factor,, times,, number,, digit}

ididx:
{0,, 1,, 3,, 2,, 4,, 5,, 6}

extras:
{}

tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail

parse:
a = "1" ===>

invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>

invalid ebnf (missing closing })

parse:
{ hello world = "1"; } ===>

invalid ebnf (= expected)

parse:
{ foo = bar . } ===>

invalid ebnf (undefined:bar)


Works with: Lua version 5.4
Translation of: Python
-- EBNF Parser in Lua

local EBNFParser = {}
EBNFParser.__index = EBNFParser

function EBNFParser:new()
    local self = setmetatable({}, EBNFParser)
    self.src = ""
    self.ch = ""
    self.sdx = 1  -- Lua uses 1-based indexing
    self.token = nil
    self.err = false
    self.idents = {}
    self.ididx = {}
    self.productions = {}
    self.extras = {}
    self.results = {"pass", "fail"}
    return self
end

function EBNFParser:btoi(b)
    return b and 1 or 0
end

function EBNFParser:invalid(msg)
    self.err = true
    print(msg)
    self.sdx = #self.src + 1  -- set to eof
    return -1
end

function EBNFParser:skipSpaces()
    while self.sdx <= #self.src do
        self.ch = self.src:sub(self.sdx, self.sdx)
        if not self.ch:match("[ \t\r\n]") then
            break
        end
        self.sdx = self.sdx + 1
    end
end

function EBNFParser:getToken()
    self:skipSpaces()
    if self.sdx > #self.src then
        self.token = {value = -1, isSequence = false}
        return
    end
    
    local tokstart = self.sdx
    if self.ch:match("[{}()%[%]|=.;]") then
        self.sdx = self.sdx + 1
        self.token = {value = self.ch, isSequence = false}
    elseif self.ch == '"' or self.ch == "'" then
        local closech = self.ch
        local tokend = tokstart + 1
        while tokend <= #self.src and self.src:sub(tokend, tokend) ~= closech do
            tokend = tokend + 1
        end
        if tokend > #self.src then
            self.token = {value = self:invalid("no closing quote"), isSequence = false}
        else
            self.sdx = tokend + 1
            self.token = {
                value = {"terminal", self.src:sub(tokstart + 1, tokend - 1)},
                isSequence = true
            }
        end
    elseif self.ch:match("[a-z]") then
        -- Identifiers are strictly a-z only
        while self.sdx <= #self.src and self.ch:match("[a-z]") do
            self.sdx = self.sdx + 1
            if self.sdx <= #self.src then
                self.ch = self.src:sub(self.sdx, self.sdx)
            end
        end
        self.token = {
            value = {"ident", self.src:sub(tokstart, self.sdx - 1)},
            isSequence = true
        }
    else
        self.token = {value = self:invalid("invalid ebnf"), isSequence = false}
    end
end

function EBNFParser:matchToken(expectedCh)
    if self.token.value ~= expectedCh then
        self.token = {
            value = self:invalid("invalid ebnf (" .. expectedCh .. " expected)"),
            isSequence = false
        }
    else
        self:getToken()
    end
end

function EBNFParser:addIdent(ident)
    for i, id in ipairs(self.idents) do
        if id == ident then
            return i
        end
    end
    table.insert(self.idents, ident)
    local k = #self.idents
    table.insert(self.ididx, -1)
    return k
end

function EBNFParser:factor()
    local res
    if self.token.isSequence then
        local t = self.token.value
        if t[1] == "ident" then
            local idx = self:addIdent(t[2])
            table.insert(t, idx)
            self.token.value = t
        end
        res = self.token.value
        self:getToken()
    elseif self.token.value == "[" then
        self:getToken()
        res = {"optional", self:expression()}
        self:matchToken("]")
    elseif self.token.value == "(" then
        self:getToken()
        res = self:expression()
        self:matchToken(")")
    elseif self.token.value == "{" then
        self:getToken()
        res = {"repeat", self:expression()}
        self:matchToken("}")
    else
        error("invalid token in factor() function")
    end
    
    if type(res) == "table" and #res == 1 then
        return res[1]
    end
    return res
end

function EBNFParser:term()
    local res = {self:factor()}
    local tokens = {-1, "|", ".", ";", ")", "]", "}"}
    
    while true do
        local found = false
        for _, token in ipairs(tokens) do
            if self.token.value == token then
                found = true
                break
            end
        end
        if found then
            break
        end
        table.insert(res, self:factor())
    end
    
    if #res == 1 then
        return res[1]
    end
    return res
end

function EBNFParser:expression()
    local res = {self:term()}
    if self.token.value == "|" then
        res = {"or", res[1]}
        while self.token.value == "|" do
            self:getToken()
            table.insert(res, self:term())
        end
    end
    
    if #res == 1 then
        return res[1]
    end
    return res
end

function EBNFParser:production()
    self:getToken()
    if self.token.value ~= "}" then
        if self.token.value == -1 then
            return self:invalid("invalid ebnf (missing closing })")
        end
        if not self.token.isSequence then
            return -1
        end
        
        local t = self.token.value
        if t[1] ~= "ident" then
            return -1
        end
        
        local ident = t[2]
        local idx = self:addIdent(ident)
        self:getToken()
        self:matchToken("=")
        if self.token.value == -1 then
            return -1
        end
        
        table.insert(self.productions, {ident, idx, self:expression()})
        self.ididx[idx] = #self.productions
    end
    
    return self.token.value
end

function EBNFParser:parse(ebnf)
    print("parse:\n" .. ebnf .. " ===>")
    self.err = false
    self.src = ebnf
    self.sdx = 1
    self.idents = {}
    self.ididx = {}
    self.productions = {}
    self.extras = {}
    
    self:getToken()
    if self.token.isSequence then
        local t = self.token.value
        t[1] = "title"
        table.insert(self.extras, self.token.value)
        self:getToken()
    end
    
    if self.token.value ~= "{" then
        return self:invalid("invalid ebnf (missing opening {)")
    end
    
    while true do
        local tokenResult = self:production()
        if tokenResult == "}" or tokenResult == -1 then
            break
        end
    end
    
    self:getToken()
    if self.token.isSequence then
        local t = self.token.value
        t[1] = "comment"
        table.insert(self.extras, self.token.value)
        self:getToken()
    end
    
    if self.token.value ~= -1 then
        return self:invalid("invalid ebnf (missing eof?)")
    end
    
    if self.err then
        return -1
    end
    
    local k = -1
    for i = 1, #self.ididx do
        if self.ididx[i] == -1 then
            k = i
            break
        end
    end
    if k ~= -1 then
        return self:invalid("invalid ebnf (undefined:" .. self.idents[k] .. ")")
    end
    
    self:pprint(self.productions, "productions")
    self:pprint(self.idents, "idents")
    self:pprint(self.ididx, "ididx")
    self:pprint(self.extras, "extras")
    return 1
end

-- Simple JSON-like serialization for Lua tables
local function serialize(t, depth)
    depth = depth or 0
    if type(t) ~= "table" then
        if type(t) == "string" then
            return '"' .. t .. '"'
        else
            return tostring(t)
        end
    end
    
    local result = "{"
    local first = true
    for i, v in ipairs(t) do
        if not first then
            result = result .. ", "
        end
        result = result .. serialize(v, depth + 1)
        first = false
    end
    result = result .. "}"
    return result
end

function EBNFParser:pprint(ob, header)
    print("\n" .. header .. ":")
    local pp = serialize(ob)
    print(pp)
end

function EBNFParser:applies(rule)
    local wasSdx = self.sdx  -- in case of failure
    local r1 = rule[1]
    
    if type(r1) ~= "string" then
        for i = 1, #rule do
            if not self:applies(rule[i]) then
                self.sdx = wasSdx
                return false
            end
        end
    elseif r1 == "terminal" then
        self:skipSpaces()
        local r2 = rule[2]
        for i = 1, #r2 do
            if self.sdx > #self.src or self.src:sub(self.sdx, self.sdx) ~= r2:sub(i, i) then
                self.sdx = wasSdx
                return false
            end
            self.sdx = self.sdx + 1
        end
    elseif r1 == "or" then
        for i = 2, #rule do
            if self:applies(rule[i]) then
                return true
            end
        end
        self.sdx = wasSdx
        return false
    elseif r1 == "repeat" then
        while self:applies(rule[2]) do
            -- continue repeating
        end
    elseif r1 == "optional" then
        self:applies(rule[2])
    elseif r1 == "ident" then
        local i = rule[3]
        local ii = self.ididx[i]
        if not self:applies(self.productions[ii][3]) then
            self.sdx = wasSdx
            return false
        end
    else
        error("invalid rule in applies() function")
    end
    
    return true
end

function EBNFParser:checkValid(test)
    self.src = test
    self.sdx = 1
    local res = self:applies(self.productions[1][3])
    self:skipSpaces()
    if self.sdx <= #self.src then
        res = false
    end
    print('"' .. test .. '", ' .. self.results[1 + self:btoi(not res)])
end

-- Main execution
local parser = EBNFParser:new()

local ebnfs = {
    '"a" {\n' ..
    '    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;\n' ..
    '} "z" ',
    
    '{\n' ..
    '    expr = term { plus term } .\n' ..
    '    term = factor { times factor } .\n' ..
    '    factor = number | \'(\' expr \')\' .\n' ..
    ' \n' ..
    '    plus = "+" | "-" .\n' ..
    '    times = "*" | "/" .\n' ..
    ' \n' ..
    '    number = digit { digit } .\n' ..
    '    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .\n' ..
    '}',
    
    'a = "1"',
    '{ a = "1" ;',
    '{ hello world = "1"; }',
    '{ foo = bar . }'
}

local tests = {
    {
        'a1a3a4a4a5a6',
        'a1 a2a6',
        'a1 a3 a4 a6',
        'a1 a4 a5 a6',
        'a1 a2 a4 a5 a5 a6',
        'a1 a2 a4 a5 a6 a7',
        'your ad here'
    },
    {
        '2',
        '2*3 + 4/23 - 7',
        '(3 + 4) * 6-2+(4*(4))',
        '-2',
        '3 +',
        '(4 + 3'
    }
}

for i = 1, #ebnfs do
    if parser:parse(ebnfs[i]) == 1 then
        print('\ntests:')
        if i <= #tests then
            for _, test in ipairs(tests[i]) do
                parser:checkValid(test)
            end
        end
    end
    print('')
end
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>

productions:
{{"a", 1, {{"terminal", "a1"}, {"or", {"terminal", "a2"}, {"terminal", "a3"}}, {"repeat", {"terminal", "a4"}}, {"optional", {"terminal", "a5"}}, {"terminal", "a6"}}}}

idents:
{"a"}

ididx:
{1}

extras:
{{"title", "a"}, {"comment", "z"}}

tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>

productions:
{{"expr", 1, {{"ident", "term", 2}, {"repeat", {{"ident", "plus", 3}, {"ident", "term", 2}}}}}, {"term", 2, {{"ident", "factor", 4}, {"repeat", {{"ident", "times", 5}, {"ident", "factor", 4}}}}}, {"factor", 4, {"or", {"ident", "number", 6}, {{"terminal", "("}, {"ident", "expr", 1}, {"terminal", ")"}}}}, {"plus", 3, {"or", {"terminal", "+"}, {"terminal", "-"}}}, {"times", 5, {"or", {"terminal", "*"}, {"terminal", "/"}}}, {"number", 6, {{"ident", "digit", 7}, {"repeat", {"ident", "digit", 7}}}}, {"digit", 7, {"or", {"terminal", "0"}, {"terminal", "1"}, {"terminal", "2"}, {"terminal", "3"}, {"terminal", "4"}, {"terminal", "5"}, {"terminal", "6"}, {"terminal", "7"}, {"terminal", "8"}, {"terminal", "9"}}}}

idents:
{"expr", "term", "plus", "factor", "times", "number", "digit"}

ididx:
{1, 2, 4, 3, 5, 6, 7}

extras:
{}

tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail

parse:
a = "1" ===>
invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>
invalid ebnf (missing closing })

parse:
{ hello world = "1"; } ===>
invalid ebnf (= expected)

parse:
{ foo = bar . } ===>
invalid ebnf (undefined:bar)




MODULE EBNF;

FROM  ASCII         IMPORT  EOL;
FROM  InOut         IMPORT  Done, Read, Write, WriteLn, WriteInt, WriteString;
FROM  EBNFScanner   IMPORT  Symbol, sym, id, Ino, GetSym, MarkError, SkipLine;
FROM  TableHandler  IMPORT  WordLength, Table, overflow, InitTable, Record, Tabulate;

VAR   T0, T1                : Table;

PROCEDURE skip (n : INTEGER);

BEGIN
  MarkError (n);
  WHILE  (sym < lpar) OR (sym > period)  DO  GetSym  END
END skip;

PROCEDURE Expression;

  PROCEDURE Term;

    PROCEDURE Factor;

    BEGIN
      IF  sym = ident  THEN
        Record (T0, id, Ino);
        GetSym
      ELSIF  sym = literal  THEN
        Record (T1, id, Ino);
        GetSym
      ELSIF  sym = lpar  THEN
        GetSym;
        Expression;
        IF  sym = rpar  THEN  GetSym  ELSE  skip (2)  END
      ELSIF  sym = lbk  THEN
        GetSym;
        Expression;
        IF  sym = rbk  THEN  GetSym  ELSE  skip (3)  END
      ELSIF  sym = lbr  THEN
        GetSym;
        Expression;
        IF  sym = rbr  THEN  GetSym  ELSE  skip (4)  END
      ELSE
        skip (5)
      END
    END Factor;

  BEGIN
    Factor;
    WHILE  sym < bar  DO  Factor  END
  END Term;

BEGIN
  Term;
  WHILE  sym = bar  DO
    GetSym;
    Term
  END
END Expression;


PROCEDURE Production;

BEGIN
  Record (T0, id, - INTEGER (Ino));
  GetSym;
  IF  sym = eql  THEN  GetSym  ELSE  skip (7)  END;
  Expression;
  IF  sym # period  THEN
    MarkError (8);
    SkipLine
  END;
  GetSym
END Production;


BEGIN
  InitTable (T0);
  InitTable (T1);
  GetSym;
  WHILE  (sym = ident) AND (overflow = 0)  DO  Production  END;
  IF  overflow > 0  THEN
    WriteLn;
    WriteString ("Table overflow");
    WriteInt (overflow, 6);
  END;
  Write (35C);
  Tabulate (T0);
  Tabulate (T1);
END EBNF.

And the source for the EBNF scanner. I hope you like nested procedures.

IMPLEMENTATION MODULE EBNFScanner;

FROM  ASCII        IMPORT  LF;
FROM  InOut        IMPORT  Read, Write, WriteLn, WriteInt, WriteBf, EOF;

VAR   ch                   : CHAR;

MODULE LineHandler;

  IMPORT   LF, EOF, ch, Ino, Read, Write, WriteLn, WriteInt, WriteBf;
  EXPORT   GetCh, MarkError, SkipLine;

  CONST    LineWidth = 100;

  VAR      cc        : INTEGER;
           cc1       : INTEGER;
           cc2       : INTEGER;
           line      : ARRAY [0..LineWidth - 1] OF CHAR;

  PROCEDURE GetLine;

  BEGIN
    IF  cc2 > 0  THEN
      WriteLn;
      cc2 := 0
    END;
    Read (ch);
    IF  EOF ()  THEN
      line [0] := 177C;
      cc1 := 1
    ELSE
      INC (Ino);
      WriteInt (Ino, 5);
      Write (' ');
      cc1 := 0;
      LOOP
        Write (ch);
        line [cc1] := ch;
        INC (cc1);
        IF  ch = LF  THEN  EXIT  END;
        Read (ch)
      END
    END
  END GetLine;


    PROCEDURE GetCh;

    BEGIN
      WHILE  cc = cc1  DO
        cc := 0;
        GetLine
      END;
      ch := line [cc];
      INC (cc)
    END GetCh;


    PROCEDURE MarkError (n  : INTEGER);

    BEGIN
      IF  cc2 = 0  THEN
        Write ('*');
        cc2 := 3;
        REPEAT
          Write (' ');
          DEC (cc2)
        UNTIL  cc2 = 0;
      END;
      WHILE  cc2 < cc  DO
        Write (' ');
        INC (cc2)
      END;
      Write ('^');
      WriteInt (n, 1);
      INC (cc2, 2)
    END MarkError;

    PROCEDURE SkipLine;

    BEGIN
      WHILE  ch # LF  DO  GetCh  END;
      GetCh
    END SkipLine;

  BEGIN          (* BEGIN of LineHandler        *)
    cc  := 0;
    cc1 := 0;
    cc2 := 0
  END LineHandler;

PROCEDURE GetSym;

VAR     i          : CARDINAL;

BEGIN
  WHILE  ch <= ' '  DO  GetCh  END;
  IF  ch = '/'  THEN
    SkipLine;
    WHILE  ch <= ' '  DO  GetCh  END
  END;
  IF  (CAP (ch) <= 'Z') AND (CAP (ch) >= 'A')  THEN
    i := 0;
    sym := literal;
    REPEAT
      IF  i < IdLength  THEN
        id [i] := ch;
        INC (i)
      END;
      IF  ch > 'Z' THEN  sym := ident  END;
      GetCh
    UNTIL  (CAP (ch) < 'A') OR (CAP (ch) > 'Z');
    id [i] := ' '
  ELSIF  ch = "'"  THEN
    i := 0;
    GetCh;
    sym := literal;
    WHILE  ch # "'"  DO
      IF  i < IdLength  THEN
        id [i] := ch;
        INC (i)
      END;
      GetCh
    END;
    GetCh;
    id [i] := ' '
    WHILE  ch # "'"  DO
      IF  i < IdLength  THEN
        id [i] := ch;
        INC (i)
      END;
      GetCh
    END;
    GetCh;
    id [i] := ' '
  ELSIF  ch = '"'  THEN
    i := 0;
    GetCh;
    sym := literal;
    WHILE  ch # '"'  DO
      IF  i < IdLength  THEN
        id [i] := ch;
        INC (i)
      END;
      GetCh
    END;
    GetCh;
    id [i] := ' '
  ELSIF  ch = '='  THEN  sym := eql;   GetCh
  ELSIF  ch = '('  THEN  sym := lpar;  GetCh
  ELSIF  ch = ')'  THEN  sym := rpar;  GetCh
  ELSIF  ch = '['  THEN  sym := lbk;   GetCh
  ELSIF  ch = ']'  THEN  sym := rbk;   GetCh
  ELSIF  ch = '{'  THEN  sym := lbr;   GetCh
  ELSIF  ch = '}'  THEN  sym := rbr;   GetCh
  ELSIF  ch = '|'  THEN  sym := bar;   GetCh
  ELSIF  ch = '.'  THEN  sym := period;  GetCh
  ELSIF  ch = 177C THEN  sym := other;  GetCh
  ELSE
    sym := other;
    GetCh
  END
END GetSym;

BEGIN
  Ino := 0;
  ch := ' '
END EBNFScanner.

Version 1

#!/usr/bin/perl

use strict;                        # http://www.rosettacode.org/wiki/Parse_EBNF
use warnings;
$SIG{__WARN__} = sub { print "\nWARN: @_\n"; exit };

my $h = qr/\G\s*/;
my $identifier = qr/$h([a-z]\w*)\b/i;
my $literal = qr/$h(['"])(.+?)\1/s;
my ($title, $comment, %productions, %called, $startsymbol, $show, $errpos);

sub node { bless [ @_[1..$#_] ], $_[0] }
sub err { die "ERROR: ", s/\G\s*\K/<**ERROR @_**>/r, "\n" }
sub want { /$h\Q$_[1]\E/gc ? shift : err "missing '$_[1]' " }
sub addin { node $_[0] => ref $_[1] eq $_[0] ? @{$_[1]} : $_[1], pop }

for my $case ( split /^-{50}.*\n/m, do { local $/; @ARGV ? <> : <DATA> } )
  {
  $show = $case =~ s/^#show.*//m;
  my ($ebnf, $tests) = map s/^#.*\n//gmr, split /^#test.*\n/m, $case, 2;
  parse( $ebnf, ($tests // "") =~ /.*\n/g );
  }

sub parse
  {
  eval
    {
    (%productions, %called, $startsymbol) = ();
    local $_ = shift // 'empty ebnf source string';
    print '-' x 75, "\n", s/\s*\z/\n\n/r;
    syntax(); ################################################## parse the EBNF
    print "       title: $title\n     comment: $comment\n";
    $startsymbol or err "no productions";
    print "start symbol: $startsymbol\n";
    for my $key ( sort keys %productions )
      {
      $show and print "\n$key =\n", $productions{$key}->show =~ s/^/   /gmr;
      }
    delete @called{keys %productions};
    %called and die "\nERROR: undefined production(s) <@{[ keys %called]}>\n";

    for ( @_ ) ###################################################### run tests
      {
      $errpos = undef;
      print "\ntry: $_";
      print eval
        {
        $productions{$startsymbol}->run or pos($_) = $errpos, err; ### run tree
        /$h\n/gc or err 'incomplete parse';
        } ? "valid\n" : $@;
      }
    1;
    } or print "$@\n";
  }

sub syntax ############################################## subs for parsing EBNF
  {
  $title = /$literal/gc ? $2 : 'none';
  /$h\{/gc or err 'missing open brace';
  while( /$identifier\s*=/gc )              # is this the start of a production
    {
    my $name = $1;
    $startsymbol //= $name;
    my $tree = expr(0);
    $productions{$name} =
      $productions{$name} ? addin ALT => $productions{$name}, $tree : $tree;
    /$h[.;]/gc or err 'missing production terminator';
    }
  /$h\}/gc or err 'missing close brace';
  $comment = /$literal/gc ? $2 : 'none';
  /$h\z/gc or err 'extra characters after parse';
  }

sub expr
  {
  my $tree =
    /$identifier/gc ? do { $called{$1}++; node PROD => $1 } :
    /$literal/gc    ? node LIT => $2 :
    /$h\[/gc        ? node OPTION => want expr(0), ']' :
    /$h\{/gc        ? node REPEAT => want expr(0), '}' :
    /$h\(/gc        ?                want expr(0), ')' :
    err 'Invalid expression';
  $tree =
    /\G\s+/gc                           ? $tree :
    $_[0] <= 1 && /\G(?=[[{('"a-z])/gci ? addin SEQ => $tree, expr(2) :
    $_[0] <= 0 && /\G\|/gc              ? addin ALT => $tree, expr(1) :
    return $tree while 1;
  }

################################################### run parse tree
sub LIT::run { /$h\Q$_[0][0]\E/gc }
sub SEQ::run
  {
  my $pos = pos($_) // 0;
  for my $node ( @{ $_[0] } )
    {
    $node->run or $errpos = pos($_), pos($_) = $pos, return 0;
    }
  return 1;
  }
sub OPTION::run
  {
  my $pos = pos($_) // 0;
  $_[0][0]->run or pos($_) = $pos, return 1;
  }
sub PROD::run
  {
  $productions{ $_[0][0] }->run;
  }
sub REPEAT::run
  {
  my $pos = pos($_) // 0;
  $pos = pos($_) while $_[0][0]->run;
  pos($_) = $pos;
  return 1;
  }
sub ALT::run
  {
  my $pos = pos($_) // 0;
  for my $node ( @{ $_[0] } )
    {
    eval{ $node->run } and return 1;
    pos($_) = $pos;
    }
  return 0;
  }

sub LIT::show { "LIT $_[0][0]\n" } ###################### for nested tree print
sub PROD::show { "PROD $_[0][0]\n" }
sub UNIVERSAL::show
  {
  join '', ref $_[0], "\n", map $_->show =~ s/^/   /gmr, @{ $_[0] };
  }

__DATA__
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"
#show
#tests
    a1a3a4a4a5a6
    a1 a2a6
    a1 a3 a4 a6 
    a1 a4 a5 a6
    a1 a2 a4 a5 a5 a6
    a1 a2 a4 a5 a6 a7
    your ad here
----------------------------------------------------------------------
"Arithmetic expressions" {
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .

    plus = "+" | "-" .
    times = "*" | "/" .

    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} 'http://www.rosettacode.org/wiki/Parse_EBNF'
#tests
    2
    2*3 + 4/23 - 7
    (3 + 4) * 6-2+(4*(4)) 
    -2
    3 +
    (4 + 3
----------------------------------------------------------------------
'some invalid EBNF' { a = "1" ;
----------------------------------------------------------------------
a = "1";
----------------------------------------------------------------------
{ hello world = "1"; }
----------------------------------------------------------------------
{ foo = bar . } "undefined production check"
----------------------------------------------------------------------
Output:
---------------------------------------------------------------------------
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"

       title: a
     comment: z
start symbol: a

a =
   SEQ
      LIT a1
      ALT
         LIT a2
         LIT a3
      REPEAT
         LIT a4
      OPTION
         LIT a5
      LIT a6

try:     a1a3a4a4a5a6
valid

try:     a1 a2a6
valid

try:     a1 a3 a4 a6 
valid

try:     a1 a4 a5 a6
ERROR:     a1 <**ERROR **>a4 a5 a6


try:     a1 a2 a4 a5 a5 a6
ERROR:     a1 a2 a4 a5 <**ERROR **>a5 a6


try:     a1 a2 a4 a5 a6 a7
ERROR:     a1 a2 a4 a5 a6 <**ERROR incomplete parse**>a7


try:     your ad here
ERROR:     <**ERROR **>your ad here

---------------------------------------------------------------------------
"Arithmetic expressions" {
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .

    plus = "+" | "-" .
    times = "*" | "/" .

    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} 'http://www.rosettacode.org/wiki/Parse_EBNF'

       title: Arithmetic expressions
     comment: http://www.rosettacode.org/wiki/Parse_EBNF
start symbol: expr

try:     2
valid

try:     2*3 + 4/23 - 7
valid

try:     (3 + 4) * 6-2+(4*(4)) 
valid

try:     -2
ERROR:     <**ERROR **>-2


try:     3 +
ERROR:     3 <**ERROR incomplete parse**>+


try:     (4 + 3
ERROR:     <**ERROR **>(4 + 3

---------------------------------------------------------------------------
'some invalid EBNF' { a = "1" ;

ERROR: 'some invalid EBNF' { a = "1" ;
<**ERROR missing close brace**>

---------------------------------------------------------------------------
a = "1";

ERROR: <**ERROR missing open brace**>a = "1";


---------------------------------------------------------------------------
{ hello world = "1"; }

ERROR: { <**ERROR missing close brace**>hello world = "1"; }


---------------------------------------------------------------------------
{ foo = bar . } "undefined production check"

       title: none
     comment: undefined production check
start symbol: foo

ERROR: undefined production(s) <bar>


Version 2

Works with: Perl version 5.22.1
Translation of: Java
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;

package EBNFParser;
use Data::Dumper;

sub new {
    my $class = shift;
    my $self = {
        src => '',
        ch => '',
        sdx => 0,
        token => undef,
        err => 0,
        idents => [],
        ididx => [],
        productions => [],
        extras => [],
        results => ["pass", "fail"]
    };
    bless $self, $class;
    return $self;
}

sub btoi {
    my ($self, $b) = @_;
    return $b ? 1 : 0;
}

sub invalid {
    my ($self, $msg) = @_;
    $self->{err} = 1;
    print "$msg\n";
    $self->{sdx} = length($self->{src}); # set to eof
    return -1;
}

sub skip_spaces {
    my $self = shift;
    while ($self->{sdx} < length($self->{src})) {
        $self->{ch} = substr($self->{src}, $self->{sdx}, 1);
        last unless $self->{ch} =~ /[ \t\r\n]/;
        $self->{sdx}++;
    }
}

sub get_token {
    my $self = shift;
    # Yields a single character token, one of {}()[]|=.;
    # or ["terminal", string] or ["ident", string] or -1.
    $self->skip_spaces();
    if ($self->{sdx} >= length($self->{src})) {
        $self->{token} = -1;
        return;
    }
    my $tokstart = $self->{sdx};
    if ($self->{ch} =~ /[{}()\[\]|=.;]/) {
        $self->{sdx}++;
        $self->{token} = $self->{ch};
    } elsif ($self->{ch} eq '"' || $self->{ch} eq "'") {
        my $closech = $self->{ch};
        my $tokend = $tokstart + 1;
        while ($tokend < length($self->{src}) && substr($self->{src}, $tokend, 1) ne $closech) {
            $tokend++;
        }
        if ($tokend >= length($self->{src})) {
            $self->{token} = $self->invalid("no closing quote");
        } else {
            $self->{sdx} = $tokend + 1;
            $self->{token} = ["terminal", substr($self->{src}, $tokstart + 1, $tokend - $tokstart - 1)];
        }
    } elsif ($self->{ch} =~ /[a-z]/) {
        # To simplify things for the purposes of this task,
        # identifiers are strictly a-z only, not A-Z or 1-9.
        while ($self->{sdx} < length($self->{src}) && substr($self->{src}, $self->{sdx}, 1) =~ /[a-z]/) {
            $self->{sdx}++;
            if ($self->{sdx} < length($self->{src})) {
                $self->{ch} = substr($self->{src}, $self->{sdx}, 1);
            }
        }
        $self->{token} = ["ident", substr($self->{src}, $tokstart, $self->{sdx} - $tokstart)];
    } else {
        $self->{token} = $self->invalid("invalid ebnf");
    }
}

sub match_token {
    my ($self, $expected_ch) = @_;
    if ($self->{token} ne $expected_ch) {
        $self->{token} = $self->invalid(sprintf("invalid ebnf (%s expected)", $expected_ch));
    } else {
        $self->get_token();
    }
}

sub add_ident {
    my ($self, $ident) = @_;
    my $k = -1;
    for my $i (0..$#{$self->{idents}}) {
        if ($self->{idents}->[$i] eq $ident) {
            $k = $i;
            last;
        }
    }
    if ($k == -1) {
        push @{$self->{idents}}, $ident;
        $k = $#{$self->{idents}};
        push @{$self->{ididx}}, -1;
    }
    return $k;
}

sub factor {
    my $self = shift;
    my $res;
    if (ref($self->{token}) eq 'ARRAY') {
        my $t = $self->{token};
        if ($t->[0] eq "ident") {
            my $idx = $self->add_ident($t->[1]);
            push @$t, $idx;
            $self->{token} = $t;
        }
        $res = $self->{token};
        $self->get_token();
    } elsif ($self->{token} eq '[') {
        $self->get_token();
        $res = ["optional", $self->expression()];
        $self->match_token(']');
    } elsif ($self->{token} eq '(') {
        $self->get_token();
        $res = $self->expression();
        $self->match_token(')');
    } elsif ($self->{token} eq '{') {
        $self->get_token();
        $res = ["repeat", $self->expression()];
        $self->match_token('}');
    } else {
        die "invalid token in factor() function";
    }
    if (ref($res) eq 'ARRAY' && @$res == 1) {
        return $res->[0];
    }
    return $res;
}

sub term {
    my $self = shift;
    my $res = [$self->factor()];
    my @tokens = (-1, '|', '.', ';', ')', ']', '}');
    
    while (1) {
        my $found = 0;
        for my $t (@tokens) {
            if (defined($self->{token}) && $t eq $self->{token}) {
                $found = 1;
                last;
            }
        }
        last if $found;
        push @$res, $self->factor();
    }
    
    if (@$res == 1) {
        return $res->[0];
    }
    return $res;
}

sub expression {
    my $self = shift;
    my $res = [$self->term()];
    if (defined($self->{token}) && $self->{token} eq '|') {
        $res = ["or", $res->[0]];
        while (defined($self->{token}) && $self->{token} eq '|') {
            $self->get_token();
            push @$res, $self->term();
        }
    }
    if (@$res == 1) {
        return $res->[0];
    }
    return $res;
}

sub production {
    my $self = shift;
    # Returns a token or -1; the real result is left in 'productions' etc,
    $self->get_token();
    if (defined($self->{token}) && $self->{token} ne '}') {
        if ($self->{token} eq -1) {
            return $self->invalid("invalid ebnf (missing closing })");
        }
        if (ref($self->{token}) ne 'ARRAY') {
            return -1;
        }
        my $t = $self->{token};
        if ($t->[0] ne "ident") {
            return -1;
        }
        my $ident = $t->[1];
        my $idx = $self->add_ident($ident);
        $self->get_token();
        $self->match_token('=');
        if ($self->{token} eq -1) {
            return -1;
        }
        push @{$self->{productions}}, [$ident, $idx, $self->expression()];
        $self->{ididx}->[$idx] = $#{$self->{productions}};
    }
    return $self->{token};
}

sub parse {
    my ($self, $ebnf) = @_;
    # Returns +1 if ok, -1 if bad.
    printf "parse:\n%s ===>\n", $ebnf;
    $self->{err} = 0;
    $self->{src} = $ebnf;
    $self->{sdx} = 0;
    $self->{idents} = [];
    $self->{ididx} = [];
    $self->{productions} = [];
    $self->{extras} = [];
    
    $self->get_token();
    if (ref($self->{token}) eq 'ARRAY') {
        my $t = $self->{token};
        $t->[0] = "title";
        push @{$self->{extras}}, $self->{token};
        $self->get_token();
    }
    if (!defined($self->{token}) || $self->{token} ne '{') {
        return $self->invalid("invalid ebnf (missing opening {)");
    }
    
    while (1) {
        my $token_result = $self->production();
        last if (defined($token_result) && ($token_result eq '}' || $token_result eq -1));
    }
    
    $self->get_token();
    if (ref($self->{token}) eq 'ARRAY') {
        my $t = $self->{token};
        $t->[0] = "comment";
        push @{$self->{extras}}, $self->{token};
        $self->get_token();
    }
    if (defined($self->{token}) && $self->{token} ne -1) {
        return $self->invalid("invalid ebnf (missing eof?)");
    }
    if ($self->{err}) {
        return -1;
    }
    
    my $k = -1;
    for my $i (0..$#{$self->{ididx}}) {
        if ($self->{ididx}->[$i] == -1) {
            $k = $i;
            last;
        }
    }
    if ($k != -1) {
        return $self->invalid(sprintf("invalid ebnf (undefined:%s)", $self->{idents}->[$k]));
    }
    
    $self->pprint($self->{productions}, "productions");
    $self->pprint($self->{idents}, "idents");
    $self->pprint($self->{ididx}, "ididx");
    $self->pprint($self->{extras}, "extras");
    return 1;
}

# Adjusts Perl's normal printing to look more like Phix output.
sub pprint {
    my ($self, $ob, $header) = @_;
    printf "\n%s:\n", $header;
    local $Data::Dumper::Terse = 1;
    local $Data::Dumper::Indent = 0;
    my $pp = Dumper($ob);
    $pp =~ s/\[/\{/g;
    $pp =~ s/\]/\}/g;
    $pp =~ s/,/, /g;
    chomp $pp;
    print "$pp\n";
}

# The rules that applies() has to deal with are:
# [factors] - if rule[0] is not string,
# just apply one after the other recursively.
# ["terminal", "a1"]       -- literal constants
# ["or", <e1>, <e2>, ...]  -- (any) one of n
# ["repeat", <e1>]         -- as per "{}" in ebnf
# ["optional", <e1>]       -- as per "[]" in ebnf
# ["ident", <name>, idx]   -- apply the sub-rule

sub applies {
    my ($self, $rule) = @_;
    my $was_sdx = $self->{sdx}; # in case of failure
    my $r1 = ref($rule) eq 'ARRAY' ? $rule->[0] : $rule;
    
    if (ref($rule) eq 'ARRAY' && ref($r1) ne '') {
        for my $i (0..$#{$rule}) {
            if (!$self->applies($rule->[$i])) {
                $self->{sdx} = $was_sdx;
                return 0;
            }
        }
    } elsif (ref($rule) eq 'ARRAY' && $r1 eq "terminal") {
        $self->skip_spaces();
        my $r2 = $rule->[1];
        for my $i (0..length($r2)-1) {
            if ($self->{sdx} >= length($self->{src}) || 
                substr($self->{src}, $self->{sdx}, 1) ne substr($r2, $i, 1)) {
                $self->{sdx} = $was_sdx;
                return 0;
            }
            $self->{sdx}++;
        }
    } elsif (ref($rule) eq 'ARRAY' && $r1 eq "or") {
        for my $i (1..$#{$rule}) {
            if ($self->applies($rule->[$i])) {
                return 1;
            }
        }
        $self->{sdx} = $was_sdx;
        return 0;
    } elsif (ref($rule) eq 'ARRAY' && $r1 eq "repeat") {
        while ($self->applies($rule->[1])) {
            # continue repeating
        }
    } elsif (ref($rule) eq 'ARRAY' && $r1 eq "optional") {
        $self->applies($rule->[1]);
    } elsif (ref($rule) eq 'ARRAY' && $r1 eq "ident") {
        my $i = $rule->[2];
        my $ii = $self->{ididx}->[$i];
        if (!$self->applies($self->{productions}->[$ii]->[2])) {
            $self->{sdx} = $was_sdx;
            return 0;
        }
    } else {
        die "invalid rule in applies() function";
    }
    return 1;
}

sub check_valid {
    my ($self, $test) = @_;
    $self->{src} = $test;
    $self->{sdx} = 0;
    my $res = $self->applies($self->{productions}->[0]->[2]);
    $self->skip_spaces();
    if ($self->{sdx} < length($self->{src})) {
        $res = 0;
    }
    printf "\"%s\", %s\n", $test, $self->{results}->[1 - $self->btoi($res)];
}

# Main execution
package main;

my $parser = EBNFParser->new();

my @ebnfs = (
    "\"a\" {\n" .
    "    a = \"a1\" ( \"a2\" | \"a3\" ) { \"a4\" } [ \"a5\" ] \"a6\" ;\n" .
    "} \"z\" ",
    
    "{\n" .
    "    expr = term { plus term } .\n" .
    "    term = factor { times factor } .\n" .
    "    factor = number | '(' expr ')' .\n" .
    " \n" .
    "    plus = \"+\" | \"-\" .\n" .
    "    times = \"*\" | \"/\" .\n" .
    " \n" .
    "    number = digit { digit } .\n" .
    "    digit = \"0\" | \"1\" | \"2\" | \"3\" | \"4\" | \"5\" | \"6\" | \"7\" | \"8\" | \"9\" .\n" .
    "}",
    
    "a = \"1\"",
    "{ a = \"1\" ;",
    "{ hello world = \"1\"; }",
    "{ foo = bar . }"
);

my @tests = (
    [
        "a1a3a4a4a5a6",
        "a1 a2a6",
        "a1 a3 a4 a6",
        "a1 a4 a5 a6",
        "a1 a2 a4 a5 a5 a6",
        "a1 a2 a4 a5 a6 a7",
        "your ad here"
    ],
    [
        "2",
        "2*3 + 4/23 - 7",
        "(3 + 4) * 6-2+(4*(4))",
        "-2",
        "3 +",
        "(4 + 3"
    ]
);

for my $i (0..$#ebnfs) {
    if ($parser->parse($ebnfs[$i]) == 1) {
        print "\ntests:\n";
        if ($i < @tests) {
            for my $test (@{$tests[$i]}) {
                $parser->check_valid($test);
            }
        }
    }
    print "\n";
}
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>

productions:
{{'a', 0, {{'terminal', 'a1'}, {'or', {'terminal', 'a2'}, {'terminal', 'a3'}}, {'repeat', {'terminal', 'a4'}}, {'optional', {'terminal', 'a5'}}, {'terminal', 'a6'}}}}

idents:
{'a'}

ididx:
{0}

extras:
{{'title', 'a'}, {'comment', 'z'}}

tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>

productions:
{{'expr', 0, {{'ident', 'term', 1}, {'repeat', {{'ident', 'plus', 2}, {'ident', 'term', 1}}}}}, {'term', 1, {{'ident', 'factor', 3}, {'repeat', {{'ident', 'times', 4}, {'ident', 'factor', 3}}}}}, {'factor', 3, {'or', {'ident', 'number', 5}, {{'terminal', '('}, {'ident', 'expr', 0}, {'terminal', ')'}}}}, {'plus', 2, {'or', {'terminal', '+'}, {'terminal', '-'}}}, {'times', 4, {'or', {'terminal', '*'}, {'terminal', '/'}}}, {'number', 5, {{'ident', 'digit', 6}, {'repeat', {'ident', 'digit', 6}}}}, {'digit', 6, {'or', {'terminal', '0'}, {'terminal', '1'}, {'terminal', '2'}, {'terminal', '3'}, {'terminal', '4'}, {'terminal', '5'}, {'terminal', '6'}, {'terminal', '7'}, {'terminal', '8'}, {'terminal', '9'}}}}

idents:
{'expr', 'term', 'plus', 'factor', 'times', 'number', 'digit'}

ididx:
{0, 1, 3, 2, 4, 5, 6}

extras:
{}

tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail

parse:
a = "1" ===>
invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>
invalid ebnf (missing closing })

parse:
{ hello world = "1"; } ===>
invalid ebnf (= expected)

parse:
{ foo = bar . } ===>
invalid ebnf (undefined:bar)


with javascript_semantics
string src
integer ch, sdx
object token
 
bool error = false
function invalid(string msg)
    error = true
    ?msg
    sdx = length(src)+1 -- (set to eof)
    return -1
end function
 
procedure skip_spaces()
    while 1 do
        if sdx>length(src) then exit end if
        ch = src[sdx]
        if not find(ch,{' ','\t','\r','\n'}) then exit end if
        sdx += 1
    end while
end procedure
 
procedure get_token()
-- yeilds a single character token, one of {}()[]|=.;
-- or {"terminal",string} or {"ident", string} or -1.
    skip_spaces()
    if sdx>length(src) then
        token = -1
        return
    end if
    integer tokstart = sdx
    if find(ch,"{}()[]|=.;") then
        sdx += 1
        token = ch
    elsif ch='\"'
       or ch='\'' then
        integer closech = ch
        for tokend=sdx+1 to length(src) do
            if src[tokend] = closech then
                sdx = tokend+1
                token = {"terminal",src[tokstart+1..tokend-1]}
                return
            end if
        end for
        token = invalid("no closing quote")
    elsif ch>='a' and ch<='z' then
        -- to simplify things for the purposes of this task, 
        -- identifiers are strictly a-z only, not A-Z or 1-9
        while 1 do
            sdx += 1
            if sdx>length(src) then exit end if
            ch = src[sdx]
            if ch<'a' or ch>'z' then exit end if
        end while
        token = {"ident",src[tokstart..sdx-1]}
    else
        token = invalid("invalid ebnf")
    end if
end procedure
 
procedure match_token(integer ch)
    if token!=ch then
        token = invalid(sprintf("invalid ebnf (%c expected)",ch))
    else
        get_token()
    end if
end procedure
 
sequence idents = {},
         ididx = {}
 
function add_ident(string ident)
    integer k = find(ident,idents)
    if k=0 then
        idents = append(idents,ident)
        k = length(idents)
        ididx = append(ididx,0)
    end if
    return k
end function
 
forward function expression()
 
function factor()
object res
    if sequence(token) then
        if token[1]="ident" then
            token &= add_ident(token[2])
        end if
        res = token
        get_token()
    elsif token='[' then
        get_token()
        res = {"optional",expression()}
        match_token(']')
    elsif token='(' then
        get_token()
        res = expression()
        match_token(')')
    elsif token='{' then
        get_token()
        res = {"repeat",expression()}
        match_token('}')
    else
        ?9/0        -- erm??
--      res = -1    --  ""
    end if
    return res
end function
 
function term()
--  sequence res = {"factors",factor()} -- (opted against this)
    sequence res = {factor()}
    while not find(token,{-1,'|','.',';',')',']','}'}) do
        res = append(res,factor())
    end while
    if length(res)=1 then res = res[1] end if
    return res
end function
 
function expression()
    object res = term()
    if token='|' then
        res = {"or",res}
        while token='|' do
            get_token()
            res = append(res,term())
        end while
    end if
    return res
end function
 
sequence productions = {}
 
function production()
-- returns a token or -1; the real result is left in productions[] etc.
    get_token()
    if token!='}' then
        if token=-1 then
            return invalid("invalid ebnf (missing closing })")
        end if
        if not sequence(token)
        or token[1]!="ident" then
            return -1
        end if
        string ident = token[2]
        integer idx = add_ident(ident)
        get_token()
        match_token('=')
        if token=-1 then
            return -1
        end if
        sequence res = expression()
        productions = append(productions,{ident,idx,res})
        ididx[idx] = length(productions)
    end if
    return token
end function
 
sequence extras = {}
 
function parse(string ebnf)
-- returns: +1 if ok, -1 if bad
    puts(1,"parse: "&ebnf&" ===>\n")
    error = false
    src = ebnf
    sdx = 1
    idents = {}
    ididx = {}
    productions = {}
    extras = {}
    get_token()
    if sequence(token) then
        token[1] = "title"
        extras = append(extras,token)
        get_token()
    end if
    if token!='{' then
        return invalid("invalid ebnf (missing opening {)")
    end if
    while 1 do
        token = production()
        if token='}' or token=-1 then exit end if
    end while
    get_token()
    if sequence(token) then
        token[1] = "comment"
        extras = append(extras,token)
        get_token()
    end if
    if token!=-1 then
        return invalid("invalid ebnf (missing eof?)")
    end if      
    if error then return -1 end if
    integer k = find(0,ididx)
    if k!=0 then
        return invalid("invalid ebnf (undefined:"&idents[k]&")")
    end if
    ppOpt({pp_Pause,0})
    pp(productions)
--  pp(idents)
--  pp(ididx)
--  pp(extras)
    return 1
end function
 
-- The rules that applies() has to deal with are:
-- {factors} - if rule[1] is not string, just apply one after the other,
--             recursively. As per term() above, originally had "factors",
--              but getting rid of it made the syntax tree much clearer)
-- {"terminal", "a1"}       -- literal constants
-- {"or", <e1>, <e2>, ...}  -- (any) one of n
-- {"repeat", <e1>}         -- as per "{}" in ebnf
-- {"optional", <e1>}       -- as per "[]" in ebnf
-- {"ident", <name>, idx}   -- apply the sub-rule
 
function applies(sequence rule)
integer was_sdx = sdx   -- in case of failure
    object r1 = rule[1]
    if not string(r1) then
        for i=1 to length(rule) do
            if not applies(rule[i]) then
                sdx = was_sdx
                return false
            end if
        end for
    elsif r1="terminal" then
        skip_spaces()
        for i=1 to length(rule[2]) do
            if sdx>length(src)
            or src[sdx]!=rule[2][i] then
                sdx = was_sdx
                return false
            end if
            sdx += 1
        end for
    elsif r1="or" then
        for i=2 to length(rule) do
            if applies(rule[i]) then
                return true
            end if
        end for
        sdx = was_sdx
        return false
    elsif r1="repeat" then
        while applies(rule[2]) do
        end while
    elsif r1="optional" then
        if applies(rule[2]) then
        end if
    elsif r1="ident" then
        integer i = rule[3],
                ii = ididx[i]
        if not applies(productions[ii][3]) then
            sdx = was_sdx
            return false
        end if
    else
        ?9/0
    end if
    return true
end function
 
procedure check_valid(string test)
    src = test
    sdx = 1
    bool res = applies(productions[1][3])
    skip_spaces()
    if sdx<=length(src) then res = false end if
    ?{test,{"pass","fail"}[2-res]}
end procedure
 
constant ebnf = {""" 
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z" """,
""" 
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}""",
`a = "1"`,
`{ a = "1" ;`,
`{ hello world = "1"; }`,
`{ foo = bar . }`
}
 
constant tests = { {"a1a3a4a4a5a6",
                    "a1 a2a6",
                    "a1 a3 a4 a6",
                    "a1 a4 a5 a6",
                    "a1 a2 a4 a5 a5 a6",
                    "a1 a2 a4 a5 a6 a7",
                    "your ad here"},
                   {"2",
                    "2*3 + 4/23 - 7",
                    "(3 + 4) * 6-2+(4*(4))",
                    "-2",
                    "3 +",
                    "(4 + 3"}}
 
for i=1 to length(ebnf) do
    if parse(ebnf[i])=+1 then
        ?"tests:"
        for j=1 to length(tests[i]) do
            check_valid(tests[i][j])
        end for
    end if
end for

In real use, I would be tempted to use numeric literals rather than string tags in such structures, but the latter certainly make things ten times easier to debug, plus I got an instantly legible syntax tree dump (the bit just after "===>" below) practically for free.

Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>
{{"a", 1,
  {{"terminal", "a1"}, {"or", {"terminal", "a2"}, {"terminal", "a3"}},
   {"repeat", {"terminal", "a4"}}, {"optional", {"terminal", "a5"}},
   {"terminal", "a6"}}}}
"tests:"
{"a1a3a4a4a5a6","pass"}
{"a1 a2a6","pass"}
{"a1 a3 a4 a6","pass"}
{"a1 a4 a5 a6","fail"}
{"a1 a2 a4 a5 a5 a6","fail"}
{"a1 a2 a4 a5 a6 a7","fail"}
{"your ad here","fail"}
parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .

    plus = "+" | "-" .
    times = "*" | "/" .

    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>
{{"expr", 1,
  {{"ident", "term", 2},
   {"repeat", {{"ident", "plus", 3}, {"ident", "term", 2}}}}},
 {"term", 2,
  {{"ident", "factor", 4},
   {"repeat", {{"ident", "times", 5}, {"ident", "factor", 4}}}}},
 {"factor", 4,
  {"or", {"ident", "number", 6},
   {{"terminal", "("}, {"ident", "expr", 1}, {"terminal", ")"}}}},
 {"plus", 3, {"or", {"terminal", "+"}, {"terminal", "-"}}},
 {"times", 5, {"or", {"terminal", "*"}, {"terminal", "/"}}},
 {"number", 6, {{"ident", "digit", 7}, {"repeat", {"ident", "digit", 7}}}},
 {"digit", 7,
  {"or", {"terminal", "0"}, {"terminal", "1"}, {"terminal", "2"},
   {"terminal", "3"}, {"terminal", "4"}, {"terminal", "5"},
   {"terminal", "6"}, {"terminal", "7"}, {"terminal", "8"},
   {"terminal", "9"}}}}
"tests:"
{"2","pass"}
{"2*3 + 4/23 - 7","pass"}
{"(3 + 4) * 6-2+(4*(4))","pass"}
{"-2","fail"}
{"3 +","fail"}
{"(4 + 3","fail"}
parse: a = "1" ===>
"invalid ebnf (missing opening {)"
parse: { a = "1" ; ===>
"invalid ebnf (missing closing })"
parse: { hello world = "1"; } ===>
"invalid ebnf (= expected)"
parse: { foo = bar . } ===>
"invalid ebnf (undefined:bar)"
(de EBNF
   "expr  : term ( ( PLUS | MINUS )  term )* ;"
   "term  : factor ( ( MULT | DIV ) factor )* ;"
   "factor   : NUMBER ;" )

(for E EBNF
   (use (@S @E)
      (unless (and (match '(@S : @E ;) (str E)) (not (cdr @S)))
         (quit "Invalid EBNF" E) )
      (put (car @S) 'ebnf @E) ) )
(de matchEbnf (Pat)
   (cond
      ((asoq Pat '((PLUS . +) (MINUS . -) (MULT . *) (DIV . /)))
         (let Op (cdr @)
            (when (= Op (car *Lst))
               (pop '*Lst)
               Op ) ) )
      ((== 'NUMBER Pat)
         (cond
            ((num? (car *Lst))
               (pop '*Lst)
               @ )
            ((and (= "-" (car *Lst)) (num? (cadr *Lst)))
               (setq *Lst (cddr *Lst))
               (- @) ) ) )
      ((get Pat 'ebnf) (parseLst @))
      ((atom Pat))
      (T
         (loop
            (T (matchEbnf (pop 'Pat)) @)
            (NIL Pat)
            (NIL (== '| (pop 'Pat)))
            (NIL Pat) ) ) ) )

(de parseLst (Pat)
   (let (P (pop 'Pat)  X (matchEbnf P))
      (loop
         (NIL Pat)
         (if (n== '* (cadr Pat))
            (if (matchEbnf (pop 'Pat))
               (setq X (list @ X))
               (throw) )
            (loop
               (NIL *Lst)
               (NIL (matchEbnf (car Pat)))
               (setq X (list @ X (or (matchEbnf P) (throw)))) )
            (setq Pat (cddr Pat)) ) )
      X ) )

(de parseEbnf (Str)
   (let *Lst (str Str "")
      (catch NIL
         (parseLst (get 'expr 'ebnf)) ) ) )

Output:

: (parseEbnf "1 + 2 * -3 / 7 - 3 * 4")
-> (- (+ 1 (/ (* 2 -3) 7)) (* 3 4))


Works with: Python
Translation of: JavaScript
import json


class EBNFParser:
    def __init__(self):
        self.src = ''
        self.ch = ''
        self.sdx = 0
        self.token = None
        self.err = False
        self.idents = []
        self.ididx = []
        self.productions = []
        self.extras = []
        self.results = ['pass', 'fail']

    def btoi(self, b):
        return 1 if b else 0

    def invalid(self, msg):
        self.err = True
        print(msg)
        self.sdx = len(self.src)  # set to eof
        return -1

    def skip_spaces(self):
        while self.sdx < len(self.src):
            self.ch = self.src[self.sdx]
            if self.ch not in ' \t\r\n':
                break
            self.sdx += 1

    def get_token(self):
        # Yields a single character token, one of {}()[]|=.;
        # or ["terminal",string] or ["ident", string] or -1.
        self.skip_spaces()
        if self.sdx >= len(self.src):
            self.token = {'value': -1, 'is_sequence': False}
            return
        
        tokstart = self.sdx
        if self.ch in '{}()[]|=.;':
            self.sdx += 1
            self.token = {'value': self.ch, 'is_sequence': False}
        elif self.ch in '"\'':
            closech = self.ch
            tokend = tokstart + 1
            while tokend < len(self.src) and self.src[tokend] != closech:
                tokend += 1
            if tokend >= len(self.src):
                self.token = {'value': self.invalid('no closing quote'), 'is_sequence': False}
            else:
                self.sdx = tokend + 1
                self.token = {
                    'value': ['terminal', self.src[tokstart + 1:tokend]], 
                    'is_sequence': True
                }
        elif 'a' <= self.ch <= 'z':
            # To simplify things for the purposes of this task,
            # identifiers are strictly a-z only, not A-Z or 1-9.
            while self.sdx < len(self.src) and 'a' <= self.ch <= 'z':
                self.sdx += 1
                if self.sdx < len(self.src):
                    self.ch = self.src[self.sdx]
            self.token = {
                'value': ['ident', self.src[tokstart:self.sdx]], 
                'is_sequence': True
            }
        else:
            self.token = {'value': self.invalid('invalid ebnf'), 'is_sequence': False}

    def match_token(self, expected_ch):
        if self.token['value'] != expected_ch:
            self.token = {
                'value': self.invalid(f'invalid ebnf ({expected_ch} expected)'), 
                'is_sequence': False
            }
        else:
            self.get_token()

    def add_ident(self, ident):
        try:
            k = self.idents.index(ident)
        except ValueError:
            self.idents.append(ident)
            k = len(self.idents) - 1
            self.ididx.append(-1)
        return k

    def factor(self):
        if self.token['is_sequence']:
            t = self.token['value']
            if t[0] == 'ident':
                idx = self.add_ident(t[1])
                t.append(idx)
                self.token['value'] = t
            res = self.token['value']
            self.get_token()
        elif self.token['value'] == '[':
            self.get_token()
            res = ['optional', self.expression()]
            self.match_token(']')
        elif self.token['value'] == '(':
            self.get_token()
            res = self.expression()
            self.match_token(')')
        elif self.token['value'] == '{':
            self.get_token()
            res = ['repeat', self.expression()]
            self.match_token('}')
        else:
            raise ValueError('invalid token in factor() function')
        
        if isinstance(res, list) and len(res) == 1:
            return res[0]
        return res

    def term(self):
        res = [self.factor()]
        tokens = [-1, '|', '.', ';', ')', ']', '}']
        
        while True:
            if self.token['value'] in tokens:
                break
            res.append(self.factor())
        
        if len(res) == 1:
            return res[0]
        return res

    def expression(self):
        res = [self.term()]
        if self.token['value'] == '|':
            res = ['or', res[0]]
            while self.token['value'] == '|':
                self.get_token()
                res.append(self.term())
        
        if len(res) == 1:
            return res[0]
        return res

    def production(self):
        # Returns a token or -1; the real result is left in 'productions' etc,
        self.get_token()
        if self.token['value'] != '}':
            if self.token['value'] == -1:
                return self.invalid('invalid ebnf (missing closing })')
            if not self.token['is_sequence']:
                return -1
            
            t = self.token['value']
            if t[0] != 'ident':
                return -1
            
            ident = t[1]
            idx = self.add_ident(ident)
            self.get_token()
            self.match_token('=')
            if self.token['value'] == -1:
                return -1
            
            self.productions.append([ident, idx, self.expression()])
            self.ididx[idx] = len(self.productions) - 1
        
        return self.token['value']

    def parse(self, ebnf):
        # Returns +1 if ok, -1 if bad.
        print(f'parse:\n{ebnf} ===>')
        self.err = False
        self.src = ebnf
        self.sdx = 0
        self.idents = []
        self.ididx = []
        self.productions = []
        self.extras = []
        
        self.get_token()
        if self.token['is_sequence']:
            t = self.token['value']
            t[0] = 'title'
            self.extras.append(self.token['value'])
            self.get_token()
        
        if self.token['value'] != '{':
            return self.invalid('invalid ebnf (missing opening {)')
        
        while True:
            token_result = self.production()
            if token_result == '}' or token_result == -1:
                break
        
        self.get_token()
        if self.token['is_sequence']:
            t = self.token['value']
            t[0] = 'comment'
            self.extras.append(self.token['value'])
            self.get_token()
        
        if self.token['value'] != -1:
            return self.invalid('invalid ebnf (missing eof?)')
        
        if self.err:
            return -1
        
        k = -1
        for i in range(len(self.ididx)):
            if self.ididx[i] == -1:
                k = i
                break
        
        if k != -1:
            return self.invalid(f'invalid ebnf (undefined:{self.idents[k]})')
        
        self.pprint(self.productions, 'productions')
        self.pprint(self.idents, 'idents')
        self.pprint(self.ididx, 'ididx')
        self.pprint(self.extras, 'extras')
        return 1

    def pprint(self, ob, header):
        # Adjusts Python's normal printing to look more like Phix output.
        print(f'\n{header}:')
        pp = json.dumps(ob)
        pp = pp.replace('[', '{')
        pp = pp.replace(']', '}')
        pp = pp.replace(',', ', ')
        for i in range(len(self.idents)):
            pp = pp.replace(f'\\b{i}\\b', str(i))
        print(pp)

    def applies(self, rule):
        # The rules that applies() has to deal with are:
        # [factors] - if rule[0] is not string,
        # just apply one after the other recursively.
        # ["terminal", "a1"]       -- literal constants
        # ["or", <e1>, <e2>, ...}  -- (any) one of n
        # ["repeat", <e1>]         -- as per "{}" in ebnf
        # ["optional", <e1>]       -- as per "[]" in ebnf
        # ["ident", <name>, idx]   -- apply the sub-rule
        
        was_sdx = self.sdx  # in case of failure
        r1 = rule[0]
        
        if not isinstance(r1, str):
            for i in range(len(rule)):
                if not self.applies(rule[i]):
                    self.sdx = was_sdx
                    return False
        elif r1 == 'terminal':
            self.skip_spaces()
            r2 = rule[1]
            for i in range(len(r2)):
                if self.sdx >= len(self.src) or self.src[self.sdx] != r2[i]:
                    self.sdx = was_sdx
                    return False
                self.sdx += 1
        elif r1 == 'or':
            for i in range(1, len(rule)):
                if self.applies(rule[i]):
                    return True
            self.sdx = was_sdx
            return False
        elif r1 == 'repeat':
            while self.applies(rule[1]):
                # continue repeating
                pass
        elif r1 == 'optional':
            self.applies(rule[1])
        elif r1 == 'ident':
            i = rule[2]
            ii = self.ididx[i]
            if not self.applies(self.productions[ii][2]):
                self.sdx = was_sdx
                return False
        else:
            raise ValueError('invalid rule in applies() function')
        
        return True

    def check_valid(self, test):
        self.src = test
        self.sdx = 0
        res = self.applies(self.productions[0][2])
        self.skip_spaces()
        if self.sdx < len(self.src):
            res = False
        print(f'"{test}", {self.results[1 - self.btoi(res)]}')


# Main execution
if __name__ == "__main__":
    parser = EBNFParser()

    ebnfs = [
        '"a" {\n' +
        '    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;\n' +
        '} "z" ',
        
        '{\n' +
        '    expr = term { plus term } .\n' +
        '    term = factor { times factor } .\n' +
        '    factor = number | \'(\' expr \')\' .\n' +
        ' \n' +
        '    plus = "+" | "-" .\n' +
        '    times = "*" | "/" .\n' +
        ' \n' +
        '    number = digit { digit } .\n' +
        '    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .\n' +
        '}',
        
        'a = "1"',
        '{ a = "1" ;',
        '{ hello world = "1"; }',
        '{ foo = bar . }'
    ]

    tests = [
        [
            'a1a3a4a4a5a6',
            'a1 a2a6',
            'a1 a3 a4 a6',
            'a1 a4 a5 a6',
            'a1 a2 a4 a5 a5 a6',
            'a1 a2 a4 a5 a6 a7',
            'your ad here'
        ],
        [
            '2',
            '2*3 + 4/23 - 7',
            '(3 + 4) * 6-2+(4*(4))',
            '-2',
            '3 +',
            '(4 + 3'
        ]
    ]

    for i in range(len(ebnfs)):
        if parser.parse(ebnfs[i]) == 1:
            print('\ntests:')
            if i < len(tests):
                for test in tests[i]:
                    parser.check_valid(test)
        print('')
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>

productions:
{{"a",  0,  {{"terminal",  "a1"},  {"or",  {"terminal",  "a2"},  {"terminal",  "a3"}},  {"repeat",  {"terminal",  "a4"}},  {"optional",  {"terminal",  "a5"}},  {"terminal",  "a6"}}}}

idents:
{"a"}

ididx:
{0}

extras:
{{"title",  "a"},  {"comment",  "z"}}

tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>

productions:
{{"expr",  0,  {{"ident",  "term",  1},  {"repeat",  {{"ident",  "plus",  2},  {"ident",  "term",  1}}}}},  {"term",  1,  {{"ident",  "factor",  3},  {"repeat",  {{"ident",  "times",  4},  {"ident",  "factor",  3}}}}},  {"factor",  3,  {"or",  {"ident",  "number",  5},  {{"terminal",  "("},  {"ident",  "expr",  0},  {"terminal",  ")"}}}},  {"plus",  2,  {"or",  {"terminal",  "+"},  {"terminal",  "-"}}},  {"times",  4,  {"or",  {"terminal",  "*"},  {"terminal",  "/"}}},  {"number",  5,  {{"ident",  "digit",  6},  {"repeat",  {"ident",  "digit",  6}}}},  {"digit",  6,  {"or",  {"terminal",  "0"},  {"terminal",  "1"},  {"terminal",  "2"},  {"terminal",  "3"},  {"terminal",  "4"},  {"terminal",  "5"},  {"terminal",  "6"},  {"terminal",  "7"},  {"terminal",  "8"},  {"terminal",  "9"}}}}

idents:
{"expr",  "term",  "plus",  "factor",  "times",  "number",  "digit"}

ididx:
{0,  1,  3,  2,  4,  5,  6}

extras:
{}

tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail

parse:
a = "1" ===>
invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>
invalid ebnf (missing closing })

parse:
{ hello world = "1"; } ===>
invalid ebnf (= expected)

parse:
{ foo = bar . } ===>
invalid ebnf (undefined:bar)



R

You can run the code at https://rdrr.io/snippets/

Works with: R
Translation of: Python
# EBNF Parser in R
library(jsonlite)

EBNFParser <- function() {
  # Initialize parser state
  self <- list()
  
  # Instance variables
  self$src <- ""
  self$ch <- ""
  self$sdx <- 1  # R uses 1-based indexing
  self$token <- NULL
  self$err <- FALSE
  self$idents <- character(0)
  self$ididx <- integer(0)
  self$productions <- list()
  self$extras <- list()
  self$results <- c("pass", "fail")
  
  # Helper function to convert boolean to integer
  self$btoi <- function(b) {
    if (b) 1 else 0
  }
  
  # Error handling function
  self$invalid <- function(msg) {
    self$err <<- TRUE
    cat(msg, "\n")
    self$sdx <<- nchar(self$src) + 1  # set to eof
    return(-1)
  }
  
  # Skip whitespace characters
  self$skip_spaces <- function() {
    while (self$sdx <= nchar(self$src)) {
      self$ch <<- substr(self$src, self$sdx, self$sdx)
      if (!grepl("[ \t\r\n]", self$ch)) {
        break
      }
      self$sdx <<- self$sdx + 1
    }
  }
  
  # Tokenizer function
  self$get_token <- function() {
    self$skip_spaces()
    if (self$sdx > nchar(self$src)) {
      self$token <<- list(value = -1, is_sequence = FALSE)
      return()
    }
    
    tokstart <- self$sdx
    if (grepl("[{}()\\[\\]|=.;]", self$ch)) {
      self$sdx <<- self$sdx + 1
      self$token <<- list(value = self$ch, is_sequence = FALSE)
    } else if (self$ch %in% c('"', "'")) {
      closech <- self$ch
      tokend <- tokstart + 1
      while (tokend <= nchar(self$src) && substr(self$src, tokend, tokend) != closech) {
        tokend <- tokend + 1
      }
      if (tokend > nchar(self$src)) {
        self$token <<- list(value = self$invalid("no closing quote"), is_sequence = FALSE)
      } else {
        self$sdx <<- tokend + 1
        self$token <<- list(
          value = list("terminal", substr(self$src, tokstart + 1, tokend - 1)),
          is_sequence = TRUE
        )
      }
    } else if (grepl("[a-z]", self$ch)) {
      # Identifiers are strictly a-z only
      while (self$sdx <= nchar(self$src) && grepl("[a-z]", self$ch)) {
        self$sdx <<- self$sdx + 1
        if (self$sdx <= nchar(self$src)) {
          self$ch <<- substr(self$src, self$sdx, self$sdx)
        }
      }
      self$token <<- list(
        value = list("ident", substr(self$src, tokstart, self$sdx - 1)),
        is_sequence = TRUE
      )
    } else {
      self$token <<- list(value = self$invalid("invalid ebnf"), is_sequence = FALSE)
    }
  }
  
  # Match expected token
  self$match_token <- function(expected_ch) {
    if (!identical(self$token$value, expected_ch)) {
      self$token <<- list(
        value = self$invalid(paste0("invalid ebnf (", expected_ch, " expected)")),
        is_sequence = FALSE
      )
    } else {
      self$get_token()
    }
  }
  
  # Add identifier to symbol table
  self$add_ident <- function(ident) {
    k <- which(self$idents == ident)
    if (length(k) == 0) {
      self$idents <<- c(self$idents, ident)
      k <- length(self$idents)
      self$ididx <<- c(self$ididx, -1)
    } else {
      k <- k[1]
    }
    return(k)
  }
  
  # Parse factor (atomic elements)
  self$factor <- function() {
    if (self$token$is_sequence) {
      t <- self$token$value
      if (t[[1]] == "ident") {
        idx <- self$add_ident(t[[2]])
        t[[3]] <- idx
        self$token$value <<- t
      }
      res <- self$token$value
      self$get_token()
    } else if (identical(self$token$value, "[")) {
      self$get_token()
      res <- list("optional", self$expression())
      self$match_token("]")
    } else if (identical(self$token$value, "(")) {
      self$get_token()
      res <- self$expression()
      self$match_token(")")
    } else if (identical(self$token$value, "{")) {
      self$get_token()
      res <- list("repeat", self$expression())
      self$match_token("}")
    } else {
      stop("invalid token in factor() function")
    }
    
    if (is.list(res) && length(res) == 1) {
      return(res[[1]])
    }
    return(res)
  }
  
  # Parse term (sequence of factors)
  self$term <- function() {
    res <- list(self$factor())
    tokens <- list(-1, "|", ".", ";", ")", "]", "}")
    
    while (TRUE) {
      if (any(sapply(tokens, function(x) identical(self$token$value, x)))) {
        break
      }
      res[[length(res) + 1]] <- self$factor()
    }
    
    if (length(res) == 1) {
      return(res[[1]])
    }
    return(res)
  }
  
  # Parse expression (alternatives)
  self$expression <- function() {
    res <- list(self$term())
    if (identical(self$token$value, "|")) {
      res <- list("or", res[[1]])
      while (identical(self$token$value, "|")) {
        self$get_token()
        res[[length(res) + 1]] <- self$term()
      }
    }
    
    if (length(res) == 1) {
      return(res[[1]])
    }
    return(res)
  }
  
  # Parse production rule
  self$production <- function() {
    self$get_token()
    if (!identical(self$token$value, "}")) {
      if (identical(self$token$value, -1)) {
        return(self$invalid("invalid ebnf (missing closing })"))
      }
      if (!self$token$is_sequence) {
        return(-1)
      }
      
      t <- self$token$value
      if (t[[1]] != "ident") {
        return(-1)
      }
      
      ident <- t[[2]]
      idx <- self$add_ident(ident)
      self$get_token()
      self$match_token("=")
      if (identical(self$token$value, -1)) {
        return(-1)
      }
      
      self$productions[[length(self$productions) + 1]] <<- list(ident, idx, self$expression())
      self$ididx[idx] <<- length(self$productions)
    }
    
    return(self$token$value)
  }
  
  # Main parsing function
  self$parse <- function(ebnf) {
    cat("parse:\n", ebnf, " ===>\n", sep = "")
    self$err <<- FALSE
    self$src <<- ebnf
    self$sdx <<- 1
    self$idents <<- character(0)
    self$ididx <<- integer(0)
    self$productions <<- list()
    self$extras <<- list()
    
    self$get_token()
    if (self$token$is_sequence) {
      t <- self$token$value
      t[[1]] <- "title"
      self$extras[[length(self$extras) + 1]] <<- self$token$value
      self$get_token()
    }
    
    if (!identical(self$token$value, "{")) {
      return(self$invalid("invalid ebnf (missing opening {)"))
    }
    
    while (TRUE) {
      token_result <- self$production()
      if (identical(token_result, "}") || identical(token_result, -1)) {
        break
      }
    }
    
    self$get_token()
    if (self$token$is_sequence) {
      t <- self$token$value
      t[[1]] <- "comment"
      self$extras[[length(self$extras) + 1]] <<- self$token$value
      self$get_token()
    }
    
    if (!identical(self$token$value, -1)) {
      return(self$invalid("invalid ebnf (missing eof?)"))
    }
    
    if (self$err) {
      return(-1)
    }
    
    k <- which(self$ididx == -1)
    if (length(k) > 0) {
      return(self$invalid(paste0("invalid ebnf (undefined:", self$idents[k[1]], ")")))
    }
    
    self$pprint(self$productions, "productions")
    self$pprint(self$idents, "idents")
    self$pprint(self$ididx, "ididx")
    self$pprint(self$extras, "extras")
    return(1)
  }
  
  # Pretty print function
  self$pprint <- function(ob, header) {
    cat("\n", header, ":\n", sep = "")
    pp <- toJSON(ob, auto_unbox = TRUE)
    pp <- gsub("\\[", "{", pp)
    pp <- gsub("\\]", "}", pp)
    pp <- gsub(",", ", ", pp)
    for (i in seq_along(self$idents)) {
      pp <- gsub(paste0("\\b", i-1, "\\b"), as.character(i-1), pp)
    }
    cat(pp, "\n")
  }
  
  # Apply grammar rules to input
  self$applies <- function(rule) {
    was_sdx <- self$sdx  # in case of failure
    r1 <- rule[[1]]
    
    if (!is.character(r1)) {
      for (i in seq_along(rule)) {
        if (!self$applies(rule[[i]])) {
          self$sdx <<- was_sdx
          return(FALSE)
        }
      }
    } else if (r1 == "terminal") {
      self$skip_spaces()
      r2 <- rule[[2]]
      for (i in seq_len(nchar(r2))) {
        if (self$sdx > nchar(self$src) || substr(self$src, self$sdx, self$sdx) != substr(r2, i, i)) {
          self$sdx <<- was_sdx
          return(FALSE)
        }
        self$sdx <<- self$sdx + 1
      }
    } else if (r1 == "or") {
      for (i in 2:length(rule)) {
        if (self$applies(rule[[i]])) {
          return(TRUE)
        }
      }
      self$sdx <<- was_sdx
      return(FALSE)
    } else if (r1 == "repeat") {
      while (self$applies(rule[[2]])) {
        # continue repeating
      }
    } else if (r1 == "optional") {
      self$applies(rule[[2]])
    } else if (r1 == "ident") {
      i <- rule[[3]]
      ii <- self$ididx[i]
      if (!self$applies(self$productions[[ii]][[3]])) {
        self$sdx <<- was_sdx
        return(FALSE)
      }
    } else {
      stop("invalid rule in applies() function")
    }
    
    return(TRUE)
  }
  
  # Check if input is valid according to grammar
  self$check_valid <- function(test) {
    self$src <<- test
    self$sdx <<- 1
    res <- self$applies(self$productions[[1]][[3]])
    self$skip_spaces()
    if (self$sdx <= nchar(self$src)) {
      res <- FALSE
    }
    cat('"', test, '", ', self$results[1 + self$btoi(!res)], "\n", sep = "")
  }
  
  return(self)
}

# Main execution
parser <- EBNFParser()

ebnfs <- c(
  '"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z" ',
  
  '{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | \'(\' expr \')\' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}',
  
  'a = "1"',
  '{ a = "1" ;',
  '{ hello world = "1"; }',
  '{ foo = bar . }'
)

tests <- list(
  c(
    'a1a3a4a4a5a6',
    'a1 a2a6',
    'a1 a3 a4 a6',
    'a1 a4 a5 a6',
    'a1 a2 a4 a5 a5 a6',
    'a1 a2 a4 a5 a6 a7',
    'your ad here'
  ),
  c(
    '2',
    '2*3 + 4/23 - 7',
    '(3 + 4) * 6-2+(4*(4))',
    '-2',
    '3 +',
    '(4 + 3'
  )
)

for (i in seq_along(ebnfs)) {
  if (parser$parse(ebnfs[i]) == 1) {
    cat('\ntests:\n')
    if (i <= length(tests)) {
      for (test in tests[[i]]) {
        parser$check_valid(test)
      }
    }
  }
  cat('\n')
}
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>
invalid ebnf 
invalid ebnf (= expected) 

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>
invalid ebnf 
invalid ebnf (= expected) 

parse:
a = "1" ===>
invalid ebnf 
invalid ebnf (missing opening {) 

parse:
{ a = "1" ; ===>
invalid ebnf 
invalid ebnf (= expected) 

parse:
{ hello world = "1"; } ===>
invalid ebnf (= expected) 

parse:
{ foo = bar . } ===>
invalid ebnf 
invalid ebnf (= expected) 





(formerly Perl 6)

Works with: Rakudo version 2019.03.1

This parses the EBNF rule set using a Raku grammar, then if it parses as valid EBNF, constructs a grammar and parses the test strings with that. EBNF rule sets that are naively syntactically correct but missing rules will parse as valid but will give a runtime failure warning about missing methods. It is implemented and exercised using the flavor of EBNF and test cases specified on the test page.

# A Raku grammar to parse EBNF
grammar EBNF {
    rule         TOP { ^ <title>? '{' [ <production> ]+ '}' <comment>? $ }
    rule  production { <name> '=' <expression> <[.;]> }
    rule  expression { <term> +% "|" }
    rule        term { <factor>+ }
    rule      factor { <group> | <repeat> | <optional> | <identifier> | <literal> }
    rule       group { '(' <expression> ')' }
    rule      repeat { '{' <expression> '}' }
    rule    optional { '[' <expression> ']' }
    token identifier { <-[\|\(\)\{\}\[\]\.\;\"\'\s]>+ } #"
    token    literal { ["'" <-[']>+ "'" | '"' <-["]>+ '"'] } #"
    token      title { <literal> }
    token    comment { <literal> }
    token       name { <identifier>  <?before \h* '='> }
}

class EBNF::Actions {
    method        TOP($/) { 
                            say "Syntax Tree:\n", $/; # Dump the syntax tree to STDOUT
                            make 'grammar ' ~
                              ($<title> ?? $<title>.subst(/\W/, '', :g) !! 'unnamed') ~
                              " \{\n rule TOP \{^[<" ~ $/<production>[0]<name> ~
                              ">]+\$\}\n " ~ $<production>>>.ast ~ "\}"
                          }
   method production($/) { 
                            make 'token ' ~ $<name> ~ ' {' ~
                              $<expression>.ast ~ "}\n"
                          }
   method expression($/) { make join '|', $<term>>>.ast }
   method       term($/) { make join '\h*', $<factor>>>.ast }
   method     factor($/) { 
                            make $<literal>  ?? $<literal> !!
                              $<group>    ?? '[' ~ $<group>.ast    ~ ']'  !!
                              $<repeat>   ?? '[' ~ $<repeat>.ast   ~ '\\s*]*' !!
                              $<optional> ?? '[' ~ $<optional>.ast ~ ']?' !!
                              '<' ~ $<identifier> ~ '>'
                          }
   method     repeat($/) { make $<expression>.ast }
   method   optional($/) { make $<expression>.ast }
   method      group($/) { make $<expression>.ast }
}

# An array of test cases
my @tests = (
    {
        ebnf => 
            q<"a" {
                a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
            } "z">
        ,
        teststrings => [
            'a1a3a4a4a5a6',
            'a1 a2a6',
            'a1 a3 a4 a6',
            'a1 a4 a5 a6',
            'a1 a2 a4 a4 a5 a6',
            'a1 a2 a4 a5 a5 a6',
            'a1 a2 a4 a5 a6 a7',
            'your ad here' 
        ]
    },
    {
        ebnf =>
            q<{
                expr = term { plus term } .
                term = factor { times factor } .
                factor = number | '(' expr ')' .
 
                plus = "+" | "-" .
                times = "*" | "/" .
 
                number = digit { digit } .
                digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
            }>
        ,
        teststrings => [
            '2',
            '2*3 + 4/23 - 7',
            '(3 + 4) * 6-2+(4*(4))',
            '-2',
            '3 +',
            '(4 + 3'
        ]
    },
    {
        ebnf => q<a = "1";>,
        teststrings => ['foobar']
    },
    {
        ebnf => q<{ a = "1" ;>,
        teststrings => ['foobar']
    },
    {
        ebnf => q<{ hello world = "1"; }>,
        teststrings => ['foobar']
    },
    {
        ebnf => q<{ foo = bar . }>,
        teststrings => ['foobar']
    }
);

# Test the parser.
my $i = 1;
for @tests -> $test {
    unless EBNF.parse($test<ebnf>) {
         say "Parsing EBNF grammar:\n";
         say "{$test<ebnf>.subst(/^^\h*/,'',:g)}\n";
         say "Invalid syntax. Can not be parsed.\n";
         say '*' x 79;
         next;
    }
    my $p = EBNF.parse($test<ebnf>, :actions(EBNF::Actions));
    my $grammar = $p.ast;
    $grammar ~~ m/^'grammar '(\w+)/;
    my $title = $0;
    my $fn = 'EBNFtest'~$i++;
    my $fh = open($fn, :w) orelse .die;
    $fh.say( "\{\n", $grammar );
    $fh.say( qq|say "Parsing EBNF grammar '$title':\\n";| );
    $fh.say( qq|say q<{$test<ebnf>.subst(/^^\h*/,'',:g)}>;| );
    $fh.say(  q|say "\nValid syntax.\n\nTesting:\n";| );
    $fh.say(  q|CATCH { default { say " - $_" } };| );
    my $len = max $test<teststrings>.flat>>.chars;
    for $test<teststrings>.flat -> $s {
        $fh.say( qq|printf "%{$len}s", '{$s}';| ~
                 qq|printf " - %s\\n", {$title}.parse('{$s}')| ~
                 qq| ?? 'valid.' !! 'NOT valid.';|
               );
    }
    $fh.say( qq| "\\n"} |);
    $fh.close;
    say qqx/raku $fn/;
    say '*' x 79, "\n";
    unlink $fn;
}

Output:

Syntax Tree:
「"a" {
                a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
            } "z"」
 title => 「"a"」
  literal => 「"a"」
 production => 「a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
            」
  name => 「a」
   identifier => 「a」
  expression => 「"a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" 」
   term => 「"a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" 」
    factor => 「"a1" 」
     literal => 「"a1"」
    factor => 「( "a2" | "a3" ) 」
     group => 「( "a2" | "a3" ) 」
      expression => 「"a2" | "a3" 」
       term => 「"a2" 」
        factor => 「"a2" 」
         literal => 「"a2"」
       term => 「 "a3" 」
        factor => 「"a3" 」
         literal => 「"a3"」
    factor => 「{ "a4" } 」
     repeat => 「{ "a4" } 」
      expression => 「"a4" 」
       term => 「"a4" 」
        factor => 「"a4" 」
         literal => 「"a4"」
    factor => 「[ "a5" ] 」
     optional => 「[ "a5" ] 」
      expression => 「"a5" 」
       term => 「"a5" 」
        factor => 「"a5" 」
         literal => 「"a5"」
    factor => 「"a6" 」
     literal => 「"a6"」
 comment => 「"z"」
  literal => 「"z"」

Parsing EBNF grammar 'a':

"a" {
a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"

Valid syntax.

Testing:

     a1a3a4a4a5a6 - valid.
          a1 a2a6 - valid.
      a1 a3 a4 a6 - valid.
      a1 a4 a5 a6 - NOT valid.
a1 a2 a4 a4 a5 a6 - valid.
a1 a2 a4 a5 a5 a6 - NOT valid.
a1 a2 a4 a5 a6 a7 - NOT valid.
     your ad here - NOT valid.

*******************************************************************************

Syntax Tree:
「{
                expr = term { plus term } .
                term = factor { times factor } .
                factor = number | '(' expr ')' .

                plus = "+" | "-" .
                times = "*" | "/" .

                number = digit { digit } .
                digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
            }」
 production => 「expr = term { plus term } .
                」
  name => 「expr」
   identifier => 「expr」
  expression => 「term { plus term } 」
   term => 「term { plus term } 」
    factor => 「term 」
     identifier => 「term」
    factor => 「{ plus term } 」
     repeat => 「{ plus term } 」
      expression => 「plus term 」
       term => 「plus term 」
        factor => 「plus 」
         identifier => 「plus」
        factor => 「term 」
         identifier => 「term」
 production => 「term = factor { times factor } .
                」
  name => 「term」
   identifier => 「term」
  expression => 「factor { times factor } 」
   term => 「factor { times factor } 」
    factor => 「factor 」
     identifier => 「factor」
    factor => 「{ times factor } 」
     repeat => 「{ times factor } 」
      expression => 「times factor 」
       term => 「times factor 」
        factor => 「times 」
         identifier => 「times」
        factor => 「factor 」
         identifier => 「factor」
 production => 「factor = number | '(' expr ')' .

                」
  name => 「factor」
   identifier => 「factor」
  expression => 「number | '(' expr ')' 」
   term => 「number 」
    factor => 「number 」
     identifier => 「number」
   term => 「 '(' expr ')' 」
    factor => 「'(' 」
     literal => 「'('」
    factor => 「expr 」
     identifier => 「expr」
    factor => 「')' 」
     literal => 「')'」
 production => 「plus = "+" | "-" .
                」
  name => 「plus」
   identifier => 「plus」
  expression => 「"+" | "-" 」
   term => 「"+" 」
    factor => 「"+" 」
     literal => 「"+"」
   term => 「 "-" 」
    factor => 「"-" 」
     literal => 「"-"」
 production => 「times = "*" | "/" .

                」
  name => 「times」
   identifier => 「times」
  expression => 「"*" | "/" 」
   term => 「"*" 」
    factor => 「"*" 」
     literal => 「"*"」
   term => 「 "/" 」
    factor => 「"/" 」
     literal => 「"/"」
 production => 「number = digit { digit } .
                」
  name => 「number」
   identifier => 「number」
  expression => 「digit { digit } 」
   term => 「digit { digit } 」
    factor => 「digit 」
     identifier => 「digit」
    factor => 「{ digit } 」
     repeat => 「{ digit } 」
      expression => 「digit 」
       term => 「digit 」
        factor => 「digit 」
         identifier => 「digit」
 production => 「digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
            」
  name => 「digit」
   identifier => 「digit」
  expression => 「"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 」
   term => 「"0" 」
    factor => 「"0" 」
     literal => 「"0"」
   term => 「 "1" 」
    factor => 「"1" 」
     literal => 「"1"」
   term => 「 "2" 」
    factor => 「"2" 」
     literal => 「"2"」
   term => 「 "3" 」
    factor => 「"3" 」
     literal => 「"3"」
   term => 「 "4" 」
    factor => 「"4" 」
     literal => 「"4"」
   term => 「 "5" 」
    factor => 「"5" 」
     literal => 「"5"」
   term => 「 "6" 」
    factor => 「"6" 」
     literal => 「"6"」
   term => 「 "7" 」
    factor => 「"7" 」
     literal => 「"7"」
   term => 「 "8" 」
    factor => 「"8" 」
     literal => 「"8"」
   term => 「 "9" 」
    factor => 「"9" 」
     literal => 「"9"」

Parsing EBNF grammar 'unnamed':

{
expr = term { plus term } .
term = factor { times factor } .
factor = number | '(' expr ')' .

plus = "+" | "-" .
times = "*" | "/" .

number = digit { digit } .
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}

Valid syntax.

Testing:

                    2 - valid.
       2*3 + 4/23 - 7 - valid.
(3 + 4) * 6-2+(4*(4)) - valid.
                   -2 - NOT valid.
                  3 + - NOT valid.
               (4 + 3 - NOT valid.

*******************************************************************************

Parsing EBNF grammar:

a = "1";

Invalid syntax. Can not be parsed.

*******************************************************************************
Parsing EBNF grammar:

{ a = "1" ;

Invalid syntax. Can not be parsed.

*******************************************************************************
Parsing EBNF grammar:

{ hello world = "1"; }

Invalid syntax. Can not be parsed.

*******************************************************************************
Syntax Tree:
「{ foo = bar . }」
 production => 「foo = bar . 」
  name => 「foo」
   identifier => 「foo」
  expression => 「bar 」
   term => 「bar 」
    factor => 「bar 」
     identifier => 「bar」

Parsing EBNF grammar 'unnamed':

{ foo = bar . }

Valid syntax.

Testing:

foobar - No such method 'bar' for invocant of type 'unnamed'

*******************************************************************************
This example is under development. It was marked thus on 12/May/2011. Please help complete the example.
This example is incomplete. The tokenizer is here, but the parser is very incomplete. Please ensure that it meets all task requirements and remove this message.
#--
# The tokenizer splits the input into Tokens like "identifier",
# ":", ")*" and so on. This design uses a StringScanner on each line of
# input, therefore a Token can never span more than one line.
#
# Each Token knows its original line and position, so an error message
# can locate a bad token.
#++

require 'strscan'

# A line of input.
# where::  A location like "file.txt:3"
# str::    String of this line
Line = Struct.new :where, :str

# A token.
# cat::   A category like :colon, :ident or so on
# str::   String of this token
# line::  Line containing this token
# pos::   Position of this token within this line
Token = Struct.new :cat, :str, :line, :pos

# Reads and returns the next Token. At end of file, returns nil.
#--
# Needs @filename and @in.
#++
def next_token
  # Loop until we reach a Token.
  loop do
    # If at end of line, then get next line, or else declare end of
    # file.
    if @scanner.eos?
      if s = @in.gets
        # Each line needs a new Line object. Tokens can hold references
        # to old Line objects.
        @line = Line.new("#{@filename}:#{@in.lineno}", s)
        @scanner.string = s
      else
        return nil  # End of file
      end
    end

    # Skip whitespace.
    break unless @scanner.skip(/[[:space:]]+/)
  end

  # Read token by regular expression.
  if s = @scanner.scan(/:/)
    c = :colon
  elsif s = @scanner.scan(/;/)
    c = :semicolon
  elsif s = @scanner.scan(/\(/)
    c = :paren
  elsif s = @scanner.scan(/\)\?/)
    c = :option
  elsif s = @scanner.scan(/\)\*/)
    c = :repeat
  elsif s = @scanner.scan(/\)/)
    c = :group
  elsif s = @scanner.scan(/\|/)
    c = :bar
  elsif s = @scanner.scan(/[[:alpha:]][[:alnum:]]*/)
    c = :ident
  elsif s = @scanner.scan(/'[^']*'|"[^"]*"/)
    # Fix syntax highlighting for Rosetta Code. => '
    c = :string
  elsif s = @scanner.scan(/'[^']*|"[^"]*/)
    c = :bad_string
  elsif s = @scanner.scan(/.*/)
    c = :unknown
  end

  Token.new(c, s, @line, (@scanner.pos - s.length))
end

# Prints a _message_ to standard error, along with location of _token_.
def error(token, message)
  line = token.line

  # We print a caret ^ pointing at the bad token. We make a very crude
  # attempt to align the caret ^ in the correct column. If the input
  # line has a non-[:print:] character, like a tab, then we print it as
  # a space.
  STDERR.puts <<EOF
#{line.where}: #{message}
#{line.str.gsub(/[^[:print:]]/, " ")}
#{" " * token.pos}^
EOF
end


#--
# The parser converts Tokens to a Grammar object. The parser also
# detects syntax errors.
#++

# A parsed EBNF grammar. It is an Array of Productions.
class Grammar < Array; end

# A production.
# ident::  The identifier
# alts::   An Array of Alternatives
Production = Struct.new :ident, :alts

# An array of Alternatives, as from "(a | b)".
class Group < Array; end

# An optional group, as from "(a | b)?".
class OptionalGroup < Group; end

# A repeated group, as from "(a | b)*".
class RepeatedGroup < Group; end

# An array of identifiers and string literals.
class Alternative < Array; end

#--
# Needs @filename and @in.
#++
def parse
  # TODO: this only dumps the tokens.
  while t = next_token
    error(t, "#{t.cat}")
  end
end

# Set @filename and @in. Parse input.
case ARGV.length
when 0 then @filename = "-"
when 1 then @filename = ARGV[0]
else fail "Too many arguments"
end
open(@filename) do |f|
  @in = f
  @scanner = StringScanner.new("")
  parse
end


Works with: Rust
Translation of: Java
use std::collections::HashMap;

#[derive(Debug, Clone)]
enum Token {
    Char(char),
    Sequence(Vec<String>),
    EOF,
}

#[derive(Debug, Clone)]
enum Rule {
    Terminal(String),
    Ident(String, usize),
    Or(Vec<Rule>),
    Repeat(Box<Rule>),
    Optional(Box<Rule>),
    Sequence(Vec<Rule>),
}

pub struct EBNFParser {
    src: String,
    ch: char,
    sdx: usize,
    token: Token,
    err: bool,
    idents: Vec<String>,
    ididx: Vec<Option<usize>>,
    productions: Vec<(String, usize, Rule)>,
    extras: Vec<Vec<String>>,
}

impl EBNFParser {
    pub fn new() -> Self {
        EBNFParser {
            src: String::new(),
            ch: '\0',
            sdx: 0,
            token: Token::EOF,
            err: false,
            idents: Vec::new(),
            ididx: Vec::new(),
            productions: Vec::new(),
            extras: Vec::new(),
        }
    }

    fn btoi(&self, b: bool) -> usize {
        if b { 1 } else { 0 }
    }

    fn invalid(&mut self, msg: &str) -> i32 {
        self.err = true;
        println!("{}", msg);
        self.sdx = self.src.len(); // set to eof
        -1
    }

    fn skip_spaces(&mut self) {
        while self.sdx < self.src.len() {
            self.ch = self.src.chars().nth(self.sdx).unwrap_or('\0');
            if ![' ', '\t', '\r', '\n'].contains(&self.ch) {
                break;
            }
            self.sdx += 1;
        }
    }

    fn get_token(&mut self) {
        // Yields a single character token, one of {}()[]|=.;
        // or ["terminal",string] or ["ident", string] or EOF.
        self.skip_spaces();
        if self.sdx >= self.src.len() {
            self.token = Token::EOF;
            return;
        }
        let tokstart = self.sdx;
        
        if "{}()[]|=.;".contains(self.ch) {
            self.sdx += 1;
            self.token = Token::Char(self.ch);
        } else if self.ch == '"' || self.ch == '\'' {
            let closech = self.ch;
            let mut tokend = tokstart + 1;
            while tokend < self.src.len() && self.src.chars().nth(tokend).unwrap_or('\0') != closech {
                tokend += 1;
            }
            if tokend >= self.src.len() {
                self.invalid("no closing quote");
                self.token = Token::EOF;
            } else {
                self.sdx = tokend + 1;
                let content = self.src[tokstart + 1..tokend].to_string();
                self.token = Token::Sequence(vec!["terminal".to_string(), content]);
            }
        } else if self.ch.is_ascii_lowercase() {
            // To simplify things for the purposes of this task,
            // identifiers are strictly a-z only, not A-Z or 1-9.
            while self.sdx < self.src.len() {
                self.ch = self.src.chars().nth(self.sdx).unwrap_or('\0');
                if !self.ch.is_ascii_lowercase() {
                    break;
                }
                self.sdx += 1;
            }
            let ident = self.src[tokstart..self.sdx].to_string();
            self.token = Token::Sequence(vec!["ident".to_string(), ident]);
        } else {
            self.invalid("invalid ebnf");
            self.token = Token::EOF;
        }
    }

    fn match_token(&mut self, expected_ch: char) {
        if !matches!(&self.token, Token::Char(ch) if *ch == expected_ch) {
            self.invalid(&format!("invalid ebnf ({} expected)", expected_ch));
        } else {
            self.get_token();
        }
    }

    fn add_ident(&mut self, ident: String) -> usize {
        if let Some(k) = self.idents.iter().position(|x| x == &ident) {
            k
        } else {
            self.idents.push(ident);
            let k = self.idents.len() - 1;
            self.ididx.push(None);
            k
        }
    }

    fn factor(&mut self) -> Result<Rule, String> {
        match &self.token.clone() {
            Token::Sequence(t) => {
                if t[0] == "ident" {
                    let idx = self.add_ident(t[1].clone());
                    let rule = Rule::Ident(t[1].clone(), idx);
                    self.get_token();
                    Ok(rule)
                } else if t[0] == "terminal" {
                    let rule = Rule::Terminal(t[1].clone());
                    self.get_token();
                    Ok(rule)
                } else {
                    Err("invalid token sequence".to_string())
                }
            }
            Token::Char('[') => {
                self.get_token();
                let expr = self.expression()?;
                self.match_token(']');
                Ok(Rule::Optional(Box::new(expr)))
            }
            Token::Char('(') => {
                self.get_token();
                let expr = self.expression()?;
                self.match_token(')');
                Ok(expr)
            }
            Token::Char('{') => {
                self.get_token();
                let expr = self.expression()?;
                self.match_token('}');
                Ok(Rule::Repeat(Box::new(expr)))
            }
            _ => Err("invalid token in factor() function".to_string()),
        }
    }

    fn term(&mut self) -> Result<Rule, String> {
        let mut factors = vec![self.factor()?];
        
        let stop_tokens = [Token::EOF, Token::Char('|'), Token::Char('.'), Token::Char(';'),
                          Token::Char(')'), Token::Char(']'), Token::Char('}')];
        
        while !stop_tokens.iter().any(|t| std::mem::discriminant(t) == std::mem::discriminant(&self.token)) {
            factors.push(self.factor()?);
        }
        
        if factors.len() == 1 {
            Ok(factors.into_iter().next().unwrap())
        } else {
            Ok(Rule::Sequence(factors))
        }
    }

    fn expression(&mut self) -> Result<Rule, String> {
        let first_term = self.term()?;
        
        if matches!(self.token, Token::Char('|')) {
            let mut terms = vec![first_term];
            while matches!(self.token, Token::Char('|')) {
                self.get_token();
                terms.push(self.term()?);
            }
            Ok(Rule::Or(terms))
        } else {
            Ok(first_term)
        }
    }

    fn production(&mut self) -> Result<Token, String> {
        // Returns a token or EOF; the real result is left in 'productions' etc,
        self.get_token();
        if matches!(self.token, Token::Char('}')) {
            return Ok(self.token.clone());
        }
        if matches!(self.token, Token::EOF) {
            self.invalid("invalid ebnf (missing closing })");
            return Ok(Token::EOF);
        }
        
        if let Token::Sequence(t) = &self.token {
            if t[0] == "ident" {
                let ident = t[1].clone();
                let idx = self.add_ident(ident.clone());
                self.get_token();
                self.match_token('=');
                if matches!(self.token, Token::EOF) {
                    return Ok(Token::EOF);
                }
                let expr = self.expression()?;
                self.productions.push((ident, idx, expr));
                self.ididx[idx] = Some(self.productions.len() - 1);
                return Ok(self.token.clone());
            }
        }
        
        Ok(Token::EOF)
    }

    pub fn parse(&mut self, ebnf: &str) -> i32 {
        // Returns +1 if ok, -1 if bad.
        println!("parse:\n{} ===>\n", ebnf);
        self.err = false;
        self.src = ebnf.to_string();
        self.sdx = 0;
        self.idents.clear();
        self.ididx.clear();
        self.productions.clear();
        self.extras.clear();
        
        self.get_token();
        if let Token::Sequence(mut t) = self.token.clone() {
            t[0] = "title".to_string();
            self.extras.push(t);
            self.get_token();
        }
        
        if !matches!(self.token, Token::Char('{')) {
            return self.invalid("invalid ebnf (missing opening {)");
        }
        
        loop {
            match self.production() {
                Ok(Token::Char('}')) | Ok(Token::EOF) => break,
                Err(_) => return -1,
                _ => continue,
            }
        }
        
        self.get_token();
        if let Token::Sequence(mut t) = self.token.clone() {
            t[0] = "comment".to_string();
            self.extras.push(t);
            self.get_token();
        }
        
        if !matches!(self.token, Token::EOF) {
            return self.invalid("invalid ebnf (missing eof?)");
        }
        if self.err {
            return -1;
        }
        
        // Check for undefined identifiers
        for (i, ididx_val) in self.ididx.iter().enumerate() {
            if ididx_val.is_none() {
                return self.invalid(&format!("invalid ebnf (undefined:{})", self.idents[i]));
            }
        }
        
        self.pprint_productions();
        self.pprint_idents();
        self.pprint_ididx();
        self.pprint_extras();
        1
    }

    fn pprint_productions(&self) {
        println!("\nproductions:");
        println!("{:?}", self.productions);
    }

    fn pprint_idents(&self) {
        println!("\nidents:");
        println!("{:?}", self.idents);
    }

    fn pprint_ididx(&self) {
        println!("\nididx:");
        println!("{:?}", self.ididx);
    }

    fn pprint_extras(&self) {
        println!("\nextras:");
        println!("{:?}", self.extras);
    }

    fn applies(&self, rule: &Rule, src: &str, sdx: &mut usize) -> bool {
        let was_sdx = *sdx;
        
        match rule {
            Rule::Sequence(rules) => {
                for rule in rules {
                    if !self.applies(rule, src, sdx) {
                        *sdx = was_sdx;
                        return false;
                    }
                }
                true
            }
            Rule::Terminal(terminal) => {
                self.skip_spaces_at(src, sdx);
                for ch in terminal.chars() {
                    if *sdx >= src.len() || src.chars().nth(*sdx).unwrap_or('\0') != ch {
                        *sdx = was_sdx;
                        return false;
                    }
                    *sdx += 1;
                }
                true
            }
            Rule::Or(rules) => {
                for rule in rules {
                    if self.applies(rule, src, sdx) {
                        return true;
                    }
                }
                *sdx = was_sdx;
                false
            }
            Rule::Repeat(rule) => {
                while self.applies(rule, src, sdx) {
                    // continue repeating
                }
                true
            }
            Rule::Optional(rule) => {
                self.applies(rule, src, sdx);
                true
            }
            Rule::Ident(_name, idx) => {
                if let Some(prod_idx) = self.ididx[*idx] {
                    if !self.applies(&self.productions[prod_idx].2, src, sdx) {
                        *sdx = was_sdx;
                        return false;
                    }
                    true
                } else {
                    false
                }
            }
        }
    }

    fn skip_spaces_at(&self, src: &str, sdx: &mut usize) {
        while *sdx < src.len() {
            let ch = src.chars().nth(*sdx).unwrap_or('\0');
            if ![' ', '\t', '\r', '\n'].contains(&ch) {
                break;
            }
            *sdx += 1;
        }
    }

    fn check_valid(&self, test: &str) {
        let mut sdx = 0;
        let res = self.applies(&self.productions[0].2, test, &mut sdx);
        self.skip_spaces_at(test, &mut sdx);
        let final_res = res && sdx >= test.len();
        let result = if final_res { "pass" } else { "fail" };
        println!("\"{}\", {}", test, result);
    }
}

fn main() {
    let mut parser = EBNFParser::new();
    
    let ebnfs = [
        r#""a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z" "#,
        r#"{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}"#,
        r#"a = "1""#,
        r#"{ a = "1" ;"#,
        r#"{ hello world = "1"; }"#,
        r#"{ foo = bar . }"#,
    ];
    
    let tests = [
        vec![
            "a1a3a4a4a5a6",
            "a1 a2a6",
            "a1 a3 a4 a6",
            "a1 a4 a5 a6",
            "a1 a2 a4 a5 a5 a6",
            "a1 a2 a4 a5 a6 a7",
            "your ad here",
        ],
        vec![
            "2",
            "2*3 + 4/23 - 7",
            "(3 + 4) * 6-2+(4*(4))",
            "-2",
            "3 +",
            "(4 + 3",
        ],
    ];
    
    for (i, ebnf) in ebnfs.iter().enumerate() {
        if parser.parse(ebnf) == 1 {
            println!("\ntests:");
            if i < tests.len() {
                for test in &tests[i] {
                    parser.check_valid(test);
                }
            }
        }
        println!();
    }
}
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>

invalid ebnf (missing eof?)

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>

invalid ebnf (= expected)

parse:
a = "1" ===>

invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>

invalid ebnf (missing closing })

parse:
{ hello world = "1"; } ===>

invalid ebnf (= expected)

parse:
{ foo = bar . } ===>

invalid ebnf (undefined:bar)




Works with: Scala version 2.13.8
Translation of: Java
import scala.collection.mutable
import scala.util.{Try, Success, Failure}

case class Token(value: Any, isSequence: Boolean)

class EBNFParser {
  type Sequence = mutable.ArrayBuffer[Any]
  
  private var src: String = ""
  private var ch: Char = ' '
  private var sdx: Int = 0
  private var token: Token = Token(-1, false)
  private var err: Boolean = false
  private val idents: mutable.ArrayBuffer[String] = mutable.ArrayBuffer[String]()
  private val ididx: mutable.ArrayBuffer[Int] = mutable.ArrayBuffer[Int]()
  private val productions: mutable.ArrayBuffer[Sequence] = mutable.ArrayBuffer[Sequence]()
  private val extras: Sequence = mutable.ArrayBuffer[Any]()
  private val results = Array("pass", "fail")
  
  private def btoi(b: Boolean): Int = if (b) 1 else 0
  
  private def invalid(msg: String): Int = {
    err = true
    println(msg)
    sdx = src.length // set to eof
    -1
  }
  
  private def skipSpaces(): Unit = {
    while (sdx < src.length) {
      ch = src(sdx)
      if (!" \t\r\n".contains(ch)) {
        return
      }
      sdx += 1
    }
  }
  
  private def getToken(): Unit = {
    // Yields a single character token, one of {}()[]|=.;
    // or Sequence("terminal", string) or Sequence("ident", string) or -1.
    skipSpaces()
    if (sdx >= src.length) {
      token = Token(-1, false)
      return
    }
    val tokstart = sdx
    if ("{}()[]|=.;".contains(ch)) {
      sdx += 1
      token = Token(ch, false)
    } else if (ch == '"' || ch == '\'') {
      val closech = ch
      var tokend = tokstart + 1
      while (tokend < src.length && src(tokend) != closech) {
        tokend += 1
      }
      if (tokend >= src.length) {
        token = Token(invalid("no closing quote"), false)
      } else {
        sdx = tokend + 1
        val seq = mutable.ArrayBuffer[Any]("terminal", src.substring(tokstart + 1, tokend))
        token = Token(seq, true)
      }
    } else if (ch >= 'a' && ch <= 'z') {
      // To simplify things for the purposes of this task,
      // identifiers are strictly a-z only, not A-Z or 1-9.
      while (sdx < src.length && ch >= 'a' && ch <= 'z') {
        sdx += 1
        if (sdx < src.length) {
          ch = src(sdx)
        }
      }
      val seq = mutable.ArrayBuffer[Any]("ident", src.substring(tokstart, sdx))
      token = Token(seq, true)
    } else {
      token = Token(invalid("invalid ebnf"), false)
    }
  }
  
  private def matchToken(expectedCh: Char): Unit = {
    if (token.value != expectedCh) {
      token = Token(invalid(s"invalid ebnf ($expectedCh expected)"), false)
    } else {
      getToken()
    }
  }
  
  private def addIdent(ident: String): Int = {
    idents.indexOf(ident) match {
      case -1 =>
        idents += ident
        val k = idents.length - 1
        ididx += -1
        k
      case k => k
    }
  }
  
  private def factor(): Any = {
    val res = if (token.isSequence) {
      val t = token.value.asInstanceOf[Sequence]
      if (t(0) == "ident") {
        val idx = addIdent(t(1).asInstanceOf[String])
        t += idx
        token = Token(t, true)
      }
      val result = token.value
      getToken()
      result
    } else if (token.value == '[') {
      getToken()
      val seq = mutable.ArrayBuffer[Any]("optional", expression())
      matchToken(']')
      seq
    } else if (token.value == '(') {
      getToken()
      val result = expression()
      matchToken(')')
      result
    } else if (token.value == '{') {
      getToken()
      val seq = mutable.ArrayBuffer[Any]("repeat", expression())
      matchToken('}')
      seq
    } else {
      throw new RuntimeException("invalid token in factor() function")
    }
    
    res match {
      case seq: mutable.ArrayBuffer[_] if seq.length == 1 => seq(0)
      case other => other
    }
  }
  
  private def term(): Any = {
    val res = mutable.ArrayBuffer[Any](factor())
    val tokens = Set[Any](-1, '|', '.', ';', ')', ']', '}')
    
    while (!tokens.contains(token.value)) {
      res += factor()
    }
    
    if (res.length == 1) res(0) else res
  }
  
  private def expression(): Any = {
    var res = mutable.ArrayBuffer[Any](term())
    if (token.value == '|') {
      res = mutable.ArrayBuffer[Any]("or", res(0))
      while (token.value == '|') {
        getToken()
        res += term()
      }
    }
    if (res.length == 1) res(0) else res
  }
  
  private def production(): Any = {
    // Returns a token or -1; the real result is left in 'productions' etc,
    getToken()
    if (token.value != '}') {
      if (token.value == -1) {
        return invalid("invalid ebnf (missing closing })")
      }
      if (!token.isSequence) {
        return -1
      }
      val t = token.value.asInstanceOf[Sequence]
      if (t(0) != "ident") {
        return -1
      }
      val ident = t(1).asInstanceOf[String]
      val idx = addIdent(ident)
      getToken()
      matchToken('=')
      if (token.value == -1) {
        return -1
      }
      val prod = mutable.ArrayBuffer[Any](ident, idx, expression())
      productions += prod
      ididx(idx) = productions.length - 1
    }
    token.value
  }
  
  def parse(ebnf: String): Int = {
    // Returns +1 if ok, -1 if bad.
    println(s"parse:\n$ebnf ===>\n")
    err = false
    src = ebnf
    sdx = 0
    idents.clear()
    ididx.clear()
    productions.clear()
    extras.clear()
    
    getToken()
    if (token.isSequence) {
      val t = token.value.asInstanceOf[Sequence]
      t(0) = "title"
      extras += token.value
      getToken()
    }
    if (token.value != '{') {
      return invalid("invalid ebnf (missing opening {)")
    }
    
    var continue = true
    while (continue) {
      val tokenResult = production()
      if (tokenResult == '}' || tokenResult == -1) {
        continue = false
      }
    }
    
    getToken()
    if (token.isSequence) {
      val t = token.value.asInstanceOf[Sequence]
      t(0) = "comment"
      extras += token.value
      getToken()
    }
    if (token.value != -1) {
      return invalid("invalid ebnf (missing eof?)")
    }
    if (err) {
      return -1
    }
    
    val k = ididx.indexOf(-1)
    if (k != -1) {
      return invalid(s"invalid ebnf (undefined:${idents(k)})")
    }
    
    pprint(productions, "productions")
    pprint(idents, "idents")
    pprint(ididx, "ididx")
    pprint(extras, "extras")
    1
  }
  
  // Adjusts Scala's normal printing to look more like Phix output.
  private def pprint(ob: Any, header: String): Unit = {
    println(s"\n$header:")
    var pp = ob.toString
    pp = pp.replace("ArrayBuffer(", "{")
    pp = pp.replace(")", "}")
    pp = pp.replace(" ", ", ")
    for (i <- idents.indices) {
      pp = pp.replace(i.toString, i.toString)
    }
    println(pp)
  }
  
  // The rules that applies() has to deal with are:
  // {factors} - if rule(0) is not string,
  // just apply one after the other recursively.
  // {"terminal", "a1"}       -- literal constants
  // {"or", <e1>, <e2>, ...}  -- (any) one of n
  // {"repeat", <e1>}         -- as per "{}" in ebnf
  // {"optional", <e1>}       -- as per "[]" in ebnf
  // {"ident", <name>, idx}   -- apply the sub-rule
  
  private def applies(rule: Sequence): Boolean = {
    val wasSdx = sdx // in case of failure
    val r1 = rule(0)
    
    val result = r1 match {
      case s: String => s match {
        case "terminal" =>
          skipSpaces()
          val r2 = rule(1).asInstanceOf[String]
          r2.forall { char =>
            if (sdx >= src.length || src(sdx) != char) {
              false
            } else {
              sdx += 1
              true
            }
          }
        case "or" =>
          (1 until rule.length).exists(i => applies(rule(i).asInstanceOf[Sequence]))
        case "repeat" =>
          while (applies(rule(1).asInstanceOf[Sequence])) {
            // continue repeating
          }
          true
        case "optional" =>
          applies(rule(1).asInstanceOf[Sequence])
          true
        case "ident" =>
          val i = rule(2).asInstanceOf[Int]
          val ii = ididx(i)
          applies(productions(ii)(2).asInstanceOf[Sequence])
        case _ =>
          throw new RuntimeException("invalid rule in applies() function")
      }
      case _ =>
        // {factors} - if rule(0) is not string, apply one after the other recursively
        rule.forall(r => applies(r.asInstanceOf[Sequence]))
    }
    
    if (!result) {
      sdx = wasSdx
    }
    result
  }
  
  private def checkValid(test: String): Unit = {
    src = test
    sdx = 0
    var res = applies(productions(0)(2).asInstanceOf[Sequence])
    skipSpaces()
    if (sdx < src.length) {
      res = false
    }
    println(s""""$test", ${results(1 - btoi(res))}""")
  }
  
  def main(args: Array[String]): Unit = {
    val ebnfs = Array(
      """"a" {
        |    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
        |} "z" """.stripMargin,
      
      """{
        |    expr = term { plus term } .
        |    term = factor { times factor } .
        |    factor = number | '(' expr ')' .
        | 
        |    plus = "+" | "-" .
        |    times = "*" | "/" .
        | 
        |    number = digit { digit } .
        |    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
        |}""".stripMargin,
      
      """a = "1"""",
      """{ a = "1" ;""",
      """{ hello world = "1"; }""",
      """{ foo = bar . }"""
    )
    
    val tests = Array(
      Array(
        "a1a3a4a4a5a6",
        "a1 a2a6",
        "a1 a3 a4 a6",
        "a1 a4 a5 a6",
        "a1 a2 a4 a5 a5 a6",
        "a1 a2 a4 a5 a6 a7",
        "your ad here"
      ),
      Array(
        "2",
        "2*3 + 4/23 - 7",
        "(3 + 4) * 6-2+(4*(4))",
        "-2",
        "3 +",
        "(4 + 3"
      )
    )
    
    for (i <- ebnfs.indices) {
      if (parse(ebnfs(i)) == 1) {
        println("\ntests:")
        if (i < tests.length) {
          tests(i).foreach(checkValid)
        }
      }
      println()
    }
  }
}

object EBNFParser {
  def main(args: Array[String]): Unit = {
    val parser = new EBNFParser()
    parser.main(args)
  }
}
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>


productions:
{{a,, 0,, {{terminal,, a1},, {or,, {terminal,, a2},, {terminal,, a3}},, {repeat,, {terminal,, a4}},, {optional,, {terminal,, a5}},, {terminal,, a6}}}}

idents:
{a}

ididx:
{0}

extras:
{{title,, a},, {comment,, z}}

tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>


productions:
{{expr,, 0,, {{ident,, term,, 1},, {repeat,, {{ident,, plus,, 2},, {ident,, term,, 1}}}}},, {term,, 1,, {{ident,, factor,, 3},, {repeat,, {{ident,, times,, 4},, {ident,, factor,, 3}}}}},, {factor,, 3,, {or,, {ident,, number,, 5},, {{terminal,, (},, {ident,, expr,, 0},, {terminal,, }}}}},, {plus,, 2,, {or,, {terminal,, +},, {terminal,, -}}},, {times,, 4,, {or,, {terminal,, *},, {terminal,, /}}},, {number,, 5,, {{ident,, digit,, 6},, {repeat,, {ident,, digit,, 6}}}},, {digit,, 6,, {or,, {terminal,, 0},, {terminal,, 1},, {terminal,, 2},, {terminal,, 3},, {terminal,, 4},, {terminal,, 5},, {terminal,, 6},, {terminal,, 7},, {terminal,, 8},, {terminal,, 9}}}}

idents:
{expr,, term,, plus,, factor,, times,, number,, digit}

ididx:
{0,, 1,, 3,, 2,, 4,, 5,, 6}

extras:
{}

tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail

parse:
a = "1" ===>

invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>

invalid ebnf (missing closing })

parse:
{ hello world = "1"; } ===>

invalid ebnf (= expected)

parse:
{ foo = bar . } ===>

invalid ebnf (undefined:bar)



Works with: Swift
Translation of: Kotlin
import Foundation

class EBNFParser {
    // Data structures for better organization
    private struct Token {
        let value: Any
        let isSequence: Bool
        
        init(_ value: Any, _ isSequence: Bool) {
            self.value = value
            self.isSequence = isSequence
        }
    }
    
    private class Sequence {
        private var items: [Any] = []
        
        init(_ items: Any...) {
            self.items = Array(items)
        }
        
        subscript(index: Int) -> Any {
            get { return items[index] }
            set { items[index] = newValue }
        }
        
        func add(_ item: Any) {
            items.append(item)
        }
        
        var count: Int {
            return items.count
        }
        
        var size: Int {
            return items.count
        }
        
        func contains(_ item: Any) -> Bool {
            for i in items {
                if let str1 = i as? String, let str2 = item as? String {
                    if str1 == str2 { return true }
                }
                if let int1 = i as? Int, let int2 = item as? Int {
                    if int1 == int2 { return true }
                }
                if let char1 = i as? Character, let char2 = item as? Character {
                    if char1 == char2 { return true }
                }
            }
            return false
        }
        
        var description: String {
            return items.map { "\($0)" }.joined(separator: ", ")
        }
    }
    
    private var src: String = ""
    private var ch: Character = " "
    private var sdx: Int = 0
    private var token: Token = Token(-1, false)
    private var err: Bool = false
    private var idents: [String] = []
    private var ididx: [Int] = []
    private var productions: [Sequence] = []
    private var extras = Sequence()
    private let results = ["pass", "fail"]
    
    private func boolToInt(_ value: Bool) -> Int {
        return value ? 1 : 0
    }
    
    private func invalid(_ msg: String) -> Int {
        err = true
        print(msg)
        sdx = src.count // set to eof
        return -1
    }
    
    private func skipSpaces() {
        while sdx < src.count {
            let index = src.index(src.startIndex, offsetBy: sdx)
            ch = src[index]
            if !(" \t\r\n".contains(ch)) {
                break
            }
            sdx += 1
        }
    }
    
    private func getToken() {
        // Yields a single character token, one of {}()[]|=.;
        // or {"terminal",string} or {"ident", string} or -1.
        skipSpaces()
        if sdx >= src.count {
            token = Token(-1, false)
            return
        }
        let tokstart = sdx
        
        if "{}()[]|=.;".contains(ch) {
            sdx += 1
            token = Token(ch, false)
        } else if ch == "\"" || ch == "'" {
            let closech = ch
            var tokend = tokstart + 1
            while tokend < src.count {
                let index = src.index(src.startIndex, offsetBy: tokend)
                if src[index] == closech {
                    break
                }
                tokend += 1
            }
            if tokend >= src.count {
                token = Token(invalid("no closing quote"), false)
            } else {
                sdx = tokend + 1
                let startIndex = src.index(src.startIndex, offsetBy: tokstart + 1)
                let endIndex = src.index(src.startIndex, offsetBy: tokend)
                let substring = String(src[startIndex..<endIndex])
                token = Token(Sequence("terminal", substring), true)
            }
        } else if ch.isLowercase && ch.isLetter {
            // To simplify things for the purposes of this task,
            // identifiers are strictly a-z only, not A-Z or 1-9.
            while sdx < src.count {
                let index = src.index(src.startIndex, offsetBy: sdx)
                ch = src[index]
                if !ch.isLowercase || !ch.isLetter {
                    break
                }
                sdx += 1
            }
            let startIndex = src.index(src.startIndex, offsetBy: tokstart)
            let endIndex = src.index(src.startIndex, offsetBy: sdx)
            let substring = String(src[startIndex..<endIndex])
            token = Token(Sequence("ident", substring), true)
        } else {
            token = Token(invalid("invalid ebnf"), false)
        }
    }
    
    private func matchToken(_ expectedCh: Character) {
        if let tokenChar = token.value as? Character, tokenChar != expectedCh {
            token = Token(invalid("invalid ebnf (\(expectedCh) expected)"), false)
        } else if !(token.value is Character) {
            token = Token(invalid("invalid ebnf (\(expectedCh) expected)"), false)
        } else {
            getToken()
        }
    }
    
    private func addIdent(_ ident: String) -> Int {
        if let k = idents.firstIndex(of: ident) {
            return k
        } else {
            idents.append(ident)
            let k = idents.count - 1
            ididx.append(-1)
            return k
        }
    }
    
    private func factor() -> Any {
        let res: Any
        
        if token.isSequence {
            let t = token.value as! Sequence
            if let firstItem = t[0] as? String, firstItem == "ident" {
                let identName = t[1] as! String
                let idx = addIdent(identName)
                t.add(idx)
                token = Token(t, true)
            }
            res = token.value
            getToken()
        } else if let tokenChar = token.value as? Character {
            switch tokenChar {
            case "[":
                getToken()
                let result = Sequence("optional", expression())
                matchToken("]")
                res = result
            case "(":
                getToken()
                let result = expression()
                matchToken(")")
                res = result
            case "{":
                getToken()
                let result = Sequence("repeat", expression())
                matchToken("}")
                res = result
            default:
                fatalError("invalid token in factor() function")
            }
        } else {
            fatalError("invalid token in factor() function")
        }
        
        if let sequence = res as? Sequence, sequence.count == 1 {
            return sequence[0]
        }
        return res
    }
    
    private func term() -> Any {
        let res = Sequence(factor())
        let tokens: Set<Character> = ["|", ".", ";", ")", "]", "}"]
        
        while true {
            if let tokenChar = token.value as? Character, tokens.contains(tokenChar) {
                break
            }
            if let tokenInt = token.value as? Int, tokenInt == -1 {
                break
            }
            res.add(factor())
        }
        
        return res.count == 1 ? res[0] : res
    }
    
    private func expression() -> Any {
        var res = Sequence(term())
        if let tokenChar = token.value as? Character, tokenChar == "|" {
            res = Sequence("or", res[0])
            while let tokenChar = token.value as? Character, tokenChar == "|" {
                getToken()
                res.add(term())
            }
        }
        return res.count == 1 ? res[0] : res
    }
    
    private func production() -> Any {
        // Returns a token or -1; the real result is left in 'productions' etc,
        getToken()
        if let tokenChar = token.value as? Character, tokenChar != "}" {
            if let tokenInt = token.value as? Int, tokenInt == -1 {
                return invalid("invalid ebnf (missing closing })")
            }
            if !token.isSequence {
                return -1
            }
            let t = token.value as! Sequence
            if let firstItem = t[0] as? String, firstItem != "ident" {
                return -1
            }
            let ident = t[1] as! String
            let idx = addIdent(ident)
            getToken()
            matchToken("=")
            if let tokenInt = token.value as? Int, tokenInt == -1 {
                return -1
            }
            productions.append(Sequence(ident, idx, expression()))
            ididx[idx] = productions.count - 1
        }
        return token.value
    }
    
    private func parse(_ ebnf: String) -> Int {
        // Returns +1 if ok, -1 if bad.
        print("parse:\n\(ebnf) ===>\n")
        err = false
        src = ebnf
        sdx = 0
        idents.removeAll()
        ididx.removeAll()
        productions.removeAll()
        extras = Sequence()
        
        getToken()
        if token.isSequence {
            let t = token.value as! Sequence
            t[0] = "title"
            extras.add(token.value)
            getToken()
        }
        if let tokenChar = token.value as? Character, tokenChar != "{" {
            return invalid("invalid ebnf (missing opening {)")
        }
        
        while true {
            let tokenResult = production()
            if let tokenChar = tokenResult as? Character, tokenChar == "}" {
                break
            }
            if let tokenInt = tokenResult as? Int, tokenInt == -1 {
                break
            }
        }
        
        getToken()
        if token.isSequence {
            let t = token.value as! Sequence
            t[0] = "comment"
            extras.add(token.value)
            getToken()
        }
        if let tokenInt = token.value as? Int, tokenInt != -1 {
            return invalid("invalid ebnf (missing eof?)")
        }
        if err {
            return -1
        }
        
        if let k = ididx.firstIndex(of: -1) {
            return invalid("invalid ebnf (undefined:\(idents[k]))")
        }
        
        pprint(productions, "productions")
        pprint(idents, "idents")
        pprint(ididx, "ididx")
        pprint(extras, "extras")
        return 1
    }
    
    // Adjusts Swift's normal printing to look more like Phix output.
    private func pprint(_ ob: Any, _ header: String) {
        print("\n\(header):")
        var pp = "\(ob)"
        pp = pp.replacingOccurrences(of: "[", with: "{")
        pp = pp.replacingOccurrences(of: "]", with: "}")
        pp = pp.replacingOccurrences(of: " ", with: ", ")
        for i in idents.indices {
            pp = pp.replacingOccurrences(of: "\(i)", with: "\(i)")
        }
        print(pp)
    }
    
    // The rules that applies() has to deal with are:
    // {factors} - if rule[0] is not string,
    // just apply one after the other recursively.
    // {"terminal", "a1"}       -- literal constants
    // {"or", <e1>, <e2>, ...}  -- (any) one of n
    // {"repeat", <e1>}         -- as per "{}" in ebnf
    // {"optional", <e1>}       -- as per "[]" in ebnf
    // {"ident", <name>, idx}   -- apply the sub-rule
    
    private func applies(_ rule: Sequence) -> Bool {
        let wasSdx = sdx // in case of failure
        let r1 = rule[0]
        
        if let r1String = r1 as? String {
            switch r1String {
            case "terminal":
                skipSpaces()
                let r2 = rule[1] as! String
                for char in r2 {
                    if sdx >= src.count {
                        sdx = wasSdx
                        return false
                    }
                    let index = src.index(src.startIndex, offsetBy: sdx)
                    if src[index] != char {
                        sdx = wasSdx
                        return false
                    }
                    sdx += 1
                }
            case "or":
                for i in 1..<rule.count {
                    if applies(rule[i] as! Sequence) {
                        return true
                    }
                }
                sdx = wasSdx
                return false
            case "repeat":
                while applies(rule[1] as! Sequence) {
                    // continue repeating
                }
            case "optional":
                _ = applies(rule[1] as! Sequence)
            case "ident":
                let i = rule[2] as! Int
                let ii = ididx[i]
                if !applies(productions[ii][2] as! Sequence) {
                    sdx = wasSdx
                    return false
                }
            default:
                fatalError("invalid rule in applies() function")
            }
        } else {
            // r1 is not a string, so apply each rule in sequence
            for i in 0..<rule.count {
                if !applies(rule[i] as! Sequence) {
                    sdx = wasSdx
                    return false
                }
            }
        }
        return true
    }
    
    private func checkValid(_ test: String) {
        src = test
        sdx = 0
        var res = applies(productions[0][2] as! Sequence)
        skipSpaces()
        if sdx < src.count {
            res = false
        }
        print("\"\(test)\", \(results[1 - boolToInt(res)])")
    }
    
    func run() {
        let ebnfs = [
            "\"a\" {\n" +
            "    a = \"a1\" ( \"a2\" | \"a3\" ) { \"a4\" } [ \"a5\" ] \"a6\" ;\n" +
            "} \"z\" ",
            
            "{\n" +
            "    expr = term { plus term } .\n" +
            "    term = factor { times factor } .\n" +
            "    factor = number | '(' expr ')' .\n" +
            " \n" +
            "    plus = \"+\" | \"-\" .\n" +
            "    times = \"*\" | \"/\" .\n" +
            " \n" +
            "    number = digit { digit } .\n" +
            "    digit = \"0\" | \"1\" | \"2\" | \"3\" | \"4\" | \"5\" | \"6\" | \"7\" | \"8\" | \"9\" .\n" +
            "}",
            
            "a = \"1\"",
            "{ a = \"1\" ;",
            "{ hello world = \"1\"; }",
            "{ foo = bar . }"
        ]
        
        let tests = [
            [
                "a1a3a4a4a5a6",
                "a1 a2a6",
                "a1 a3 a4 a6",
                "a1 a4 a5 a6",
                "a1 a2 a4 a5 a5 a6",
                "a1 a2 a4 a5 a6 a7",
                "your ad here"
            ],
            [
                "2",
                "2*3 + 4/23 - 7",
                "(3 + 4) * 6-2+(4*(4))",
                "-2",
                "3 +",
                "(4 + 3"
            ]
        ]
        
        for i in ebnfs.indices {
            if parse(ebnfs[i]) == 1 {
                print("\ntests:")
                if i < tests.count {
                    for test in tests[i] {
                        checkValid(test)
                    }
                }
            }
            print()
        }
    }
}

// Usage
let parser = EBNFParser()
parser.run()
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>

invalid ebnf

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>

invalid ebnf

parse:
a = "1" ===>

invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>


productions:
{}

idents:
{}

ididx:
{}

extras:
Main.EBNFParser.(unknown, context, at, $5564c3abf008).Sequence

tests:

parse:
{ hello world = "1"; } ===>


productions:
{}

idents:
{}

ididx:
{}

extras:
Main.EBNFParser.(unknown, context, at, $5564c3abf008).Sequence

tests:

parse:
{ foo = bar . } ===>


productions:
{}

idents:
{}

ididx:
{}

extras:
Main.EBNFParser.(unknown, context, at, $5564c3abf008).Sequence

...




This example is in need of improvement:

This is not an EBNF parser. It never uses EBNF. It is a calculator parser, but there is already a calculator parser at Arithmetic evaluation#Tcl. One should adjust this solution to parse the EBNF language, not the calculator language.

Demonstration lexer and parser. Note that this parser supports parenthesized expressions, making the grammar recursive.

package require Tcl 8.6

# Utilities to make the coroutine easier to use
proc provide args {while {![yield $args]} {yield}}
proc next lexer {$lexer 1}
proc pushback lexer {$lexer 0}

# Lexical analyzer coroutine core
proc lexer {str} {
    yield [info coroutine]
    set symbols {+ PLUS - MINUS * MULT / DIV ( LPAR ) RPAR}
    set idx 0
    while 1 {
	switch -regexp -matchvar m -- $str {
	    {^\s+} {
		# No special action for whitespace
	    }
	    {^([-+*/()])} {
		provide [dict get $symbols [lindex $m 1]] [lindex $m 1] $idx
	    }
	    {^(\d+)} {
		provide NUMBER [lindex $m 1] $idx
	    }
	    {^$} {
		provide EOT "EOT" $idx
		return
	    }
	    . {
		provide PARSE_ERROR [lindex $m 0] $idx
	    }
	}
	# Trim the matched string
	set str [string range $str [string length [lindex $m 0]] end]
	incr idx [string length [lindex $m 0]]
    }
}

# Utility functions to help with making an LL(1) parser; ParseLoop handles
# EBNF looping constructs, ParseSeq handles sequence constructs.
proc ParseLoop {lexer def} {
    upvar 1 token token payload payload index index
    foreach {a b} $def {
	if {$b ne "-"} {set b [list set c $b]}
	lappend m $a $b
    }
    lappend m default {pushback $lexer; break}
    while 1 {
	lassign [next $lexer] token payload index
	switch -- $token {*}$m
	if {[set c [catch {uplevel 1 $c} res opt]]} {
	    dict set opt -level [expr {[dict get $opt -level]+1}]
	    return -options $opt $res
	}
    }
}
proc ParseSeq {lexer def} {
    upvar 1 token token payload payload index index
    foreach {t s} $def {
	lassign [next $lexer] token payload index
	switch -- $token $t {
	    if {[set c [catch {uplevel 1 $s} res opt]]} {
		dict set opt -level [expr {[dict get $opt -level]+1}]
		return -options $opt $res
	    }
	} EOT {
	    throw SYNTAX "end of text at position $index"
	} default {
	    throw SYNTAX "\"$payload\" at position $index"
	}
    }
}

# Main parser driver; contains "master" grammar that ensures that the whole
# text is matched and not just a prefix substring. Note also that the parser
# runs the lexer as a coroutine (with a fixed name in this basic demonstration
# code).
proc parse {str} {
    set lexer [coroutine l lexer $str]
    try {
	set parsed [parse.expr $lexer]
	ParseLoop $lexer {
	    EOT {
		return $parsed
	    }
	}
	throw SYNTAX "\"$payload\" at position $index"
    } trap SYNTAX msg {
	return -code error "syntax error: $msg"
    } finally {
	catch {rename $lexer ""}
    }
}

# Now the descriptions of how to match each production in the grammar...
proc parse.expr {lexer} {
    set expr [parse.term $lexer]
    ParseLoop $lexer {
	PLUS - MINUS {
	    set expr [list $token $expr [parse.term $lexer]]
	}
    }
    return $expr
}
proc parse.term {lexer} {
    set term [parse.factor $lexer]
    ParseLoop $lexer {
	MULT - DIV {
	    set term [list $token $term [parse.factor $lexer]]
	}
    }
    return $term
}
proc parse.factor {lexer} {
    ParseLoop $lexer {
	NUMBER {
	    return $payload
	}
	MINUS {
	    ParseSeq $lexer {
		NUMBER {return -$payload}
	    }
	}
	LPAR {
	    set result [parse.expr $lexer]
	    ParseSeq $lexer {
		RPAR {return $result}
	    }
	    break
	}
	EOT {
	    throw SYNTAX "end of text at position $index"
	}
    }
    throw SYNTAX "\"$payload\" at position $index"
}
# Demonstration code
puts [parse "1 - 2 - -3 * 4 + 5"]
puts [parse "1 - 2 - -3 * (4 + 5)"]

Output:

PLUS {MINUS {MINUS 1 2} {MULT -3 4}} 5
MINUS {MINUS 1 2} {MULT -3 {PLUS 4 5}}
Translation of: Phix
Library: Wren-fmt
Library: Wren-iterate
Library: Wren-str
Library: Wren-seq

Translated via the Go entry.

import "./fmt" for Conv, Fmt
import "./iterate" for Stepped
import "./str" for Char
import "./seq" for Lst

var src         = ""
var ch          = ""
var sdx         = 0
var token       = null
var isSeq       = false
var err         = false
var idents      = []
var ididx       = []
var productions = []
var extras      = []
var results     = ["pass", "fail"]

var invalid = Fn.new { |msg|
    err = true
    System.print(msg)
    sdx = src.count // set to EOF
    return -1
}

var skipSpaces = Fn.new {
    while (true) {
        if (sdx >= src.count) return
        ch = src[sdx]
        if (!" \t\r\n".contains(ch)) return
        sdx = sdx + 1
    }
}

var getToken = Fn.new {
    // Yields a single character token, one of {}()[]|=.;
    // or {"terminal",string} or {"ident", string} or -1.
    skipSpaces.call()
    if (sdx >= src.count) {
        token = -1
        isSeq = false
        return
    }
    var tokstart = sdx
    if ("{}()[]|=.;".contains(ch)) {
        sdx = sdx + 1
        token = ch
        isSeq = false
    } else if (ch == "\"" || ch == "'") {
        var closech = ch
        for (tokend in Stepped.ascend(sdx + 1...src.count)) {
            if (src[tokend] == closech) {
                sdx = tokend + 1
                token = ["terminal", src[tokstart+1...tokend]]
                isSeq = true
                return
            }
        }
        token = invalid.call("no closing quote")
        isSeq = false
    } else if (Char.isAsciiLower(ch)) {
        // To simplify things for the purposes of this task,
        // identifiers are strictly a-z only, not A-Z or 1-9.
        while (true) {
            sdx = sdx + 1
            if (sdx >= src.count) break
            ch = src[sdx]
            if (!Char.isAsciiLower(ch)) break
        }
        token = ["ident", src[tokstart...sdx]]
        isSeq = true
    } else {
        token = invalid.call("invalid ebnf")
        isSeq = false
    }
}

var matchToken = Fn.new { |ch|
    if (token != ch) {
        token = invalid.call("invalid ebnf (%(ch) expected)")
        isSeq = false
    } else {
        getToken.call()
    }
}

var addIdent = Fn.new { |ident|
    var k = -1
    var i = 0
    for (id in idents) {
        if (id == ident) {
            k = i
            break
        }
        i =  i + 1
    }
    if (k == -1) {
        idents.add(ident)
        k = idents.count - 1
        ididx.add(-1)
    }
    return k
}

var expression // forward declaration

var factor = Fn.new {
    var res
    if (isSeq) {
        if (token[0] == "ident") {
            var idx = addIdent.call(token[1])
            token.add(idx)
        }
        res = token
        getToken.call()
    } else if (token == "[") {
        getToken.call()
        res = ["optional", expression.call()]
        matchToken.call("]")
    } else if (token == "(") {
        getToken.call()
        res = expression.call()
        matchToken.call(")")
    } else if (token == "{") {
        getToken.call()
        res = ["repeat", expression.call()]
        matchToken.call("}")
    } else {
        Fiber.abort("invalid token in factor() function")
    }
    if (res is Sequence && res.count == 1) return res[0]
    return res
}

var term = Fn.new {
    var res = [factor.call()]
    var tokens = [-1, "|", ".", ";", ")", "]", "}"]
    while (true) {
        var outer = false
        for (t in tokens) {
            if (t == token) {
                outer = true
                break
            }
        }
        if (outer) break
        res.add(factor.call())
    }
    if (res.count == 1) return res[0]
    return res
}

// declared earlier
expression = Fn.new {
    var res = [term.call()]
    if (token == "|") {
        res = ["or", res[0]]
        while (token == "|") {
            getToken.call()
            res.add(term.call())
        }
    }
    if (res.count == 1) return res[0]
    return res
}

var production = Fn.new {
    // Returns a token or -1; the real result is left in 'productions' etc,
    getToken.call()
    if (token != "}") {
        if (token == -1) return invalid.call("invalid ebnf (missing closing })")
        if (!isSeq) return -1
        if (token[0] != "ident") return -1
        var ident = token[1]
        var idx = addIdent.call(ident)
        getToken.call()
        matchToken.call("=")
        if (token == -1) return -1
        productions.add([ident, idx, expression.call()])
        ididx[idx] = productions.count - 1
    }
    return token
}

// Adjusts Wren's normal printing of lists to look more like Phix output.
var pprint = Fn.new { |ob, header|
    Fmt.print("\n$s:", header)
    var quote // recursive closure
    quote = Fn.new { |list|
        for (i in 0...list.count) {
            if (list[i] is String) {
                list[i] = Fmt.swrite("$q", list[i])
            } else if (list[i] is List) quote.call(list[i])
         }
    }
    var obs
    if (ob is String) {
        obs = Fmt.swrite("$q", ob)
    } else if (ob is List) {
        obs = Lst.clone(ob)
        quote.call(obs)
    }
    var pp = obs.toString
    pp = pp.replace("[", "{")
    pp = pp.replace("]", "}")
    for (i in 0...idents.count) {
        var xs = Fmt.swrite("'\\x$02d'", i)
        pp = pp.replace(xs, i.toString)
    }
    System.print(pp)
}

var parse = Fn.new { |ebnf|
    // Returns +1 if ok, -1 if bad.
    System.print("parse:\n%(ebnf) ===>")
    err = false
    src = ebnf
    sdx = 0
    idents.clear()
    ididx.clear()
    productions.clear()
    extras.clear()
    getToken.call()
    if (isSeq) {
        token[0] = "title"
        extras.add(token)
        getToken.call()
    }
    if (token != "{") return invalid.call("invalid ebnf (missing opening {)")
    while (true) {
        token = production.call()
        if (token == "}" || token == -1) break
    }
    getToken.call()
    if (isSeq) {
        token[0] = "comment"
        extras.add(token)
        getToken.call()
    }
    if (token != -1) return invalid.call("invalid ebnf (missing eof?)")
    if (err) return -1
    var k = -1
    var i = 0
    for (idx in ididx) {
        if (idx == -1) {
            k = i
            break
        }
        i = i + 1
    }
    if (k != -1) return invalid.call("invalid ebnf (undefined:%(idents[k]))")
    pprint.call(productions, "productions")
    pprint.call(idents, "idents")
    pprint.call(ididx, "ididx")
    pprint.call(extras, "extras")
    return 1
}

// The rules that applies() has to deal with are:
// {factors} - if rule[0] is not string,
// just apply one after the other recursively.
// {"terminal", "a1"}       -- literal constants
// {"or", <e1>, <e2>, ...}  -- (any) one of n
// {"repeat", <e1>}         -- as per "{}" in ebnf
// {"optional", <e1>}       -- as per "[]" in ebnf
// {"ident", <name>, idx}   -- apply the sub-rule
var applies // recursive function
applies = Fn.new { |rule|
    var wasSdx = sdx // in case of failure
    var r1 = rule[0]
    if (!(r1 is String)) {
        for (i in 0...rule.count) {
            if (!applies.call(rule[i])) {
                sdx = wasSdx
                return false
            }
        }
    } else if (r1 == "terminal") {
        skipSpaces.call()
        var r2 = rule[1]
        for (i in 0...r2.count) {
            if (sdx >= src.count || src[sdx] != r2[i]) {
                sdx = wasSdx
                return false
            }
            sdx = sdx + 1
        }
    } else if (r1 == "or") {
        for (i in Stepped.ascend(1...rule.count)) {
            if (applies.call(rule[i])) return true
        }
        sdx = wasSdx
        return false
    } else if (r1 == "repeat") {
        while (applies.call(rule[1])) {}
    } else if (r1 == "optional") {
        applies.call(rule[1])
    } else if (r1 == "ident") {
        var i = rule[2]
        var ii = ididx[i]
        if (!(applies.call(productions[ii][2]))) {
            sdx = wasSdx
            return false
        }
    } else {
        Fiber.abort("invalid rule in applies() function")
    }
    return true
}

var checkValid = Fn.new { |test|
    src = test
    sdx = 0
    var res = applies.call(productions[0][2])
    skipSpaces.call()
    if (sdx < src.count) res = false
    Fmt.print("$q, $s", test, results[1-Conv.btoi(res)])
}

var ebnfs = [
    """"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z" """,
    """{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .

    plus = "+" | "-" .
    times = "*" | "/" .

    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}""",
    "a = \"1\"",
    """{ a = "1" ;""",
    """{ hello world = "1"; }""",
    """{ foo = bar . }"""
]

var tests = [
    [
        "a1a3a4a4a5a6",
        "a1 a2a6",
        "a1 a3 a4 a6",
        "a1 a4 a5 a6",
        "a1 a2 a4 a5 a5 a6",
        "a1 a2 a4 a5 a6 a7",
        "your ad here"
    ],
    [
        "2",
        "2*3 + 4/23 - 7",
        "(3 + 4) * 6-2+(4*(4))",
        "-2",
        "3 +",
        "(4 + 3"
    ]
]

var i = 0
for (ebnf in ebnfs) {
    if (parse.call(ebnf) == 1) {
        System.print("\ntests:")
        for (test in tests[i]) checkValid.call(test)
    }
    System.print()
    i = i + 1
}
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>

productions:
{{"a", 0, {{"terminal", "a1"}, {"or", {"terminal", "a2"}, {"terminal", "a3"}}, {"repeat", {"terminal", "a4"}}, {"optional", {"terminal", "a5"}}, {"terminal", "a6"}}}}

idents:
{"a"}

ididx:
{0}

extras:
{{"title", "a"}, {"comment", "z"}}

tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .

    plus = "+" | "-" .
    times = "*" | "/" .

    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>

productions:
{{"expr", 0, {{"ident", "term", 1}, {"repeat", {{"ident", "plus", 2}, {"ident", "term", 1}}}}}, {"term", 1, {{"ident", "factor", 3}, {"repeat", {{"ident", "times", 4}, {"ident", "factor", 3}}}}}, {"factor", 3, {"or", {"ident", "number", 5}, {{"terminal", "("}, {"ident", "expr", 0}, {"terminal", ")"}}}}, {"plus", 2, {"or", {"terminal", "+"}, {"terminal", "-"}}}, {"times", 4, {"or", {"terminal", "*"}, {"terminal", "/"}}}, {"number", 5, {{"ident", "digit", 6}, {"repeat", {"ident", "digit", 6}}}}, {"digit", 6, {"or", {"terminal", "0"}, {"terminal", "1"}, {"terminal", "2"}, {"terminal", "3"}, {"terminal", "4"}, {"terminal", "5"}, {"terminal", "6"}, {"terminal", "7"}, {"terminal", "8"}, {"terminal", "9"}}}}

idents:
{"expr", "term", "plus", "factor", "times", "number", "digit"}

ididx:
{0, 1, 3, 2, 4, 5, 6}

extras:
{}

tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail

parse:
a = "1" ===>
invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>
invalid ebnf (missing closing })

parse:
{ hello world = "1"; } ===>
invalid ebnf (= expected)

parse:
{ foo = bar . } ===>
invalid ebnf (undefined:bar)


Works with: Zig version 0.14.1
Translation of: Rust
const std = @import("std");
const print = std.debug.print;
const ArrayList = std.ArrayList;
const Allocator = std.mem.Allocator;

const ParseError = error{
    OutOfMemory,
    InvalidTokenSequence,
    InvalidTokenInFactor,
};

const Token = union(enum) {
    char: u8,
    sequence: []const []const u8,
    eof,

    pub fn deinit(self: Token, allocator: Allocator) void {
        switch (self) {
            .sequence => |seq| {
                for (seq) |str| {
                    allocator.free(str);
                }
                allocator.free(seq);
            },
            else => {},
        }
    }
};

const Rule = union(enum) {
    terminal: []const u8,
    ident: struct { name: []const u8, idx: usize },
    my_or: []const *Rule,
    repeat: *Rule,
    optional: *Rule,
    sequence: []const *Rule,

    pub fn deinit(self: *Rule, allocator: Allocator) void {
        switch (self.*) {
            .terminal => |term| allocator.free(term),
            .ident => |ident| allocator.free(ident.name),
            .my_or => |rules| {
                for (rules) |rule| {
                    rule.deinit(allocator);
                    allocator.destroy(rule);
                }
                allocator.free(rules);
            },
            .repeat => |rule| {
                rule.deinit(allocator);
                allocator.destroy(rule);
            },
            .optional => |rule| {
                rule.deinit(allocator);
                allocator.destroy(rule);
            },
            .sequence => |rules| {
                for (rules) |rule| {
                    rule.deinit(allocator);
                    allocator.destroy(rule);
                }
                allocator.free(rules);
            },
        }
    }
};

const Production = struct {
    name: []const u8,
    idx: usize,
    rule: *Rule,

    pub fn deinit(self: Production, allocator: Allocator) void {
        allocator.free(self.name);
        self.rule.deinit(allocator);
        allocator.destroy(self.rule);
    }
};

pub const EBNFParser = struct {
    allocator: Allocator,
    src: []const u8,
    ch: u8,
    sdx: usize,
    token: Token,
    err: bool,
    idents: ArrayList([]const u8),
    ididx: ArrayList(?usize),
    productions: ArrayList(Production),
    extras: ArrayList([]const []const u8),

    pub fn init(allocator: Allocator) EBNFParser {
        return EBNFParser{
            .allocator = allocator,
            .src = "",
            .ch = 0,
            .sdx = 0,
            .token = .eof,
            .err = false,
            .idents = ArrayList([]const u8).init(allocator),
            .ididx = ArrayList(?usize).init(allocator),
            .productions = ArrayList(Production).init(allocator),
            .extras = ArrayList([]const []const u8).init(allocator),
        };
    }

    pub fn deinit(self: *EBNFParser) void {
        for (self.idents.items) |ident| {
            self.allocator.free(ident);
        }
        self.idents.deinit();
        self.ididx.deinit();
        
        for (self.productions.items) |prod| {
            prod.deinit(self.allocator);
        }
        self.productions.deinit();
        
        for (self.extras.items) |extra| {
            for (extra) |str| {
                self.allocator.free(str);
            }
            self.allocator.free(extra);
        }
        self.extras.deinit();
        
        self.token.deinit(self.allocator);
    }

    fn btoi(b: bool) usize {
        return if (b) 1 else 0;
    }

    fn invalid(self: *EBNFParser, msg: []const u8) i32 {
        self.err = true;
        print("{s}\n", .{msg});
        self.sdx = self.src.len; // set to eof
        return -1;
    }

    fn skipSpaces(self: *EBNFParser) void {
        while (self.sdx < self.src.len) {
            self.ch = self.src[self.sdx];
            if (self.ch != ' ' and self.ch != '\t' and self.ch != '\r' and self.ch != '\n') {
                break;
            }
            self.sdx += 1;
        }
    }

    fn getToken(self: *EBNFParser) ParseError!void {
        // Clean up previous token
        self.token.deinit(self.allocator);
        
        self.skipSpaces();
        if (self.sdx >= self.src.len) {
            self.token = .eof;
            return;
        }
        
        const tokstart = self.sdx;
        
        if (std.mem.indexOfScalar(u8, "{}()[]|=.;", self.ch) != null) {
            self.sdx += 1;
            self.token = Token{ .char = self.ch };
        } else if (self.ch == '"' or self.ch == '\'') {
            const closech = self.ch;
            var tokend = tokstart + 1;
            while (tokend < self.src.len and self.src[tokend] != closech) {
                tokend += 1;
            }
            if (tokend >= self.src.len) {
                _ = self.invalid("no closing quote");
                self.token = .eof;
            } else {
                self.sdx = tokend + 1;
                const content = try self.allocator.dupe(u8, self.src[tokstart + 1..tokend]);
                const terminal_str = try self.allocator.dupe(u8, "terminal");
                const seq = try self.allocator.alloc([]const u8, 2);
                seq[0] = terminal_str;
                seq[1] = content;
                self.token = Token{ .sequence = seq };
            }
        } else if (std.ascii.isLower(self.ch)) {
            // To simplify things for the purposes of this task,
            // identifiers are strictly a-z only, not A-Z or 1-9.
            while (self.sdx < self.src.len) {
                self.ch = self.src[self.sdx];
                if (!std.ascii.isLower(self.ch)) {
                    break;
                }
                self.sdx += 1;
            }
            const ident = try self.allocator.dupe(u8, self.src[tokstart..self.sdx]);
            const ident_str = try self.allocator.dupe(u8, "ident");
            const seq = try self.allocator.alloc([]const u8, 2);
            seq[0] = ident_str;
            seq[1] = ident;
            self.token = Token{ .sequence = seq };
        } else {
            _ = self.invalid("invalid ebnf");
            self.token = .eof;
        }
    }

    fn matchToken(self: *EBNFParser, expected_ch: u8) ParseError!void {
        switch (self.token) {
            .char => |ch| {
                if (ch != expected_ch) {
                    _ = self.invalid("invalid ebnf (token mismatch)");
                } else {
                    try self.getToken();
                }
            },
            else => {
                _ = self.invalid("invalid ebnf (expected char token)");
            },
        }
    }

    fn addIdent(self: *EBNFParser, ident: []const u8) ParseError!usize {
        for (self.idents.items, 0..) |existing_ident, k| {
            if (std.mem.eql(u8, existing_ident, ident)) {
                return k;
            }
        }
        
        const ident_copy = try self.allocator.dupe(u8, ident);
        try self.idents.append(ident_copy);
        const k = self.idents.items.len - 1;
        try self.ididx.append(null);
        return k;
    }

    fn factor(self: *EBNFParser) ParseError!*Rule {
        const rule = try self.allocator.create(Rule);
        
        switch (self.token) {
            .sequence => |seq| {
                if (std.mem.eql(u8, seq[0], "ident")) {
                    const idx = try self.addIdent(seq[1]);
                    const name_copy = try self.allocator.dupe(u8, seq[1]);
                    rule.* = Rule{ .ident = .{ .name = name_copy, .idx = idx } };
                    try self.getToken();
                } else if (std.mem.eql(u8, seq[0], "terminal")) {
                    const term_copy = try self.allocator.dupe(u8, seq[1]);
                    rule.* = Rule{ .terminal = term_copy };
                    try self.getToken();
                } else {
                    self.allocator.destroy(rule);
                    return error.InvalidTokenSequence;
                }
            },
            .char => |ch| {
                switch (ch) {
                    '[' => {
                        try self.getToken();
                        const expr = try self.expression();
                        try self.matchToken(']');
                        rule.* = Rule{ .optional = expr };
                    },
                    '(' => {
                        try self.getToken();
                        const expr = try self.expression();
                        try self.matchToken(')');
                        self.allocator.destroy(rule);
                        return expr;
                    },
                    '{' => {
                        try self.getToken();
                        const expr = try self.expression();
                        try self.matchToken('}');
                        rule.* = Rule{ .repeat = expr };
                    },
                    else => {
                        self.allocator.destroy(rule);
                        return error.InvalidTokenInFactor;
                    },
                }
            },
            else => {
                self.allocator.destroy(rule);
                return error.InvalidTokenInFactor;
            },
        }
        
        return rule;
    }

    fn term(self: *EBNFParser) ParseError!*Rule {
        var factors = ArrayList(*Rule).init(self.allocator);
        defer factors.deinit();
        
        try factors.append(try self.factor());
        
        while (true) {
            switch (self.token) {
                .eof => break,
                .char => |ch| {
                    switch (ch) {
                        '|', '.', ';', ')', ']', '}' => break,
                        else => {},
                    }
                },
                else => {},
            }
            
            // Check if we should stop
            const should_stop = switch (self.token) {
                .eof => true,
                .char => |ch| switch (ch) {
                    '|', '.', ';', ')', ']', '}' => true,
                    else => false,
                },
                else => false,
            };
            
            if (should_stop) break;
            
            try factors.append(try self.factor());
        }
        
        if (factors.items.len == 1) {
            return factors.items[0];
        } else {
            const rule = try self.allocator.create(Rule);
            const rules_copy = try self.allocator.dupe(*Rule, factors.items);
            rule.* = Rule{ .sequence = rules_copy };
            return rule;
        }
    }

    fn expression(self: *EBNFParser) ParseError!*Rule {
        const first_term = try self.term();
        
        switch (self.token) {
            .char => |ch| {
                if (ch == '|') {
                    var terms = ArrayList(*Rule).init(self.allocator);
                    defer terms.deinit();
                    
                    try terms.append(first_term);
                    
                    while (true) {
                        switch (self.token) {
                            .char => |token_ch| {
                                if (token_ch == '|') {
                                    try self.getToken();
                                    try terms.append(try self.term());
                                } else {
                                    break;
                                }
                            },
                            else => break,
                        }
                    }
                    
                    const rule = try self.allocator.create(Rule);
                    const rules_copy = try self.allocator.dupe(*Rule, terms.items);
                    rule.* = Rule{ .my_or = rules_copy };
                    return rule;
                }
            },
            else => {},
        }
        
        return first_term;
    }

    fn production(self: *EBNFParser) ParseError!Token {
        try self.getToken();
        
        switch (self.token) {
            .char => |ch| {
                if (ch == '}') {
                    return self.token;
                }
            },
            .eof => {
                _ = self.invalid("invalid ebnf (missing closing })");
                return .eof;
            },
            else => {},
        }
        
        switch (self.token) {
            .sequence => |seq| {
                if (std.mem.eql(u8, seq[0], "ident")) {
                    const ident = seq[1];
                    const idx = try self.addIdent(ident);
                    try self.getToken();
                    try self.matchToken('=');
                    
                    switch (self.token) {
                        .eof => return .eof,
                        else => {},
                    }
                    
                    const expr = try self.expression();
                    const ident_copy = try self.allocator.dupe(u8, ident);
                    const prod = Production{
                        .name = ident_copy,
                        .idx = idx,
                        .rule = expr,
                    };
                    try self.productions.append(prod);
                    self.ididx.items[idx] = self.productions.items.len - 1;
                    return self.token;
                }
            },
            else => {},
        }
        
        return .eof;
    }

    pub fn parse(self: *EBNFParser, ebnf: []const u8) ParseError!i32 {
        print("parse:\n{s} ===>\n\n", .{ebnf});
        self.err = false;
        self.src = ebnf;
        self.sdx = 0;
        
        // Clear previous data
        for (self.idents.items) |ident| {
            self.allocator.free(ident);
        }
        self.idents.clearRetainingCapacity();
        self.ididx.clearRetainingCapacity();
        
        for (self.productions.items) |prod| {
            prod.deinit(self.allocator);
        }
        self.productions.clearRetainingCapacity();
        
        for (self.extras.items) |extra| {
            for (extra) |str| {
                self.allocator.free(str);
            }
            self.allocator.free(extra);
        }
        self.extras.clearRetainingCapacity();
        
        try self.getToken();
        
        switch (self.token) {
            .sequence => |seq| {
                const title_str = try self.allocator.dupe(u8, "title");
                const content = try self.allocator.dupe(u8, seq[1]);
                const extra = try self.allocator.alloc([]const u8, 2);
                extra[0] = title_str;
                extra[1] = content;
                try self.extras.append(extra);
                try self.getToken();
            },
            else => {},
        }
        
        switch (self.token) {
            .char => |ch| {
                if (ch != '{') {
                    return self.invalid("invalid ebnf (missing opening {)");
                }
            },
            else => {
                return self.invalid("invalid ebnf (missing opening {)");
            },
        }
        
        while (true) {
            const prod_result = self.production() catch return -1;
            switch (prod_result) {
                .char => |ch| {
                    if (ch == '}') break;
                },
                .eof => break,
                else => continue,
            }
        }
        
        try self.getToken();
        switch (self.token) {
            .sequence => |seq| {
                const comment_str = try self.allocator.dupe(u8, "comment");
                const content = try self.allocator.dupe(u8, seq[1]);
                const extra = try self.allocator.alloc([]const u8, 2);
                extra[0] = comment_str;
                extra[1] = content;
                try self.extras.append(extra);
                try self.getToken();
            },
            else => {},
        }
        
        switch (self.token) {
            .eof => {},
            else => {
                return self.invalid("invalid ebnf (missing eof?)");
            },
        }
        
        if (self.err) {
            return -1;
        }
        
        // Check for undefined identifiers
        for (self.ididx.items, 0..) |ididx_val, i| {
            if (ididx_val == null) {
                print("invalid ebnf (undefined:{s})\n", .{self.idents.items[i]});
                return -1;
            }
        }
        
        self.pprintProductions();
        self.pprintIdents();
        self.pprintIdidx();
        self.pprintExtras();
        return 1;
    }

    fn pprintProductions(self: *EBNFParser) void {
        print("\nproductions:\n", .{});
        for (self.productions.items) |prod| {
            print("({s}, {d}, rule)\n", .{ prod.name, prod.idx });
        }
    }

    fn pprintIdents(self: *EBNFParser) void {
        print("\nidents:\n", .{});
        for (self.idents.items) |ident| {
            print("{s}, ", .{ident});
        }
        print("\n", .{});
    }

    fn pprintIdidx(self: *EBNFParser) void {
        print("\nididx:\n", .{});
        for (self.ididx.items) |idx| {
            if (idx) |i| {
                print("{d}, ", .{i});
            } else {
                print("null, ", .{});
            }
        }
        print("\n", .{});
    }

    fn pprintExtras(self: *EBNFParser) void {
        print("\nextras:\n", .{});
        for (self.extras.items) |extra| {
            for (extra) |str| {
                print("{s} ", .{str});
            }
            print("\n", .{});
        }
    }

    fn applies(self: *EBNFParser, rule: *Rule, src: []const u8, sdx: *usize) bool {
        const was_sdx = sdx.*;
        
        switch (rule.*) {
            .sequence => |rules| {
                for (rules) |subrule| {
                    if (!self.applies(subrule, src, sdx)) {
                        sdx.* = was_sdx;
                        return false;
                    }
                }
                return true;
            },
            .terminal => |terminal| {
                self.skipSpacesAt(src, sdx);
                for (terminal) |ch| {
                    if (sdx.* >= src.len or src[sdx.*] != ch) {
                        sdx.* = was_sdx;
                        return false;
                    }
                    sdx.* += 1;
                }
                return true;
            },
            .my_or => |rules| {
                for (rules) |subrule| {
                    if (self.applies(subrule, src, sdx)) {
                        return true;
                    }
                }
                sdx.* = was_sdx;
                return false;
            },
            .repeat => |subrule| {
                while (self.applies(subrule, src, sdx)) {
                    // continue repeating
                }
                return true;
            },
            .optional => |subrule| {
                _ = self.applies(subrule, src, sdx);
                return true;
            },
            .ident => |ident_info| {
                if (self.ididx.items[ident_info.idx]) |prod_idx| {
                    if (!self.applies(self.productions.items[prod_idx].rule, src, sdx)) {
                        sdx.* = was_sdx;
                        return false;
                    }
                    return true;
                } else {
                    return false;
                }
            },
        }
    }

    fn skipSpacesAt(self: *EBNFParser, src: []const u8, sdx: *usize) void {
        _ = self;
        while (sdx.* < src.len) {
            const ch = src[sdx.*];
            if (ch != ' ' and ch != '\t' and ch != '\r' and ch != '\n') {
                break;
            }
            sdx.* += 1;
        }
    }

    fn checkValid(self: *EBNFParser, my_test: []const u8) void {
        var sdx: usize = 0;
        const res = self.applies(self.productions.items[0].rule, my_test, &sdx);
        self.skipSpacesAt(my_test, &sdx);
        const final_res = res and sdx >= my_test.len;
        const result = if (final_res) "pass" else "fail";
        print("\"{s}\", {s}\n", .{ my_test, result });
    }
};

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();
    
    var parser = EBNFParser.init(allocator);
    defer parser.deinit();
    
    const ebnfs = [_][]const u8{
        "\"a\" {\n    a = \"a1\" ( \"a2\" | \"a3\" ) { \"a4\" } [ \"a5\" ] \"a6\" ;\n} \"z\" ",
        "{\n    expr = term { plus term } .\n    term = factor { times factor } .\n    factor = number | '(' expr ')' .\n \n    plus = \"+\" | \"-\" .\n    times = \"*\" | \"/\" .\n \n    number = digit { digit } .\n    digit = \"0\" | \"1\" | \"2\" | \"3\" | \"4\" | \"5\" | \"6\" | \"7\" | \"8\" | \"9\" .\n}",
        "a = \"1\"",
        "{ a = \"1\" ;",
        "{ hello world = \"1\"; }",
        "{ foo = bar . }",
    };
    
    const tests = [_][]const []const u8{
        &[_][]const u8{
            "a1a3a4a4a5a6",
            "a1 a2a6",
            "a1 a3 a4 a6",
            "a1 a4 a5 a6",
            "a1 a2 a4 a5 a5 a6",
            "a1 a2 a4 a5 a6 a7",
            "your ad here",
        },
        &[_][]const u8{
            "2",
            "2*3 + 4/23 - 7",
            "(3 + 4) * 6-2+(4*(4))",
            "-2",
            "3 +",
            "(4 + 3",
        },
    };
    
    for (ebnfs, 0..) |ebnf, i| {
        const result = parser.parse(ebnf) catch -1;
        if (result == 1) {
            print("\ntests:\n", .{});
            if (i < tests.len) {
                for (tests[i]) |my_test| {
                    parser.checkValid(my_test);
                }
            }
        }
        print("\n", .{});
    }
}
Output:
parse:
"a" {
    a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"  ===>


productions:
(�, 0, rule)

idents:
a, 

ididx:
0, 

extras:
title a 
comment z 

tests:
"a1a3a4a4a5a6", pass
"a1 a2a6", pass
"a1 a3 a4 a6", pass
"a1 a4 a5 a6", fail
"a1 a2 a4 a5 a5 a6", fail
"a1 a2 a4 a5 a6 a7", fail
"your ad here", fail

parse:
{
    expr = term { plus term } .
    term = factor { times factor } .
    factor = number | '(' expr ')' .
 
    plus = "+" | "-" .
    times = "*" | "/" .
 
    number = digit { digit } .
    digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
} ===>


productions:
(����, 0, rule)
(����, 1, rule)
(������, 3, rule)
(����, 2, rule)
(�����, 4, rule)
(������, 5, rule)
(�����, 6, rule)

idents:
expr, term, plus, factor, times, number, digit, 

ididx:
0, 1, 3, 2, 4, 5, 6, 

extras:

tests:
"2", pass
"2*3 + 4/23 - 7", pass
"(3 + 4) * 6-2+(4*(4))", pass
"-2", fail
"3 +", fail
"(4 + 3", fail

parse:
a = "1" ===>

invalid ebnf (missing opening {)

parse:
{ a = "1" ; ===>

invalid ebnf (missing closing })

parse:
{ hello world = "1"; } ===>

invalid ebnf (expected char token)

parse:
{ foo = bar . } ===>

invalid ebnf (undefined:bar)