Skip to main content
Tweeted twitter.com/StackCodeReview/status/724766055822970881
misspelled given
Source Link
// an enumeration of named token types. for
// this language, there is only one named
// token type (the number). all other token
// types are referred to by their string value
const (
    _ = iota
    T_NUM
)

// a token type refers to the terminal class of the
// token (eg T_NUM), whereas the value refers to
// its textual representation (eg "23")
type Token struct {
    Type  int
    Value string
}

// a slice of tokens that represents the input stack.
// the base pointer is passed to the production func
// that is matching against the stack.
type TokenStack []Token

// type definition for a production. given a stack and
// a base pointer, return the position at which the
// production ends. if no production is
// matched return NoMatch, or -1.
type Production func(TokenStack, int) int

// returned by a production func if no match  is found
// on the token stack at a given base pointer
const NoMatch = -1

// will return true if a given stack is valid for the
// language. a parse is valid when the return value from
// a production equals the length of the input stack
// (meaning the entire stack matches the production)
func Parse(s TokenStack) bool {
    p := E(s, 0)
    return M(p) && p == len(s)
}

// a production func for the rule:
//
//  E : E1 '*' E
//    | E1 '/' E
//    | E1 '+' E
//    | E1 '-' E
//    | E1
//    ;
func E(s TokenStack, p int) int {
    if p1 := Consecutive(s, p, E1, V("*"), E); M(p1) {
        return p1
    } else if p1 := Consecutive(s, p, E1, V("/"), E); M(p1) {
        return p1
    } else if p1 := Consecutive(s, p, E1, V("+"), E); M(p1) {
        return p1
    } else if p1 := Consecutive(s, p, E1, V("-"), E); M(p1) {
        return p1
    } else if p1 = E1(s, p); M(p1) {
        return p1
    }
    return NoMatch
}

// a production func for the rule:
//
//  E1 : T_NUM
//     | '(' E ')'
//     ;
func E1(s TokenStack, p int) int {
    if p1 := T(T_NUM)(s, p); M(p1) {
        return p1
    } else if p1 = Consecutive(s, p, V("("), E, V(")")); M(p1) {
        return p1
    }
    return NoMatch
}

// will try to match consecutive productions against
// a token stack, keeping track of the length of each
// match. if every consecutive rule matches, return
// the length of every match added to the base pointer
// originally passed to this function
func Consecutive(s TokenStack, p int, rules ...Production) int {
    var match_len = 0
    for _, rulei := range rules {
        if p1 := rulei(s, p+match_len); M(p1) {
            match_len += p1 - (p + match_len)
        } else {
            return NoMatch
        }
    }
    return p + match_len
}

// return a production func that will
// match a givingiven token value
func V(v string) Production {
    return func(s TokenStack, p int) int {
        if len(s) > p && string(s[p].Value) == v {
            return p + 1
        }
        return NoMatch
    }
}

// return a production func that will
// match a givingiven token type
func T(t int) Production {
    return func(s TokenStack, p int) int {
        if len(s) > p && s[p].Type == t {
            return p + 1
        }
        return NoMatch
    }
}

// a function to conveniently check the
// return value of a production for a match
func M(p int) bool {
    return p > NoMatch
}
// an enumeration of named token types. for
// this language, there is only one named
// token type (the number). all other token
// types are referred to by their string value
const (
    _ = iota
    T_NUM
)

// a token type refers to the terminal class of the
// token (eg T_NUM), whereas the value refers to
// its textual representation (eg "23")
type Token struct {
    Type  int
    Value string
}

// a slice of tokens that represents the input stack.
// the base pointer is passed to the production func
// that is matching against the stack.
type TokenStack []Token

// type definition for a production. given a stack and
// a base pointer, return the position at which the
// production ends. if no production is
// matched return NoMatch, or -1.
type Production func(TokenStack, int) int

// returned by a production func if no match  is found
// on the token stack at a given base pointer
const NoMatch = -1

// will return true if a given stack is valid for the
// language. a parse is valid when the return value from
// a production equals the length of the input stack
// (meaning the entire stack matches the production)
func Parse(s TokenStack) bool {
    p := E(s, 0)
    return M(p) && p == len(s)
}

// a production func for the rule:
//
//  E : E1 '*' E
//    | E1 '/' E
//    | E1 '+' E
//    | E1 '-' E
//    | E1
//    ;
func E(s TokenStack, p int) int {
    if p1 := Consecutive(s, p, E1, V("*"), E); M(p1) {
        return p1
    } else if p1 := Consecutive(s, p, E1, V("/"), E); M(p1) {
        return p1
    } else if p1 := Consecutive(s, p, E1, V("+"), E); M(p1) {
        return p1
    } else if p1 := Consecutive(s, p, E1, V("-"), E); M(p1) {
        return p1
    } else if p1 = E1(s, p); M(p1) {
        return p1
    }
    return NoMatch
}

// a production func for the rule:
//
//  E1 : T_NUM
//     | '(' E ')'
//     ;
func E1(s TokenStack, p int) int {
    if p1 := T(T_NUM)(s, p); M(p1) {
        return p1
    } else if p1 = Consecutive(s, p, V("("), E, V(")")); M(p1) {
        return p1
    }
    return NoMatch
}

// will try to match consecutive productions against
// a token stack, keeping track of the length of each
// match. if every consecutive rule matches, return
// the length of every match added to the base pointer
// originally passed to this function
func Consecutive(s TokenStack, p int, rules ...Production) int {
    var match_len = 0
    for _, rulei := range rules {
        if p1 := rulei(s, p+match_len); M(p1) {
            match_len += p1 - (p + match_len)
        } else {
            return NoMatch
        }
    }
    return p + match_len
}

// return a production func that will
// match a givin token value
func V(v string) Production {
    return func(s TokenStack, p int) int {
        if len(s) > p && string(s[p].Value) == v {
            return p + 1
        }
        return NoMatch
    }
}

// return a production func that will
// match a givin token type
func T(t int) Production {
    return func(s TokenStack, p int) int {
        if len(s) > p && s[p].Type == t {
            return p + 1
        }
        return NoMatch
    }
}

// a function to conveniently check the
// return value of a production for a match
func M(p int) bool {
    return p > NoMatch
}
// an enumeration of named token types. for
// this language, there is only one named
// token type (the number). all other token
// types are referred to by their string value
const (
    _ = iota
    T_NUM
)

// a token type refers to the terminal class of the
// token (eg T_NUM), whereas the value refers to
// its textual representation (eg "23")
type Token struct {
    Type  int
    Value string
}

// a slice of tokens that represents the input stack.
// the base pointer is passed to the production func
// that is matching against the stack.
type TokenStack []Token

// type definition for a production. given a stack and
// a base pointer, return the position at which the
// production ends. if no production is
// matched return NoMatch, or -1.
type Production func(TokenStack, int) int

// returned by a production func if no match  is found
// on the token stack at a given base pointer
const NoMatch = -1

// will return true if a given stack is valid for the
// language. a parse is valid when the return value from
// a production equals the length of the input stack
// (meaning the entire stack matches the production)
func Parse(s TokenStack) bool {
    p := E(s, 0)
    return M(p) && p == len(s)
}

// a production func for the rule:
//
//  E : E1 '*' E
//    | E1 '/' E
//    | E1 '+' E
//    | E1 '-' E
//    | E1
//    ;
func E(s TokenStack, p int) int {
    if p1 := Consecutive(s, p, E1, V("*"), E); M(p1) {
        return p1
    } else if p1 := Consecutive(s, p, E1, V("/"), E); M(p1) {
        return p1
    } else if p1 := Consecutive(s, p, E1, V("+"), E); M(p1) {
        return p1
    } else if p1 := Consecutive(s, p, E1, V("-"), E); M(p1) {
        return p1
    } else if p1 = E1(s, p); M(p1) {
        return p1
    }
    return NoMatch
}

// a production func for the rule:
//
//  E1 : T_NUM
//     | '(' E ')'
//     ;
func E1(s TokenStack, p int) int {
    if p1 := T(T_NUM)(s, p); M(p1) {
        return p1
    } else if p1 = Consecutive(s, p, V("("), E, V(")")); M(p1) {
        return p1
    }
    return NoMatch
}

// will try to match consecutive productions against
// a token stack, keeping track of the length of each
// match. if every consecutive rule matches, return
// the length of every match added to the base pointer
// originally passed to this function
func Consecutive(s TokenStack, p int, rules ...Production) int {
    var match_len = 0
    for _, rulei := range rules {
        if p1 := rulei(s, p+match_len); M(p1) {
            match_len += p1 - (p + match_len)
        } else {
            return NoMatch
        }
    }
    return p + match_len
}

// return a production func that will
// match a given token value
func V(v string) Production {
    return func(s TokenStack, p int) int {
        if len(s) > p && string(s[p].Value) == v {
            return p + 1
        }
        return NoMatch
    }
}

// return a production func that will
// match a given token type
func T(t int) Production {
    return func(s TokenStack, p int) int {
        if len(s) > p && s[p].Type == t {
            return p + 1
        }
        return NoMatch
    }
}

// a function to conveniently check the
// return value of a production for a match
func M(p int) bool {
    return p > NoMatch
}
Source Link

recursive descent parser

Here is a non-predictive recursive descent parser I wrote for the following grammar:

E : E1 '*' E
  | E1 '/' E
  | E1 '+' E
  | E1 '-' E
  | E1
  ;
  
E1 : NUM
   | '(' E ')'
   ;

It can be run against the token stream

func main() {
    // (m) * (m / (m))
    valid := Parse([]Token{
        {Value: "("},
        {Type: T_NUM},
        {Value: ")"},
        {Value: "*"},
        {Value: "("},
        {Type: T_NUM},
        {Value: "/"},
        {Value: "("},
        {Type: T_NUM},
        {Value: ")"},
        {Value: ")"},
    })  

    println(valid)
}

It doesn't build an AST, it simply validates a token stream against the language.

// an enumeration of named token types. for
// this language, there is only one named
// token type (the number). all other token
// types are referred to by their string value
const (
    _ = iota
    T_NUM
)

// a token type refers to the terminal class of the
// token (eg T_NUM), whereas the value refers to
// its textual representation (eg "23")
type Token struct {
    Type  int
    Value string
}

// a slice of tokens that represents the input stack.
// the base pointer is passed to the production func
// that is matching against the stack.
type TokenStack []Token

// type definition for a production. given a stack and
// a base pointer, return the position at which the
// production ends. if no production is
// matched return NoMatch, or -1.
type Production func(TokenStack, int) int

// returned by a production func if no match  is found
// on the token stack at a given base pointer
const NoMatch = -1

// will return true if a given stack is valid for the
// language. a parse is valid when the return value from
// a production equals the length of the input stack
// (meaning the entire stack matches the production)
func Parse(s TokenStack) bool {
    p := E(s, 0)
    return M(p) && p == len(s)
}

// a production func for the rule:
//
//  E : E1 '*' E
//    | E1 '/' E
//    | E1 '+' E
//    | E1 '-' E
//    | E1
//    ;
func E(s TokenStack, p int) int {
    if p1 := Consecutive(s, p, E1, V("*"), E); M(p1) {
        return p1
    } else if p1 := Consecutive(s, p, E1, V("/"), E); M(p1) {
        return p1
    } else if p1 := Consecutive(s, p, E1, V("+"), E); M(p1) {
        return p1
    } else if p1 := Consecutive(s, p, E1, V("-"), E); M(p1) {
        return p1
    } else if p1 = E1(s, p); M(p1) {
        return p1
    }
    return NoMatch
}

// a production func for the rule:
//
//  E1 : T_NUM
//     | '(' E ')'
//     ;
func E1(s TokenStack, p int) int {
    if p1 := T(T_NUM)(s, p); M(p1) {
        return p1
    } else if p1 = Consecutive(s, p, V("("), E, V(")")); M(p1) {
        return p1
    }
    return NoMatch
}

// will try to match consecutive productions against
// a token stack, keeping track of the length of each
// match. if every consecutive rule matches, return
// the length of every match added to the base pointer
// originally passed to this function
func Consecutive(s TokenStack, p int, rules ...Production) int {
    var match_len = 0
    for _, rulei := range rules {
        if p1 := rulei(s, p+match_len); M(p1) {
            match_len += p1 - (p + match_len)
        } else {
            return NoMatch
        }
    }
    return p + match_len
}

// return a production func that will
// match a givin token value
func V(v string) Production {
    return func(s TokenStack, p int) int {
        if len(s) > p && string(s[p].Value) == v {
            return p + 1
        }
        return NoMatch
    }
}

// return a production func that will
// match a givin token type
func T(t int) Production {
    return func(s TokenStack, p int) int {
        if len(s) > p && s[p].Type == t {
            return p + 1
        }
        return NoMatch
    }
}

// a function to conveniently check the
// return value of a production for a match
func M(p int) bool {
    return p > NoMatch
}