I've written a simple recursive-descent JSON parser in Rust. It reads a JSON file and prints the syntax tree to the terminal. I'm still learning Rust, and I'd appreciate any review or feedback, particularly regarding error handling and idiomatic use of the language, though there may still be bugs lurking around. I've omitted tests for brevity.
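For reference (the file name below is just illustrative), running it as cargo run -- input.json on a file containing {"name": "example", "count": 3} prints the Debug representation of the tree, which looks something like Object({"name": Atom(Str("example")), "count": Atom(Number(Integer(3)))}) (member order may vary, since object members are stored in a HashMap).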
lexer.rs
use core::fmt;
use anyhow::{bail, Result};
#[derive(Debug, PartialEq)]
pub enum Token<'a> {
    LeftBrace,
    RightBrace,
    LeftBracket,
    RightBracket,
    Colon,
    Comma,
    Str(&'a str),
    Number(Numeric),
    True,
    False,
    Null,
}
impl fmt::Display for Token<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::LeftBrace => write!(f, "{{"),
            Self::RightBrace => write!(f, "}}"),
            Self::LeftBracket => write!(f, "["),
            Self::RightBracket => write!(f, "]"),
            Self::Colon => write!(f, ":"),
            Self::Comma => write!(f, ","),
            Self::Str(s) => write!(f, "\"{s}\""),
            Self::Number(Numeric::Integer(n)) => write!(f, "{n}"),
            Self::Number(Numeric::Float(n)) => write!(f, "{n}"),
            Self::True => write!(f, "true"),
            Self::False => write!(f, "false"),
            Self::Null => write!(f, "null"),
        }
    }
}
#[derive(Debug, PartialEq, Copy, Clone)]
pub enum Numeric {
    Float(f64),
    Integer(i64),
}
pub struct Lexer<'a> {
    json: &'a str,
}
impl<'a> Lexer<'a> {
    pub fn new(json: &'a str) -> Self {
        Self { json }
    }
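    /// Tokenizes the entire input into a vector of tokens, consuming `self.json` as it advances.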
    pub fn lex(&mut self) -> Result<Vec<Token>> {
        use Token::*;
        let mut tokens: Vec<Token> = vec![];
        loop {
            let mut chars = self.json.chars();
            match chars.next() {
                Some(c) => {
                    match c {
                        '{' => {
                            tokens.push(LeftBrace);
                            self.json = chars.as_str();
                        }
                        '}' => {
                            tokens.push(RightBrace);
                            self.json = chars.as_str();
                        }
                        '[' => {
                            tokens.push(LeftBracket);
                            self.json = chars.as_str();
                        }
                        ']' => {
                            tokens.push(RightBracket);
                            self.json = chars.as_str();
                        }
                        ':' => {
                            tokens.push(Colon);
                            self.json = chars.as_str();
                        }
                        ',' => {
                            tokens.push(Comma);
                            self.json = chars.as_str();
                        }
                        '"' => {
                            let s = self.lex_string()?;
                            tokens.push(Str(s));
                        }
                        '0'..='9' | '-' => {
                            let num = self.lex_number()?;
                            tokens.push(Number(num));
                        }
                        't' if self.json.starts_with("true") => {
                            tokens.push(True);
                            self.json = &self.json["true".len()..];
                        }
                        'f' if self.json.starts_with("false") => {
                            tokens.push(False);
                            self.json = &self.json["false".len()..];
                        }
                        'n' if self.json.starts_with("null") => {
                            tokens.push(Null);
                            self.json = &self.json["null".len()..];
                        }
                        ' ' | '\t' | '\r' | '\n' => {
                            self.json = chars.as_str();
                        }
                        c => bail!("Lexing failure on character {c}"),
                    }
                }
                None => {
                    break;
                }
            }
        }
        Ok(tokens)
    }
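    // Lexes a string literal: returns the raw slice between the surrounding quotes;
    // escape sequences are recognized but left unprocessed.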
    fn lex_string(&mut self) -> Result<&'a str> {
        let mut charindices = self.json.char_indices();
        let _ = charindices.next(); // skip first '"'
        let mut escape = false;
        let n;
        loop {
            match charindices.next() {
                None => bail!("Unexpected end of input in string literal"),
                Some((i, '"')) if !escape => {
                    n = i;
                    break;
                }
                Some((_, '\\')) => {
                    // Toggle rather than set, so that an escaped backslash ("\\")
                    // does not escape the character that follows it.
                    escape = !escape;
                }
                Some(_) => {
                    escape = false;
                }
            }
        }
        let result = &self.json[1..n];
        self.json = charindices.as_str();
        Ok(result)
    }
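    // Scans a numeric literal, tracking whether a fraction or exponent is present
    // in order to decide between i64 and f64.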
    fn lex_number(&mut self) -> Result<Numeric> {
        let mut charindices = self.json.char_indices().peekable();
        let mut n = 0;
        let mut floating_point = false;
        if matches!(charindices.peek(), Some((_, '-'))) {
            charindices.next();
        }
        if matches!(charindices.peek(), Some((_, '.')) | Some((_, 'e')) | Some((_, 'E'))) {
            bail!("Invalid numeric literal");
        }
        if matches!(charindices.peek(), Some((_, '0'))) {
            charindices.next().unwrap(); // safe from if condition
            if let Some((_, c)) = charindices.peek() {
                match c {
                    'e' | 'E' | '.' | ' ' | '\t' | '\r' | '\n' | ',' | ']' | '}' => {}
                    _ => bail!("Invalid numeric literal"),
                }
            }
        }
        // read integer
        loop {
            match charindices.peek() {
                None => {
                    n = self.json.len();
                    break;
                }
                Some((i, c)) => {
                    match c {
                        '0'..='9' => {
                            charindices.next();
                        }
                        '.' | 'E' | 'e' => {
                            break;
                        }
                        ' ' | '\t' | '\r' | '\n' | ',' | ']' | '}' => {
                            n = *i;
                            break;
                        }
                        _ => bail!("Invalid numeric literal"),
                    }
                }
            }
        }
        // read fraction
        if matches!(charindices.peek(), Some((_, '.'))) {
            floating_point = true;
            charindices.next();
            loop {
                match charindices.peek() {
                    None => {
                        n = self.json.len();
                        break;
                    }
                    Some((i, c)) => match c {
                        '0'..='9' => {
                            charindices.next();
                        }
                        'e' | 'E' => break,
                        ' ' | '\t' | '\r' | '\n' | ',' | ']' | '}' => {
                            n = *i;
                            break;
                        }
                        _ => bail!("Invalid numeric literal"),
                    },
                }
            }
        }
        // read exponent
        if matches!(charindices.peek(), Some((_, 'e')) | Some((_, 'E'))) {
            floating_point = true;
            charindices.next();
            if matches!(charindices.peek(), Some((_, '+')) | Some((_, '-'))) {
                charindices.next();
            }
            loop {
                match charindices.peek() {
                    None => {
                        n = self.json.len();
                        break;
                    }
                    Some((i, c)) => match c {
                        '0'..='9' => {
                            charindices.next();
                        }
                        ' ' | '\t' | '\r' | '\n' | ',' | ']' | '}' => {
                            n = *i;
                            break;
                        }
                        _ => bail!("Invalid numeric literal"),
                    },
                }
            }
        }
        let result = if floating_point {
            let res = self.json[0..n].parse::<f64>()?;
            Ok(Numeric::Float(res))
        } else {
            let res = self.json[0..n].parse::<i64>()?;
            Ok(Numeric::Integer(res))
        };
        self.json = &self.json[n..];
        result
    }
}
parser.rs
use std::collections::HashMap;
use std::iter::Peekable;
use std::slice::Iter;
use anyhow::{bail, Result};
use crate::lexer::Token;
#[derive(Debug)]
pub enum JsonTree<'a> {
    Object(HashMap<&'a str, JsonTree<'a>>),
    Array(Vec<JsonTree<'a>>),
    Atom(Token<'a>),
}
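// The parser holds no state of its own; parse() walks a peekable iterator over the lexed tokens.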
pub struct Parser {}
impl<'a> Parser {
    pub fn new() -> Self {
        Self {}
    }
    pub fn parse(&'a self, vec: &'a [Token]) -> Result<Option<JsonTree>> {
        self.value(&mut vec.iter().peekable())
    }
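    // Dispatches on the next (peeked) token to parse a single JSON value;
    // returns Ok(None) when the input is empty.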
    fn value(&'a self, iter: &mut Peekable<Iter<Token<'a>>>) -> Result<Option<JsonTree>> {
        match iter.peek() {
            Some(Token::LeftBrace) => {
                match self.object(iter) {
                    Ok(object) => Ok(Some(object)),
                    Err(e) => Err(e.context("In parsing of object")),
                }
            }
            Some(Token::LeftBracket) => {
                match self.array(iter) {
                    Ok(array) => Ok(Some(array)),
                    Err(e) => Err(e.context("In parsing of array")),
                }
            }
            Some(Token::Str(s)) => {
                iter.next();
                Ok(Some(JsonTree::Atom(Token::Str(s))))
            }
            Some(Token::Number(n)) => {
                iter.next();
                Ok(Some(JsonTree::Atom(Token::Number(*n))))
            }
            Some(Token::True) => {
                iter.next();
                Ok(Some(JsonTree::Atom(Token::True)))
            }
            Some(Token::False) => {
                iter.next();
                Ok(Some(JsonTree::Atom(Token::False)))
            }
            Some(Token::Null) => {
                iter.next();
                Ok(Some(JsonTree::Atom(Token::Null)))
            }
            None => Ok(None),
            Some(t) => bail!("Unexpected token {t:?}"),
        }
    }
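    // Parses an object. The opening brace has only been peeked by value(), so it is consumed here.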
    fn object(&'a self, iter: &mut Peekable<Iter<Token<'a>>>) -> Result<JsonTree> {
        iter.next(); // consume left brace
        let mut elements: HashMap<&str, JsonTree> = HashMap::new();
        if matches!(iter.peek(), Some(Token::RightBrace)) {
            iter.next(); // consume the closing brace of the empty object
            return Ok(JsonTree::Object(elements));
        }
        loop {
            let name = match iter.next() {
                Some(Token::Str(s)) => s,
                t => bail!("Invalid token {t:?}"),
            };
            match iter.next() {
                Some(Token::Colon) => {}
                Some(t) => bail!("Expected ':' but found {:?}", t),
                None => bail!("Unexpected end of input"),
            }
            let member = match self.value(iter) {
                Ok(Some(value)) => value,
                Ok(None) => bail!("Unexpected end of input"),
                Err(e) => return Err(e.context("In object member definition")),
            };
            elements.insert(name, member);
            if matches!(iter.peek(), Some(Token::Comma)) {
                iter.next();
            } else {
                break;
            }
        }
        let token = iter.next();
        if !matches!(token, Some(Token::RightBrace)) {
            bail!("Expected }}, got {token:?}");
        }
        Ok(JsonTree::Object(elements))
    }
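    // Parses an array; like object(), it consumes the opening bracket itself.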
    fn array(&'a self, iter: &mut Peekable<Iter<Token<'a>>>) -> Result<JsonTree> {
        iter.next();
        let mut elements = Vec::new();
        if matches!(iter.peek(), Some(Token::RightBracket)) {
            iter.next();
            return Ok(JsonTree::Array(elements));
        }
        loop {
            let element = match self.value(iter) {
                Ok(Some(value)) => value,
                Ok(None) => bail!("Unexpected end of input"),
                Err(e) => return Err(e.context("In array definition")),
            };
            elements.push(element);
            if matches!(iter.peek(), Some(Token::Comma)) {
                iter.next();
            } else {
                break;
            }
        }
        let token = iter.next();
        if !matches!(token, Some(Token::RightBracket)) {
            bail!("Expected ] but got {token:?}");
        }
        Ok(JsonTree::Array(elements))
    }
}
main.rs
use std::io::Read;
use std::path::PathBuf;
use std::process::exit;
use std::fs::File;
use anyhow::Result;
use clap::Parser;
use lexer::Lexer;
use parser::Parser as MyParser;
mod lexer;
mod parser;
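// Reads the file named on the command line, lexes and parses it, and prints the resulting tree via Debug.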
fn main() -> Result<()> {
    let args = Args::parse();
    let mut file = match File::open(&args.path) {
        Ok(f) => f,
        Err(e) => {
            println!("Encountered error opening file {}: {e}", args.path.display());
            exit(-1);
        }
    };
    let mut contents = String::new();
    match file.read_to_string(&mut contents) {
        Ok(_) => {}
        Err(e) => {
            println!("Encountered error reading file {}: {e}", args.path.display());
            exit(-1);
        }
    }
    let mut lexer = Lexer::new(&contents);
    let tokens = lexer.lex()?;
    let parser = MyParser::new();
    match parser.parse(&tokens) {
        Ok(Some(tree)) => {
            println!("{tree:?}")
        }
        Ok(None) => {}
        Err(e) => {
            println!("Error parsing {}: {e}", args.path.display());
            exit(1);
        }
    }
    Ok(())
}
/// Simple JSON parser
#[derive(Parser, Debug)]
struct Args {
    /// The input file path
    path: PathBuf,
}