//! # Naive JSON Parser
//!
//! Based on [JSON Parser with JavaScript](https://lihautan.com/json-parser-with-javascript/)
//!
//! The parser works on a `Vec<char>` plus a cursor. The cursor is allowed to sit one position
//! past the last character (meaning "end of input"); every read goes through a checked
//! accessor, so malformed or truncated input produces a `ParseError` instead of a panic.
#![forbid(unsafe_code)]

use std::collections::HashMap;
use std::iter::FromIterator;
use std::{char, u16};

/// The type of JSON value
#[derive(Debug, Clone, PartialEq)]
pub enum JSONValue {
    /// Object Literal
    Object(HashMap<String, JSONValue>),

    /// Array Literal
    Array(Vec<JSONValue>),

    /// String Literal
    String(String),

    /// Number Literal
    Number(f64),

    /// True Literal
    True,

    /// False Literal
    False,

    /// Null Literal
    Null,
}

/// The type of error returned by the parser
///
/// Every variant carries a human-readable description of what went wrong.
#[derive(Debug, Clone, PartialEq)]
pub enum ParseError {
    /// The input looks like JSON, but ends before the value is complete
    UnexpectedEndOfInput(String),

    /// A complete JSON value was parsed, but more characters follow it
    ExpectedEndOfInput(String),

    /// Inside an object literal, something other than a string was found where a key belongs
    ExpectedObjectKey(String),

    /// A specific character (such as `:` or `,`) was required but something else was found
    ExpectedToken(String),

    /// A character was found that cannot start any JSON value
    UnexpectedToken(String),

    /// A digit was required while reading a number
    ExpectedDigit(String),

    /// A backslash inside a string was not followed by a valid escape character
    ExpectedEscapeChar(String),

    /// A `\u` escape was not followed by exactly four hexadecimal digits
    ExpectedUnicodeEscape(String),
}

/// This struct holds a little state for parsing
#[derive(Debug, PartialEq)]
pub struct JSON {
    /// The input JSON String as a character array
    chars: Vec<char>,

    /// The internal parsing index; always in `0..=chars.len()`
    i: usize,
}

impl JSON {
    /// Private constructor
    fn new(json: &str) -> Self {
        JSON {
            chars: json.chars().collect(),
            i: 0,
        }
    }

    /// The character under the cursor, or `None` at end of input
    fn peek(&self) -> Option<char> {
        self.char_at(self.i)
    }

    /// The character at an arbitrary index, or `None` when the index is out of range
    fn char_at(&self, index: usize) -> Option<char> {
        self.chars.get(index).cloned()
    }

    /// Parse a `JSONValue` from the current JSON string
    ///
    /// Leading whitespace is skipped, then each parse method is tried in turn. The first one
    /// that recognises the input wins and the remaining ones are *not* run, so a successful
    /// parse can never be followed by another parser silently consuming more input.
    ///
    /// # Errors
    ///
    /// * Any error produced by the individual parse methods
    /// * `ParseError::UnexpectedToken` if the next character cannot start a JSON value
    /// * `ParseError::UnexpectedEndOfInput` if there is no input left
    fn parse_value(&mut self) -> Result<JSONValue, ParseError> {
        self.skip_whitespace();

        if let Some(value) = self.parse_string()? {
            return Ok(value);
        }
        if let Some(value) = self.parse_number()? {
            return Ok(value);
        }
        if let Some(value) = self.parse_object()? {
            return Ok(value);
        }
        if let Some(value) = self.parse_array()? {
            return Ok(value);
        }
        if let Some(value) = self.parse_keyword("true", JSONValue::True)? {
            return Ok(value);
        }
        if let Some(value) = self.parse_keyword("false", JSONValue::False)? {
            return Ok(value);
        }
        if let Some(value) = self.parse_keyword("null", JSONValue::Null)? {
            return Ok(value);
        }

        match self.peek() {
            Some(ch) => Err(ParseError::UnexpectedToken(format!(
                "Unexpected character '{}' at index {}",
                ch, self.i
            ))),
            None => Err(ParseError::UnexpectedEndOfInput(String::from(
                "Doesn't seem to be valid JSON",
            ))),
        }
    }

    /// See if there's a `JSONValue::Object` next in the JSON
    ///
    /// Returns `Ok(None)` when the next character is not `{`. Whitespace is allowed around
    /// keys, colons, values and commas. When a key is repeated, the last value wins.
    ///
    /// # Errors
    ///
    /// * `ParseError::ExpectedObjectKey` when a key is missing or is not a string
    ///   (this also rejects a trailing comma)
    /// * `ParseError::ExpectedToken` when `:` or `,` is missing
    /// * `ParseError::UnexpectedEndOfInput` when the input ends inside the object
    fn parse_object(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('{') {
            return Ok(None);
        }

        self.increment(1);
        self.skip_whitespace();

        let mut result: HashMap<String, JSONValue> = HashMap::new();

        if self.peek() == Some('}') {
            self.increment(1);
            return Ok(Some(JSONValue::Object(result)));
        }

        // We take the path of string -> whitespace -> ':' -> value -> (',' | '}')
        loop {
            self.skip_whitespace();

            let key = match self.parse_string()? {
                Some(JSONValue::String(key)) => key,
                _ if self.peek().is_none() => {
                    return Err(ParseError::UnexpectedEndOfInput(String::from(
                        "Expected an object key",
                    )));
                }
                _ => {
                    return Err(ParseError::ExpectedObjectKey(format!(
                        "Expected a string key at index {}",
                        self.i
                    )));
                }
            };

            self.skip_whitespace();
            self.eat(':')?;

            let value = self.parse_value()?;
            result.insert(key, value);

            self.skip_whitespace();
            if self.peek() == Some('}') {
                break;
            }
            self.eat(',')?;
        }

        // Move to the next character: '}'
        self.increment(1);

        Ok(Some(JSONValue::Object(result)))
    }

    /// See if there's a `JSONValue::Array` next in the JSON
    ///
    /// Returns `Ok(None)` when the next character is not `[`. Whitespace is allowed around
    /// elements and commas.
    ///
    /// # Errors
    ///
    /// * Any error produced while parsing an element (a trailing comma therefore fails with
    ///   `ParseError::UnexpectedToken`)
    /// * `ParseError::ExpectedToken` when a `,` between elements is missing
    /// * `ParseError::UnexpectedEndOfInput` when the input ends inside the array
    fn parse_array(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('[') {
            return Ok(None);
        }

        self.increment(1);
        self.skip_whitespace();

        let mut result: Vec<JSONValue> = vec![];

        if self.peek() == Some(']') {
            self.increment(1);
            return Ok(Some(JSONValue::Array(result)));
        }

        loop {
            result.push(self.parse_value()?);

            self.skip_whitespace();
            if self.peek() == Some(']') {
                break;
            }
            self.eat(',')?;
        }

        // Move to the next character: ']'
        self.increment(1);

        Ok(Some(JSONValue::Array(result)))
    }

    /// See if there's a `JSONValue::String` next in the JSON
    ///
    /// Returns `Ok(None)` when the next character is not `"`. On success the cursor is left
    /// just past the closing quote. Unescaped control characters inside the string are
    /// accepted as-is (this parser is lenient there).
    ///
    /// # Errors
    ///
    /// * `ParseError::UnexpectedEndOfInput` when the closing quote is missing
    /// * `ParseError::ExpectedEscapeChar` / `ParseError::ExpectedUnicodeEscape` for a
    ///   malformed escape sequence
    fn parse_string(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('"') {
            return Ok(None);
        }

        self.increment(1);

        let mut result = String::new();

        loop {
            let ch = match self.peek() {
                Some(ch) => ch,
                None => {
                    return Err(ParseError::UnexpectedEndOfInput(String::from(
                        "Unterminated string literal",
                    )));
                }
            };
            self.increment(1);

            match ch {
                '"' => return Ok(Some(JSONValue::String(result))),
                '\\' => result.push(self.parse_escape()?),
                _ => result.push(ch),
            }
        }
    }

    /// Decode one escape sequence; the cursor must be just past the backslash
    fn parse_escape(&mut self) -> Result<char, ParseError> {
        let ch = match self.peek() {
            Some(ch) => ch,
            None => {
                return Err(ParseError::ExpectedEscapeChar(String::from(
                    "Input ended right after a backslash",
                )));
            }
        };
        self.increment(1);

        match ch {
            // These three escape to themselves
            '"' | '\\' | '/' => Ok(ch),
            'b' => Ok('\u{8}'),
            'f' => Ok('\u{c}'),
            'n' => Ok('\n'),
            'r' => Ok('\r'),
            't' => Ok('\t'),
            'u' => self.parse_unicode_escape(),
            other => Err(ParseError::ExpectedEscapeChar(format!(
                "'\\{}' is not a valid escape sequence",
                other
            ))),
        }
    }

    /// Decode a JSON unicode (utf16) escape; the cursor must be just past the `\u`
    ///
    /// A high surrogate immediately followed by an escaped low surrogate is combined into a
    /// single character. A surrogate without its partner becomes U+FFFD (the replacement
    /// character).
    fn parse_unicode_escape(&mut self) -> Result<char, ParseError> {
        let first = self.parse_hex4()?;
        let mut units = vec![first];

        let is_high_surrogate = (0xD800..=0xDBFF).contains(&first);
        if is_high_surrogate
            && self.peek() == Some('\\')
            && self.char_at(self.i + 1) == Some('u')
        {
            let checkpoint = self.i;
            self.increment(2);
            let second = self.parse_hex4()?;
            if (0xDC00..=0xDFFF).contains(&second) {
                units.push(second);
            } else {
                // Not a pair after all: rewind so the second escape is decoded on its own
                self.i = checkpoint;
            }
        }

        let decoded = char::decode_utf16(units).next().and_then(|unit| unit.ok());
        Ok(decoded.unwrap_or('\u{FFFD}'))
    }

    /// Read exactly four hexadecimal digits at the cursor as one UTF-16 code unit
    fn parse_hex4(&mut self) -> Result<u16, ParseError> {
        let text = match self.chars.get(self.i..self.i + 4) {
            Some(digits) if digits.iter().all(|c| c.is_ascii_hexdigit()) => {
                String::from_iter(digits)
            }
            _ => {
                return Err(ParseError::ExpectedUnicodeEscape(format!(
                    "Expected four hexadecimal digits after \\u at index {}",
                    self.i
                )));
            }
        };

        let code = u16::from_str_radix(&text, 16)
            .map_err(|e| ParseError::ExpectedUnicodeEscape(format!("'{}', {:#?}", text, e)))?;
        self.increment(4);

        Ok(code)
    }

    /// See if there's a `JSONValue::Number` next in the JSON
    ///
    /// Returns `Ok(None)` when the next character is neither a digit nor a minus sign.
    /// Otherwise the JSON number grammar is followed: an optional `-`, then `0` or a run of
    /// digits not starting with `0`, an optional `.` fraction and an optional `e`/`E`
    /// exponent. On success the cursor is left just past the last character of the number.
    ///
    /// # Errors
    ///
    /// `ParseError::ExpectedDigit` when a required digit is missing (`-`, `1.`, `1e`, ...).
    fn parse_number(&mut self) -> Result<Option<JSONValue>, ParseError> {
        // If it doesn't start with 0-9 or a minus sign, it's not a number
        match self.peek() {
            Some(ch) if ch == '-' || ch.is_ascii_digit() => {}
            _ => return Ok(None),
        }

        let start = self.i;
        let mut end = start;

        if self.char_at(end) == Some('-') {
            end += 1;
        }

        // Integer part: a lone zero, or digits with no leading zero
        match self.char_at(end) {
            Some('0') => end += 1,
            Some(ch) if ch.is_ascii_digit() => end = self.digits_end(end),
            _ => return Err(Self::expected_digit(end)),
        }

        // Optional fraction: '.' followed by at least one digit
        if self.char_at(end) == Some('.') {
            let fraction_end = self.digits_end(end + 1);
            if fraction_end == end + 1 {
                return Err(Self::expected_digit(fraction_end));
            }
            end = fraction_end;
        }

        // Optional exponent: 'e' or 'E', optional sign, at least one digit
        if self.char_at(end) == Some('e') || self.char_at(end) == Some('E') {
            let mut exponent_start = end + 1;
            if self.char_at(exponent_start) == Some('+')
                || self.char_at(exponent_start) == Some('-')
            {
                exponent_start += 1;
            }
            let exponent_end = self.digits_end(exponent_start);
            if exponent_end == exponent_start {
                return Err(Self::expected_digit(exponent_end));
            }
            end = exponent_end;
        }

        let text: String = self.chars[start..end].iter().collect();
        let number = text
            .parse::<f64>()
            .map_err(|e| ParseError::ExpectedDigit(format!("'{}', {:#?}", text, e)))?;

        self.i = end;

        Ok(Some(JSONValue::Number(number)))
    }

    /// Index of the first non-digit character at or after `from`
    fn digits_end(&self, from: usize) -> usize {
        let mut end = from;
        while self.char_at(end).map_or(false, |ch| ch.is_ascii_digit()) {
            end += 1;
        }
        end
    }

    /// Build the error used when a number is missing a required digit
    fn expected_digit(index: usize) -> ParseError {
        ParseError::ExpectedDigit(format!("Expected a digit at index {}", index))
    }

    /// See if there's a `JSONValue::True`, `JSONValue::False`, or a `JSONValue::Null` next in the JSON
    ///
    /// Returns `Ok(Some(value))` and moves the cursor past `search` when the input at the
    /// cursor starts with `search`; otherwise returns `Ok(None)` and leaves the cursor alone.
    fn parse_keyword(
        &mut self,
        search: &str,
        value: JSONValue,
    ) -> Result<Option<JSONValue>, ParseError> {
        let width = search.chars().count();
        let found = self
            .chars
            .get(self.i..self.i + width)
            .map_or(false, |window| window.iter().cloned().eq(search.chars()));

        if !found {
            return Ok(None);
        }

        self.increment(width);

        Ok(Some(value))
    }

    /// Increment the internal index until the next character is not a whitespace character
    ///
    /// Stops at end of input.
    fn skip_whitespace(&mut self) {
        while self.peek().map_or(false, |ch| ch.is_ascii_whitespace()) {
            self.increment(1);
        }
    }

    /// 'Eat' the specified character
    ///
    /// * If the next `char` matches the one passed, the internal index is incremented
    /// * If the next `char` does not match the one passed, a `ParseError::ExpectedToken`
    /// error is returned
    /// * If there is no next `char`, a `ParseError::UnexpectedEndOfInput` error is returned
    fn eat(&mut self, ch: char) -> Result<(), ParseError> {
        match self.peek() {
            Some(next) if next == ch => {
                self.increment(1);
                Ok(())
            }
            Some(_) => Err(ParseError::ExpectedToken(format!("Expected {}.", ch))),
            None => Err(ParseError::UnexpectedEndOfInput(format!(
                "Expected {}.",
                ch
            ))),
        }
    }

    /// Advance the internal index by `amount`, stopping at end of input
    ///
    /// The index may become equal to `chars.len()` (one past the last character); that is the
    /// end-of-input position, at which `peek` returns `None`.
    fn increment(&mut self, amount: usize) {
        self.i = self.i.saturating_add(amount).min(self.chars.len());
    }

    /// Convert a `&str` containing JSON into a `Result<JSONValue, ParseError>`
    ///
    /// The whole input must be a single JSON value, optionally surrounded by whitespace.
    ///
    /// # Errors
    ///
    /// * Any `ParseError` produced while parsing the value
    /// * `ParseError::ExpectedEndOfInput` when non-whitespace characters follow the value
    pub fn parse(json: &str) -> Result<JSONValue, ParseError> {
        let mut parser = JSON::new(json);
        let value = parser.parse_value()?;

        parser.skip_whitespace();
        match parser.peek() {
            None => Ok(value),
            Some(ch) => Err(ParseError::ExpectedEndOfInput(format!(
                "Unexpected character '{}' at index {} after the JSON value",
                ch, parser.i
            ))),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_keyword() {
        let mut parser = JSON::new(r#""foobarbaz""#);
        let res = JSON::parse_keyword(&mut parser, "true", JSONValue::True);
        assert_eq!(res, Ok(None));

        let mut parser = JSON::new("true");
        let res = JSON::parse_keyword(&mut parser, "true", JSONValue::True);
        assert_eq!(res, Ok(Some(JSONValue::True)));
    }

    #[test]
    fn skip_whitespace() {
        let mut parser = JSON::new(" \t\r\nx");
        parser.skip_whitespace();
        assert_eq!('x', parser.chars[parser.i]);
    }

    #[test]
    fn parse_string() {
        let mut parser = JSON::new(r#""\t""#);
        let res = JSON::parse_string(&mut parser);
        assert_eq!(res, Ok(Some(JSONValue::String(String::from("\t")))));

        let mut parser = JSON::new(r#""\u203d""#);
        let res = JSON::parse_string(&mut parser);
        assert_eq!(res, Ok(Some(JSONValue::String(String::from("‽")))));
    }

    #[test]
    fn parse_empty_array() {
        let mut parser = JSON::new("[]");
        let res = JSON::parse_value(&mut parser);
        assert_eq!(res, Ok(JSONValue::Array(vec![])));
    }

    #[test]
    fn parse_number() {
        // This function works like I think, right?
        assert_ne!(','.is_ascii_digit(), true);

        let mut parser = JSON::new(r#""foo""#);
        let res = JSON::parse_number(&mut parser);
        assert_eq!(res, Ok(None));

        let mut parser = JSON::new("3.14159");
        let res = JSON::parse_number(&mut parser);
        assert_eq!(res, Ok(Some(JSONValue::Number(3.14159f64))));

        let mut parser = JSON::new("3e4");
        let res = JSON::parse_number(&mut parser);
        assert_eq!(res, Ok(Some(JSONValue::Number(3e4f64))));

        let mut parser = JSON::new("1.234,");
        let res = JSON::parse_number(&mut parser);
        assert_eq!(res, Ok(Some(JSONValue::Number(1.234f64))));
    }

    #[test]
    fn can_parse_array_of_keywords() {
        let result = JSON::parse("[true,false,null]");

        assert_eq!(
            result,
            Ok(JSONValue::Array(vec![
                JSONValue::True,
                JSONValue::False,
                JSONValue::Null
            ]))
        );
    }

    #[test]
    fn parse_json_types() {
        // Boolean / Null
        let res = JSON::parse("true");
        assert_eq!(res, Ok(JSONValue::True));
        let res = JSON::parse("false");
        assert_eq!(res, Ok(JSONValue::False));
        let res = JSON::parse("null");
        assert_eq!(res, Ok(JSONValue::Null));

        // Number
        let res = JSON::parse("9.38083151965");
        assert_eq!(
            res,
            Ok(JSONValue::Number(9.38083151965)),
            "Failed to parse number"
        );

        // String
        let res = JSON::parse(r#""/^$/""#);
        assert_eq!(
            res,
            Ok(JSONValue::String(String::from("/^$/"))),
            "Failed to parse string"
        );
    }

    #[test]
    fn can_parse_arbitrary_json() {
        let result = JSON::parse("[{}]");
        assert_eq!(
            result,
            Ok(JSONValue::Array(vec![JSONValue::Object(HashMap::new())]))
        );

        let result = JSON::parse(
            r#"[{ "a": 9.38083151965, "b": 4e3, "c": [1, 2, 3], "d": "foo", "e": { "f": { "g": { "h": null } } }, "i": ["\"", "\\", "/", "\b", "\f", "\n", "\r", "\t", "\u0001", "\uface"] }]"#,
        );
        assert!(result.is_ok(), "{:#?}", result);
    }
}