//! # Naive JSON Parser
//!
//! Based on [JSON Parser with JavaScript](https://lihautan.com/json-parser-with-javascript/)
//!
//! This project is only concerned with parsing JSON. Serializing the [JSONValue](enum.JSONValue.html) into
//! a native rust object is out of scope.
//!
//! Basic usage:
//! ```rust
//! use std::convert::TryFrom;
//! use naive_json_parser::{JSON, JSONArray, JSONValue};
//!
//! // Convert the JSON string to a `JSONValue`
//! let result = JSON::parse("[0, 1, 2]");
//! # assert!(result.is_ok());
//!
//! // Let's assume you know the JSON is valid
//! let result = result.unwrap();
//!
//! // If you want the value inside of the top `JSONValue`, you
//! // may use the `unwrap` method
//! let array: JSONArray = result.clone().unwrap();
//!
//! // You may also try the type conversion directly, so you can handle a potential error
//! let array = match JSONArray::try_from(result.clone()) {
//!     Ok(a) => a,
//!     Err(_) => todo!(),
//! };
//!
//! // If you want to create a `JSONValue` from one of its wrapped types, you
//! // may use the `from` or `into` methods
//! let json_array = JSONValue::from(array.clone()); // or
//! let json_array: JSONValue = array.clone().into();
//! ```
#![forbid(unsafe_code)]

use std::collections::HashMap;
use std::convert::TryFrom;
use std::iter::FromIterator;
use std::{char, u16};

/// The result of parsing: a `JSONValue` on success, a `ParseError` otherwise
pub type JSONResult = Result<JSONValue, ParseError>;
/// The rust type wrapped by `JSONValue::Array`
pub type JSONArray = Vec<JSONValue>;
/// The rust type wrapped by `JSONValue::Object`
pub type JSONMap = HashMap<String, JSONValue>;

/// The type of JSON value
///
/// The `From` trait is implemented for all the
/// types of values wrapped in the `JSONValue` enum
///
/// Additionally, `()` will convert to `JSONValue::Null`
#[derive(Clone, Debug, PartialEq)]
pub enum JSONValue {
    /// Object Literal
    Object(HashMap<String, JSONValue>),

    /// Array Literal
    Array(Vec<JSONValue>),

    /// String Literal
    String(String),

    /// Number Literal
    Number(f64),

    /// True Literal
    True,

    /// False Literal
    False,

    /// Null Literal
    Null,
}

impl JSONValue {
    /// Convert the wrapped `JSONValue` to its simpler rust value
    ///
    /// This is a convenience method that calls the `try_from` method
    /// for the appropriate type. This will panic if the output type of
    /// the unwrap does not match the type of value in the `JSONValue` struct.
    ///
    /// Example:
    /// ```
    /// use naive_json_parser::JSONValue;
    ///
    /// let str = "Four score and seven years ago...";
    /// let wrapped = JSONValue::from(str);
    ///
    /// // s is now the `String` that was in the `JSONValue` enum
    /// let s: String = wrapped.unwrap();
    ///
    /// # assert_eq!(str, &s);
    /// ```
    ///
    /// Panicking example:
    /// ```should_panic
    /// # use naive_json_parser::JSONValue;
    /// // Don't try to unwrap one type as another
    /// let json = JSONValue::from(4.5);
    ///
    /// // json currently has a `JSONValue::Number` value
    /// // trying to unwrap the value as a string will result in a
    /// // panic
    /// let s: String = json.unwrap(); // This panics
    /// ```
    pub fn unwrap<T: TryFrom<JSONValue>>(self) -> T {
        match T::try_from(self) {
            Ok(val) => val,
            Err(_) => panic!("Tried to unwrap an empty value"),
        }
    }
}

impl TryFrom<JSONValue> for JSONMap {
    type Error = &'static str;

    /// Extracts the `HashMap<String, JSONValue>` in the `JSONValue` enum, if it exists.
    ///
    /// Returns an error if `v` is not a `JSONValue::Object`.
    fn try_from(v: JSONValue) -> Result<Self, Self::Error> {
        match v {
            JSONValue::Object(o) => Ok(o),
            _ => Err("Invalid type conversion"),
        }
    }
}

impl TryFrom<JSONValue> for JSONArray {
    type Error = &'static str;

    /// Extracts the `Vec<JSONValue>` in the `JSONValue` enum, if it exists.
    ///
    /// Returns an error if `v` is not a `JSONValue::Array`.
    fn try_from(v: JSONValue) -> Result<Self, Self::Error> {
        match v {
            JSONValue::Array(a) => Ok(a),
            _ => Err("Invalid type conversion"),
        }
    }
}

impl TryFrom<JSONValue> for f64 {
    type Error = &'static str;

    /// Extracts the `f64` in the `JSONValue` enum, if it exists.
    ///
    /// Returns an error if `v` is not a `JSONValue::Number`.
    fn try_from(v: JSONValue) -> Result<Self, Self::Error> {
        match v {
            JSONValue::Number(n) => Ok(n),
            _ => Err("Invalid type conversion"),
        }
    }
}

impl TryFrom<JSONValue> for String {
    type Error = &'static str;

    /// Extracts the `String` in the `JSONValue` enum, if it exists.
    ///
    /// Returns an error if `v` is not a `JSONValue::String`.
    fn try_from(v: JSONValue) -> Result<Self, Self::Error> {
        match v {
            JSONValue::String(s) => Ok(s),
            _ => Err("Invalid type conversion"),
        }
    }
}

impl TryFrom<JSONValue> for bool {
    type Error = &'static str;

    /// Extracts the `bool` in the `JSONValue` enum, if it exists.
    ///
    /// Returns an error if `v` is not a `JSONValue::True` or `JSONValue::False`.
    fn try_from(v: JSONValue) -> Result<Self, Self::Error> {
        match v {
            JSONValue::True => Ok(true),
            JSONValue::False => Ok(false),
            _ => Err("Invalid type conversion"),
        }
    }
}

impl From<JSONValue> for () {
    /// This will just swallow the enum value and return a unit tuple.
    ///
    /// This impl only exists to mirror the `()` to `JSONValue::Null`
    /// conversion.
    fn from(_: JSONValue) {}
}

impl From<JSONMap> for JSONValue {
    /// Wraps the `HashMap<String, JSONValue>` in the `JSONValue` enum,
    /// returning a `JSONValue::Object`
    fn from(val: JSONMap) -> JSONValue {
        Self::Object(val)
    }
}

impl From<JSONArray> for JSONValue {
    /// Wraps the `Vec<JSONValue>` in the `JSONValue` enum,
    /// returning a `JSONValue::Array`
    fn from(val: JSONArray) -> JSONValue {
        Self::Array(val)
    }
}

impl From<f64> for JSONValue {
    /// Wraps the `f64` in the `JSONValue` enum,
    /// returning a `JSONValue::Number`
    fn from(n: f64) -> Self {
        Self::Number(n)
    }
}

impl From<String> for JSONValue {
    /// Wraps the `String` in the `JSONValue` enum,
    /// returning a `JSONValue::String`
    fn from(s: String) -> Self {
        Self::String(s)
    }
}

impl From<&str> for JSONValue {
    /// Creates a `String` and wraps it in the `JSONValue` enum,
    /// returning a `JSONValue::String`
    fn from(s: &str) -> Self {
        Self::String(String::from(s))
    }
}

impl From<bool> for JSONValue {
    /// Sets the `JSONValue` enum to the `True` or `False` value
    fn from(val: bool) -> Self {
        if val {
            Self::True
        } else {
            Self::False
        }
    }
}

impl From<()> for JSONValue {
    /// Sets the `JSONValue` enum to the `Null` value
    fn from(_s: ()) -> Self {
        Self::Null
    }
}

/// The type of error returned by the parser
#[derive(Debug, PartialEq)]
pub enum ParseError {
    /// The input looks like JSON, but seems to end
    UnexpectedEndOfInput(String),

    /// Looks like JSON, but seems to have characters after it should
    ExpectedEndOfInput(String),

    /// Wasn't this supposed to be an object literal?
    ExpectedObjectKey(String),

    /// Hey, wasn't there supposed to be...?
    ExpectedToken(String),

    /// What's this character?
    UnexpectedToken(String),

    /// Shouldn't this be a numeral?
    ExpectedDigit(String),

    /// There's a backslash...were you going somewhere with that?
    ExpectedEscapeChar(String),

    /// Should be a unicode escape character...missing a few digits?
    ExpectedUnicodeEscape(String),
}

/// This struct is the bulk of the parser
///
/// The members of the struct are private, as they
/// are implementation details of the parser
#[derive(Debug, PartialEq)]
pub struct JSON {
    /// The input JSON String as a character array
    chars: Vec<char>,

    /// The internal parsing index
    ///
    /// Invariant: `i <= chars.len()`. A value equal to `chars.len()` means the
    /// whole input has been consumed.
    i: usize,
}

/// Cut down the if boilerplate
///
/// Thanks to `uwaterloodudette` on reddit
macro_rules! try_parse {
    ($( $e:expr ),* ) => {
        $(
            if let Some(v) = $e? {
                return Ok(v);
            }
        )*
    };
}

impl JSON {
    /// Private constructor
    fn new(json: &str) -> Self {
        JSON {
            chars: json.chars().collect(),
            i: 0,
        }
    }

    /// Return the character at `index`, or `None` past the end of the input
    fn char_at(&self, index: usize) -> Option<char> {
        self.chars.get(index).copied()
    }

    /// Return the character under the cursor without consuming it
    fn peek(&self) -> Option<char> {
        self.char_at(self.i)
    }

    /// Parse a `JSONValue` from the current JSON string
    fn parse_value(&mut self) -> JSONResult {
        self.skip_whitespace();

        // Go through the parser methods, until you find
        // one that doesn't return a `None`
        try_parse!(
            self.parse_string(),
            self.parse_number(),
            self.parse_object(),
            self.parse_array(),
            self.parse_keyword("true", JSONValue::True),
            self.parse_keyword("false", JSONValue::False),
            self.parse_keyword("null", JSONValue::Null)
        );

        // Every parser failed, so the syntax is probably incorrect
        Err(ParseError::UnexpectedEndOfInput(String::from(
            "Doesn't seem to be valid JSON",
        )))
    }

    /// See if there's a `JSONValue::Object` next in the JSON
    ///
    /// Returns `Ok(None)` when the cursor is not on a `{`, so the caller can
    /// try the next kind of value.
    fn parse_object(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('{') {
            return Ok(None);
        }

        self.increment(1);
        self.skip_whitespace();

        let mut result = JSONMap::new();
        let mut initial = true;

        // if it is not '}',
        // we take the path of string -> whitespace -> ':' -> value -> ...
        loop {
            self.expect_not_end('}')?;
            if self.peek() == Some('}') {
                break;
            }

            if !initial {
                self.eat(',')?;
                self.skip_whitespace();
            }

            let key = match self.parse_string()? {
                Some(JSONValue::String(key)) => key,
                _ => {
                    return Err(ParseError::ExpectedObjectKey(String::from(
                        "Expected an object key. Does the object have a trailing comma?",
                    )))
                }
            };

            self.skip_whitespace();
            self.eat(':')?;

            let value = self.parse_value()?;
            result.insert(key, value);

            initial = false;
            self.skip_whitespace();
        }

        // Move past the closing '}'
        self.increment(1);

        Ok(Some(JSONValue::from(result)))
    }

    /// See if there's a `JSONValue::Array` next in the JSON
    ///
    /// Returns `Ok(None)` when the cursor is not on a `[`.
    fn parse_array(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('[') {
            return Ok(None);
        }

        self.increment(1);
        self.skip_whitespace();

        let mut result: Vec<JSONValue> = vec![];
        let mut initial = true;

        loop {
            self.expect_not_end(']')?;
            if self.peek() == Some(']') {
                break;
            }

            if !initial {
                self.eat(',')?;
            }

            // `parse_value` skips leading whitespace itself
            let value = self.parse_value()?;
            result.push(value);

            initial = false;

            // Allow whitespace between a value and the following ',' or ']'
            self.skip_whitespace();
        }

        // Move past the closing ']'
        self.increment(1);

        Ok(Some(JSONValue::from(result)))
    }

    /// See if there's a `JSONValue::String` next in the JSON
    ///
    /// Returns `Ok(None)` when the cursor is not on a `"`. An unterminated
    /// string is reported as `ParseError::UnexpectedEndOfInput`.
    fn parse_string(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('"') {
            return Ok(None);
        }

        self.increment(1);

        let mut result = String::new();

        loop {
            self.expect_not_end('"')?;
            let ch = match self.peek() {
                Some(ch) => ch,
                // Unreachable after `expect_not_end`, but never index blindly
                None => break,
            };
            self.increment(1);

            match ch {
                '"' => break,
                '\\' => self.parse_escape(&mut result)?,
                _ => result.push(ch),
            }
        }

        Ok(Some(JSONValue::from(result)))
    }

    /// Decode one escape sequence; the cursor is just past the backslash
    fn parse_escape(&mut self, out: &mut String) -> Result<(), ParseError> {
        let decoded = match self.peek() {
            Some(ch @ '"') | Some(ch @ '\\') | Some(ch @ '/') => ch,
            Some('b') => '\u{8}',
            Some('f') => '\x0C',
            Some('n') => '\n',
            Some('r') => '\r',
            Some('t') => '\t',
            Some('u') => {
                self.increment(1);
                return self.parse_unicode_escape(out);
            }
            _ => {
                return Err(ParseError::ExpectedEscapeChar(String::from(
                    "Expected an escape sequence",
                )))
            }
        };

        self.increment(1);
        out.push(decoded);

        Ok(())
    }

    /// Decode a JSON unicode (utf16) escape; the cursor is just past `\u`
    ///
    /// A high surrogate directly followed by a `\uXXXX` low surrogate is
    /// combined into a single character. A lone surrogate becomes the
    /// replacement character (U+FFFD).
    fn parse_unicode_escape(&mut self, out: &mut String) -> Result<(), ParseError> {
        let first = u32::from(self.parse_hex4()?);

        let is_high_surrogate = (0xD800..=0xDBFF).contains(&first);
        if is_high_surrogate
            && self.peek() == Some('\\')
            && self.char_at(self.i + 1) == Some('u')
        {
            let checkpoint = self.i;
            self.i += 2;

            match self.parse_hex4().map(u32::from) {
                Ok(second) if (0xDC00..=0xDFFF).contains(&second) => {
                    let scalar = 0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00);
                    out.push(char::from_u32(scalar).unwrap_or('\u{FFFD}'));
                    return Ok(());
                }
                // Not a pair: rewind so the next escape is handled (or
                // reported) on its own by the string loop
                _ => self.i = checkpoint,
            }
        }

        out.push(char::from_u32(first).unwrap_or('\u{FFFD}'));

        Ok(())
    }

    /// Read exactly four hexadecimal digits at the cursor and consume them
    fn parse_hex4(&mut self) -> Result<u16, ParseError> {
        let end = self.i + 4;
        let mut code: u16 = 0;

        for index in self.i..end {
            match self.char_at(index).and_then(|c| c.to_digit(16)) {
                // `digit` is < 16 and at most four digits are folded, so
                // neither the cast nor the arithmetic can overflow a `u16`
                Some(digit) => code = code * 16 + digit as u16,
                None => {
                    return Err(ParseError::ExpectedUnicodeEscape(String::from(
                        "Expected a unicode escape sequence",
                    )))
                }
            }
        }

        self.i = end;

        Ok(code)
    }

    /// See if there's a `JSONValue::Number` next in the JSON
    ///
    /// Returns `Ok(None)` when the cursor is not on a digit or a minus sign.
    fn parse_number(&mut self) -> Result<Option<JSONValue>, ParseError> {
        let start = self.i;

        // If it doesn't start with 0-9 or a minus sign, it's probably not a number
        match self.peek() {
            Some(ch) if ch == '-' || ch.is_ascii_digit() => {}
            _ => return Ok(None),
        }

        // `n` walks to one past the last character of the number
        let mut n = start;

        // Minus sign
        if self.char_at(n) == Some('-') {
            n += 1;
            self.expect_digit(start, n)?;
        }

        // Integer Part
        n = self.skip_digits(n);

        // Decimal Part
        if self.char_at(n) == Some('.') {
            n += 1;
            self.expect_digit(start, n)?;
            n = self.skip_digits(n);
        }

        // Scientific notation part
        if matches!(self.char_at(n), Some('e') | Some('E')) {
            n += 1;

            if matches!(self.char_at(n), Some('-') | Some('+')) {
                n += 1;
            }

            // Exponent base
            self.expect_digit(start, n)?;
            n = self.skip_digits(n);
        }

        let str = String::from_iter(&self.chars[start..n]);

        match str.parse::<f64>() {
            Ok(number) => {
                self.i = n;
                Ok(Some(JSONValue::from(number)))
            }
            Err(e) => Err(ParseError::ExpectedDigit(format!("'{}', {:#?}", str, e))),
        }
    }

    /// Return the index of the first non-digit character at or after `n`
    fn skip_digits(&self, mut n: usize) -> usize {
        while self.char_at(n).map_or(false, |c| c.is_ascii_digit()) {
            n += 1;
        }

        n
    }

    /// See if there's a `JSONValue::True`, `JSONValue::False`, or a `JSONValue::Null` next in the JSON
    fn parse_keyword(
        &mut self,
        search: &str,
        value: JSONValue,
    ) -> Result<Option<JSONValue>, ParseError> {
        let mut cursor = self.i;

        for expected in search.chars() {
            if self.char_at(cursor) != Some(expected) {
                return Ok(None);
            }
            cursor += 1;
        }

        self.i = cursor;

        Ok(Some(value))
    }

    /// Increment the internal index until the next character is not a whitespace character
    ///
    /// Stops at the end of the input.
    fn skip_whitespace(&mut self) {
        while self.peek().map_or(false, |c| c.is_ascii_whitespace()) {
            self.increment(1);
        }
    }

    /// 'Eat' the specified character
    ///
    /// * If the next `char` matches the one passed, the internal index is incremented
    /// * If the next `char` does not match the one passed, a `ParseError::ExpectedToken`
    /// error is returned
    fn eat(&mut self, ch: char) -> Result<(), ParseError> {
        if self.peek() != Some(ch) {
            let msg = format!("Expected {}.", ch);
            return Err(ParseError::ExpectedToken(msg));
        }

        self.increment(1);

        Ok(())
    }

    /// Do a checked increment of the internal pointer index
    ///
    /// The index never moves further than one past the last character.
    fn increment(&mut self, amount: usize) {
        self.i = self.i.saturating_add(amount).min(self.chars.len());
    }

    /// Check that the character at `end` is a digit. If not, return ParseError.
    ///
    /// `start..end` is the part of the number read so far; it is only used
    /// for the error message.
    fn expect_digit(&self, start: usize, end: usize) -> Result<(), ParseError> {
        let found = self.char_at(end);
        if found.map_or(false, |c| c.is_ascii_digit()) {
            return Ok(());
        }

        let current: String = self
            .chars
            .get(start..end)
            .map(|seen| seen.iter().collect())
            .unwrap_or_default();

        let msg = match found {
            Some(ch) => format!(
                "Expected a digit, received '{}' after numeric '{}'",
                ch, current
            ),
            None => format!(
                "Expected a digit, received end of input after numeric '{}'",
                current
            ),
        };

        Err(ParseError::ExpectedDigit(msg))
    }

    /// Verify that we are not at the end of the input string
    ///
    /// `ch` is the character the caller is still waiting for.
    fn expect_not_end(&self, ch: char) -> Result<(), ParseError> {
        if self.i >= self.chars.len() {
            Err(ParseError::UnexpectedEndOfInput(format!(
                "Unexpected end of input. Expected '{}'",
                ch
            )))
        } else {
            Ok(())
        }
    }

    /// Convert a `&str` containing JSON into a `Result<JSONValue, ParseError>`
    ///
    /// Whitespace around the top-level value is ignored. Anything else after
    /// the value is reported as `ParseError::ExpectedEndOfInput`.
    ///
    /// Example:
    /// ```rust
    /// use naive_json_parser::JSON;
    ///
    /// let json = r#"[1, 2.0, 3e4, "foo", {}, [], true, false, null]"#;
    ///
    /// // If valid JSON, this should now be a set of nested `JSONValue` enums containing the
    /// // parsed values. If invalid, a `ParseError` is returned.
    /// let parse_result = JSON::parse(json);
    /// # assert!(parse_result.is_ok(), "Parse method example failed");
    /// ```
    pub fn parse(json: &str) -> JSONResult {
        let mut parser = JSON::new(json);
        let value = parser.parse_value()?;

        parser.skip_whitespace();

        match parser.peek() {
            None => Ok(value),
            Some(ch) => Err(ParseError::ExpectedEndOfInput(format!(
                "Expected end of input, found '{}'",
                ch
            ))),
        }
    }
}

#[cfg(test)]
#[cfg(not(tarpaulin_include))]
mod tests {
    use super::JSONValue::{Array, Number, True};
    use super::*;

    #[test]
    fn parse_keyword() {
        let res = JSON::new(r#""foobarbaz""#).parse_keyword("true", JSONValue::True);
        assert_eq!(res, Ok(None));

        let res = JSON::new("true").parse_keyword("true", JSONValue::True);
        assert_eq!(res, Ok(Some(True)));
    }

    #[test]
    fn skip_whitespace() {
        let mut parser = JSON::new(" \t\r\nx");
        parser.skip_whitespace();
        assert_eq!('x', parser.chars[parser.i]);
    }

    #[test]
    fn parse_string() {
        let res = JSON::new(r#""\t""#).parse_string();
        assert_eq!(res, Ok(Some(JSONValue::from("\t"))));

        let res = JSON::new(r#""\u203d""#).parse_string();
        assert_eq!(res, Ok(Some(JSONValue::from("‽"))));
    }

    #[test]
    fn parse_empty_array() {
        let res = JSON::new("[]").parse_value();
        assert_eq!(res, Ok(Array(vec![])));
    }

    #[test]
    fn parse_number() {
        // This function works like I think, right?
        assert_eq!(','.is_ascii_digit(), false);

        let res = JSON::new(r#""foo""#).parse_number();
        assert_eq!(res, Ok(None));

        let res = JSON::new("3e4").parse_number();
        assert_eq!(res, Ok(Some(Number(3e4f64))));

        let res = JSON::new("1.234,").parse_number();
        assert_eq!(res, Ok(Some(Number(1.234f64))));
    }

    #[test]
    fn parse_object() {
        let result = JSON::new(r#"{"foo": "bar"}"#).parse_object();

        let mut hash_map: JSONMap = HashMap::new();
        hash_map.insert(String::from("foo"), JSONValue::from("bar"));

        assert_eq!(result, Ok(Some(JSONValue::Object(hash_map))));
    }
}