diff --git a/keyvalues-serde/src/de/mod.rs b/keyvalues-serde/src/de/mod.rs index 37305c4..403075a 100644 --- a/keyvalues-serde/src/de/mod.rs +++ b/keyvalues-serde/src/de/mod.rs @@ -20,7 +20,7 @@ use std::{ use crate::{ de::{map::ObjEater, seq::SeqBuilder}, error::{Error, Result}, - tokens::{Token, TokenStream}, + tokens::{tokens_from_vdf, Token}, }; pub fn from_reader(rdr: R) -> Result { @@ -84,7 +84,7 @@ pub struct Deserializer<'de> { impl<'de> Deserializer<'de> { /// Attempts to create a new VDF deserializer along with returning the top level VDF key pub fn new_with_key(vdf: Vdf<'de>) -> Result<(Self, Key<'de>)> { - let token_stream = TokenStream::from(vdf); + let token_stream = tokens_from_vdf(vdf); let key = if let Some(Token::Key(key)) = token_stream.first() { key.clone() @@ -92,7 +92,7 @@ impl<'de> Deserializer<'de> { unreachable!("Tokenstream must start with key"); }; - let tokens = token_stream.0.into_iter().peekable(); + let tokens = token_stream.into_iter().peekable(); Ok((Self { tokens }, key.clone())) } diff --git a/keyvalues-serde/src/ser.rs b/keyvalues-serde/src/ser.rs index 0281674..f7584f6 100644 --- a/keyvalues-serde/src/ser.rs +++ b/keyvalues-serde/src/ser.rs @@ -1,13 +1,12 @@ //! Serialize Rust types to VDF text -use keyvalues_parser::Vdf; use serde_core::{ser, Serialize}; use std::io::Write; use crate::{ error::{Error, Result}, - tokens::{NaiveToken, NaiveTokenStream}, + tokens::{naive::vdf_from_naive_tokens, NaiveToken}, }; /// The struct for serializing Rust values into VDF text @@ -16,7 +15,7 @@ use crate::{ /// [`to_writer_with_key()`] can be used instead #[derive(Default)] pub struct Serializer { - tokens: NaiveTokenStream, + tokens: Vec, } impl Serializer { @@ -79,7 +78,7 @@ where } } - let vdf = Vdf::try_from(&serializer.tokens)?; + let vdf = vdf_from_naive_tokens(&serializer.tokens)?; write!(writer, "{vdf}")?; Ok(()) diff --git a/keyvalues-serde/src/tokens/mod.rs b/keyvalues-serde/src/tokens/mod.rs index c86b97a..19f73e0 100644 --- a/keyvalues-serde/src/tokens/mod.rs +++ b/keyvalues-serde/src/tokens/mod.rs @@ -1,140 +1,62 @@ -// TODO: a lot of this can probably be slimmed down at this point -// TODO: implement a validate function -// TODO: make a note that this has invariants that must be upheld, so it is only exposed internally +// TODO: replace with some kind of iterator that decomposes the original structure instead of using +// an intermediate layer -mod naive; +pub(crate) mod naive; #[cfg(test)] mod tests; use keyvalues_parser::{Obj, Value, Vdf}; -use std::{ - borrow::Cow, - ops::{Deref, DerefMut}, -}; - -pub use crate::tokens::naive::{NaiveToken, NaiveTokenStream}; - -// I've been struggling to get serde to play nice with using a more complex internal structure in a -// `Deserializer`. I think the easiest solution I can come up with is to flatten out the `Vdf` into -// a stream of tokens that serde can consume. In this way the Deserializer can just work on -// munching through all the tokens instead of trying to mutate a more complex nested structure -// containing different types -/// A stream of [`Token`]s representing a [`Vdf`] -/// -/// I think an example is the easiest way to understand the structure so something like -/// -/// ```vdf -/// "Outer Key" -/// { -/// "Inner Key" "Inner Value" -/// "Inner Key" -/// { -/// } -/// } -/// ``` -/// -/// will be transformed into -/// -/// ```ron -/// Vdf( -/// key: "Outer Key", -/// value: Obj({ -/// "Inner Key": [ -/// Str("Inner Value"), -/// Obj({}) -/// ] -/// }) -/// ) -/// ``` -/// -/// which has the following token stream -/// -/// ```ron -/// TokenStream([ -/// Key("Outer Key"), -/// ObjBegin, -/// Key("Inner Key"), -/// SeqBegin, -/// Str("Inner Value"), -/// ObjBegin, -/// ObjEnd, -/// SeqEnd, -/// ObjEnd, -/// )] -/// ``` -/// -/// So in this way it's a linear sequence of keys and values where the value is either a str or an -/// object. -#[derive(Debug, PartialEq, Eq)] -pub struct TokenStream<'a>(pub Vec>); +use std::borrow::Cow; -impl<'a> Deref for TokenStream<'a> { - type Target = Vec>; +pub use crate::tokens::naive::NaiveToken; - fn deref(&self) -> &Self::Target { - &self.0 - } -} +pub(crate) fn tokens_from_vdf(vdf: Vdf<'_>) -> Vec> { + let Vdf { key, value } = vdf; -impl DerefMut for TokenStream<'_> { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } + let mut tokens = vec![Token::Key(key)]; + tokens.extend(tokens_from_value(value)); + tokens } -impl<'a> From> for TokenStream<'a> { - fn from(vdf: Vdf<'a>) -> Self { - let Vdf { key, value } = vdf; - - let mut inner = vec![Token::Key(key)]; - inner.extend(TokenStream::from(value).0); - - Self(inner) - } -} +// TODO: pass through a `&mut Vec<_>` instead of allocating new ones +fn tokens_from_value(value: Value<'_>) -> Vec> { + let mut tokens = Vec::new(); -impl<'a> From> for TokenStream<'a> { - fn from(value: Value<'a>) -> Self { - let mut inner = Vec::new(); - - match value { - Value::Str(s) => inner.push(Token::Str(s)), - Value::Obj(obj) => { - inner.push(Token::ObjBegin); - inner.extend(Self::from(obj).0); - inner.push(Token::ObjEnd); - } + match value { + Value::Str(s) => tokens.push(Token::Str(s)), + Value::Obj(obj) => { + tokens.push(Token::ObjBegin); + tokens.extend(tokens_from_obj(obj)); + tokens.push(Token::ObjEnd); } - - Self(inner) } -} -impl<'a> From> for TokenStream<'a> { - fn from(obj: Obj<'a>) -> Self { - let mut inner = Vec::new(); + tokens +} - for (key, values) in obj.into_inner().into_iter() { - inner.push(Token::Key(key)); +fn tokens_from_obj(obj: Obj<'_>) -> Vec> { + let mut tokens = Vec::new(); - // For ease of use a sequence is only marked when len != 1 - let num_values = values.len(); - if num_values != 1 { - inner.push(Token::SeqBegin); - } + for (key, values) in obj.into_inner().into_iter() { + tokens.push(Token::Key(key)); - for value in values { - inner.extend(TokenStream::from(value).0); - } + // For ease of use a sequence is only marked when len != 1 + let num_values = values.len(); + if num_values != 1 { + tokens.push(Token::SeqBegin); + } - if num_values != 1 { - inner.push(Token::SeqEnd); - } + for value in values { + tokens.extend(tokens_from_value(value)); } - Self(inner) + if num_values != 1 { + tokens.push(Token::SeqEnd); + } } + + tokens } /// A single VDF token diff --git a/keyvalues-serde/src/tokens/naive.rs b/keyvalues-serde/src/tokens/naive.rs index 0b2a5ae..55d126e 100644 --- a/keyvalues-serde/src/tokens/naive.rs +++ b/keyvalues-serde/src/tokens/naive.rs @@ -1,14 +1,7 @@ -//! Internal conversion from the [`NaiveTokenStream`] to [`Vdf`]s -//! -//! WARN: This logic relies on the representation of [`NaiveTokenStream`]s infallibly matching the -//! layout of a [`Vdf`]. The implementation here must remain internal and the `Serializer` must -//! output to match this format. - -use std::{ - borrow::Cow, - iter::Peekable, - ops::{Deref, DerefMut}, -}; +// TODO(cosmic): replace this with a builder to incrementally create the vdf without going through +// this extra layer + +use std::{borrow::Cow, iter::Peekable}; #[cfg(doc)] use crate::tokens::Token; @@ -17,166 +10,132 @@ use crate::{Error, Result}; use keyvalues_parser::{Key, Obj, Value, Vdf}; use serde_core::ser::Error as _; -/// A stream of [`NaiveToken`]s that do not encode what is a key vs a value -/// -/// This is primarily provided to simplify serialization so that a serializer can emit a naive -/// token stream that can later be used to create a VDF. This is due to the following reasons -/// -/// 1. The tokens can be owned values since there is no lifetime to tie the borrowed values to. -/// 2. There isn't context about what are keys vs. values -/// 3. Validation can be done in a separate step -/// -/// From there a `NaiveTokenStream` can be converted to a `Vdf` where the position of the keys is -/// inferred from the general structure. This also performs validation that all keys have an -/// associated value, all markers for multi-token structures make sense, and that there can't be a -/// sequence as a value in another sequence. -#[derive(Debug, Default)] -pub struct NaiveTokenStream(pub Vec); - -impl Deref for NaiveTokenStream { - type Target = Vec; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl DerefMut for NaiveTokenStream { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} - -// The conversion from `NaiveTokenStream` to `Vdf` leverages all the `process_*` functions which -// pass off an owned iterator through all of them to deal with the borrow checker -impl<'a> TryFrom<&'a NaiveTokenStream> for Vdf<'a> { - type Error = Error; - - fn try_from(naive_token_stream: &'a NaiveTokenStream) -> Result { - // Just some helper functions for munching through tokens - fn process_key_values<'a, I>( - mut tokens: Peekable, - ) -> Result<(Peekable, Key<'a>, Vec>)> - where - I: Iterator, - { - let key = match tokens.peek() { - Some(NaiveToken::Str(s)) => { - // Pop off the peeked token - let _ = tokens.next().unwrap(); - Cow::from(s) - } - // Infer an empty key when we see an obj while expecting a key - Some(NaiveToken::ObjBegin) => Cow::from(""), - other => { - // TODO: this shouldn't really be a custom error, but we need a better base - // error type - return Err(Error::custom(format!("Expected key, found: {other:?}"))); - } - }; +// The conversion leverages all the `process_*` functions which pass off an owned iterator through +// all of them to deal with the borrow checker +pub(crate) fn vdf_from_naive_tokens(naive_tokens: &[NaiveToken]) -> Result> { + // Just some helper functions for munching through tokens + fn process_key_values<'a, I>( + mut tokens: Peekable, + ) -> Result<(Peekable, Key<'a>, Vec>)> + where + I: Iterator, + { + let key = match tokens.peek() { + Some(NaiveToken::Str(s)) => { + // Pop off the peeked token + let _ = tokens.next().unwrap(); + Cow::from(s) + } + // Infer an empty key when we see an obj while expecting a key + Some(NaiveToken::ObjBegin) => Cow::from(""), + other => { + // TODO: this shouldn't really be a custom error, but we need a better base + // error type + return Err(Error::custom(format!("Expected key, found: {other:?}"))); + } + }; - let res = process_values(tokens)?; - tokens = res.0; - let values = res.1; + let res = process_values(tokens)?; + tokens = res.0; + let values = res.1; - Ok((tokens, key, values)) - } + Ok((tokens, key, values)) + } - fn process_values<'a, I>(mut tokens: Peekable) -> Result<(Peekable, Vec>)> - where - I: Iterator, - { - let pair = match tokens.next() { - // A `Str` is a single value - Some(NaiveToken::Str(s)) => (tokens, vec![Value::Str(Cow::from(s.clone()))]), - Some(NaiveToken::ObjBegin) => { - let (tokens, value) = process_obj(tokens)?; - (tokens, vec![value]) - } - // Sequences are a series of values that can't contain a sequence (vdf limitation) - Some(NaiveToken::SeqBegin) => { - let mut values = Vec::new(); - loop { - if let Some(NaiveToken::SeqEnd) = tokens.peek() { - // Pop off the marker - tokens.next(); - break; - } else { - let res = process_non_seq_value(tokens)?; - tokens = res.0; - if let Some(val) = res.1 { - values.push(val); - } + fn process_values<'a, I>(mut tokens: Peekable) -> Result<(Peekable, Vec>)> + where + I: Iterator, + { + let pair = match tokens.next() { + // A `Str` is a single value + Some(NaiveToken::Str(s)) => (tokens, vec![Value::Str(Cow::from(s.clone()))]), + Some(NaiveToken::ObjBegin) => { + let (tokens, value) = process_obj(tokens)?; + (tokens, vec![value]) + } + // Sequences are a series of values that can't contain a sequence (vdf limitation) + Some(NaiveToken::SeqBegin) => { + let mut values = Vec::new(); + loop { + if let Some(NaiveToken::SeqEnd) = tokens.peek() { + // Pop off the marker + tokens.next(); + break; + } else { + let res = process_non_seq_value(tokens)?; + tokens = res.0; + if let Some(val) = res.1 { + values.push(val); } } - - (tokens, values) } - // VDF represents `Null` as omitting the value - Some(NaiveToken::Null) => (tokens, Vec::new()), - _ => return Err(Error::ExpectedSomeValue), - }; - Ok(pair) - } + (tokens, values) + } + // VDF represents `Null` as omitting the value + Some(NaiveToken::Null) => (tokens, Vec::new()), + _ => return Err(Error::ExpectedSomeValue), + }; - fn process_non_seq_value<'a, I>( - mut tokens: Peekable, - ) -> Result<(Peekable, Option>)> - where - I: Iterator, - { - let pair = match tokens.next() { - Some(NaiveToken::Str(s)) => (tokens, Some(Value::Str(Cow::from(s)))), - Some(NaiveToken::ObjBegin) => { - let (tokens, value) = process_obj(tokens)?; - (tokens, Some(value)) - } - // VDF represents `Null` as omitting the value - Some(NaiveToken::Null) => (tokens, None), - _ => return Err(Error::ExpectedSomeNonSeqValue), - }; + Ok(pair) + } - Ok(pair) - } + fn process_non_seq_value<'a, I>( + mut tokens: Peekable, + ) -> Result<(Peekable, Option>)> + where + I: Iterator, + { + let pair = match tokens.next() { + Some(NaiveToken::Str(s)) => (tokens, Some(Value::Str(Cow::from(s)))), + Some(NaiveToken::ObjBegin) => { + let (tokens, value) = process_obj(tokens)?; + (tokens, Some(value)) + } + // VDF represents `Null` as omitting the value + Some(NaiveToken::Null) => (tokens, None), + _ => return Err(Error::ExpectedSomeNonSeqValue), + }; - fn process_obj<'a, I>(mut tokens: Peekable) -> Result<(Peekable, Value<'a>)> - where - I: Iterator, - { - let mut obj = Obj::new(); - loop { - match tokens.peek() { - Some(NaiveToken::ObjEnd) => { - tokens.next(); - break; - } - // An object is a series of key-value pairs - Some(_) => { - let res = process_key_values(tokens)?; - tokens = res.0; - let key = res.1; - let values = res.2; - obj.insert(key, values); - } - _ => return Err(Error::ExpectedObjectStart), + Ok(pair) + } + + fn process_obj<'a, I>(mut tokens: Peekable) -> Result<(Peekable, Value<'a>)> + where + I: Iterator, + { + let mut obj = Obj::new(); + loop { + match tokens.peek() { + Some(NaiveToken::ObjEnd) => { + tokens.next(); + break; } + // An object is a series of key-value pairs + Some(_) => { + let res = process_key_values(tokens)?; + tokens = res.0; + let key = res.1; + let values = res.2; + obj.insert(key, values); + } + _ => return Err(Error::ExpectedObjectStart), } - - Ok((tokens, Value::Obj(obj))) } - let tokens = naive_token_stream.iter().peekable(); - let (mut tokens, key, mut values) = process_key_values(tokens)?; + Ok((tokens, Value::Obj(obj))) + } - if tokens.next().is_some() { - return Err(Error::TrailingTokens); - } - let value = values.pop().ok_or_else(|| { - Error::custom("Syntax error: Serialized multiple values when there should only be one") - })?; - Ok(Self::new(key, value)) + let tokens = naive_tokens.iter().peekable(); + let (mut tokens, key, mut values) = process_key_values(tokens)?; + + if tokens.next().is_some() { + return Err(Error::TrailingTokens); } + let value = values.pop().ok_or_else(|| { + Error::custom("Syntax error: Serialized multiple values when there should only be one") + })?; + Ok(Vdf::new(key, value)) } /// A naive version of a [`Token`] diff --git a/keyvalues-serde/src/tokens/tests.rs b/keyvalues-serde/src/tokens/tests.rs index a59c964..bb5c755 100644 --- a/keyvalues-serde/src/tokens/tests.rs +++ b/keyvalues-serde/src/tokens/tests.rs @@ -2,8 +2,8 @@ use std::borrow::Cow; use crate::{ tokens::{ - naive::{NaiveToken, NaiveTokenStream}, - Token, TokenStream, + naive::{vdf_from_naive_tokens, NaiveToken}, + tokens_from_vdf, Token, }, Error, }; @@ -20,7 +20,7 @@ use keyvalues_parser::{Obj, Value, Vdf}; // } #[test] fn vdf_from_token_stream_basics() { - let naive_token_stream = NaiveTokenStream(vec![ + let naive_token_stream = vec![ NaiveToken::str("outer"), NaiveToken::ObjBegin, NaiveToken::str("sequence start"), @@ -32,7 +32,7 @@ fn vdf_from_token_stream_basics() { NaiveToken::str("some other inner val"), NaiveToken::SeqEnd, NaiveToken::ObjEnd, - ]); + ]; let ideal = { let mut sequence_obj = Obj::new(); @@ -56,13 +56,13 @@ fn vdf_from_token_stream_basics() { } }; - let actual = Vdf::try_from(&naive_token_stream).unwrap(); + let actual = vdf_from_naive_tokens(&naive_token_stream).unwrap(); assert_eq!(actual, ideal); } #[test] fn invalid_vdf_nested_seq() { - let naive_token_stream = NaiveTokenStream(vec![ + let naive_token_stream = vec![ NaiveToken::str("outer"), NaiveToken::ObjBegin, NaiveToken::str("nested sequence"), @@ -72,24 +72,24 @@ fn invalid_vdf_nested_seq() { NaiveToken::SeqEnd, NaiveToken::SeqEnd, NaiveToken::ObjEnd, - ]); + ]; - let err = Vdf::try_from(&naive_token_stream).unwrap_err(); + let err = vdf_from_naive_tokens(&naive_token_stream).unwrap_err(); assert!(matches!(err, Error::ExpectedSomeNonSeqValue), "{err:?}"); } #[test] fn invalid_vdf_seq_key() { - let naive_token_stream = NaiveTokenStream(vec![ + let naive_token_stream = vec![ NaiveToken::str("outer"), NaiveToken::ObjBegin, NaiveToken::SeqBegin, NaiveToken::SeqEnd, NaiveToken::ObjEnd, - ]); + ]; // TODO: clean up error type, so we can compare - let _err = Vdf::try_from(&naive_token_stream).unwrap_err(); + let _err = vdf_from_naive_tokens(&naive_token_stream).unwrap_err(); } #[test] @@ -104,10 +104,10 @@ fn token_stream_from_vdf() { } "#; let vdf = Vdf::parse(s).unwrap(); - let token_stream = TokenStream::from(vdf); + let token_stream = tokens_from_vdf(vdf); assert_eq!( token_stream, - TokenStream(vec![ + &[ Token::Key(Cow::from("Outer Key")), Token::ObjBegin, Token::Key(Cow::from("Inner Key")), @@ -117,6 +117,6 @@ fn token_stream_from_vdf() { Token::ObjEnd, Token::SeqEnd, Token::ObjEnd, - ]) + ] ); }