diff --git a/README.md b/README.md
index 2cd943a..5285fb7 100644
--- a/README.md
+++ b/README.md
@@ -65,12 +65,12 @@ Error: Unexpected end of input parsing integer; read context: input pos 3, lates

 ```console
 printf "3:ab" | cargo run
-Error: Unexpected end of input parsing string value; read context: input pos 4, latest input bytes dump: [51, 58, 97, 98] (UTF-8 string: `3:ab`); write context: output pos 0, latest output bytes dump: [] (UTF-8 string: ``)
+Error: Unexpected end of input parsing string value; read context: input pos 4, latest input bytes dump: [51, 58, 97, 98] (UTF-8 string: `3:ab`)
 ```

 ```console
 echo "i00e" | cargo run
-Error: Leading zeros in integers are not allowed, for example b'i00e'; read context: byte `48` (char: `0`), input pos 3, latest input bytes dump: [105, 48, 48] (UTF-8 string: `i00`); write context: byte `48` (char: `0`), output pos 2, latest output bytes dump: [48, 48] (UTF-8 string: `00`)
+Error: Leading zeros in integers are not allowed, for example b'i00e'; read context: byte `48` (char: `0`), input pos 3, latest input bytes dump: [105, 48, 48] (UTF-8 string: `i00`)
 ```

 Generating pretty JSON with [jq][jq]:
@@ -111,36 +111,10 @@ cargo add bencode2json

 There are two ways of using the library:

-- With high-level parser wrappers.
-- With the low-level parsers.
+- With high-level wrappers.
+- With the low-level generators.

-Example using the high-level parser wrappers:
-
-```rust
-use bencode2json::{try_bencode_to_json};
-
-let result = try_bencode_to_json(b"d4:spam4:eggse").unwrap();
-
-assert_eq!(result, r#"{"spam":"eggsstring>"}"#);
-```
-
-Example using the low-level parser:
-
-```rust
-use bencode2json::parsers::{BencodeParser};
-
-let mut output = String::new();
-
-let mut parser = BencodeParser::new(&b"4:spam"[..]);
-
-parser
-    .write_str(&mut output)
-    .expect("Bencode to JSON conversion failed");
-
-println!("{output}"); // It prints the JSON string: "spam"
-```
-
-More [examples](./examples/).
+See [examples](./examples/).

 ## Test

@@ -167,21 +141,19 @@ cargo cov

 ## Performance

 In terms of memory usage this implementation consumes at least the size of the
-biggest bencoded string. The string parser keeps all the string bytes in memory until
-it parses the whole string, in order to convert it to UTF-8, when it's possible.
+biggest bencoded integer or string. The string and integer parsers keep all the bytes in memory until
+they have parsed the whole value.

 The library also wraps the input and output streams in a
 [BufReader](https://doc.rust-lang.org/std/io/struct.BufReader.html) and
 [BufWriter](https://doc.rust-lang.org/std/io/struct.BufWriter.html) because it
 can be excessively inefficient to work directly with something that implements
 [Read](https://doc.rust-lang.org/std/io/trait.Read.html) or
 [Write](https://doc.rust-lang.org/std/io/trait.Write.html).

 ## TODO

-- [ ] More examples of using the library.
 - [ ] Counter for number of items in a list for debugging and errors.
 - [ ] Fuzz testing: Generate random valid bencoded values.
 - [ ] Install tracing crate. Add verbose mode that enables debugging.
 - [ ] Option to check if the final JSON is valid at the end of the process.
 - [ ] Benchmarking for this implementation and the original C implementation.
-- [ ] Optimize string parser. We can stop trying to convert the string to UTF-8 when we find a non valid UTF-8 char.
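Since this patch removes the inline README snippets in favour of the `examples/` directory, a minimal sketch of the renamed API is worth keeping at hand. It is based only on the `examples/` and `src/lib.rs` changes in this diff; the output noted in the comments follows the example doc comments and is assumed rather than verified.

```rust
// Sketch only: mirrors examples/parser_string_in_string_out.rs after the rename.
use bencode2json::{generators::json::Generator, try_bencode_to_json};

fn main() {
    // High-level wrapper: converts a bencoded byte slice into a JSON `String`.
    let json = try_bencode_to_json(b"4:spam").expect("Bencode to JSON conversion failed");
    println!("{json}"); // expected to print the JSON string: "spam"

    // Low-level generator: streams JSON into anything implementing `std::fmt::Write`.
    let mut output = String::new();
    Generator::new(&b"4:spam"[..])
        .write_str(&mut output)
        .expect("Bencode to JSON conversion failed");
    println!("{output}"); // same result as the wrapper above
}
```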
## Alternatives diff --git a/examples/parser_file_in_file_out.rs b/examples/parser_file_in_file_out.rs index 732f9b8..06acb13 100644 --- a/examples/parser_file_in_file_out.rs +++ b/examples/parser_file_in_file_out.rs @@ -10,7 +10,7 @@ use std::{ io::{Read, Write}, }; -use bencode2json::parsers::BencodeParser; +use bencode2json::generators::json::Generator; use clap::{Arg, Command}; fn main() { @@ -61,7 +61,7 @@ fn main() { std::process::exit(1); }; - if let Err(e) = BencodeParser::new(input).write_bytes(&mut output) { + if let Err(e) = Generator::new(input).write_bytes(&mut output) { eprintln!("Error: {e}"); std::process::exit(1); } diff --git a/examples/parser_stdin_stdout.rs b/examples/parser_stdin_stdout.rs index 4f8e162..01456cb 100644 --- a/examples/parser_stdin_stdout.rs +++ b/examples/parser_stdin_stdout.rs @@ -7,13 +7,13 @@ //! It prints "spam". use std::io; -use bencode2json::parsers::BencodeParser; +use bencode2json::generators::json::Generator; fn main() { let input = Box::new(io::stdin()); let mut output = Box::new(io::stdout()); - if let Err(e) = BencodeParser::new(input).write_bytes(&mut output) { + if let Err(e) = Generator::new(input).write_bytes(&mut output) { eprintln!("Error: {e}"); std::process::exit(1); } diff --git a/examples/parser_string_in_string_out.rs b/examples/parser_string_in_string_out.rs index 638304e..6c7bf7c 100644 --- a/examples/parser_string_in_string_out.rs +++ b/examples/parser_string_in_string_out.rs @@ -5,13 +5,13 @@ //! ``` //! //! It prints "spam". -use bencode2json::parsers::BencodeParser; +use bencode2json::generators::json::Generator; fn main() { let input = "4:spam".to_string(); let mut output = String::new(); - if let Err(e) = BencodeParser::new(input.as_bytes()).write_str(&mut output) { + if let Err(e) = Generator::new(input.as_bytes()).write_str(&mut output) { eprintln!("Error: {e}"); std::process::exit(1); } diff --git a/examples/parser_string_in_vec_out.rs b/examples/parser_string_in_vec_out.rs index 56c073b..3d89dfd 100644 --- a/examples/parser_string_in_vec_out.rs +++ b/examples/parser_string_in_vec_out.rs @@ -5,13 +5,13 @@ //! ``` //! //! It prints "spam". -use bencode2json::parsers::BencodeParser; +use bencode2json::generators::json::Generator; fn main() { let input = "4:spam".to_string(); let mut output = Vec::new(); - if let Err(e) = BencodeParser::new(input.as_bytes()).write_bytes(&mut output) { + if let Err(e) = Generator::new(input.as_bytes()).write_bytes(&mut output) { eprintln!("Error: {e}"); std::process::exit(1); } diff --git a/examples/parser_vec_in_string_out.rs b/examples/parser_vec_in_string_out.rs index fc27d07..1388f51 100644 --- a/examples/parser_vec_in_string_out.rs +++ b/examples/parser_vec_in_string_out.rs @@ -5,13 +5,13 @@ //! ``` //! //! It prints "spam". -use bencode2json::parsers::BencodeParser; +use bencode2json::generators::json::Generator; fn main() { let input = b"4:spam".to_vec(); let mut output = String::new(); - if let Err(e) = BencodeParser::new(&input[..]).write_str(&mut output) { + if let Err(e) = Generator::new(&input[..]).write_str(&mut output) { eprintln!("Error: {e}"); std::process::exit(1); } diff --git a/examples/parser_vec_in_vec_out.rs b/examples/parser_vec_in_vec_out.rs index 57ac35d..d678bff 100644 --- a/examples/parser_vec_in_vec_out.rs +++ b/examples/parser_vec_in_vec_out.rs @@ -5,13 +5,13 @@ //! ``` //! //! It prints "spam". 
-use bencode2json::parsers::BencodeParser; +use bencode2json::generators::json::Generator; fn main() { let input = b"4:spam".to_vec(); let mut output = Vec::new(); - if let Err(e) = BencodeParser::new(&input[..]).write_bytes(&mut output) { + if let Err(e) = Generator::new(&input[..]).write_bytes(&mut output) { eprintln!("Error: {e}"); std::process::exit(1); } diff --git a/src/parsers/error.rs b/src/error.rs similarity index 88% rename from src/parsers/error.rs rename to src/error.rs index 6874bd6..2f581a3 100644 --- a/src/parsers/error.rs +++ b/src/error.rs @@ -9,7 +9,7 @@ use thiserror::Error; use crate::rw; -use super::BencodeType; +use super::generators::BencodeType; /// Errors that can occur while parsing a bencoded value. #[derive(Debug, Error)] @@ -27,16 +27,16 @@ pub enum Error { /// The main parser peeks one byte ahead to know what kind of bencoded value /// is being parsed. If the byte read after peeking does not match the /// peeked byte, it means the input is being consumed somewhere else. - #[error("Read byte after peeking does match peeked byte; {0}; {1}")] - ReadByteAfterPeekingDoesMatchPeekedByte(ReadContext, WriteContext), + #[error("Read byte after peeking does match peeked byte; {0}")] + ReadByteAfterPeekingDoesMatchPeekedByte(ReadContext), /// Unrecognized first byte for new bencoded value. /// /// The main parser peeks one byte ahead to know what kind of bencoded value /// is being parsed. This error is raised when the peeked byte is not a /// valid first byte for a bencoded value. - #[error("Unrecognized first byte for new bencoded value; {0}; {1}")] - UnrecognizedFirstBencodeValueByte(ReadContext, WriteContext), + #[error("Unrecognized first byte for new bencoded value; {0}")] + UnrecognizedFirstBencodeValueByte(ReadContext), // Integers /// Unexpected byte parsing integer. @@ -44,38 +44,38 @@ pub enum Error { /// The main parser parses integers by reading bytes until it finds the /// end of the integer. This error is raised when the byte read is not a /// valid byte for an integer bencoded value. - #[error("Unexpected byte parsing integer; {0}; {1}")] - UnexpectedByteParsingInteger(ReadContext, WriteContext), + #[error("Unexpected byte parsing integer; {0}")] + UnexpectedByteParsingInteger(ReadContext), /// Unexpected end of input parsing integer. /// /// The input ends before the integer ends. - #[error("Unexpected end of input parsing integer; {0}; {1}")] - UnexpectedEndOfInputParsingInteger(ReadContext, WriteContext), + #[error("Unexpected end of input parsing integer; {0}")] + UnexpectedEndOfInputParsingInteger(ReadContext), /// Leading zeros in integers are not allowed, for example b'i00e'. - #[error("Leading zeros in integers are not allowed, for example b'i00e'; {0}; {1}")] - LeadingZerosInIntegersNotAllowed(ReadContext, WriteContext), + #[error("Leading zeros in integers are not allowed, for example b'i00e'; {0}")] + LeadingZerosInIntegersNotAllowed(ReadContext), // Strings /// Invalid string length byte, expected a digit. /// /// The string parser found an invalid byte for the string length. The /// length can only be made of digits (0-9). - #[error("Invalid string length byte, expected a digit; {0}; {1}")] - InvalidStringLengthByte(ReadContext, WriteContext), + #[error("Invalid string length byte, expected a digit; {0}")] + InvalidStringLengthByte(ReadContext), /// Unexpected end of input parsing string length. /// /// The input ends before the string length ends. 
- #[error("Unexpected end of input parsing string length; {0}; {1}")] - UnexpectedEndOfInputParsingStringLength(ReadContext, WriteContext), + #[error("Unexpected end of input parsing string length; {0}")] + UnexpectedEndOfInputParsingStringLength(ReadContext), /// Unexpected end of input parsing string value. /// /// The input ends before the string value ends. - #[error("Unexpected end of input parsing string value; {0}; {1}")] - UnexpectedEndOfInputParsingStringValue(ReadContext, WriteContext), + #[error("Unexpected end of input parsing string value; {0}")] + UnexpectedEndOfInputParsingStringValue(ReadContext), // Lists /// Unexpected end of input parsing list. Expecting first list item or list end. @@ -121,7 +121,7 @@ pub enum Error { NoMatchingStartForListOrDictEnd(ReadContext, WriteContext), } -/// The reader context when the error ocurred. +/// The reader context when the error occurred. #[derive(Debug)] pub struct ReadContext { /// The read byte that caused the error if any. @@ -157,7 +157,7 @@ impl fmt::Display for ReadContext { } } -/// The writer context when the error ocurred. +/// The writer context when the error occurred. #[derive(Debug)] pub struct WriteContext { /// The written byte that caused the error if any. @@ -197,7 +197,7 @@ impl fmt::Display for WriteContext { mod tests { mod for_read_context { - use crate::parsers::error::ReadContext; + use crate::error::ReadContext; #[test] fn it_should_display_the_read_context() { @@ -237,7 +237,7 @@ mod tests { } mod for_write_context { - use crate::parsers::error::WriteContext; + use crate::error::WriteContext; #[test] fn it_should_display_the_read_context() { diff --git a/src/parsers/mod.rs b/src/generators/json.rs similarity index 89% rename from src/parsers/mod.rs rename to src/generators/json.rs index 44bebb8..ccfa6af 100644 --- a/src/parsers/mod.rs +++ b/src/generators/json.rs @@ -1,48 +1,29 @@ -//! Parsers, including the main parser and the parsers for the basic types -//! (integer and string). -//! -//! ``BencodeParser`` is the main parser. It is generic over the type of the -//! input buffer. -pub mod error; -pub mod integer; -pub mod stack; -pub mod string; - +//! Json generator for bencoded data. 
+use core::str; use std::{ fmt::Write as FmtWrite, - io::{self, Read, Write as IoWrite}, + io::{Read, Write as IoWrite}, }; -use derive_more::derive::Display; -use error::{ReadContext, WriteContext}; -use stack::{Stack, State}; - -use crate::rw::{ - byte_reader::ByteReader, byte_writer::ByteWriter, string_writer::StringWriter, writer::Writer, +use super::{ + stack::{Stack, State}, + BencodeType, }; +use tokenizer::{BencodeToken, Tokenizer}; -// Bencoded reserved bytes -const BENCODE_BEGIN_INTEGER: u8 = b'i'; -const BENCODE_END_INTEGER: u8 = b'e'; -const BENCODE_BEGIN_LIST: u8 = b'l'; -const BENCODE_BEGIN_DICT: u8 = b'd'; -const BENCODE_END_LIST_OR_DICT: u8 = b'e'; - -#[derive(Debug, PartialEq, Display)] -pub enum BencodeType { - Integer, - String, - List, - Dict, -} +use crate::{ + error::{self, ReadContext, WriteContext}, + rw::{byte_writer::ByteWriter, string_writer::StringWriter, writer::Writer}, + tokenizer, +}; -pub struct BencodeParser { - byte_reader: ByteReader, +pub struct Generator { + tokenizer: Tokenizer, num_processed_tokens: u64, stack: Stack, } -impl BencodeParser { +impl Generator { const JSON_ARRAY_BEGIN: u8 = b'['; const JSON_ARRAY_ITEMS_SEPARATOR: u8 = b','; const JSON_ARRAY_END: u8 = b']'; @@ -53,8 +34,8 @@ impl BencodeParser { const JSON_OBJ_END: u8 = b'}'; pub fn new(reader: R) -> Self { - BencodeParser { - byte_reader: ByteReader::new(reader), + Generator { + tokenizer: Tokenizer::new(reader), num_processed_tokens: 1, stack: Stack::default(), } @@ -104,49 +85,49 @@ impl BencodeParser { /// - It can't read from the input or write to the output. /// - The input is invalid Bencode. fn parse(&mut self, writer: &mut W) -> Result<(), error::Error> { - while let Some(peeked_byte) = Self::peek_byte(&mut self.byte_reader, writer)? { - match peeked_byte { - BENCODE_BEGIN_INTEGER => { + while let Some(token) = self.tokenizer.next_token()? { + match token { + BencodeToken::Integer(integer_bytes) => { self.begin_bencoded_value(BencodeType::Integer, writer)?; - integer::parse(&mut self.byte_reader, writer)?; + // todo: add `write_bytes` to writer. + for bytes in integer_bytes { + writer.write_byte(bytes)?; + } } - b'0'..=b'9' => { + BencodeToken::String(string_bytes) => { self.begin_bencoded_value(BencodeType::String, writer)?; - string::parse(&mut self.byte_reader, writer)?; + + let html_tag_style_string = match str::from_utf8(&string_bytes) { + Ok(string) => { + // String only contains valid UTF-8 chars -> print it as it's + &format!("{}", string.to_owned()) + } + Err(_) => { + // String contains non valid UTF-8 chars -> print it as hex bytes + &format!("{}", hex::encode(string_bytes)) + } + }; + + writer.write_str( + &serde_json::to_string(&html_tag_style_string) + .expect("Failed to serialize to JSON. 
This should not happen because non UTF-8 bencoded string are serialized as hex bytes"), + )?; } - BENCODE_BEGIN_LIST => { - let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, writer)?; + BencodeToken::BeginList => { self.begin_bencoded_value(BencodeType::List, writer)?; writer.write_byte(Self::JSON_ARRAY_BEGIN)?; self.stack.push(State::ExpectingFirstListItemOrEnd); } - BENCODE_BEGIN_DICT => { - let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, writer)?; + BencodeToken::BeginDict => { self.begin_bencoded_value(BencodeType::Dict, writer)?; writer.write_byte(Self::JSON_OBJ_BEGIN)?; self.stack.push(State::ExpectingFirstDictFieldOrEnd); } - BENCODE_END_LIST_OR_DICT => { - let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, writer)?; + BencodeToken::EndListOrDict => { self.end_list_or_dict(writer)?; } - b'\n' => { + BencodeToken::LineBreak => { // Ignore line breaks at the beginning, the end, or between values - let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, writer)?; - } - _ => { - return Err(error::Error::UnrecognizedFirstBencodeValueByte( - ReadContext { - byte: Some(peeked_byte), - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), - }, - WriteContext { - byte: Some(peeked_byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); } } @@ -156,68 +137,6 @@ impl BencodeParser { self.check_bad_end_stack_state(writer) } - /// It reads the next byte from the input consuming it. It returns `None` if - /// the input has ended. - /// - /// # Errors - /// - /// Will return and errors if: - /// - /// - It can't read from the input. - /// - The byte read is not the expected one (the previously peeked byte). - fn read_peeked_byte( - peeked_byte: u8, - reader: &mut ByteReader, - writer: &W, - ) -> Result, error::Error> { - match reader.read_byte() { - Ok(byte) => { - if byte == peeked_byte { - return Ok(Some(byte)); - } - Err(error::Error::ReadByteAfterPeekingDoesMatchPeekedByte( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )) - } - Err(err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Ok(None); - } - Err(err.into()) - } - } - } - - /// It peeks the next byte from the input without consuming it. It returns - /// `None` if the input has ended. - /// - /// # Errors - /// - /// Will return and errors if it can't read from the input. - fn peek_byte( - reader: &mut ByteReader, - _writer: &W, - ) -> Result, error::Error> { - match reader.peek_byte() { - Ok(byte) => Ok(Some(byte)), - Err(err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Ok(None); - } - Err(err.into()) - } - } - } - /// It updates the stack state and prints the delimiters when needed. 
/// /// Called when the first byt of a bencoded value (integer, string, list or dict) @@ -245,8 +164,8 @@ impl BencodeParser { bencode_type, ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -269,8 +188,8 @@ impl BencodeParser { bencode_type, ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -311,8 +230,8 @@ impl BencodeParser { return Err(error::Error::PrematureEndOfDict( ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -325,8 +244,8 @@ impl BencodeParser { return Err(error::Error::NoMatchingStartForListOrDictEnd( ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -354,8 +273,8 @@ impl BencodeParser { error::Error::UnexpectedEndOfInputExpectingFirstListItemOrEnd( ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -368,8 +287,8 @@ impl BencodeParser { Err(error::Error::UnexpectedEndOfInputExpectingNextListItem( ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -382,8 +301,8 @@ impl BencodeParser { error::Error::UnexpectedEndOfInputExpectingFirstDictFieldOrEnd( ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -396,8 +315,8 @@ impl BencodeParser { Err(error::Error::UnexpectedEndOfInputExpectingDictFieldValue( ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -410,8 +329,8 @@ impl BencodeParser { error::Error::UnexpectedEndOfInputExpectingDictFieldKeyOrEnd( ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -429,16 +348,16 @@ mod tests { use std::io::{self, Read}; - use crate::{parsers::BencodeParser, test::bencode_to_json_unchecked, try_bencode_to_json}; + use crate::generators::json::Generator; mod it_should_allow_writing { - use crate::parsers::BencodeParser; + use crate::generators::json::Generator; #[test] fn to_any_type_implementing_io_write_trait() { let mut output = Vec::new(); - let mut parser = BencodeParser::new(&b"i0e"[..]); 
+ let mut parser = Generator::new(&b"i0e"[..]); parser .write_bytes(&mut output) @@ -451,7 +370,7 @@ mod tests { fn writing_to_any_type_implementing_fmt_write_trait() { let mut output = String::new(); - let mut parser = BencodeParser::new(&b"i0e"[..]); + let mut parser = Generator::new(&b"i0e"[..]); parser .write_str(&mut output) @@ -476,7 +395,7 @@ mod tests { let mut output = String::new(); - let mut parser = BencodeParser::new(EmptyReader); + let mut parser = Generator::new(EmptyReader); parser.write_str(&mut output).unwrap(); @@ -485,13 +404,13 @@ mod tests { mod it_should_allow_special_bencode_cases { - use crate::{parsers::BencodeParser, test::bencode_to_json_unchecked}; + use crate::{generators::json::Generator, test::bencode_to_json_unchecked}; #[test] fn an_empty_input() { let mut output = String::new(); - let mut parser = BencodeParser::new(&b""[..]); + let mut parser = Generator::new(&b""[..]); parser .write_str(&mut output) @@ -522,10 +441,7 @@ mod tests { mod it_should_fail { use std::io::{self, Read}; - use crate::{ - parsers::{error::Error, BencodeParser}, - try_bencode_to_json, - }; + use crate::{error::Error, generators::json::Generator, try_bencode_to_json}; #[test] fn when_there_is_a_problem_reading_from_input() { @@ -542,7 +458,7 @@ mod tests { let mut output = String::new(); - let mut parser = BencodeParser::new(FaultyReader); + let mut parser = Generator::new(FaultyReader); let result = parser.write_str(&mut output); @@ -622,7 +538,7 @@ mod tests { } mod should_fail { - use crate::{parsers::error::Error, try_bencode_to_json}; + use crate::{error::Error, try_bencode_to_json}; #[test] fn when_it_finds_an_invalid_byte() { @@ -784,7 +700,7 @@ mod tests { } mod should_escape_json { - use crate::{parsers::tests::bencode_to_json_unchecked, to_bencode}; + use crate::{test::bencode_to_json_unchecked, to_bencode}; #[test] fn containing_a_double_quote() { @@ -828,7 +744,7 @@ mod tests { } mod it_should_fail_parsing_when { - use crate::parsers::{error::Error, tests::try_bencode_to_json}; + use crate::{error::Error, try_bencode_to_json}; #[test] fn it_reaches_the_end_of_the_input_parsing_the_string_length() { @@ -866,9 +782,9 @@ mod tests { } mod lists { - use crate::{ - parsers::tests::bencode_to_json_unchecked, - test::{generate_n_nested_empty_bencoded_lists, generate_n_nested_empty_json_arrays}, + use crate::test::{ + bencode_to_json_unchecked, generate_n_nested_empty_bencoded_lists, + generate_n_nested_empty_json_arrays, }; #[test] @@ -895,7 +811,7 @@ mod tests { } mod with_one_item { - use crate::parsers::tests::bencode_to_json_unchecked; + use crate::test::bencode_to_json_unchecked; #[test] fn integer() { @@ -919,7 +835,7 @@ mod tests { } mod of_type_list { - use crate::parsers::tests::bencode_to_json_unchecked; + use crate::test::bencode_to_json_unchecked; #[test] fn two_nested_empty_list() { @@ -978,7 +894,7 @@ mod tests { } mod of_type_dict { - use crate::parsers::tests::bencode_to_json_unchecked; + use crate::test::bencode_to_json_unchecked; #[test] fn empty() { @@ -1037,7 +953,7 @@ mod tests { } mod with_two_items_of_the_same_type { - use crate::parsers::tests::bencode_to_json_unchecked; + use crate::test::bencode_to_json_unchecked; #[test] fn two_integers() { @@ -1091,7 +1007,7 @@ mod tests { } mod with_two_items_of_different_types { - use crate::parsers::tests::bencode_to_json_unchecked; + use crate::test::bencode_to_json_unchecked; #[test] fn integer_and_utf8_string() { @@ -1401,7 +1317,7 @@ mod tests { } mod should_fail { - use crate::{parsers::error::Error, 
try_bencode_to_json}; + use crate::{error::Error, try_bencode_to_json}; #[test] fn when_an_empty_list_does_not_have_the_matching_close_byte() { @@ -1442,11 +1358,9 @@ mod tests { } mod dictionary { - use crate::{ - parsers::tests::bencode_to_json_unchecked, - test::{ - generate_n_nested_empty_bencoded_dictionaries, generate_n_nested_empty_json_objects, - }, + use crate::test::{ + bencode_to_json_unchecked, generate_n_nested_empty_bencoded_dictionaries, + generate_n_nested_empty_json_objects, }; #[test] @@ -1479,7 +1393,7 @@ mod tests { } mod with_a_key { - use crate::parsers::tests::bencode_to_json_unchecked; + use crate::test::bencode_to_json_unchecked; #[test] fn starting_with_a_digit() { @@ -1499,7 +1413,7 @@ mod tests { } mod with_one_field { - use crate::parsers::tests::bencode_to_json_unchecked; + use crate::test::bencode_to_json_unchecked; #[test] fn integer() { @@ -1543,7 +1457,7 @@ mod tests { } mod with_two_fields_of_the_same_type { - use crate::parsers::tests::bencode_to_json_unchecked; + use crate::test::bencode_to_json_unchecked; #[test] fn two_integers() { @@ -1934,12 +1848,13 @@ mod tests { mod should_escape_json { mod in_field_keys { - use crate::parsers::tests::bencode_to_json_unchecked; // Only one especial char is tested. The string parser contains // other tests for the rest of the special chars that need to be // escaped. + use crate::test::bencode_to_json_unchecked; + #[test] fn containing_a_line_break_at_the_beginning_of_the_string() { assert_eq!( @@ -1966,7 +1881,7 @@ mod tests { } mod in_field_values { - use crate::parsers::tests::bencode_to_json_unchecked; + use crate::test::bencode_to_json_unchecked; #[test] fn containing_a_line_break_at_the_beginning_of_the_string() { @@ -1995,7 +1910,7 @@ mod tests { } mod should_fail { - use crate::{parsers::error::Error, try_bencode_to_json}; + use crate::{error::Error, try_bencode_to_json}; #[test] fn when_an_empty_dict_does_not_have_the_matching_close_byte() { @@ -2073,8 +1988,8 @@ mod tests { } mod when_the_field_key_is_not_a_string_for_example { - use crate::parsers::error::Error; - use crate::parsers::BencodeType; + use crate::error::Error; + use crate::generators::json::BencodeType; use crate::try_bencode_to_json; #[test] diff --git a/src/generators/mod.rs b/src/generators/mod.rs new file mode 100644 index 0000000..c1911f0 --- /dev/null +++ b/src/generators/mod.rs @@ -0,0 +1,14 @@ +pub mod json; +pub mod stack; + +// todo: extract trait for generators when we implement a new one. + +use derive_more::derive::Display; + +#[derive(Debug, PartialEq, Display)] +pub enum BencodeType { + Integer, + String, + List, + Dict, +} diff --git a/src/parsers/stack.rs b/src/generators/stack.rs similarity index 96% rename from src/parsers/stack.rs rename to src/generators/stack.rs index d6f8451..6bf6dec 100644 --- a/src/parsers/stack.rs +++ b/src/generators/stack.rs @@ -1,5 +1,4 @@ -//! The stack used by the Bencoded to JSON converter to keep track of the -//! current parsing state. +//! The stack used by the generators to keep track of the current parsing state. use std::fmt::Display; /// Stack containing states for nested Bencoded values. 
@@ -150,7 +149,7 @@ impl Stack { #[cfg(test)] mod tests { mod the_stack_state { - use crate::parsers::stack::State; + use crate::generators::stack::State; #[test] fn should_be_displayed_with_single_letter_abbreviations() { @@ -165,7 +164,7 @@ mod tests { mod the_stack { mod it_should { - use crate::parsers::stack::{Stack, State}; + use crate::generators::stack::{Stack, State}; #[test] fn have_an_initial_state() { @@ -235,7 +234,7 @@ mod tests { mod be_displayed_with_single_letter_abbreviations_for_states { - use crate::parsers::stack::{Stack, State}; + use crate::generators::stack::{Stack, State}; #[test] fn with_the_initial_state() { diff --git a/src/lib.rs b/src/lib.rs index 9a165ee..9ca27cb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,10 +34,13 @@ //! //! > __NOTICE__: In the context of this lib, parser is a function that takes an input //! > containing bencoded data and produces a JSON output (raw bytes or UTF-8 string). -use parsers::{error::Error, BencodeParser}; - -pub mod parsers; +pub mod error; +pub mod generators; pub mod rw; +pub mod tokenizer; + +use error::Error; +use generators::json::Generator; mod test; /// It converts bencoded bytes into a JSON string. @@ -48,7 +51,7 @@ mod test; pub fn try_bencode_to_json(input_buffer: &[u8]) -> Result { let mut output = String::new(); - let mut parser = BencodeParser::new(input_buffer); + let mut parser = Generator::new(input_buffer); match parser.write_str(&mut output) { Ok(()) => Ok(output), diff --git a/src/main.rs b/src/main.rs index ae927f0..e7e5b9a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,7 +13,7 @@ //! ```text //! cargo run -- -i ./tests/fixtures/sample.bencode -o output.json //! ``` -use bencode2json::parsers::BencodeParser; +use bencode2json::generators::json::Generator; use clap::{Arg, Command}; use std::fs::File; use std::io::{self, Read, Write}; @@ -70,7 +70,7 @@ fn run() { Box::new(io::stdout()) }; - if let Err(e) = BencodeParser::new(input).write_bytes(&mut output) { + if let Err(e) = Generator::new(input).write_bytes(&mut output) { eprintln!("Error: {e}"); std::process::exit(1); } diff --git a/src/test.rs b/src/test.rs index 2234107..fef1001 100644 --- a/src/test.rs +++ b/src/test.rs @@ -8,11 +8,11 @@ #[cfg(test)] #[must_use] pub(crate) fn bencode_to_json_unchecked(input_buffer: &[u8]) -> String { - use crate::parsers::BencodeParser; + use crate::generators::json::Generator; let mut output = String::new(); - let mut parser = BencodeParser::new(input_buffer); + let mut parser = Generator::new(input_buffer); parser .write_str(&mut output) diff --git a/src/parsers/integer.rs b/src/tokenizer/integer.rs similarity index 60% rename from src/parsers/integer.rs rename to src/tokenizer/integer.rs index 50d61bd..5722bcc 100644 --- a/src/parsers/integer.rs +++ b/src/tokenizer/integer.rs @@ -3,10 +3,10 @@ //! It reads bencoded bytes from the input and writes JSON bytes to the output. use std::io::{self, Read}; -use crate::rw::{byte_reader::ByteReader, writer::Writer}; +use crate::rw::byte_reader::ByteReader; use super::{ - error::{Error, ReadContext, WriteContext}, + error::{Error, ReadContext}, BENCODE_END_INTEGER, }; @@ -31,12 +31,13 @@ enum StateExpecting { /// /// Will panic if we reach the end of the input without completing the integer /// (without reaching the end of the integer `e`). 
-pub fn parse(reader: &mut ByteReader, writer: &mut W) -> Result<(), Error> { +pub fn parse(reader: &mut ByteReader) -> Result, Error> { let mut state = StateExpecting::Start; let mut first_digit_is_zero = false; + let mut value = vec![]; loop { - let byte = next_byte(reader, writer)?; + let byte = next_byte(reader)?; let char = byte as char; @@ -47,11 +48,11 @@ pub fn parse(reader: &mut ByteReader, writer: &mut W) -> } StateExpecting::DigitOrSign => { if char == '-' { - writer.write_byte(byte)?; + value.push(byte); StateExpecting::DigitAfterSign } else if char.is_ascii_digit() { - writer.write_byte(byte)?; + value.push(byte); if char == '0' { first_digit_is_zero = true; @@ -59,23 +60,16 @@ pub fn parse(reader: &mut ByteReader, writer: &mut W) -> StateExpecting::DigitOrEnd } else { - return Err(Error::UnexpectedByteParsingInteger( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::UnexpectedByteParsingInteger(ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } } StateExpecting::DigitAfterSign => { if char.is_ascii_digit() { - writer.write_byte(byte)?; + value.push(byte); if char == '0' { first_digit_is_zero = true; @@ -83,55 +77,34 @@ pub fn parse(reader: &mut ByteReader, writer: &mut W) -> StateExpecting::DigitOrEnd } else { - return Err(Error::UnexpectedByteParsingInteger( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::UnexpectedByteParsingInteger(ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } } StateExpecting::DigitOrEnd => { if char.is_ascii_digit() { - writer.write_byte(byte)?; + value.push(byte); if char == '0' && first_digit_is_zero { - return Err(Error::LeadingZerosInIntegersNotAllowed( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::LeadingZerosInIntegersNotAllowed(ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } StateExpecting::DigitOrEnd } else if byte == BENCODE_END_INTEGER { - return Ok(()); + return Ok(value); } else { - return Err(Error::UnexpectedByteParsingInteger( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::UnexpectedByteParsingInteger(ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } } }; @@ -143,23 +116,16 @@ pub fn parse(reader: &mut ByteReader, writer: &mut W) -> /// # Errors /// /// Will return an error if the end of input was reached. 
-fn next_byte(reader: &mut ByteReader, writer: &W) -> Result { +fn next_byte(reader: &mut ByteReader) -> Result { match reader.read_byte() { Ok(byte) => Ok(byte), Err(err) => { if err.kind() == io::ErrorKind::UnexpectedEof { - return Err(Error::UnexpectedEndOfInputParsingInteger( - ReadContext { - byte: None, - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::UnexpectedEndOfInputParsingInteger(ReadContext { + byte: None, + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } Err(err.into()) } @@ -168,42 +134,30 @@ fn next_byte(reader: &mut ByteReader, writer: &W) -> Resu #[cfg(test)] mod tests { - use crate::{ - parsers::{error::Error, integer::parse}, - rw::{byte_reader::ByteReader, string_writer::StringWriter}, - }; + use crate::{error::Error, rw::byte_reader::ByteReader}; - fn bencode_to_json_unchecked(input_buffer: &[u8]) -> String { - let mut output = String::new(); + use super::parse; - parse_bencode(input_buffer, &mut output).expect("Bencode to JSON conversion failed"); - - output + fn bencode_to_json_unchecked(input_buffer: &[u8]) -> Vec { + parse_bencode(input_buffer).expect("Bencode to JSON conversion failed") } - fn try_bencode_to_json(input_buffer: &[u8]) -> Result { - let mut output = String::new(); - - match parse_bencode(input_buffer, &mut output) { - Ok(()) => Ok(output), - Err(err) => Err(err), - } + fn try_bencode_to_json(input_buffer: &[u8]) -> Result, Error> { + parse_bencode(input_buffer) } - fn parse_bencode(input_buffer: &[u8], output: &mut String) -> Result<(), Error> { + fn parse_bencode(input_buffer: &[u8]) -> Result, Error> { let mut reader = ByteReader::new(input_buffer); - let mut writer = StringWriter::new(output); - - parse(&mut reader, &mut writer) + parse(&mut reader) } mod for_helpers { - use crate::parsers::integer::tests::try_bencode_to_json; + use crate::tokenizer::integer::tests::try_bencode_to_json; #[test] fn bencode_to_json_wrapper_succeeds() { - assert_eq!(try_bencode_to_json(b"i0e").unwrap(), "0".to_string()); + assert_eq!(try_bencode_to_json(b"i0e").unwrap(), "0".as_bytes()); } #[test] @@ -214,33 +168,31 @@ mod tests { #[test] fn zero() { - assert_eq!(bencode_to_json_unchecked(b"i0e"), "0".to_string()); + assert_eq!(bencode_to_json_unchecked(b"i0e"), "0".as_bytes()); } #[test] fn one_digit_integer() { - assert_eq!(bencode_to_json_unchecked(b"i1e"), "1".to_string()); + assert_eq!(bencode_to_json_unchecked(b"i1e"), "1".as_bytes()); } #[test] fn two_digits_integer() { - assert_eq!(bencode_to_json_unchecked(b"i42e"), "42".to_string()); + assert_eq!(bencode_to_json_unchecked(b"i42e"), "42".as_bytes()); } #[test] fn negative_integer() { - assert_eq!(bencode_to_json_unchecked(b"i-1e"), "-1".to_string()); + assert_eq!(bencode_to_json_unchecked(b"i-1e"), "-1".as_bytes()); } mod it_should_fail { use std::io::{self, Read}; use crate::{ - parsers::{ - error::Error, - integer::{parse, tests::try_bencode_to_json}, - }, - rw::{byte_reader::ByteReader, string_writer::StringWriter}, + error::Error, + rw::byte_reader::ByteReader, + tokenizer::integer::{parse, tests::try_bencode_to_json}, }; #[test] @@ -296,7 +248,7 @@ mod tests { } mod when_it_receives_a_unexpected_byte { - use crate::parsers::{error::Error, integer::tests::try_bencode_to_json}; + use crate::{error::Error, tokenizer::integer::tests::try_bencode_to_json}; #[test] fn while_expecting_a_digit_or_sign() 
{ @@ -350,10 +302,7 @@ mod tests { let mut reader = ByteReader::new(FaultyReader); - let mut output = String::new(); - let mut writer = StringWriter::new(&mut output); - - let result = parse(&mut reader, &mut writer); + let result = parse(&mut reader); assert!(matches!(result, Err(Error::Io(_)))); } diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs new file mode 100644 index 0000000..cd13a45 --- /dev/null +++ b/src/tokenizer/mod.rs @@ -0,0 +1,153 @@ +//! Bencode tokenizer. Given an input stream, it returns a stream of tokens. +pub mod integer; +pub mod string; + +use std::io::{self, Read}; + +use super::error::{self, ReadContext}; + +use crate::rw::byte_reader::ByteReader; + +// todo: Implement trait Iterator for tokenizer. + +// Bencoded reserved bytes +const BENCODE_BEGIN_INTEGER: u8 = b'i'; +pub const BENCODE_END_INTEGER: u8 = b'e'; +const BENCODE_BEGIN_LIST: u8 = b'l'; +const BENCODE_BEGIN_DICT: u8 = b'd'; +const BENCODE_END_LIST_OR_DICT: u8 = b'e'; + +#[derive(Debug, PartialEq)] +pub enum BencodeToken { + Integer(Vec), + String(Vec), + BeginList, + BeginDict, + EndListOrDict, + LineBreak, +} + +pub struct Tokenizer { + byte_reader: ByteReader, +} + +impl Tokenizer { + pub fn new(reader: R) -> Self { + Tokenizer { + byte_reader: ByteReader::new(reader), + } + } + + /// It parses the next bencoded token from input. + /// + /// # Errors + /// + /// Will return an error if: + /// + /// - It can't read from the input. + pub fn next_token(&mut self) -> Result, error::Error> { + match Self::peek_byte(&mut self.byte_reader)? { + Some(peeked_byte) => { + match peeked_byte { + BENCODE_BEGIN_INTEGER => { + let value = integer::parse(&mut self.byte_reader)?; + Ok(Some(BencodeToken::Integer(value))) + } + b'0'..=b'9' => { + let value = string::parse(&mut self.byte_reader)?; + Ok(Some(BencodeToken::String(value))) + } + BENCODE_BEGIN_LIST => { + let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader)?; + Ok(Some(BencodeToken::BeginList)) + } + BENCODE_BEGIN_DICT => { + let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader)?; + Ok(Some(BencodeToken::BeginDict)) + } + BENCODE_END_LIST_OR_DICT => { + let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader)?; + Ok(Some(BencodeToken::EndListOrDict)) + } + b'\n' => { + // todo: we should not return any token and continue to the next token. + // Ignore line breaks at the beginning, the end, or between values + let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader)?; + Ok(Some(BencodeToken::LineBreak)) + } + _ => Err(error::Error::UnrecognizedFirstBencodeValueByte( + ReadContext { + byte: Some(peeked_byte), + pos: self.byte_reader.input_byte_counter(), + latest_bytes: self.byte_reader.captured_bytes(), + }, + )), + } + } + None => Ok(None), + } + } + + /// It reads the next byte from the input consuming it. It returns `None` if + /// the input has ended. + /// + /// # Errors + /// + /// Will return and errors if: + /// + /// - It can't read from the input. + /// - The byte read is not the expected one (the previously peeked byte). 
+ fn read_peeked_byte( + peeked_byte: u8, + reader: &mut ByteReader, + ) -> Result, error::Error> { + match reader.read_byte() { + Ok(byte) => { + if byte == peeked_byte { + return Ok(Some(byte)); + } + Err(error::Error::ReadByteAfterPeekingDoesMatchPeekedByte( + ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + }, + )) + } + Err(err) => { + if err.kind() == io::ErrorKind::UnexpectedEof { + return Ok(None); + } + Err(err.into()) + } + } + } + + /// It peeks the next byte from the input without consuming it. It returns + /// `None` if the input has ended. + /// + /// # Errors + /// + /// Will return and errors if it can't read from the input. + fn peek_byte(reader: &mut ByteReader) -> Result, error::Error> { + match reader.peek_byte() { + Ok(byte) => Ok(Some(byte)), + Err(err) => { + if err.kind() == io::ErrorKind::UnexpectedEof { + return Ok(None); + } + Err(err.into()) + } + } + } + + /// Returns the number of bytes that have been read from the input. + pub fn input_byte_counter(&self) -> u64 { + self.byte_reader.input_byte_counter() + } + + /// Returns a copy of the bytes that have been read from the input. + pub fn captured_bytes(&self) -> Vec { + self.byte_reader.captured_bytes() + } +} diff --git a/src/parsers/string.rs b/src/tokenizer/string.rs similarity index 58% rename from src/parsers/string.rs rename to src/tokenizer/string.rs index 93514bc..5d23c95 100644 --- a/src/parsers/string.rs +++ b/src/tokenizer/string.rs @@ -3,7 +3,7 @@ //! It reads bencoded bytes from the input and writes JSON bytes to the output. use std::io::{self, Read}; -use crate::rw::{byte_reader::ByteReader, writer::Writer}; +use crate::rw::byte_reader::ByteReader; /* todo: Optimize UTF-8 conversion. Try to convert to string partially and stop converting if we reach a point when input is not valid UTF-8 anymore. This @@ -13,21 +13,20 @@ use crate::rw::{byte_reader::ByteReader, writer::Writer}; use core::str; -use super::error::{Error, ReadContext, WriteContext}; +use super::error::{Error, ReadContext}; /// It parses a string bencoded value. /// /// # Errors /// -/// Will return an error if it can't read from the input or write to the -/// output. +/// Will return an error if it can't read from the input. /// /// # Panics /// /// Will panic if we reach the end of the input without completing the string. -pub fn parse(reader: &mut ByteReader, writer: &mut W) -> Result<(), Error> { +pub fn parse(reader: &mut ByteReader) -> Result, Error> { let mut string_parser = StringParser::default(); - string_parser.parse(reader, writer) + string_parser.parse(reader) } /// Strings bencode format have two parts: `length:value`. @@ -42,38 +41,18 @@ struct StringParser { } impl StringParser { - fn parse( - &mut self, - reader: &mut ByteReader, - writer: &mut W, - ) -> Result<(), Error> { + fn parse(&mut self, reader: &mut ByteReader) -> Result, Error> { let mut length = Length::default(); - length.parse(reader, writer)?; + length.parse(reader)?; let mut value = Value::new(length.number); - value.parse(reader, writer)?; + let value_bytes = value.parse(reader)?; self.parsed_value = value.utf8(); - writer.write_str(&self.json())?; - - Ok(()) - } - - /// It returns the final parsed value as string. - /// - /// If the string contains non UTF-8 bytes it returns the hexadecimal list - /// of bytes in in the format 'fa fb' - fn parsed_value(&self) -> String { - self.parsed_value.clone() - } - - /// It serializes the parsed value into JSON. 
- #[must_use] - fn json(&self) -> String { - serde_json::to_string(&self.parsed_value()).unwrap() + Ok(value_bytes) } } @@ -89,20 +68,16 @@ struct Length { impl Length { const END_OF_STRING_LENGTH_BYTE: u8 = b':'; - fn parse( - &mut self, - reader: &mut ByteReader, - writer: &W, - ) -> Result<(), Error> { + fn parse(&mut self, reader: &mut ByteReader) -> Result<(), Error> { loop { - let byte = Self::next_byte(reader, writer)?; + let byte = Self::next_byte(reader)?; match byte { Self::END_OF_STRING_LENGTH_BYTE => { break; } _ => { - self.add_byte(byte, reader, writer)?; + self.add_byte(byte, reader)?; } } } @@ -115,7 +90,7 @@ impl Length { /// # Errors /// /// Will return an error if the end of input was reached. - fn next_byte(reader: &mut ByteReader, writer: &W) -> Result { + fn next_byte(reader: &mut ByteReader) -> Result { match reader.read_byte() { Ok(byte) => Ok(byte), Err(err) => { @@ -126,11 +101,6 @@ impl Length { pos: reader.input_byte_counter(), latest_bytes: reader.captured_bytes(), }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, )); } Err(err.into()) @@ -143,25 +113,13 @@ impl Length { /// # Errors /// /// Will return an error if the byte is not a digit (0..9). - fn add_byte( - &mut self, - byte: u8, - reader: &mut ByteReader, - writer: &W, - ) -> Result<(), Error> { + fn add_byte(&mut self, byte: u8, reader: &mut ByteReader) -> Result<(), Error> { if !byte.is_ascii_digit() { - return Err(Error::InvalidStringLengthByte( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::InvalidStringLengthByte(ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } self.bytes.push(byte); @@ -198,16 +156,12 @@ impl Value { } } - fn parse( - &mut self, - reader: &mut ByteReader, - writer: &W, - ) -> Result<(), Error> { + fn parse(&mut self, reader: &mut ByteReader) -> Result, Error> { for _i in 1..=self.length { - self.add_byte(Self::next_byte(reader, writer)?); + self.add_byte(Self::next_byte(reader)?); } - Ok(()) + Ok(self.bytes.clone()) } /// It reads the next byte from the input. @@ -215,23 +169,16 @@ impl Value { /// # Errors /// /// Will return an error if the end of input was reached. 
- fn next_byte(reader: &mut ByteReader, writer: &W) -> Result { + fn next_byte(reader: &mut ByteReader) -> Result { match reader.read_byte() { Ok(byte) => Ok(byte), Err(err) => { if err.kind() == io::ErrorKind::UnexpectedEof { - return Err(Error::UnexpectedEndOfInputParsingStringValue( - ReadContext { - byte: None, - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::UnexpectedEndOfInputParsingStringValue(ReadContext { + byte: None, + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } Err(err.into()) } @@ -263,47 +210,29 @@ impl Value { #[cfg(test)] mod tests { - use crate::{ - parsers::error::Error, - rw::{byte_reader::ByteReader, string_writer::StringWriter}, - }; + use crate::{error::Error, rw::byte_reader::ByteReader}; use super::parse; - fn bencode_to_json_unchecked(input_buffer: &[u8]) -> String { - let mut output = String::new(); - - parse_bencode(input_buffer, &mut output).expect("Bencode to JSON conversion failed"); - - output + fn bencode_to_json_unchecked(input_buffer: &[u8]) -> Vec { + parse_bencode(input_buffer).expect("Bencode to JSON conversion failed") } - fn try_bencode_to_json(input_buffer: &[u8]) -> Result { - let mut output = String::new(); - - match parse_bencode(input_buffer, &mut output) { - Ok(()) => Ok(output), - Err(err) => Err(err), - } + fn try_bencode_to_json(input_buffer: &[u8]) -> Result, Error> { + parse_bencode(input_buffer) } - fn parse_bencode(input_buffer: &[u8], output: &mut String) -> Result<(), Error> { + fn parse_bencode(input_buffer: &[u8]) -> Result, Error> { let mut reader = ByteReader::new(input_buffer); - - let mut writer = StringWriter::new(output); - - parse(&mut reader, &mut writer) + parse(&mut reader) } mod for_helpers { - use crate::parsers::string::tests::try_bencode_to_json; + use crate::tokenizer::string::tests::try_bencode_to_json; #[test] fn bencode_to_json_wrapper_succeeds() { - assert_eq!( - try_bencode_to_json(b"4:spam").unwrap(), - r#""spam""#.to_string() - ); + assert_eq!(try_bencode_to_json(b"4:spam").unwrap(), r"spam".as_bytes()); } #[test] @@ -314,118 +243,61 @@ mod tests { #[test] fn length_can_contain_leading_zeros() { - assert_eq!( - bencode_to_json_unchecked(b"00:"), - r#""""#.to_string() - ); + assert_eq!(bencode_to_json_unchecked(b"00:"), r"".as_bytes()); } #[test] fn empty_string() { - assert_eq!( - bencode_to_json_unchecked(b"0:"), - r#""""#.to_string() - ); + assert_eq!(bencode_to_json_unchecked(b"0:"), r"".as_bytes()); } #[test] fn string_with_tags() { assert_eq!( bencode_to_json_unchecked(b"8:"), - r#""""#.to_string() + r"".as_bytes() ); } #[test] fn utf8() { - assert_eq!( - bencode_to_json_unchecked(b"4:spam"), - r#""spam""#.to_string() - ); + assert_eq!(bencode_to_json_unchecked(b"4:spam"), r"spam".as_bytes()); } #[test] fn non_utf8() { assert_eq!( bencode_to_json_unchecked(b"4:\xFF\xFE\xFD\xFC"), - r#""fffefdfc""#.to_string() + vec![0xFF, 0xFE, 0xFD, 0xFC] ); } #[test] fn ending_with_bencode_end_char() { - assert_eq!( - bencode_to_json_unchecked(b"1:e"), - r#""e""#.to_string() - ); + assert_eq!(bencode_to_json_unchecked(b"1:e"), r"e".as_bytes()); } #[test] fn containing_a_reserved_char() { - assert_eq!( - bencode_to_json_unchecked(b"1:i"), - r#""i""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:l"), - r#""l""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:d"), - 
r#""d""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:l"), - r#""l""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:e"), - r#""e""#.to_string() - ); + assert_eq!(bencode_to_json_unchecked(b"1:i"), r"i".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:l"), r"l".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:d"), r"d".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:l"), r"l".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:e"), r"e".as_bytes()); } #[test] fn containing_a_digit() { - assert_eq!( - bencode_to_json_unchecked(b"1:0"), - r#""0""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:1"), - r#""1""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:2"), - r#""2""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:3"), - r#""3""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:4"), - r#""4""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:5"), - r#""5""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:6"), - r#""6""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:7"), - r#""7""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:8"), - r#""8""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:9"), - r#""9""#.to_string() - ); + assert_eq!(bencode_to_json_unchecked(b"1:0"), r"0".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:1"), r"1".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:2"), r"2".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:3"), r"3".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:4"), r"4".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:5"), r"5".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:6"), r"6".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:7"), r"7".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:8"), r"8".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:9"), r"9".as_bytes()); } mod should_escape_json { @@ -484,11 +356,9 @@ mod tests { use std::io::{self, Read}; use crate::{ - parsers::{ - error::Error, - string::{parse, tests::try_bencode_to_json}, - }, - rw::{byte_reader::ByteReader, string_writer::StringWriter}, + error::Error, + rw::byte_reader::ByteReader, + tokenizer::string::{parse, tests::try_bencode_to_json}, }; #[test] @@ -576,10 +446,7 @@ mod tests { fn it_cannot_read_more_bytes_without_finishing_parsing_the_string_length() { let mut reader = ByteReader::new(FaultyReader::new(b"4:spam".to_vec(), 1)); - let mut output = String::new(); - let mut writer = StringWriter::new(&mut output); - - let result = parse(&mut reader, &mut writer); + let result = parse(&mut reader); assert!(matches!(result, Err(Error::Io(_)))); } @@ -588,10 +455,7 @@ mod tests { fn it_cannot_read_more_bytes_without_finishing_parsing_the_string_value() { let mut reader = ByteReader::new(FaultyReader::new(b"4:spam".to_vec(), 3)); - let mut output = String::new(); - let mut writer = StringWriter::new(&mut output); - - let result = parse(&mut reader, &mut writer); + let result = parse(&mut reader); assert!(matches!(result, Err(Error::Io(_)))); }