From 83eeefdafe5205b53177621ce5aa209a47a3f013 Mon Sep 17 00:00:00 2001
From: Jose Celano
Date: Tue, 3 Dec 2024 18:16:25 +0000
Subject: [PATCH 01/13] refactor: extract bencode tokenizer

Split parser logic into two types:

- Tokenizer: It returns bencoded tokens.
- Generator: It iterates over bencoded tokens to generate the JSON.
---
 src/parsers/integer.rs |  16 +++++--
 src/parsers/mod.rs     | 104 ++++++++++++++++++++++++++++++++++------
 src/parsers/string.rs  |  19 ++++----
 3 files changed, 110 insertions(+), 29 deletions(-)

diff --git a/src/parsers/integer.rs b/src/parsers/integer.rs
index 50d61bd..b02356d 100644
--- a/src/parsers/integer.rs
+++ b/src/parsers/integer.rs
@@ -31,9 +31,13 @@ enum StateExpecting {
 ///
 /// Will panic if we reach the end of the input without completing the integer
 /// (without reaching the end of the integer `e`).
-pub fn parse<R: Read, W: Writer>(reader: &mut ByteReader<R>, writer: &mut W) -> Result<(), Error> {
+pub fn parse<R: Read, W: Writer>(
+    reader: &mut ByteReader<R>,
+    writer: &mut W,
+) -> Result<Vec<u8>, Error> {
     let mut state = StateExpecting::Start;
     let mut first_digit_is_zero = false;
+    let mut value = vec![];
 
     loop {
         let byte = next_byte(reader, writer)?;
@@ -48,10 +52,12 @@ pub fn parse<R: Read, W: Writer>(reader: &mut ByteReader<R>, writer: &mut W) ->
             StateExpecting::DigitOrSign => {
                 if char == '-' {
                     writer.write_byte(byte)?;
+                    value.push(byte);
 
                     StateExpecting::DigitAfterSign
                 } else if char.is_ascii_digit() {
                     writer.write_byte(byte)?;
+                    value.push(byte);
 
                     if char == '0' {
                         first_digit_is_zero = true;
@@ -76,6 +82,7 @@ pub fn parse<R: Read, W: Writer>(reader: &mut ByteReader<R>, writer: &mut W) ->
             StateExpecting::DigitAfterSign => {
                 if char.is_ascii_digit() {
                     writer.write_byte(byte)?;
+                    value.push(byte);
 
                     if char == '0' {
                         first_digit_is_zero = true;
@@ -100,6 +107,7 @@ pub fn parse<R: Read, W: Writer>(reader: &mut ByteReader<R>, writer: &mut W) ->
             StateExpecting::DigitOrEnd => {
                 if char.is_ascii_digit() {
                     writer.write_byte(byte)?;
+                    value.push(byte);
 
                     if char == '0' && first_digit_is_zero {
                         return Err(Error::LeadingZerosInIntegersNotAllowed(
@@ -118,7 +126,7 @@ pub fn parse<R: Read, W: Writer>(reader: &mut ByteReader<R>, writer: &mut W) ->
 
                     StateExpecting::DigitOrEnd
                 } else if byte == BENCODE_END_INTEGER {
-                    return Ok(());
+                    return Ok(value);
                 } else {
                     return Err(Error::UnexpectedByteParsingInteger(
                         ReadContext {
@@ -185,12 +193,12 @@ mod tests {
         let mut output = String::new();
 
         match parse_bencode(input_buffer, &mut output) {
-            Ok(()) => Ok(output),
+            Ok(_value) => Ok(output),
             Err(err) => Err(err),
         }
     }
 
-    fn parse_bencode(input_buffer: &[u8], output: &mut String) -> Result<(), Error> {
+    fn parse_bencode(input_buffer: &[u8], output: &mut String) -> Result<Vec<u8>, Error> {
         let mut reader = ByteReader::new(input_buffer);
 
         let mut writer = StringWriter::new(output);
diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs
index 44bebb8..5cd0d60 100644
--- a/src/parsers/mod.rs
+++ b/src/parsers/mod.rs
@@ -8,6 +8,7 @@ pub mod integer;
 pub mod stack;
 pub mod string;
 
+use core::str;
 use std::{
     fmt::Write as FmtWrite,
     io::{self, Read, Write as IoWrite},
@@ -36,6 +37,16 @@ pub enum BencodeType {
     Dict,
 }
 
+#[derive(Debug, PartialEq)]
+pub enum BencodeToken {
+    Integer(Vec<u8>),
+    String(Vec<u8>),
+    BeginList,
+    BeginDict,
+    EndListOrDict,
+    LineBreak,
+}
+
 pub struct BencodeParser<R: Read> {
     byte_reader: ByteReader<R>,
     num_processed_tokens: u64,
@@ -104,35 +115,40 @@ impl<R: Read> BencodeParser<R> {
     /// - It can't read from the input or write to the output.
     /// - The input is invalid Bencode.
     fn parse<W: Writer>(&mut self, writer: &mut W) -> Result<(), error::Error> {
-        while let Some(peeked_byte) = Self::peek_byte(&mut self.byte_reader, writer)?
{ - match peeked_byte { + let capture_output = Vec::new(); + let mut null_writer = ByteWriter::new(capture_output); + + while let Some(peeked_byte) = Self::peek_byte(&mut self.byte_reader, &null_writer)? { + let token: BencodeToken = match peeked_byte { BENCODE_BEGIN_INTEGER => { - self.begin_bencoded_value(BencodeType::Integer, writer)?; - integer::parse(&mut self.byte_reader, writer)?; + let value = integer::parse(&mut self.byte_reader, &mut null_writer)?; + BencodeToken::Integer(value) } b'0'..=b'9' => { - self.begin_bencoded_value(BencodeType::String, writer)?; - string::parse(&mut self.byte_reader, writer)?; + let value = string::parse(&mut self.byte_reader, &mut null_writer)?; + BencodeToken::String(value) } BENCODE_BEGIN_LIST => { - let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, writer)?; - self.begin_bencoded_value(BencodeType::List, writer)?; - writer.write_byte(Self::JSON_ARRAY_BEGIN)?; - self.stack.push(State::ExpectingFirstListItemOrEnd); + let _byte = + Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, &null_writer)?; + BencodeToken::BeginList } BENCODE_BEGIN_DICT => { - let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, writer)?; - self.begin_bencoded_value(BencodeType::Dict, writer)?; - writer.write_byte(Self::JSON_OBJ_BEGIN)?; - self.stack.push(State::ExpectingFirstDictFieldOrEnd); + let _byte = + Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, &null_writer)?; + BencodeToken::BeginDict } BENCODE_END_LIST_OR_DICT => { - let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, writer)?; - self.end_list_or_dict(writer)?; + let _byte = + Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, &null_writer)?; + BencodeToken::EndListOrDict } b'\n' => { + // todo: we should not return any token and continue to the next token. // Ignore line breaks at the beginning, the end, or between values - let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, writer)?; + let _byte = + Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, &null_writer)?; + BencodeToken::LineBreak } _ => { return Err(error::Error::UnrecognizedFirstBencodeValueByte( @@ -148,6 +164,60 @@ impl BencodeParser { }, )); } + }; + + /* TODO: + + - Extract tokenizer (without implementing the Iterator trait). + - Remove writer from tokenizer. + - Implement trait Iterator for tokenizer. + - Rename this parser to generator. + + */ + + match token { + BencodeToken::Integer(integer_bytes) => { + self.begin_bencoded_value(BencodeType::Integer, writer)?; + // todo: add `write_bytes` to writer. + for bytes in integer_bytes { + writer.write_byte(bytes)?; + } + } + BencodeToken::String(string_bytes) => { + self.begin_bencoded_value(BencodeType::String, writer)?; + + let html_tag_style_string = match str::from_utf8(&string_bytes) { + Ok(string) => { + // String only contains valid UTF-8 chars -> print it as it's + &format!("{}", string.to_owned()) + } + Err(_) => { + // String contains non valid UTF-8 chars -> print it as hex bytes + &format!("{}", hex::encode(string_bytes)) + } + }; + + writer.write_str( + &serde_json::to_string(&html_tag_style_string) + .expect("Failed to serialize to JSON. 
This should not happen because non UTF-8 bencoded string are serialized as hex bytes"), + )?; + } + BencodeToken::BeginList => { + self.begin_bencoded_value(BencodeType::List, writer)?; + writer.write_byte(Self::JSON_ARRAY_BEGIN)?; + self.stack.push(State::ExpectingFirstListItemOrEnd); + } + BencodeToken::BeginDict => { + self.begin_bencoded_value(BencodeType::Dict, writer)?; + writer.write_byte(Self::JSON_OBJ_BEGIN)?; + self.stack.push(State::ExpectingFirstDictFieldOrEnd); + } + BencodeToken::EndListOrDict => { + self.end_list_or_dict(writer)?; + } + BencodeToken::LineBreak => { + // Ignore line breaks at the beginning, the end, or between values + } } self.num_processed_tokens += 1; diff --git a/src/parsers/string.rs b/src/parsers/string.rs index 93514bc..6d5966a 100644 --- a/src/parsers/string.rs +++ b/src/parsers/string.rs @@ -25,7 +25,10 @@ use super::error::{Error, ReadContext, WriteContext}; /// # Panics /// /// Will panic if we reach the end of the input without completing the string. -pub fn parse(reader: &mut ByteReader, writer: &mut W) -> Result<(), Error> { +pub fn parse( + reader: &mut ByteReader, + writer: &mut W, +) -> Result, Error> { let mut string_parser = StringParser::default(); string_parser.parse(reader, writer) } @@ -46,20 +49,20 @@ impl StringParser { &mut self, reader: &mut ByteReader, writer: &mut W, - ) -> Result<(), Error> { + ) -> Result, Error> { let mut length = Length::default(); length.parse(reader, writer)?; let mut value = Value::new(length.number); - value.parse(reader, writer)?; + let value_bytes = value.parse(reader, writer)?; self.parsed_value = value.utf8(); writer.write_str(&self.json())?; - Ok(()) + Ok(value_bytes) } /// It returns the final parsed value as string. @@ -202,12 +205,12 @@ impl Value { &mut self, reader: &mut ByteReader, writer: &W, - ) -> Result<(), Error> { + ) -> Result, Error> { for _i in 1..=self.length { self.add_byte(Self::next_byte(reader, writer)?); } - Ok(()) + Ok(self.bytes.clone()) } /// It reads the next byte from the input. @@ -282,12 +285,12 @@ mod tests { let mut output = String::new(); match parse_bencode(input_buffer, &mut output) { - Ok(()) => Ok(output), + Ok(_string_value_bytes) => Ok(output), Err(err) => Err(err), } } - fn parse_bencode(input_buffer: &[u8], output: &mut String) -> Result<(), Error> { + fn parse_bencode(input_buffer: &[u8], output: &mut String) -> Result, Error> { let mut reader = ByteReader::new(input_buffer); let mut writer = StringWriter::new(output); From 63b9b73c7a1c604614a9d01580507b7e9692220d Mon Sep 17 00:00:00 2001 From: Jose Celano Date: Wed, 4 Dec 2024 09:12:50 +0000 Subject: [PATCH 02/13] refactor: extract struct BencodeTokenizer --- src/parsers/mod.rs | 324 ++++++++++++++++++++++++++------------------- 1 file changed, 185 insertions(+), 139 deletions(-) diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs index 5cd0d60..dcf64ac 100644 --- a/src/parsers/mod.rs +++ b/src/parsers/mod.rs @@ -8,6 +8,14 @@ pub mod integer; pub mod stack; pub mod string; +/* TODO: + +- Remove writer from tokenizer. +- Implement trait Iterator for tokenizer. +- Rename this parser to generator. 
+ +*/ + use core::str; use std::{ fmt::Write as FmtWrite, @@ -47,8 +55,164 @@ pub enum BencodeToken { LineBreak, } -pub struct BencodeParser { +pub struct BencodeTokenizer { byte_reader: ByteReader, +} + +impl BencodeTokenizer { + pub fn new(reader: R) -> Self { + BencodeTokenizer { + byte_reader: ByteReader::new(reader), + } + } + + fn next_token( + &mut self, + writer: &mut W, + ) -> Result, error::Error> { + let capture_output = Vec::new(); + let mut null_writer = ByteWriter::new(capture_output); + + let opt_peeked_byte = Self::peek_byte(&mut self.byte_reader, &null_writer)?; + + match opt_peeked_byte { + Some(peeked_byte) => { + match peeked_byte { + BENCODE_BEGIN_INTEGER => { + let value = integer::parse(&mut self.byte_reader, &mut null_writer)?; + Ok(Some(BencodeToken::Integer(value))) + } + b'0'..=b'9' => { + let value = string::parse(&mut self.byte_reader, &mut null_writer)?; + Ok(Some(BencodeToken::String(value))) + } + BENCODE_BEGIN_LIST => { + let _byte = Self::read_peeked_byte( + peeked_byte, + &mut self.byte_reader, + &null_writer, + )?; + Ok(Some(BencodeToken::BeginList)) + } + BENCODE_BEGIN_DICT => { + let _byte = Self::read_peeked_byte( + peeked_byte, + &mut self.byte_reader, + &null_writer, + )?; + Ok(Some(BencodeToken::BeginDict)) + } + BENCODE_END_LIST_OR_DICT => { + let _byte = Self::read_peeked_byte( + peeked_byte, + &mut self.byte_reader, + &null_writer, + )?; + Ok(Some(BencodeToken::EndListOrDict)) + } + b'\n' => { + // todo: we should not return any token and continue to the next token. + // Ignore line breaks at the beginning, the end, or between values + let _byte = Self::read_peeked_byte( + peeked_byte, + &mut self.byte_reader, + &null_writer, + )?; + Ok(Some(BencodeToken::LineBreak)) + } + _ => Err(error::Error::UnrecognizedFirstBencodeValueByte( + ReadContext { + byte: Some(peeked_byte), + pos: self.byte_reader.input_byte_counter(), + latest_bytes: self.byte_reader.captured_bytes(), + }, + WriteContext { + byte: Some(peeked_byte), + pos: writer.output_byte_counter(), + latest_bytes: writer.captured_bytes(), + }, + )), + } + } + None => Ok(None), + } + } + + /// It reads the next byte from the input consuming it. It returns `None` if + /// the input has ended. + /// + /// # Errors + /// + /// Will return and errors if: + /// + /// - It can't read from the input. + /// - The byte read is not the expected one (the previously peeked byte). + fn read_peeked_byte( + peeked_byte: u8, + reader: &mut ByteReader, + writer: &W, + ) -> Result, error::Error> { + match reader.read_byte() { + Ok(byte) => { + if byte == peeked_byte { + return Ok(Some(byte)); + } + Err(error::Error::ReadByteAfterPeekingDoesMatchPeekedByte( + ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + }, + WriteContext { + byte: Some(byte), + pos: writer.output_byte_counter(), + latest_bytes: writer.captured_bytes(), + }, + )) + } + Err(err) => { + if err.kind() == io::ErrorKind::UnexpectedEof { + return Ok(None); + } + Err(err.into()) + } + } + } + + /// It peeks the next byte from the input without consuming it. It returns + /// `None` if the input has ended. + /// + /// # Errors + /// + /// Will return and errors if it can't read from the input. 
+ fn peek_byte( + reader: &mut ByteReader, + _writer: &W, + ) -> Result, error::Error> { + match reader.peek_byte() { + Ok(byte) => Ok(Some(byte)), + Err(err) => { + if err.kind() == io::ErrorKind::UnexpectedEof { + return Ok(None); + } + Err(err.into()) + } + } + } + + /// Returns the number of bytes that have been read from the input. + pub fn input_byte_counter(&self) -> u64 { + self.byte_reader.input_byte_counter() + } + + /// Returns a copy of the bytes that have been read from the input. + pub fn captured_bytes(&self) -> Vec { + self.byte_reader.captured_bytes() + } +} + +pub struct BencodeParser { + tokenizer: BencodeTokenizer, num_processed_tokens: u64, stack: Stack, } @@ -65,7 +229,7 @@ impl BencodeParser { pub fn new(reader: R) -> Self { BencodeParser { - byte_reader: ByteReader::new(reader), + tokenizer: BencodeTokenizer::new(reader), num_processed_tokens: 1, stack: Stack::default(), } @@ -118,63 +282,7 @@ impl BencodeParser { let capture_output = Vec::new(); let mut null_writer = ByteWriter::new(capture_output); - while let Some(peeked_byte) = Self::peek_byte(&mut self.byte_reader, &null_writer)? { - let token: BencodeToken = match peeked_byte { - BENCODE_BEGIN_INTEGER => { - let value = integer::parse(&mut self.byte_reader, &mut null_writer)?; - BencodeToken::Integer(value) - } - b'0'..=b'9' => { - let value = string::parse(&mut self.byte_reader, &mut null_writer)?; - BencodeToken::String(value) - } - BENCODE_BEGIN_LIST => { - let _byte = - Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, &null_writer)?; - BencodeToken::BeginList - } - BENCODE_BEGIN_DICT => { - let _byte = - Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, &null_writer)?; - BencodeToken::BeginDict - } - BENCODE_END_LIST_OR_DICT => { - let _byte = - Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, &null_writer)?; - BencodeToken::EndListOrDict - } - b'\n' => { - // todo: we should not return any token and continue to the next token. - // Ignore line breaks at the beginning, the end, or between values - let _byte = - Self::read_peeked_byte(peeked_byte, &mut self.byte_reader, &null_writer)?; - BencodeToken::LineBreak - } - _ => { - return Err(error::Error::UnrecognizedFirstBencodeValueByte( - ReadContext { - byte: Some(peeked_byte), - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), - }, - WriteContext { - byte: Some(peeked_byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); - } - }; - - /* TODO: - - - Extract tokenizer (without implementing the Iterator trait). - - Remove writer from tokenizer. - - Implement trait Iterator for tokenizer. - - Rename this parser to generator. - - */ - + while let Some(token) = self.tokenizer.next_token(&mut null_writer)? { match token { BencodeToken::Integer(integer_bytes) => { self.begin_bencoded_value(BencodeType::Integer, writer)?; @@ -226,68 +334,6 @@ impl BencodeParser { self.check_bad_end_stack_state(writer) } - /// It reads the next byte from the input consuming it. It returns `None` if - /// the input has ended. - /// - /// # Errors - /// - /// Will return and errors if: - /// - /// - It can't read from the input. - /// - The byte read is not the expected one (the previously peeked byte). 
- fn read_peeked_byte( - peeked_byte: u8, - reader: &mut ByteReader, - writer: &W, - ) -> Result, error::Error> { - match reader.read_byte() { - Ok(byte) => { - if byte == peeked_byte { - return Ok(Some(byte)); - } - Err(error::Error::ReadByteAfterPeekingDoesMatchPeekedByte( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )) - } - Err(err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Ok(None); - } - Err(err.into()) - } - } - } - - /// It peeks the next byte from the input without consuming it. It returns - /// `None` if the input has ended. - /// - /// # Errors - /// - /// Will return and errors if it can't read from the input. - fn peek_byte( - reader: &mut ByteReader, - _writer: &W, - ) -> Result, error::Error> { - match reader.peek_byte() { - Ok(byte) => Ok(Some(byte)), - Err(err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Ok(None); - } - Err(err.into()) - } - } - } - /// It updates the stack state and prints the delimiters when needed. /// /// Called when the first byt of a bencoded value (integer, string, list or dict) @@ -315,8 +361,8 @@ impl BencodeParser { bencode_type, ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -339,8 +385,8 @@ impl BencodeParser { bencode_type, ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -381,8 +427,8 @@ impl BencodeParser { return Err(error::Error::PrematureEndOfDict( ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -395,8 +441,8 @@ impl BencodeParser { return Err(error::Error::NoMatchingStartForListOrDictEnd( ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -424,8 +470,8 @@ impl BencodeParser { error::Error::UnexpectedEndOfInputExpectingFirstListItemOrEnd( ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -438,8 +484,8 @@ impl BencodeParser { Err(error::Error::UnexpectedEndOfInputExpectingNextListItem( ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -452,8 +498,8 @@ impl BencodeParser { error::Error::UnexpectedEndOfInputExpectingFirstDictFieldOrEnd( ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: 
self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -466,8 +512,8 @@ impl BencodeParser { Err(error::Error::UnexpectedEndOfInputExpectingDictFieldValue( ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, @@ -480,8 +526,8 @@ impl BencodeParser { error::Error::UnexpectedEndOfInputExpectingDictFieldKeyOrEnd( ReadContext { byte: None, - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), + pos: self.tokenizer.input_byte_counter(), + latest_bytes: self.tokenizer.captured_bytes(), }, WriteContext { byte: None, From 3a7ea5d1e7afe2d351dac4338d2dd9b207b95c97 Mon Sep 17 00:00:00 2001 From: Jose Celano Date: Wed, 4 Dec 2024 09:22:10 +0000 Subject: [PATCH 03/13] refactor: extract mod tokenizer --- src/parsers/integer.rs | 2 +- src/parsers/mod.rs | 183 +----------------------------------- src/parsers/tokenizer.rs | 196 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 201 insertions(+), 180 deletions(-) create mode 100644 src/parsers/tokenizer.rs diff --git a/src/parsers/integer.rs b/src/parsers/integer.rs index b02356d..bcb0a5f 100644 --- a/src/parsers/integer.rs +++ b/src/parsers/integer.rs @@ -7,7 +7,7 @@ use crate::rw::{byte_reader::ByteReader, writer::Writer}; use super::{ error::{Error, ReadContext, WriteContext}, - BENCODE_END_INTEGER, + tokenizer::BENCODE_END_INTEGER, }; /// The current state parsing the integer. diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs index dcf64ac..b5e9007 100644 --- a/src/parsers/mod.rs +++ b/src/parsers/mod.rs @@ -7,11 +7,10 @@ pub mod error; pub mod integer; pub mod stack; pub mod string; +pub mod tokenizer; /* TODO: -- Remove writer from tokenizer. -- Implement trait Iterator for tokenizer. - Rename this parser to generator. 
*/ @@ -19,23 +18,15 @@ pub mod string; use core::str; use std::{ fmt::Write as FmtWrite, - io::{self, Read, Write as IoWrite}, + io::{Read, Write as IoWrite}, }; use derive_more::derive::Display; use error::{ReadContext, WriteContext}; use stack::{Stack, State}; +use tokenizer::{BencodeToken, BencodeTokenizer}; -use crate::rw::{ - byte_reader::ByteReader, byte_writer::ByteWriter, string_writer::StringWriter, writer::Writer, -}; - -// Bencoded reserved bytes -const BENCODE_BEGIN_INTEGER: u8 = b'i'; -const BENCODE_END_INTEGER: u8 = b'e'; -const BENCODE_BEGIN_LIST: u8 = b'l'; -const BENCODE_BEGIN_DICT: u8 = b'd'; -const BENCODE_END_LIST_OR_DICT: u8 = b'e'; +use crate::rw::{byte_writer::ByteWriter, string_writer::StringWriter, writer::Writer}; #[derive(Debug, PartialEq, Display)] pub enum BencodeType { @@ -45,172 +36,6 @@ pub enum BencodeType { Dict, } -#[derive(Debug, PartialEq)] -pub enum BencodeToken { - Integer(Vec), - String(Vec), - BeginList, - BeginDict, - EndListOrDict, - LineBreak, -} - -pub struct BencodeTokenizer { - byte_reader: ByteReader, -} - -impl BencodeTokenizer { - pub fn new(reader: R) -> Self { - BencodeTokenizer { - byte_reader: ByteReader::new(reader), - } - } - - fn next_token( - &mut self, - writer: &mut W, - ) -> Result, error::Error> { - let capture_output = Vec::new(); - let mut null_writer = ByteWriter::new(capture_output); - - let opt_peeked_byte = Self::peek_byte(&mut self.byte_reader, &null_writer)?; - - match opt_peeked_byte { - Some(peeked_byte) => { - match peeked_byte { - BENCODE_BEGIN_INTEGER => { - let value = integer::parse(&mut self.byte_reader, &mut null_writer)?; - Ok(Some(BencodeToken::Integer(value))) - } - b'0'..=b'9' => { - let value = string::parse(&mut self.byte_reader, &mut null_writer)?; - Ok(Some(BencodeToken::String(value))) - } - BENCODE_BEGIN_LIST => { - let _byte = Self::read_peeked_byte( - peeked_byte, - &mut self.byte_reader, - &null_writer, - )?; - Ok(Some(BencodeToken::BeginList)) - } - BENCODE_BEGIN_DICT => { - let _byte = Self::read_peeked_byte( - peeked_byte, - &mut self.byte_reader, - &null_writer, - )?; - Ok(Some(BencodeToken::BeginDict)) - } - BENCODE_END_LIST_OR_DICT => { - let _byte = Self::read_peeked_byte( - peeked_byte, - &mut self.byte_reader, - &null_writer, - )?; - Ok(Some(BencodeToken::EndListOrDict)) - } - b'\n' => { - // todo: we should not return any token and continue to the next token. - // Ignore line breaks at the beginning, the end, or between values - let _byte = Self::read_peeked_byte( - peeked_byte, - &mut self.byte_reader, - &null_writer, - )?; - Ok(Some(BencodeToken::LineBreak)) - } - _ => Err(error::Error::UnrecognizedFirstBencodeValueByte( - ReadContext { - byte: Some(peeked_byte), - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), - }, - WriteContext { - byte: Some(peeked_byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )), - } - } - None => Ok(None), - } - } - - /// It reads the next byte from the input consuming it. It returns `None` if - /// the input has ended. - /// - /// # Errors - /// - /// Will return and errors if: - /// - /// - It can't read from the input. - /// - The byte read is not the expected one (the previously peeked byte). 
- fn read_peeked_byte( - peeked_byte: u8, - reader: &mut ByteReader, - writer: &W, - ) -> Result, error::Error> { - match reader.read_byte() { - Ok(byte) => { - if byte == peeked_byte { - return Ok(Some(byte)); - } - Err(error::Error::ReadByteAfterPeekingDoesMatchPeekedByte( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )) - } - Err(err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Ok(None); - } - Err(err.into()) - } - } - } - - /// It peeks the next byte from the input without consuming it. It returns - /// `None` if the input has ended. - /// - /// # Errors - /// - /// Will return and errors if it can't read from the input. - fn peek_byte( - reader: &mut ByteReader, - _writer: &W, - ) -> Result, error::Error> { - match reader.peek_byte() { - Ok(byte) => Ok(Some(byte)), - Err(err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Ok(None); - } - Err(err.into()) - } - } - } - - /// Returns the number of bytes that have been read from the input. - pub fn input_byte_counter(&self) -> u64 { - self.byte_reader.input_byte_counter() - } - - /// Returns a copy of the bytes that have been read from the input. - pub fn captured_bytes(&self) -> Vec { - self.byte_reader.captured_bytes() - } -} - pub struct BencodeParser { tokenizer: BencodeTokenizer, num_processed_tokens: u64, diff --git a/src/parsers/tokenizer.rs b/src/parsers/tokenizer.rs new file mode 100644 index 0000000..8e009c1 --- /dev/null +++ b/src/parsers/tokenizer.rs @@ -0,0 +1,196 @@ +//! Bencode tokenizer. Given an input stream, it returns a stream of tokens. +use std::io::{self, Read}; + +use super::{ + error::{self, ReadContext, WriteContext}, + integer, string, +}; + +use crate::rw::{byte_reader::ByteReader, byte_writer::ByteWriter, writer::Writer}; + +/* TODO: + +- Remove writer from tokenizer. +- Implement trait Iterator for tokenizer. + +*/ + +// Bencoded reserved bytes +const BENCODE_BEGIN_INTEGER: u8 = b'i'; +pub const BENCODE_END_INTEGER: u8 = b'e'; +const BENCODE_BEGIN_LIST: u8 = b'l'; +const BENCODE_BEGIN_DICT: u8 = b'd'; +const BENCODE_END_LIST_OR_DICT: u8 = b'e'; + +#[derive(Debug, PartialEq)] +pub enum BencodeToken { + Integer(Vec), + String(Vec), + BeginList, + BeginDict, + EndListOrDict, + LineBreak, +} + +pub struct BencodeTokenizer { + byte_reader: ByteReader, +} + +impl BencodeTokenizer { + pub fn new(reader: R) -> Self { + BencodeTokenizer { + byte_reader: ByteReader::new(reader), + } + } + + /// It parses the next bencoded token from input. + /// + /// # Errors + /// + /// Will return an error if: + /// + /// - It can't read from the input. 
+    pub fn next_token<W: Writer>(
+        &mut self,
+        writer: &mut W,
+    ) -> Result<Option<BencodeToken>, error::Error> {
+        let capture_output = Vec::new();
+        let mut null_writer = ByteWriter::new(capture_output);
+
+        let opt_peeked_byte = Self::peek_byte(&mut self.byte_reader, &null_writer)?;
+
+        match opt_peeked_byte {
+            Some(peeked_byte) => {
+                match peeked_byte {
+                    BENCODE_BEGIN_INTEGER => {
+                        let value = integer::parse(&mut self.byte_reader, &mut null_writer)?;
+                        Ok(Some(BencodeToken::Integer(value)))
+                    }
+                    b'0'..=b'9' => {
+                        let value = string::parse(&mut self.byte_reader, &mut null_writer)?;
+                        Ok(Some(BencodeToken::String(value)))
+                    }
+                    BENCODE_BEGIN_LIST => {
+                        let _byte = Self::read_peeked_byte(
+                            peeked_byte,
+                            &mut self.byte_reader,
+                            &null_writer,
+                        )?;
+                        Ok(Some(BencodeToken::BeginList))
+                    }
+                    BENCODE_BEGIN_DICT => {
+                        let _byte = Self::read_peeked_byte(
+                            peeked_byte,
+                            &mut self.byte_reader,
+                            &null_writer,
+                        )?;
+                        Ok(Some(BencodeToken::BeginDict))
+                    }
+                    BENCODE_END_LIST_OR_DICT => {
+                        let _byte = Self::read_peeked_byte(
+                            peeked_byte,
+                            &mut self.byte_reader,
+                            &null_writer,
+                        )?;
+                        Ok(Some(BencodeToken::EndListOrDict))
+                    }
+                    b'\n' => {
+                        // todo: we should not return any token and continue to the next token.
+                        // Ignore line breaks at the beginning, the end, or between values
+                        let _byte = Self::read_peeked_byte(
+                            peeked_byte,
+                            &mut self.byte_reader,
+                            &null_writer,
+                        )?;
+                        Ok(Some(BencodeToken::LineBreak))
+                    }
+                    _ => Err(error::Error::UnrecognizedFirstBencodeValueByte(
+                        ReadContext {
+                            byte: Some(peeked_byte),
+                            pos: self.byte_reader.input_byte_counter(),
+                            latest_bytes: self.byte_reader.captured_bytes(),
+                        },
+                        WriteContext {
+                            byte: Some(peeked_byte),
+                            pos: writer.output_byte_counter(),
+                            latest_bytes: writer.captured_bytes(),
+                        },
+                    )),
+                }
+            }
+            None => Ok(None),
+        }
+    }
+
+    /// It reads the next byte from the input consuming it. It returns `None` if
+    /// the input has ended.
+    ///
+    /// # Errors
+    ///
+    /// Will return an error if:
+    ///
+    /// - It can't read from the input.
+    /// - The byte read is not the expected one (the previously peeked byte).
+    fn read_peeked_byte<W: Writer>(
+        peeked_byte: u8,
+        reader: &mut ByteReader<R>,
+        writer: &W,
+    ) -> Result<Option<u8>, error::Error> {
+        match reader.read_byte() {
+            Ok(byte) => {
+                if byte == peeked_byte {
+                    return Ok(Some(byte));
+                }
+                Err(error::Error::ReadByteAfterPeekingDoesMatchPeekedByte(
+                    ReadContext {
+                        byte: Some(byte),
+                        pos: reader.input_byte_counter(),
+                        latest_bytes: reader.captured_bytes(),
+                    },
+                    WriteContext {
+                        byte: Some(byte),
+                        pos: writer.output_byte_counter(),
+                        latest_bytes: writer.captured_bytes(),
+                    },
+                ))
+            }
+            Err(err) => {
+                if err.kind() == io::ErrorKind::UnexpectedEof {
+                    return Ok(None);
+                }
+                Err(err.into())
+            }
+        }
+    }
+
+    /// It peeks the next byte from the input without consuming it. It returns
+    /// `None` if the input has ended.
+    ///
+    /// # Errors
+    ///
+    /// Will return an error if it can't read from the input.
+    fn peek_byte<W: Writer>(
+        reader: &mut ByteReader<R>,
+        _writer: &W,
+    ) -> Result<Option<u8>, error::Error> {
+        match reader.peek_byte() {
+            Ok(byte) => Ok(Some(byte)),
+            Err(err) => {
+                if err.kind() == io::ErrorKind::UnexpectedEof {
+                    return Ok(None);
+                }
+                Err(err.into())
+            }
+        }
+    }
+
+    /// Returns the number of bytes that have been read from the input.
+    pub fn input_byte_counter(&self) -> u64 {
+        self.byte_reader.input_byte_counter()
+    }
+
+    /// Returns a copy of the bytes that have been read from the input.
+ pub fn captured_bytes(&self) -> Vec { + self.byte_reader.captured_bytes() + } +} From f6a0584603453049df43db17ba9cfc5e3e4cd0d0 Mon Sep 17 00:00:00 2001 From: Jose Celano Date: Wed, 4 Dec 2024 09:35:36 +0000 Subject: [PATCH 04/13] refactor: duplicate integer and strig parser before removing writer Remove the writer without affecting other parts of the code. --- src/parsers/tokenizer/integer.rs | 369 +++++++++++ .../{tokenizer.rs => tokenizer/mod.rs} | 8 +- src/parsers/tokenizer/string.rs | 602 ++++++++++++++++++ 3 files changed, 975 insertions(+), 4 deletions(-) create mode 100644 src/parsers/tokenizer/integer.rs rename src/parsers/{tokenizer.rs => tokenizer/mod.rs} (98%) create mode 100644 src/parsers/tokenizer/string.rs diff --git a/src/parsers/tokenizer/integer.rs b/src/parsers/tokenizer/integer.rs new file mode 100644 index 0000000..f31b40f --- /dev/null +++ b/src/parsers/tokenizer/integer.rs @@ -0,0 +1,369 @@ +//! Bencoded integer parser. +//! +//! It reads bencoded bytes from the input and writes JSON bytes to the output. +use std::io::{self, Read}; + +use crate::rw::{byte_reader::ByteReader, writer::Writer}; + +use super::{ + error::{Error, ReadContext, WriteContext}, + BENCODE_END_INTEGER, +}; + +/// The current state parsing the integer. +#[derive(PartialEq)] +#[allow(clippy::enum_variant_names)] +enum StateExpecting { + Start, // S + DigitOrSign, // DoS + DigitAfterSign, // DaS + DigitOrEnd, // DoE +} + +/// It parses an integer bencoded value. +/// +/// # Errors +/// +/// Will return an error if it can't read from the input or write to the +/// output. +/// +/// # Panics +/// +/// Will panic if we reach the end of the input without completing the integer +/// (without reaching the end of the integer `e`). +pub fn parse( + reader: &mut ByteReader, + writer: &mut W, +) -> Result, Error> { + let mut state = StateExpecting::Start; + let mut first_digit_is_zero = false; + let mut value = vec![]; + + loop { + let byte = next_byte(reader, writer)?; + + let char = byte as char; + + state = match state { + StateExpecting::Start => { + // Discard the 'i' byte + StateExpecting::DigitOrSign + } + StateExpecting::DigitOrSign => { + if char == '-' { + writer.write_byte(byte)?; + value.push(byte); + + StateExpecting::DigitAfterSign + } else if char.is_ascii_digit() { + writer.write_byte(byte)?; + value.push(byte); + + if char == '0' { + first_digit_is_zero = true; + } + + StateExpecting::DigitOrEnd + } else { + return Err(Error::UnexpectedByteParsingInteger( + ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + }, + WriteContext { + byte: Some(byte), + pos: writer.output_byte_counter(), + latest_bytes: writer.captured_bytes(), + }, + )); + } + } + StateExpecting::DigitAfterSign => { + if char.is_ascii_digit() { + writer.write_byte(byte)?; + value.push(byte); + + if char == '0' { + first_digit_is_zero = true; + } + + StateExpecting::DigitOrEnd + } else { + return Err(Error::UnexpectedByteParsingInteger( + ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + }, + WriteContext { + byte: Some(byte), + pos: writer.output_byte_counter(), + latest_bytes: writer.captured_bytes(), + }, + )); + } + } + StateExpecting::DigitOrEnd => { + if char.is_ascii_digit() { + writer.write_byte(byte)?; + value.push(byte); + + if char == '0' && first_digit_is_zero { + return Err(Error::LeadingZerosInIntegersNotAllowed( + ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + 
latest_bytes: reader.captured_bytes(), + }, + WriteContext { + byte: Some(byte), + pos: writer.output_byte_counter(), + latest_bytes: writer.captured_bytes(), + }, + )); + } + + StateExpecting::DigitOrEnd + } else if byte == BENCODE_END_INTEGER { + return Ok(value); + } else { + return Err(Error::UnexpectedByteParsingInteger( + ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + }, + WriteContext { + byte: Some(byte), + pos: writer.output_byte_counter(), + latest_bytes: writer.captured_bytes(), + }, + )); + } + } + }; + } +} + +/// It reads the next byte from the input. +/// +/// # Errors +/// +/// Will return an error if the end of input was reached. +fn next_byte(reader: &mut ByteReader, writer: &W) -> Result { + match reader.read_byte() { + Ok(byte) => Ok(byte), + Err(err) => { + if err.kind() == io::ErrorKind::UnexpectedEof { + return Err(Error::UnexpectedEndOfInputParsingInteger( + ReadContext { + byte: None, + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + }, + WriteContext { + byte: None, + pos: writer.output_byte_counter(), + latest_bytes: writer.captured_bytes(), + }, + )); + } + Err(err.into()) + } + } +} + +#[cfg(test)] +mod tests { + use crate::{ + parsers::{error::Error, integer::parse}, + rw::{byte_reader::ByteReader, string_writer::StringWriter}, + }; + + fn bencode_to_json_unchecked(input_buffer: &[u8]) -> String { + let mut output = String::new(); + + parse_bencode(input_buffer, &mut output).expect("Bencode to JSON conversion failed"); + + output + } + + fn try_bencode_to_json(input_buffer: &[u8]) -> Result { + let mut output = String::new(); + + match parse_bencode(input_buffer, &mut output) { + Ok(_value) => Ok(output), + Err(err) => Err(err), + } + } + + fn parse_bencode(input_buffer: &[u8], output: &mut String) -> Result, Error> { + let mut reader = ByteReader::new(input_buffer); + + let mut writer = StringWriter::new(output); + + parse(&mut reader, &mut writer) + } + + mod for_helpers { + use crate::parsers::tokenizer::integer::tests::try_bencode_to_json; + + #[test] + fn bencode_to_json_wrapper_succeeds() { + assert_eq!(try_bencode_to_json(b"i0e").unwrap(), "0".to_string()); + } + + #[test] + fn bencode_to_json_wrapper_fails() { + assert!(try_bencode_to_json(b"i").is_err()); + } + } + + #[test] + fn zero() { + assert_eq!(bencode_to_json_unchecked(b"i0e"), "0".to_string()); + } + + #[test] + fn one_digit_integer() { + assert_eq!(bencode_to_json_unchecked(b"i1e"), "1".to_string()); + } + + #[test] + fn two_digits_integer() { + assert_eq!(bencode_to_json_unchecked(b"i42e"), "42".to_string()); + } + + #[test] + fn negative_integer() { + assert_eq!(bencode_to_json_unchecked(b"i-1e"), "-1".to_string()); + } + + mod it_should_fail { + use std::io::{self, Read}; + + use crate::{ + parsers::{ + error::Error, + tokenizer::integer::{parse, tests::try_bencode_to_json}, + }, + rw::{byte_reader::ByteReader, string_writer::StringWriter}, + }; + + #[test] + fn when_it_cannot_read_more_bytes_from_input() { + let unfinished_int = b"i42"; + + let result = try_bencode_to_json(unfinished_int); + + assert!(matches!( + result, + Err(Error::UnexpectedEndOfInputParsingInteger { .. }) + )); + } + + #[test] + fn when_it_finds_an_invalid_byte() { + let int_with_invalid_byte = b"iae"; + + let result = try_bencode_to_json(int_with_invalid_byte); + + assert!(matches!( + result, + Err(Error::UnexpectedByteParsingInteger { .. 
}) + )); + } + + #[test] + fn when_it_finds_leading_zeros() { + // Leading zeros are not allowed.Only the zero integer can start with zero. + + let int_with_invalid_byte = b"i00e"; + + let result = try_bencode_to_json(int_with_invalid_byte); + + assert!(matches!( + result, + Err(Error::LeadingZerosInIntegersNotAllowed { .. }) + )); + } + + #[test] + fn when_it_finds_leading_zeros_in_a_negative_integer() { + // Leading zeros are not allowed.Only the zero integer can start with zero. + + let int_with_invalid_byte = b"i-00e"; + + let result = try_bencode_to_json(int_with_invalid_byte); + + assert!(matches!( + result, + Err(Error::LeadingZerosInIntegersNotAllowed { .. }) + )); + } + + mod when_it_receives_a_unexpected_byte { + use crate::parsers::{error::Error, tokenizer::integer::tests::try_bencode_to_json}; + + #[test] + fn while_expecting_a_digit_or_sign() { + let int_with_invalid_byte = b"ia"; + + let result = try_bencode_to_json(int_with_invalid_byte); + + assert!(matches!( + result, + Err(Error::UnexpectedByteParsingInteger { .. }) + )); + } + + #[test] + fn while_expecting_digit_after_the_sign() { + let int_with_invalid_byte = b"i-a"; + + let result = try_bencode_to_json(int_with_invalid_byte); + + assert!(matches!( + result, + Err(Error::UnexpectedByteParsingInteger { .. }) + )); + } + + #[test] + fn while_expecting_digit_or_end() { + let int_with_invalid_byte = b"i-1a"; + + let result = try_bencode_to_json(int_with_invalid_byte); + + assert!(matches!( + result, + Err(Error::UnexpectedByteParsingInteger { .. }) + )); + } + } + + #[test] + fn when_it_receives_a_non_eof_io_error() { + struct FaultyReader; + + impl Read for FaultyReader { + fn read(&mut self, _buf: &mut [u8]) -> io::Result { + Err(io::Error::new( + io::ErrorKind::PermissionDenied, + "Permission denied", + )) + } + } + + let mut reader = ByteReader::new(FaultyReader); + + let mut output = String::new(); + let mut writer = StringWriter::new(&mut output); + + let result = parse(&mut reader, &mut writer); + + assert!(matches!(result, Err(Error::Io(_)))); + } + } +} diff --git a/src/parsers/tokenizer.rs b/src/parsers/tokenizer/mod.rs similarity index 98% rename from src/parsers/tokenizer.rs rename to src/parsers/tokenizer/mod.rs index 8e009c1..ceee9fd 100644 --- a/src/parsers/tokenizer.rs +++ b/src/parsers/tokenizer/mod.rs @@ -1,10 +1,10 @@ //! Bencode tokenizer. Given an input stream, it returns a stream of tokens. +pub mod integer; +pub mod string; + use std::io::{self, Read}; -use super::{ - error::{self, ReadContext, WriteContext}, - integer, string, -}; +use super::error::{self, ReadContext, WriteContext}; use crate::rw::{byte_reader::ByteReader, byte_writer::ByteWriter, writer::Writer}; diff --git a/src/parsers/tokenizer/string.rs b/src/parsers/tokenizer/string.rs new file mode 100644 index 0000000..df6f57b --- /dev/null +++ b/src/parsers/tokenizer/string.rs @@ -0,0 +1,602 @@ +//! Bencoded string parser. +//! +//! It reads bencoded bytes from the input and writes JSON bytes to the output. +use std::io::{self, Read}; + +use crate::rw::{byte_reader::ByteReader, writer::Writer}; + +/* todo: Optimize UTF-8 conversion. Try to convert to string partially and stop + converting if we reach a point when input is not valid UTF-8 anymore. This + way we don't consume more memory and we can print the bytes directly to the + output from that point on. +*/ + +use core::str; + +use super::error::{Error, ReadContext, WriteContext}; + +/// It parses a string bencoded value. 
+/// +/// # Errors +/// +/// Will return an error if it can't read from the input or write to the +/// output. +/// +/// # Panics +/// +/// Will panic if we reach the end of the input without completing the string. +pub fn parse( + reader: &mut ByteReader, + writer: &mut W, +) -> Result, Error> { + let mut string_parser = StringParser::default(); + string_parser.parse(reader, writer) +} + +/// Strings bencode format have two parts: `length:value`. +/// +/// - Length is a sequence of bytes (only digits 0..9). +/// - Value is an arbitrary sequence of bytes (not only valid UTF-8). +#[derive(Default, Debug)] +#[allow(clippy::module_name_repetitions)] +struct StringParser { + /// The final parsed string. + parsed_value: String, +} + +impl StringParser { + fn parse( + &mut self, + reader: &mut ByteReader, + writer: &mut W, + ) -> Result, Error> { + let mut length = Length::default(); + + length.parse(reader, writer)?; + + let mut value = Value::new(length.number); + + let value_bytes = value.parse(reader, writer)?; + + self.parsed_value = value.utf8(); + + writer.write_str(&self.json())?; + + Ok(value_bytes) + } + + /// It returns the final parsed value as string. + /// + /// If the string contains non UTF-8 bytes it returns the hexadecimal list + /// of bytes in in the format 'fa fb' + fn parsed_value(&self) -> String { + self.parsed_value.clone() + } + + /// It serializes the parsed value into JSON. + #[must_use] + fn json(&self) -> String { + serde_json::to_string(&self.parsed_value()).unwrap() + } +} + +#[derive(Default, Debug)] +struct Length { + /// A list of parsed bytes. It's only for debugging. + bytes: Vec, + + /// The parsed length at the current read digit. + number: usize, +} + +impl Length { + const END_OF_STRING_LENGTH_BYTE: u8 = b':'; + + fn parse( + &mut self, + reader: &mut ByteReader, + writer: &W, + ) -> Result<(), Error> { + loop { + let byte = Self::next_byte(reader, writer)?; + + match byte { + Self::END_OF_STRING_LENGTH_BYTE => { + break; + } + _ => { + self.add_byte(byte, reader, writer)?; + } + } + } + + Ok(()) + } + + /// It reads the next byte from the input. + /// + /// # Errors + /// + /// Will return an error if the end of input was reached. + fn next_byte(reader: &mut ByteReader, writer: &W) -> Result { + match reader.read_byte() { + Ok(byte) => Ok(byte), + Err(err) => { + if err.kind() == io::ErrorKind::UnexpectedEof { + return Err(Error::UnexpectedEndOfInputParsingStringLength( + ReadContext { + byte: None, + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + }, + WriteContext { + byte: None, + pos: writer.output_byte_counter(), + latest_bytes: writer.captured_bytes(), + }, + )); + } + Err(err.into()) + } + } + } + + /// It adds a new byte (digit) to the string length. + /// + /// # Errors + /// + /// Will return an error if the byte is not a digit (0..9). + fn add_byte( + &mut self, + byte: u8, + reader: &mut ByteReader, + writer: &W, + ) -> Result<(), Error> { + if !byte.is_ascii_digit() { + return Err(Error::InvalidStringLengthByte( + ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + }, + WriteContext { + byte: Some(byte), + pos: writer.output_byte_counter(), + latest_bytes: writer.captured_bytes(), + }, + )); + } + + self.bytes.push(byte); + + self.add_digit_to_length(Self::byte_to_digit(byte)); + + Ok(()) + } + + /// It converts a byte containing an ASCII digit into a number `usize`. 
+ fn byte_to_digit(byte: u8) -> usize { + (byte - b'0') as usize + } + + /// It adds the new digit to the number. + fn add_digit_to_length(&mut self, digit: usize) { + self.number = (self.number * 10) + digit; + } +} + +#[derive(Debug)] +struct Value { + length: usize, + bytes: Vec, + bytes_counter: usize, +} + +impl Value { + fn new(length: usize) -> Self { + Self { + length, + bytes: vec![], + bytes_counter: 0, + } + } + + fn parse( + &mut self, + reader: &mut ByteReader, + writer: &W, + ) -> Result, Error> { + for _i in 1..=self.length { + self.add_byte(Self::next_byte(reader, writer)?); + } + + Ok(self.bytes.clone()) + } + + /// It reads the next byte from the input. + /// + /// # Errors + /// + /// Will return an error if the end of input was reached. + fn next_byte(reader: &mut ByteReader, writer: &W) -> Result { + match reader.read_byte() { + Ok(byte) => Ok(byte), + Err(err) => { + if err.kind() == io::ErrorKind::UnexpectedEof { + return Err(Error::UnexpectedEndOfInputParsingStringValue( + ReadContext { + byte: None, + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + }, + WriteContext { + byte: None, + pos: writer.output_byte_counter(), + latest_bytes: writer.captured_bytes(), + }, + )); + } + Err(err.into()) + } + } + } + + fn add_byte(&mut self, byte: u8) { + self.bytes.push(byte); + self.bytes_counter += 1; + } + + fn utf8(&self) -> String { + match str::from_utf8(&self.bytes) { + Ok(string) => { + // String only contains valid UTF-8 chars -> print it as it's + format!("{}", string.to_owned()) + } + Err(_) => { + // String contains non valid UTF-8 chars -> print it as hex bytes + Self::bytes_to_hex(&self.bytes) + } + } + } + + fn bytes_to_hex(data: &[u8]) -> String { + format!("{}", hex::encode(data)) + } +} + +#[cfg(test)] +mod tests { + use crate::{ + parsers::error::Error, + rw::{byte_reader::ByteReader, string_writer::StringWriter}, + }; + + use super::parse; + + fn bencode_to_json_unchecked(input_buffer: &[u8]) -> String { + let mut output = String::new(); + + parse_bencode(input_buffer, &mut output).expect("Bencode to JSON conversion failed"); + + output + } + + fn try_bencode_to_json(input_buffer: &[u8]) -> Result { + let mut output = String::new(); + + match parse_bencode(input_buffer, &mut output) { + Ok(_string_value_bytes) => Ok(output), + Err(err) => Err(err), + } + } + + fn parse_bencode(input_buffer: &[u8], output: &mut String) -> Result, Error> { + let mut reader = ByteReader::new(input_buffer); + + let mut writer = StringWriter::new(output); + + parse(&mut reader, &mut writer) + } + + mod for_helpers { + use crate::parsers::tokenizer::string::tests::try_bencode_to_json; + + #[test] + fn bencode_to_json_wrapper_succeeds() { + assert_eq!( + try_bencode_to_json(b"4:spam").unwrap(), + r#""spam""#.to_string() + ); + } + + #[test] + fn bencode_to_json_wrapper_fails() { + assert!(try_bencode_to_json(b"4:").is_err()); + } + } + + #[test] + fn length_can_contain_leading_zeros() { + assert_eq!( + bencode_to_json_unchecked(b"00:"), + r#""""#.to_string() + ); + } + + #[test] + fn empty_string() { + assert_eq!( + bencode_to_json_unchecked(b"0:"), + r#""""#.to_string() + ); + } + + #[test] + fn string_with_tags() { + assert_eq!( + bencode_to_json_unchecked(b"8:"), + r#""""#.to_string() + ); + } + + #[test] + fn utf8() { + assert_eq!( + bencode_to_json_unchecked(b"4:spam"), + r#""spam""#.to_string() + ); + } + + #[test] + fn non_utf8() { + assert_eq!( + bencode_to_json_unchecked(b"4:\xFF\xFE\xFD\xFC"), + r#""fffefdfc""#.to_string() + ); + } + + 
#[test] + fn ending_with_bencode_end_char() { + assert_eq!( + bencode_to_json_unchecked(b"1:e"), + r#""e""#.to_string() + ); + } + + #[test] + fn containing_a_reserved_char() { + assert_eq!( + bencode_to_json_unchecked(b"1:i"), + r#""i""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:l"), + r#""l""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:d"), + r#""d""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:l"), + r#""l""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:e"), + r#""e""#.to_string() + ); + } + + #[test] + fn containing_a_digit() { + assert_eq!( + bencode_to_json_unchecked(b"1:0"), + r#""0""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:1"), + r#""1""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:2"), + r#""2""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:3"), + r#""3""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:4"), + r#""4""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:5"), + r#""5""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:6"), + r#""6""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:7"), + r#""7""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:8"), + r#""8""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:9"), + r#""9""#.to_string() + ); + } + + mod should_escape_json { + use crate::{test::bencode_to_json_unchecked, to_bencode}; + + #[test] + fn containing_a_double_quote() { + assert_eq!( + bencode_to_json_unchecked("1:\"".as_bytes()), + r#""\"""#.to_string() + ); + } + + #[test] + fn containing_backslashes() { + assert_eq!( + bencode_to_json_unchecked("1:\\".as_bytes()), + r#""\\""#.to_string() + ); + } + + #[test] + fn containing_control_characters() { + assert_eq!( + bencode_to_json_unchecked("1:\n".as_bytes()), + r#""\n""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked("1:\r".as_bytes()), + r#""\r""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked("1:\t".as_bytes()), + r#""\t""#.to_string() + ); + } + + #[test] + fn containing_unicode_characters() { + assert_eq!( + bencode_to_json_unchecked(&to_bencode("ñandú")), + r#""ñandú""#.to_string() + ); + } + + #[test] + fn containing_non_unicode_characters() { + assert_eq!( + bencode_to_json_unchecked(&[b'4', b':', 0x80, 0xFF, 0x00, 0xAB]), + r#""80ff00ab""#.to_string() + ); + } + } + + mod it_should_fail_parsing_when { + use std::io::{self, Read}; + + use crate::{ + parsers::{ + error::Error, + tokenizer::string::{parse, tests::try_bencode_to_json}, + }, + rw::{byte_reader::ByteReader, string_writer::StringWriter}, + }; + + #[test] + fn it_reaches_the_end_of_the_input_parsing_the_string_length() { + let incomplete_string_length = b"4"; + + let result = try_bencode_to_json(incomplete_string_length); + + assert!(matches!( + result, + Err(Error::UnexpectedEndOfInputParsingStringLength { .. }) + )); + } + + #[test] + fn it_reaches_the_end_of_the_input_parsing_the_string_value() { + let incomplete_string_value = b"4:123"; + + let result = try_bencode_to_json(incomplete_string_value); + + assert!(matches!( + result, + Err(Error::UnexpectedEndOfInputParsingStringValue { .. }) + )); + } + + #[test] + fn it_receives_a_non_digit_byte_in_the_string_length() { + let incomplete_string_value = b"4a:1234"; + + let result = try_bencode_to_json(incomplete_string_value); + + assert!(matches!(result, Err(Error::InvalidStringLengthByte { .. 
}))); + } + + /// Fake reader that fails after reading a certain number of bytes + struct FaultyReader { + /// The bytes the reader will return + bytes: Vec, + + /// The position in the bytes vector where the reader will fail + fail_in_pos: usize, + + /// The current number of bytes read + counter: usize, + } + + impl FaultyReader { + fn new(bytes: Vec, fail_in_pos: usize) -> Self { + Self { + bytes, + fail_in_pos, + counter: 0, + } + } + } + + impl Read for FaultyReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + // Fail exactly at the position set by `fail_in_pos` + if self.counter >= self.fail_in_pos { + return Err(io::Error::new( + io::ErrorKind::PermissionDenied, + "Permission denied", + )); + } + + // Check if we have any bytes left to read + if self.counter >= self.bytes.len() { + return Ok(0); // No more bytes to read (EOF) + } + + // Write one byte at a time to the buffer + buf[0] = self.bytes[self.counter]; + + // Increment the counter to reflect one byte read + self.counter += 1; + + // Return that we read exactly 1 byte + Ok(1) + } + } + + #[test] + fn it_cannot_read_more_bytes_without_finishing_parsing_the_string_length() { + let mut reader = ByteReader::new(FaultyReader::new(b"4:spam".to_vec(), 1)); + + let mut output = String::new(); + let mut writer = StringWriter::new(&mut output); + + let result = parse(&mut reader, &mut writer); + + assert!(matches!(result, Err(Error::Io(_)))); + } + + #[test] + fn it_cannot_read_more_bytes_without_finishing_parsing_the_string_value() { + let mut reader = ByteReader::new(FaultyReader::new(b"4:spam".to_vec(), 3)); + + let mut output = String::new(); + let mut writer = StringWriter::new(&mut output); + + let result = parse(&mut reader, &mut writer); + + assert!(matches!(result, Err(Error::Io(_)))); + } + } +} From 77ad5af4bd1f8dfd143f3a188fd8ce4b6006c87d Mon Sep 17 00:00:00 2001 From: Jose Celano Date: Wed, 4 Dec 2024 09:41:09 +0000 Subject: [PATCH 05/13] refactor: remove writer from main tokenizer --- src/parsers/error.rs | 8 +++--- src/parsers/mod.rs | 5 +--- src/parsers/tokenizer/mod.rs | 53 +++++++----------------------------- 3 files changed, 15 insertions(+), 51 deletions(-) diff --git a/src/parsers/error.rs b/src/parsers/error.rs index 6874bd6..5d00aa9 100644 --- a/src/parsers/error.rs +++ b/src/parsers/error.rs @@ -27,16 +27,16 @@ pub enum Error { /// The main parser peeks one byte ahead to know what kind of bencoded value /// is being parsed. If the byte read after peeking does not match the /// peeked byte, it means the input is being consumed somewhere else. - #[error("Read byte after peeking does match peeked byte; {0}; {1}")] - ReadByteAfterPeekingDoesMatchPeekedByte(ReadContext, WriteContext), + #[error("Read byte after peeking does match peeked byte; {0}")] + ReadByteAfterPeekingDoesMatchPeekedByte(ReadContext), /// Unrecognized first byte for new bencoded value. /// /// The main parser peeks one byte ahead to know what kind of bencoded value /// is being parsed. This error is raised when the peeked byte is not a /// valid first byte for a bencoded value. - #[error("Unrecognized first byte for new bencoded value; {0}; {1}")] - UnrecognizedFirstBencodeValueByte(ReadContext, WriteContext), + #[error("Unrecognized first byte for new bencoded value; {0}")] + UnrecognizedFirstBencodeValueByte(ReadContext), // Integers /// Unexpected byte parsing integer. 
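[Editor's note] The remaining hunks in this patch drop the writer parameter from the tokenizer itself, so token extraction no longer needs an output at all. A minimal usage sketch of the resulting call pattern (hypothetical helper code, not part of the patch; the crate-internal paths and the sample input are assumptions):

```rust
use crate::parsers::{
    error::Error,
    tokenizer::{BencodeToken, BencodeTokenizer},
};

// Collect the whole token stream for a small bencoded input.
fn collect_tokens(input: &[u8]) -> Result<Vec<BencodeToken>, Error> {
    // `&[u8]` implements `std::io::Read`, so it can back the tokenizer directly.
    let mut tokenizer = BencodeTokenizer::new(input);
    let mut tokens = Vec::new();

    // After this patch, `next_token` takes no writer and returns
    // `Ok(None)` once the input is exhausted.
    while let Some(token) = tokenizer.next_token()? {
        tokens.push(token);
    }

    Ok(tokens)
}

// For `b"d3:foo3:bare"` this should yield:
// [BeginDict, String(b"foo".to_vec()), String(b"bar".to_vec()), EndListOrDict]
```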
diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs index b5e9007..71a5323 100644 --- a/src/parsers/mod.rs +++ b/src/parsers/mod.rs @@ -104,10 +104,7 @@ impl BencodeParser { /// - It can't read from the input or write to the output. /// - The input is invalid Bencode. fn parse(&mut self, writer: &mut W) -> Result<(), error::Error> { - let capture_output = Vec::new(); - let mut null_writer = ByteWriter::new(capture_output); - - while let Some(token) = self.tokenizer.next_token(&mut null_writer)? { + while let Some(token) = self.tokenizer.next_token()? { match token { BencodeToken::Integer(integer_bytes) => { self.begin_bencoded_value(BencodeType::Integer, writer)?; diff --git a/src/parsers/tokenizer/mod.rs b/src/parsers/tokenizer/mod.rs index ceee9fd..5c7ca61 100644 --- a/src/parsers/tokenizer/mod.rs +++ b/src/parsers/tokenizer/mod.rs @@ -4,9 +4,9 @@ pub mod string; use std::io::{self, Read}; -use super::error::{self, ReadContext, WriteContext}; +use super::error::{self, ReadContext}; -use crate::rw::{byte_reader::ByteReader, byte_writer::ByteWriter, writer::Writer}; +use crate::rw::{byte_reader::ByteReader, byte_writer::ByteWriter}; /* TODO: @@ -50,14 +50,11 @@ impl BencodeTokenizer { /// Will return an error if: /// /// - It can't read from the input. - pub fn next_token( - &mut self, - writer: &mut W, - ) -> Result, error::Error> { + pub fn next_token(&mut self) -> Result, error::Error> { let capture_output = Vec::new(); let mut null_writer = ByteWriter::new(capture_output); - let opt_peeked_byte = Self::peek_byte(&mut self.byte_reader, &null_writer)?; + let opt_peeked_byte = Self::peek_byte(&mut self.byte_reader)?; match opt_peeked_byte { Some(peeked_byte) => { @@ -71,37 +68,21 @@ impl BencodeTokenizer { Ok(Some(BencodeToken::String(value))) } BENCODE_BEGIN_LIST => { - let _byte = Self::read_peeked_byte( - peeked_byte, - &mut self.byte_reader, - &null_writer, - )?; + let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader)?; Ok(Some(BencodeToken::BeginList)) } BENCODE_BEGIN_DICT => { - let _byte = Self::read_peeked_byte( - peeked_byte, - &mut self.byte_reader, - &null_writer, - )?; + let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader)?; Ok(Some(BencodeToken::BeginDict)) } BENCODE_END_LIST_OR_DICT => { - let _byte = Self::read_peeked_byte( - peeked_byte, - &mut self.byte_reader, - &null_writer, - )?; + let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader)?; Ok(Some(BencodeToken::EndListOrDict)) } b'\n' => { // todo: we should not return any token and continue to the next token. // Ignore line breaks at the beginning, the end, or between values - let _byte = Self::read_peeked_byte( - peeked_byte, - &mut self.byte_reader, - &null_writer, - )?; + let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader)?; Ok(Some(BencodeToken::LineBreak)) } _ => Err(error::Error::UnrecognizedFirstBencodeValueByte( @@ -110,11 +91,6 @@ impl BencodeTokenizer { pos: self.byte_reader.input_byte_counter(), latest_bytes: self.byte_reader.captured_bytes(), }, - WriteContext { - byte: Some(peeked_byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, )), } } @@ -131,10 +107,9 @@ impl BencodeTokenizer { /// /// - It can't read from the input. /// - The byte read is not the expected one (the previously peeked byte). 
- fn read_peeked_byte<W: Writer>(
+ fn read_peeked_byte(
 peeked_byte: u8,
 reader: &mut ByteReader<R>,
- writer: &W,
 ) -> Result<u8, error::Error> {
 match reader.read_byte() {
 Ok(byte) => {
@@ -147,11 +122,6 @@ impl<R: Read> BencodeTokenizer<R> {
 pos: reader.input_byte_counter(),
 latest_bytes: reader.captured_bytes(),
 },
- WriteContext {
- byte: Some(byte),
- pos: writer.output_byte_counter(),
- latest_bytes: writer.captured_bytes(),
- },
 ))
 }
 Err(err) => {
@@ -169,10 +139,7 @@ impl<R: Read> BencodeTokenizer<R> {
 /// # Errors
 ///
 /// Will return an error if it can't read from the input.
- fn peek_byte<W: Writer>(
- reader: &mut ByteReader<R>,
- _writer: &W,
- ) -> Result<Option<u8>, error::Error> {
+ fn peek_byte(reader: &mut ByteReader<R>) -> Result<Option<u8>, error::Error> {
 match reader.peek_byte() {
 Ok(byte) => Ok(Some(byte)),
 Err(err) => {

From 75ffdb4ad58dc050db6716f1a336d05cbe729e2a Mon Sep 17 00:00:00 2001
From: Jose Celano
Date: Wed, 4 Dec 2024 09:46:10 +0000
Subject: [PATCH 06/13] refactor: remove writer from tokenizer integer parser

---
 src/parsers/error.rs | 12 ++--
 src/parsers/integer.rs | 91 ++++++++------------------
 src/parsers/tokenizer/integer.rs | 109 +++++++++----------------------
 src/parsers/tokenizer/mod.rs | 2 +-
 4 files changed, 67 insertions(+), 147 deletions(-)

diff --git a/src/parsers/error.rs b/src/parsers/error.rs
index 5d00aa9..c7ab72e 100644
--- a/src/parsers/error.rs
+++ b/src/parsers/error.rs
@@ -44,18 +44,18 @@ pub enum Error {
 /// The main parser parses integers by reading bytes until it finds the
 /// end of the integer. This error is raised when the byte read is not a
 /// valid byte for an integer bencoded value.
- #[error("Unexpected byte parsing integer; {0}; {1}")]
- UnexpectedByteParsingInteger(ReadContext, WriteContext),
+ #[error("Unexpected byte parsing integer; {0}")]
+ UnexpectedByteParsingInteger(ReadContext),

 /// Unexpected end of input parsing integer.
 ///
 /// The input ends before the integer ends.
- #[error("Unexpected end of input parsing integer; {0}; {1}")]
- UnexpectedEndOfInputParsingInteger(ReadContext, WriteContext),
+ #[error("Unexpected end of input parsing integer; {0}")]
+ UnexpectedEndOfInputParsingInteger(ReadContext),

 /// Leading zeros in integers are not allowed, for example b'i00e'.
- #[error("Leading zeros in integers are not allowed, for example b'i00e'; {0}; {1}")]
- LeadingZerosInIntegersNotAllowed(ReadContext, WriteContext),
+ #[error("Leading zeros in integers are not allowed, for example b'i00e'; {0}")]
+ LeadingZerosInIntegersNotAllowed(ReadContext),

 // Strings
 /// Invalid string length byte, expected a digit.
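With the writer removed, the integer parser below becomes a pure function from
bencoded input to the integer's raw digit bytes; rendering them is left to the
caller. A small sketch of the behaviour the diffs that follow implement
(illustrative only; it assumes the `tokenizer::integer` module and `ByteReader`
are publicly reachable, as the module declarations in this series suggest):

```rust
use bencode2json::{parsers::tokenizer::integer, rw::byte_reader::ByteReader};

fn main() {
    // `parse` consumes the leading 'i', accumulates the optional sign and
    // the digits, and stops after the closing 'e'.
    let mut reader = ByteReader::new("i-42e".as_bytes());

    let value = integer::parse(&mut reader).expect("valid bencoded integer");

    // Only the raw bytes between 'i' and 'e' are returned.
    assert_eq!(value, b"-42".to_vec());
}
```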
diff --git a/src/parsers/integer.rs b/src/parsers/integer.rs index bcb0a5f..0c79157 100644 --- a/src/parsers/integer.rs +++ b/src/parsers/integer.rs @@ -6,7 +6,7 @@ use std::io::{self, Read}; use crate::rw::{byte_reader::ByteReader, writer::Writer}; use super::{ - error::{Error, ReadContext, WriteContext}, + error::{Error, ReadContext}, tokenizer::BENCODE_END_INTEGER, }; @@ -40,7 +40,7 @@ pub fn parse( let mut value = vec![]; loop { - let byte = next_byte(reader, writer)?; + let byte = next_byte(reader)?; let char = byte as char; @@ -65,18 +65,11 @@ pub fn parse( StateExpecting::DigitOrEnd } else { - return Err(Error::UnexpectedByteParsingInteger( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::UnexpectedByteParsingInteger(ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } } StateExpecting::DigitAfterSign => { @@ -90,18 +83,11 @@ pub fn parse( StateExpecting::DigitOrEnd } else { - return Err(Error::UnexpectedByteParsingInteger( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::UnexpectedByteParsingInteger(ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } } StateExpecting::DigitOrEnd => { @@ -110,36 +96,22 @@ pub fn parse( value.push(byte); if char == '0' && first_digit_is_zero { - return Err(Error::LeadingZerosInIntegersNotAllowed( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::LeadingZerosInIntegersNotAllowed(ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } StateExpecting::DigitOrEnd } else if byte == BENCODE_END_INTEGER { return Ok(value); } else { - return Err(Error::UnexpectedByteParsingInteger( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::UnexpectedByteParsingInteger(ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } } }; @@ -151,23 +123,16 @@ pub fn parse( /// # Errors /// /// Will return an error if the end of input was reached. 
-fn next_byte(reader: &mut ByteReader, writer: &W) -> Result { +fn next_byte(reader: &mut ByteReader) -> Result { match reader.read_byte() { Ok(byte) => Ok(byte), Err(err) => { if err.kind() == io::ErrorKind::UnexpectedEof { - return Err(Error::UnexpectedEndOfInputParsingInteger( - ReadContext { - byte: None, - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::UnexpectedEndOfInputParsingInteger(ReadContext { + byte: None, + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } Err(err.into()) } diff --git a/src/parsers/tokenizer/integer.rs b/src/parsers/tokenizer/integer.rs index f31b40f..3896265 100644 --- a/src/parsers/tokenizer/integer.rs +++ b/src/parsers/tokenizer/integer.rs @@ -3,10 +3,10 @@ //! It reads bencoded bytes from the input and writes JSON bytes to the output. use std::io::{self, Read}; -use crate::rw::{byte_reader::ByteReader, writer::Writer}; +use crate::rw::byte_reader::ByteReader; use super::{ - error::{Error, ReadContext, WriteContext}, + error::{Error, ReadContext}, BENCODE_END_INTEGER, }; @@ -31,16 +31,13 @@ enum StateExpecting { /// /// Will panic if we reach the end of the input without completing the integer /// (without reaching the end of the integer `e`). -pub fn parse( - reader: &mut ByteReader, - writer: &mut W, -) -> Result, Error> { +pub fn parse(reader: &mut ByteReader) -> Result, Error> { let mut state = StateExpecting::Start; let mut first_digit_is_zero = false; let mut value = vec![]; loop { - let byte = next_byte(reader, writer)?; + let byte = next_byte(reader)?; let char = byte as char; @@ -51,12 +48,10 @@ pub fn parse( } StateExpecting::DigitOrSign => { if char == '-' { - writer.write_byte(byte)?; value.push(byte); StateExpecting::DigitAfterSign } else if char.is_ascii_digit() { - writer.write_byte(byte)?; value.push(byte); if char == '0' { @@ -65,23 +60,15 @@ pub fn parse( StateExpecting::DigitOrEnd } else { - return Err(Error::UnexpectedByteParsingInteger( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::UnexpectedByteParsingInteger(ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } } StateExpecting::DigitAfterSign => { if char.is_ascii_digit() { - writer.write_byte(byte)?; value.push(byte); if char == '0' { @@ -90,56 +77,34 @@ pub fn parse( StateExpecting::DigitOrEnd } else { - return Err(Error::UnexpectedByteParsingInteger( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::UnexpectedByteParsingInteger(ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } } StateExpecting::DigitOrEnd => { if char.is_ascii_digit() { - writer.write_byte(byte)?; value.push(byte); if char == '0' && first_digit_is_zero { - return Err(Error::LeadingZerosInIntegersNotAllowed( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: 
Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::LeadingZerosInIntegersNotAllowed(ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } StateExpecting::DigitOrEnd } else if byte == BENCODE_END_INTEGER { return Ok(value); } else { - return Err(Error::UnexpectedByteParsingInteger( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::UnexpectedByteParsingInteger(ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } } }; @@ -151,23 +116,16 @@ pub fn parse( /// # Errors /// /// Will return an error if the end of input was reached. -fn next_byte(reader: &mut ByteReader, writer: &W) -> Result { +fn next_byte(reader: &mut ByteReader) -> Result { match reader.read_byte() { Ok(byte) => Ok(byte), Err(err) => { if err.kind() == io::ErrorKind::UnexpectedEof { - return Err(Error::UnexpectedEndOfInputParsingInteger( - ReadContext { - byte: None, - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::UnexpectedEndOfInputParsingInteger(ReadContext { + byte: None, + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } Err(err.into()) } @@ -248,7 +206,7 @@ mod tests { error::Error, tokenizer::integer::{parse, tests::try_bencode_to_json}, }, - rw::{byte_reader::ByteReader, string_writer::StringWriter}, + rw::byte_reader::ByteReader, }; #[test] @@ -358,10 +316,7 @@ mod tests { let mut reader = ByteReader::new(FaultyReader); - let mut output = String::new(); - let mut writer = StringWriter::new(&mut output); - - let result = parse(&mut reader, &mut writer); + let result = parse(&mut reader); assert!(matches!(result, Err(Error::Io(_)))); } diff --git a/src/parsers/tokenizer/mod.rs b/src/parsers/tokenizer/mod.rs index 5c7ca61..1eae4b9 100644 --- a/src/parsers/tokenizer/mod.rs +++ b/src/parsers/tokenizer/mod.rs @@ -60,7 +60,7 @@ impl BencodeTokenizer { Some(peeked_byte) => { match peeked_byte { BENCODE_BEGIN_INTEGER => { - let value = integer::parse(&mut self.byte_reader, &mut null_writer)?; + let value = integer::parse(&mut self.byte_reader)?; Ok(Some(BencodeToken::Integer(value))) } b'0'..=b'9' => { From 0a05544d339685aabda5e6388fdb0f140a464d6e Mon Sep 17 00:00:00 2001 From: Jose Celano Date: Wed, 4 Dec 2024 10:09:13 +0000 Subject: [PATCH 07/13] refactor: remove old int and str parsers with writers --- src/parsers/integer.rs | 334 ----------------------- src/parsers/string.rs | 602 ----------------------------------------- 2 files changed, 936 deletions(-) delete mode 100644 src/parsers/integer.rs delete mode 100644 src/parsers/string.rs diff --git a/src/parsers/integer.rs b/src/parsers/integer.rs deleted file mode 100644 index 0c79157..0000000 --- a/src/parsers/integer.rs +++ /dev/null @@ -1,334 +0,0 @@ -//! Bencoded integer parser. -//! -//! It reads bencoded bytes from the input and writes JSON bytes to the output. 
-use std::io::{self, Read}; - -use crate::rw::{byte_reader::ByteReader, writer::Writer}; - -use super::{ - error::{Error, ReadContext}, - tokenizer::BENCODE_END_INTEGER, -}; - -/// The current state parsing the integer. -#[derive(PartialEq)] -#[allow(clippy::enum_variant_names)] -enum StateExpecting { - Start, // S - DigitOrSign, // DoS - DigitAfterSign, // DaS - DigitOrEnd, // DoE -} - -/// It parses an integer bencoded value. -/// -/// # Errors -/// -/// Will return an error if it can't read from the input or write to the -/// output. -/// -/// # Panics -/// -/// Will panic if we reach the end of the input without completing the integer -/// (without reaching the end of the integer `e`). -pub fn parse( - reader: &mut ByteReader, - writer: &mut W, -) -> Result, Error> { - let mut state = StateExpecting::Start; - let mut first_digit_is_zero = false; - let mut value = vec![]; - - loop { - let byte = next_byte(reader)?; - - let char = byte as char; - - state = match state { - StateExpecting::Start => { - // Discard the 'i' byte - StateExpecting::DigitOrSign - } - StateExpecting::DigitOrSign => { - if char == '-' { - writer.write_byte(byte)?; - value.push(byte); - - StateExpecting::DigitAfterSign - } else if char.is_ascii_digit() { - writer.write_byte(byte)?; - value.push(byte); - - if char == '0' { - first_digit_is_zero = true; - } - - StateExpecting::DigitOrEnd - } else { - return Err(Error::UnexpectedByteParsingInteger(ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - })); - } - } - StateExpecting::DigitAfterSign => { - if char.is_ascii_digit() { - writer.write_byte(byte)?; - value.push(byte); - - if char == '0' { - first_digit_is_zero = true; - } - - StateExpecting::DigitOrEnd - } else { - return Err(Error::UnexpectedByteParsingInteger(ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - })); - } - } - StateExpecting::DigitOrEnd => { - if char.is_ascii_digit() { - writer.write_byte(byte)?; - value.push(byte); - - if char == '0' && first_digit_is_zero { - return Err(Error::LeadingZerosInIntegersNotAllowed(ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - })); - } - - StateExpecting::DigitOrEnd - } else if byte == BENCODE_END_INTEGER { - return Ok(value); - } else { - return Err(Error::UnexpectedByteParsingInteger(ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - })); - } - } - }; - } -} - -/// It reads the next byte from the input. -/// -/// # Errors -/// -/// Will return an error if the end of input was reached. 
-fn next_byte(reader: &mut ByteReader) -> Result { - match reader.read_byte() { - Ok(byte) => Ok(byte), - Err(err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Err(Error::UnexpectedEndOfInputParsingInteger(ReadContext { - byte: None, - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - })); - } - Err(err.into()) - } - } -} - -#[cfg(test)] -mod tests { - use crate::{ - parsers::{error::Error, integer::parse}, - rw::{byte_reader::ByteReader, string_writer::StringWriter}, - }; - - fn bencode_to_json_unchecked(input_buffer: &[u8]) -> String { - let mut output = String::new(); - - parse_bencode(input_buffer, &mut output).expect("Bencode to JSON conversion failed"); - - output - } - - fn try_bencode_to_json(input_buffer: &[u8]) -> Result { - let mut output = String::new(); - - match parse_bencode(input_buffer, &mut output) { - Ok(_value) => Ok(output), - Err(err) => Err(err), - } - } - - fn parse_bencode(input_buffer: &[u8], output: &mut String) -> Result, Error> { - let mut reader = ByteReader::new(input_buffer); - - let mut writer = StringWriter::new(output); - - parse(&mut reader, &mut writer) - } - - mod for_helpers { - use crate::parsers::integer::tests::try_bencode_to_json; - - #[test] - fn bencode_to_json_wrapper_succeeds() { - assert_eq!(try_bencode_to_json(b"i0e").unwrap(), "0".to_string()); - } - - #[test] - fn bencode_to_json_wrapper_fails() { - assert!(try_bencode_to_json(b"i").is_err()); - } - } - - #[test] - fn zero() { - assert_eq!(bencode_to_json_unchecked(b"i0e"), "0".to_string()); - } - - #[test] - fn one_digit_integer() { - assert_eq!(bencode_to_json_unchecked(b"i1e"), "1".to_string()); - } - - #[test] - fn two_digits_integer() { - assert_eq!(bencode_to_json_unchecked(b"i42e"), "42".to_string()); - } - - #[test] - fn negative_integer() { - assert_eq!(bencode_to_json_unchecked(b"i-1e"), "-1".to_string()); - } - - mod it_should_fail { - use std::io::{self, Read}; - - use crate::{ - parsers::{ - error::Error, - integer::{parse, tests::try_bencode_to_json}, - }, - rw::{byte_reader::ByteReader, string_writer::StringWriter}, - }; - - #[test] - fn when_it_cannot_read_more_bytes_from_input() { - let unfinished_int = b"i42"; - - let result = try_bencode_to_json(unfinished_int); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputParsingInteger { .. }) - )); - } - - #[test] - fn when_it_finds_an_invalid_byte() { - let int_with_invalid_byte = b"iae"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedByteParsingInteger { .. }) - )); - } - - #[test] - fn when_it_finds_leading_zeros() { - // Leading zeros are not allowed.Only the zero integer can start with zero. - - let int_with_invalid_byte = b"i00e"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::LeadingZerosInIntegersNotAllowed { .. }) - )); - } - - #[test] - fn when_it_finds_leading_zeros_in_a_negative_integer() { - // Leading zeros are not allowed.Only the zero integer can start with zero. - - let int_with_invalid_byte = b"i-00e"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::LeadingZerosInIntegersNotAllowed { .. 
}) - )); - } - - mod when_it_receives_a_unexpected_byte { - use crate::parsers::{error::Error, integer::tests::try_bencode_to_json}; - - #[test] - fn while_expecting_a_digit_or_sign() { - let int_with_invalid_byte = b"ia"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedByteParsingInteger { .. }) - )); - } - - #[test] - fn while_expecting_digit_after_the_sign() { - let int_with_invalid_byte = b"i-a"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedByteParsingInteger { .. }) - )); - } - - #[test] - fn while_expecting_digit_or_end() { - let int_with_invalid_byte = b"i-1a"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedByteParsingInteger { .. }) - )); - } - } - - #[test] - fn when_it_receives_a_non_eof_io_error() { - struct FaultyReader; - - impl Read for FaultyReader { - fn read(&mut self, _buf: &mut [u8]) -> io::Result { - Err(io::Error::new( - io::ErrorKind::PermissionDenied, - "Permission denied", - )) - } - } - - let mut reader = ByteReader::new(FaultyReader); - - let mut output = String::new(); - let mut writer = StringWriter::new(&mut output); - - let result = parse(&mut reader, &mut writer); - - assert!(matches!(result, Err(Error::Io(_)))); - } - } -} diff --git a/src/parsers/string.rs b/src/parsers/string.rs deleted file mode 100644 index 6d5966a..0000000 --- a/src/parsers/string.rs +++ /dev/null @@ -1,602 +0,0 @@ -//! Bencoded string parser. -//! -//! It reads bencoded bytes from the input and writes JSON bytes to the output. -use std::io::{self, Read}; - -use crate::rw::{byte_reader::ByteReader, writer::Writer}; - -/* todo: Optimize UTF-8 conversion. Try to convert to string partially and stop - converting if we reach a point when input is not valid UTF-8 anymore. This - way we don't consume more memory and we can print the bytes directly to the - output from that point on. -*/ - -use core::str; - -use super::error::{Error, ReadContext, WriteContext}; - -/// It parses a string bencoded value. -/// -/// # Errors -/// -/// Will return an error if it can't read from the input or write to the -/// output. -/// -/// # Panics -/// -/// Will panic if we reach the end of the input without completing the string. -pub fn parse( - reader: &mut ByteReader, - writer: &mut W, -) -> Result, Error> { - let mut string_parser = StringParser::default(); - string_parser.parse(reader, writer) -} - -/// Strings bencode format have two parts: `length:value`. -/// -/// - Length is a sequence of bytes (only digits 0..9). -/// - Value is an arbitrary sequence of bytes (not only valid UTF-8). -#[derive(Default, Debug)] -#[allow(clippy::module_name_repetitions)] -struct StringParser { - /// The final parsed string. - parsed_value: String, -} - -impl StringParser { - fn parse( - &mut self, - reader: &mut ByteReader, - writer: &mut W, - ) -> Result, Error> { - let mut length = Length::default(); - - length.parse(reader, writer)?; - - let mut value = Value::new(length.number); - - let value_bytes = value.parse(reader, writer)?; - - self.parsed_value = value.utf8(); - - writer.write_str(&self.json())?; - - Ok(value_bytes) - } - - /// It returns the final parsed value as string. 
- /// - /// If the string contains non UTF-8 bytes it returns the hexadecimal list - /// of bytes in in the format 'fa fb' - fn parsed_value(&self) -> String { - self.parsed_value.clone() - } - - /// It serializes the parsed value into JSON. - #[must_use] - fn json(&self) -> String { - serde_json::to_string(&self.parsed_value()).unwrap() - } -} - -#[derive(Default, Debug)] -struct Length { - /// A list of parsed bytes. It's only for debugging. - bytes: Vec, - - /// The parsed length at the current read digit. - number: usize, -} - -impl Length { - const END_OF_STRING_LENGTH_BYTE: u8 = b':'; - - fn parse( - &mut self, - reader: &mut ByteReader, - writer: &W, - ) -> Result<(), Error> { - loop { - let byte = Self::next_byte(reader, writer)?; - - match byte { - Self::END_OF_STRING_LENGTH_BYTE => { - break; - } - _ => { - self.add_byte(byte, reader, writer)?; - } - } - } - - Ok(()) - } - - /// It reads the next byte from the input. - /// - /// # Errors - /// - /// Will return an error if the end of input was reached. - fn next_byte(reader: &mut ByteReader, writer: &W) -> Result { - match reader.read_byte() { - Ok(byte) => Ok(byte), - Err(err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Err(Error::UnexpectedEndOfInputParsingStringLength( - ReadContext { - byte: None, - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); - } - Err(err.into()) - } - } - } - - /// It adds a new byte (digit) to the string length. - /// - /// # Errors - /// - /// Will return an error if the byte is not a digit (0..9). - fn add_byte( - &mut self, - byte: u8, - reader: &mut ByteReader, - writer: &W, - ) -> Result<(), Error> { - if !byte.is_ascii_digit() { - return Err(Error::InvalidStringLengthByte( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); - } - - self.bytes.push(byte); - - self.add_digit_to_length(Self::byte_to_digit(byte)); - - Ok(()) - } - - /// It converts a byte containing an ASCII digit into a number `usize`. - fn byte_to_digit(byte: u8) -> usize { - (byte - b'0') as usize - } - - /// It adds the new digit to the number. - fn add_digit_to_length(&mut self, digit: usize) { - self.number = (self.number * 10) + digit; - } -} - -#[derive(Debug)] -struct Value { - length: usize, - bytes: Vec, - bytes_counter: usize, -} - -impl Value { - fn new(length: usize) -> Self { - Self { - length, - bytes: vec![], - bytes_counter: 0, - } - } - - fn parse( - &mut self, - reader: &mut ByteReader, - writer: &W, - ) -> Result, Error> { - for _i in 1..=self.length { - self.add_byte(Self::next_byte(reader, writer)?); - } - - Ok(self.bytes.clone()) - } - - /// It reads the next byte from the input. - /// - /// # Errors - /// - /// Will return an error if the end of input was reached. 
- fn next_byte(reader: &mut ByteReader, writer: &W) -> Result { - match reader.read_byte() { - Ok(byte) => Ok(byte), - Err(err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Err(Error::UnexpectedEndOfInputParsingStringValue( - ReadContext { - byte: None, - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); - } - Err(err.into()) - } - } - } - - fn add_byte(&mut self, byte: u8) { - self.bytes.push(byte); - self.bytes_counter += 1; - } - - fn utf8(&self) -> String { - match str::from_utf8(&self.bytes) { - Ok(string) => { - // String only contains valid UTF-8 chars -> print it as it's - format!("{}", string.to_owned()) - } - Err(_) => { - // String contains non valid UTF-8 chars -> print it as hex bytes - Self::bytes_to_hex(&self.bytes) - } - } - } - - fn bytes_to_hex(data: &[u8]) -> String { - format!("{}", hex::encode(data)) - } -} - -#[cfg(test)] -mod tests { - use crate::{ - parsers::error::Error, - rw::{byte_reader::ByteReader, string_writer::StringWriter}, - }; - - use super::parse; - - fn bencode_to_json_unchecked(input_buffer: &[u8]) -> String { - let mut output = String::new(); - - parse_bencode(input_buffer, &mut output).expect("Bencode to JSON conversion failed"); - - output - } - - fn try_bencode_to_json(input_buffer: &[u8]) -> Result { - let mut output = String::new(); - - match parse_bencode(input_buffer, &mut output) { - Ok(_string_value_bytes) => Ok(output), - Err(err) => Err(err), - } - } - - fn parse_bencode(input_buffer: &[u8], output: &mut String) -> Result, Error> { - let mut reader = ByteReader::new(input_buffer); - - let mut writer = StringWriter::new(output); - - parse(&mut reader, &mut writer) - } - - mod for_helpers { - use crate::parsers::string::tests::try_bencode_to_json; - - #[test] - fn bencode_to_json_wrapper_succeeds() { - assert_eq!( - try_bencode_to_json(b"4:spam").unwrap(), - r#""spam""#.to_string() - ); - } - - #[test] - fn bencode_to_json_wrapper_fails() { - assert!(try_bencode_to_json(b"4:").is_err()); - } - } - - #[test] - fn length_can_contain_leading_zeros() { - assert_eq!( - bencode_to_json_unchecked(b"00:"), - r#""""#.to_string() - ); - } - - #[test] - fn empty_string() { - assert_eq!( - bencode_to_json_unchecked(b"0:"), - r#""""#.to_string() - ); - } - - #[test] - fn string_with_tags() { - assert_eq!( - bencode_to_json_unchecked(b"8:"), - r#""""#.to_string() - ); - } - - #[test] - fn utf8() { - assert_eq!( - bencode_to_json_unchecked(b"4:spam"), - r#""spam""#.to_string() - ); - } - - #[test] - fn non_utf8() { - assert_eq!( - bencode_to_json_unchecked(b"4:\xFF\xFE\xFD\xFC"), - r#""fffefdfc""#.to_string() - ); - } - - #[test] - fn ending_with_bencode_end_char() { - assert_eq!( - bencode_to_json_unchecked(b"1:e"), - r#""e""#.to_string() - ); - } - - #[test] - fn containing_a_reserved_char() { - assert_eq!( - bencode_to_json_unchecked(b"1:i"), - r#""i""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:l"), - r#""l""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:d"), - r#""d""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:l"), - r#""l""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:e"), - r#""e""#.to_string() - ); - } - - #[test] - fn containing_a_digit() { - assert_eq!( - bencode_to_json_unchecked(b"1:0"), - r#""0""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:1"), - r#""1""#.to_string() - ); - 
assert_eq!( - bencode_to_json_unchecked(b"1:2"), - r#""2""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:3"), - r#""3""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:4"), - r#""4""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:5"), - r#""5""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:6"), - r#""6""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:7"), - r#""7""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:8"), - r#""8""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:9"), - r#""9""#.to_string() - ); - } - - mod should_escape_json { - use crate::{test::bencode_to_json_unchecked, to_bencode}; - - #[test] - fn containing_a_double_quote() { - assert_eq!( - bencode_to_json_unchecked("1:\"".as_bytes()), - r#""\"""#.to_string() - ); - } - - #[test] - fn containing_backslashes() { - assert_eq!( - bencode_to_json_unchecked("1:\\".as_bytes()), - r#""\\""#.to_string() - ); - } - - #[test] - fn containing_control_characters() { - assert_eq!( - bencode_to_json_unchecked("1:\n".as_bytes()), - r#""\n""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked("1:\r".as_bytes()), - r#""\r""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked("1:\t".as_bytes()), - r#""\t""#.to_string() - ); - } - - #[test] - fn containing_unicode_characters() { - assert_eq!( - bencode_to_json_unchecked(&to_bencode("ñandú")), - r#""ñandú""#.to_string() - ); - } - - #[test] - fn containing_non_unicode_characters() { - assert_eq!( - bencode_to_json_unchecked(&[b'4', b':', 0x80, 0xFF, 0x00, 0xAB]), - r#""80ff00ab""#.to_string() - ); - } - } - - mod it_should_fail_parsing_when { - use std::io::{self, Read}; - - use crate::{ - parsers::{ - error::Error, - string::{parse, tests::try_bencode_to_json}, - }, - rw::{byte_reader::ByteReader, string_writer::StringWriter}, - }; - - #[test] - fn it_reaches_the_end_of_the_input_parsing_the_string_length() { - let incomplete_string_length = b"4"; - - let result = try_bencode_to_json(incomplete_string_length); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputParsingStringLength { .. }) - )); - } - - #[test] - fn it_reaches_the_end_of_the_input_parsing_the_string_value() { - let incomplete_string_value = b"4:123"; - - let result = try_bencode_to_json(incomplete_string_value); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputParsingStringValue { .. }) - )); - } - - #[test] - fn it_receives_a_non_digit_byte_in_the_string_length() { - let incomplete_string_value = b"4a:1234"; - - let result = try_bencode_to_json(incomplete_string_value); - - assert!(matches!(result, Err(Error::InvalidStringLengthByte { .. 
}))); - } - - /// Fake reader that fails after reading a certain number of bytes - struct FaultyReader { - /// The bytes the reader will return - bytes: Vec, - - /// The position in the bytes vector where the reader will fail - fail_in_pos: usize, - - /// The current number of bytes read - counter: usize, - } - - impl FaultyReader { - fn new(bytes: Vec, fail_in_pos: usize) -> Self { - Self { - bytes, - fail_in_pos, - counter: 0, - } - } - } - - impl Read for FaultyReader { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - // Fail exactly at the position set by `fail_in_pos` - if self.counter >= self.fail_in_pos { - return Err(io::Error::new( - io::ErrorKind::PermissionDenied, - "Permission denied", - )); - } - - // Check if we have any bytes left to read - if self.counter >= self.bytes.len() { - return Ok(0); // No more bytes to read (EOF) - } - - // Write one byte at a time to the buffer - buf[0] = self.bytes[self.counter]; - - // Increment the counter to reflect one byte read - self.counter += 1; - - // Return that we read exactly 1 byte - Ok(1) - } - } - - #[test] - fn it_cannot_read_more_bytes_without_finishing_parsing_the_string_length() { - let mut reader = ByteReader::new(FaultyReader::new(b"4:spam".to_vec(), 1)); - - let mut output = String::new(); - let mut writer = StringWriter::new(&mut output); - - let result = parse(&mut reader, &mut writer); - - assert!(matches!(result, Err(Error::Io(_)))); - } - - #[test] - fn it_cannot_read_more_bytes_without_finishing_parsing_the_string_value() { - let mut reader = ByteReader::new(FaultyReader::new(b"4:spam".to_vec(), 3)); - - let mut output = String::new(); - let mut writer = StringWriter::new(&mut output); - - let result = parse(&mut reader, &mut writer); - - assert!(matches!(result, Err(Error::Io(_)))); - } - } -} From 9e0db6c4250bbebb0058ec438f5971670f3bdae9 Mon Sep 17 00:00:00 2001 From: Jose Celano Date: Wed, 4 Dec 2024 10:09:41 +0000 Subject: [PATCH 08/13] refactor: remove writer from tokenizer string parser --- src/parsers/error.rs | 12 +- src/parsers/mod.rs | 2 - src/parsers/tokenizer/integer.rs | 38 ++--- src/parsers/tokenizer/mod.rs | 12 +- src/parsers/tokenizer/string.rs | 253 +++++++------------------------ 5 files changed, 80 insertions(+), 237 deletions(-) diff --git a/src/parsers/error.rs b/src/parsers/error.rs index c7ab72e..08ab2ab 100644 --- a/src/parsers/error.rs +++ b/src/parsers/error.rs @@ -62,20 +62,20 @@ pub enum Error { /// /// The string parser found an invalid byte for the string length. The /// length can only be made of digits (0-9). - #[error("Invalid string length byte, expected a digit; {0}; {1}")] - InvalidStringLengthByte(ReadContext, WriteContext), + #[error("Invalid string length byte, expected a digit; {0}")] + InvalidStringLengthByte(ReadContext), /// Unexpected end of input parsing string length. /// /// The input ends before the string length ends. - #[error("Unexpected end of input parsing string length; {0}; {1}")] - UnexpectedEndOfInputParsingStringLength(ReadContext, WriteContext), + #[error("Unexpected end of input parsing string length; {0}")] + UnexpectedEndOfInputParsingStringLength(ReadContext), /// Unexpected end of input parsing string value. /// /// The input ends before the string value ends. 
- #[error("Unexpected end of input parsing string value; {0}; {1}")] - UnexpectedEndOfInputParsingStringValue(ReadContext, WriteContext), + #[error("Unexpected end of input parsing string value; {0}")] + UnexpectedEndOfInputParsingStringValue(ReadContext), // Lists /// Unexpected end of input parsing list. Expecting first list item or list end. diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs index 71a5323..8c95c79 100644 --- a/src/parsers/mod.rs +++ b/src/parsers/mod.rs @@ -4,9 +4,7 @@ //! ``BencodeParser`` is the main parser. It is generic over the type of the //! input buffer. pub mod error; -pub mod integer; pub mod stack; -pub mod string; pub mod tokenizer; /* TODO: diff --git a/src/parsers/tokenizer/integer.rs b/src/parsers/tokenizer/integer.rs index 3896265..0dd2730 100644 --- a/src/parsers/tokenizer/integer.rs +++ b/src/parsers/tokenizer/integer.rs @@ -134,34 +134,22 @@ fn next_byte(reader: &mut ByteReader) -> Result { #[cfg(test)] mod tests { - use crate::{ - parsers::{error::Error, integer::parse}, - rw::{byte_reader::ByteReader, string_writer::StringWriter}, - }; + use crate::{parsers::error::Error, rw::byte_reader::ByteReader}; - fn bencode_to_json_unchecked(input_buffer: &[u8]) -> String { - let mut output = String::new(); + use super::parse; - parse_bencode(input_buffer, &mut output).expect("Bencode to JSON conversion failed"); - - output + fn bencode_to_json_unchecked(input_buffer: &[u8]) -> Vec { + parse_bencode(input_buffer).expect("Bencode to JSON conversion failed") } - fn try_bencode_to_json(input_buffer: &[u8]) -> Result { - let mut output = String::new(); - - match parse_bencode(input_buffer, &mut output) { - Ok(_value) => Ok(output), - Err(err) => Err(err), - } + fn try_bencode_to_json(input_buffer: &[u8]) -> Result, Error> { + parse_bencode(input_buffer) } - fn parse_bencode(input_buffer: &[u8], output: &mut String) -> Result, Error> { + fn parse_bencode(input_buffer: &[u8]) -> Result, Error> { let mut reader = ByteReader::new(input_buffer); - let mut writer = StringWriter::new(output); - - parse(&mut reader, &mut writer) + parse(&mut reader) } mod for_helpers { @@ -169,7 +157,7 @@ mod tests { #[test] fn bencode_to_json_wrapper_succeeds() { - assert_eq!(try_bencode_to_json(b"i0e").unwrap(), "0".to_string()); + assert_eq!(try_bencode_to_json(b"i0e").unwrap(), "0".as_bytes()); } #[test] @@ -180,22 +168,22 @@ mod tests { #[test] fn zero() { - assert_eq!(bencode_to_json_unchecked(b"i0e"), "0".to_string()); + assert_eq!(bencode_to_json_unchecked(b"i0e"), "0".as_bytes()); } #[test] fn one_digit_integer() { - assert_eq!(bencode_to_json_unchecked(b"i1e"), "1".to_string()); + assert_eq!(bencode_to_json_unchecked(b"i1e"), "1".as_bytes()); } #[test] fn two_digits_integer() { - assert_eq!(bencode_to_json_unchecked(b"i42e"), "42".to_string()); + assert_eq!(bencode_to_json_unchecked(b"i42e"), "42".as_bytes()); } #[test] fn negative_integer() { - assert_eq!(bencode_to_json_unchecked(b"i-1e"), "-1".to_string()); + assert_eq!(bencode_to_json_unchecked(b"i-1e"), "-1".as_bytes()); } mod it_should_fail { diff --git a/src/parsers/tokenizer/mod.rs b/src/parsers/tokenizer/mod.rs index 1eae4b9..f1de03a 100644 --- a/src/parsers/tokenizer/mod.rs +++ b/src/parsers/tokenizer/mod.rs @@ -6,11 +6,10 @@ use std::io::{self, Read}; use super::error::{self, ReadContext}; -use crate::rw::{byte_reader::ByteReader, byte_writer::ByteWriter}; +use crate::rw::byte_reader::ByteReader; /* TODO: -- Remove writer from tokenizer. - Implement trait Iterator for tokenizer. 
*/ @@ -51,12 +50,7 @@ impl BencodeTokenizer { /// /// - It can't read from the input. pub fn next_token(&mut self) -> Result, error::Error> { - let capture_output = Vec::new(); - let mut null_writer = ByteWriter::new(capture_output); - - let opt_peeked_byte = Self::peek_byte(&mut self.byte_reader)?; - - match opt_peeked_byte { + match Self::peek_byte(&mut self.byte_reader)? { Some(peeked_byte) => { match peeked_byte { BENCODE_BEGIN_INTEGER => { @@ -64,7 +58,7 @@ impl BencodeTokenizer { Ok(Some(BencodeToken::Integer(value))) } b'0'..=b'9' => { - let value = string::parse(&mut self.byte_reader, &mut null_writer)?; + let value = string::parse(&mut self.byte_reader)?; Ok(Some(BencodeToken::String(value))) } BENCODE_BEGIN_LIST => { diff --git a/src/parsers/tokenizer/string.rs b/src/parsers/tokenizer/string.rs index df6f57b..f3325a5 100644 --- a/src/parsers/tokenizer/string.rs +++ b/src/parsers/tokenizer/string.rs @@ -3,7 +3,7 @@ //! It reads bencoded bytes from the input and writes JSON bytes to the output. use std::io::{self, Read}; -use crate::rw::{byte_reader::ByteReader, writer::Writer}; +use crate::rw::byte_reader::ByteReader; /* todo: Optimize UTF-8 conversion. Try to convert to string partially and stop converting if we reach a point when input is not valid UTF-8 anymore. This @@ -13,24 +13,20 @@ use crate::rw::{byte_reader::ByteReader, writer::Writer}; use core::str; -use super::error::{Error, ReadContext, WriteContext}; +use super::error::{Error, ReadContext}; /// It parses a string bencoded value. /// /// # Errors /// -/// Will return an error if it can't read from the input or write to the -/// output. +/// Will return an error if it can't read from the input. /// /// # Panics /// /// Will panic if we reach the end of the input without completing the string. -pub fn parse( - reader: &mut ByteReader, - writer: &mut W, -) -> Result, Error> { +pub fn parse(reader: &mut ByteReader) -> Result, Error> { let mut string_parser = StringParser::default(); - string_parser.parse(reader, writer) + string_parser.parse(reader) } /// Strings bencode format have two parts: `length:value`. @@ -45,39 +41,19 @@ struct StringParser { } impl StringParser { - fn parse( - &mut self, - reader: &mut ByteReader, - writer: &mut W, - ) -> Result, Error> { + fn parse(&mut self, reader: &mut ByteReader) -> Result, Error> { let mut length = Length::default(); - length.parse(reader, writer)?; + length.parse(reader)?; let mut value = Value::new(length.number); - let value_bytes = value.parse(reader, writer)?; + let value_bytes = value.parse(reader)?; self.parsed_value = value.utf8(); - writer.write_str(&self.json())?; - Ok(value_bytes) } - - /// It returns the final parsed value as string. - /// - /// If the string contains non UTF-8 bytes it returns the hexadecimal list - /// of bytes in in the format 'fa fb' - fn parsed_value(&self) -> String { - self.parsed_value.clone() - } - - /// It serializes the parsed value into JSON. 
- #[must_use] - fn json(&self) -> String { - serde_json::to_string(&self.parsed_value()).unwrap() - } } #[derive(Default, Debug)] @@ -92,20 +68,16 @@ struct Length { impl Length { const END_OF_STRING_LENGTH_BYTE: u8 = b':'; - fn parse( - &mut self, - reader: &mut ByteReader, - writer: &W, - ) -> Result<(), Error> { + fn parse(&mut self, reader: &mut ByteReader) -> Result<(), Error> { loop { - let byte = Self::next_byte(reader, writer)?; + let byte = Self::next_byte(reader)?; match byte { Self::END_OF_STRING_LENGTH_BYTE => { break; } _ => { - self.add_byte(byte, reader, writer)?; + self.add_byte(byte, reader)?; } } } @@ -118,7 +90,7 @@ impl Length { /// # Errors /// /// Will return an error if the end of input was reached. - fn next_byte(reader: &mut ByteReader, writer: &W) -> Result { + fn next_byte(reader: &mut ByteReader) -> Result { match reader.read_byte() { Ok(byte) => Ok(byte), Err(err) => { @@ -129,11 +101,6 @@ impl Length { pos: reader.input_byte_counter(), latest_bytes: reader.captured_bytes(), }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, )); } Err(err.into()) @@ -146,25 +113,13 @@ impl Length { /// # Errors /// /// Will return an error if the byte is not a digit (0..9). - fn add_byte( - &mut self, - byte: u8, - reader: &mut ByteReader, - writer: &W, - ) -> Result<(), Error> { + fn add_byte(&mut self, byte: u8, reader: &mut ByteReader) -> Result<(), Error> { if !byte.is_ascii_digit() { - return Err(Error::InvalidStringLengthByte( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: Some(byte), - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::InvalidStringLengthByte(ReadContext { + byte: Some(byte), + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } self.bytes.push(byte); @@ -201,13 +156,9 @@ impl Value { } } - fn parse( - &mut self, - reader: &mut ByteReader, - writer: &W, - ) -> Result, Error> { + fn parse(&mut self, reader: &mut ByteReader) -> Result, Error> { for _i in 1..=self.length { - self.add_byte(Self::next_byte(reader, writer)?); + self.add_byte(Self::next_byte(reader)?); } Ok(self.bytes.clone()) @@ -218,23 +169,16 @@ impl Value { /// # Errors /// /// Will return an error if the end of input was reached. 
- fn next_byte(reader: &mut ByteReader, writer: &W) -> Result { + fn next_byte(reader: &mut ByteReader) -> Result { match reader.read_byte() { Ok(byte) => Ok(byte), Err(err) => { if err.kind() == io::ErrorKind::UnexpectedEof { - return Err(Error::UnexpectedEndOfInputParsingStringValue( - ReadContext { - byte: None, - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); + return Err(Error::UnexpectedEndOfInputParsingStringValue(ReadContext { + byte: None, + pos: reader.input_byte_counter(), + latest_bytes: reader.captured_bytes(), + })); } Err(err.into()) } @@ -266,36 +210,21 @@ impl Value { #[cfg(test)] mod tests { - use crate::{ - parsers::error::Error, - rw::{byte_reader::ByteReader, string_writer::StringWriter}, - }; + use crate::{parsers::error::Error, rw::byte_reader::ByteReader}; use super::parse; - fn bencode_to_json_unchecked(input_buffer: &[u8]) -> String { - let mut output = String::new(); - - parse_bencode(input_buffer, &mut output).expect("Bencode to JSON conversion failed"); - - output + fn bencode_to_json_unchecked(input_buffer: &[u8]) -> Vec { + parse_bencode(input_buffer).expect("Bencode to JSON conversion failed") } - fn try_bencode_to_json(input_buffer: &[u8]) -> Result { - let mut output = String::new(); - - match parse_bencode(input_buffer, &mut output) { - Ok(_string_value_bytes) => Ok(output), - Err(err) => Err(err), - } + fn try_bencode_to_json(input_buffer: &[u8]) -> Result, Error> { + parse_bencode(input_buffer) } - fn parse_bencode(input_buffer: &[u8], output: &mut String) -> Result, Error> { + fn parse_bencode(input_buffer: &[u8]) -> Result, Error> { let mut reader = ByteReader::new(input_buffer); - - let mut writer = StringWriter::new(output); - - parse(&mut reader, &mut writer) + parse(&mut reader) } mod for_helpers { @@ -303,10 +232,7 @@ mod tests { #[test] fn bencode_to_json_wrapper_succeeds() { - assert_eq!( - try_bencode_to_json(b"4:spam").unwrap(), - r#""spam""#.to_string() - ); + assert_eq!(try_bencode_to_json(b"4:spam").unwrap(), r"spam".as_bytes()); } #[test] @@ -317,118 +243,61 @@ mod tests { #[test] fn length_can_contain_leading_zeros() { - assert_eq!( - bencode_to_json_unchecked(b"00:"), - r#""""#.to_string() - ); + assert_eq!(bencode_to_json_unchecked(b"00:"), r"".as_bytes()); } #[test] fn empty_string() { - assert_eq!( - bencode_to_json_unchecked(b"0:"), - r#""""#.to_string() - ); + assert_eq!(bencode_to_json_unchecked(b"0:"), r"".as_bytes()); } #[test] fn string_with_tags() { assert_eq!( bencode_to_json_unchecked(b"8:"), - r#""""#.to_string() + r"".as_bytes() ); } #[test] fn utf8() { - assert_eq!( - bencode_to_json_unchecked(b"4:spam"), - r#""spam""#.to_string() - ); + assert_eq!(bencode_to_json_unchecked(b"4:spam"), r"spam".as_bytes()); } #[test] fn non_utf8() { assert_eq!( bencode_to_json_unchecked(b"4:\xFF\xFE\xFD\xFC"), - r#""fffefdfc""#.to_string() + vec![0xFF, 0xFE, 0xFD, 0xFC] ); } #[test] fn ending_with_bencode_end_char() { - assert_eq!( - bencode_to_json_unchecked(b"1:e"), - r#""e""#.to_string() - ); + assert_eq!(bencode_to_json_unchecked(b"1:e"), r"e".as_bytes()); } #[test] fn containing_a_reserved_char() { - assert_eq!( - bencode_to_json_unchecked(b"1:i"), - r#""i""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:l"), - r#""l""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:d"), - r#""d""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:l"), - 
r#""l""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:e"), - r#""e""#.to_string() - ); + assert_eq!(bencode_to_json_unchecked(b"1:i"), r"i".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:l"), r"l".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:d"), r"d".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:l"), r"l".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:e"), r"e".as_bytes()); } #[test] fn containing_a_digit() { - assert_eq!( - bencode_to_json_unchecked(b"1:0"), - r#""0""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:1"), - r#""1""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:2"), - r#""2""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:3"), - r#""3""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:4"), - r#""4""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:5"), - r#""5""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:6"), - r#""6""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:7"), - r#""7""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:8"), - r#""8""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:9"), - r#""9""#.to_string() - ); + assert_eq!(bencode_to_json_unchecked(b"1:0"), r"0".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:1"), r"1".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:2"), r"2".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:3"), r"3".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:4"), r"4".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:5"), r"5".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:6"), r"6".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:7"), r"7".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:8"), r"8".as_bytes()); + assert_eq!(bencode_to_json_unchecked(b"1:9"), r"9".as_bytes()); } mod should_escape_json { @@ -491,7 +360,7 @@ mod tests { error::Error, tokenizer::string::{parse, tests::try_bencode_to_json}, }, - rw::{byte_reader::ByteReader, string_writer::StringWriter}, + rw::byte_reader::ByteReader, }; #[test] @@ -579,10 +448,7 @@ mod tests { fn it_cannot_read_more_bytes_without_finishing_parsing_the_string_length() { let mut reader = ByteReader::new(FaultyReader::new(b"4:spam".to_vec(), 1)); - let mut output = String::new(); - let mut writer = StringWriter::new(&mut output); - - let result = parse(&mut reader, &mut writer); + let result = parse(&mut reader); assert!(matches!(result, Err(Error::Io(_)))); } @@ -591,10 +457,7 @@ mod tests { fn it_cannot_read_more_bytes_without_finishing_parsing_the_string_value() { let mut reader = ByteReader::new(FaultyReader::new(b"4:spam".to_vec(), 3)); - let mut output = String::new(); - let mut writer = StringWriter::new(&mut output); - - let result = parse(&mut reader, &mut writer); + let result = parse(&mut reader); assert!(matches!(result, Err(Error::Io(_)))); } From 331c76eebbe63fc1f3f7800b29723faed2004507 Mon Sep 17 00:00:00 2001 From: Jose Celano Date: Wed, 4 Dec 2024 10:37:15 +0000 Subject: [PATCH 09/13] refactor: reorganize modules --- examples/parser_file_in_file_out.rs | 2 +- examples/parser_stdin_stdout.rs | 2 +- examples/parser_string_in_string_out.rs | 2 +- examples/parser_string_in_vec_out.rs | 2 +- examples/parser_vec_in_string_out.rs | 2 +- examples/parser_vec_in_vec_out.rs | 2 +- src/lib.rs | 2 +- src/main.rs | 2 +- src/parsers/error.rs | 6 +- src/parsers/generators/json.rs | 2087 ++++++++++++++++++++++ 
src/parsers/generators/mod.rs | 12 + src/parsers/{ => generators}/stack.rs | 6 +- src/parsers/mod.rs | 2097 +---------------------- src/test.rs | 2 +- 14 files changed, 2115 insertions(+), 2111 deletions(-) create mode 100644 src/parsers/generators/json.rs create mode 100644 src/parsers/generators/mod.rs rename src/parsers/{ => generators}/stack.rs (97%) diff --git a/examples/parser_file_in_file_out.rs b/examples/parser_file_in_file_out.rs index 732f9b8..3ac3873 100644 --- a/examples/parser_file_in_file_out.rs +++ b/examples/parser_file_in_file_out.rs @@ -10,7 +10,7 @@ use std::{ io::{Read, Write}, }; -use bencode2json::parsers::BencodeParser; +use bencode2json::parsers::generators::json::BencodeParser; use clap::{Arg, Command}; fn main() { diff --git a/examples/parser_stdin_stdout.rs b/examples/parser_stdin_stdout.rs index 4f8e162..2f1881d 100644 --- a/examples/parser_stdin_stdout.rs +++ b/examples/parser_stdin_stdout.rs @@ -7,7 +7,7 @@ //! It prints "spam". use std::io; -use bencode2json::parsers::BencodeParser; +use bencode2json::parsers::generators::json::BencodeParser; fn main() { let input = Box::new(io::stdin()); diff --git a/examples/parser_string_in_string_out.rs b/examples/parser_string_in_string_out.rs index 638304e..85adc5b 100644 --- a/examples/parser_string_in_string_out.rs +++ b/examples/parser_string_in_string_out.rs @@ -5,7 +5,7 @@ //! ``` //! //! It prints "spam". -use bencode2json::parsers::BencodeParser; +use bencode2json::parsers::generators::json::BencodeParser; fn main() { let input = "4:spam".to_string(); diff --git a/examples/parser_string_in_vec_out.rs b/examples/parser_string_in_vec_out.rs index 56c073b..a4db939 100644 --- a/examples/parser_string_in_vec_out.rs +++ b/examples/parser_string_in_vec_out.rs @@ -5,7 +5,7 @@ //! ``` //! //! It prints "spam". -use bencode2json::parsers::BencodeParser; +use bencode2json::parsers::generators::json::BencodeParser; fn main() { let input = "4:spam".to_string(); diff --git a/examples/parser_vec_in_string_out.rs b/examples/parser_vec_in_string_out.rs index fc27d07..d473d2a 100644 --- a/examples/parser_vec_in_string_out.rs +++ b/examples/parser_vec_in_string_out.rs @@ -5,7 +5,7 @@ //! ``` //! //! It prints "spam". -use bencode2json::parsers::BencodeParser; +use bencode2json::parsers::generators::json::BencodeParser; fn main() { let input = b"4:spam".to_vec(); diff --git a/examples/parser_vec_in_vec_out.rs b/examples/parser_vec_in_vec_out.rs index 57ac35d..4d8b794 100644 --- a/examples/parser_vec_in_vec_out.rs +++ b/examples/parser_vec_in_vec_out.rs @@ -5,7 +5,7 @@ //! ``` //! //! It prints "spam". -use bencode2json::parsers::BencodeParser; +use bencode2json::parsers::generators::json::BencodeParser; fn main() { let input = b"4:spam".to_vec(); diff --git a/src/lib.rs b/src/lib.rs index 9a165ee..4df388b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,7 +34,7 @@ //! //! > __NOTICE__: In the context of this lib, parser is a function that takes an input //! > containing bencoded data and produces a JSON output (raw bytes or UTF-8 string). -use parsers::{error::Error, BencodeParser}; +use parsers::{error::Error, generators::json::BencodeParser}; pub mod parsers; pub mod rw; diff --git a/src/main.rs b/src/main.rs index ae927f0..b4185f9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,7 +13,7 @@ //! ```text //! cargo run -- -i ./tests/fixtures/sample.bencode -o output.json //! 
```
-use bencode2json::parsers::BencodeParser;
+use bencode2json::parsers::generators::json::BencodeParser;
 use clap::{Arg, Command};
 use std::fs::File;
 use std::io::{self, Read, Write};
diff --git a/src/parsers/error.rs b/src/parsers/error.rs
index 08ab2ab..1390249 100644
--- a/src/parsers/error.rs
+++ b/src/parsers/error.rs
@@ -9,7 +9,7 @@ use thiserror::Error;

 use crate::rw;

-use super::BencodeType;
+use super::generators::BencodeType;

 /// Errors that can occur while parsing a bencoded value.
 #[derive(Debug, Error)]
@@ -121,7 +121,7 @@ pub enum Error {
 NoMatchingStartForListOrDictEnd(ReadContext, WriteContext),
 }

-/// The reader context when the error ocurred.
+/// The reader context when the error occurred.
 #[derive(Debug)]
 pub struct ReadContext {
 /// The read byte that caused the error if any.
@@ -157,7 +157,7 @@ impl fmt::Display for ReadContext {
 }
 }

-/// The writer context when the error ocurred.
+/// The writer context when the error occurred.
 #[derive(Debug)]
 pub struct WriteContext {
 /// The written byte that caused the error if any.
diff --git a/src/parsers/generators/json.rs b/src/parsers/generators/json.rs
new file mode 100644
index 0000000..5ea180a
--- /dev/null
+++ b/src/parsers/generators/json.rs
@@ -0,0 +1,2087 @@
+/* TODO:
+
+- Rename this parser to generator.
+
+*/
+
+use core::str;
+use std::{
+ fmt::Write as FmtWrite,
+ io::{Read, Write as IoWrite},
+};
+
+use super::{
+ stack::{Stack, State},
+ BencodeType,
+};
+use tokenizer::{BencodeToken, BencodeTokenizer};
+
+use crate::{
+ parsers::{
+ error::{self, ReadContext, WriteContext},
+ tokenizer,
+ },
+ rw::{byte_writer::ByteWriter, string_writer::StringWriter, writer::Writer},
+};
+
+pub struct BencodeParser<R: Read> {
+ tokenizer: BencodeTokenizer<R>,
+ num_processed_tokens: u64,
+ stack: Stack,
+}
+
+impl<R: Read> BencodeParser<R> {
+ const JSON_ARRAY_BEGIN: u8 = b'[';
+ const JSON_ARRAY_ITEMS_SEPARATOR: u8 = b',';
+ const JSON_ARRAY_END: u8 = b']';
+
+ const JSON_OBJ_BEGIN: u8 = b'{';
+ const JSON_OBJ_FIELDS_SEPARATOR: u8 = b',';
+ const JSON_OBJ_FIELD_KEY_VALUE_SEPARATOR: u8 = b':';
+ const JSON_OBJ_END: u8 = b'}';
+
+ pub fn new(reader: R) -> Self {
+ BencodeParser {
+ tokenizer: BencodeTokenizer::new(reader),
+ num_processed_tokens: 1,
+ stack: Stack::default(),
+ }
+ }
+
+ /// It parses a bencoded value read from input and writes the corresponding
+ /// JSON UTF-8 string value to the output.
+ ///
+ /// # Errors
+ ///
+ /// Will return an error if it can't read from the input or write to the
+ /// output.
+ ///
+ /// # Panics
+ ///
+ /// Will panic if it receives a byte that isn't a valid begin or end of a
+ /// bencoded type: integer, string, list or dictionary.
+ pub fn write_str<W: FmtWrite>(&mut self, writer: W) -> Result<(), error::Error> {
+ let mut writer = StringWriter::new(writer);
+ self.parse(&mut writer)
+ }
+
+ /// It parses a bencoded value read from input and writes the corresponding
+ /// JSON UTF-8 string value as bytes to the output.
+ ///
+ /// # Errors
+ ///
+ /// Will return an error if it can't read from the input or write to the
+ /// output.
+ ///
+ /// # Panics
+ ///
+ /// Will panic if it receives a byte that isn't a valid begin or end of a
+ /// bencoded type: integer, string, list or dictionary.
+ pub fn write_bytes<W: IoWrite>(&mut self, writer: W) -> Result<(), error::Error> {
+ let mut writer = ByteWriter::new(writer);
+ self.parse(&mut writer)
+ }
+
+ /// It parses a bencoded value read from input and writes the corresponding
+ /// JSON value to the output.
+    ///
+    /// # Errors
+    ///
+    /// Will return an error if:
+    ///
+    /// - It can't read from the input or write to the output.
+    /// - The input is invalid Bencode.
+    fn parse<W: Writer>(&mut self, writer: &mut W) -> Result<(), error::Error> {
+        while let Some(token) = self.tokenizer.next_token()? {
+            match token {
+                BencodeToken::Integer(integer_bytes) => {
+                    self.begin_bencoded_value(BencodeType::Integer, writer)?;
+                    // todo: add `write_bytes` to writer.
+                    for byte in integer_bytes {
+                        writer.write_byte(byte)?;
+                    }
+                }
+                BencodeToken::String(string_bytes) => {
+                    self.begin_bencoded_value(BencodeType::String, writer)?;
+
+                    let json_string = match str::from_utf8(&string_bytes) {
+                        Ok(string) => {
+                            // The string contains only valid UTF-8 chars -> print it as is
+                            string.to_owned()
+                        }
+                        Err(_) => {
+                            // The string contains invalid UTF-8 chars -> print it as hex bytes
+                            hex::encode(string_bytes)
+                        }
+                    };
+
+                    writer.write_str(
+                        &serde_json::to_string(&json_string)
+                            .expect("Failed to serialize to JSON. This should not happen because non-UTF-8 bencoded strings are serialized as hex bytes"),
+                    )?;
+                }
+                BencodeToken::BeginList => {
+                    self.begin_bencoded_value(BencodeType::List, writer)?;
+                    writer.write_byte(Self::JSON_ARRAY_BEGIN)?;
+                    self.stack.push(State::ExpectingFirstListItemOrEnd);
+                }
+                BencodeToken::BeginDict => {
+                    self.begin_bencoded_value(BencodeType::Dict, writer)?;
+                    writer.write_byte(Self::JSON_OBJ_BEGIN)?;
+                    self.stack.push(State::ExpectingFirstDictFieldOrEnd);
+                }
+                BencodeToken::EndListOrDict => {
+                    self.end_list_or_dict(writer)?;
+                }
+                BencodeToken::LineBreak => {
+                    // Ignore line breaks at the beginning, the end, or between values
+                }
+            }
+
+            self.num_processed_tokens += 1;
+        }
+
+        self.check_bad_end_stack_state(writer)
+    }
+
+    /// It updates the stack state and prints the delimiters when needed.
+    ///
+    /// Called when the first byte of a bencoded value (integer, string, list
+    /// or dict) is received.
+    ///
+    /// # Errors
+    ///
+    /// Will return an error if the writer can't write to the output.
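+    ///
+    /// For example, when a new value starts inside a list that already
+    /// contains at least one item, this method writes the `,` separator
+    /// before the value itself is written.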
+    pub fn begin_bencoded_value<W: Writer>(
+        &mut self,
+        bencode_type: BencodeType,
+        writer: &mut W,
+    ) -> Result<(), error::Error> {
+        match self.stack.peek() {
+            State::Initial => {}
+            State::ExpectingFirstListItemOrEnd => {
+                self.stack.swap_top(State::ExpectingNextListItem);
+            }
+            State::ExpectingNextListItem => {
+                writer.write_byte(Self::JSON_ARRAY_ITEMS_SEPARATOR)?;
+            }
+            State::ExpectingFirstDictFieldOrEnd => {
+                if bencode_type != BencodeType::String {
+                    return Err(error::Error::ExpectedStringForDictKeyGot(
+                        bencode_type,
+                        ReadContext {
+                            byte: None,
+                            pos: self.tokenizer.input_byte_counter(),
+                            latest_bytes: self.tokenizer.captured_bytes(),
+                        },
+                        WriteContext {
+                            byte: None,
+                            pos: writer.output_byte_counter(),
+                            latest_bytes: writer.captured_bytes(),
+                        },
+                    ));
+                }
+
+                self.stack.swap_top(State::ExpectingDictFieldValue);
+            }
+            State::ExpectingDictFieldValue => {
+                writer.write_byte(Self::JSON_OBJ_FIELD_KEY_VALUE_SEPARATOR)?;
+
+                self.stack.swap_top(State::ExpectingDictFieldKeyOrEnd);
+            }
+            State::ExpectingDictFieldKeyOrEnd => {
+                if bencode_type != BencodeType::String {
+                    return Err(error::Error::ExpectedStringForDictKeyGot(
+                        bencode_type,
+                        ReadContext {
+                            byte: None,
+                            pos: self.tokenizer.input_byte_counter(),
+                            latest_bytes: self.tokenizer.captured_bytes(),
+                        },
+                        WriteContext {
+                            byte: None,
+                            pos: writer.output_byte_counter(),
+                            latest_bytes: writer.captured_bytes(),
+                        },
+                    ));
+                }
+
+                writer.write_byte(Self::JSON_OBJ_FIELDS_SEPARATOR)?;
+
+                self.stack.swap_top(State::ExpectingDictFieldValue);
+            }
+        }
+
+        Ok(())
+    }
+
+    /// It updates the stack state and prints the delimiters when needed.
+    ///
+    /// Called when the end-of-list-or-dictionary byte is received. The ends
+    /// of integers and strings are processed while parsing them.
+    ///
+    /// # Errors
+    ///
+    /// Will return an error if the writer can't write to the output.
+    pub fn end_list_or_dict<W: Writer>(&mut self, writer: &mut W) -> Result<(), error::Error> {
+        match self.stack.peek() {
+            State::ExpectingFirstListItemOrEnd | State::ExpectingNextListItem => {
+                writer.write_byte(Self::JSON_ARRAY_END)?;
+                self.stack.pop();
+            }
+            State::ExpectingFirstDictFieldOrEnd | State::ExpectingDictFieldKeyOrEnd => {
+                writer.write_byte(Self::JSON_OBJ_END)?;
+                self.stack.pop();
+            }
+            State::ExpectingDictFieldValue => {
+                return Err(error::Error::PrematureEndOfDict(
+                    ReadContext {
+                        byte: None,
+                        pos: self.tokenizer.input_byte_counter(),
+                        latest_bytes: self.tokenizer.captured_bytes(),
+                    },
+                    WriteContext {
+                        byte: None,
+                        pos: writer.output_byte_counter(),
+                        latest_bytes: writer.captured_bytes(),
+                    },
+                ))
+            }
+            State::Initial => {
+                return Err(error::Error::NoMatchingStartForListOrDictEnd(
+                    ReadContext {
+                        byte: None,
+                        pos: self.tokenizer.input_byte_counter(),
+                        latest_bytes: self.tokenizer.captured_bytes(),
+                    },
+                    WriteContext {
+                        byte: None,
+                        pos: writer.output_byte_counter(),
+                        latest_bytes: writer.captured_bytes(),
+                    },
+                ))
+            }
+        }
+
+        Ok(())
+    }
+
+    /// It checks whether the stack is in a correct state at the end of
+    /// parsing.
+    ///
+    /// A bad final state can occur, for example, when bencoded values are not
+    /// finished.
+    ///
+    /// # Errors
+    ///
+    /// Will return an error if the stack state is not correct.
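+    ///
+    /// For example, the input `li42e` (a list that is never closed) leaves
+    /// the stack expecting the next list item, which is reported as the
+    /// `UnexpectedEndOfInputExpectingNextListItem` error.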
+    fn check_bad_end_stack_state<W: Writer>(&self, writer: &W) -> Result<(), error::Error> {
+        match self.stack.peek() {
+            State::Initial => Ok(()),
+            State::ExpectingFirstListItemOrEnd => Err(
+                error::Error::UnexpectedEndOfInputExpectingFirstListItemOrEnd(
+                    ReadContext {
+                        byte: None,
+                        pos: self.tokenizer.input_byte_counter(),
+                        latest_bytes: self.tokenizer.captured_bytes(),
+                    },
+                    WriteContext {
+                        byte: None,
+                        pos: writer.output_byte_counter(),
+                        latest_bytes: writer.captured_bytes(),
+                    },
+                ),
+            ),
+            State::ExpectingNextListItem => {
+                Err(error::Error::UnexpectedEndOfInputExpectingNextListItem(
+                    ReadContext {
+                        byte: None,
+                        pos: self.tokenizer.input_byte_counter(),
+                        latest_bytes: self.tokenizer.captured_bytes(),
+                    },
+                    WriteContext {
+                        byte: None,
+                        pos: writer.output_byte_counter(),
+                        latest_bytes: writer.captured_bytes(),
+                    },
+                ))
+            }
+            State::ExpectingFirstDictFieldOrEnd => Err(
+                error::Error::UnexpectedEndOfInputExpectingFirstDictFieldOrEnd(
+                    ReadContext {
+                        byte: None,
+                        pos: self.tokenizer.input_byte_counter(),
+                        latest_bytes: self.tokenizer.captured_bytes(),
+                    },
+                    WriteContext {
+                        byte: None,
+                        pos: writer.output_byte_counter(),
+                        latest_bytes: writer.captured_bytes(),
+                    },
+                ),
+            ),
+            State::ExpectingDictFieldValue => {
+                Err(error::Error::UnexpectedEndOfInputExpectingDictFieldValue(
+                    ReadContext {
+                        byte: None,
+                        pos: self.tokenizer.input_byte_counter(),
+                        latest_bytes: self.tokenizer.captured_bytes(),
+                    },
+                    WriteContext {
+                        byte: None,
+                        pos: writer.output_byte_counter(),
+                        latest_bytes: writer.captured_bytes(),
+                    },
+                ))
+            }
+            State::ExpectingDictFieldKeyOrEnd => Err(
+                error::Error::UnexpectedEndOfInputExpectingDictFieldKeyOrEnd(
+                    ReadContext {
+                        byte: None,
+                        pos: self.tokenizer.input_byte_counter(),
+                        latest_bytes: self.tokenizer.captured_bytes(),
+                    },
+                    WriteContext {
+                        byte: None,
+                        pos: writer.output_byte_counter(),
+                        latest_bytes: writer.captured_bytes(),
+                    },
+                ),
+            ),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use std::io::{self, Read};
+
+    use crate::parsers::generators::json::BencodeParser;
+
+    mod it_should_allow_writing {
+        use crate::parsers::generators::json::BencodeParser;
+
+        #[test]
+        fn to_any_type_implementing_io_write_trait() {
+            let mut output = Vec::new();
+
+            let mut parser = BencodeParser::new(&b"i0e"[..]);
+
+            parser
+                .write_bytes(&mut output)
+                .expect("Bencode to JSON conversion failed");
+
+            assert_eq!(output, vec!(b'0'));
+        }
+
+        #[test]
+        fn writing_to_any_type_implementing_fmt_write_trait() {
+            let mut output = String::new();
+
+            let mut parser = BencodeParser::new(&b"i0e"[..]);
+
+            parser
+                .write_str(&mut output)
+                .expect("Bencode to JSON conversion failed");
+
+            assert_eq!(output, "0".to_string());
+        }
+    }
+
+    #[test]
+    fn it_should_allow_reading_from_an_empty_input() {
+        struct EmptyReader;
+
+        impl Read for EmptyReader {
+            fn read(&mut self, _buf: &mut [u8]) -> io::Result<usize> {
+                Err(io::Error::new(
+                    io::ErrorKind::UnexpectedEof,
+                    "Unexpected EOF",
+                ))
+            }
+        }
+
+        let mut output = String::new();
+
+        let mut parser = BencodeParser::new(EmptyReader);
+
+        parser.write_str(&mut output).unwrap();
+
+        assert_eq!(output, "");
+    }
+
+    mod it_should_allow_special_bencode_cases {
+
+        use crate::{parsers::generators::json::BencodeParser, test::bencode_to_json_unchecked};
+
+        #[test]
+        fn an_empty_input() {
+            let mut output = String::new();
+
+            let mut parser = BencodeParser::new(&b""[..]);
+
+            parser
+                .write_str(&mut output)
+                .expect("Bencode to JSON conversion failed");
+
+            assert_eq!(output, String::new());
+        }
+
+        #[test]
+        fn line_breaks_at_the_beginning_of_the_input_stream() {
+            assert_eq!(bencode_to_json_unchecked(b"\ni0e"), "0".to_string());
+        }
+
+        #[test]
+        fn line_breaks_at_the_end_of_the_input_stream() {
+            assert_eq!(bencode_to_json_unchecked(b"i0e\n"), "0".to_string());
+        }
+
+        #[test]
+        fn line_breaks_between_bencoded_values() {
+            assert_eq!(
+                bencode_to_json_unchecked(b"li0e\ni1ee"),
+                "[0,1]".to_string()
+            );
+        }
+    }
+
+    mod it_should_fail {
+        use std::io::{self, Read};
+
+        use crate::{
+            parsers::{error::Error, generators::json::BencodeParser},
+            try_bencode_to_json,
+        };
+
+        #[test]
+        fn when_there_is_a_problem_reading_from_input() {
+            struct FaultyReader;
+
+            impl Read for FaultyReader {
+                fn read(&mut self, _buf: &mut [u8]) -> io::Result<usize> {
+                    Err(io::Error::new(
+                        io::ErrorKind::PermissionDenied,
+                        "Permission denied",
+                    ))
+                }
+            }
+
+            let mut output = String::new();
+
+            let mut parser = BencodeParser::new(FaultyReader);
+
+            let result = parser.write_str(&mut output);
+
+            assert!(matches!(result, Err(Error::Io(_))));
+        }
+
+        #[test]
+        fn when_it_cannot_recognized_the_fist_byte_of_a_new_bencoded_value() {
+            let invalid_bencoded_value = b"a";
+
+            let result = try_bencode_to_json(invalid_bencoded_value);
+
+            assert!(matches!(
+                result,
+                Err(Error::UnrecognizedFirstBencodeValueByte { .. })
+            ));
+        }
+
+        #[test]
+        fn when_it_reaches_the_end_of_the_input_without_finishing_parsing_a_valid_bencoded_value() {
+            let integer_with_missing_end_byte = b"i42";
+
+            let result = try_bencode_to_json(integer_with_missing_end_byte);
+
+            assert!(matches!(
+                result,
+                Err(Error::UnexpectedEndOfInputParsingInteger { .. })
+            ));
+        }
+    }
+
+    mod integers {
+        use crate::test::bencode_to_json_unchecked;
+
+        #[test]
+        fn zero() {
+            assert_eq!(bencode_to_json_unchecked(b"i0e"), "0".to_string());
+        }
+
+        #[test]
+        fn one_digit_integer() {
+            assert_eq!(bencode_to_json_unchecked(b"i1e"), "1".to_string());
+        }
+
+        #[test]
+        fn two_digits_integer() {
+            assert_eq!(bencode_to_json_unchecked(b"i42e"), "42".to_string());
+        }
+
+        #[test]
+        fn negative_integer() {
+            assert_eq!(bencode_to_json_unchecked(b"i-1e"), "-1".to_string());
+        }
+
+        #[test]
+        fn positive_integer_greater_than_i64_max() {
+            let big_positive_integer = i64::MAX.to_string() + "1";
+
+            let bencoded_big_positive_integer = format!("i{big_positive_integer}e");
+
+            assert_eq!(
+                bencode_to_json_unchecked(bencoded_big_positive_integer.as_bytes()),
+                big_positive_integer
+            );
+        }
+
+        #[test]
+        fn negative_integer_smaller_than_i64_min() {
+            let big_negative_integer = i64::MIN.to_string() + "1";
+
+            let bencoded_big_negative_integer = format!("i{big_negative_integer}e");
+
+            assert_eq!(
+                bencode_to_json_unchecked(bencoded_big_negative_integer.as_bytes()),
+                big_negative_integer
+            );
+        }
+
+        mod should_fail {
+            use crate::{parsers::error::Error, try_bencode_to_json};
+
+            #[test]
+            fn when_it_finds_an_invalid_byte() {
+                let int_with_invalid_byte = b"iae";
+
+                let result = try_bencode_to_json(int_with_invalid_byte);
+
+                assert!(matches!(
+                    result,
+                    Err(Error::UnexpectedByteParsingInteger { .. })
+                ));
+            }
+
+            #[test]
+            fn with_duplicate_sign() {
+                let int_with_invalid_byte = b"i--42e";
+
+                let result = try_bencode_to_json(int_with_invalid_byte);
+
+                assert!(matches!(
+                    result,
+                    Err(Error::UnexpectedByteParsingInteger { ..
}) + )); + } + } + } + + mod strings { + use crate::{ + test::{bencode_to_json_unchecked, bencoded_string_with_repeated_byte}, + to_bencode, + }; + + #[test] + fn length_can_contain_leading_zeros() { + assert_eq!( + bencode_to_json_unchecked(b"00:"), + r#""""#.to_string() + ); + } + + #[test] + fn empty_string() { + assert_eq!( + bencode_to_json_unchecked(b"0:"), + r#""""#.to_string() + ); + } + + #[test] + fn utf8() { + assert_eq!( + bencode_to_json_unchecked(b"4:spam"), + r#""spam""#.to_string() + ); + } + + #[test] + fn non_utf8() { + assert_eq!( + bencode_to_json_unchecked(b"4:\xFF\xFE\xFD\xFC"), + r#""fffefdfc""#.to_string() + ); + } + + #[test] + fn big_utf8_string() { + let big_string = "a".repeat(1_000_000); + + assert_eq!( + bencode_to_json_unchecked(&to_bencode(&big_string)), + format!(r#""{big_string}""#) + ); + } + + #[test] + fn big_non_utf8_string() { + let big_non_utf8_string = bencoded_string_with_repeated_byte(b'\xFF', 1_000_000); + + let expected = format!(r#""{}""#, "ff".repeat(1_000_000)); + + assert_eq!(bencode_to_json_unchecked(&big_non_utf8_string), expected); + } + + #[test] + fn ending_with_bencode_end_char() { + assert_eq!( + bencode_to_json_unchecked(b"1:e"), + r#""e""#.to_string() + ); + } + + #[test] + fn containing_a_reserved_char() { + assert_eq!( + bencode_to_json_unchecked(b"1:i"), + r#""i""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:l"), + r#""l""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:d"), + r#""d""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:l"), + r#""l""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:e"), + r#""e""#.to_string() + ); + } + + #[test] + fn containing_a_digit() { + assert_eq!( + bencode_to_json_unchecked(b"1:0"), + r#""0""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:1"), + r#""1""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:2"), + r#""2""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:3"), + r#""3""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:4"), + r#""4""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:5"), + r#""5""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:6"), + r#""6""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:7"), + r#""7""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:8"), + r#""8""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked(b"1:9"), + r#""9""#.to_string() + ); + } + + mod should_escape_json { + use crate::{test::bencode_to_json_unchecked, to_bencode}; + + #[test] + fn containing_a_double_quote() { + assert_eq!( + bencode_to_json_unchecked("1:\"".as_bytes()), + r#""\"""#.to_string() + ); + } + + #[test] + fn containing_backslashes() { + assert_eq!( + bencode_to_json_unchecked("1:\\".as_bytes()), + r#""\\""#.to_string() + ); + } + + #[test] + fn containing_control_characters() { + assert_eq!( + bencode_to_json_unchecked("1:\n".as_bytes()), + r#""\n""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked("1:\r".as_bytes()), + r#""\r""#.to_string() + ); + assert_eq!( + bencode_to_json_unchecked("1:\t".as_bytes()), + r#""\t""#.to_string() + ); + } + + #[test] + fn containing_unicode_characters() { + assert_eq!( + bencode_to_json_unchecked(&to_bencode("ñandú")), + r#""ñandú""#.to_string() + ); + } + } + + mod it_should_fail_parsing_when { + use crate::{parsers::error::Error, try_bencode_to_json}; + + #[test] + fn it_reaches_the_end_of_the_input_parsing_the_string_length() { + let 
incomplete_string_length = b"4"; + + let result = try_bencode_to_json(incomplete_string_length); + + assert!(matches!( + result, + Err(Error::UnexpectedEndOfInputParsingStringLength { .. }) + )); + } + + #[test] + fn it_reaches_the_end_of_the_input_parsing_the_string_value() { + let incomplete_string_value = b"4:123"; + + let result = try_bencode_to_json(incomplete_string_value); + + assert!(matches!( + result, + Err(Error::UnexpectedEndOfInputParsingStringValue { .. }) + )); + } + + #[test] + fn it_receives_a_non_digit_byte_in_the_string_length() { + let incomplete_string_value = b"4a:1234"; + + let result = try_bencode_to_json(incomplete_string_value); + + assert!(matches!(result, Err(Error::InvalidStringLengthByte { .. }))); + } + } + } + + mod lists { + use crate::test::{ + bencode_to_json_unchecked, generate_n_nested_empty_bencoded_lists, + generate_n_nested_empty_json_arrays, + }; + + #[test] + fn empty_list() { + assert_eq!(bencode_to_json_unchecked(b"le"), "[]".to_string()); + } + + #[test] + fn one_nested_empty_list() { + assert_eq!(bencode_to_json_unchecked(b"llee"), "[[]]".to_string()); + } + + #[test] + fn two_nested_empty_list() { + assert_eq!(bencode_to_json_unchecked(b"llleee"), "[[[]]]".to_string()); + } + + #[test] + fn many_nested_empty_list() { + assert_eq!( + bencode_to_json_unchecked(&generate_n_nested_empty_bencoded_lists(100)), + generate_n_nested_empty_json_arrays(100) + ); + } + + mod with_one_item { + use crate::test::bencode_to_json_unchecked; + + #[test] + fn integer() { + assert_eq!(bencode_to_json_unchecked(b"li42ee"), "[42]".to_string()); + } + + #[test] + fn utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"l4:spame"), + r#"["spam"]"#.to_string() + ); + } + + #[test] + fn non_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"l4:\xFF\xFE\xFD\xFCe"), + r#"["fffefdfc"]"#.to_string() + ); + } + + mod of_type_list { + use crate::test::bencode_to_json_unchecked; + + #[test] + fn two_nested_empty_list() { + assert_eq!(bencode_to_json_unchecked(b"llee"), "[[]]".to_string()); + } + + #[test] + fn three_nested_empty_lists() { + assert_eq!(bencode_to_json_unchecked(b"llleee"), "[[[]]]".to_string()); + } + + #[test] + fn one_nested_list_which_contains_one_integer() { + assert_eq!(bencode_to_json_unchecked(b"lli42eee"), "[[42]]".to_string()); + } + + #[test] + fn one_nested_list_which_contains_two_integers() { + assert_eq!( + bencode_to_json_unchecked(b"lli42ei43eee"), + "[[42,43]]".to_string() + ); + } + + #[test] + fn one_nested_list_which_contains_one_utf_8_string() { + assert_eq!( + bencode_to_json_unchecked(b"ll4:spamee"), + r#"[["spam"]]"#.to_string() + ); + } + + #[test] + fn one_nested_list_which_contains_two_utf_8_strings() { + assert_eq!( + bencode_to_json_unchecked(b"ll5:alice3:bobee"), + r#"[["alice","bob"]]"#.to_string() + ); + } + + #[test] + fn one_nested_list_which_contains_one_non_utf_8_string() { + assert_eq!( + bencode_to_json_unchecked(b"ll4:\xFF\xFE\xFD\xFCee"), + r#"[["fffefdfc"]]"#.to_string() + ); + } + + #[test] + fn one_nested_list_which_contains_two_non_utf_8_string() { + assert_eq!( + bencode_to_json_unchecked(b"ll2:\xFF\xFE2:\xFD\xFCee"), + r#"[["fffe","fdfc"]]"#.to_string() + ); + } + } + + mod of_type_dict { + use crate::test::bencode_to_json_unchecked; + + #[test] + fn empty() { + assert_eq!(bencode_to_json_unchecked(b"ldee"), "[{}]".to_string()); + } + + #[test] + fn with_one_field() { + assert_eq!( + bencode_to_json_unchecked(b"ld3:foo3:baree"), + r#"[{"foo":"bar"}]"#.to_string() + ); + } + + #[test] + fn 
with_two_fields() { + assert_eq!( + bencode_to_json_unchecked(b"ld3:bar4:spam3:fooi42eee"), + r#"[{"bar":"spam","foo":42}]"# + .to_string() + ); + } + + #[test] + fn with_nested_empty_dict() { + assert_eq!( + bencode_to_json_unchecked(b"ld3:foodeee"), + r#"[{"foo":{}}]"#.to_string() + ); + } + + #[test] + fn with_two_nested_empty_dicts() { + assert_eq!( + bencode_to_json_unchecked(b"ld3:food3:foodeeee"), + r#"[{"foo":{"foo":{}}}]"#.to_string() + ); + } + + #[test] + fn with_nested_dict_with_one_field() { + assert_eq!( + bencode_to_json_unchecked(b"ld3:food3:foo3:bareee"), + r#"[{"foo":{"foo":"bar"}}]"#.to_string() + ); + } + + #[test] + fn with_nested_dict_with_two_fields() { + assert_eq!( + bencode_to_json_unchecked(b"ld3:food3:foo3:bar3:fooi42eeee"), + r#"[{"foo":{"foo":"bar","foo":42}}]"#.to_string() + ); + } + } + } + + mod with_two_items_of_the_same_type { + use crate::test::bencode_to_json_unchecked; + + #[test] + fn two_integers() { + assert_eq!( + bencode_to_json_unchecked(b"li42ei43ee"), + "[42,43]".to_string() + ); + } + + #[test] + fn two_utf8_strings() { + assert_eq!( + bencode_to_json_unchecked(b"l5:alice3:bobe"), + r#"["alice","bob"]"#.to_string() + ); + } + + #[test] + fn two_non_utf8_strings() { + assert_eq!( + bencode_to_json_unchecked(b"l2:\xFF\xFE2:\xFD\xFCe"), + r#"["fffe","fdfc"]"#.to_string() + ); + } + + #[test] + fn two_empty_lists() { + assert_eq!(bencode_to_json_unchecked(b"llelee"), r"[[],[]]".to_string()); + } + + #[test] + fn two_empty_dicts() { + assert_eq!(bencode_to_json_unchecked(b"ldedee"), r"[{},{}]".to_string()); + } + + #[test] + fn two_lists_with_one_item() { + assert_eq!( + bencode_to_json_unchecked(b"lli42eeli42eee"), + r"[[42],[42]]".to_string() + ); + } + + #[test] + fn two_dicts_with_one_item() { + assert_eq!( + bencode_to_json_unchecked(b"ld3:fooi42eed3:fooi42eee"), + r#"[{"foo":42},{"foo":42}]"#.to_string() + ); + } + } + + mod with_two_items_of_different_types { + use crate::test::bencode_to_json_unchecked; + + #[test] + fn integer_and_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"li42e5:alicee"), + r#"[42,"alice"]"#.to_string() + ); + } + + #[test] + fn integer_and_non_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"li42e2:\xFF\xFEe"), + r#"[42,"fffe"]"#.to_string() + ); + } + + #[test] + fn integer_and_empty_list() { + assert_eq!( + bencode_to_json_unchecked(b"li42elee"), + r"[42,[]]".to_string() + ); + } + + #[test] + fn integer_and_list() { + assert_eq!( + bencode_to_json_unchecked(b"li42eli43eee"), + r"[42,[43]]".to_string() + ); + } + + #[test] + fn integer_and_empty_dict() { + assert_eq!( + bencode_to_json_unchecked(b"li42edee"), + r"[42,{}]".to_string() + ); + } + + #[test] + fn integer_and_dict() { + assert_eq!( + bencode_to_json_unchecked(b"li42ed3:fooi42eee"), + r#"[42,{"foo":42}]"#.to_string() + ); + } + + #[test] + fn utf8_string_and_integer() { + assert_eq!( + bencode_to_json_unchecked(b"l5:alicei42ee"), + r#"["alice",42]"#.to_string() + ); + } + + #[test] + fn utf8_string_and_non_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"l5:alice1:\xFFe"), + r#"["alice","ff"]"#.to_string() + ); + } + + #[test] + fn utf8_string_and_empty_list() { + assert_eq!( + bencode_to_json_unchecked(b"l5:alicelee"), + r#"["alice",[]]"#.to_string() + ); + } + + #[test] + fn utf8_string_and_list() { + assert_eq!( + bencode_to_json_unchecked(b"l5:aliceli42eee"), + r#"["alice",[42]]"#.to_string() + ); + } + + #[test] + fn utf8_string_and_empty_dict() { + assert_eq!( + bencode_to_json_unchecked(b"l5:alicedee"), + 
r#"["alice",{}]"#.to_string() + ); + } + + #[test] + fn utf8_string_and_dict() { + assert_eq!( + bencode_to_json_unchecked(b"l5:aliced3:fooi42eee"), + r#"["alice",{"foo":42}]"#.to_string() + ); + } + + #[test] + fn non_utf8_string_and_integer() { + assert_eq!( + bencode_to_json_unchecked(b"l1:\xFFi42ee"), + r#"["ff",42]"#.to_string() + ); + } + + #[test] + fn non_utf8_string_and_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"l1:\xFF3:fooe"), + r#"["ff","foo"]"#.to_string() + ); + } + + #[test] + fn non_utf8_string_and_empty_list() { + assert_eq!( + bencode_to_json_unchecked(b"l1:\xFFlee"), + r#"["ff",[]]"#.to_string() + ); + } + + #[test] + fn non_utf8_string_and_list() { + assert_eq!( + bencode_to_json_unchecked(b"l1:\xFFli42eee"), + r#"["ff",[42]]"#.to_string() + ); + } + + #[test] + fn non_utf8_string_and_empty_dict() { + assert_eq!( + bencode_to_json_unchecked(b"l1:\xFFdee"), + r#"["ff",{}]"#.to_string() + ); + } + + #[test] + fn non_utf8_string_and_dict() { + assert_eq!( + bencode_to_json_unchecked(b"l1:\xFFd3:fooi42eee"), + r#"["ff",{"foo":42}]"#.to_string() + ); + } + + #[test] + fn empty_list_and_integer() { + assert_eq!( + bencode_to_json_unchecked(b"llei42ee"), + r"[[],42]".to_string() + ); + } + + #[test] + fn empty_list_and_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"lle3:fooe"), + r#"[[],"foo"]"#.to_string() + ); + } + + #[test] + fn empty_list_and_non_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"lle1:\xFFe"), + r#"[[],"ff"]"#.to_string() + ); + } + + #[test] + fn empty_list_and_empty_dict() { + assert_eq!(bencode_to_json_unchecked(b"lledee"), r"[[],{}]".to_string()); + } + + #[test] + fn empty_list_and_dict() { + assert_eq!( + bencode_to_json_unchecked(b"lled3:fooi42eee"), + r#"[[],{"foo":42}]"#.to_string() + ); + } + + #[test] + fn list_and_integer() { + assert_eq!( + bencode_to_json_unchecked(b"lli42eei43ee"), + r"[[42],43]".to_string() + ); + } + + #[test] + fn list_and_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"lli42ee3:fooe"), + r#"[[42],"foo"]"#.to_string() + ); + } + + #[test] + fn list_and_non_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"lli42ee1:\xFFe"), + r#"[[42],"ff"]"#.to_string() + ); + } + + #[test] + fn list_and_empty_dict() { + assert_eq!( + bencode_to_json_unchecked(b"lli42eedee"), + r"[[42],{}]".to_string() + ); + } + + #[test] + fn list_and_dict() { + assert_eq!( + bencode_to_json_unchecked(b"lli42eed3:fooi43eee"), + r#"[[42],{"foo":43}]"#.to_string() + ); + } + + #[test] + fn empty_dict_and_integer() { + assert_eq!( + bencode_to_json_unchecked(b"ldei42ee"), + r"[{},42]".to_string() + ); + } + + #[test] + fn empty_dict_and_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"lde3:fooe"), + r#"[{},"foo"]"#.to_string() + ); + } + + #[test] + fn empty_dict_and_non_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"lde1:\xFFe"), + r#"[{},"ff"]"#.to_string() + ); + } + + #[test] + fn empty_dict_and_empty_list() { + assert_eq!(bencode_to_json_unchecked(b"ldelee"), r"[{},[]]".to_string()); + } + + #[test] + fn empty_dict_and_list() { + assert_eq!( + bencode_to_json_unchecked(b"ldeli42eee"), + r"[{},[42]]".to_string() + ); + } + + #[test] + fn dict_and_integer() { + assert_eq!( + bencode_to_json_unchecked(b"ld3:fooi42eei43ee"), + r#"[{"foo":42},43]"#.to_string() + ); + } + + #[test] + fn dict_and_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"ld3:fooi42ee3:fooe"), + r#"[{"foo":42},"foo"]"#.to_string() + ); + } + + #[test] + fn dict_and_non_utf8_string() { + 
assert_eq!( + bencode_to_json_unchecked(b"ld3:fooi42ee1:\xFFe"), + r#"[{"foo":42},"ff"]"#.to_string() + ); + } + + #[test] + fn dict_and_empty_list() { + assert_eq!( + bencode_to_json_unchecked(b"ld3:fooi42eelee"), + r#"[{"foo":42},[]]"#.to_string() + ); + } + + #[test] + fn dict_and_list() { + assert_eq!( + bencode_to_json_unchecked(b"ld3:fooi42eeli43eee"), + r#"[{"foo":42},[43]]"#.to_string() + ); + } + + #[test] + fn non_utf8_string_and_an_integer() { + assert_eq!( + bencode_to_json_unchecked(b"l2:\xFF\xFEi42ee"), + r#"["fffe",42]"#.to_string() + ); + } + } + + mod should_fail { + use crate::{parsers::error::Error, try_bencode_to_json}; + + #[test] + fn when_an_empty_list_does_not_have_the_matching_close_byte() { + let list_without_closing_list_byte = b"l"; + + let result = try_bencode_to_json(list_without_closing_list_byte); + + assert!(matches!( + result, + Err(Error::UnexpectedEndOfInputExpectingFirstListItemOrEnd { .. }) + )); + } + + #[test] + fn when_a_list_does_not_have_the_matching_close_byte() { + let list_without_closing_list_byte = b"li42e"; + + let result = try_bencode_to_json(list_without_closing_list_byte); + + assert!(matches!( + result, + Err(Error::UnexpectedEndOfInputExpectingNextListItem { .. }) + )); + } + + #[test] + fn when_it_receives_an_end_list_byte_without_the_matching_open_byte() { + let end_list_byte_without_start = b"e"; + + let result = try_bencode_to_json(end_list_byte_without_start); + + assert!(matches!( + result, + Err(Error::NoMatchingStartForListOrDictEnd { .. }) + )); + } + } + } + + mod dictionary { + use crate::test::{ + bencode_to_json_unchecked, generate_n_nested_empty_bencoded_dictionaries, + generate_n_nested_empty_json_objects, + }; + + #[test] + fn empty_dictionary() { + assert_eq!(bencode_to_json_unchecked(b"de"), "{}".to_string()); + } + + #[test] + fn one_nested_empty_dictionary() { + assert_eq!( + bencode_to_json_unchecked(b"d3:foodee"), + r#"{"foo":{}}"#.to_string() + ); + } + + #[test] + fn two_nested_empty_dictionaries() { + assert_eq!( + bencode_to_json_unchecked(b"d3:food3:foodeee"), + r#"{"foo":{"foo":{}}}"#.to_string() + ); + } + + #[test] + fn many_nested_empty_dictionaries() { + assert_eq!( + bencode_to_json_unchecked(&generate_n_nested_empty_bencoded_dictionaries(100)), + generate_n_nested_empty_json_objects(100) + ); + } + + mod with_a_key { + use crate::test::bencode_to_json_unchecked; + + #[test] + fn starting_with_a_digit() { + assert_eq!( + bencode_to_json_unchecked(b"d4:1fooi42ee"), + r#"{"1foo":42}"#.to_string() + ); + } + + #[test] + fn which_is_not_a_utf_8_string() { + assert_eq!( + bencode_to_json_unchecked(b"d2:\xFF\xFEi42ee"), + r#"{"fffe":42}"#.to_string() + ); + } + } + + mod with_one_field { + use crate::test::bencode_to_json_unchecked; + + #[test] + fn integer() { + assert_eq!( + bencode_to_json_unchecked(b"d3:fooi42ee"), + r#"{"foo":42}"#.to_string() + ); + } + + #[test] + fn utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar4:spame"), + r#"{"bar":"spam"}"#.to_string() + ); + } + + #[test] + fn non_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar2:\xFF\xFEe"), + r#"{"bar":"fffe"}"#.to_string() + ); + } + + #[test] + fn empty_list() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barlee"), + r#"{"bar":[]}"#.to_string() + ); + } + + #[test] + fn empty_dict() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bardee"), + r#"{"bar":{}}"#.to_string() + ); + } + } + + mod with_two_fields_of_the_same_type { + use crate::test::bencode_to_json_unchecked; + + #[test] + fn two_integers() { 
+ assert_eq!( + bencode_to_json_unchecked(b"d3:bari42e3:fooi43ee"), + r#"{"bar":42,"foo":43}"#.to_string() + ); + } + + #[test] + fn two_empty_utf8_strings() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar0:3:foo0:e"), + r#"{"bar":"","foo":""}"#.to_string() + ); + } + + #[test] + fn two_utf8_strings() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar4:spam3:foo5:alicee"), + r#"{"bar":"spam","foo":"alice"}"#.to_string() + ); + } + + #[test] + fn two_non_utf8_strings() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar1:\xFF3:foo1:\xFEe"), + r#"{"bar":"ff","foo":"fe"}"#.to_string() + ); + } + + #[test] + fn two_empty_lists() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barle3:foolee"), + r#"{"bar":[],"foo":[]}"#.to_string() + ); + } + + #[test] + fn two_empty_dicts() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barde3:foodee"), + r#"{"bar":{},"foo":{}}"#.to_string() + ); + } + + #[test] + fn two_lists() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barli42ee3:fooli43eee"), + r#"{"bar":[42],"foo":[43]}"#.to_string() + ); + } + + #[test] + fn two_dicts() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bard3:bardee3:food3:foodeee"), + r#"{"bar":{"bar":{}},"foo":{"foo":{}}}"# + .to_string() + ); + } + } + + mod with_two_fields_of_different_type { + use crate::test::bencode_to_json_unchecked; + + #[test] + fn integer_and_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bari42e3:foo5:alicee"), + r#"{"bar":42,"foo":"alice"}"#.to_string() + ); + } + + #[test] + fn integer_and_non_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bari42e3:foo1:\xFFe"), + r#"{"bar":42,"foo":"ff"}"# + .to_string() + ); + } + + #[test] + fn integer_and_empty_list() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bari42e3:foolee"), + r#"{"bar":42,"foo":[]}"#.to_string() + ); + } + + #[test] + fn integer_and_list() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bari42e3:fooli43eee"), + r#"{"bar":42,"foo":[43]}"#.to_string() + ); + } + + #[test] + fn integer_and_empty_dict() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bari42e3:foodee"), + r#"{"bar":42,"foo":{}}"#.to_string() + ); + } + + #[test] + fn integer_and_dict() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bari42e3:food3:fooi43eee"), + r#"{"bar":42,"foo":{"foo":43}}"#.to_string() + ); + } + + #[test] + fn utf8_string_and_integer() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar5:alice3:fooi43ee"), + r#"{"bar":"alice","foo":43}"#.to_string() + ); + } + + #[test] + fn utf8_string_and_non_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar5:alice3:foo1:\xFFe"), + r#"{"bar":"alice","foo":"ff"}"# + .to_string() + ); + } + + #[test] + fn utf8_string_and_empty_list() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar5:alice3:foolee"), + r#"{"bar":"alice","foo":[]}"#.to_string() + ); + } + + #[test] + fn utf8_string_and_list() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar5:alice3:fooli42eee"), + r#"{"bar":"alice","foo":[42]}"#.to_string() + ); + } + + #[test] + fn utf8_string_and_empty_dict() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar5:alice3:foodee"), + r#"{"bar":"alice","foo":{}}"#.to_string() + ); + } + + #[test] + fn utf8_string_and_dict() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar5:alice3:food3:fooi42eee"), + r#"{"bar":"alice","foo":{"foo":42}}"#.to_string() + ); + } + + #[test] + fn non_utf8_string_and_integer() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar1:\xFF3:fooi43ee"), + r#"{"bar":"ff","foo":43}"# + .to_string() + ); + } + + #[test] + fn 
non_utf8_string_and_non_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar1:\xFF3:foo1:\xFFe"), + r#"{"bar":"ff","foo":"ff"}"#.to_string() + ); + } + + #[test] + fn non_utf8_string_and_empty_list() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar1:\xFF3:foolee"), + r#"{"bar":"ff","foo":[]}"# + .to_string() + ); + } + + #[test] + fn non_utf8_string_and_list() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar1:\xFF3:fooli42eee"), + r#"{"bar":"ff","foo":[42]}"# + .to_string() + ); + } + + #[test] + fn non_utf8_string_and_empty_dict() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar1:\xFF3:foodee"), + r#"{"bar":"ff","foo":{}}"# + .to_string() + ); + } + + #[test] + fn non_utf8_string_and_dict() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bar1:\xFF3:food3:fooi42eee"), + r#"{"bar":"ff","foo":{"foo":42}}"#.to_string() + ); + } + + #[test] + fn empty_list_and_integer() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barle3:fooi42ee"), + r#"{"bar":[],"foo":42}"#.to_string() + ); + } + + #[test] + fn empty_list_and_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barle3:foo5:alicee"), + r#"{"bar":[],"foo":"alice"}"#.to_string() + ); + } + + #[test] + fn empty_list_and_non_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barle3:foo1:\xFFe"), + r#"{"bar":[],"foo":"ff"}"# + .to_string() + ); + } + + #[test] + fn empty_list_and_empty_dict() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barle3:foodee"), + r#"{"bar":[],"foo":{}}"#.to_string() + ); + } + + #[test] + fn empty_list_and_dict() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barle3:food3:foo5:aliceee"), + r#"{"bar":[],"foo":{"foo":"alice"}}"#.to_string() + ); + } + + #[test] + fn list_and_integer() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barli42ee3:fooi42ee"), + r#"{"bar":[42],"foo":42}"#.to_string() + ); + } + + #[test] + fn list_and_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barli42ee3:foo5:alicee"), + r#"{"bar":[42],"foo":"alice"}"#.to_string() + ); + } + + #[test] + fn list_and_non_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barli42ee3:foo1:\xFFe"), + r#"{"bar":[42],"foo":"ff"}"# + .to_string() + ); + } + + #[test] + fn list_and_empty_dict() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barli42ee3:foodee"), + r#"{"bar":[42],"foo":{}}"#.to_string() + ); + } + + #[test] + fn list_and_dict() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barli42ee3:food3:foo5:aliceee"), + r#"{"bar":[42],"foo":{"foo":"alice"}}"#.to_string() + ); + } + + #[test] + fn empty_dict_and_integer() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barde3:fooi42ee"), + r#"{"bar":{},"foo":42}"#.to_string() + ); + } + + #[test] + fn empty_dict_and_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barde3:foo5:alicee"), + r#"{"bar":{},"foo":"alice"}"#.to_string() + ); + } + + #[test] + fn empty_dict_and_non_utf8_string() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barde3:foo1:\xFFe"), + r#"{"bar":{},"foo":"ff"}"# + .to_string() + ); + } + + #[test] + fn empty_dict_and_empty_list() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barde3:foolee"), + r#"{"bar":{},"foo":[]}"#.to_string() + ); + } + + #[test] + fn empty_dict_and_list() { + assert_eq!( + bencode_to_json_unchecked(b"d3:barde3:fooli42eee"), + r#"{"bar":{},"foo":[42]}"#.to_string() + ); + } + + #[test] + fn dict_and_integer() { + assert_eq!( + bencode_to_json_unchecked(b"d3:bard3:bari42ee3:fooi43ee"), + r#"{"bar":{"bar":42},"foo":43}"#.to_string() + ); + } + + #[test] + fn 
dict_and_utf8_string() {
+                assert_eq!(
+                    bencode_to_json_unchecked(b"d3:bard3:bari42ee3:foo5:alicee"),
+                    r#"{"bar":{"bar":42},"foo":"alice"}"#
+                        .to_string()
+                );
+            }
+
+            #[test]
+            fn dict_and_non_utf8_string() {
+                assert_eq!(
+                    bencode_to_json_unchecked(b"d3:bard3:bari42ee3:foo1:\xFFe"),
+                    r#"{"bar":{"bar":42},"foo":"ff"}"#
+                        .to_string()
+                );
+            }
+
+            #[test]
+            fn dict_and_empty_list() {
+                assert_eq!(
+                    bencode_to_json_unchecked(b"d3:bard3:bari42ee3:foolee"),
+                    r#"{"bar":{"bar":42},"foo":[]}"#.to_string()
+                );
+            }
+
+            #[test]
+            fn dict_and_list() {
+                assert_eq!(
+                    bencode_to_json_unchecked(b"d3:bard3:bari42ee3:fooli42eee"),
+                    r#"{"bar":{"bar":42},"foo":[42]}"#
+                        .to_string()
+                );
+            }
+        }
+
+        mod should_escape_json {
+
+            mod in_field_keys {
+
+                // Only one special char is tested. The string parser contains
+                // other tests for the rest of the special chars that need to
+                // be escaped.
+
+                use crate::test::bencode_to_json_unchecked;
+
+                #[test]
+                fn containing_a_line_break_at_the_beginning_of_the_string() {
+                    assert_eq!(
+                        bencode_to_json_unchecked("d4:\nfoo3:bare".as_bytes()),
+                        r#"{"\nfoo":"bar"}"#.to_string()
+                    );
+                }
+
+                #[test]
+                fn containing_a_line_break_in_the_middle_of_the_string() {
+                    assert_eq!(
+                        bencode_to_json_unchecked("d4:f\noo3:bare".as_bytes()),
+                        r#"{"f\noo":"bar"}"#.to_string()
+                    );
+                }
+
+                #[test]
+                fn containing_a_line_break_at_the_end_of_the_string() {
+                    assert_eq!(
+                        bencode_to_json_unchecked("d4:foo\n3:bare".as_bytes()),
+                        r#"{"foo\n":"bar"}"#.to_string()
+                    );
+                }
+            }
+
+            mod in_field_values {
+                use crate::test::bencode_to_json_unchecked;
+
+                #[test]
+                fn containing_a_line_break_at_the_beginning_of_the_string() {
+                    assert_eq!(
+                        bencode_to_json_unchecked("d3:foo4:\nbare".as_bytes()),
+                        r#"{"foo":"\nbar"}"#.to_string()
+                    );
+                }
+
+                #[test]
+                fn containing_a_line_break_in_the_middle_of_the_string() {
+                    assert_eq!(
+                        bencode_to_json_unchecked("d3:foo4:ba\nre".as_bytes()),
+                        r#"{"foo":"ba\nr"}"#.to_string()
+                    );
+                }
+
+                #[test]
+                fn containing_a_line_break_at_the_end_of_the_string() {
+                    assert_eq!(
+                        bencode_to_json_unchecked("d3:foo4:bar\ne".as_bytes()),
+                        r#"{"foo":"bar\n"}"#.to_string()
+                    );
+                }
+            }
+        }
+
+        mod should_fail {
+            use crate::{parsers::error::Error, try_bencode_to_json};
+
+            #[test]
+            fn when_an_empty_dict_does_not_have_the_matching_close_byte() {
+                let dict_without_closing_dict_byte = b"d";
+
+                let result = try_bencode_to_json(dict_without_closing_dict_byte);
+
+                assert!(matches!(
+                    result,
+                    Err(Error::UnexpectedEndOfInputExpectingFirstDictFieldOrEnd { .. })
+                ));
+            }
+
+            #[test]
+            fn when_a_dict_field_does_not_have_the_value() {
+                let dict_without_closing_dict_byte = b"d3:foo";
+
+                let result = try_bencode_to_json(dict_without_closing_dict_byte);
+
+                assert!(matches!(
+                    result,
+                    Err(Error::UnexpectedEndOfInputExpectingDictFieldValue { .. })
+                ));
+            }
+
+            #[test]
+            fn when_a_dict_does_not_have_the_matching_close_byte() {
+                let dict_without_closing_dict_byte = b"d3:fooi42e";
+
+                let result = try_bencode_to_json(dict_without_closing_dict_byte);
+
+                assert!(matches!(
+                    result,
+                    Err(Error::UnexpectedEndOfInputExpectingDictFieldKeyOrEnd { .. })
+                ));
+            }
+
+            #[test]
+            fn when_it_receives_an_end_dict_byte_without_the_matching_open_byte() {
+                let end_dict_byte_without_start = b"e";
+
+                let result = try_bencode_to_json(end_dict_byte_without_start);
+
+                assert!(matches!(
+                    result,
+                    Err(Error::NoMatchingStartForListOrDictEnd { ..
}) + )); + } + + #[test] + fn when_it_receives_a_premature_end_dict_byte() { + let dict_with_missing_key_value = b"d3:fooe"; + + let result = try_bencode_to_json(dict_with_missing_key_value); + + assert!(matches!(result, Err(Error::PrematureEndOfDict { .. }))); + } + + #[test] + fn when_the_first_field_value_is_empty() { + let dict_with_missing_key_value = b"d3:fooe"; + + let result = try_bencode_to_json(dict_with_missing_key_value); + + assert!(matches!(result, Err(Error::PrematureEndOfDict { .. }))); + } + + #[test] + fn when_the_second_field_value_is_empty() { + let dict_with_missing_key_value = b"d3:foo3:bar3:fooe"; + + let result = try_bencode_to_json(dict_with_missing_key_value); + + assert!(matches!(result, Err(Error::PrematureEndOfDict { .. }))); + } + + mod when_the_field_key_is_not_a_string_for_example { + use crate::parsers::error::Error; + use crate::parsers::generators::json::BencodeType; + use crate::try_bencode_to_json; + + #[test] + fn when_the_key_in_the_first_dict_field_is_an_integer() { + let field_with_integer_key = b"di42ei43ee"; + + let result = try_bencode_to_json(field_with_integer_key); + + assert!(matches!( + result, + Err(Error::ExpectedStringForDictKeyGot( + BencodeType::Integer, + _, + _ + )) + )); + } + + #[test] + fn when_the_key_in_the_second_dict_field_is_an_integer() { + let field_with_integer_key = b"d3:foo3:bari42ei43ee"; + + let result = try_bencode_to_json(field_with_integer_key); + + assert!(matches!( + result, + Err(Error::ExpectedStringForDictKeyGot( + BencodeType::Integer, + _, + _ + )) + )); + } + + #[test] + fn when_the_key_in_the_first_dict_field_is_a_list() { + let field_with_list_key = b"dlei42ee"; + + let result = try_bencode_to_json(field_with_list_key); + + assert!(matches!( + result, + Err(Error::ExpectedStringForDictKeyGot(BencodeType::List, _, _)) + )); + } + + #[test] + fn when_the_key_in_the_second_dict_field_is_a_list() { + let field_with_list_key = b"d3:foo3:barlei42ee"; + + let result = try_bencode_to_json(field_with_list_key); + + assert!(matches!( + result, + Err(Error::ExpectedStringForDictKeyGot(BencodeType::List, _, _)) + )); + } + + #[test] + fn when_the_key_in_the_first_dict_field_is_a_dict() { + let field_with_list_key = b"ddei42ee"; + + let result = try_bencode_to_json(field_with_list_key); + + assert!(matches!( + result, + Err(Error::ExpectedStringForDictKeyGot(BencodeType::Dict, _, _)) + )); + } + + #[test] + fn when_the_key_in_the_second_dict_field_is_a_dict() { + let field_with_list_key = b"d3:foo3:bardei42ee"; + + let result = try_bencode_to_json(field_with_list_key); + + assert!(matches!( + result, + Err(Error::ExpectedStringForDictKeyGot(BencodeType::Dict, _, _)) + )); + } + } + } + } +} diff --git a/src/parsers/generators/mod.rs b/src/parsers/generators/mod.rs new file mode 100644 index 0000000..42fd26e --- /dev/null +++ b/src/parsers/generators/mod.rs @@ -0,0 +1,12 @@ +pub mod json; +pub mod stack; + +use derive_more::derive::Display; + +#[derive(Debug, PartialEq, Display)] +pub enum BencodeType { + Integer, + String, + List, + Dict, +} diff --git a/src/parsers/stack.rs b/src/parsers/generators/stack.rs similarity index 97% rename from src/parsers/stack.rs rename to src/parsers/generators/stack.rs index d6f8451..5881941 100644 --- a/src/parsers/stack.rs +++ b/src/parsers/generators/stack.rs @@ -150,7 +150,7 @@ impl Stack { #[cfg(test)] mod tests { mod the_stack_state { - use crate::parsers::stack::State; + use crate::parsers::generators::stack::State; #[test] fn 
should_be_displayed_with_single_letter_abbreviations() { @@ -165,7 +165,7 @@ mod tests { mod the_stack { mod it_should { - use crate::parsers::stack::{Stack, State}; + use crate::parsers::generators::stack::{Stack, State}; #[test] fn have_an_initial_state() { @@ -235,7 +235,7 @@ mod tests { mod be_displayed_with_single_letter_abbreviations_for_states { - use crate::parsers::stack::{Stack, State}; + use crate::parsers::generators::stack::{Stack, State}; #[test] fn with_the_initial_state() { diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs index 8c95c79..b4c08e0 100644 --- a/src/parsers/mod.rs +++ b/src/parsers/mod.rs @@ -1,2098 +1,3 @@ -//! Parsers, including the main parser and the parsers for the basic types -//! (integer and string). -//! -//! ``BencodeParser`` is the main parser. It is generic over the type of the -//! input buffer. pub mod error; -pub mod stack; +pub mod generators; pub mod tokenizer; - -/* TODO: - -- Rename this parser to generator. - -*/ - -use core::str; -use std::{ - fmt::Write as FmtWrite, - io::{Read, Write as IoWrite}, -}; - -use derive_more::derive::Display; -use error::{ReadContext, WriteContext}; -use stack::{Stack, State}; -use tokenizer::{BencodeToken, BencodeTokenizer}; - -use crate::rw::{byte_writer::ByteWriter, string_writer::StringWriter, writer::Writer}; - -#[derive(Debug, PartialEq, Display)] -pub enum BencodeType { - Integer, - String, - List, - Dict, -} - -pub struct BencodeParser { - tokenizer: BencodeTokenizer, - num_processed_tokens: u64, - stack: Stack, -} - -impl BencodeParser { - const JSON_ARRAY_BEGIN: u8 = b'['; - const JSON_ARRAY_ITEMS_SEPARATOR: u8 = b','; - const JSON_ARRAY_END: u8 = b']'; - - const JSON_OBJ_BEGIN: u8 = b'{'; - const JSON_OBJ_FIELDS_SEPARATOR: u8 = b','; - const JSON_OBJ_FIELD_KEY_VALUE_SEPARATOR: u8 = b':'; - const JSON_OBJ_END: u8 = b'}'; - - pub fn new(reader: R) -> Self { - BencodeParser { - tokenizer: BencodeTokenizer::new(reader), - num_processed_tokens: 1, - stack: Stack::default(), - } - } - - /// It parses a bencoded value read from input and writes the corresponding - /// JSON UTF-8 string value to the output. - /// - /// # Errors - /// - /// Will return an error if it can't read from the input or write to the - /// output. - /// - /// # Panics - /// - /// Will panic if receives a byte that isn't a valid begin or end of a - /// bencoded type: integer, string, list or dictionary. - pub fn write_str(&mut self, writer: W) -> Result<(), error::Error> { - let mut writer = StringWriter::new(writer); - self.parse(&mut writer) - } - - /// It parses a bencoded value read from input and writes the corresponding - /// JSON UTF-8 string value as bytes to the output. - /// - /// # Errors - /// - /// Will return an error if it can't read from the input or write to the - /// output. - /// - /// # Panics - /// - /// Will panic if receives a byte that isn't a valid begin or end of a - /// bencoded type: integer, string, list or dictionary. - pub fn write_bytes(&mut self, writer: W) -> Result<(), error::Error> { - let mut writer = ByteWriter::new(writer); - self.parse(&mut writer) - } - - /// It parses a bencoded value read from input and writes the corresponding - /// JSON value to the output. - /// - /// # Errors - /// - /// Will return an error if: - /// - /// - It can't read from the input or write to the output. - /// - The input is invalid Bencode. - fn parse(&mut self, writer: &mut W) -> Result<(), error::Error> { - while let Some(token) = self.tokenizer.next_token()? 
{ - match token { - BencodeToken::Integer(integer_bytes) => { - self.begin_bencoded_value(BencodeType::Integer, writer)?; - // todo: add `write_bytes` to writer. - for bytes in integer_bytes { - writer.write_byte(bytes)?; - } - } - BencodeToken::String(string_bytes) => { - self.begin_bencoded_value(BencodeType::String, writer)?; - - let html_tag_style_string = match str::from_utf8(&string_bytes) { - Ok(string) => { - // String only contains valid UTF-8 chars -> print it as it's - &format!("{}", string.to_owned()) - } - Err(_) => { - // String contains non valid UTF-8 chars -> print it as hex bytes - &format!("{}", hex::encode(string_bytes)) - } - }; - - writer.write_str( - &serde_json::to_string(&html_tag_style_string) - .expect("Failed to serialize to JSON. This should not happen because non UTF-8 bencoded string are serialized as hex bytes"), - )?; - } - BencodeToken::BeginList => { - self.begin_bencoded_value(BencodeType::List, writer)?; - writer.write_byte(Self::JSON_ARRAY_BEGIN)?; - self.stack.push(State::ExpectingFirstListItemOrEnd); - } - BencodeToken::BeginDict => { - self.begin_bencoded_value(BencodeType::Dict, writer)?; - writer.write_byte(Self::JSON_OBJ_BEGIN)?; - self.stack.push(State::ExpectingFirstDictFieldOrEnd); - } - BencodeToken::EndListOrDict => { - self.end_list_or_dict(writer)?; - } - BencodeToken::LineBreak => { - // Ignore line breaks at the beginning, the end, or between values - } - } - - self.num_processed_tokens += 1; - } - - self.check_bad_end_stack_state(writer) - } - - /// It updates the stack state and prints the delimiters when needed. - /// - /// Called when the first byt of a bencoded value (integer, string, list or dict) - /// is received. - /// - /// # Errors - /// - /// Will return an error if the writer can't write to the output. - pub fn begin_bencoded_value( - &mut self, - bencode_type: BencodeType, - writer: &mut W, - ) -> Result<(), error::Error> { - match self.stack.peek() { - State::Initial => {} - State::ExpectingFirstListItemOrEnd => { - self.stack.swap_top(State::ExpectingNextListItem); - } - State::ExpectingNextListItem => { - writer.write_byte(Self::JSON_ARRAY_ITEMS_SEPARATOR)?; - } - State::ExpectingFirstDictFieldOrEnd => { - if bencode_type != BencodeType::String { - return Err(error::Error::ExpectedStringForDictKeyGot( - bencode_type, - ReadContext { - byte: None, - pos: self.tokenizer.input_byte_counter(), - latest_bytes: self.tokenizer.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); - } - - self.stack.swap_top(State::ExpectingDictFieldValue); - } - State::ExpectingDictFieldValue => { - writer.write_byte(Self::JSON_OBJ_FIELD_KEY_VALUE_SEPARATOR)?; - - self.stack.swap_top(State::ExpectingDictFieldKeyOrEnd); - } - State::ExpectingDictFieldKeyOrEnd => { - if bencode_type != BencodeType::String { - return Err(error::Error::ExpectedStringForDictKeyGot( - bencode_type, - ReadContext { - byte: None, - pos: self.tokenizer.input_byte_counter(), - latest_bytes: self.tokenizer.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )); - } - - writer.write_byte(Self::JSON_OBJ_FIELDS_SEPARATOR)?; - - self.stack.swap_top(State::ExpectingDictFieldValue); - } - } - - Ok(()) - } - - /// It updates the stack state and prints the delimiters when needed. - /// - /// Called when the end of list or dictionary byte is received. 
End of - /// integers or strings are processed while parsing them. - /// - /// # Errors - /// - /// Will return an error if the writer can't write to the output. - pub fn end_list_or_dict(&mut self, writer: &mut W) -> Result<(), error::Error> { - match self.stack.peek() { - State::ExpectingFirstListItemOrEnd | State::ExpectingNextListItem => { - writer.write_byte(Self::JSON_ARRAY_END)?; - self.stack.pop(); - } - State::ExpectingFirstDictFieldOrEnd | State::ExpectingDictFieldKeyOrEnd => { - writer.write_byte(Self::JSON_OBJ_END)?; - self.stack.pop(); - } - State::ExpectingDictFieldValue => { - return Err(error::Error::PrematureEndOfDict( - ReadContext { - byte: None, - pos: self.tokenizer.input_byte_counter(), - latest_bytes: self.tokenizer.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )) - } - State::Initial => { - return Err(error::Error::NoMatchingStartForListOrDictEnd( - ReadContext { - byte: None, - pos: self.tokenizer.input_byte_counter(), - latest_bytes: self.tokenizer.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )) - } - } - - Ok(()) - } - - /// It checks if the stack state is correct at the end of the parsing. - /// - /// That could happen, for example, when bencode values are not finished. - /// - /// # Errors - /// - /// Will return an error if the stack state is not correct. - fn check_bad_end_stack_state(&self, writer: &W) -> Result<(), error::Error> { - match self.stack.peek() { - State::Initial => Ok(()), - State::ExpectingFirstListItemOrEnd => Err( - error::Error::UnexpectedEndOfInputExpectingFirstListItemOrEnd( - ReadContext { - byte: None, - pos: self.tokenizer.input_byte_counter(), - latest_bytes: self.tokenizer.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - ), - ), - State::ExpectingNextListItem => { - Err(error::Error::UnexpectedEndOfInputExpectingNextListItem( - ReadContext { - byte: None, - pos: self.tokenizer.input_byte_counter(), - latest_bytes: self.tokenizer.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )) - } - State::ExpectingFirstDictFieldOrEnd => Err( - error::Error::UnexpectedEndOfInputExpectingFirstDictFieldOrEnd( - ReadContext { - byte: None, - pos: self.tokenizer.input_byte_counter(), - latest_bytes: self.tokenizer.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - ), - ), - State::ExpectingDictFieldValue => { - Err(error::Error::UnexpectedEndOfInputExpectingDictFieldValue( - ReadContext { - byte: None, - pos: self.tokenizer.input_byte_counter(), - latest_bytes: self.tokenizer.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - )) - } - State::ExpectingDictFieldKeyOrEnd => Err( - error::Error::UnexpectedEndOfInputExpectingDictFieldKeyOrEnd( - ReadContext { - byte: None, - pos: self.tokenizer.input_byte_counter(), - latest_bytes: self.tokenizer.captured_bytes(), - }, - WriteContext { - byte: None, - pos: writer.output_byte_counter(), - latest_bytes: writer.captured_bytes(), - }, - ), - ), - } - } -} - -#[cfg(test)] -mod tests { - - use std::io::{self, Read}; - - use crate::{parsers::BencodeParser, 
test::bencode_to_json_unchecked, try_bencode_to_json}; - - mod it_should_allow_writing { - use crate::parsers::BencodeParser; - - #[test] - fn to_any_type_implementing_io_write_trait() { - let mut output = Vec::new(); - - let mut parser = BencodeParser::new(&b"i0e"[..]); - - parser - .write_bytes(&mut output) - .expect("Bencode to JSON conversion failed"); - - assert_eq!(output, vec!(b'0')); - } - - #[test] - fn writing_to_any_type_implementing_fmt_write_trait() { - let mut output = String::new(); - - let mut parser = BencodeParser::new(&b"i0e"[..]); - - parser - .write_str(&mut output) - .expect("Bencode to JSON conversion failed"); - - assert_eq!(output, "0".to_string()); - } - } - - #[test] - fn it_should_allow_reading_from_an_empty_input() { - struct EmptyReader; - - impl Read for EmptyReader { - fn read(&mut self, _buf: &mut [u8]) -> io::Result { - Err(io::Error::new( - io::ErrorKind::UnexpectedEof, - "Unexpected EOF", - )) - } - } - - let mut output = String::new(); - - let mut parser = BencodeParser::new(EmptyReader); - - parser.write_str(&mut output).unwrap(); - - assert_eq!(output, ""); - } - - mod it_should_allow_special_bencode_cases { - - use crate::{parsers::BencodeParser, test::bencode_to_json_unchecked}; - - #[test] - fn an_empty_input() { - let mut output = String::new(); - - let mut parser = BencodeParser::new(&b""[..]); - - parser - .write_str(&mut output) - .expect("Bencode to JSON conversion failed"); - - assert_eq!(output, String::new()); - } - - #[test] - fn line_breaks_at_the_beginning_of_the_input_stream() { - assert_eq!(bencode_to_json_unchecked(b"\ni0e"), "0".to_string()); - } - - #[test] - fn line_breaks_at_the_end_of_the_input_stream() { - assert_eq!(bencode_to_json_unchecked(b"i0e\n"), "0".to_string()); - } - - #[test] - fn line_breaks_between_bencoded_values() { - assert_eq!( - bencode_to_json_unchecked(b"li0e\ni1ee"), - "[0,1]".to_string() - ); - } - } - - mod it_should_fail { - use std::io::{self, Read}; - - use crate::{ - parsers::{error::Error, BencodeParser}, - try_bencode_to_json, - }; - - #[test] - fn when_there_is_a_problem_reading_from_input() { - struct FaultyReader; - - impl Read for FaultyReader { - fn read(&mut self, _buf: &mut [u8]) -> io::Result { - Err(io::Error::new( - io::ErrorKind::PermissionDenied, - "Permission denied", - )) - } - } - - let mut output = String::new(); - - let mut parser = BencodeParser::new(FaultyReader); - - let result = parser.write_str(&mut output); - - assert!(matches!(result, Err(Error::Io(_)))); - } - - #[test] - fn when_it_cannot_recognized_the_fist_byte_of_a_new_bencoded_value() { - let invalid_bencoded_value = b"a"; - - let result = try_bencode_to_json(invalid_bencoded_value); - - assert!(matches!( - result, - Err(Error::UnrecognizedFirstBencodeValueByte { .. }) - )); - } - - #[test] - fn when_it_reaches_the_end_of_the_input_without_finishing_parsing_a_valid_bencoded_value() { - let integer_with_missing_end_byte = b"i42"; - - let result = try_bencode_to_json(integer_with_missing_end_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputParsingInteger { .. 
}) - )); - } - } - - mod integers { - use crate::test::bencode_to_json_unchecked; - - #[test] - fn zero() { - assert_eq!(bencode_to_json_unchecked(b"i0e"), "0".to_string()); - } - - #[test] - fn one_digit_integer() { - assert_eq!(bencode_to_json_unchecked(b"i1e"), "1".to_string()); - } - - #[test] - fn two_digits_integer() { - assert_eq!(bencode_to_json_unchecked(b"i42e"), "42".to_string()); - } - - #[test] - fn negative_integer() { - assert_eq!(bencode_to_json_unchecked(b"i-1e"), "-1".to_string()); - } - - #[test] - fn positive_integer_greater_than_i64_max() { - let big_positive_integer = i64::MAX.to_string() + "1"; - - let bencoded_big_positive_integer = format!("i{big_positive_integer}e"); - - assert_eq!( - bencode_to_json_unchecked(bencoded_big_positive_integer.as_bytes()), - big_positive_integer - ); - } - - #[test] - fn negative_integer_smaller_than_i64_min() { - let big_negative_integer = i64::MIN.to_string() + "1"; - - let bencoded_big_negative_integer = format!("i{big_negative_integer}e"); - - assert_eq!( - bencode_to_json_unchecked(bencoded_big_negative_integer.as_bytes()), - big_negative_integer - ); - } - - mod should_fail { - use crate::{parsers::error::Error, try_bencode_to_json}; - - #[test] - fn when_it_finds_an_invalid_byte() { - let int_with_invalid_byte = b"iae"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedByteParsingInteger { .. }) - )); - } - - #[test] - fn with_duplicate_sign() { - let int_with_invalid_byte = b"i--42e"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedByteParsingInteger { .. }) - )); - } - } - } - - mod strings { - use crate::{ - test::{bencode_to_json_unchecked, bencoded_string_with_repeated_byte}, - to_bencode, - }; - - #[test] - fn length_can_contain_leading_zeros() { - assert_eq!( - bencode_to_json_unchecked(b"00:"), - r#""""#.to_string() - ); - } - - #[test] - fn empty_string() { - assert_eq!( - bencode_to_json_unchecked(b"0:"), - r#""""#.to_string() - ); - } - - #[test] - fn utf8() { - assert_eq!( - bencode_to_json_unchecked(b"4:spam"), - r#""spam""#.to_string() - ); - } - - #[test] - fn non_utf8() { - assert_eq!( - bencode_to_json_unchecked(b"4:\xFF\xFE\xFD\xFC"), - r#""fffefdfc""#.to_string() - ); - } - - #[test] - fn big_utf8_string() { - let big_string = "a".repeat(1_000_000); - - assert_eq!( - bencode_to_json_unchecked(&to_bencode(&big_string)), - format!(r#""{big_string}""#) - ); - } - - #[test] - fn big_non_utf8_string() { - let big_non_utf8_string = bencoded_string_with_repeated_byte(b'\xFF', 1_000_000); - - let expected = format!(r#""{}""#, "ff".repeat(1_000_000)); - - assert_eq!(bencode_to_json_unchecked(&big_non_utf8_string), expected); - } - - #[test] - fn ending_with_bencode_end_char() { - assert_eq!( - bencode_to_json_unchecked(b"1:e"), - r#""e""#.to_string() - ); - } - - #[test] - fn containing_a_reserved_char() { - assert_eq!( - bencode_to_json_unchecked(b"1:i"), - r#""i""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:l"), - r#""l""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:d"), - r#""d""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:l"), - r#""l""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:e"), - r#""e""#.to_string() - ); - } - - #[test] - fn containing_a_digit() { - assert_eq!( - bencode_to_json_unchecked(b"1:0"), - r#""0""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:1"), - r#""1""#.to_string() 
- ); - assert_eq!( - bencode_to_json_unchecked(b"1:2"), - r#""2""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:3"), - r#""3""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:4"), - r#""4""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:5"), - r#""5""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:6"), - r#""6""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:7"), - r#""7""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:8"), - r#""8""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:9"), - r#""9""#.to_string() - ); - } - - mod should_escape_json { - use crate::{parsers::tests::bencode_to_json_unchecked, to_bencode}; - - #[test] - fn containing_a_double_quote() { - assert_eq!( - bencode_to_json_unchecked("1:\"".as_bytes()), - r#""\"""#.to_string() - ); - } - - #[test] - fn containing_backslashes() { - assert_eq!( - bencode_to_json_unchecked("1:\\".as_bytes()), - r#""\\""#.to_string() - ); - } - - #[test] - fn containing_control_characters() { - assert_eq!( - bencode_to_json_unchecked("1:\n".as_bytes()), - r#""\n""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked("1:\r".as_bytes()), - r#""\r""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked("1:\t".as_bytes()), - r#""\t""#.to_string() - ); - } - - #[test] - fn containing_unicode_characters() { - assert_eq!( - bencode_to_json_unchecked(&to_bencode("ñandú")), - r#""ñandú""#.to_string() - ); - } - } - - mod it_should_fail_parsing_when { - use crate::parsers::{error::Error, tests::try_bencode_to_json}; - - #[test] - fn it_reaches_the_end_of_the_input_parsing_the_string_length() { - let incomplete_string_length = b"4"; - - let result = try_bencode_to_json(incomplete_string_length); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputParsingStringLength { .. }) - )); - } - - #[test] - fn it_reaches_the_end_of_the_input_parsing_the_string_value() { - let incomplete_string_value = b"4:123"; - - let result = try_bencode_to_json(incomplete_string_value); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputParsingStringValue { .. }) - )); - } - - #[test] - fn it_receives_a_non_digit_byte_in_the_string_length() { - let incomplete_string_value = b"4a:1234"; - - let result = try_bencode_to_json(incomplete_string_value); - - assert!(matches!(result, Err(Error::InvalidStringLengthByte { .. 
}))); - } - } - } - - mod lists { - use crate::{ - parsers::tests::bencode_to_json_unchecked, - test::{generate_n_nested_empty_bencoded_lists, generate_n_nested_empty_json_arrays}, - }; - - #[test] - fn empty_list() { - assert_eq!(bencode_to_json_unchecked(b"le"), "[]".to_string()); - } - - #[test] - fn one_nested_empty_list() { - assert_eq!(bencode_to_json_unchecked(b"llee"), "[[]]".to_string()); - } - - #[test] - fn two_nested_empty_list() { - assert_eq!(bencode_to_json_unchecked(b"llleee"), "[[[]]]".to_string()); - } - - #[test] - fn many_nested_empty_list() { - assert_eq!( - bencode_to_json_unchecked(&generate_n_nested_empty_bencoded_lists(100)), - generate_n_nested_empty_json_arrays(100) - ); - } - - mod with_one_item { - use crate::parsers::tests::bencode_to_json_unchecked; - - #[test] - fn integer() { - assert_eq!(bencode_to_json_unchecked(b"li42ee"), "[42]".to_string()); - } - - #[test] - fn utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"l4:spame"), - r#"["spam"]"#.to_string() - ); - } - - #[test] - fn non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"l4:\xFF\xFE\xFD\xFCe"), - r#"["fffefdfc"]"#.to_string() - ); - } - - mod of_type_list { - use crate::parsers::tests::bencode_to_json_unchecked; - - #[test] - fn two_nested_empty_list() { - assert_eq!(bencode_to_json_unchecked(b"llee"), "[[]]".to_string()); - } - - #[test] - fn three_nested_empty_lists() { - assert_eq!(bencode_to_json_unchecked(b"llleee"), "[[[]]]".to_string()); - } - - #[test] - fn one_nested_list_which_contains_one_integer() { - assert_eq!(bencode_to_json_unchecked(b"lli42eee"), "[[42]]".to_string()); - } - - #[test] - fn one_nested_list_which_contains_two_integers() { - assert_eq!( - bencode_to_json_unchecked(b"lli42ei43eee"), - "[[42,43]]".to_string() - ); - } - - #[test] - fn one_nested_list_which_contains_one_utf_8_string() { - assert_eq!( - bencode_to_json_unchecked(b"ll4:spamee"), - r#"[["spam"]]"#.to_string() - ); - } - - #[test] - fn one_nested_list_which_contains_two_utf_8_strings() { - assert_eq!( - bencode_to_json_unchecked(b"ll5:alice3:bobee"), - r#"[["alice","bob"]]"#.to_string() - ); - } - - #[test] - fn one_nested_list_which_contains_one_non_utf_8_string() { - assert_eq!( - bencode_to_json_unchecked(b"ll4:\xFF\xFE\xFD\xFCee"), - r#"[["fffefdfc"]]"#.to_string() - ); - } - - #[test] - fn one_nested_list_which_contains_two_non_utf_8_string() { - assert_eq!( - bencode_to_json_unchecked(b"ll2:\xFF\xFE2:\xFD\xFCee"), - r#"[["fffe","fdfc"]]"#.to_string() - ); - } - } - - mod of_type_dict { - use crate::parsers::tests::bencode_to_json_unchecked; - - #[test] - fn empty() { - assert_eq!(bencode_to_json_unchecked(b"ldee"), "[{}]".to_string()); - } - - #[test] - fn with_one_field() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:foo3:baree"), - r#"[{"foo":"bar"}]"#.to_string() - ); - } - - #[test] - fn with_two_fields() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:bar4:spam3:fooi42eee"), - r#"[{"bar":"spam","foo":42}]"# - .to_string() - ); - } - - #[test] - fn with_nested_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:foodeee"), - r#"[{"foo":{}}]"#.to_string() - ); - } - - #[test] - fn with_two_nested_empty_dicts() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:food3:foodeeee"), - r#"[{"foo":{"foo":{}}}]"#.to_string() - ); - } - - #[test] - fn with_nested_dict_with_one_field() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:food3:foo3:bareee"), - r#"[{"foo":{"foo":"bar"}}]"#.to_string() - ); - } - - #[test] - fn with_nested_dict_with_two_fields() { - 
assert_eq!( - bencode_to_json_unchecked(b"ld3:food3:foo3:bar3:fooi42eeee"), - r#"[{"foo":{"foo":"bar","foo":42}}]"#.to_string() - ); - } - } - } - - mod with_two_items_of_the_same_type { - use crate::parsers::tests::bencode_to_json_unchecked; - - #[test] - fn two_integers() { - assert_eq!( - bencode_to_json_unchecked(b"li42ei43ee"), - "[42,43]".to_string() - ); - } - - #[test] - fn two_utf8_strings() { - assert_eq!( - bencode_to_json_unchecked(b"l5:alice3:bobe"), - r#"["alice","bob"]"#.to_string() - ); - } - - #[test] - fn two_non_utf8_strings() { - assert_eq!( - bencode_to_json_unchecked(b"l2:\xFF\xFE2:\xFD\xFCe"), - r#"["fffe","fdfc"]"#.to_string() - ); - } - - #[test] - fn two_empty_lists() { - assert_eq!(bencode_to_json_unchecked(b"llelee"), r"[[],[]]".to_string()); - } - - #[test] - fn two_empty_dicts() { - assert_eq!(bencode_to_json_unchecked(b"ldedee"), r"[{},{}]".to_string()); - } - - #[test] - fn two_lists_with_one_item() { - assert_eq!( - bencode_to_json_unchecked(b"lli42eeli42eee"), - r"[[42],[42]]".to_string() - ); - } - - #[test] - fn two_dicts_with_one_item() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:fooi42eed3:fooi42eee"), - r#"[{"foo":42},{"foo":42}]"#.to_string() - ); - } - } - - mod with_two_items_of_different_types { - use crate::parsers::tests::bencode_to_json_unchecked; - - #[test] - fn integer_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"li42e5:alicee"), - r#"[42,"alice"]"#.to_string() - ); - } - - #[test] - fn integer_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"li42e2:\xFF\xFEe"), - r#"[42,"fffe"]"#.to_string() - ); - } - - #[test] - fn integer_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"li42elee"), - r"[42,[]]".to_string() - ); - } - - #[test] - fn integer_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"li42eli43eee"), - r"[42,[43]]".to_string() - ); - } - - #[test] - fn integer_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"li42edee"), - r"[42,{}]".to_string() - ); - } - - #[test] - fn integer_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"li42ed3:fooi42eee"), - r#"[42,{"foo":42}]"#.to_string() - ); - } - - #[test] - fn utf8_string_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"l5:alicei42ee"), - r#"["alice",42]"#.to_string() - ); - } - - #[test] - fn utf8_string_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"l5:alice1:\xFFe"), - r#"["alice","ff"]"#.to_string() - ); - } - - #[test] - fn utf8_string_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"l5:alicelee"), - r#"["alice",[]]"#.to_string() - ); - } - - #[test] - fn utf8_string_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"l5:aliceli42eee"), - r#"["alice",[42]]"#.to_string() - ); - } - - #[test] - fn utf8_string_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"l5:alicedee"), - r#"["alice",{}]"#.to_string() - ); - } - - #[test] - fn utf8_string_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"l5:aliced3:fooi42eee"), - r#"["alice",{"foo":42}]"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"l1:\xFFi42ee"), - r#"["ff",42]"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"l1:\xFF3:fooe"), - r#"["ff","foo"]"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"l1:\xFFlee"), - r#"["ff",[]]"#.to_string() - ); - } - - #[test] - fn 
non_utf8_string_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"l1:\xFFli42eee"), - r#"["ff",[42]]"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"l1:\xFFdee"), - r#"["ff",{}]"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"l1:\xFFd3:fooi42eee"), - r#"["ff",{"foo":42}]"#.to_string() - ); - } - - #[test] - fn empty_list_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"llei42ee"), - r"[[],42]".to_string() - ); - } - - #[test] - fn empty_list_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"lle3:fooe"), - r#"[[],"foo"]"#.to_string() - ); - } - - #[test] - fn empty_list_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"lle1:\xFFe"), - r#"[[],"ff"]"#.to_string() - ); - } - - #[test] - fn empty_list_and_empty_dict() { - assert_eq!(bencode_to_json_unchecked(b"lledee"), r"[[],{}]".to_string()); - } - - #[test] - fn empty_list_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"lled3:fooi42eee"), - r#"[[],{"foo":42}]"#.to_string() - ); - } - - #[test] - fn list_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"lli42eei43ee"), - r"[[42],43]".to_string() - ); - } - - #[test] - fn list_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"lli42ee3:fooe"), - r#"[[42],"foo"]"#.to_string() - ); - } - - #[test] - fn list_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"lli42ee1:\xFFe"), - r#"[[42],"ff"]"#.to_string() - ); - } - - #[test] - fn list_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"lli42eedee"), - r"[[42],{}]".to_string() - ); - } - - #[test] - fn list_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"lli42eed3:fooi43eee"), - r#"[[42],{"foo":43}]"#.to_string() - ); - } - - #[test] - fn empty_dict_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"ldei42ee"), - r"[{},42]".to_string() - ); - } - - #[test] - fn empty_dict_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"lde3:fooe"), - r#"[{},"foo"]"#.to_string() - ); - } - - #[test] - fn empty_dict_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"lde1:\xFFe"), - r#"[{},"ff"]"#.to_string() - ); - } - - #[test] - fn empty_dict_and_empty_list() { - assert_eq!(bencode_to_json_unchecked(b"ldelee"), r"[{},[]]".to_string()); - } - - #[test] - fn empty_dict_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"ldeli42eee"), - r"[{},[42]]".to_string() - ); - } - - #[test] - fn dict_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:fooi42eei43ee"), - r#"[{"foo":42},43]"#.to_string() - ); - } - - #[test] - fn dict_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:fooi42ee3:fooe"), - r#"[{"foo":42},"foo"]"#.to_string() - ); - } - - #[test] - fn dict_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:fooi42ee1:\xFFe"), - r#"[{"foo":42},"ff"]"#.to_string() - ); - } - - #[test] - fn dict_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:fooi42eelee"), - r#"[{"foo":42},[]]"#.to_string() - ); - } - - #[test] - fn dict_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:fooi42eeli43eee"), - r#"[{"foo":42},[43]]"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_an_integer() { - assert_eq!( - bencode_to_json_unchecked(b"l2:\xFF\xFEi42ee"), - r#"["fffe",42]"#.to_string() - ); - } - } - - mod should_fail { - use crate::{parsers::error::Error, try_bencode_to_json}; - - #[test] - fn 
when_an_empty_list_does_not_have_the_matching_close_byte() { - let list_without_closing_list_byte = b"l"; - - let result = try_bencode_to_json(list_without_closing_list_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputExpectingFirstListItemOrEnd { .. }) - )); - } - - #[test] - fn when_a_list_does_not_have_the_matching_close_byte() { - let list_without_closing_list_byte = b"li42e"; - - let result = try_bencode_to_json(list_without_closing_list_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputExpectingNextListItem { .. }) - )); - } - - #[test] - fn when_it_receives_an_end_list_byte_without_the_matching_open_byte() { - let end_list_byte_without_start = b"e"; - - let result = try_bencode_to_json(end_list_byte_without_start); - - assert!(matches!( - result, - Err(Error::NoMatchingStartForListOrDictEnd { .. }) - )); - } - } - } - - mod dictionary { - use crate::{ - parsers::tests::bencode_to_json_unchecked, - test::{ - generate_n_nested_empty_bencoded_dictionaries, generate_n_nested_empty_json_objects, - }, - }; - - #[test] - fn empty_dictionary() { - assert_eq!(bencode_to_json_unchecked(b"de"), "{}".to_string()); - } - - #[test] - fn one_nested_empty_dictionary() { - assert_eq!( - bencode_to_json_unchecked(b"d3:foodee"), - r#"{"foo":{}}"#.to_string() - ); - } - - #[test] - fn two_nested_empty_dictionaries() { - assert_eq!( - bencode_to_json_unchecked(b"d3:food3:foodeee"), - r#"{"foo":{"foo":{}}}"#.to_string() - ); - } - - #[test] - fn many_nested_empty_dictionaries() { - assert_eq!( - bencode_to_json_unchecked(&generate_n_nested_empty_bencoded_dictionaries(100)), - generate_n_nested_empty_json_objects(100) - ); - } - - mod with_a_key { - use crate::parsers::tests::bencode_to_json_unchecked; - - #[test] - fn starting_with_a_digit() { - assert_eq!( - bencode_to_json_unchecked(b"d4:1fooi42ee"), - r#"{"1foo":42}"#.to_string() - ); - } - - #[test] - fn which_is_not_a_utf_8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d2:\xFF\xFEi42ee"), - r#"{"fffe":42}"#.to_string() - ); - } - } - - mod with_one_field { - use crate::parsers::tests::bencode_to_json_unchecked; - - #[test] - fn integer() { - assert_eq!( - bencode_to_json_unchecked(b"d3:fooi42ee"), - r#"{"foo":42}"#.to_string() - ); - } - - #[test] - fn utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar4:spame"), - r#"{"bar":"spam"}"#.to_string() - ); - } - - #[test] - fn non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar2:\xFF\xFEe"), - r#"{"bar":"fffe"}"#.to_string() - ); - } - - #[test] - fn empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barlee"), - r#"{"bar":[]}"#.to_string() - ); - } - - #[test] - fn empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bardee"), - r#"{"bar":{}}"#.to_string() - ); - } - } - - mod with_two_fields_of_the_same_type { - use crate::parsers::tests::bencode_to_json_unchecked; - - #[test] - fn two_integers() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bari42e3:fooi43ee"), - r#"{"bar":42,"foo":43}"#.to_string() - ); - } - - #[test] - fn two_empty_utf8_strings() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar0:3:foo0:e"), - r#"{"bar":"","foo":""}"#.to_string() - ); - } - - #[test] - fn two_utf8_strings() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar4:spam3:foo5:alicee"), - r#"{"bar":"spam","foo":"alice"}"#.to_string() - ); - } - - #[test] - fn two_non_utf8_strings() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar1:\xFF3:foo1:\xFEe"), - r#"{"bar":"ff","foo":"fe"}"#.to_string() - ); - } - - 
#[test] - fn two_empty_lists() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barle3:foolee"), - r#"{"bar":[],"foo":[]}"#.to_string() - ); - } - - #[test] - fn two_empty_dicts() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barde3:foodee"), - r#"{"bar":{},"foo":{}}"#.to_string() - ); - } - - #[test] - fn two_lists() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barli42ee3:fooli43eee"), - r#"{"bar":[42],"foo":[43]}"#.to_string() - ); - } - - #[test] - fn two_dicts() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bard3:bardee3:food3:foodeee"), - r#"{"bar":{"bar":{}},"foo":{"foo":{}}}"# - .to_string() - ); - } - } - - mod with_two_fields_of_different_type { - use crate::test::bencode_to_json_unchecked; - - #[test] - fn integer_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bari42e3:foo5:alicee"), - r#"{"bar":42,"foo":"alice"}"#.to_string() - ); - } - - #[test] - fn integer_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bari42e3:foo1:\xFFe"), - r#"{"bar":42,"foo":"ff"}"# - .to_string() - ); - } - - #[test] - fn integer_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bari42e3:foolee"), - r#"{"bar":42,"foo":[]}"#.to_string() - ); - } - - #[test] - fn integer_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bari42e3:fooli43eee"), - r#"{"bar":42,"foo":[43]}"#.to_string() - ); - } - - #[test] - fn integer_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bari42e3:foodee"), - r#"{"bar":42,"foo":{}}"#.to_string() - ); - } - - #[test] - fn integer_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bari42e3:food3:fooi43eee"), - r#"{"bar":42,"foo":{"foo":43}}"#.to_string() - ); - } - - #[test] - fn utf8_string_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar5:alice3:fooi43ee"), - r#"{"bar":"alice","foo":43}"#.to_string() - ); - } - - #[test] - fn utf8_string_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar5:alice3:foo1:\xFFe"), - r#"{"bar":"alice","foo":"ff"}"# - .to_string() - ); - } - - #[test] - fn utf8_string_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar5:alice3:foolee"), - r#"{"bar":"alice","foo":[]}"#.to_string() - ); - } - - #[test] - fn utf8_string_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar5:alice3:fooli42eee"), - r#"{"bar":"alice","foo":[42]}"#.to_string() - ); - } - - #[test] - fn utf8_string_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar5:alice3:foodee"), - r#"{"bar":"alice","foo":{}}"#.to_string() - ); - } - - #[test] - fn utf8_string_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar5:alice3:food3:fooi42eee"), - r#"{"bar":"alice","foo":{"foo":42}}"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar1:\xFF3:fooi43ee"), - r#"{"bar":"ff","foo":43}"# - .to_string() - ); - } - - #[test] - fn non_utf8_string_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar1:\xFF3:foo1:\xFFe"), - r#"{"bar":"ff","foo":"ff"}"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar1:\xFF3:foolee"), - r#"{"bar":"ff","foo":[]}"# - .to_string() - ); - } - - #[test] - fn non_utf8_string_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar1:\xFF3:fooli42eee"), - r#"{"bar":"ff","foo":[42]}"# - .to_string() - ); - } - - #[test] - fn non_utf8_string_and_empty_dict() { - assert_eq!( - 
bencode_to_json_unchecked(b"d3:bar1:\xFF3:foodee"), - r#"{"bar":"ff","foo":{}}"# - .to_string() - ); - } - - #[test] - fn non_utf8_string_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar1:\xFF3:food3:fooi42eee"), - r#"{"bar":"ff","foo":{"foo":42}}"#.to_string() - ); - } - - #[test] - fn empty_list_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barle3:fooi42ee"), - r#"{"bar":[],"foo":42}"#.to_string() - ); - } - - #[test] - fn empty_list_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barle3:foo5:alicee"), - r#"{"bar":[],"foo":"alice"}"#.to_string() - ); - } - - #[test] - fn empty_list_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barle3:foo1:\xFFe"), - r#"{"bar":[],"foo":"ff"}"# - .to_string() - ); - } - - #[test] - fn empty_list_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barle3:foodee"), - r#"{"bar":[],"foo":{}}"#.to_string() - ); - } - - #[test] - fn empty_list_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barle3:food3:foo5:aliceee"), - r#"{"bar":[],"foo":{"foo":"alice"}}"#.to_string() - ); - } - - #[test] - fn list_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barli42ee3:fooi42ee"), - r#"{"bar":[42],"foo":42}"#.to_string() - ); - } - - #[test] - fn list_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barli42ee3:foo5:alicee"), - r#"{"bar":[42],"foo":"alice"}"#.to_string() - ); - } - - #[test] - fn list_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barli42ee3:foo1:\xFFe"), - r#"{"bar":[42],"foo":"ff"}"# - .to_string() - ); - } - - #[test] - fn list_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barli42ee3:foodee"), - r#"{"bar":[42],"foo":{}}"#.to_string() - ); - } - - #[test] - fn list_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barli42ee3:food3:foo5:aliceee"), - r#"{"bar":[42],"foo":{"foo":"alice"}}"#.to_string() - ); - } - - #[test] - fn empty_dict_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barde3:fooi42ee"), - r#"{"bar":{},"foo":42}"#.to_string() - ); - } - - #[test] - fn empty_dict_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barde3:foo5:alicee"), - r#"{"bar":{},"foo":"alice"}"#.to_string() - ); - } - - #[test] - fn empty_dict_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barde3:foo1:\xFFe"), - r#"{"bar":{},"foo":"ff"}"# - .to_string() - ); - } - - #[test] - fn empty_dict_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barde3:foolee"), - r#"{"bar":{},"foo":[]}"#.to_string() - ); - } - - #[test] - fn empty_dict_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barde3:fooli42eee"), - r#"{"bar":{},"foo":[42]}"#.to_string() - ); - } - - #[test] - fn dict_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bard3:bari42ee3:fooi43ee"), - r#"{"bar":{"bar":42},"foo":43}"#.to_string() - ); - } - - #[test] - fn dict_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bard3:bari42ee3:foo5:alicee"), - r#"{"bar":{"bar":42},"foo":"alice"}"# - .to_string() - ); - } - - #[test] - fn dict_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bard3:bari42ee3:foo1:\xFFe"), - r#"{"bar":{"bar":42},"foo":"ff"}"# - .to_string() - ); - } - - #[test] - fn dict_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bard3:bari42ee3:foolee"), - r#"{"bar":{"bar":42},"foo":[]}"#.to_string() - ); - } - - #[test] - fn dict_and_list() { - assert_eq!( - 
bencode_to_json_unchecked(b"d3:bard3:bari42ee3:fooli42eee"), - r#"{"bar":{"bar":42},"foo":[42]}"# - .to_string() - ); - } - } - - mod should_escape_json { - - mod in_field_keys { - use crate::parsers::tests::bencode_to_json_unchecked; - - // Only one especial char is tested. The string parser contains - // other tests for the rest of the special chars that need to be - // escaped. - - #[test] - fn containing_a_line_break_at_the_beginning_of_the_string() { - assert_eq!( - bencode_to_json_unchecked("d4:\nfoo3:bare".as_bytes()), - r#"{"\nfoo":"bar"}"#.to_string() - ); - } - - #[test] - fn containing_a_line_break_in_the_middle_of_the_string() { - assert_eq!( - bencode_to_json_unchecked("d4:f\noo3:bare".as_bytes()), - r#"{"f\noo":"bar"}"#.to_string() - ); - } - - #[test] - fn containing_a_line_break_at_the_end_of_the_string() { - assert_eq!( - bencode_to_json_unchecked("d4:foo\n3:bare".as_bytes()), - r#"{"foo\n":"bar"}"#.to_string() - ); - } - } - - mod in_field_values { - use crate::parsers::tests::bencode_to_json_unchecked; - - #[test] - fn containing_a_line_break_at_the_beginning_of_the_string() { - assert_eq!( - bencode_to_json_unchecked("d3:foo4:\nbare".as_bytes()), - r#"{"foo":"\nbar"}"#.to_string() - ); - } - - #[test] - fn containing_a_line_break_in_the_middle_of_the_string() { - assert_eq!( - bencode_to_json_unchecked("d3:foo4:ba\nre".as_bytes()), - r#"{"foo":"ba\nr"}"#.to_string() - ); - } - - #[test] - fn containing_a_line_break_at_the_end_of_the_string() { - assert_eq!( - bencode_to_json_unchecked("d3:foo4:bar\ne".as_bytes()), - r#"{"foo":"bar\n"}"#.to_string() - ); - } - } - } - - mod should_fail { - use crate::{parsers::error::Error, try_bencode_to_json}; - - #[test] - fn when_an_empty_dict_does_not_have_the_matching_close_byte() { - let dict_without_closing_dict_byte = b"d"; - - let result = try_bencode_to_json(dict_without_closing_dict_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputExpectingFirstDictFieldOrEnd { .. }) - )); - } - - #[test] - fn when_a_dict_field_does_not_have_the_value() { - let dict_without_closing_dict_byte = b"d3:foo"; - - let result = try_bencode_to_json(dict_without_closing_dict_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputExpectingDictFieldValue { .. }) - )); - } - - #[test] - fn when_a_dict_does_not_have_the_matching_close_byte() { - let dict_without_closing_dict_byte = b"d3:fooi42e"; - - let result = try_bencode_to_json(dict_without_closing_dict_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputExpectingDictFieldKeyOrEnd { .. }) - )); - } - - #[test] - fn when_it_receives_an_end_dict_byte_without_the_matching_open_byte() { - let end_dict_byte_without_start = b"e"; - - let result = try_bencode_to_json(end_dict_byte_without_start); - - assert!(matches!( - result, - Err(Error::NoMatchingStartForListOrDictEnd { .. }) - )); - } - - #[test] - fn when_it_receives_a_premature_end_dict_byte() { - let dict_with_missing_key_value = b"d3:fooe"; - - let result = try_bencode_to_json(dict_with_missing_key_value); - - assert!(matches!(result, Err(Error::PrematureEndOfDict { .. }))); - } - - #[test] - fn when_the_first_field_value_is_empty() { - let dict_with_missing_key_value = b"d3:fooe"; - - let result = try_bencode_to_json(dict_with_missing_key_value); - - assert!(matches!(result, Err(Error::PrematureEndOfDict { .. 
}))); - } - - #[test] - fn when_the_second_field_value_is_empty() { - let dict_with_missing_key_value = b"d3:foo3:bar3:fooe"; - - let result = try_bencode_to_json(dict_with_missing_key_value); - - assert!(matches!(result, Err(Error::PrematureEndOfDict { .. }))); - } - - mod when_the_field_key_is_not_a_string_for_example { - use crate::parsers::error::Error; - use crate::parsers::BencodeType; - use crate::try_bencode_to_json; - - #[test] - fn when_the_key_in_the_first_dict_field_is_an_integer() { - let field_with_integer_key = b"di42ei43ee"; - - let result = try_bencode_to_json(field_with_integer_key); - - assert!(matches!( - result, - Err(Error::ExpectedStringForDictKeyGot( - BencodeType::Integer, - _, - _ - )) - )); - } - - #[test] - fn when_the_key_in_the_second_dict_field_is_an_integer() { - let field_with_integer_key = b"d3:foo3:bari42ei43ee"; - - let result = try_bencode_to_json(field_with_integer_key); - - assert!(matches!( - result, - Err(Error::ExpectedStringForDictKeyGot( - BencodeType::Integer, - _, - _ - )) - )); - } - - #[test] - fn when_the_key_in_the_first_dict_field_is_a_list() { - let field_with_list_key = b"dlei42ee"; - - let result = try_bencode_to_json(field_with_list_key); - - assert!(matches!( - result, - Err(Error::ExpectedStringForDictKeyGot(BencodeType::List, _, _)) - )); - } - - #[test] - fn when_the_key_in_the_second_dict_field_is_a_list() { - let field_with_list_key = b"d3:foo3:barlei42ee"; - - let result = try_bencode_to_json(field_with_list_key); - - assert!(matches!( - result, - Err(Error::ExpectedStringForDictKeyGot(BencodeType::List, _, _)) - )); - } - - #[test] - fn when_the_key_in_the_first_dict_field_is_a_dict() { - let field_with_dict_key = b"ddei42ee"; - - let result = try_bencode_to_json(field_with_dict_key); - - assert!(matches!( - result, - Err(Error::ExpectedStringForDictKeyGot(BencodeType::Dict, _, _)) - )); - } - - #[test] - fn when_the_key_in_the_second_dict_field_is_a_dict() { - let field_with_dict_key = b"d3:foo3:bardei42ee"; - - let result = try_bencode_to_json(field_with_dict_key); - - assert!(matches!( - result, - Err(Error::ExpectedStringForDictKeyGot(BencodeType::Dict, _, _)) - )); - } - } - } - } -} diff --git a/src/test.rs b/src/test.rs index 2234107..4f19b61 100644 --- a/src/test.rs +++ b/src/test.rs @@ -8,7 +8,7 @@ #[cfg(test)] #[must_use] pub(crate) fn bencode_to_json_unchecked(input_buffer: &[u8]) -> String { - use crate::parsers::BencodeParser; + use crate::parsers::generators::json::BencodeParser; let mut output = String::new(); From 3052d6a57ecf10077f494a24a13fe1370970d62e Mon Sep 17 00:00:00 2001 From: Jose Celano Date: Wed, 4 Dec 2024 10:39:48 +0000 Subject: [PATCH 10/13] refactor: rename BencodeTokenizer to Tokenizer --- src/parsers/generators/json.rs | 6 +++--- src/parsers/tokenizer/mod.rs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/parsers/generators/json.rs b/src/parsers/generators/json.rs index 5ea180a..bf7e10e 100644 --- a/src/parsers/generators/json.rs +++ b/src/parsers/generators/json.rs @@ -14,7 +14,7 @@ use super::{ stack::{Stack, State}, BencodeType, }; -use tokenizer::{BencodeToken, BencodeTokenizer}; +use tokenizer::{BencodeToken, Tokenizer}; use crate::{ parsers::{ @@ -25,7 +25,7 @@ use crate::{ }; pub struct BencodeParser { - tokenizer: BencodeTokenizer, + tokenizer: Tokenizer, num_processed_tokens: u64, stack: Stack, } @@ -42,7 +42,7 @@ impl BencodeParser { pub fn new(reader: R) -> Self { BencodeParser { - tokenizer: BencodeTokenizer::new(reader), + tokenizer: 
Tokenizer::new(reader), num_processed_tokens: 1, stack: Stack::default(), } diff --git a/src/parsers/tokenizer/mod.rs b/src/parsers/tokenizer/mod.rs index f1de03a..3a54934 100644 --- a/src/parsers/tokenizer/mod.rs +++ b/src/parsers/tokenizer/mod.rs @@ -31,13 +31,13 @@ pub enum BencodeToken { LineBreak, } -pub struct BencodeTokenizer { +pub struct Tokenizer { byte_reader: ByteReader, } -impl BencodeTokenizer { +impl Tokenizer { pub fn new(reader: R) -> Self { - BencodeTokenizer { + Tokenizer { byte_reader: ByteReader::new(reader), } } From a3c7c4bc753725b417197b33ebe6be3a5524b90c Mon Sep 17 00:00:00 2001 From: Jose Celano Date: Wed, 4 Dec 2024 10:46:30 +0000 Subject: [PATCH 11/13] refactor: remove parent parser mod --- examples/parser_file_in_file_out.rs | 2 +- examples/parser_stdin_stdout.rs | 2 +- examples/parser_string_in_string_out.rs | 2 +- examples/parser_string_in_vec_out.rs | 2 +- examples/parser_vec_in_string_out.rs | 2 +- examples/parser_vec_in_vec_out.rs | 2 +- src/{parsers => }/error.rs | 4 ++-- src/{parsers => }/generators/json.rs | 29 ++++++++++--------------- src/{parsers => }/generators/mod.rs | 0 src/{parsers => }/generators/stack.rs | 6 ++--- src/lib.rs | 9 +++++--- src/main.rs | 2 +- src/parsers/mod.rs | 3 --- src/test.rs | 2 +- src/{parsers => }/tokenizer/integer.rs | 12 +++++----- src/{parsers => }/tokenizer/mod.rs | 0 src/{parsers => }/tokenizer/string.rs | 10 ++++----- 17 files changed, 40 insertions(+), 49 deletions(-) rename src/{parsers => }/error.rs (98%) rename src/{parsers => }/generators/json.rs (98%) rename src/{parsers => }/generators/mod.rs (100%) rename src/{parsers => }/generators/stack.rs (97%) delete mode 100644 src/parsers/mod.rs rename src/{parsers => }/tokenizer/integer.rs (95%) rename src/{parsers => }/tokenizer/mod.rs (100%) rename src/{parsers => }/tokenizer/string.rs (97%) diff --git a/examples/parser_file_in_file_out.rs b/examples/parser_file_in_file_out.rs index 3ac3873..78c89a1 100644 --- a/examples/parser_file_in_file_out.rs +++ b/examples/parser_file_in_file_out.rs @@ -10,7 +10,7 @@ use std::{ io::{Read, Write}, }; -use bencode2json::parsers::generators::json::BencodeParser; +use bencode2json::generators::json::BencodeParser; use clap::{Arg, Command}; fn main() { diff --git a/examples/parser_stdin_stdout.rs b/examples/parser_stdin_stdout.rs index 2f1881d..c4255ed 100644 --- a/examples/parser_stdin_stdout.rs +++ b/examples/parser_stdin_stdout.rs @@ -7,7 +7,7 @@ //! It prints "spam". use std::io; -use bencode2json::parsers::generators::json::BencodeParser; +use bencode2json::generators::json::BencodeParser; fn main() { let input = Box::new(io::stdin()); diff --git a/examples/parser_string_in_string_out.rs b/examples/parser_string_in_string_out.rs index 85adc5b..8d79a03 100644 --- a/examples/parser_string_in_string_out.rs +++ b/examples/parser_string_in_string_out.rs @@ -5,7 +5,7 @@ //! ``` //! //! It prints "spam". -use bencode2json::parsers::generators::json::BencodeParser; +use bencode2json::generators::json::BencodeParser; fn main() { let input = "4:spam".to_string(); diff --git a/examples/parser_string_in_vec_out.rs b/examples/parser_string_in_vec_out.rs index a4db939..07550ee 100644 --- a/examples/parser_string_in_vec_out.rs +++ b/examples/parser_string_in_vec_out.rs @@ -5,7 +5,7 @@ //! ``` //! //! It prints "spam". 
-use bencode2json::parsers::generators::json::BencodeParser; +use bencode2json::generators::json::BencodeParser; fn main() { let input = "4:spam".to_string(); diff --git a/examples/parser_vec_in_string_out.rs b/examples/parser_vec_in_string_out.rs index d473d2a..667900c 100644 --- a/examples/parser_vec_in_string_out.rs +++ b/examples/parser_vec_in_string_out.rs @@ -5,7 +5,7 @@ //! ``` //! //! It prints "spam". -use bencode2json::parsers::generators::json::BencodeParser; +use bencode2json::generators::json::BencodeParser; fn main() { let input = b"4:spam".to_vec(); diff --git a/examples/parser_vec_in_vec_out.rs b/examples/parser_vec_in_vec_out.rs index 4d8b794..99415fa 100644 --- a/examples/parser_vec_in_vec_out.rs +++ b/examples/parser_vec_in_vec_out.rs @@ -5,7 +5,7 @@ //! ``` //! //! It prints "spam". -use bencode2json::parsers::generators::json::BencodeParser; +use bencode2json::generators::json::BencodeParser; fn main() { let input = b"4:spam".to_vec(); diff --git a/src/parsers/error.rs b/src/error.rs similarity index 98% rename from src/parsers/error.rs rename to src/error.rs index 1390249..2f581a3 100644 --- a/src/parsers/error.rs +++ b/src/error.rs @@ -197,7 +197,7 @@ impl fmt::Display for WriteContext { mod tests { mod for_read_context { - use crate::parsers::error::ReadContext; + use crate::error::ReadContext; #[test] fn it_should_display_the_read_context() { @@ -237,7 +237,7 @@ mod tests { } mod for_write_context { - use crate::parsers::error::WriteContext; + use crate::error::WriteContext; #[test] fn it_should_display_the_read_context() { diff --git a/src/parsers/generators/json.rs b/src/generators/json.rs similarity index 98% rename from src/parsers/generators/json.rs rename to src/generators/json.rs index bf7e10e..efdef23 100644 --- a/src/parsers/generators/json.rs +++ b/src/generators/json.rs @@ -17,11 +17,9 @@ use super::{ use tokenizer::{BencodeToken, Tokenizer}; use crate::{ - parsers::{ - error::{self, ReadContext, WriteContext}, - tokenizer, - }, + error::{self, ReadContext, WriteContext}, rw::{byte_writer::ByteWriter, string_writer::StringWriter, writer::Writer}, + tokenizer, }; pub struct BencodeParser { @@ -355,10 +353,10 @@ mod tests { use std::io::{self, Read}; - use crate::parsers::generators::json::BencodeParser; + use crate::generators::json::BencodeParser; mod it_should_allow_writing { - use crate::parsers::generators::json::BencodeParser; + use crate::generators::json::BencodeParser; #[test] fn to_any_type_implementing_io_write_trait() { @@ -411,7 +409,7 @@ mod tests { mod it_should_allow_special_bencode_cases { - use crate::{parsers::generators::json::BencodeParser, test::bencode_to_json_unchecked}; + use crate::{generators::json::BencodeParser, test::bencode_to_json_unchecked}; #[test] fn an_empty_input() { @@ -448,10 +446,7 @@ mod tests { mod it_should_fail { use std::io::{self, Read}; - use crate::{ - parsers::{error::Error, generators::json::BencodeParser}, - try_bencode_to_json, - }; + use crate::{error::Error, generators::json::BencodeParser, try_bencode_to_json}; #[test] fn when_there_is_a_problem_reading_from_input() { @@ -548,7 +543,7 @@ mod tests { } mod should_fail { - use crate::{parsers::error::Error, try_bencode_to_json}; + use crate::{error::Error, try_bencode_to_json}; #[test] fn when_it_finds_an_invalid_byte() { @@ -754,7 +749,7 @@ mod tests { } mod it_should_fail_parsing_when { - use crate::{parsers::error::Error, try_bencode_to_json}; + use crate::{error::Error, try_bencode_to_json}; #[test] fn 
it_reaches_the_end_of_the_input_parsing_the_string_length() { @@ -1327,7 +1322,7 @@ mod tests { } mod should_fail { - use crate::{parsers::error::Error, try_bencode_to_json}; + use crate::{error::Error, try_bencode_to_json}; #[test] fn when_an_empty_list_does_not_have_the_matching_close_byte() { @@ -1920,7 +1915,7 @@ mod tests { } mod should_fail { - use crate::{parsers::error::Error, try_bencode_to_json}; + use crate::{error::Error, try_bencode_to_json}; #[test] fn when_an_empty_dict_does_not_have_the_matching_close_byte() { @@ -1998,8 +1993,8 @@ mod tests { } mod when_the_field_key_is_not_a_string_for_example { - use crate::parsers::error::Error; - use crate::parsers::generators::json::BencodeType; + use crate::error::Error; + use crate::generators::json::BencodeType; use crate::try_bencode_to_json; #[test] diff --git a/src/parsers/generators/mod.rs b/src/generators/mod.rs similarity index 100% rename from src/parsers/generators/mod.rs rename to src/generators/mod.rs diff --git a/src/parsers/generators/stack.rs b/src/generators/stack.rs similarity index 97% rename from src/parsers/generators/stack.rs rename to src/generators/stack.rs index 5881941..f05c362 100644 --- a/src/parsers/generators/stack.rs +++ b/src/generators/stack.rs @@ -150,7 +150,7 @@ impl Stack { #[cfg(test)] mod tests { mod the_stack_state { - use crate::parsers::generators::stack::State; + use crate::generators::stack::State; #[test] fn should_be_displayed_with_single_letter_abbreviations() { @@ -165,7 +165,7 @@ mod tests { mod the_stack { mod it_should { - use crate::parsers::generators::stack::{Stack, State}; + use crate::generators::stack::{Stack, State}; #[test] fn have_an_initial_state() { @@ -235,7 +235,7 @@ mod tests { mod be_displayed_with_single_letter_abbreviations_for_states { - use crate::parsers::generators::stack::{Stack, State}; + use crate::generators::stack::{Stack, State}; #[test] fn with_the_initial_state() { diff --git a/src/lib.rs b/src/lib.rs index 4df388b..ae77ea3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,10 +34,13 @@ //! //! > __NOTICE__: In the context of this lib, parser is a function that takes an input //! > containing bencoded data and produces a JSON output (raw bytes or UTF-8 string). -use parsers::{error::Error, generators::json::BencodeParser}; - -pub mod parsers; +pub mod error; +pub mod generators; pub mod rw; +pub mod tokenizer; + +use error::Error; +use generators::json::BencodeParser; mod test; /// It converts bencoded bytes into a JSON string. diff --git a/src/main.rs b/src/main.rs index b4185f9..dea70d7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,7 +13,7 @@ //! ```text //! cargo run -- -i ./tests/fixtures/sample.bencode -o output.json //! 
``` -use bencode2json::parsers::generators::json::BencodeParser; +use bencode2json::generators::json::BencodeParser; use clap::{Arg, Command}; use std::fs::File; use std::io::{self, Read, Write}; diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs deleted file mode 100644 index b4c08e0..0000000 --- a/src/parsers/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod error; -pub mod generators; -pub mod tokenizer; diff --git a/src/test.rs b/src/test.rs index 4f19b61..a16112f 100644 --- a/src/test.rs +++ b/src/test.rs @@ -8,7 +8,7 @@ #[cfg(test)] #[must_use] pub(crate) fn bencode_to_json_unchecked(input_buffer: &[u8]) -> String { - use crate::parsers::generators::json::BencodeParser; + use crate::generators::json::BencodeParser; let mut output = String::new(); diff --git a/src/parsers/tokenizer/integer.rs b/src/tokenizer/integer.rs similarity index 95% rename from src/parsers/tokenizer/integer.rs rename to src/tokenizer/integer.rs index 0dd2730..5722bcc 100644 --- a/src/parsers/tokenizer/integer.rs +++ b/src/tokenizer/integer.rs @@ -134,7 +134,7 @@ fn next_byte(reader: &mut ByteReader) -> Result { #[cfg(test)] mod tests { - use crate::{parsers::error::Error, rw::byte_reader::ByteReader}; + use crate::{error::Error, rw::byte_reader::ByteReader}; use super::parse; @@ -153,7 +153,7 @@ mod tests { } mod for_helpers { - use crate::parsers::tokenizer::integer::tests::try_bencode_to_json; + use crate::tokenizer::integer::tests::try_bencode_to_json; #[test] fn bencode_to_json_wrapper_succeeds() { @@ -190,11 +190,9 @@ mod tests { use std::io::{self, Read}; use crate::{ - parsers::{ - error::Error, - tokenizer::integer::{parse, tests::try_bencode_to_json}, - }, + error::Error, rw::byte_reader::ByteReader, + tokenizer::integer::{parse, tests::try_bencode_to_json}, }; #[test] @@ -250,7 +248,7 @@ mod tests { } mod when_it_receives_a_unexpected_byte { - use crate::parsers::{error::Error, tokenizer::integer::tests::try_bencode_to_json}; + use crate::{error::Error, tokenizer::integer::tests::try_bencode_to_json}; #[test] fn while_expecting_a_digit_or_sign() { diff --git a/src/parsers/tokenizer/mod.rs b/src/tokenizer/mod.rs similarity index 100% rename from src/parsers/tokenizer/mod.rs rename to src/tokenizer/mod.rs diff --git a/src/parsers/tokenizer/string.rs b/src/tokenizer/string.rs similarity index 97% rename from src/parsers/tokenizer/string.rs rename to src/tokenizer/string.rs index f3325a5..5d23c95 100644 --- a/src/parsers/tokenizer/string.rs +++ b/src/tokenizer/string.rs @@ -210,7 +210,7 @@ impl Value { #[cfg(test)] mod tests { - use crate::{parsers::error::Error, rw::byte_reader::ByteReader}; + use crate::{error::Error, rw::byte_reader::ByteReader}; use super::parse; @@ -228,7 +228,7 @@ mod tests { } mod for_helpers { - use crate::parsers::tokenizer::string::tests::try_bencode_to_json; + use crate::tokenizer::string::tests::try_bencode_to_json; #[test] fn bencode_to_json_wrapper_succeeds() { @@ -356,11 +356,9 @@ mod tests { use std::io::{self, Read}; use crate::{ - parsers::{ - error::Error, - tokenizer::string::{parse, tests::try_bencode_to_json}, - }, + error::Error, rw::byte_reader::ByteReader, + tokenizer::string::{parse, tests::try_bencode_to_json}, }; #[test] From 68d9915cb78eac5f27da34777b950baeaf4e7dbd Mon Sep 17 00:00:00 2001 From: Jose Celano Date: Wed, 4 Dec 2024 10:55:44 +0000 Subject: [PATCH 12/13] refactor: rename json::BencodeParser to json::Generator --- examples/parser_file_in_file_out.rs | 4 ++-- examples/parser_stdin_stdout.rs | 4 ++-- examples/parser_string_in_string_out.rs | 4 ++-- 
examples/parser_string_in_vec_out.rs | 4 ++-- examples/parser_vec_in_string_out.rs | 4 ++-- examples/parser_vec_in_vec_out.rs | 4 ++-- src/generators/json.rs | 31 +++++++++++-------------- src/generators/mod.rs | 2 ++ src/generators/stack.rs | 3 +-- src/lib.rs | 4 ++-- src/main.rs | 4 ++-- src/test.rs | 4 ++-- src/tokenizer/mod.rs | 6 +---- 13 files changed, 35 insertions(+), 43 deletions(-) diff --git a/examples/parser_file_in_file_out.rs b/examples/parser_file_in_file_out.rs index 78c89a1..06acb13 100644 --- a/examples/parser_file_in_file_out.rs +++ b/examples/parser_file_in_file_out.rs @@ -10,7 +10,7 @@ use std::{ io::{Read, Write}, }; -use bencode2json::generators::json::BencodeParser; +use bencode2json::generators::json::Generator; use clap::{Arg, Command}; fn main() { @@ -61,7 +61,7 @@ fn main() { std::process::exit(1); }; - if let Err(e) = BencodeParser::new(input).write_bytes(&mut output) { + if let Err(e) = Generator::new(input).write_bytes(&mut output) { eprintln!("Error: {e}"); std::process::exit(1); } diff --git a/examples/parser_stdin_stdout.rs b/examples/parser_stdin_stdout.rs index c4255ed..01456cb 100644 --- a/examples/parser_stdin_stdout.rs +++ b/examples/parser_stdin_stdout.rs @@ -7,13 +7,13 @@ //! It prints "spam". use std::io; -use bencode2json::generators::json::BencodeParser; +use bencode2json::generators::json::Generator; fn main() { let input = Box::new(io::stdin()); let mut output = Box::new(io::stdout()); - if let Err(e) = BencodeParser::new(input).write_bytes(&mut output) { + if let Err(e) = Generator::new(input).write_bytes(&mut output) { eprintln!("Error: {e}"); std::process::exit(1); } diff --git a/examples/parser_string_in_string_out.rs b/examples/parser_string_in_string_out.rs index 8d79a03..6c7bf7c 100644 --- a/examples/parser_string_in_string_out.rs +++ b/examples/parser_string_in_string_out.rs @@ -5,13 +5,13 @@ //! ``` //! //! It prints "spam". -use bencode2json::generators::json::BencodeParser; +use bencode2json::generators::json::Generator; fn main() { let input = "4:spam".to_string(); let mut output = String::new(); - if let Err(e) = BencodeParser::new(input.as_bytes()).write_str(&mut output) { + if let Err(e) = Generator::new(input.as_bytes()).write_str(&mut output) { eprintln!("Error: {e}"); std::process::exit(1); } diff --git a/examples/parser_string_in_vec_out.rs b/examples/parser_string_in_vec_out.rs index 07550ee..3d89dfd 100644 --- a/examples/parser_string_in_vec_out.rs +++ b/examples/parser_string_in_vec_out.rs @@ -5,13 +5,13 @@ //! ``` //! //! It prints "spam". -use bencode2json::generators::json::BencodeParser; +use bencode2json::generators::json::Generator; fn main() { let input = "4:spam".to_string(); let mut output = Vec::new(); - if let Err(e) = BencodeParser::new(input.as_bytes()).write_bytes(&mut output) { + if let Err(e) = Generator::new(input.as_bytes()).write_bytes(&mut output) { eprintln!("Error: {e}"); std::process::exit(1); } diff --git a/examples/parser_vec_in_string_out.rs b/examples/parser_vec_in_string_out.rs index 667900c..1388f51 100644 --- a/examples/parser_vec_in_string_out.rs +++ b/examples/parser_vec_in_string_out.rs @@ -5,13 +5,13 @@ //! ``` //! //! It prints "spam". 
-use bencode2json::generators::json::BencodeParser; +use bencode2json::generators::json::Generator; fn main() { let input = b"4:spam".to_vec(); let mut output = String::new(); - if let Err(e) = BencodeParser::new(&input[..]).write_str(&mut output) { + if let Err(e) = Generator::new(&input[..]).write_str(&mut output) { eprintln!("Error: {e}"); std::process::exit(1); } diff --git a/examples/parser_vec_in_vec_out.rs b/examples/parser_vec_in_vec_out.rs index 99415fa..d678bff 100644 --- a/examples/parser_vec_in_vec_out.rs +++ b/examples/parser_vec_in_vec_out.rs @@ -5,13 +5,13 @@ //! ``` //! //! It prints "spam". -use bencode2json::generators::json::BencodeParser; +use bencode2json::generators::json::Generator; fn main() { let input = b"4:spam".to_vec(); let mut output = Vec::new(); - if let Err(e) = BencodeParser::new(&input[..]).write_bytes(&mut output) { + if let Err(e) = Generator::new(&input[..]).write_bytes(&mut output) { eprintln!("Error: {e}"); std::process::exit(1); } diff --git a/src/generators/json.rs b/src/generators/json.rs index efdef23..ccfa6af 100644 --- a/src/generators/json.rs +++ b/src/generators/json.rs @@ -1,9 +1,4 @@ -/* TODO: - -- Rename this parser to generator. - -*/ - +//! Json generator for bencoded data. use core::str; use std::{ fmt::Write as FmtWrite, @@ -22,13 +17,13 @@ use crate::{ tokenizer, }; -pub struct BencodeParser { +pub struct Generator { tokenizer: Tokenizer, num_processed_tokens: u64, stack: Stack, } -impl BencodeParser { +impl Generator { const JSON_ARRAY_BEGIN: u8 = b'['; const JSON_ARRAY_ITEMS_SEPARATOR: u8 = b','; const JSON_ARRAY_END: u8 = b']'; @@ -39,7 +34,7 @@ impl BencodeParser { const JSON_OBJ_END: u8 = b'}'; pub fn new(reader: R) -> Self { - BencodeParser { + Generator { tokenizer: Tokenizer::new(reader), num_processed_tokens: 1, stack: Stack::default(), @@ -353,16 +348,16 @@ mod tests { use std::io::{self, Read}; - use crate::generators::json::BencodeParser; + use crate::generators::json::Generator; mod it_should_allow_writing { - use crate::generators::json::BencodeParser; + use crate::generators::json::Generator; #[test] fn to_any_type_implementing_io_write_trait() { let mut output = Vec::new(); - let mut parser = BencodeParser::new(&b"i0e"[..]); + let mut parser = Generator::new(&b"i0e"[..]); parser .write_bytes(&mut output) @@ -375,7 +370,7 @@ mod tests { fn writing_to_any_type_implementing_fmt_write_trait() { let mut output = String::new(); - let mut parser = BencodeParser::new(&b"i0e"[..]); + let mut parser = Generator::new(&b"i0e"[..]); parser .write_str(&mut output) @@ -400,7 +395,7 @@ mod tests { let mut output = String::new(); - let mut parser = BencodeParser::new(EmptyReader); + let mut parser = Generator::new(EmptyReader); parser.write_str(&mut output).unwrap(); @@ -409,13 +404,13 @@ mod tests { mod it_should_allow_special_bencode_cases { - use crate::{generators::json::BencodeParser, test::bencode_to_json_unchecked}; + use crate::{generators::json::Generator, test::bencode_to_json_unchecked}; #[test] fn an_empty_input() { let mut output = String::new(); - let mut parser = BencodeParser::new(&b""[..]); + let mut parser = Generator::new(&b""[..]); parser .write_str(&mut output) @@ -446,7 +441,7 @@ mod tests { mod it_should_fail { use std::io::{self, Read}; - use crate::{error::Error, generators::json::BencodeParser, try_bencode_to_json}; + use crate::{error::Error, generators::json::Generator, try_bencode_to_json}; #[test] fn when_there_is_a_problem_reading_from_input() { @@ -463,7 +458,7 @@ mod tests { let mut output = 
String::new(); - let mut parser = BencodeParser::new(FaultyReader); + let mut parser = Generator::new(FaultyReader); let result = parser.write_str(&mut output); diff --git a/src/generators/mod.rs b/src/generators/mod.rs index 42fd26e..c1911f0 100644 --- a/src/generators/mod.rs +++ b/src/generators/mod.rs @@ -1,6 +1,8 @@ pub mod json; pub mod stack; +// todo: extract trait for generators when we implement a new one. + use derive_more::derive::Display; #[derive(Debug, PartialEq, Display)] diff --git a/src/generators/stack.rs b/src/generators/stack.rs index f05c362..6bf6dec 100644 --- a/src/generators/stack.rs +++ b/src/generators/stack.rs @@ -1,5 +1,4 @@ -//! The stack used by the Bencoded to JSON converter to keep track of the -//! current parsing state. +//! The stack used by the generators to keep track of the current parsing state. use std::fmt::Display; /// Stack containing states for nested Bencoded values. diff --git a/src/lib.rs b/src/lib.rs index ae77ea3..9ca27cb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,7 +40,7 @@ pub mod rw; pub mod tokenizer; use error::Error; -use generators::json::BencodeParser; +use generators::json::Generator; mod test; /// It converts bencoded bytes into a JSON string. @@ -51,7 +51,7 @@ mod test; pub fn try_bencode_to_json(input_buffer: &[u8]) -> Result { let mut output = String::new(); - let mut parser = BencodeParser::new(input_buffer); + let mut parser = Generator::new(input_buffer); match parser.write_str(&mut output) { Ok(()) => Ok(output), diff --git a/src/main.rs b/src/main.rs index dea70d7..e7e5b9a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,7 +13,7 @@ //! ```text //! cargo run -- -i ./tests/fixtures/sample.bencode -o output.json //! ``` -use bencode2json::generators::json::BencodeParser; +use bencode2json::generators::json::Generator; use clap::{Arg, Command}; use std::fs::File; use std::io::{self, Read, Write}; @@ -70,7 +70,7 @@ fn run() { Box::new(io::stdout()) }; - if let Err(e) = BencodeParser::new(input).write_bytes(&mut output) { + if let Err(e) = Generator::new(input).write_bytes(&mut output) { eprintln!("Error: {e}"); std::process::exit(1); } diff --git a/src/test.rs b/src/test.rs index a16112f..fef1001 100644 --- a/src/test.rs +++ b/src/test.rs @@ -8,11 +8,11 @@ #[cfg(test)] #[must_use] pub(crate) fn bencode_to_json_unchecked(input_buffer: &[u8]) -> String { - use crate::generators::json::BencodeParser; + use crate::generators::json::Generator; let mut output = String::new(); - let mut parser = BencodeParser::new(input_buffer); + let mut parser = Generator::new(input_buffer); parser .write_str(&mut output) diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index 3a54934..cd13a45 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -8,11 +8,7 @@ use super::error::{self, ReadContext}; use crate::rw::byte_reader::ByteReader; -/* TODO: - -- Implement trait Iterator for tokenizer. - -*/ +// todo: Implement trait Iterator for tokenizer. 
 // Bencoded reserved bytes
 
 const BENCODE_BEGIN_INTEGER: u8 = b'i';

From ec6cc56d460975b4bbe8266255acf0cfdd82f6c7 Mon Sep 17 00:00:00 2001
From: Jose Celano
Date: Wed, 4 Dec 2024 10:59:57 +0000
Subject: [PATCH 13/13] docs: update README

---
 README.md | 42 +++++++-----------------------------------
 1 file changed, 7 insertions(+), 35 deletions(-)

diff --git a/README.md b/README.md
index 2cd943a..5285fb7 100644
--- a/README.md
+++ b/README.md
@@ -65,12 +65,12 @@ Error: Unexpected end of input parsing integer; read context: input pos 3, lates
 
 ```console
 printf "3:ab" | cargo run
-Error: Unexpected end of input parsing string value; read context: input pos 4, latest input bytes dump: [51, 58, 97, 98] (UTF-8 string: `3:ab`); write context: output pos 0, latest output bytes dump: [] (UTF-8 string: ``)
+Error: Unexpected end of input parsing string value; read context: input pos 4, latest input bytes dump: [51, 58, 97, 98] (UTF-8 string: `3:ab`)
 ```
 
 ```console
 echo "i00e" | cargo run
-Error: Leading zeros in integers are not allowed, for example b'i00e'; read context: byte `48` (char: `0`), input pos 3, latest input bytes dump: [105, 48, 48] (UTF-8 string: `i00`); write context: byte `48` (char: `0`), output pos 2, latest output bytes dump: [48, 48] (UTF-8 string: `00`)
+Error: Leading zeros in integers are not allowed, for example b'i00e'; read context: byte `48` (char: `0`), input pos 3, latest input bytes dump: [105, 48, 48] (UTF-8 string: `i00`)
 ```
 
 Generating pretty JSON with [jq][jq]:
@@ -111,36 +111,10 @@ cargo add bencode2json
 
 There are two ways of using the library:
 
-- With high-level parser wrappers.
-- With the low-level parsers.
+- With high-level wrappers.
+- With the low-level generators.
 
-Example using the high-level parser wrappers:
-
-```rust
-use bencode2json::{try_bencode_to_json};
-
-let result = try_bencode_to_json(b"d4:spam4:eggse").unwrap();
-
-assert_eq!(result, r#"{"spam":"eggs"}"#);
-```
-
-Example using the low-level parser:
-
-```rust
-use bencode2json::parsers::{BencodeParser};
-
-let mut output = String::new();
-
-let mut parser = BencodeParser::new(&b"4:spam"[..]);
-
-parser
-    .write_str(&mut output)
-    .expect("Bencode to JSON conversion failed");
-
-println!("{output}"); // It prints the JSON string: "spam"
-```
-
-More [examples](./examples/).
+See [examples](./examples/).
 
 ## Test
@@ -167,21 +141,19 @@ cargo cov
 
 ## Performance
 
 In terms of memory usage this implementation consumes at least the size of the
-biggest bencoded string. The string parser keeps all the string bytes in memory until
-it parses the whole string, in order to convert it to UTF-8, when it's possible.
+biggest bencoded integer or string. The string and integer parsers keep all the
+bytes in memory until the whole value has been parsed.
 
 The library also wraps the input and output streams in a [BufReader](https://doc.rust-lang.org/std/io/struct.BufReader.html) and [BufWriter](https://doc.rust-lang.org/std/io/struct.BufWriter.html) because it can be excessively inefficient to work directly with something that implements [Read](https://doc.rust-lang.org/std/io/trait.Read.html) or [Write](https://doc.rust-lang.org/std/io/trait.Write.html).
 
 ## TODO
 
-- [ ] More examples of using the library.
 - [ ] Counter for number of items in a list for debugging and errors.
 - [ ] Fuzz testing: Generate random valid bencoded values.
 - [ ] Install tracing crate. Add verbose mode that enables debugging.
 - [ ] Option to check if the final JSON is valid at the end of the process.
- [ ] Benchmarking for this implementation and the original C implementation. -- [ ] Optimize string parser. We can stop trying to convert the string to UTF-8 when we find a non valid UTF-8 char. ## Alternatives