@@ -8,6 +8,7 @@ pub mod integer;
88pub mod stack;
99pub mod string;
1010
11+ use core:: str;
1112use std:: {
1213 fmt:: Write as FmtWrite ,
1314 io:: { self , Read , Write as IoWrite } ,
@@ -36,6 +37,16 @@ pub enum BencodeType {
3637 Dict ,
3738}
3839
/// A single lexical token read from a Bencode input stream.
///
/// The parser first turns the peeked input byte into one of these tokens and
/// then matches on the token to produce the output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BencodeToken {
    /// Bytes of an integer value, as returned by `integer::parse`. The parser
    /// writes them to the output byte by byte, unchanged.
    Integer(Vec<u8>),
    /// Raw bytes of a string value, as returned by `string::parse`. They are
    /// not guaranteed to be valid UTF-8; the parser falls back to a hex
    /// rendering when they are not.
    String(Vec<u8>),
    /// Start of a list (the `BENCODE_BEGIN_LIST` byte was read).
    BeginList,
    /// Start of a dictionary (the `BENCODE_BEGIN_DICT` byte was read).
    BeginDict,
    /// End of the current list or dictionary (the `BENCODE_END_LIST_OR_DICT`
    /// byte was read); the same byte closes both kinds of container.
    EndListOrDict,
    /// A `\n` byte at the beginning, the end, or between values. The parser
    /// ignores it and produces no output for it.
    LineBreak,
}
49+
3950pub struct BencodeParser < R : Read > {
4051 byte_reader : ByteReader < R > ,
4152 num_processed_tokens : u64 ,
@@ -104,35 +115,40 @@ impl<R: Read> BencodeParser<R> {
104115 /// - It can't read from the input or write to the output.
105116 /// - The input is invalid Bencode.
106117 fn parse < W : Writer > ( & mut self , writer : & mut W ) -> Result < ( ) , error:: Error > {
107- while let Some ( peeked_byte) = Self :: peek_byte ( & mut self . byte_reader , writer) ? {
108- match peeked_byte {
118+ let capture_output = Vec :: new ( ) ;
119+ let mut null_writer = ByteWriter :: new ( capture_output) ;
120+
121+ while let Some ( peeked_byte) = Self :: peek_byte ( & mut self . byte_reader , & null_writer) ? {
122+ let token: BencodeToken = match peeked_byte {
109123 BENCODE_BEGIN_INTEGER => {
110- self . begin_bencoded_value ( BencodeType :: Integer , writer ) ?;
111- integer :: parse ( & mut self . byte_reader , writer ) ? ;
124+ let value = integer :: parse ( & mut self . byte_reader , & mut null_writer ) ?;
125+ BencodeToken :: Integer ( value )
112126 }
113127 b'0' ..=b'9' => {
114- self . begin_bencoded_value ( BencodeType :: String , writer ) ?;
115- string :: parse ( & mut self . byte_reader , writer ) ? ;
128+ let value = string :: parse ( & mut self . byte_reader , & mut null_writer ) ?;
129+ BencodeToken :: String ( value )
116130 }
117131 BENCODE_BEGIN_LIST => {
118- let _byte = Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , writer) ?;
119- self . begin_bencoded_value ( BencodeType :: List , writer) ?;
120- writer. write_byte ( Self :: JSON_ARRAY_BEGIN ) ?;
121- self . stack . push ( State :: ExpectingFirstListItemOrEnd ) ;
132+ let _byte =
133+ Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , & null_writer) ?;
134+ BencodeToken :: BeginList
122135 }
123136 BENCODE_BEGIN_DICT => {
124- let _byte = Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , writer) ?;
125- self . begin_bencoded_value ( BencodeType :: Dict , writer) ?;
126- writer. write_byte ( Self :: JSON_OBJ_BEGIN ) ?;
127- self . stack . push ( State :: ExpectingFirstDictFieldOrEnd ) ;
137+ let _byte =
138+ Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , & null_writer) ?;
139+ BencodeToken :: BeginDict
128140 }
129141 BENCODE_END_LIST_OR_DICT => {
130- let _byte = Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , writer) ?;
131- self . end_list_or_dict ( writer) ?;
142+ let _byte =
143+ Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , & null_writer) ?;
144+ BencodeToken :: EndListOrDict
132145 }
133146 b'\n' => {
147+ // todo: we should not return any token and continue to the next token.
134148 // Ignore line breaks at the beginning, the end, or between values
135- let _byte = Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , writer) ?;
149+ let _byte =
150+ Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , & null_writer) ?;
151+ BencodeToken :: LineBreak
136152 }
137153 _ => {
138154 return Err ( error:: Error :: UnrecognizedFirstBencodeValueByte (
@@ -148,6 +164,60 @@ impl<R: Read> BencodeParser<R> {
148164 } ,
149165 ) ) ;
150166 }
167+ } ;
168+
169+ /* TODO:
170+
171+ - Extract tokenizer (without implementing the Iterator trait).
172+ - Remove writer from tokenizer.
173+ - Implement trait Iterator for tokenizer.
174+ - Rename this parser to generator.
175+
176+ */
177+
178+ match token {
179+ BencodeToken :: Integer ( integer_bytes) => {
180+ self . begin_bencoded_value ( BencodeType :: Integer , writer) ?;
181+ // todo: add `write_bytes` to writer.
182+ for bytes in integer_bytes {
183+ writer. write_byte ( bytes) ?;
184+ }
185+ }
186+ BencodeToken :: String ( string_bytes) => {
187+ self . begin_bencoded_value ( BencodeType :: String , writer) ?;
188+
189+ let html_tag_style_string = match str:: from_utf8 ( & string_bytes) {
190+ Ok ( string) => {
191+ // String only contains valid UTF-8 chars -> print it as is
192+ & format ! ( "<string>{}</string>" , string. to_owned( ) )
193+ }
194+ Err ( _) => {
195+ // String contains invalid UTF-8 bytes -> print it as hex bytes
196+ & format ! ( "<hex>{}</hex>" , hex:: encode( string_bytes) )
197+ }
198+ } ;
199+
200+ writer. write_str (
201+ & serde_json:: to_string ( & html_tag_style_string)
202+ . expect ( "Failed to serialize to JSON. This should not happen because non UTF-8 bencoded string are serialized as hex bytes" ) ,
203+ ) ?;
204+ }
205+ BencodeToken :: BeginList => {
206+ self . begin_bencoded_value ( BencodeType :: List , writer) ?;
207+ writer. write_byte ( Self :: JSON_ARRAY_BEGIN ) ?;
208+ self . stack . push ( State :: ExpectingFirstListItemOrEnd ) ;
209+ }
210+ BencodeToken :: BeginDict => {
211+ self . begin_bencoded_value ( BencodeType :: Dict , writer) ?;
212+ writer. write_byte ( Self :: JSON_OBJ_BEGIN ) ?;
213+ self . stack . push ( State :: ExpectingFirstDictFieldOrEnd ) ;
214+ }
215+ BencodeToken :: EndListOrDict => {
216+ self . end_list_or_dict ( writer) ?;
217+ }
218+ BencodeToken :: LineBreak => {
219+ // Ignore line breaks at the beginning, the end, or between values
220+ }
151221 }
152222
153223 self . num_processed_tokens += 1 ;
0 commit comments