@@ -8,6 +8,7 @@ pub mod integer;
88pub mod stack;
99pub mod string;
1010
11+ use core:: str;
1112use std:: {
1213 fmt:: Write as FmtWrite ,
1314 io:: { self , Read , Write as IoWrite } ,
@@ -36,6 +37,16 @@ pub enum BencodeType {
3637 Dict ,
3738}
3839
/// A lexical token read from a Bencode input stream.
///
/// The parser maps the byte it peeks at the start of each value to one of
/// these tokens and then emits the matching output for that token.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BencodeToken {
    /// A bencoded integer, carrying the raw bytes returned by `integer::parse`.
    Integer(Vec<u8>),
    /// A bencoded string, carrying the raw bytes returned by `string::parse`.
    ///
    /// The bytes are not guaranteed to be valid UTF-8; consumers must check
    /// before treating them as text.
    String(Vec<u8>),
    /// Start of a list (the `BENCODE_BEGIN_LIST` byte).
    BeginList,
    /// Start of a dictionary (the `BENCODE_BEGIN_DICT` byte).
    BeginDict,
    /// End of the currently open list or dictionary (the
    /// `BENCODE_END_LIST_OR_DICT` byte).
    EndListOrDict,
    /// A `\n` byte at the beginning, the end, or between values. It is
    /// consumed but produces no output.
    LineBreak,
}
49+
3950pub struct BencodeParser < R : Read > {
4051 byte_reader : ByteReader < R > ,
4152 num_processed_tokens : u64 ,
@@ -104,35 +115,39 @@ impl<R: Read> BencodeParser<R> {
104115 /// - It can't read from the input or write to the output.
105116 /// - The input is invalid Bencode.
106117 fn parse < W : Writer > ( & mut self , writer : & mut W ) -> Result < ( ) , error:: Error > {
107- while let Some ( peeked_byte) = Self :: peek_byte ( & mut self . byte_reader , writer) ? {
108- match peeked_byte {
118+ let capture_output = Vec :: new ( ) ;
119+ let mut null_writer = ByteWriter :: new ( capture_output) ;
120+
121+ while let Some ( peeked_byte) = Self :: peek_byte ( & mut self . byte_reader , & null_writer) ? {
122+ let token: BencodeToken = match peeked_byte {
109123 BENCODE_BEGIN_INTEGER => {
110- self . begin_bencoded_value ( BencodeType :: Integer , writer ) ?;
111- integer :: parse ( & mut self . byte_reader , writer ) ? ;
124+ let value = integer :: parse ( & mut self . byte_reader , & mut null_writer ) ?;
125+ BencodeToken :: Integer ( value )
112126 }
113127 b'0' ..=b'9' => {
114- self . begin_bencoded_value ( BencodeType :: String , writer ) ?;
115- string :: parse ( & mut self . byte_reader , writer ) ? ;
128+ let value = string :: parse ( & mut self . byte_reader , & mut null_writer ) ?;
129+ BencodeToken :: String ( value )
116130 }
117131 BENCODE_BEGIN_LIST => {
118- let _byte = Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , writer) ?;
119- self . begin_bencoded_value ( BencodeType :: List , writer) ?;
120- writer. write_byte ( Self :: JSON_ARRAY_BEGIN ) ?;
121- self . stack . push ( State :: ExpectingFirstListItemOrEnd ) ;
132+ let _byte =
133+ Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , & null_writer) ?;
134+ BencodeToken :: BeginList
122135 }
123136 BENCODE_BEGIN_DICT => {
124- let _byte = Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , writer) ?;
125- self . begin_bencoded_value ( BencodeType :: Dict , writer) ?;
126- writer. write_byte ( Self :: JSON_OBJ_BEGIN ) ?;
127- self . stack . push ( State :: ExpectingFirstDictFieldOrEnd ) ;
137+ let _byte =
138+ Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , & null_writer) ?;
139+ BencodeToken :: BeginDict
128140 }
129141 BENCODE_END_LIST_OR_DICT => {
130- let _byte = Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , writer) ?;
131- self . end_list_or_dict ( writer) ?;
142+ let _byte =
143+ Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , & null_writer) ?;
144+ BencodeToken :: EndListOrDict
132145 }
133146 b'\n' => {
134147 // Ignore line breaks at the beginning, the end, or between values
135- let _byte = Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , writer) ?;
148+ let _byte =
149+ Self :: read_peeked_byte ( peeked_byte, & mut self . byte_reader , & null_writer) ?;
150+ BencodeToken :: LineBreak
136151 }
137152 _ => {
138153 return Err ( error:: Error :: UnrecognizedFirstBencodeValueByte (
@@ -148,6 +163,60 @@ impl<R: Read> BencodeParser<R> {
148163 } ,
149164 ) ) ;
150165 }
166+ } ;
167+
168+ /* TODO:
169+
170+ - Extract tokenizer (without implementing the Iterator trait).
171+ - Remove writer from tokenizer.
172+ - Implement trait Iterator for tokenizer.
173+ - Rename this parser to generator.
174+
175+ */
176+
177+ match token {
178+ BencodeToken :: Integer ( integer_bytes) => {
179+ self . begin_bencoded_value ( BencodeType :: Integer , writer) ?;
180+ // todo: add `write_bytes` to writer.
181+ for bytes in integer_bytes {
182+ writer. write_byte ( bytes) ?;
183+ }
184+ }
185+ BencodeToken :: String ( string_bytes) => {
186+ self . begin_bencoded_value ( BencodeType :: String , writer) ?;
187+
188+ let html_tag_style_string = match str:: from_utf8 ( & string_bytes) {
189+ Ok ( string) => {
190+ // String only contains valid UTF-8 chars -> print it as is
191+ & format ! ( "<string>{}</string>" , string. to_owned( ) )
192+ }
193+ Err ( _) => {
194+ // String contains invalid UTF-8 -> print it as hex bytes
195+ & format ! ( "<hex>{}</hex>" , hex:: encode( string_bytes) )
196+ }
197+ } ;
198+
199+ writer. write_str (
200+ & serde_json:: to_string ( & html_tag_style_string)
201+ . expect ( "Failed to serialize to JSON. This should not happen because non UTF-8 bencoded string are serialized as hex bytes" ) ,
202+ ) ?;
203+ }
204+ BencodeToken :: BeginList => {
205+ self . begin_bencoded_value ( BencodeType :: List , writer) ?;
206+ writer. write_byte ( Self :: JSON_ARRAY_BEGIN ) ?;
207+ self . stack . push ( State :: ExpectingFirstListItemOrEnd ) ;
208+ }
209+ BencodeToken :: BeginDict => {
210+ self . begin_bencoded_value ( BencodeType :: Dict , writer) ?;
211+ writer. write_byte ( Self :: JSON_OBJ_BEGIN ) ?;
212+ self . stack . push ( State :: ExpectingFirstDictFieldOrEnd ) ;
213+ }
214+ BencodeToken :: EndListOrDict => {
215+ self . end_list_or_dict ( writer) ?;
216+ }
217+ BencodeToken :: LineBreak => {
218+ // Ignore line breaks at the beginning, the end, or between values
219+ }
151220 }
152221
153222 self . num_processed_tokens += 1 ;
0 commit comments