@@ -9,6 +9,8 @@ This means we can't dereference them directly.
99
1010#![ allow( clippy:: unnecessary_cast) ]
1111
12+ use anyhow:: { Error , Result } ;
13+
1214// these bindings are automatically generated by rust bindgen
1315// using the generate_bindings.py script
1416use crate :: python_bindings:: {
@@ -65,7 +67,7 @@ pub trait CodeObject {
6567 fn argcount ( & self ) -> i32 ;
6668 fn varnames ( & self ) -> * mut Self :: TupleObject ;
6769
68- fn get_line_number ( & self , lasti : i32 , table : & [ u8 ] ) -> i32 ;
70+ fn get_line_number ( & self , lasti : i32 , table : & [ u8 ] ) -> Result < i32 , Error > ;
6971}
7072
7173pub trait BytesObject {
@@ -216,7 +218,7 @@ macro_rules! PythonCodeObjectImpl {
216218 self . co_varnames as * mut Self :: TupleObject
217219 }
218220
219- fn get_line_number( & self , lasti: i32 , table: & [ u8 ] ) -> i32 {
221+ fn get_line_number( & self , lasti: i32 , table: & [ u8 ] ) -> Result < i32 , Error > {
220222 let lasti = lasti as i32 ;
221223
222224 // unpack the line table. format is specified here:
@@ -240,34 +242,89 @@ macro_rules! PythonCodeObjectImpl {
240242 line_number += increment;
241243 i += 2 ;
242244 }
243- line_number
245+ Ok ( line_number)
244246 }
245247 }
246248 } ;
247249}
248250
249- fn read_varint ( index : & mut usize , table : & [ u8 ] ) -> usize {
250- let mut ret: usize ;
251- let mut byte = table[ * index] ;
252- let mut shift = 0 ;
253- * index += 1 ;
254- ret = ( byte & 63 ) as usize ;
251+ struct ByteStream < ' a > {
252+ cursor : usize ,
253+ buffer : & ' a [ u8 ] ,
254+ }
255255
256- while byte & 64 != 0 {
257- byte = table[ * index] ;
258- * index += 1 ;
259- shift += 6 ;
260- ret += ( ( byte & 63 ) as usize ) << shift;
256+ impl < ' a > ByteStream < ' a > {
257+ fn new ( buffer : & ' a [ u8 ] ) -> Self {
258+ Self { cursor : 0 , buffer }
259+ }
260+
261+ fn try_read ( & mut self ) -> Option < u8 > {
262+ if self . cursor >= self . buffer . len ( ) {
263+ None
264+ } else {
265+ let byte = self . buffer [ self . cursor ] ;
266+ self . cursor += 1 ;
267+ Some ( byte)
268+ }
269+ }
270+
271+ fn read ( & mut self ) -> Result < u8 , Error > {
272+ match self . try_read ( ) {
273+ Some ( byte) => Ok ( byte) ,
274+ None => Err ( Error :: msg ( format ! (
275+ "Tried to read after end of buffer {:?}" ,
276+ self . buffer
277+ ) ) ) ,
278+ }
279+ }
280+
281+ fn try_read_header ( & mut self ) -> Result < Option < u8 > , Error > {
282+ match self . try_read ( ) {
283+ Some ( byte) => {
284+ if byte & 0b10000000 == 0 {
285+ Err ( Error :: msg ( format ! (
286+ "Expected header bit at cursor {} in {:?}" ,
287+ self . cursor, self . buffer
288+ ) ) )
289+ } else {
290+ Ok ( Some ( byte) )
291+ }
292+ }
293+ None => Ok ( None ) ,
294+ }
295+ }
296+
297+ fn read_body ( & mut self ) -> Result < u8 , Error > {
298+ let byte = self . read ( ) ?;
299+ if byte & 0b10000000 == 1 {
300+ Err ( Error :: msg ( format ! (
301+ "Expected non-header bit at cursor {} in {:?}" ,
302+ self . cursor, self . buffer
303+ ) ) )
304+ } else {
305+ Ok ( byte)
306+ }
307+ }
308+
309+ fn read_varint ( & mut self ) -> Result < usize , Error > {
310+ let mut byte = self . read_body ( ) ?;
311+ let mut value = ( byte & 63 ) as usize ;
312+ let mut shift: usize = 0 ;
313+ while byte & 64 != 0 {
314+ byte = self . read_body ( ) ?;
315+ shift += 6 ;
316+ value |= ( ( byte & 63 ) as usize ) << shift;
317+ }
318+ Ok ( value)
261319 }
262- ret
263- }
264320
265- fn read_signed_varint ( index : & mut usize , table : & [ u8 ] ) -> isize {
266- let unsigned_val = read_varint ( index, table) ;
267- if unsigned_val & 1 != 0 {
268- -( ( unsigned_val >> 1 ) as isize )
269- } else {
270- ( unsigned_val >> 1 ) as isize
321+ fn read_signed_varint ( & mut self ) -> Result < isize , Error > {
322+ let value = self . read_varint ( ) ?;
323+ Ok ( if value & 1 != 0 {
324+ -( ( value >> 1 ) as isize )
325+ } else {
326+ ( value >> 1 ) as isize
327+ } )
271328 }
272329}
273330
@@ -301,49 +358,46 @@ macro_rules! CompactCodeObjectImpl {
301358 self . co_localsplusnames as * mut Self :: TupleObject
302359 }
303360
304- fn get_line_number( & self , lasti: i32 , table: & [ u8 ] ) -> i32 {
361+ fn get_line_number( & self , lasti: i32 , table: & [ u8 ] ) -> Result < i32 , Error > {
305362 // unpack compressed table format from python 3.11
306363 // https://github.com/python/cpython/pull/91666/files
307364 let lasti = lasti - offset_of( self , & self . co_code_adaptive) as i32 ;
308365 let mut line_number: i32 = self . first_lineno( ) ;
309366 let mut bytecode_address: i32 = 0 ;
367+ let mut stream = ByteStream :: new( table) ;
310368
311- let mut index: usize = 0 ;
312- loop {
313- if index >= table. len( ) {
314- break ;
315- }
316- let byte = table[ index] ;
317- index += 1 ;
318-
369+ while let Some ( byte) = stream. try_read_header( ) ? {
370+ let code = ( byte >> 3 ) & 15 ;
319371 let delta = ( ( byte & 7 ) as i32 ) + 1 ;
320372 bytecode_address += delta * 2 ;
321- let code = ( byte >> 3 ) & 15 ;
373+
322374 let line_delta = match code {
323375 15 => 0 ,
324376 14 => {
325- let delta = read_signed_varint( & mut index , table ) ;
326- read_varint( & mut index , table ) ; // end line
327- read_varint( & mut index , table ) ; // start column
328- read_varint( & mut index , table ) ; // end column
377+ let delta = stream . read_signed_varint( ) ? ;
378+ stream . read_varint( ) ? ; // end line
379+ stream . read_varint( ) ? ; // start column
380+ stream . read_varint( ) ? ; // end column
329381 delta
330382 }
331- 13 => read_signed_varint( & mut index , table ) ,
383+ 13 => stream . read_signed_varint( ) ? ,
332384 10 ..=12 => {
333- index += 2 ; // start column / end column
385+ stream. read_body( ) ?; // start column
386+ stream. read_body( ) ?; // end column
334387 ( code - 10 ) . into( )
335388 }
336389 _ => {
337- index += 1 ; // column
390+ stream . read_body ( ) ? ; // column
338391 0
339392 }
340393 } ;
394+
341395 line_number += line_delta as i32 ;
342396 if bytecode_address >= lasti {
343397 break ;
344398 }
345399 }
346- line_number
400+ Ok ( line_number)
347401 }
348402 }
349403 } ;
@@ -701,7 +755,7 @@ impl CodeObject for v3_10_0::PyCodeObject {
701755 fn varnames ( & self ) -> * mut Self :: TupleObject {
702756 self . co_varnames as * mut Self :: TupleObject
703757 }
704- fn get_line_number ( & self , lasti : i32 , table : & [ u8 ] ) -> i32 {
758+ fn get_line_number ( & self , lasti : i32 , table : & [ u8 ] ) -> Result < i32 , Error > {
705759 // in Python 3.10 we need to double the lasti instruction value here (and no I don't know why)
706760 // https://github.com/python/cpython/blob/7b88f63e1dd4006b1a08b9c9f087dd13449ecc76/Python/ceval.c#L5999
707761 // Whereas in python versions up to 3.9 we didn't.
@@ -730,7 +784,7 @@ impl CodeObject for v3_10_0::PyCodeObject {
730784 }
731785 }
732786
733- line_number
787+ Ok ( line_number)
734788 }
735789}
736790
@@ -827,6 +881,21 @@ mod tests {
827881 128_u8 , 0 , 221 , 4 , 8 , 132 , 74 , 136 , 118 , 209 , 4 , 22 , 212 , 4 , 22 , 208 , 4 , 22 , 208 , 4 ,
828882 22 , 208 , 4 , 22 ,
829883 ] ;
830- assert_eq ! ( code. get_line_number( 214 , & table) , 5 ) ;
884+ assert_eq ! ( code. get_line_number( 214 , & table) . unwrap( ) , 5 ) ;
885+ }
886+
/// Decodes a fixed line table against a 3.12 code object whose first line is 4
/// and checks that instruction offset 214 resolves to line 5.
#[test]
fn test_py3_12_line_numbers() {
    use crate::python_bindings::v3_12_0::PyCodeObject;

    let line_table: &[u8] = &[
        128, 0, 221, 4, 8, 132, 74, 136, 118, 209, 4, 22, 212, 4, 22, 208, 4, 22, 208, 4, 22,
        208, 4, 22,
    ];
    let code = PyCodeObject {
        co_firstlineno: 4,
        ..Default::default()
    };

    let resolved = code.get_line_number(214, line_table).unwrap();
    assert_eq!(resolved, 5);
}
832901}
0 commit comments