Skip to content

Commit 1975e1a

Browse files
committed
Formalize linetable parsing and propagate errors from invalid linetables
1 parent 1fa3a6d commit 1975e1a

File tree

2 files changed

+113
-44
lines changed

2 files changed

+113
-44
lines changed

src/python_interpreters.rs

Lines changed: 112 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ This means we can't dereference them directly.
99

1010
#![allow(clippy::unnecessary_cast)]
1111

12+
use anyhow::{Error, Result};
13+
1214
// these bindings are automatically generated by rust bindgen
1315
// using the generate_bindings.py script
1416
use crate::python_bindings::{
@@ -65,7 +67,7 @@ pub trait CodeObject {
6567
fn argcount(&self) -> i32;
6668
fn varnames(&self) -> *mut Self::TupleObject;
6769

68-
fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32;
70+
fn get_line_number(&self, lasti: i32, table: &[u8]) -> Result<i32, Error>;
6971
}
7072

7173
pub trait BytesObject {
@@ -216,7 +218,7 @@ macro_rules! PythonCodeObjectImpl {
216218
self.co_varnames as *mut Self::TupleObject
217219
}
218220

219-
fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 {
221+
fn get_line_number(&self, lasti: i32, table: &[u8]) -> Result<i32, Error> {
220222
let lasti = lasti as i32;
221223

222224
// unpack the line table. format is specified here:
@@ -240,34 +242,89 @@ macro_rules! PythonCodeObjectImpl {
240242
line_number += increment;
241243
i += 2;
242244
}
243-
line_number
245+
Ok(line_number)
244246
}
245247
}
246248
};
247249
}
248250

249-
fn read_varint(index: &mut usize, table: &[u8]) -> usize {
250-
let mut ret: usize;
251-
let mut byte = table[*index];
252-
let mut shift = 0;
253-
*index += 1;
254-
ret = (byte & 63) as usize;
251+
struct ByteStream<'a> {
252+
cursor: usize,
253+
buffer: &'a [u8],
254+
}
255255

256-
while byte & 64 != 0 {
257-
byte = table[*index];
258-
*index += 1;
259-
shift += 6;
260-
ret += ((byte & 63) as usize) << shift;
256+
impl<'a> ByteStream<'a> {
257+
fn new(buffer: &'a [u8]) -> Self {
258+
Self { cursor: 0, buffer }
259+
}
260+
261+
fn try_read(&mut self) -> Option<u8> {
262+
if self.cursor >= self.buffer.len() {
263+
None
264+
} else {
265+
let byte = self.buffer[self.cursor];
266+
self.cursor += 1;
267+
Some(byte)
268+
}
269+
}
270+
271+
fn read(&mut self) -> Result<u8, Error> {
272+
match self.try_read() {
273+
Some(byte) => Ok(byte),
274+
None => Err(Error::msg(format!(
275+
"Tried to read after end of buffer {:?}",
276+
self.buffer
277+
))),
278+
}
279+
}
280+
281+
fn try_read_header(&mut self) -> Result<Option<u8>, Error> {
282+
match self.try_read() {
283+
Some(byte) => {
284+
if byte & 0b10000000 == 0 {
285+
Err(Error::msg(format!(
286+
"Expected header bit at cursor {} in {:?}",
287+
self.cursor, self.buffer
288+
)))
289+
} else {
290+
Ok(Some(byte))
291+
}
292+
}
293+
None => Ok(None),
294+
}
295+
}
296+
297+
fn read_body(&mut self) -> Result<u8, Error> {
298+
let byte = self.read()?;
299+
if byte & 0b10000000 == 1 {
300+
Err(Error::msg(format!(
301+
"Expected non-header bit at cursor {} in {:?}",
302+
self.cursor, self.buffer
303+
)))
304+
} else {
305+
Ok(byte)
306+
}
307+
}
308+
309+
fn read_varint(&mut self) -> Result<usize, Error> {
310+
let mut byte = self.read_body()?;
311+
let mut value = (byte & 63) as usize;
312+
let mut shift: usize = 0;
313+
while byte & 64 != 0 {
314+
byte = self.read_body()?;
315+
shift += 6;
316+
value |= ((byte & 63) as usize) << shift;
317+
}
318+
Ok(value)
261319
}
262-
ret
263-
}
264320

265-
fn read_signed_varint(index: &mut usize, table: &[u8]) -> isize {
266-
let unsigned_val = read_varint(index, table);
267-
if unsigned_val & 1 != 0 {
268-
-((unsigned_val >> 1) as isize)
269-
} else {
270-
(unsigned_val >> 1) as isize
321+
fn read_signed_varint(&mut self) -> Result<isize, Error> {
322+
let value = self.read_varint()?;
323+
Ok(if value & 1 != 0 {
324+
-((value >> 1) as isize)
325+
} else {
326+
(value >> 1) as isize
327+
})
271328
}
272329
}
273330

@@ -301,49 +358,46 @@ macro_rules! CompactCodeObjectImpl {
301358
self.co_localsplusnames as *mut Self::TupleObject
302359
}
303360

304-
fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 {
361+
fn get_line_number(&self, lasti: i32, table: &[u8]) -> Result<i32, Error> {
305362
// unpack compressed table format from python 3.11
306363
// https://github.com/python/cpython/pull/91666/files
307364
let lasti = lasti - offset_of(self, &self.co_code_adaptive) as i32;
308365
let mut line_number: i32 = self.first_lineno();
309366
let mut bytecode_address: i32 = 0;
367+
let mut stream = ByteStream::new(table);
310368

311-
let mut index: usize = 0;
312-
loop {
313-
if index >= table.len() {
314-
break;
315-
}
316-
let byte = table[index];
317-
index += 1;
318-
369+
while let Some(byte) = stream.try_read_header()? {
370+
let code = (byte >> 3) & 15;
319371
let delta = ((byte & 7) as i32) + 1;
320372
bytecode_address += delta * 2;
321-
let code = (byte >> 3) & 15;
373+
322374
let line_delta = match code {
323375
15 => 0,
324376
14 => {
325-
let delta = read_signed_varint(&mut index, table);
326-
read_varint(&mut index, table); // end line
327-
read_varint(&mut index, table); // start column
328-
read_varint(&mut index, table); // end column
377+
let delta = stream.read_signed_varint()?;
378+
stream.read_varint()?; // end line
379+
stream.read_varint()?; // start column
380+
stream.read_varint()?; // end column
329381
delta
330382
}
331-
13 => read_signed_varint(&mut index, table),
383+
13 => stream.read_signed_varint()?,
332384
10..=12 => {
333-
index += 2; // start column / end column
385+
stream.read_body()?; // start column
386+
stream.read_body()?; // end column
334387
(code - 10).into()
335388
}
336389
_ => {
337-
index += 1; // column
390+
stream.read_body()?; // column
338391
0
339392
}
340393
};
394+
341395
line_number += line_delta as i32;
342396
if bytecode_address >= lasti {
343397
break;
344398
}
345399
}
346-
line_number
400+
Ok(line_number)
347401
}
348402
}
349403
};
@@ -701,7 +755,7 @@ impl CodeObject for v3_10_0::PyCodeObject {
701755
fn varnames(&self) -> *mut Self::TupleObject {
702756
self.co_varnames as *mut Self::TupleObject
703757
}
704-
fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 {
758+
fn get_line_number(&self, lasti: i32, table: &[u8]) -> Result<i32, Error> {
705759
// in Python 3.10 we need to double the lasti instruction value here (and no I don't know why)
706760
// https://github.com/python/cpython/blob/7b88f63e1dd4006b1a08b9c9f087dd13449ecc76/Python/ceval.c#L5999
707761
// Whereas in python versions up to 3.9 we didn't.
@@ -730,7 +784,7 @@ impl CodeObject for v3_10_0::PyCodeObject {
730784
}
731785
}
732786

733-
line_number
787+
Ok(line_number)
734788
}
735789
}
736790

@@ -827,6 +881,21 @@ mod tests {
827881
128_u8, 0, 221, 4, 8, 132, 74, 136, 118, 209, 4, 22, 212, 4, 22, 208, 4, 22, 208, 4,
828882
22, 208, 4, 22,
829883
];
830-
assert_eq!(code.get_line_number(214, &table), 5);
884+
assert_eq!(code.get_line_number(214, &table).unwrap(), 5);
885+
}
886+
887+
/// Checks line-number resolution against a fixed compressed line table using
/// the python 3.12 code object bindings.
#[test]
fn test_py3_12_line_numbers() {
    use crate::python_bindings::v3_12_0::PyCodeObject;

    let table: &[u8] = &[
        128_u8, 0, 221, 4, 8, 132, 74, 136, 118, 209, 4, 22, 212, 4, 22, 208, 4, 22, 208, 4,
        22, 208, 4, 22,
    ];
    let code_object = PyCodeObject {
        co_firstlineno: 4,
        ..Default::default()
    };

    let resolved_line = code_object.get_line_number(214, table).unwrap();
    assert_eq!(resolved_line, 5);
}
832901
}

src/stack_trace.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ fn get_line_number<C: CodeObject, P: ProcessMemory>(
246246
) -> Result<i32, Error> {
247247
let table =
248248
copy_bytes(code.line_table(), process).context("Failed to copy line number table")?;
249-
Ok(code.get_line_number(lasti, &table))
249+
code.get_line_number(lasti, &table)
250250
}
251251

252252
fn get_locals<C: CodeObject, F: FrameObject, P: ProcessMemory>(

0 commit comments

Comments
 (0)