Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ jobs:
timeout-minutes: 60
env:
TOX_SKIP_MISSING_INTERPRETERS: False
VIRTUALENV_SYSTEM_SITE_PACKAGES: ${{ matrix.test_mypyc && 1 || 0 }}
# Rich (pip) -- Disable color for windows + pytest
FORCE_COLOR: ${{ !(startsWith(matrix.os, 'windows-') && startsWith(matrix.toxenv, 'py')) && 1 || 0 }}
# Tox
Expand Down Expand Up @@ -209,8 +210,10 @@ jobs:

- name: Compiled with mypyc
if: ${{ matrix.test_mypyc }}
# Use local version of librt during self-compilation in tests.
run: |
pip install -r test-requirements.txt
pip install -U mypyc/lib-rt
CC=clang MYPYC_OPT_LEVEL=0 MYPY_USE_MYPYC=1 pip install -e .

- name: Setup tox environment
Expand Down
2 changes: 2 additions & 0 deletions mypy/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
# Always use this type alias to refer to type tags.
Tag = u8

# Note: all tags should be kept in sync with lib-rt/internal/librt_internal.c.
# Primitives.
LITERAL_FALSE: Final[Tag] = 0
LITERAL_TRUE: Final[Tag] = 1
Expand All @@ -264,6 +265,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
# Four integers representing source file (line, column) range.
LOCATION: Final[Tag] = 152

RESERVED: Final[Tag] = 254
END_TAG: Final[Tag] = 255


Expand Down
20 changes: 15 additions & 5 deletions mypy/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4930,7 +4930,20 @@ def read(cls, data: ReadBuffer) -> SymbolTableNode:
sym.plugin_generated = read_bool(data)
cross_ref = read_str_opt(data)
if cross_ref is None:
sym.node = read_symbol(data)
tag = read_tag(data)
if tag == TYPE_INFO:
sym.node = TypeInfo.read(data)
else:
# This logic is temporary, to make sure we don't introduce
# regressions until we have proper lazy deserialization.
# It has negligible performance impact.
try:
from librt.internal import extract_symbol
except ImportError:
sym.node = read_symbol(data, tag)
else:
node_bytes = extract_symbol(data)
sym.node = read_symbol(ReadBuffer(node_bytes), tag)
else:
sym.cross_ref = cross_ref
assert read_tag(data) == END_TAG
Expand Down Expand Up @@ -5333,17 +5346,14 @@ def local_definitions(
TSTRING_EXPR: Final[Tag] = 229


def read_symbol(data: ReadBuffer) -> SymbolNode:
tag = read_tag(data)
def read_symbol(data: ReadBuffer, tag: Tag) -> SymbolNode:
# The branches here are ordered manually by type "popularity".
if tag == VAR:
return Var.read(data)
if tag == FUNC_DEF:
return FuncDef.read(data)
if tag == DECORATOR:
return Decorator.read(data)
if tag == TYPE_INFO:
return TypeInfo.read(data)
if tag == OVERLOADED_FUNC_DEF:
return OverloadedFuncDef.read(data)
if tag == TYPE_VAR_EXPR:
Expand Down
1 change: 1 addition & 0 deletions mypy/typeshed/stubs/librt/librt/internal.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ def read_int(data: ReadBuffer, /) -> int: ...
# Write a single u8 tag value to the given write buffer.
def write_tag(data: WriteBuffer, value: u8, /) -> None: ...
# Read a single u8 tag value from the given read buffer.
def read_tag(data: ReadBuffer, /) -> u8: ...
# Return the cache format version as a u8.
def cache_version() -> u8: ...
# Return the raw serialized bytes of the symbol at the current read position.
# The caller must already have read the symbol's opening tag; the result runs
# up to and including the symbol's closing end tag, and the buffer is advanced
# past it.
def extract_symbol(data: ReadBuffer, /) -> bytes: ...
269 changes: 269 additions & 0 deletions mypyc/lib-rt/internal/librt_internal.c
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,273 @@ write_tag(PyObject *self, PyObject *const *args, size_t nargs) {
return Py_None;
}

// Serialization tags recognized by the _skip_xxx() helpers in this file.
// All tags must be kept in sync with cache.py, nodes.py, and types.py.

// Primitive types.
// FALSE/TRUE/NONE carry no payload; the other literals are followed by
// their encoded value.
#define LITERAL_FALSE 0
#define LITERAL_TRUE 1
#define LITERAL_NONE 2
#define LITERAL_INT 3
#define LITERAL_STR 4
#define LITERAL_BYTES 5
#define LITERAL_FLOAT 6
#define LITERAL_COMPLEX 7

// Supported builtin collections.
// Each is followed by an item count and then the items themselves.
#define LIST_GEN 20
#define LIST_INT 21
#define LIST_STR 22
#define LIST_BYTES 23
#define TUPLE_GEN 24
#define DICT_STR_GEN 30

// This is the smallest custom class tag.
// _skip_object() treats every tag strictly between MYPY_FILE and RESERVED
// as a custom class (unless an earlier branch has already matched it).
#define MYPY_FILE 50

// Instance class has special formats.
// INSTANCE is the outer tag; the remaining INSTANCE_xxx values are the
// second tag that selects the layout (see _skip_instance()).
#define INSTANCE 80
#define INSTANCE_SIMPLE 81
#define INSTANCE_GENERIC 82
#define INSTANCE_STR 83
#define INSTANCE_FUNCTION 84
#define INSTANCE_INT 85
#define INSTANCE_BOOL 86
#define INSTANCE_OBJECT 87

// RESERVED is the exclusive upper bound for custom class tags;
// END_TAG terminates the field sequence of a custom class.
#define RESERVED 254
#define END_TAG 255

// Forward declaration. _skip_object() and the container/class skipping
// helpers call each other recursively, so the helpers need the prototype
// before _skip_object() itself is defined.
static char _skip_object(PyObject *data, uint8_t tag);

// Advance the read position of `data` by `size` bytes without decoding them.
//
// Returns CPY_NONE on success. _CHECK_READ runs before the position is
// touched and returns CPY_NONE_ERROR from this function if its check fails
// (presumably a bounds check on the remaining data -- see the macro).
static inline char
_skip(PyObject *data, Py_ssize_t size) {
    // All _skip_xxx() helpers report failures through their return value,
    // so errors are never silently ignored while skipping.
    _CHECK_READ(data, size, CPY_NONE_ERROR)
    ReadBufferObject *buf = (ReadBufferObject *)data;
    buf->ptr += size;
    return CPY_NONE;
}

// Skip the remaining bytes of a short int whose first byte (`first`) has
// already been consumed by the caller.
//
// The flag bits in `first` select the encoding width: no extra bytes,
// one extra byte, or three extra bytes.
static inline char
_skip_short_int(PyObject *data, uint8_t first) {
    if (!(first & TWO_BYTES_INT_BIT)) {
        // The whole value was stored in the first byte; nothing left to skip.
        return CPY_NONE;
    }
    return _skip(data, (first & FOUR_BYTES_INT_BIT) ? 3 : 1);
}

// Skip one serialized int (short or long encoding) without decoding it.
//
// Returns CPY_NONE on success and CPY_NONE_ERROR on failure. A ValueError
// is set here when the long-int header is negative.
static inline char
_skip_int(PyObject *data) {
    _CHECK_READ(data, 1, CPY_NONE_ERROR)

    uint8_t lead;
    _READ(&lead, data, uint8_t);
    if (likely(lead != LONG_INT_TRAILER)) {
        // Common case: a short int, at most a few more bytes to skip.
        return _skip_short_int(data, lead);
    }

    // Long encoding: a short-int header follows. Its value shifted right
    // by two is the number of payload bytes to skip (the low bits are not
    // part of the size).
    _CHECK_READ(data, 1, CPY_NONE_ERROR)
    _READ(&lead, data, uint8_t);
    Py_ssize_t header = _read_short_int(data, lead);
    if (header == CPY_INT_TAG) {
        // _read_short_int() signalled an error.
        return CPY_NONE_ERROR;
    }
    if (header < 0) {
        PyErr_SetString(PyExc_ValueError, "invalid int data");
        return CPY_NONE_ERROR;
    }
    return _skip(data, header >> 2);
}

// Read a non-negative item count / byte length from `data`.
//
// Thin wrapper around _read_short_int() that validates the result.
// Returns the size on success, or -1 on failure. A ValueError is set here
// when the value uses the long-int encoding or decodes to a negative number.
static inline Py_ssize_t
_read_size(PyObject *data) {
    _CHECK_READ(data, 1, -1)
    uint8_t lead;
    _READ(&lead, data, uint8_t);
    // Sizes that need the long-int encoding are rejected here: they are not
    // needed in practice, so fail with ValueError rather than handle them.
    if (unlikely(lead == LONG_INT_TRAILER)) {
        PyErr_SetString(PyExc_ValueError, "unsupported size");
        return -1;
    }
    CPyTagged tagged = _read_short_int(data, lead);
    if (tagged == CPY_INT_TAG) {
        // _read_short_int() signalled an error.
        return -1;
    }
    if ((Py_ssize_t)tagged < 0) {
        PyErr_SetString(PyExc_ValueError, "invalid size");
        return -1;
    }
    // Drop the low tag bit to recover the plain integer value.
    return (Py_ssize_t)(tagged >> 1);
}

// Skip a length-prefixed str/bytes payload: a size followed by that many
// raw bytes. Returns CPY_NONE on success, CPY_NONE_ERROR on failure.
static inline char
_skip_str_bytes(PyObject *data) {
    Py_ssize_t length = _read_size(data);
    if (length >= 0) {
        return _skip(data, length);
    }
    return CPY_NONE_ERROR;
}

// List/dict logic should be kept in sync with mypy/cache.py
//
// Skip a generic list/tuple payload: an item count followed by that many
// tagged objects. Returns CPY_NONE on success, CPY_NONE_ERROR on failure.
static inline char
_skip_list_gen(PyObject *data) {
    Py_ssize_t size = _read_size(data);
    if (size < 0)
        return CPY_NONE_ERROR;
    // The counter has the same type as `size` so the comparison is not
    // mixed-width and the counter cannot overflow before reaching `size`.
    Py_ssize_t i;
    for (i = 0; i < size; i++) {
        uint8_t tag = read_tag_internal(data);
        // CPY_LL_UINT_ERROR is only an error if an exception is also set.
        if (unlikely(tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
            return CPY_NONE_ERROR;
        }
        if (unlikely(_skip_object(data, tag) == CPY_NONE_ERROR))
            return CPY_NONE_ERROR;
    }
    return CPY_NONE;
}

// Skip a list-of-int payload: an item count followed by that many
// serialized ints (no per-item tag). Returns CPY_NONE on success,
// CPY_NONE_ERROR on failure.
static inline char
_skip_list_int(PyObject *data) {
    Py_ssize_t size = _read_size(data);
    if (size < 0)
        return CPY_NONE_ERROR;
    // The counter has the same type as `size` so the comparison is not
    // mixed-width and the counter cannot overflow before reaching `size`.
    Py_ssize_t i;
    for (i = 0; i < size; i++) {
        if (unlikely(_skip_int(data) == CPY_NONE_ERROR))
            return CPY_NONE_ERROR;
    }
    return CPY_NONE;
}

// Skip a list-of-str or list-of-bytes payload: an item count followed by
// that many length-prefixed strings (no per-item tag). Returns CPY_NONE on
// success, CPY_NONE_ERROR on failure.
static inline char
_skip_list_str_bytes(PyObject *data) {
    Py_ssize_t size = _read_size(data);
    if (size < 0)
        return CPY_NONE_ERROR;
    // The counter has the same type as `size` so the comparison is not
    // mixed-width and the counter cannot overflow before reaching `size`.
    Py_ssize_t i;
    for (i = 0; i < size; i++) {
        if (unlikely(_skip_str_bytes(data) == CPY_NONE_ERROR))
            return CPY_NONE_ERROR;
    }
    return CPY_NONE;
}

// Skip a dict payload with string keys: an entry count followed by that
// many (key, value) pairs. Returns CPY_NONE on success, CPY_NONE_ERROR on
// failure.
static inline char
_skip_dict_str_gen(PyObject *data) {
    Py_ssize_t size = _read_size(data);
    if (size < 0)
        return CPY_NONE_ERROR;
    // The counter has the same type as `size` so the comparison is not
    // mixed-width and the counter cannot overflow before reaching `size`.
    Py_ssize_t i;
    for (i = 0; i < size; i++) {
        // Bare key followed by tagged value.
        if (unlikely(_skip_str_bytes(data) == CPY_NONE_ERROR))
            return CPY_NONE_ERROR;
        uint8_t tag = read_tag_internal(data);
        // CPY_LL_UINT_ERROR is only an error if an exception is also set.
        if (unlikely(tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
            return CPY_NONE_ERROR;
        }
        if (unlikely(_skip_object(data, tag) == CPY_NONE_ERROR))
            return CPY_NONE_ERROR;
    }
    return CPY_NONE;
}

// Skip the body of a custom class: a sequence of tagged objects terminated
// by END_TAG. As in mypy/cache.py, the caller is expected to have consumed
// the opening class tag already.
//
// Returns CPY_NONE once END_TAG has been consumed, CPY_NONE_ERROR on failure.
static inline char
_skip_class(PyObject *data) {
    for (;;) {
        uint8_t field_tag = read_tag_internal(data);
        // CPY_LL_UINT_ERROR is only an error if an exception is also set.
        if (unlikely(field_tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
            return CPY_NONE_ERROR;
        }
        if (field_tag == END_TAG) {
            break;
        }
        if (unlikely(_skip_object(data, field_tag) == CPY_NONE_ERROR)) {
            return CPY_NONE_ERROR;
        }
    }
    return CPY_NONE;
}

// Skip an Instance, which uses a compact layout as an important
// optimization. The caller has consumed the INSTANCE tag; a second tag read
// here selects the layout:
//   INSTANCE_SIMPLE                  -> followed by one str/bytes payload
//   INSTANCE_GENERIC                 -> followed by a class body up to END_TAG
//   INSTANCE_STR .. INSTANCE_OBJECT  -> no payload at all
// Any other second tag raises ValueError.
static inline char
_skip_instance(PyObject *data) {
    uint8_t kind = read_tag_internal(data);
    // CPY_LL_UINT_ERROR is only an error if an exception is also set.
    if (unlikely(kind == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
        return CPY_NONE_ERROR;
    }
    switch (kind) {
    case INSTANCE_SIMPLE:
        return _skip_str_bytes(data);
    case INSTANCE_GENERIC:
        return _skip_class(data);
    default:
        break;
    }
    if (INSTANCE_STR <= kind && kind <= INSTANCE_OBJECT) {
        return CPY_NONE;
    }
    PyErr_Format(PyExc_ValueError, "Unexpected instance tag: %d", kind);
    return CPY_NONE_ERROR;
}

// This is the main dispatch point. Branches are ordered manually
// based roughly on frequency in self-check.
//
// Skips the payload of one serialized object whose tag has already been read
// by the caller. Returns CPY_NONE on success and CPY_NONE_ERROR on failure;
// an unrecognized tag raises ValueError.
//
// The order is not purely cosmetic: INSTANCE lies inside the custom-class
// tag range, so its branch must stay before the range check.
//
// NOTE(review): the range check uses a strict `>`, so MYPY_FILE itself falls
// through to "Unsupported tag" -- confirm this exclusion is intentional.
// NOTE(review): this function and the container/class helpers recurse into
// each other with no depth limit visible here; deeply nested (e.g. corrupted)
// data could exhaust the C stack -- confirm whether a guard is needed.
static char
_skip_object(PyObject *data, uint8_t tag) {
if (tag == LITERAL_STR || tag == LITERAL_BYTES)
return _skip_str_bytes(data);
// These literals have no payload.
if (tag == LITERAL_NONE || tag == LITERAL_FALSE || tag == LITERAL_TRUE)
return CPY_NONE;
if (tag == LIST_GEN || tag == TUPLE_GEN)
return _skip_list_gen(data);
if (tag == LITERAL_INT)
return _skip_int(data);
if (tag == INSTANCE)
return _skip_instance(data);
// Any other custom class: tagged fields terminated by END_TAG.
if (tag > MYPY_FILE && tag < RESERVED)
return _skip_class(data);
if (tag == LIST_INT)
return _skip_list_int(data);
if (tag == LIST_STR || tag == LIST_BYTES)
return _skip_list_str_bytes(data);
if (tag == DICT_STR_GEN)
return _skip_dict_str_gen(data);
// Fixed-size payloads: 8 bytes for a float, 16 bytes for a complex.
if (tag == LITERAL_FLOAT)
return _skip(data, 8);
if (tag == LITERAL_COMPLEX)
return _skip(data, 16);
PyErr_Format(PyExc_ValueError, "Unsupported tag: %d", tag);
return CPY_NONE_ERROR;
}

// Extract the serialized form of one symbol as a bytes object.
//
// The caller must already have read the symbol's opening tag. The class body
// is skipped up to and including its END_TAG, and the bytes covered by that
// skip are returned as a new bytes object. The read position of `data` is
// left just past the symbol. Returns NULL with an exception set on failure.
static PyObject*
extract_symbol_internal(PyObject *data) {
    ReadBufferObject *buf = (ReadBufferObject *)data;
    char *start = buf->ptr;
    if (unlikely(_skip_class(data) == CPY_NONE_ERROR)) {
        return NULL;
    }
    Py_ssize_t length = buf->ptr - start;
    // A NULL result (allocation failure) is passed straight to the caller.
    return PyBytes_FromStringAndSize(start, length);
}

// Python-level entry point for extract_symbol(data), using the fastcall
// argument convention.
//
// Requires exactly one positional argument, otherwise raises TypeError.
// _CHECK_READ_BUFFER returns NULL from this function if its check on the
// argument fails. The actual work is done by extract_symbol_internal().
static PyObject*
extract_symbol(PyObject *self, PyObject *const *args, size_t nargs) {
    if (unlikely(nargs != 1)) {
        PyErr_Format(PyExc_TypeError,
                     "extract_symbol() takes exactly 1 argument (%zu given)", nargs);
        return NULL;
    }

    PyObject *buffer = args[0];
    _CHECK_READ_BUFFER(buffer, NULL)
    return extract_symbol_internal(buffer);
}

static uint8_t
cache_version_internal(void) {
return 0;
Expand Down Expand Up @@ -954,6 +1221,7 @@ static PyMethodDef librt_internal_module_methods[] = {
{"write_tag", (PyCFunction)write_tag, METH_FASTCALL, PyDoc_STR("write a short int")},
{"read_tag", (PyCFunction)read_tag, METH_FASTCALL, PyDoc_STR("read a short int")},
{"cache_version", (PyCFunction)cache_version, METH_NOARGS, PyDoc_STR("cache format version")},
{"extract_symbol", (PyCFunction)extract_symbol, METH_FASTCALL, PyDoc_STR("extract bytes for a mypy symbol")},
{NULL, NULL, 0, NULL}
};

Expand Down Expand Up @@ -1005,6 +1273,7 @@ librt_internal_module_exec(PyObject *m)
(void *)ReadBuffer_type_internal,
(void *)WriteBuffer_type_internal,
(void *)NativeInternal_API_Version,
(void *)extract_symbol_internal
};
PyObject *c_api_object = PyCapsule_New((void *)NativeInternal_API, "librt.internal._C_API", NULL);
if (PyModule_Add(m, "_C_API", c_api_object) < 0) {
Expand Down
Loading
Loading