Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions sharrow/aster.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import sys
import tokenize

from .debug import trunc_repr

try:
from ast import unparse
except ImportError:
Expand Down Expand Up @@ -394,6 +396,8 @@ def _replacement(
# return original_node

dim_slots = self.dim_slots
logger.info(f"VLC: _replacement: topname={trunc_repr(topname)}, attr={attr}, dim_slots={trunc_repr(dim_slots)} spacevars={trunc_repr(self.spacevars)}"
f", transpose_lead={trunc_repr(transpose_lead)}, ")
if isinstance(self.spacevars, dict):
dim_slots = self.spacevars[attr]

Expand All @@ -413,6 +417,11 @@ def _replacement(
if isinstance(n, int):
elts.append(ast.Name(id=f"_arg{n:02}", ctx=ast.Load()))
elif isinstance(n, dict):
try:
n_val = n[missing_dim_value]
except KeyError:
tmp = repr(n)
print(f"missing dim failing, bubbling up\n {missing_dim_value}, '{tmp[:50]}...{tmp[-50:]}'")
if sys.version_info >= (3, 8):
elts.append(
ast.Constant(n=n[missing_dim_value], ctx=ast.Load())
Expand Down
6 changes: 6 additions & 0 deletions sharrow/debug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def trunc_repr(thing, n=160, slack=20):
    """Return ``repr(thing)``, truncated to roughly *n* characters.

    The repr is returned unmodified when it is shorter than ``n + slack``,
    because truncating it would save too little space to justify losing
    information.  Otherwise the first and last ``n // 2`` characters are
    kept, joined by an ellipsis, so both the head and the tail of the
    repr stay visible in debug output.

    Parameters
    ----------
    thing : Any
        Object whose repr is wanted.
    n : int, default 160
        Approximate maximum length of the returned string (the result of
        a truncation is ``2 * (n // 2) + 3`` characters long).
    slack : int, default 20
        Extra length tolerated before truncation kicks in.

    Returns
    -------
    str
    """
    half = n // 2
    full = repr(thing)
    if len(full) < n + slack:
        # Short enough — keep the whole repr.
        return full
    return full[:half] + "..." + full[-half:]
39 changes: 33 additions & 6 deletions sharrow/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from .filewrite import blacken, rewrite
from .relationships import DataTree
from .table import Table
from .debug import trunc_repr

logger = logging.getLogger("sharrow")

Expand Down Expand Up @@ -553,15 +554,17 @@ def __new__(
cache_dir=None,
name=None,
dtype="float32",
boundscheck=False,
boundscheck=True,
nopython=True,
# nopython=False,
fastmath=True,
parallel=True,
readme=None,
flow_library=None,
extra_hash_data=(),
write_hash_audit=True,
hashing_level=1,
# hashing_level=1,
hashing_level=100,
dim_order=None,
dim_exclude=None,
):
Expand Down Expand Up @@ -803,8 +806,14 @@ def init_sub_funcs(
for n, (k, expr) in enumerate(defs.items()):
expr = str(expr).lstrip()
init_expr = expr
print("expr:", expr)
for spacename, spacearrays in self.tree.subspaces.items():
print("\tspace", spacename)
dim_slots, digital_encodings = meta_data[spacename]
# dim_slots: Dict[data_cols_in_xr or digitisedOffset of a relation, astNode]
# if skim, sometimes is a Dict[str, Dict[label ->index]]
# display(dim_slots)
# display(digital_encodings)
try:
expr = expression_for_numba(
expr,
Expand All @@ -814,6 +823,17 @@ def init_sub_funcs(
digital_encodings=digital_encodings,
)
except KeyError as key_err:
# print(key_err, type(key_err), type(repr(key_err)))
# print(vars(key_err))
if str(key_err) == "None":
# apparently keyerrors can't have newlines
raise ValueError("VLCKeyError: expression_for_numba raised a null keyerror\n"
f"args were \n\t'{expr=}'\n\t'{spacename=}','{trunc_repr(dim_slots)=}...', '{digital_encodings=}'"
)
print(key_err)
# if key_err.args is None:
# print("Custom:args none", key_err)
# raise
if ".." in key_err.args[0]:
topkey, attrkey = key_err.args[0].split("..")
else:
Expand Down Expand Up @@ -1279,9 +1299,9 @@ def iload_raw(
):
assert isinstance(rg, DataTree)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", category=nb.NumbaExperimentalFeatureWarning
)
# warnings.filterwarnings(
# "ignore", category=nb.NumbaExperimentalFeatureWarning
# )
try:
if runner is None:
if mnl is not None:
Expand Down Expand Up @@ -1320,10 +1340,16 @@ def iload_raw(
tree_root_dims[i]
for i in presorted(tree_root_dims, self.dim_order, self.dim_exclude)
]
# for c in argument:
# print(np.dtype(c))
# logger.info(f"VLC: rg={rg}, {argshape=}, {arguments}, {kwargs}")
return runner_(np.asarray(argshape), *arguments, **kwargs)
except nb.TypingError as err:
except nb.core.errors.TypingError as err:
_raw_functions = getattr(self, "_raw_functions", {})
logger.error(f"nb.TypingError in {len(_raw_functions)} functions")
for c in arguments:
if c.dtype.kind in {'U', 'S'}:
logging.error(f"VLC: string array detected! {c}:shape={c.shape}")
for k, v in _raw_functions.items():
logger.error(f"{k} = {v[0]} = {v[1]}")
if "NameError:" in err.args[0]:
Expand Down Expand Up @@ -1408,6 +1434,7 @@ def _load(
if not source.relationships_are_digitized:
source = source.digitize_relationships()
if source.relationships_are_digitized:
# logger.info(f"VLC: {source=}, {runner=}, {dtype=}, {dot=}")
if mnl_draws is None:
result = self.iload_raw(source, runner=runner, dtype=dtype, dot=dot)
else:
Expand Down
6 changes: 4 additions & 2 deletions sharrow/relationships.py
Original file line number Diff line number Diff line change
Expand Up @@ -1033,6 +1033,7 @@ def digitize_relationships(self, inplace=False, redigitize=True):

for e in obj._graph.edges:
r = obj._get_relationship(e)
logger.info(f"processing relationship {r}, {r.parent_name}")
if redigitize and r.analog:
p_dataset = obj._graph.nodes[r.parent_data].get("dataset", None)
if p_dataset is not None:
Expand All @@ -1042,7 +1043,7 @@ def digitize_relationships(self, inplace=False, redigitize=True):
if r.indexing == "label":
p_dataset = obj._graph.nodes[r.parent_data].get("dataset", None)
c_dataset = obj._graph.nodes[r.child_data].get("dataset", None)

upstream = p_dataset[r.parent_name]
downstream = c_dataset[r.child_name]

Expand Down Expand Up @@ -1146,7 +1147,8 @@ def _arg_tokenizer(self, spacename, spacearray, exclude_dims=None):
n_missing_tokens += 1

if n_missing_tokens > 1:
raise ValueError("at most one missing dimension is allowed")
raise ValueError("at most one missing dimension is allowed. VLC: probably a disconnected relationship "
"graph / unused variable")
result = []
for t in tokens:
if isinstance(t, str):
Expand Down