-
Couldn't load subscription status.
- Fork 1k
Open
Labels
Description
Summary
When a model stores submodules/buffers/parameters in user-managed dicts and accesses them via dict lookup in forward() (instead of attribute access or nn.ModuleDict/ParameterDict), running under nn.Graph emits a warning but then segfaults deep in functional ops (e.g., functional::add).
This indicates that nn.Graph’s module/param tracking and state capture are bypassed by dict access, and the runtime proceeds to a backend path that later crashes rather than failing early with a clear Python error.
Code to reproduce bug
import oneflow as flow
import oneflow.nn as nn
import numpy as np
# Fix the OneFlow and NumPy RNG seeds so that repeated runs of this
# reproduction start from the same random state.
flow.manual_seed(0)
np.random.seed(0)
class BaseModule(nn.Module):
    """Base module that mirrors registered state into user-managed dicts.

    Each ``add_*`` helper first registers the object through the regular
    ``nn.Module`` registration call and then stores the very same object in
    a plain ``dict`` attribute, so that subclasses can fetch it by key.
    """

    def __init__(self):
        super().__init__()
        # Plain Python dicts (not nn.ModuleDict / nn.ParameterDict).
        self.layers = dict()
        # NOTE(review): ``_buffers`` looks like the attribute name nn.Module
        # itself uses for its buffer registry, so this assignment presumably
        # replaces that registry with a plain dict -- confirm whether this is
        # intended as part of the reproduction.
        self._buffers = dict()
        self.params = dict()

    def add_layer(self, name: str, layer: nn.Module):
        """Register ``layer`` as a submodule and mirror it in ``self.layers``."""
        # Formal registration with the module.
        self.add_module(name, layer)
        # Dict mirror: forward() reads the layer from here, which is the
        # access pattern reported to break Graph tracking.
        self.layers[name] = layer

    def add_buffer(self, name: str, buffer: flow.Tensor):
        """Register ``buffer`` and mirror it in ``self._buffers``."""
        # Formal registration with the module.
        self.register_buffer(name, buffer)
        # Dict mirror: forward() reads the buffer from here.
        self._buffers[name] = buffer

    def add_parameter(self, name: str, param: nn.Parameter):
        """Register ``param`` and mirror it in ``self.params``."""
        self.register_parameter(name, param)
        self.params[name] = param
class MyModel(BaseModule):
    """Two 10x10 linear layers, one buffer and one scale parameter.

    All four objects are added through the ``BaseModule`` helpers and are
    read back in ``forward`` by dict key only.
    """

    def __init__(self):
        super().__init__()
        self.add_layer("linear1", nn.Linear(10, 10))
        self.add_layer("linear2", nn.Linear(10, 10))
        self.add_buffer("buf", flow.randn(10))
        self.add_parameter("scale", nn.Parameter(flow.randn(1, 10)))

    def forward(self, x):
        """Apply linear1, add the buffer, apply linear2, multiply by scale."""
        # KEY to the reproduction: every object is looked up in a dict
        # instead of being read as self.linear1 / self.buf / self.scale.
        first_linear = self.layers["linear1"]
        x = first_linear(x)

        # The reported crash surfaces later in the functional::add path.
        offset = self._buffers["buf"]
        x = x + offset

        second_linear = self.layers["linear2"]
        x = second_linear(x)

        scale = self.params["scale"]
        x = x * scale
        return x
def GetInput():
    """Return a random float32 tensor created with ``flow.randn(1, 10)``."""
    sample = flow.randn(1, 10, dtype=flow.float32)
    return sample
def trigger():
    """Build the model, wrap it in an nn.Graph and run it once."""
    net = MyModel()
    sample = GetInput()

    # Minimal graph: stores the model and forwards to it from build().
    class G(nn.Graph):
        def __init__(self, m):
            super().__init__()
            self.m = m

        def build(self, inp):
            return self.m(inp)

    graph = G(net)
    # Reported behaviour: a warning is emitted, then the process segfaults.
    out = graph(sample)
    # Usually never reached because of the crash on the call above.
    _ = out.numpy()
if __name__ == "__main__":
    trigger()

Output
.../oneflow/nn/modules/module.py:204: UserWarning:
Linear(in_features=10, out_features=10, bias=True) is called in a nn.Graph, but not registered into a nn.Graph.
Stack trace (most recent call last):
... functional::add(...)
... OpInterpUtil::Dispatch ...
Segmentation fault (Address not mapped to object [0x61])
Segmentation fault (core dumped)
System Information
- OS: Ubuntu 22.04.4 LTS (x86_64)
- OneFlow version: 1.0.0.dev20250921+cpu
- Python version: 3.10.16