Skip to content

Commit 8a53365

Browse files
authored
update module importer (#2636)
Update the ModuleImporter class to use `find_spec` instead of `find_module`, and `create_module` plus `exec_module` instead of `load_module`. Python 3.12 removed the fallback that looked for a `find_module()` method when a meta_path entry did not have a `find_spec()` method, so the escape hatch did not work correctly in this version when running with `--environment=conda`.
1 parent 65cbcd6 commit 8a53365

File tree

1 file changed

+102
-72
lines changed

1 file changed

+102
-72
lines changed

metaflow/plugins/env_escape/client_modules.py

Lines changed: 102 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import atexit
22
import importlib
3+
import importlib.util
34
import itertools
45
import pickle
56
import re
@@ -41,6 +42,8 @@ def __init__(self, loader, prefix, exports, client):
4142
def __getattr__(self, name):
4243
if name == "__loader__":
4344
return self._loader
45+
if name == "__spec__":
46+
return importlib.util.spec_from_loader(self._prefix, self._loader)
4447
if name in ("__name__", "__package__"):
4548
return self._prefix
4649
if name in ("__file__", "__path__"):
@@ -71,7 +74,8 @@ def func(*args, **kwargs):
7174
# Try to see if this is a submodule that we can load
7275
m = None
7376
try:
74-
m = self._loader.load_module(".".join([self._prefix, name]))
77+
submodule_name = ".".join([self._prefix, name])
78+
m = importlib.import_module(submodule_name)
7579
except ImportError:
7680
pass
7781
if m is None:
@@ -117,7 +121,28 @@ def __setattr__(self, name, value):
117121

118122

119123
class ModuleImporter(object):
120-
# This ModuleImporter implements the Importer Protocol defined in PEP 302
124+
"""
125+
A custom import hook that proxies module imports to a different Python environment.
126+
127+
This class implements the MetaPathFinder and Loader protocols (PEP 451) to enable
128+
"environment escape" - allowing the current Python process to import and use modules
129+
from a different Python interpreter with potentially different versions or packages.
130+
131+
When a module is imported through this importer:
132+
1. A client spawns a server process in the target Python environment
133+
2. The module is loaded in the remote environment
134+
3. A _WrappedModule proxy is returned that forwards all operations (function calls,
135+
attribute access, etc.) to the remote environment via RPC
136+
4. Data is serialized/deserialized using pickle for cross-environment communication
137+
138+
Args:
139+
python_executable: Path to the Python interpreter for the remote environment
140+
pythonpath: Python path to use in the remote environment
141+
max_pickle_version: Maximum pickle protocol version supported by remote interpreter
142+
config_dir: Directory containing configuration for the environment escape
143+
module_prefixes: List of module name prefixes to handle
144+
"""
145+
121146
def __init__(
122147
self,
123148
python_executable,
@@ -135,84 +160,89 @@ def __init__(
135160
self._handled_modules = None
136161
self._aliases = {}
137162

138-
def find_module(self, fullname, path=None):
163+
def find_spec(self, fullname, path=None, target=None):
139164
if self._handled_modules is not None:
140165
if get_canonical_name(fullname, self._aliases) in self._handled_modules:
141-
return self
166+
return importlib.util.spec_from_loader(fullname, self)
142167
return None
143168
if any([fullname.startswith(prefix) for prefix in self._module_prefixes]):
144169
# We potentially handle this
145-
return self
170+
return importlib.util.spec_from_loader(fullname, self)
146171
return None
147172

148-
def load_module(self, fullname):
149-
if fullname in sys.modules:
150-
return sys.modules[fullname]
151-
if self._client is None:
152-
if sys.version_info[0] < 3:
153-
raise NotImplementedError(
154-
"Environment escape imports are not supported in Python 2"
155-
)
156-
# We initialize a client and query the modules we handle
157-
# The max_pickle_version is the pickle version that the server (so
158-
# the underlying interpreter we call into) supports; we determine
159-
# what version the current environment support and take the minimum
160-
# of those two
161-
max_pickle_version = min(self._max_pickle_version, pickle.HIGHEST_PROTOCOL)
162-
163-
self._client = Client(
164-
self._module_prefixes,
165-
self._python_executable,
166-
self._pythonpath,
167-
max_pickle_version,
168-
self._config_dir,
169-
)
170-
atexit.register(_clean_client, self._client)
171-
172-
# Get information about overrides and what the server knows about
173-
exports = self._client.get_exports()
174-
175-
prefixes = set()
176-
export_classes = exports.get("classes", [])
177-
export_functions = exports.get("functions", [])
178-
export_values = exports.get("values", [])
179-
export_exceptions = exports.get("exceptions", [])
180-
self._aliases = exports.get("aliases", {})
181-
for name in itertools.chain(
182-
export_classes,
183-
export_functions,
184-
export_values,
185-
(e[0] for e in export_exceptions),
186-
):
187-
splits = name.rsplit(".", 1)
188-
prefixes.add(splits[0])
189-
# We will make sure that we create modules even for "empty" prefixes
190-
# because packages are always loaded hierarchically so if we have
191-
# something in `a.b.c` but nothing directly in `a`, we still need to
192-
# create a module named `a`. There is probably a better way of doing this
193-
all_prefixes = list(prefixes)
194-
for prefix in all_prefixes:
195-
parts = prefix.split(".")
196-
cur = parts[0]
197-
for i in range(1, len(parts)):
198-
prefixes.add(cur)
199-
cur = ".".join([cur, parts[i]])
200-
201-
# We now know all the modules that we can handle. We update
202-
# handled_module and return the module if we have it or raise ImportError
203-
self._handled_modules = {}
204-
for prefix in prefixes:
205-
self._handled_modules[prefix] = _WrappedModule(
206-
self, prefix, exports, self._client
207-
)
173+
def create_module(self, spec):
174+
# Return the pre-created wrapped module for this spec
175+
self._initialize_client()
176+
177+
fullname = spec.name
208178
canonical_fullname = get_canonical_name(fullname, self._aliases)
209-
# Modules are created canonically but we need to return something for any
210-
# of the aliases.
211-
module = self._handled_modules.get(canonical_fullname)
212-
if module is None:
213-
raise ImportError
214-
sys.modules[fullname] = module
215-
return module
179+
# Modules are created canonically but we need to handle any of the aliases.
180+
wrapped_module = self._handled_modules.get(canonical_fullname)
181+
if wrapped_module is None:
182+
raise ImportError(f"No module named '{fullname}'")
183+
return wrapped_module
184+
185+
def exec_module(self, module):
    """
    Loader hook called after create_module(); intentionally does nothing.

    create_module() hands back an already constructed wrapped module taken
    from self._handled_modules, so there is no module body left to execute.
    """
    return None
189+
190+
def _initialize_client(self):
    """
    Lazily create the client and build the table of handled modules.

    On first use this creates the Client (registering _clean_client with
    atexit), queries its exports, stores the export aliases in
    self._aliases and fills self._handled_modules with one _WrappedModule
    per module name derived from the exports, including all parent packages.

    The call is a no-op once self._handled_modules has been populated. If an
    earlier attempt created the client but failed before the table was built
    (for example get_exports() raised), the existing client is reused and
    the export query is retried, instead of leaving self._handled_modules as
    None for create_module() to trip over.
    """
    if self._handled_modules is not None:
        return

    if self._client is None:
        # The max_pickle_version is the pickle version that the server (so
        # the underlying interpreter we call into) supports; we determine
        # what version the current environment supports and take the minimum
        # of those two
        max_pickle_version = min(self._max_pickle_version, pickle.HIGHEST_PROTOCOL)

        self._client = Client(
            self._module_prefixes,
            self._python_executable,
            self._pythonpath,
            max_pickle_version,
            self._config_dir,
        )
        atexit.register(_clean_client, self._client)

    # Get information about overrides and what the server knows about
    exports = self._client.get_exports()
    self._aliases = exports.get("aliases", {})

    exported_names = itertools.chain(
        exports.get("classes", []),
        exports.get("functions", []),
        exports.get("values", []),
        (e[0] for e in exports.get("exceptions", [])),
    )
    # The module owning an exported name is everything before the last dot
    prefixes = {name.rsplit(".", 1)[0] for name in exported_names}

    # We will make sure that we create modules even for "empty" prefixes
    # because packages are always loaded hierarchically so if we have
    # something in `a.b.c` but nothing directly in `a`, we still need to
    # create a module named `a`.
    # Iterate over a snapshot since the set is extended inside the loop.
    for prefix in tuple(prefixes):
        parts = prefix.split(".")
        prefixes.update(".".join(parts[:i]) for i in range(1, len(parts)))

    # We now know all the modules that we can handle. The table is only
    # published once every wrapped module has been built; create_module()
    # looks modules up in it and raises ImportError for unknown names.
    self._handled_modules = {
        prefix: _WrappedModule(self, prefix, exports, self._client)
        for prefix in prefixes
    }
216246

217247

218248
def create_modules(python_executable, pythonpath, max_pickle_version, path, prefixes):

0 commit comments

Comments
 (0)