diff --git a/README.md b/README.md index 5bc551a..e907b14 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,8 @@ pip install -r requirements.txt ``` # must have venv/venv3 activated $ python src/vis.py +# show help and the optional command-line flags +$ python src/vis.py -h ``` __Example:__ @@ -68,3 +70,12 @@ Also displays with `graphviz`: Another example graph from a [slightly more substantial project](https://github.com/nicolashahn/set-solver) (blue arrows/nodes indicate modules where the code does not live in the project directory [such as modules installed through pip]): ![](examples/set-solver.png) + +### Custom filter logic + +If it does not already exist, create a module `modfilter.py` alongside `vis.py` in `src/` and add one or both of the following callback functions: +`parent_mod_filter_func(mod_dict: Dict) -> Dict` +`import_mod_filter_func(modname: str, parentname: str) -> bool` + +`vis.py` checks for a `modfilter.py` module and, if it defines either of these callback functions, calls them during processing +to allow custom filtering of modules. A commented example `modfilter.py` is already included in the project.
diff --git a/src/modfilter.py b/src/modfilter.py new file mode 100644 index 0000000..978a205 --- /dev/null +++ b/src/modfilter.py @@ -0,0 +1,34 @@ +from typing import Callable, Dict +import sys + +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + +# function to edit mod_dict and filter out modules from project tree (will be parsed for imports) +def parent_mod_filter_func(mod_dict: Dict) -> Dict: + temp = dict(mod_dict) + for name, _ in mod_dict.items(): + if not parent_filter(name): + del temp[name] + return temp + +# example filter function for listed modules in project tree +# return false to exclude module +def parent_filter(modname: str) -> bool: + # example filter logic + #return not (is_test_module(modname) or is_logging_module(modname) or is_django_module(modname)) + return True + +# example filterfunction for filtering specific module +# return false to exclude module +def import_mod_filter_func(modname: str, parentname: str) -> bool: + # Example filter logic + #return not (is_test_module(parentname) or is_logging_module(parentname) or is_django_module(parentname)) + return True + + +is_test_module: Callable[[str], bool] = lambda modname: '.tests' in modname + +is_logging_module: Callable[[str], bool] = lambda modname: 'logging' in modname + +is_django_module: Callable[[str], bool] = lambda modname: 'django' in modname \ No newline at end of file diff --git a/src/vis.py b/src/vis.py index 36480f3..78da75f 100644 --- a/src/vis.py +++ b/src/vis.py @@ -4,21 +4,26 @@ import argparse import dis +import graphviz +import importlib.util import matplotlib.colors as mc +import networkx as nx import os import platform import sys from collections import defaultdict +from libinfo import is_std_lib_module from modulefinder import ModuleFinder, Module as MFModule from matplotlib.colors import hsv_to_rgb -from pyvis.network import Network -import networkx as nx from networkx.drawing.nx_pydot import write_dot +from pyvis.network import Network +from 
typing import Callable -import graphviz - -from libinfo import is_std_lib_module +if importlib.util.find_spec('modfilter', __package__) is not None: + import modfilter +else: + modfilter = None # actual opcodes LOAD_CONST = dis.opmap["LOAD_CONST"] @@ -48,6 +53,10 @@ JAVA_SYSTEM_NAME = "Java" WINDOWS_SYSTEM_NAME = "Windows" +# function for logging (to stderr) +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + def abs_mod_name(module, root_dir): """ From a Module's absolute path, and the root directory, return a string with how that module would be imported from a script in the root @@ -72,7 +81,6 @@ def abs_mod_name(module, root_dir): mod_name = ".".join(path_parts) return mod_name - def get_modules_from_file(script, root_dir=None, use_sys_path=False): """ Use ModuleFinder.load_file() to get module imports for the given script. @@ -107,7 +115,6 @@ def get_modules_from_file(script, root_dir=None, use_sys_path=False): return modules - def get_modules_in_dir(root_dir, ignore_venv=True): """ Walk a directory recursively and get the module imports for all .py files in the directory. @@ -130,7 +137,6 @@ def get_modules_in_dir(root_dir, ignore_venv=True): mods[mod_name] = mod return mods - class Module(MFModule, object): """ Extension of modulefinder.ModuleFinder to add custom attrs. """ @@ -141,7 +147,6 @@ def __init__(self, *args, **kwargs): # value = list of names imported from that module self.direct_imports = {} - def _unpack_opargs(code): """ Step through the python bytecode and generate a tuple (int, int, int): (operation_index, operation_byte, argument_byte) for each operation. @@ -170,7 +175,6 @@ def _unpack_opargs(code): yield (i, op, arg) # Python 1? 
- def scan_opcodes(compiled): """ This function is stolen w/ slight modifications from the standard library @@ -214,8 +218,7 @@ def scan_opcodes(compiled): yield REL_IMPORT, (level, fromlist, names[oparg]) continue - -def get_fq_immediate_deps(all_mods, module): +def get_fq_immediate_deps(all_mods, module, modfilterfunc: Callable[[str, str], bool]=lambda name, parentname: True): """ From a Module, using the module's absolute path, compile the code and then search through it for the imports and get a list of the immediately @@ -240,13 +243,18 @@ def get_fq_immediate_deps(all_mods, module): if op == ABS_IMPORT: names, top = args if ( - not is_std_lib_module(top.split(".")[0], PY_VERSION) - or top in all_mods + (not is_std_lib_module(top.split(".")[0], PY_VERSION) + or top in all_mods) + and modfilterfunc("", top) ): if not names: fq_deps[top].append([]) for name in names: fq_name = top + "." + name + if not modfilterfunc(name, top): + eprint("EXCLUDE: ", top, "->", name) + continue + if fq_name in all_mods: # just to make sure it's in the dict fq_deps[fq_name].append([]) @@ -259,17 +267,15 @@ def get_fq_immediate_deps(all_mods, module): return fq_deps - -def add_immediate_deps_to_modules(mod_dict): +def add_immediate_deps_to_modules(mod_dict, modfilterfunc: Callable[[str, str], bool]=lambda name, parentname: True): """ Take a module dictionary, and add the names of the modules directly imported by each module in the dictionary, and add them to the module's direct_imports. """ for name, module in sorted(mod_dict.items()): - fq_deps = get_fq_immediate_deps(mod_dict, module) + fq_deps = get_fq_immediate_deps(mod_dict, module, modfilterfunc=modfilterfunc) module.direct_imports = fq_deps - def mod_dict_to_dag(mod_dict, graph_name): """ Take a module dictionary, and return a graphviz.Digraph object representing the module import relationships. 
""" @@ -288,7 +294,6 @@ def mod_dict_to_dag(mod_dict, graph_name): dag.edge(name, di, **attrs) return dag - def get_args(): """ Parse and return command line args. """ parser = argparse.ArgumentParser( @@ -328,7 +333,6 @@ def get_args(): # help='file that contains names of modules to ignore') return parser.parse_args() - def generate_pyvis_visualization(mod_dict, dotfile='', show=False): def get_hex_color_of_shade(value): if value < 0 or value > 1: @@ -409,6 +413,8 @@ def normaliz_between_n1_1(min, max, val): def main(): + endnotice = False + args = get_args() if args.path[-3:] == ".py": script = args.path @@ -419,9 +425,24 @@ def main(): else: root_dir = args.path mod_dict = get_modules_in_dir(root_dir) - - - add_immediate_deps_to_modules(mod_dict) + + # check for filterfunction callback to be present, else use stub lambda + if modfilter is None: + add_immediate_deps_to_modules(mod_dict) + else: + match hasattr(modfilter, "parent_mod_filter_func"): + case True: + mod_dict = modfilter.parent_mod_filter_func(mod_dict) + match hasattr(modfilter, "import_mod_filter_func"): + case False: + add_immediate_deps_to_modules(mod_dict) + case True: + add_immediate_deps_to_modules(mod_dict, modfilterfunc=modfilter.import_mod_filter_func) + + # print notice to either implement one of the callbacks or consider removing modfilter module + if not hasattr(modfilter, "parent_mod_filter_func") and hasattr(modfilter, "import_mod_filter_func"): + endnotice = True + print("Module dependencies:") for name, module in sorted(mod_dict.items()): print("\n" + name) @@ -440,5 +461,8 @@ def main(): else: generate_pyvis_visualization(mod_dict, show=args.show_graph) + if endnotice: + eprint("Notice: consider adding one of the filter functions (parent_mod_filter_func or import_mod_filter_func) to modfilter module or removing modfilter module completely.") + if __name__ == "__main__": - main() + main() \ No newline at end of file