From 70d2cc0f1adfdafc680d60cf8b47d152a97a14e8 Mon Sep 17 00:00:00 2001 From: GeorgeM Date: Wed, 12 Jul 2023 18:11:13 +0100 Subject: [PATCH 01/10] implement improvements from other repo from myself and ana-096, refactor code into multiple files, add intial unit tests, add paralelism --- .gitignore | 166 ++++++++++++++++++++++ cubes.py | 304 ++++++++++++++--------------------------- libraries/cache.py | 81 +++++++++++ libraries/cropping.py | 55 ++++++++ libraries/packing.py | 49 +++++++ libraries/parallel.py | 86 ++++++++++++ libraries/renderer.py | 36 +++++ libraries/rotation.py | 38 ++++++ tests/test_cache.py | 22 +++ tests/test_cropping.py | 9 ++ tests/test_packing.py | 52 +++++++ tests/test_parallel.py | 22 +++ tests/test_rotation.py | 26 ++++ tests/utils.py | 4 + 14 files changed, 746 insertions(+), 204 deletions(-) create mode 100644 .gitignore create mode 100644 libraries/cache.py create mode 100644 libraries/cropping.py create mode 100644 libraries/packing.py create mode 100644 libraries/parallel.py create mode 100644 libraries/renderer.py create mode 100644 libraries/rotation.py create mode 100644 tests/test_cache.py create mode 100644 tests/test_cropping.py create mode 100644 tests/test_packing.py create mode 100644 tests/test_parallel.py create mode 100644 tests/test_rotation.py create mode 100644 tests/utils.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7e5e05e --- /dev/null +++ b/.gitignore @@ -0,0 +1,166 @@ +# custom ignores to prevent including cubes files +*.npy + +# custom ignores to prevent vscode files being included +.vscode + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ \ No newline at end of file diff --git a/cubes.py b/cubes.py index 18e6fb5..f2db4d8 100644 --- a/cubes.py +++ b/cubes.py @@ -1,266 +1,162 @@ import os -import sys -import math import numpy as np +import math import argparse from time import perf_counter +from libraries.cache import get_cache, save_cache, cache_exists +from libraries.cropping import crop_cube, expand_cube +from libraries.packing import pack, unpack +from libraries.parallel import dispatch_tasks, init_parrelism +from libraries.renderer import render_shapes +from libraries.rotation import all_rotations +from multiprocessing import Queue -def all_rotations(polycube): - """ - Calculates all rotations of a polycube. - - Adapted from https://stackoverflow.com/questions/33190042/how-to-calculate-all-24-rotations-of-3d-array. - This function computes all 24 rotations around each of the axis x,y,z. It uses numpy operations to do this, to avoid unecessary copies. - The function returns a generator, to avoid computing all rotations if they are not needed. - - Parameters: - polycube (np.array): 3D Numpy byte array where 1 values indicate polycube positions - - Returns: - generator(np.array): Yields new rotations of this cube about all axes - - """ - def single_axis_rotation(polycube, axes): - """Yield four rotations of the given 3d array in the plane spanned by the given axes. 
- For example, a rotation in axes (0,1) is a rotation around axis 2""" - for i in range(4): - yield np.rot90(polycube, i, axes) - - # 4 rotations about axis 0 - yield from single_axis_rotation(polycube, (1,2)) - # rotate 180 about axis 1, 4 rotations about axis 0 - yield from single_axis_rotation(np.rot90(polycube, 2, axes=(0,2)), (1,2)) +def unpack_hashes_task(args: tuple[list[int], Queue]) -> np.ndarray: + cube_hashes, logging_queue = args + polycubes = [] + uid = os.getpid() + + n = 0 + log_base = max(math.ceil((len(cube_hashes)/ 1000)), 100) # send log updates every 0.1% or every 100, whichever is bigger + for cube_hash in cube_hashes: + polycubes.append(unpack(cube_hash)) + + if (n % log_base == 0): + if (logging_queue): + logging_queue.put((uid, n, len(cube_hashes))) + else: + print(f'done {((n/len(cube_hashes)) * 100):.2f} %') + n += 1 + + if (logging_queue): + logging_queue.put((uid, n, len(cube_hashes))) + else: + print(f'done {((n/len(cube_hashes)) * 100):.2f} %') + return polycubes - # rotate 90 or 270 about axis 1, 8 rotations about axis 2 - yield from single_axis_rotation(np.rot90(polycube, axes=(0,2)), (0,1)) - yield from single_axis_rotation(np.rot90(polycube, -1, axes=(0,2)), (0,1)) - # rotate about axis 2, 8 rotations about axis 1 - yield from single_axis_rotation(np.rot90(polycube, axes=(0,1)), (0,2)) - yield from single_axis_rotation(np.rot90(polycube, -1, axes=(0,1)), (0,2)) +def hash_cubes_task(args: tuple[list[np.ndarray], Queue]) -> list[int]: + base_cubes, logging_queue = args + # Empty list of new n-polycubes + hashes = set() + uid = os.getpid() -def crop_cube(cube): - """ - Crops an np.array to have no all-zero padding around the edge. + n = 0 + log_base = max(math.ceil((len(base_cubes)/ 1000)), 100) # send log updates every 0.1% or every 100, whichever is bigger + for base_cube in base_cubes: + for new_cube in expand_cube(base_cube): + cube_hash = get_canoincal_packing(new_cube) + hashes.add(cube_hash) - Given in https://stackoverflow.com/questions/39465812/how-to-crop-zero-edges-of-a-numpy-array - - Parameters: - cube (np.array): 3D Numpy byte array where 1 values indicate polycube positions - - Returns: - np.array: Cropped 3D Numpy byte array equivalent to cube, but with no zero padding - - """ - for i in range(cube.ndim): - cube = np.swapaxes(cube, 0, i) # send i-th axis to front - while np.all( cube[0]==0 ): - cube = cube[1:] - while np.all( cube[-1]==0 ): - cube = cube[:-1] - cube = np.swapaxes(cube, 0, i) # send i-th axis to its original position - return cube - -def expand_cube(cube): - """ - Expands a polycube by adding single blocks at all valid locations. - - Calculates all valid new positions of a polycube by shifting the existing cube +1 and -1 in each dimension. - New valid cubes are returned via a generator function, in case they are not all needed. 
- - Parameters: - cube (np.array): 3D Numpy byte array where 1 values indicate polycube positions - - Returns: - generator(np.array): Yields new polycubes that are extensions of cube - - """ - cube = np.pad(cube, 1, 'constant', constant_values=0) - output_cube = np.array(cube) + if (n % log_base == 0): + if (logging_queue): + logging_queue.put((uid, n, len(base_cubes))) + else: + print(f'done {((n/len(base_cubes)) * 100):.2f} %') + n += 1 - xs,ys,zs = cube.nonzero() - output_cube[xs+1,ys,zs] = 1 - output_cube[xs-1,ys,zs] = 1 - output_cube[xs,ys+1,zs] = 1 - output_cube[xs,ys-1,zs] = 1 - output_cube[xs,ys,zs+1] = 1 - output_cube[xs,ys,zs-1] = 1 + if (logging_queue): + logging_queue.put((uid, n, len(base_cubes))) + else: + print(f'done {((n/len(base_cubes)) * 100):.2f} %') - exp = (output_cube ^ cube).nonzero() + return hashes - for (x,y,z) in zip(exp[0], exp[1], exp[2]): - new_cube = np.array(cube) - new_cube[x,y,z] = 1 - yield crop_cube(new_cube) -def generate_polycubes(n, use_cache=False): +def generate_polycubes(n: int, use_cache: bool = False, enable_multicore: bool = False) -> list[np.ndarray]: """ Generates all polycubes of size n - + Generates a list of all possible configurations of n cubes, where all cubes are connected via at least one face. Builds each new polycube from the previous set of polycubes n-1. Uses an optional cache to save and load polycubes of size n-1 for efficiency. - + Parameters: n (int): The size of the polycubes to generate, e.g. all combinations of n=4 cubes. - + Returns: list(np.array): Returns a list of all polycubes of size n as numpy byte arrays - + """ if n < 1: return [] elif n == 1: - return [np.ones((1,1,1), dtype=np.byte)] + return [np.ones((1, 1, 1), dtype=np.byte)] elif n == 2: - return [np.ones((2,1,1), dtype=np.byte)] - - # Check cache - cache_path = f"cubes_{n}.npy" - if use_cache and os.path.exists(cache_path): - print(f"\rLoading polycubes n={n} from cache: ", end = "") - polycubes = np.load(cache_path, allow_pickle=True) - print(f"{len(polycubes)} shapes") - return polycubes - - # Empty list of new n-polycubes - polycubes = [] - polycubes_rle = set() + return [np.ones((2, 1, 1), dtype=np.byte)] - base_cubes = generate_polycubes(n-1, use_cache) + if (use_cache and cache_exists(n)): + results = get_cache(n) + print(f"Got polycubes from cache n={n}\n") + else: + pollycubes = generate_polycubes(n-1, use_cache, enable_multicore) + results = dispatch_tasks(hash_cubes_task, pollycubes, enable_multicore) + print(f"Hashed polycubes n={n}\n") + results = dispatch_tasks(unpack_hashes_task, results, enable_multicore) + print(f"Generated polycubes from hash n={n}\n") - for idx, base_cube in enumerate(base_cubes): - # Iterate over possible expansion positions - for new_cube in expand_cube(base_cube): - if not cube_exists_rle(new_cube, polycubes_rle): - polycubes.append(new_cube) - polycubes_rle.add(rle(new_cube)) - - if (idx % 100 == 0): - perc = round((idx / len(base_cubes)) * 100,2) - print(f"\rGenerating polycubes n={n}: {perc}%", end="") + if (use_cache and not cache_exists(n)): + save_cache(n, results) - print(f"\rGenerating polycubes n={n}: 100% ") - - if use_cache: - cache_path = f"cubes_{n}.npy" - np.save(cache_path, np.array(polycubes, dtype=object), allow_pickle=True) + return results - return polycubes -def rle(polycube): - """ - Computes a simple run-length encoding of a given polycube. This function allows cubes to be more quickly compared via hashing. - - Converts a {0,1} nd array into a tuple that encodes the same shape. 
The array is first flattened, and then the following algorithm is applied: - - 1) The first three values in tuple contain the x,y,z dimension sizes of the array - 2) Each string of zeros of length n is replaced with a single value -n - 3) Each string of ones of length m is replaced with a single value +m - - Parameters: - polycube (np.array): 3D Numpy byte array where 1 values indicate polycube positions - - Returns: - tuple(int): Run length encoded polycube in the form (X, Y, Z, a, b, c, ...) - - """ - r = [] - r.extend(polycube.shape) - current = None - val = 0 - for x in polycube.flat: - if current is None: - current = x - val = 1 - pass - elif current == x: - val += 1 - elif current != x: - r.append(val if current == 1 else -val) - current = x - val = 1 - - r.append(val if current == 1 else -val) - - return tuple(r) - -def cube_exists_rle(polycube, polycubes_rle): +def get_canoincal_packing(polycube: np.ndarray) -> int: """ Determines if a polycube has already been seen. - + Considers all possible rotations of a cube against the existing cubes stored in memory. Returns True if the cube exists, or False if it is new. - + Parameters: polycube (np.array): 3D Numpy byte array where 1 values indicate polycube positions - + Returns: boolean: True if polycube is already present in the set of all cubes so far. - + hash: the hash for this cube + """ + max_hash = 0 for cube_rotation in all_rotations(polycube): - if rle(cube_rotation) in polycubes_rle: - return True + this_hash = pack(cube_rotation) + if (this_hash > max_hash): + max_hash = this_hash + return max_hash - return False if __name__ == "__main__": parser = argparse.ArgumentParser( - prog='Polycube Generator', - description='Generates all polycubes (combinations of cubes) of size n.') + prog='Polycube Generator', + description='Generates all polycubes (combinations of cubes) of size n.') parser.add_argument('n', metavar='N', type=int, - help='The number of cubes within each polycube') - - #Requires python >=3.9 + help='The number of cubes within each polycube') + + # Requires python >=3.9 parser.add_argument('--cache', action=argparse.BooleanOptionalAction) + parser.add_argument('--multicore', action=argparse.BooleanOptionalAction) + parser.add_argument('--render', action=argparse.BooleanOptionalAction) + + init_parrelism() args = parser.parse_args() - + n = args.n use_cache = args.cache if args.cache is not None else True + multicore = args.multicore if args.multicore is not None else False + render = args.render if args.render is not None else False # Start the timer t1_start = perf_counter() - all_cubes = list(generate_polycubes(n, use_cache=use_cache)) + all_cubes = generate_polycubes(n, use_cache=use_cache, enable_multicore=multicore) # Stop the timer t1_stop = perf_counter() - print (f"Found {len(all_cubes)} unique polycubes") - print (f"Elapsed time: {round(t1_stop - t1_start,3)}s") - - -# Code for if you want to generate pictures of the sets of cubes. Will work up to about n=8, before there are simply too many! -# Could be adapted for larger cube sizes by splitting the dataset up into separate images. 
-# def render_shapes(shapes, path): -# n = len(shapes) -# dim = max(max(a.shape) for a in shapes) -# i = math.isqrt(n) + 1 -# voxel_dim = dim * i -# voxel_array = np.zeros((voxel_dim + i,voxel_dim + i,dim), dtype=np.byte) -# pad = 1 -# for idx, shape in enumerate(shapes): -# x = (idx % i) * dim + (idx % i) -# y = (idx // i) * dim + (idx // i) -# xpad = x * pad -# ypad = y * pad -# s = shape.shape -# voxel_array[x:x + s[0], y:y + s[1] , 0 : s[2]] = shape - -# voxel_array = crop_cube(voxel_array) -# colors = np.empty(voxel_array.shape, dtype=object) -# colors[:] = '#FFD65DC0' - -# ax = plt.figure(figsize=(20,16), dpi=600).add_subplot(projection='3d') -# ax.voxels(voxel_array, facecolors = colors, edgecolor='k', linewidth=0.1) - -# ax.set_xlim([0, voxel_array.shape[0]]) -# ax.set_ylim([0, voxel_array.shape[1]]) -# ax.set_zlim([0, voxel_array.shape[2]]) -# plt.axis("off") -# ax.set_box_aspect((1, 1, voxel_array.shape[2] / voxel_array.shape[0])) -# plt.savefig(path + ".png", bbox_inches='tight', pad_inches = 0) + if (render): + render_shapes(all_cubes, "./out") + + print(f"\nFound {len(all_cubes)} unique polycubes") + print(f"\nElapsed time: {round(t1_stop - t1_start,3)}s") diff --git a/libraries/cache.py b/libraries/cache.py new file mode 100644 index 0000000..311e243 --- /dev/null +++ b/libraries/cache.py @@ -0,0 +1,81 @@ +import os +import numpy as np + +cache_path_fstring = "cubes_{0}.npy" + + +def cache_exists(n: int) -> bool: + """ + Checks if a cache file exists with the standard name for a given batch size + + Parameters: + n (int): the size of polycube to search for + + Returns: + bool: whether that cache exists + + """ + cache_path = cache_path_fstring.format(n) + return os.path.exists(cache_path) + + +def get_cache_raw(cache_path: str) -> list[np.ndarray]: + """ + Loads a Cache File for a given pathname + + Parameters: + cache_path (str): the file location to look for the cache file + + Returns: + list[np.ndarray]: the list of polycubes from the cache + + """ + if os.path.exists(cache_path): + + polycubes = np.load(cache_path, allow_pickle=True) + + return polycubes + else: + return None + + +def get_cache(n: int) -> np.ndarray: + """ + Loads a Cache File for a given size of polycube + + Parameters: + n (int): the size of polycube to load the cache of + + Returns: + list[np.ndarray]: the list of polycubes of that size from the cache + + """ + cache_path = cache_path_fstring.format(n) + print(f"\rLoading polycubes n={n} from cache: ", end="") + polycubes = get_cache_raw(cache_path) + print(f"{len(polycubes)} shapes") + return polycubes + + +def save_cache_raw(cache_path: str, polycubes: list[np.ndarray]) -> None: + """ + Saves a Cache File to a file at a given pathname + + Parameters: + cache_path (str): the file location to sabe the cache file + polycubes (list[np.ndarray]): the polycubes to be cached + """ + np.save(cache_path, np.array(polycubes, dtype=object), allow_pickle=True) + + +def save_cache(n: int, polycubes: np.ndarray) -> None: + """ + Saves a Cache File for a given polycube size + + Parameters: + n (int): the size of the polycubes to be cached + polycubes (list[np.ndarray]): the polycubes to be cached + """ + cache_path = cache_path_fstring.format(n) + save_cache_raw(cache_path, polycubes) + print(f"Wrote file for polycubes n={n}") diff --git a/libraries/cropping.py b/libraries/cropping.py new file mode 100644 index 0000000..0d281be --- /dev/null +++ b/libraries/cropping.py @@ -0,0 +1,55 @@ +import numpy as np + + +def crop_cube(cube: np.ndarray) -> np.ndarray: + """ + 
Crops an np.array to have no all-zero padding around the edge. + + Given in https://stackoverflow.com/questions/39465812/how-to-crop-zero-edges-of-a-numpy-array + + Parameters: + cube (np.array): 3D Numpy byte array where 1 values indicate polycube positions + + Returns: + np.array: Cropped 3D Numpy byte array equivalent to cube, but with no zero padding + + """ + for i in range(cube.ndim): + cube = np.swapaxes(cube, 0, i) + nonzero_indices = np.any(cube != 0, axis=tuple(range(1, cube.ndim))) + cube = cube[nonzero_indices] + cube = np.swapaxes(cube, 0, i) + return cube + + +def expand_cube(cube: np.ndarray) -> np.ndarray: + """ + Expands a polycube by adding single blocks at all valid locations. + + Calculates all valid new positions of a polycube by shifting the existing cube +1 and -1 in each dimension. + New valid cubes are returned via a generator function, in case they are not all needed. + + Parameters: + cube (np.array): 3D Numpy byte array where 1 values indicate polycube positions + + Returns: + generator(np.array): Yields new polycubes that are extensions of cube + + """ + cube = np.pad(cube, 1, 'constant', constant_values=0) + output_cube = np.array(cube) + + xs, ys, zs = cube.nonzero() + output_cube[xs+1, ys, zs] = 1 + output_cube[xs-1, ys, zs] = 1 + output_cube[xs, ys+1, zs] = 1 + output_cube[xs, ys-1, zs] = 1 + output_cube[xs, ys, zs+1] = 1 + output_cube[xs, ys, zs-1] = 1 + + exp = (output_cube ^ cube).nonzero() + + for (x, y, z) in zip(exp[0], exp[1], exp[2]): + new_cube = np.array(cube) + new_cube[x, y, z] = 1 + yield crop_cube(new_cube) diff --git a/libraries/packing.py b/libraries/packing.py new file mode 100644 index 0000000..e77f0b0 --- /dev/null +++ b/libraries/packing.py @@ -0,0 +1,49 @@ +import numpy as np + + +def pack(polycube: np.ndarray) -> int: + """ + Converts a 3D ndarray into a single unsigned integer for quick hashing and efficient storage + + Converts a {0,1} nd array into a single unique large integer + + Parameters: + polycube (np.array): 3D Numpy byte array where 1 values indicate polycube positions + + Returns: + int: a unique integer hash + + """ + + pack_cube = np.packbits(polycube.flatten(), bitorder='big') + cube_hash = 0 + for index in polycube.shape: + cube_hash = (cube_hash << 8) + int(index) + for part in pack_cube: + cube_hash = (cube_hash << 8) + int(part) + return cube_hash + + +def unpack(cube_hash: int) -> np.ndarray: + """ + Converts a single integer back into a 3D ndarray + + + Parameters: + cube_hash (int): a unique integer hash + + Returns: + np.array: 3D Numpy byte array where 1 values indicate polycube positions + + """ + parts = [] + while (cube_hash): + parts.append(cube_hash % 256) + cube_hash >>= 8 + parts = parts[::-1] + shape = (parts[0], parts[1], parts[2]) + data = parts[3:] + size = shape[0] * shape[1] * shape[2] + raw = np.unpackbits(np.array(data, dtype=np.uint8), bitorder='big') + final = raw[0:size].reshape(shape) + return final diff --git a/libraries/parallel.py b/libraries/parallel.py new file mode 100644 index 0000000..0a5f190 --- /dev/null +++ b/libraries/parallel.py @@ -0,0 +1,86 @@ +import multiprocessing +import math +from typing import Callable + +manager = None +logging_queue = None +pool = None + + +def logging_task(queue: multiprocessing.Queue) -> None: + """ + A Task function which waits for data on the log queue and prints it to the console + + Parameters: + queue (multiprocessing.Queue): the queue to listen on + """ + try: + status = {} + while True: + (uid, done, total) = queue.get() + status[uid] = (done, 
total) + + total_done = 0 + total_total = 0 + for uid, (done, total) in status.items(): + total_done += done + total_total += total + + if (total_done == total_total): + print(f'\rCompleted {total_done} of {total_total} {((total_done/total_total) * 100):.2f}%') + status = {} + else: + print(f'\rCompleted {total_done} of {total_total} {((total_done/total_total) * 100):.2f}%', end="") + except: + print("multithreaded logging ended") + + +def init_parrelism() -> None: + """ + helper function to initalise multiprocessing manager, queue, and logging task should only be called once + """ + global logging_queue + global pool + global manager + manager = multiprocessing.Manager() + logging_queue = manager.Queue() + logging_process = multiprocessing.Process(target=logging_task, args=[logging_queue]) + logging_process.daemon = True + logging_process.start() + pool = multiprocessing.Pool(multiprocessing.cpu_count()) + + +def dispatch_tasks(task_function: Callable, items: list, enable_multicore: bool) -> list: + """ + Runs a function on a list of data either sequentialy or in parallel depending on enable_multicore + handles data chunking and recombination for you + + Parameters: + task_function (function): the function to be mapped onto the items + items (list): a list of arguments to your function, this will be cut into smaller lists which will be individualy sent to instances of your function + enable_multicore (bool): whether to run the task in paralel + + + Returns: + list: a merged list of all of your functions returned lists, in order such that it is as if they where all run in one function sequentialy + + """ + # todo dont keep everything in memory, write out to files async as we go + if (enable_multicore): + per_core_split = math.ceil(len(items) / multiprocessing.cpu_count()) + chunk_size = min(10000, per_core_split) # split per core up to 10000 + chunk_size = max(32, chunk_size) # dont send unessicarily small chunks + chunks = [] + for chunk_base in range(0, len(items), chunk_size): + chunks.append((items[chunk_base: min(chunk_base + chunk_size, len(items))], logging_queue)) + items = None + results = pool.map(task_function, chunks) + final_result = [] + for result in reversed(results): + if (isinstance(result, list)): + final_result += result + else: + final_result += list(result) + return final_result + else: + return task_function((items, None)) diff --git a/libraries/renderer.py b/libraries/renderer.py new file mode 100644 index 0000000..9b490e7 --- /dev/null +++ b/libraries/renderer.py @@ -0,0 +1,36 @@ +import math +import numpy as np +import matplotlib.pyplot as plt + +# # Code for if you want to generate pictures of the sets of cubes. Will work up to about n=8, before there are simply too many! +# # Could be adapted for larger cube sizes by splitting the dataset up into separate images. 
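A usage sketch for the renderer added below (an illustration only, assuming the repository root as the working directory and matplotlib installed) mirrors the call cubes.py makes after generation:

```python
from cubes import generate_polycubes
from libraries.renderer import render_shapes

# generate every polycube of size 5 and draw them all into one image at ./cubes_5.png
shapes = generate_polycubes(5, use_cache=True)
render_shapes(shapes, "./cubes_5")
```

As the comment above notes, this stays practical up to roughly n=8 before a single image becomes too crowded.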
+ + +def render_shapes(shapes: list[np.ndarray], path: str): + n = len(shapes) + dim = max(max(a.shape) for a in shapes) + i = math.isqrt(n) + 1 + voxel_dim = dim * i + voxel_array = np.zeros((voxel_dim + i, voxel_dim + i, dim), dtype=np.byte) + pad = 1 + for idx, shape in enumerate(shapes): + x = (idx % i) * dim + (idx % i) + y = (idx // i) * dim + (idx // i) + xpad = x * pad + ypad = y * pad + s = shape.shape + voxel_array[x:x + s[0], y:y + s[1], 0: s[2]] = shape + + # voxel_array = crop_cube(voxel_array) + colors = np.empty(voxel_array.shape, dtype=object) + colors[:] = '#FFD65DC0' + + ax = plt.figure(figsize=(20, 16), dpi=600).add_subplot(projection='3d') + ax.voxels(voxel_array, facecolors=colors, edgecolor='k', linewidth=0.1) + + ax.set_xlim([0, voxel_array.shape[0]]) + ax.set_ylim([0, voxel_array.shape[1]]) + ax.set_zlim([0, voxel_array.shape[2]]) + plt.axis("off") + ax.set_box_aspect((1, 1, voxel_array.shape[2] / voxel_array.shape[0])) + plt.savefig(path + ".png", bbox_inches='tight', pad_inches=0) diff --git a/libraries/rotation.py b/libraries/rotation.py new file mode 100644 index 0000000..e20edb6 --- /dev/null +++ b/libraries/rotation.py @@ -0,0 +1,38 @@ +import numpy as np +from typing import Generator + + +def all_rotations(polycube: np.ndarray) -> Generator[np.ndarray, None, None]: + """ + Calculates all rotations of a polycube. + + Adapted from https://stackoverflow.com/questions/33190042/how-to-calculate-all-24-rotations-of-3d-array. + This function computes all 24 rotations around each of the axis x,y,z. It uses numpy operations to do this, to avoid unecessary copies. + The function returns a generator, to avoid computing all rotations if they are not needed. + + Parameters: + polycube (np.array): 3D Numpy byte array where 1 values indicate polycube positions + + Returns: + generator(np.array): Yields new rotations of this cube about all axes + + """ + def single_axis_rotation(polycube, axes): + """Yield four rotations of the given 3d array in the plane spanned by the given axes. 
+ For example, a rotation in axes (0,1) is a rotation around axis 2""" + for i in range(4): + yield np.rot90(polycube, i, axes) + + # 4 rotations about axis 0 + yield from single_axis_rotation(polycube, (1, 2)) + + # rotate 180 about axis 1, 4 rotations about axis 0 + yield from single_axis_rotation(np.rot90(polycube, 2, axes=(0, 2)), (1, 2)) + + # rotate 90 or 270 about axis 1, 8 rotations about axis 2 + yield from single_axis_rotation(np.rot90(polycube, axes=(0, 2)), (0, 1)) + yield from single_axis_rotation(np.rot90(polycube, -1, axes=(0, 2)), (0, 1)) + + # rotate about axis 2, 8 rotations about axis 1 + yield from single_axis_rotation(np.rot90(polycube, axes=(0, 1)), (0, 2)) + yield from single_axis_rotation(np.rot90(polycube, -1, axes=(0, 1)), (0, 2)) diff --git a/tests/test_cache.py b/tests/test_cache.py new file mode 100644 index 0000000..abb9a0d --- /dev/null +++ b/tests/test_cache.py @@ -0,0 +1,22 @@ +import unittest +import os +from libraries.cache import get_cache, save_cache +from numpy.testing import assert_array_equal +from utils import get_test_data + +class CachingTests(unittest.TestCase): + + def test_cache_consistency(self): + test_data = get_test_data() + + save_cache("test_temp", test_data) + reloaded_data = get_cache("test_temp") + + for test, reloaded in zip(test_data, reloaded_data): + assert_array_equal(test, reloaded) + + @classmethod + def tearDownClass(cls): + expected_test_file_name = "cubes_test_temp.npy" + if os.path.exists(expected_test_file_name): + os.remove(expected_test_file_name) \ No newline at end of file diff --git a/tests/test_cropping.py b/tests/test_cropping.py new file mode 100644 index 0000000..db089ef --- /dev/null +++ b/tests/test_cropping.py @@ -0,0 +1,9 @@ +import unittest +import os +from libraries.cache import get_cache, save_cache +from numpy.testing import assert_array_equal +from utils import get_test_data + +class CroppingTests(unittest.TestCase): + #todo + # i am not smart enough to come up with helpful cropping tests right now. 
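A minimal sketch of a check that could eventually fill this gap, assuming crop_cube strips all-zero slices as documented (the import targets libraries.cropping as introduced in this patch; a later patch renames the module to libraries.resizing):

```python
import unittest
import numpy as np
from numpy.testing import assert_array_equal
from libraries.cropping import crop_cube  # libraries.resizing after the later rename


class CroppingSketch(unittest.TestCase):
    def test_crop_removes_zero_padding(self):
        # a 2x1x1 polycube surrounded by a one-voxel layer of zeros should crop back to itself
        cube = np.ones((2, 1, 1), dtype=np.byte)
        padded = np.pad(cube, 1, 'constant', constant_values=0)
        assert_array_equal(crop_cube(padded), cube)

    def test_crop_is_idempotent(self):
        # cropping an already-cropped cube should change nothing
        cube = np.ones((2, 1, 1), dtype=np.byte)
        assert_array_equal(crop_cube(crop_cube(cube)), cube)
```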
\ No newline at end of file diff --git a/tests/test_packing.py b/tests/test_packing.py new file mode 100644 index 0000000..930ef8e --- /dev/null +++ b/tests/test_packing.py @@ -0,0 +1,52 @@ +import unittest +from numpy.testing import assert_array_equal +from libraries.packing import pack, unpack +from utils import get_test_data + +class PackingTests(unittest.TestCase): + def test_pack_does_not_throw(self): + test_data = get_test_data() + for polycube in test_data: + try: + packed = pack(polycube) + except: + self.fail(f"pack threw on pollycube {polycube}") + + def test_unpack_does_not_throw(self): + test_data = get_test_data() + for polycube in test_data: + packed = pack(polycube) + try: + unpacked = unpack(packed) + except: + self.fail(f"unpack threw on packing {packed} for pollycube {polycube}") + + def test_pack_hashable(self): + test_data = get_test_data() + for polycube in test_data: + packed = pack(polycube) + try: + hash(packed) + except: + self.fail(f"packing of pollycube {polycube} isnt hashable") + + def test_pack_equitable(self): + test_data = get_test_data() + for polycube in test_data: + packed = pack(polycube) + self.assertEqual(packed, packed, "hash does not equal itself") + + def test_pack_unique(self): + test_data = get_test_data() + seen = set() + for polycube in test_data: + packed = pack(polycube) + self.assertNotIn(packed, seen) + seen.add(packed) + + def test_pack_symetric(self): + test_data = get_test_data() + for polycube in test_data: + packed = pack(polycube) + unpacked = unpack(packed) + assert_array_equal(polycube, unpacked, f"packing of polycube isnt symetric, unpacked polycube {polycube} packed to {packed} which unpacked to {unpacked}") \ No newline at end of file diff --git a/tests/test_parallel.py b/tests/test_parallel.py new file mode 100644 index 0000000..ffa1fbe --- /dev/null +++ b/tests/test_parallel.py @@ -0,0 +1,22 @@ +import unittest +import random +from libraries.parallel import init_parrelism, dispatch_tasks + +def reverse_ints_task(args): + (data, logger) = args + return data[::-1] + +class ParallelTests(unittest.TestCase): + def _underlying_test(self, run_in_paralel): + init_parrelism() + random.seed(0) # determenistic random data generation + test_data = [random.randint(0, 100) for x in range(0,763554)] + + results = dispatch_tasks(reverse_ints_task, test_data, run_in_paralel) + self.assertEqual(test_data, results[::-1]) + + def test_simple_serial_task(self): + self._underlying_test(False) + + def test_simple_paralel_task(self): + self._underlying_test(True) \ No newline at end of file diff --git a/tests/test_rotation.py b/tests/test_rotation.py new file mode 100644 index 0000000..ee4e064 --- /dev/null +++ b/tests/test_rotation.py @@ -0,0 +1,26 @@ +import unittest +import numpy as np +from libraries.rotation import all_rotations +from utils import get_test_data + +class RotatingTests(unittest.TestCase): + def test_rotate_quantity(self): + test_data = get_test_data() + for polycube in test_data: + rots = all_rotations(polycube) + self.assertEqual(len(list(rots)), 24, "all_rotations failed to produce 24 rotations") + + def test_rotate_symetric(self): + # tests that all rotations of any given rotation from an all rotations set, itself is in the all rotations set. + # e.g. 
repeatedly rotating doesnt change the fundemental 24 rotations of a given polycube + test_data = get_test_data() + for polycube in test_data: + base_polycube_rotations = list(all_rotations(polycube)) + for base_cube_rotation in base_polycube_rotations: + for rotated_cube_rotation in all_rotations(base_cube_rotation): + found_match = False + for inner_base_polycube_rotation in base_polycube_rotations: + if np.array_equal(rotated_cube_rotation, inner_base_polycube_rotation): + found_match = True + break + self.assertTrue(found_match, "rotation of a rotated polycube wasnt in the set of all rotations for the initial pollycube, rotating it twice has changed its shape") \ No newline at end of file diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..1399c21 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,4 @@ +from libraries.cache import get_cache_raw + +def get_test_data(): + return get_cache_raw('./tests/test_data.npy') \ No newline at end of file From 218503d1cca10c9009f8580ea0cfa581cfe7626a Mon Sep 17 00:00:00 2001 From: GeorgeM Date: Wed, 12 Jul 2023 19:56:12 +0100 Subject: [PATCH 02/10] back out multithreading changes for a different pull request, rename cropping library to resizing, fix type anotations --- cubes.py | 96 ++++++-------------- libraries/parallel.py | 86 ------------------ libraries/{cropping.py => resizing.py} | 3 +- tests/{test_cropping.py => test_resizing.py} | 6 +- 4 files changed, 34 insertions(+), 157 deletions(-) delete mode 100644 libraries/parallel.py rename libraries/{cropping.py => resizing.py} (94%) rename tests/{test_cropping.py => test_resizing.py} (61%) diff --git a/cubes.py b/cubes.py index f2db4d8..8a0a500 100644 --- a/cubes.py +++ b/cubes.py @@ -1,70 +1,19 @@ -import os import numpy as np -import math import argparse from time import perf_counter from libraries.cache import get_cache, save_cache, cache_exists -from libraries.cropping import crop_cube, expand_cube +from libraries.resizing import expand_cube from libraries.packing import pack, unpack -from libraries.parallel import dispatch_tasks, init_parrelism from libraries.renderer import render_shapes from libraries.rotation import all_rotations -from multiprocessing import Queue -def unpack_hashes_task(args: tuple[list[int], Queue]) -> np.ndarray: - cube_hashes, logging_queue = args - polycubes = [] - uid = os.getpid() +def log_if_needed(n, total_n): + if (n == total_n or n % 100 == 0): + print(f"\rcompleted {(n / total_n) * 100:.2f}%", end="\n" if n == total_n else "") - n = 0 - log_base = max(math.ceil((len(cube_hashes)/ 1000)), 100) # send log updates every 0.1% or every 100, whichever is bigger - for cube_hash in cube_hashes: - polycubes.append(unpack(cube_hash)) - if (n % log_base == 0): - if (logging_queue): - logging_queue.put((uid, n, len(cube_hashes))) - else: - print(f'done {((n/len(cube_hashes)) * 100):.2f} %') - n += 1 - - if (logging_queue): - logging_queue.put((uid, n, len(cube_hashes))) - else: - print(f'done {((n/len(cube_hashes)) * 100):.2f} %') - return polycubes - - -def hash_cubes_task(args: tuple[list[np.ndarray], Queue]) -> list[int]: - base_cubes, logging_queue = args - # Empty list of new n-polycubes - hashes = set() - uid = os.getpid() - - n = 0 - log_base = max(math.ceil((len(base_cubes)/ 1000)), 100) # send log updates every 0.1% or every 100, whichever is bigger - for base_cube in base_cubes: - for new_cube in expand_cube(base_cube): - cube_hash = get_canoincal_packing(new_cube) - hashes.add(cube_hash) - - if (n % log_base == 0): - if (logging_queue): 
- logging_queue.put((uid, n, len(base_cubes))) - else: - print(f'done {((n/len(base_cubes)) * 100):.2f} %') - n += 1 - - if (logging_queue): - logging_queue.put((uid, n, len(base_cubes))) - else: - print(f'done {((n/len(base_cubes)) * 100):.2f} %') - - return hashes - - -def generate_polycubes(n: int, use_cache: bool = False, enable_multicore: bool = False) -> list[np.ndarray]: +def generate_polycubes(n: int, use_cache: bool = False) -> list[np.ndarray]: """ Generates all polycubes of size n @@ -74,6 +23,7 @@ def generate_polycubes(n: int, use_cache: bool = False, enable_multicore: bool = Parameters: n (int): The size of the polycubes to generate, e.g. all combinations of n=4 cubes. + use_cahe (bool): whether to use cache files. Returns: list(np.array): Returns a list of all polycubes of size n as numpy byte arrays @@ -88,13 +38,29 @@ def generate_polycubes(n: int, use_cache: bool = False, enable_multicore: bool = if (use_cache and cache_exists(n)): results = get_cache(n) - print(f"Got polycubes from cache n={n}\n") + print(f"\nGot polycubes from cache n={n}") else: - pollycubes = generate_polycubes(n-1, use_cache, enable_multicore) - results = dispatch_tasks(hash_cubes_task, pollycubes, enable_multicore) - print(f"Hashed polycubes n={n}\n") - results = dispatch_tasks(unpack_hashes_task, results, enable_multicore) - print(f"Generated polycubes from hash n={n}\n") + pollycubes = generate_polycubes(n-1, use_cache) + + hashes = set() + done = 0 + print(f"\nHashing polycubes n={n}") + for base_cube in pollycubes: + for new_cube in expand_cube(base_cube): + cube_hash = get_canoincal_packing(new_cube) + hashes.add(cube_hash) + log_if_needed(done, len(pollycubes)) + done += 1 + log_if_needed(done, len(pollycubes)) + + print(f"\nGenerating polycubes from hash n={n}") + results = [] + done = 0 + for cube_hash in hashes: + results.append(unpack(cube_hash)) + log_if_needed(done, len(hashes)) + done += 1 + log_if_needed(done, len(hashes)) if (use_cache and not cache_exists(n)): save_cache(n, results) @@ -135,22 +101,18 @@ def get_canoincal_packing(polycube: np.ndarray) -> int: # Requires python >=3.9 parser.add_argument('--cache', action=argparse.BooleanOptionalAction) - parser.add_argument('--multicore', action=argparse.BooleanOptionalAction) parser.add_argument('--render', action=argparse.BooleanOptionalAction) - init_parrelism() - args = parser.parse_args() n = args.n use_cache = args.cache if args.cache is not None else True - multicore = args.multicore if args.multicore is not None else False render = args.render if args.render is not None else False # Start the timer t1_start = perf_counter() - all_cubes = generate_polycubes(n, use_cache=use_cache, enable_multicore=multicore) + all_cubes = generate_polycubes(n, use_cache=use_cache) # Stop the timer t1_stop = perf_counter() diff --git a/libraries/parallel.py b/libraries/parallel.py deleted file mode 100644 index 0a5f190..0000000 --- a/libraries/parallel.py +++ /dev/null @@ -1,86 +0,0 @@ -import multiprocessing -import math -from typing import Callable - -manager = None -logging_queue = None -pool = None - - -def logging_task(queue: multiprocessing.Queue) -> None: - """ - A Task function which waits for data on the log queue and prints it to the console - - Parameters: - queue (multiprocessing.Queue): the queue to listen on - """ - try: - status = {} - while True: - (uid, done, total) = queue.get() - status[uid] = (done, total) - - total_done = 0 - total_total = 0 - for uid, (done, total) in status.items(): - total_done += done - total_total += 
total - - if (total_done == total_total): - print(f'\rCompleted {total_done} of {total_total} {((total_done/total_total) * 100):.2f}%') - status = {} - else: - print(f'\rCompleted {total_done} of {total_total} {((total_done/total_total) * 100):.2f}%', end="") - except: - print("multithreaded logging ended") - - -def init_parrelism() -> None: - """ - helper function to initalise multiprocessing manager, queue, and logging task should only be called once - """ - global logging_queue - global pool - global manager - manager = multiprocessing.Manager() - logging_queue = manager.Queue() - logging_process = multiprocessing.Process(target=logging_task, args=[logging_queue]) - logging_process.daemon = True - logging_process.start() - pool = multiprocessing.Pool(multiprocessing.cpu_count()) - - -def dispatch_tasks(task_function: Callable, items: list, enable_multicore: bool) -> list: - """ - Runs a function on a list of data either sequentialy or in parallel depending on enable_multicore - handles data chunking and recombination for you - - Parameters: - task_function (function): the function to be mapped onto the items - items (list): a list of arguments to your function, this will be cut into smaller lists which will be individualy sent to instances of your function - enable_multicore (bool): whether to run the task in paralel - - - Returns: - list: a merged list of all of your functions returned lists, in order such that it is as if they where all run in one function sequentialy - - """ - # todo dont keep everything in memory, write out to files async as we go - if (enable_multicore): - per_core_split = math.ceil(len(items) / multiprocessing.cpu_count()) - chunk_size = min(10000, per_core_split) # split per core up to 10000 - chunk_size = max(32, chunk_size) # dont send unessicarily small chunks - chunks = [] - for chunk_base in range(0, len(items), chunk_size): - chunks.append((items[chunk_base: min(chunk_base + chunk_size, len(items))], logging_queue)) - items = None - results = pool.map(task_function, chunks) - final_result = [] - for result in reversed(results): - if (isinstance(result, list)): - final_result += result - else: - final_result += list(result) - return final_result - else: - return task_function((items, None)) diff --git a/libraries/cropping.py b/libraries/resizing.py similarity index 94% rename from libraries/cropping.py rename to libraries/resizing.py index 0d281be..517f5f4 100644 --- a/libraries/cropping.py +++ b/libraries/resizing.py @@ -1,4 +1,5 @@ import numpy as np +from typing import Generator def crop_cube(cube: np.ndarray) -> np.ndarray: @@ -22,7 +23,7 @@ def crop_cube(cube: np.ndarray) -> np.ndarray: return cube -def expand_cube(cube: np.ndarray) -> np.ndarray: +def expand_cube(cube: np.ndarray) -> Generator[np.ndarray, None, None]: """ Expands a polycube by adding single blocks at all valid locations. diff --git a/tests/test_cropping.py b/tests/test_resizing.py similarity index 61% rename from tests/test_cropping.py rename to tests/test_resizing.py index db089ef..f97081c 100644 --- a/tests/test_cropping.py +++ b/tests/test_resizing.py @@ -1,9 +1,9 @@ import unittest import os -from libraries.cache import get_cache, save_cache -from numpy.testing import assert_array_equal +from libraries.resizing import crop_cube, expand_cube from utils import get_test_data class CroppingTests(unittest.TestCase): #todo - # i am not smart enough to come up with helpful cropping tests right now. 
\ No newline at end of file + # i am not smart enough to come up with helpful cropping tests right now. + pass \ No newline at end of file From d6b518e72517989a14298ca0628349ef75dc413e Mon Sep 17 00:00:00 2001 From: GeorgeM Date: Wed, 12 Jul 2023 20:09:56 +0100 Subject: [PATCH 03/10] cleanup of annotations, optimisation to break early on seeing a canonical hash --- cubes.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cubes.py b/cubes.py index 8a0a500..8c770c2 100644 --- a/cubes.py +++ b/cubes.py @@ -47,7 +47,7 @@ def generate_polycubes(n: int, use_cache: bool = False) -> list[np.ndarray]: print(f"\nHashing polycubes n={n}") for base_cube in pollycubes: for new_cube in expand_cube(base_cube): - cube_hash = get_canoincal_packing(new_cube) + cube_hash = get_canoincal_packing(new_cube, hashes) hashes.add(cube_hash) log_if_needed(done, len(pollycubes)) done += 1 @@ -68,7 +68,7 @@ def generate_polycubes(n: int, use_cache: bool = False) -> list[np.ndarray]: return results -def get_canoincal_packing(polycube: np.ndarray) -> int: +def get_canoincal_packing(polycube: np.ndarray, known_hashes: set[int]) -> int: """ Determines if a polycube has already been seen. @@ -79,13 +79,14 @@ def get_canoincal_packing(polycube: np.ndarray) -> int: polycube (np.array): 3D Numpy byte array where 1 values indicate polycube positions Returns: - boolean: True if polycube is already present in the set of all cubes so far. hash: the hash for this cube """ max_hash = 0 for cube_rotation in all_rotations(polycube): this_hash = pack(cube_rotation) + if(this_hash in known_hashes): + return this_hash if (this_hash > max_hash): max_hash = this_hash return max_hash From 287dffde9ab23f793765564ecda6df9b01973148 Mon Sep 17 00:00:00 2001 From: GeorgeM Date: Wed, 12 Jul 2023 21:29:17 +0100 Subject: [PATCH 04/10] remove legacy parrelism test file --- tests/test_parallel.py | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 tests/test_parallel.py diff --git a/tests/test_parallel.py b/tests/test_parallel.py deleted file mode 100644 index ffa1fbe..0000000 --- a/tests/test_parallel.py +++ /dev/null @@ -1,22 +0,0 @@ -import unittest -import random -from libraries.parallel import init_parrelism, dispatch_tasks - -def reverse_ints_task(args): - (data, logger) = args - return data[::-1] - -class ParallelTests(unittest.TestCase): - def _underlying_test(self, run_in_paralel): - init_parrelism() - random.seed(0) # determenistic random data generation - test_data = [random.randint(0, 100) for x in range(0,763554)] - - results = dispatch_tasks(reverse_ints_task, test_data, run_in_paralel) - self.assertEqual(test_data, results[::-1]) - - def test_simple_serial_task(self): - self._underlying_test(False) - - def test_simple_paralel_task(self): - self._underlying_test(True) \ No newline at end of file From 2059b9f46aa676aa2f7328ebcf7fd2a32aadffa4 Mon Sep 17 00:00:00 2001 From: GeorgeM Date: Thu, 13 Jul 2023 00:04:32 +0100 Subject: [PATCH 05/10] try faster packing and unpacking with builtin byte conversions --- libraries/packing.py | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/libraries/packing.py b/libraries/packing.py index e77f0b0..f4fb6e5 100644 --- a/libraries/packing.py +++ b/libraries/packing.py @@ -1,4 +1,5 @@ import numpy as np +import math def pack(polycube: np.ndarray) -> int: @@ -15,13 +16,16 @@ def pack(polycube: np.ndarray) -> int: """ - pack_cube = np.packbits(polycube.flatten(), bitorder='big') - cube_hash = 0 - for 
index in polycube.shape: - cube_hash = (cube_hash << 8) + int(index) - for part in pack_cube: - cube_hash = (cube_hash << 8) + int(part) - return cube_hash + # pack_cube = np.packbits(polycube.flatten(), bitorder='big') + # cube_hash = 0 + # for index in polycube.shape: + # cube_hash = (cube_hash << 8) + int(index) + # for part in pack_cube: + # cube_hash = (cube_hash << 8) + int(part) + # return cube_hash + + data = polycube.tobytes() + polycube.shape[0].to_bytes(1, 'big') + polycube.shape[1].to_bytes(1, 'big') + polycube.shape[2].to_bytes(1, 'big') + return int.from_bytes(data, 'big') def unpack(cube_hash: int) -> np.ndarray: @@ -36,14 +40,15 @@ def unpack(cube_hash: int) -> np.ndarray: np.array: 3D Numpy byte array where 1 values indicate polycube positions """ - parts = [] - while (cube_hash): - parts.append(cube_hash % 256) - cube_hash >>= 8 - parts = parts[::-1] - shape = (parts[0], parts[1], parts[2]) - data = parts[3:] + + length = math.ceil(math.log2(cube_hash)) + parts = cube_hash.to_bytes(length, byteorder='big') + shape = ( + parts[-3], + parts[-2], + parts[-1], + ) size = shape[0] * shape[1] * shape[2] - raw = np.unpackbits(np.array(data, dtype=np.uint8), bitorder='big') - final = raw[0:size].reshape(shape) + raw = np.frombuffer(parts[:-3], dtype=np.uint8) + final = raw[(len(raw) - size):len(raw)].reshape(shape) return final From 0f3220dd5533ff7eb50c2cbad1c7ccb2674cf6a7 Mon Sep 17 00:00:00 2001 From: Vladimir Fokow <57260995+VladimirFokow@users.noreply.github.com> Date: Thu, 13 Jul 2023 03:36:10 +0200 Subject: [PATCH 06/10] - cube_hash is now not int but bytes: replaced unnecessary `int.from_bytes(data, 'big')` with just `data` - change naming: cube_hash to cube_id - typo correction: get_canoincal_packing - relevant docstring updates --- cubes.py | 42 ++++++++++++++++++--------------- libraries/packing.py | 55 ++++++++++++++++++++++++-------------------- 2 files changed, 53 insertions(+), 44 deletions(-) diff --git a/cubes.py b/cubes.py index 8c770c2..44c33fb 100644 --- a/cubes.py +++ b/cubes.py @@ -42,13 +42,13 @@ def generate_polycubes(n: int, use_cache: bool = False) -> list[np.ndarray]: else: pollycubes = generate_polycubes(n-1, use_cache) - hashes = set() + known_ids = set() done = 0 print(f"\nHashing polycubes n={n}") for base_cube in pollycubes: for new_cube in expand_cube(base_cube): - cube_hash = get_canoincal_packing(new_cube, hashes) - hashes.add(cube_hash) + cube_id = get_canonical_packing(new_cube, known_ids) + known_ids.add(cube_id) log_if_needed(done, len(pollycubes)) done += 1 log_if_needed(done, len(pollycubes)) @@ -56,11 +56,11 @@ def generate_polycubes(n: int, use_cache: bool = False) -> list[np.ndarray]: print(f"\nGenerating polycubes from hash n={n}") results = [] done = 0 - for cube_hash in hashes: - results.append(unpack(cube_hash)) - log_if_needed(done, len(hashes)) + for cube_id in known_ids: + results.append(unpack(cube_id)) + log_if_needed(done, len(known_ids)) done += 1 - log_if_needed(done, len(hashes)) + log_if_needed(done, len(known_ids)) if (use_cache and not cache_exists(n)): save_cache(n, results) @@ -68,28 +68,32 @@ def generate_polycubes(n: int, use_cache: bool = False) -> list[np.ndarray]: return results -def get_canoincal_packing(polycube: np.ndarray, known_hashes: set[int]) -> int: +def get_canonical_packing(polycube: np.ndarray, + known_ids: set[bytes]) -> bytes: """ Determines if a polycube has already been seen. - Considers all possible rotations of a cube against the existing cubes stored in memory. 
- Returns True if the cube exists, or False if it is new. + Considers all possible rotations of a polycube against the existing + ones stored in memory. Returns the id if it's found in the set, + or the maximum id of all rotations if the polycube is new. Parameters: - polycube (np.array): 3D Numpy byte array where 1 values indicate polycube positions + polycube (np.array): 3D Numpy byte array where 1 values indicate + cube positions. Must be of type np.int8 + known_ids (set[bytes]): A set of all known polycube ids Returns: - hash: the hash for this cube + cube_id (bytes): the id for this cube """ - max_hash = 0 + max_id = b'\x00' for cube_rotation in all_rotations(polycube): - this_hash = pack(cube_rotation) - if(this_hash in known_hashes): - return this_hash - if (this_hash > max_hash): - max_hash = this_hash - return max_hash + this_id = pack(cube_rotation) + if (this_id in known_ids): + return this_id + if (this_id > max_id): + max_id = this_id + return max_id if __name__ == "__main__": diff --git a/libraries/packing.py b/libraries/packing.py index f4fb6e5..9fd39ad 100644 --- a/libraries/packing.py +++ b/libraries/packing.py @@ -2,20 +2,22 @@ import math -def pack(polycube: np.ndarray) -> int: +def pack(polycube: np.ndarray) -> bytes: """ - Converts a 3D ndarray into a single unsigned integer for quick hashing and efficient storage - - Converts a {0,1} nd array into a single unique large integer + Converts a 3D ndarray into a single bytes object that unique identifies + the polycube, is hashable, comparable, and allows to reconstruct the + original polycube ndarray. Parameters: - polycube (np.array): 3D Numpy byte array where 1 values indicate polycube positions + polycube (np.array): 3D Numpy byte array where 1 values indicate polycube positions, + and 0 values indicate empty space. Must be of type np.int8. 
Returns: - int: a unique integer hash + cube_id (bytes): a bytes representation of the polycube """ + # # Previous implementation: # pack_cube = np.packbits(polycube.flatten(), bitorder='big') # cube_hash = 0 # for index in polycube.shape: @@ -24,31 +26,34 @@ def pack(polycube: np.ndarray) -> int: # cube_hash = (cube_hash << 8) + int(part) # return cube_hash - data = polycube.tobytes() + polycube.shape[0].to_bytes(1, 'big') + polycube.shape[1].to_bytes(1, 'big') + polycube.shape[2].to_bytes(1, 'big') - return int.from_bytes(data, 'big') + # # dtype should be np.int8: (commented out for efficiency) + # if polycube.dtype != np.int8: + # raise TypeError("Polycube must be of type np.int8") + # pack cube + data = polycube.tobytes() + polycube.shape[0].to_bytes(1, 'big') \ + + polycube.shape[1].to_bytes(1, 'big') \ + + polycube.shape[2].to_bytes(1, 'big') + return data -def unpack(cube_hash: int) -> np.ndarray: - """ - Converts a single integer back into a 3D ndarray +def unpack(cube_id: bytes) -> np.ndarray: + """ + Converts a bytes object back into a 3D ndarray Parameters: - cube_hash (int): a unique integer hash + cube_id (bytes): a unique bytes object Returns: - np.array: 3D Numpy byte array where 1 values indicate polycube positions - + polycube (np.array): 3D Numpy byte array where 1 values indicate + cube positions + """ + # Extract shape information + shape = (cube_id[-3], cube_id[-2], cube_id[-1]) + + # Create ndarray from byte data + polycube = np.frombuffer(cube_id[:-3], dtype=np.int8) + polycube = polycube.reshape(shape) + return polycube - length = math.ceil(math.log2(cube_hash)) - parts = cube_hash.to_bytes(length, byteorder='big') - shape = ( - parts[-3], - parts[-2], - parts[-1], - ) - size = shape[0] * shape[1] * shape[2] - raw = np.frombuffer(parts[:-3], dtype=np.uint8) - final = raw[(len(raw) - size):len(raw)].reshape(shape) - return final From 7dd61a58ce1a9aa247f6c530cd029ef0df2a5f47 Mon Sep 17 00:00:00 2001 From: Andreas Gravrok <45593399+joulebit@users.noreply.github.com> Date: Thu, 13 Jul 2023 04:04:41 +0200 Subject: [PATCH 07/10] 6% faster with better cropped cube --- libraries/resizing.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/libraries/resizing.py b/libraries/resizing.py index 517f5f4..b06dea2 100644 --- a/libraries/resizing.py +++ b/libraries/resizing.py @@ -15,12 +15,11 @@ def crop_cube(cube: np.ndarray) -> np.ndarray: np.array: Cropped 3D Numpy byte array equivalent to cube, but with no zero padding """ - for i in range(cube.ndim): - cube = np.swapaxes(cube, 0, i) - nonzero_indices = np.any(cube != 0, axis=tuple(range(1, cube.ndim))) - cube = cube[nonzero_indices] - cube = np.swapaxes(cube, 0, i) - return cube + nonzero_indices = np.where(cube != 0) + cropped_cube = cube[np.min(nonzero_indices[0]):np.max(nonzero_indices[0]) + 1, + np.min(nonzero_indices[1]):np.max(nonzero_indices[1]) + 1, + np.min(nonzero_indices[2]):np.max(nonzero_indices[2]) + 1] + return cropped_cube def expand_cube(cube: np.ndarray) -> Generator[np.ndarray, None, None]: From 15816f71ac337c7746903bab51bad0f317a89c4a Mon Sep 17 00:00:00 2001 From: GeorgeM Date: Thu, 13 Jul 2023 14:43:25 +0100 Subject: [PATCH 08/10] still pack the bits to save memory but now use frombuffer for much faster encoding --- libraries/packing.py | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/libraries/packing.py b/libraries/packing.py index 9fd39ad..60204de 100644 --- a/libraries/packing.py +++ b/libraries/packing.py @@ -16,24 +16,10 
@@ def pack(polycube: np.ndarray) -> bytes: cube_id (bytes): a bytes representation of the polycube """ - - # # Previous implementation: - # pack_cube = np.packbits(polycube.flatten(), bitorder='big') - # cube_hash = 0 - # for index in polycube.shape: - # cube_hash = (cube_hash << 8) + int(index) - # for part in pack_cube: - # cube_hash = (cube_hash << 8) + int(part) - # return cube_hash - - # # dtype should be np.int8: (commented out for efficiency) - # if polycube.dtype != np.int8: - # raise TypeError("Polycube must be of type np.int8") - - # pack cube - data = polycube.tobytes() + polycube.shape[0].to_bytes(1, 'big') \ - + polycube.shape[1].to_bytes(1, 'big') \ - + polycube.shape[2].to_bytes(1, 'big') + data = polycube.shape[0].to_bytes(1, 'little') \ + + polycube.shape[1].to_bytes(1, 'little') \ + + polycube.shape[2].to_bytes(1, 'little') \ + + np.packbits(polycube.flatten(), bitorder='little').tobytes() return data @@ -50,10 +36,8 @@ def unpack(cube_id: bytes) -> np.ndarray: """ # Extract shape information - shape = (cube_id[-3], cube_id[-2], cube_id[-1]) - - # Create ndarray from byte data - polycube = np.frombuffer(cube_id[:-3], dtype=np.int8) - polycube = polycube.reshape(shape) + shape = (cube_id[0], cube_id[1], cube_id[2]) + size = shape[0] * shape[1] * shape[2] + polycube = np.unpackbits(np.frombuffer(cube_id[3:], dtype=np.uint8), count=size, bitorder='little').reshape(shape) return polycube From 6de8ef87d8c00e09eea4b33bcbe4d8b5d83bfe17 Mon Sep 17 00:00:00 2001 From: GeorgeM Date: Sun, 16 Jul 2023 19:49:25 +0100 Subject: [PATCH 09/10] add test data, make unit-tests correctly auto discover, update README --- README.md | 16 ++++++++++------ tests/__init__.py | 4 ++++ tests/test_cache.py | 2 +- tests/test_data.npy | Bin 0 -> 1774 bytes tests/test_packing.py | 2 +- tests/test_resizing.py | 2 +- tests/test_rotation.py | 2 +- 7 files changed, 18 insertions(+), 10 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/test_data.npy diff --git a/README.md b/README.md index 4a12b83..3aba5c2 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,6 @@ The code includes some doc strings to help you understand what it does, but in s To generate all combinations of n cubes, we first calculate all possible n-1 shapes based on the same algorithm. We begin by taking all of the n-1 shape, and for each of these add new cubes in any possible free locations. For each of these potential new shapes, we test each rotation of this shape to see if it's been seen before. Entirely new shapes are added to a set of all shapes, to check future candidates. -In order to check slightly faster than simply comparing arrays, each shape is converted into a shortened run length encoding form, which allows hashes to be computed, so we can make use of the set datastructure. - ## Running the code With python installed, you can run the code like this: @@ -21,13 +19,19 @@ With python installed, you can run the code like this: Where n is the number of cubes you'd like to calculate. If you specify `--cache` then the program will attempt to load .npy files that hold all the pre-computed cubes for n-1 and then n. If you specify `--no-cache` then everything is calcuated from scratch, and no cache files are stored. +## Testing your changes. +If you are contributing to the python version of this project, you can find some unit tests in the tests folder. +these can be run with "python -m unittest". these tests are not complete or rigerous but they might help spot obvious errors in any changes you make. 
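As an illustration of the style those tests follow, a new check for expand_cube might look roughly like the sketch below (the file name tests/test_expansion.py is hypothetical; it assumes the bundled tests/test_data.npy fixture loaded via get_test_data and is discovered by `python -m unittest` from the repository root):

```python
import unittest
import numpy as np
from libraries.resizing import expand_cube
from .utils import get_test_data


class ExpansionTests(unittest.TestCase):
    def test_expansion_adds_exactly_one_cube(self):
        # every candidate yielded by expand_cube should contain exactly one extra filled voxel
        for polycube in get_test_data():
            for expanded in expand_cube(polycube):
                self.assertEqual(np.count_nonzero(expanded),
                                 np.count_nonzero(polycube) + 1)
```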
+
 ## Pre-computed cache files
-You can download the cache files for n=3 to n=11 from [here](https://drive.google.com/drive/folders/1Ls3gJCrNQ17yg1IhrIav70zLHl858Fl4?usp=drive_link). If you manage to calculate any more sets, please feel free to save them as an npy file and I'll upload them!
+You can download the cache files for n=3 to n=12 from [here](https://drive.google.com/drive/folders/1Ls3gJCrNQ17yg1IhrIav70zLHl858Fl4?usp=drive_link). If you manage to calculate any more sets, please feel free to save them as an npy file and I'll upload them!
 
 ## Improving the code
-This was just a bit of fun, and as soon as it broadly worked, I stopped! This code could be made a lot better, and actually the whole point of the video was to get people thinking and have a mess around yourselves. Some things you might think about:
-- Another language like c or java would be substantially faster
-- Other languages would also have better support for multi-threading, which would be a transformative speedup
+This repo already has some improvements included, and will happily accept more via pull request.
+Some things you might think about:
+- C and Rust implementations are currently in development; if you would like to contribute to these, look at the pull requests (or of course feel free to start your own).
+- The main limiting factor at this time seems to be memory usage: at n=14+ you need hundreds of GBs just to store the cubes, so keeping them all in main memory gets difficult.
+- Distributing the computation across many systems would allow us to scale horizontally rather than vertically, but it opens questions of how to do so without each system having a full copy of all the cubes, and how to manage the large quantities of data.
 - Calculating 24 rotations of a cube is slow, the only way to avoid this would be to come up with some rotationally invariant way of comparing cubes. I've not thought of one yet!
 
 ## Contributing!
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..686a12b
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1,4 @@
+from . import test_cache
+from . import test_packing
+from . import test_resizing
+from . import test_rotation
\ No newline at end of file
diff --git a/tests/test_cache.py b/tests/test_cache.py
index abb9a0d..0f57389 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -2,7 +2,7 @@
 import os
 from libraries.cache import get_cache, save_cache
 from numpy.testing import assert_array_equal
-from utils import get_test_data
+from .utils import get_test_data
 
 
 class CachingTests(unittest.TestCase):
diff --git a/tests/test_data.npy b/tests/test_data.npy
new file mode 100644
index 0000000000000000000000000000000000000000..477f6b92491b85b30017c25d0ebfffbd20c71fd1
GIT binary patch
literal 1774
diff --git a/tests/test_packing.py b/tests/test_packing.py
index 930ef8e..8b61417 100644
--- a/tests/test_packing.py
+++ b/tests/test_packing.py
@@ -1,7 +1,7 @@
 import unittest
 from numpy.testing import assert_array_equal
 from libraries.packing import pack, unpack
-from utils import get_test_data
+from .utils import get_test_data
 
 class PackingTests(unittest.TestCase):
     def test_pack_does_not_throw(self):
diff --git a/tests/test_resizing.py b/tests/test_resizing.py
index f97081c..5a95ac0 100644
--- a/tests/test_resizing.py
+++ b/tests/test_resizing.py
@@ -1,7 +1,7 @@
 import unittest
 import os
 from libraries.resizing import crop_cube, expand_cube
-from utils import get_test_data
+from .utils import get_test_data
 
 class CroppingTests(unittest.TestCase):
     #todo
diff --git a/tests/test_rotation.py b/tests/test_rotation.py
index ee4e064..4dc66cc 100644
--- a/tests/test_rotation.py
+++ b/tests/test_rotation.py
@@ -1,7 +1,7 @@
 import unittest
 import numpy as np
 from libraries.rotation import all_rotations
-from utils import get_test_data
+from .utils import get_test_data
 
 class RotatingTests(unittest.TestCase):
     def test_rotate_quantity(self):

From 1e380a8dd2117f34e33e6a9055cfed7622a0957b Mon Sep 17 00:00:00 2001
From: GeorgeM
Date: Sun, 16 Jul 2023 19:51:40 +0100
Subject: [PATCH 10/10] fix minor typo

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3aba5c2..aae9bdc 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ You can download the cache files for n=3 to n=12 from [here](https://drive.googl
 ## Improving the code
 This repo already has some improvements included, and will happily accept more via pull request.
 Some things you might think about:
-- C and Rust implementations are currently in development; if you would like to contribute to these, look at the pull requests (or of course feel free to start your own).
+- C++ and Rust implementations are currently in development; if you would like to contribute to these, look at the pull requests (or of course feel free to start your own).
 - The main limiting factor at this time seems to be memory usage: at n=14+ you need hundreds of GBs just to store the cubes, so keeping them all in main memory gets difficult.
 - Distributing the computation across many systems would allow us to scale horizontally rather than vertically, but it opens questions of how to do so without each system having a full copy of all the cubes, and how to manage the large quantities of data.
 - Calculating 24 rotations of a cube is slow, the only way to avoid this would be to come up with some rotationally invariant way of comparing cubes. I've not thought of one yet!
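
For reference, the two helpers these patches converge on can be summarised in a short stand-alone sketch: the bounding-box crop from PATCH 07 and the bytes id from PATCH 08 (three little-endian shape bytes followed by the bit-packed voxels). The functions below mirror the patched libraries/resizing.py and libraries/packing.py; the `example` array at the end is hypothetical and is only there to demonstrate the round trip, it is not test data from the repository.

import numpy as np

def crop_cube(cube: np.ndarray) -> np.ndarray:
    # Trim zero padding by slicing to the bounding box of the non-zero voxels.
    nz = np.where(cube != 0)
    return cube[nz[0].min():nz[0].max() + 1,
                nz[1].min():nz[1].max() + 1,
                nz[2].min():nz[2].max() + 1]

def pack(polycube: np.ndarray) -> bytes:
    # Three shape bytes, then the voxels bit-packed eight to a byte.
    return (polycube.shape[0].to_bytes(1, 'little')
            + polycube.shape[1].to_bytes(1, 'little')
            + polycube.shape[2].to_bytes(1, 'little')
            + np.packbits(polycube.flatten(), bitorder='little').tobytes())

def unpack(cube_id: bytes) -> np.ndarray:
    # The first three bytes give the shape; the rest are the packed voxels.
    shape = (cube_id[0], cube_id[1], cube_id[2])
    size = shape[0] * shape[1] * shape[2]
    bits = np.unpackbits(np.frombuffer(cube_id[3:], dtype=np.uint8),
                         count=size, bitorder='little')
    return bits.reshape(shape)

# Hypothetical padded polycube used only to show the round trip.
example = np.zeros((3, 3, 3), dtype=np.int8)
example[0, 0, 0] = example[0, 0, 1] = example[0, 1, 1] = 1
cropped = crop_cube(example)  # shape (1, 2, 2)
assert np.array_equal(unpack(pack(cropped)), cropped)

Packed this way, each shape costs roughly one byte per eight voxels plus three shape bytes, which is what keeps the ids cheap enough to hold very large numbers of cubes in the deduplication set.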