Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ The following example shows how to get a benchmark clustering problem (by name o
```python
from iohclustering import get_problem, download_benchmark_datasets

# Download benchmark datasets
download_benchmark_datasets()

# Get benchmark problem by name (e.g., "iris_pca") with k=2 clusters
clustering_problem, retransform = get_problem(fid="iris_pca", k=2)
Expand Down Expand Up @@ -85,7 +83,10 @@ This project is licensed under a standard BSD-3 clause License. See the LICENSE

## Acknowledgments


This work has been established as a collaboration between:
* Diederick Vermetten
* Catalin-Viorel Dinu
* Marcus Gallagher

## Cite Us

Expand Down
10 changes: 9 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "iohclustering"
version = "0.0.1b"
version = "0.0.2"
authors = [
]
description = "Package for IOH Clustering problems"
Expand All @@ -20,6 +20,14 @@ dependencies = [
"numpy",
]

[tool.setuptools]
package-dir = {"" = "src"}


[tool.setuptools.package-data]
"iohclustering" = ["static/*.txt"]


[project.optional-dependencies]
test = [
"coverage",
Expand Down
56 changes: 10 additions & 46 deletions src/iohclustering/cluster_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
from .cluster_metrics import *
from .cluster_baseline_problems import *
from importlib.resources import files


def create_cluster_problem(dataset: str | np.ndarray, k: int, instance=1, error_metric="mse_euclidean") -> tuple[ioh.problem.RealSingleObjective, callable]:
Expand All @@ -17,8 +18,7 @@ def create_cluster_problem(dataset: str | np.ndarray, k: int, instance=1, error_
---------
dataset : str or np.ndarray
The dataset to be used for clustering. If a string is provided, it is assumed
to be the name of a file (without extension) located in the 'banchmark_datasets/' directory
with a '.txt' extension. If an np.ndarray is provided, it is used directly
to be the name of a benchmark dataset. If an np.ndarray is provided, it is used directly
as the dataset.
k : int
The number of clusters to create.
Expand Down Expand Up @@ -53,11 +53,9 @@ def create_cluster_problem(dataset: str | np.ndarray, k: int, instance=1, error_
"""
id = None
if isinstance(dataset, str):
if os.path.exists(f'{dataset}.txt'):
data = np.loadtxt(f'{dataset}.txt', delimiter=',')
else:
id = get_problem_id(dataset)
data = np.loadtxt(f'banchmark_datasets/{dataset}.txt', delimiter=',')
id = get_problem_id(dataset)
dataset_path = files("iohclustering.static").joinpath(f"{dataset}.txt")
data = np.loadtxt(dataset_path, delimiter=',')

else:
data = dataset
Expand All @@ -71,7 +69,6 @@ def create_cluster_problem(dataset: str | np.ndarray, k: int, instance=1, error_


data_np = np.array(data)

# Normalize the data to the range [0, 1]
data_min = np.tile(np.min(data_np, axis=0), k)
data_max = np.tile(np.max(data_np, axis=0), k)
Expand Down Expand Up @@ -103,41 +100,6 @@ def retransform(X):

return f, retransform

def download_benchmark_datasets(warn: bool = True) -> None:
    """Download and extract the benchmark datasets from GitHub.

    Fetches ``static.tar.gz`` from the "main" branch of the IOHClustering
    GitHub repository and extracts it into a directory named
    "banchmark_datasets", resolved against the current working directory.
    If that directory already exists the download is skipped; ``warn`` only
    controls whether a warning is emitted about the skip.

    Args:
        warn (bool): If True, a warning is issued when the target directory
            already exists. Defaults to True.

    Raises:
        urllib.error.URLError: If there is an issue with downloading the file
            from the remote URL.
        tarfile.TarError: If there is an issue with extracting the tarball.
    """
    target = os.path.realpath("banchmark_datasets")
    branch = "main"

    # Skip whenever the directory exists. The skip used to be tied to `warn`,
    # so calling with warn=False re-downloaded the archive on every call.
    if os.path.isdir(target):
        if warn:
            warnings.warn(f"Attempting to download static folder but path {target} already exists. Skipping...")
        return

    github_static_folder = f"https://github.com/IOHprofiler/IOHClustering/blob/{branch}/static.tar.gz?raw=true"
    print(f"Downloading static folder from {github_static_folder} to {target}")
    with urllib.request.urlopen(github_static_folder) as f:
        # Create the directory only after the request succeeded, so a failed
        # request does not leave an empty directory that later calls skip on.
        os.makedirs(target, exist_ok=True)
        # NOTE(review): extractall() trusts the member paths in the archive;
        # consider an extraction filter if the source is ever not fully trusted.
        # Stream mode ("r|gz") is required because the HTTP response is not seekable.
        with tarfile.open(fileobj=f, mode="r|gz") as thetarfile:
            thetarfile.extractall(target)



def get_problem_id(dataset_name: str) -> int:
"""
Expand Down Expand Up @@ -231,11 +193,13 @@ def load_problems():
and the values are the corresponding clustering problem objects.

"""
download_benchmark_datasets(warn=False)
datasets_path = "banchmark_datasets"

folder = files("iohclustering.static")
datasets_names = [f.name for f in folder.iterdir() if f.suffix == ".txt"]

problems = {}
for dataset in CLUSTER_BASELINE_DATASETS.values():
if f"{dataset}.txt" in os.listdir(datasets_path):
if f"{dataset}.txt" in datasets_names:
for k in BASELINE_K_DIMENTIONS[dataset]:
problem, retransform = create_cluster_problem(dataset, k)
problems[problem.meta_data.name] = problem, retransform
Expand Down
Empty file.
106 changes: 106 additions & 0 deletions src/iohclustering/static/breast_pca.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
-1.848982724994230331e-01,3.328202814139474275e-01
-7.240586425684303329e-01,2.198421000584537488e+00
4.286043357930507391e-01,1.138245782413504825e+00
-2.587553981135277126e-01,2.609080554216371972e+00
-6.741198420671984382e-01,1.805546179230361981e+00
-9.583882252728245232e-01,3.555546797097947076e-01
-1.440760389747365311e+00,4.134665947724535523e-02
-1.182575545409856543e+00,9.573855329593937702e-01
1.313382936248615185e-01,1.967459347507418910e+00
-1.357556379326936846e-01,2.145026897195538851e+00
2.792327651612147021e-01,3.349931330371387350e+00
1.334131481525345775e-01,1.060229812989774256e+00
-4.982280109676025526e-01,1.008252750470800763e+00
-1.701871266934971150e-01,2.784371517488861247e+00
3.285448519081174412e-02,1.379410631738837267e+00
9.053543976519000391e-02,3.214270116569836411e+00
-1.307072483517048411e+00,6.962660066452452945e-01
-9.072018099584816531e-01,1.335096165696885162e+00
-3.293908191184288237e-01,2.157144425402158117e+00
-2.590113541161894184e-01,3.116490085081830674e+00
-4.870285324051700226e-01,1.328666541844850002e+00
-1.897761841740689182e+00,-6.788460407715413147e-01
-1.960379205734269492e+00,-1.052842719736690302e+00
-1.461179410029408210e+00,7.332827640061865271e-01
-1.338919166880424783e+00,2.465845768346427297e-02
-1.719960361286458905e+00,-1.021819709187707481e+00
-1.756216957335545059e+00,-1.030355831297673053e+00
-1.720687052485004420e+00,-1.141553282089176724e+00
-1.831001209057610435e+00,-7.108522319812335954e-01
-1.338121326600621774e+00,-6.655153851912668506e-01
-1.668699527787910419e+00,-8.082490598305543950e-01
-1.306586709088289666e+00,-3.893175779619356680e-01
-1.810693602870551588e+00,1.017280820757553883e+00
-1.518599609822518293e+00,-4.187633231521315502e-01
-1.833762424400594560e+00,7.266122723606173173e-01
-2.117976500422840136e+00,-2.732877688678871930e-01
-1.661089426675247172e+00,1.269783905173813565e+00
-1.705605360112204760e+00,8.736995842165060644e-01
-1.027398046313792834e+00,-1.536998190669197906e-01
-1.821426314340875319e+00,-1.288451764200204908e+00
-1.586494447697767152e+00,-1.252101928973064826e+00
-1.666663372189262082e+00,6.375568565917143493e-01
-1.251977948680964348e+00,-9.366293955148259354e-01
-1.737787577977409903e+00,-4.566908458241893398e-01
-1.989894224328545569e+00,-2.631684801776546512e-01
-2.063309917689361672e+00,5.495878944476452199e-01
-1.588416907416133395e+00,9.959153230694850478e-01
-1.313374358093098326e+00,-7.809945608952784113e-01
-1.798707107745871614e+00,-8.345764688375999496e-01
-1.164517220578309420e+00,-1.252440641544600108e+00
-9.517084371850630387e-01,1.125503905569050778e+00
-1.355081211389860707e+00,6.102861192882447217e-01
-3.723751054650796255e-01,3.310338913216768209e+00
-1.232097817416121233e+00,2.543804857884927606e-01
-8.192274194272235466e-01,-1.544016712190948160e-01
-1.864434872752001704e+00,-5.434264122612918191e-02
-1.872130416393183783e+00,1.212130799227967071e+00
-1.883941879715140155e+00,-9.397691390430359393e-01
-1.822584971178903501e+00,-3.558291676287129168e-01
-1.801957677902964461e+00,7.845162594995971905e-02
-1.801957677902964461e+00,7.845162594995971905e-02
-1.826174177238950147e+00,-2.111250840323902200e-01
-1.786696400415751285e+00,3.865838482934754738e-01
-1.838065021858341197e+00,2.735524093524348554e-01
-1.700536886262037584e+00,-1.183405526887598480e+00
-1.305311735438024368e+00,-9.810663375105362904e-01
-2.024550740829507145e+00,-4.094014355141412098e-01
-2.038911767639823402e+00,7.649757410821866888e-02
-2.080713790785273964e+00,2.163542585430171250e-01
-1.929714750690816105e+00,1.540573504668079519e+00
1.315627774196893807e+00,-2.321166507067586871e+00
1.309647032240341291e+00,-4.382160731653361729e-01
-1.131173640425042586e-02,-1.430295156545804414e+00
-4.512478618865424784e-01,-7.679638055363463556e-01
1.204995641059246170e+00,-1.652294829907287488e+00
-8.604746493734681811e-01,-1.752586179191983451e+00
-6.430304809442323188e-01,-8.842406976130920970e-01
-7.912717456025313290e-01,-1.396406587600655946e+00
1.255279830703438959e+00,-1.834918358860255561e+00
2.530591583927060295e+00,-1.035789568344203548e+00
1.064021637611846094e+00,-8.033762235686184860e-01
1.437386531535752798e+00,-1.413480160120521578e+00
2.561741313062207315e+00,-1.232969373175034633e+00
2.428573734826210639e+00,-1.676462352493848229e+00
2.771762714558825724e+00,-2.012017974113212038e+00
1.246933239809339788e+00,-2.038916716734023638e+00
5.339009058023971299e+00,2.035030175046296463e+00
2.058310088388645986e-01,-1.919046539542039298e+00
2.254157075286578049e+00,-2.742345656143270749e-01
1.012412519827168289e+00,-1.912984036199320226e+00
2.244938588880122765e+00,-8.075015492813163442e-01
4.904499131031587744e+00,2.191618171074125321e-01
2.895809649155808518e+00,1.092106248452114937e-02
1.865368928339548482e+00,-1.198873471915568700e+00
2.601525586030091652e+00,-6.627592695917537968e-01
2.409816772711398158e+00,-2.847229478713268480e-01
1.165702398319113486e+00,-1.691515548002207936e+00
5.681183623804933802e+00,-5.355353757934687708e-01
4.688831082575416254e+00,8.628985692809039154e-01
5.467613980210909830e+00,2.808147193715014589e-02
2.797188845073364849e+00,-7.854488349066930386e-01
4.651591113628814611e+00,-4.414534766912490277e-02
1.233099334668400004e+01,3.220132363746897486e+00
1.988619122429610808e+00,-2.001715569536710149e+00
1.984934095409382371e+00,-1.266944911062789725e+00
5.527583030387858365e+00,-1.572698337686180947e+00
Loading