From 66fdb1cbc2f3283eb6a7905f3a8a65b52c86a301 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-14-157.us-west-2.compute.internal>
Date: Wed, 28 Feb 2024 19:47:07 +0000
Subject: [PATCH 01/13] automl benchmark script

---
 .../benchmarks/timeseries_test.yaml           |  13 +
 .../cloud_configs/amlb_configs/config.yaml    |  14 +
 .../amlb_configs/constraints.yaml             |   7 +
 .../amlb_configs/frameworks_example.yaml      |  18 +
 sample_configs/cloud_configs/bench_all.py     |  84 ++++
 .../custom_metrics/sample_metrics.py          |  11 +
 .../dataloaders/paper_image_datasets.yaml     | 119 +++++
 .../dataloaders/paper_text_datasets.yaml      | 123 ++++++
 .../paper_text_tabular_datasets.yaml          |  90 ++++
 .../dataloaders/text_dataloader.py            |  71 +++
 .../dataloaders/text_datasets.yaml            |  73 ++++
 .../dataloaders/text_tabular_dataloader.py    |  69 +++
 .../dataloaders/vision_dataloader.py          |  67 +++
 .../dataloaders/vision_datasets.yaml          |  31 ++
 .../multimodal_cloud_configs.yaml             |  36 ++
 .../multimodal_local_configs.yaml             |  24 ++
 .../paper_image_local_configs.yaml            |  50 +++
 .../paper_text_local_configs.yaml             |  40 ++
 .../paper_text_tabular_local_configs.yaml     |  37 ++
 .../resources/multimodal_constraints.yaml     |  13 +
 .../resources/multimodal_frameworks.yaml      |  69 +++
 .../cloud_configs/tabular_cloud_configs.yaml  |  40 ++
 .../cloud_configs/tabular_local_configs.yaml  |  15 +
 .../timeseries_cloud_configs.yaml             |  34 ++
 .../timeseries_local_configs.yaml             |  15 +
 .../dataloaders/paper_image_datasets.yaml     | 119 +++++
 .../dataloaders/paper_text_datasets.yaml      | 124 ++++++
 .../paper_text_tabular_datasets.yaml          |  94 ++++
 sample_configs/dataloaders/text_dataloader.py |  17 +-
 .../dataloaders/text_tabular_dataloader.py    |  69 +++
 .../dataloaders/vision_dataloader.py          |  17 +-
 sample_configs/paper_image_local_configs.yaml |  51 +++
 sample_configs/paper_text_local_configs.yaml  |  32 ++
 .../paper_text_tabular_local_configs.yaml     |  32 ++
 .../resources/multimodal_constraints.yaml     |   7 +
 .../resources/multimodal_frameworks.yaml      |  59 +++
 src/autogluon/bench/Dockerfile                |   2 +-
 .../batch_stack/lambdas/lambda_function.py    |   4 +-
 .../dataloaders/paper_image_datasets.yaml     | 119 +++++
 .../dataloaders/paper_text_datasets.yaml      | 123 ++++++
 .../paper_text_tabular_datasets.yaml          |  90 ++++
 .../dataloaders/text_dataloader.py            |  71 +++
 .../dataloaders/text_datasets.yaml            |  73 ++++
 .../dataloaders/text_tabular_dataloader.py    |  69 +++
 .../dataloaders/vision_dataloader.py          |  67 +++
 .../dataloaders/vision_datasets.yaml          |  31 ++
 .../resources/multimodal_constraints.yaml     |  13 +
 .../resources/multimodal_frameworks.yaml      |  69 +++
 .../bench/datasets/multimodal_dataset.py      |  51 ---
 .../eval/scripts/run_evaluation_openml.py     |   2 +-
 .../eval/scripts/run_generate_clean_openml.py |  39 +-
 .../autokeras/autokeras_benchmark.py          | 124 ++++++
 .../bench/frameworks/autokeras/exec.py        | 405 ++++++++++++++++++
 .../bench/frameworks/autokeras/setup.sh       |  36 ++
 .../bench/frameworks/autokeras/test.py        | 336 +++++++++++++++
 src/autogluon/bench/frameworks/benchmark.py   |   4 +
 .../bench/frameworks/multimodal/exec.py       |  25 +-
 src/autogluon/bench/runbenchmark.py           |   9 +-
 58 files changed, 3470 insertions(+), 76 deletions(-)
 create mode 100644 sample_configs/cloud_configs/amlb_configs/benchmarks/timeseries_test.yaml
 create mode 100644 sample_configs/cloud_configs/amlb_configs/config.yaml
 create mode 100644 sample_configs/cloud_configs/amlb_configs/constraints.yaml
 create mode 100644 sample_configs/cloud_configs/amlb_configs/frameworks_example.yaml
 create mode 100644 sample_configs/cloud_configs/bench_all.py
 create mode 100644 sample_configs/cloud_configs/custom_metrics/sample_metrics.py
 create mode 100644 sample_configs/cloud_configs/dataloaders/paper_image_datasets.yaml
 create mode 100644 sample_configs/cloud_configs/dataloaders/paper_text_datasets.yaml
 create mode 100644 sample_configs/cloud_configs/dataloaders/paper_text_tabular_datasets.yaml
 create mode 100644 sample_configs/cloud_configs/dataloaders/text_dataloader.py
 create mode 100644 sample_configs/cloud_configs/dataloaders/text_datasets.yaml
 create mode 100644 sample_configs/cloud_configs/dataloaders/text_tabular_dataloader.py
 create mode 100644 sample_configs/cloud_configs/dataloaders/vision_dataloader.py
 create mode 100644 sample_configs/cloud_configs/dataloaders/vision_datasets.yaml
 create mode 100644 sample_configs/cloud_configs/multimodal_cloud_configs.yaml
 create mode 100644 sample_configs/cloud_configs/multimodal_local_configs.yaml
 create mode 100644 sample_configs/cloud_configs/paper_image_local_configs.yaml
 create mode 100644 sample_configs/cloud_configs/paper_text_local_configs.yaml
 create mode 100644 sample_configs/cloud_configs/paper_text_tabular_local_configs.yaml
 create mode 100644 sample_configs/cloud_configs/resources/multimodal_constraints.yaml
 create mode 100644 sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
 create mode 100644 sample_configs/cloud_configs/tabular_cloud_configs.yaml
 create mode 100644 sample_configs/cloud_configs/tabular_local_configs.yaml
 create mode 100644 sample_configs/cloud_configs/timeseries_cloud_configs.yaml
 create mode 100644 sample_configs/cloud_configs/timeseries_local_configs.yaml
 create mode 100644 sample_configs/dataloaders/paper_image_datasets.yaml
 create mode 100644 sample_configs/dataloaders/paper_text_datasets.yaml
 create mode 100644 sample_configs/dataloaders/paper_text_tabular_datasets.yaml
 create mode 100644 sample_configs/dataloaders/text_tabular_dataloader.py
 create mode 100644 sample_configs/paper_image_local_configs.yaml
 create mode 100644 sample_configs/paper_text_local_configs.yaml
 create mode 100644 sample_configs/paper_text_tabular_local_configs.yaml
 create mode 100644 src/autogluon/bench/custom_configs/dataloaders/paper_image_datasets.yaml
 create mode 100644 src/autogluon/bench/custom_configs/dataloaders/paper_text_datasets.yaml
 create mode 100644 src/autogluon/bench/custom_configs/dataloaders/paper_text_tabular_datasets.yaml
 create mode 100644 src/autogluon/bench/custom_configs/dataloaders/text_dataloader.py
 create mode 100644 src/autogluon/bench/custom_configs/dataloaders/text_datasets.yaml
 create mode 100644 src/autogluon/bench/custom_configs/dataloaders/text_tabular_dataloader.py
 create mode 100644 src/autogluon/bench/custom_configs/dataloaders/vision_dataloader.py
 create mode 100644 src/autogluon/bench/custom_configs/dataloaders/vision_datasets.yaml
 create mode 100644 src/autogluon/bench/custom_configs/resources/multimodal_constraints.yaml
 create mode 100644 src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
 create mode 100644 src/autogluon/bench/frameworks/autokeras/autokeras_benchmark.py
 create mode 100644 src/autogluon/bench/frameworks/autokeras/exec.py
 create mode 100755 src/autogluon/bench/frameworks/autokeras/setup.sh
 create mode 100644 src/autogluon/bench/frameworks/autokeras/test.py

diff --git a/sample_configs/cloud_configs/amlb_configs/benchmarks/timeseries_test.yaml b/sample_configs/cloud_configs/amlb_configs/benchmarks/timeseries_test.yaml
new file mode 100644
index 00000000..fd607a09
--- /dev/null
+++ b/sample_configs/cloud_configs/amlb_configs/benchmarks/timeseries_test.yaml
@@ -0,0 +1,13 @@
+- name: m4_hourly_2
+  dataset:
+    path: https://autogluon.s3.amazonaws.com/datasets/timeseries/m4_hourly/test.csv
+    type: timeseries
+    freq: H
+    forecast_horizon_in_steps: 48
+    seasonality: 24
+    target: target
+    id_column: item_id
+    timestamp_column: timestamp
+  metric: [mase, smape, mape, rmse, mql, wql, sql]
+  quantile_levels: [0.05, 0.5, 0.95]
+  folds: 2
diff --git a/sample_configs/cloud_configs/amlb_configs/config.yaml b/sample_configs/cloud_configs/amlb_configs/config.yaml
new file mode 100644
index 00000000..646d805a
--- /dev/null
+++ b/sample_configs/cloud_configs/amlb_configs/config.yaml
@@ -0,0 +1,14 @@
+frameworks:              # configuration namespace for the frameworks definitions.
+  definition_file:       # list of yaml files describing the frameworks base definitions.
+    - '{root}/resources/frameworks.yaml'
+    - '{user}/frameworks.yaml'
+  allow_duplicates: true     # if true, the last definition is used.
+  tags: ['stable', 'latest', '2020Q2', '2021Q3', '2023Q2', 'example']  # the list of supported tags when looking up frameworks:
+                              # for example frmwk:latest will look for framework frmwk in a frameworks_latest.yaml file if present.
+benchmarks:                     # configuration namespace for the benchmarks definitions.
+  definition_dir:               
+    - '{root}/resources/benchmarks'
+    - '{user}/benchmarks'
+  constraints_file:             # list of yaml files describing the benchmarks runtime constraints.
+    - '{root}/resources/constraints.yaml'
+    - '{user}/constraints.yaml'
diff --git a/sample_configs/cloud_configs/amlb_configs/constraints.yaml b/sample_configs/cloud_configs/amlb_configs/constraints.yaml
new file mode 100644
index 00000000..747faecf
--- /dev/null
+++ b/sample_configs/cloud_configs/amlb_configs/constraints.yaml
@@ -0,0 +1,7 @@
+---
+
+test101:
+  folds: 3
+  max_runtime_seconds: 600
+  cores: 4
+  min_vol_size_mb: 100000
diff --git a/sample_configs/cloud_configs/amlb_configs/frameworks_example.yaml b/sample_configs/cloud_configs/amlb_configs/frameworks_example.yaml
new file mode 100644
index 00000000..f3127079
--- /dev/null
+++ b/sample_configs/cloud_configs/amlb_configs/frameworks_example.yaml
@@ -0,0 +1,18 @@
+---
+
+#########################
+### AutoML frameworks ###
+#########################
+
+######### Do Not Remove #########
+AutoGluon:
+  version: "latest"
+######### Do Not Remove #########
+
+
+AutoGluon_dev:
+  extends: AutoGluon
+  repo: https://github.com/autogluon/autogluon.git
+  version: stable_GA4_update  # branch name
+  params:  # TabularPredictor.fit(params)
+    presets: best_quality
diff --git a/sample_configs/cloud_configs/bench_all.py b/sample_configs/cloud_configs/bench_all.py
new file mode 100644
index 00000000..d24a12ef
--- /dev/null
+++ b/sample_configs/cloud_configs/bench_all.py
@@ -0,0 +1,84 @@
+import random
+n_experiments = 5  # number of seeds drawn, i.e. runs per (constraint, framework, config) combination
+seeds = []
+for i in range(n_experiments):
+    seeds.append(random.randint(0, 100))  # integer in [0, 100], both ends included; duplicates are possible
+print(seeds)
+# Base run configs; each one is loaded, customised and re-written for every run below.
+config_paths = [
+    "sample_configs/paper_text_tabular_local_configs.yaml",
+    "sample_configs/paper_text_local_configs.yaml",
+    "sample_configs/paper_image_local_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_fs_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_tabular_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_tabular_image_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_tabular_image_standard_configs.yaml"
+]
+frameworks = [  # values written to the "framework" key of each generated config
+    "AutoGluon_best_master",
+    "ablation_greedy_soup",
+    "ablation_gradient_clip",
+    "ablation_warmup_steps",
+    "ablation_cosine_decay",
+    "ablation_weight_decay",
+    "ablation_lr_decay",
+    # "autokeras_master",
+    # "torch_compile_best",
+    # "AutoGluon_best_master",
+    # "AutoGluon_high_master",
+    # "AutoGluon_medium_master",
+    # "AutoGluon_high_vitlarge",
+    # "AutoGluon_medium_vitlarge",
+    # "AutoGluon_best_vitlarge",
+    # "AutoGluon_best_caformer",
+    # "AutoGluon_best_beit",
+    # "AutoGluon_best_swinv2"
+    # "AutoGluon_high_0_8",
+    # "AutoGluon_medium_0_8",
+    # "AutoGluon_best_0_8",
+]
+constraints = [  # values written to the "constraint" key of each generated config
+    "g4_12x"
+]
+fs = [  # few-shot sizes; only referenced by the commented-out "for shot in fs" loop below
+    1,
+    5,
+    10
+]
+# module = "autokeras"
+module = "multimodal"  # value written to the "module" key of each generated config
+
+import yaml
+import os
+import subprocess
+
+config_root = "./temp_configs"  # generated configs go to <config_root>/<constraint>/<framework>/
+os.makedirs(config_root, exist_ok=True)
+
+for constraint in constraints:
+    os.makedirs(f"{config_root}/{constraint}", exist_ok=True)
+    for framework in frameworks:
+        # for shot in fs:  (disabled loop; the block below keeps the extra indentation level it had inside it)
+            config_dir = f"{config_root}/{constraint}/{framework}"
+            os.makedirs(config_dir, exist_ok=True)
+
+            for config_path in config_paths:
+                for seed in seeds:
+                    with open(config_path, "r") as f:  # stays open until the agbench run at the end of this block returns
+                        configs = yaml.safe_load(f)
+                        if constraint == "g4_12x":
+                            configs["cdk_context"]["PREFIX"] = f"{configs['cdk_context']['PREFIX']}-multi"  # "-multi" suffix on the infra prefix
+                        configs["constraint"] = constraint
+                        configs["framework"] = framework
+                        configs["module"] = module
+                        configs["seed"] = seed  # top-level "seed" key of the run config
+                        # configs["custom_dataloader"]["shot"] = shot
+                        # configs["benchmark_name"] = f"{configs['benchmark_name']}-{shot}"
+                        new_config_path = os.path.join(config_dir, os.path.basename(config_path))  # same path for every seed, so each seed overwrites the previous file
+                        with open(new_config_path, "w") as new_f:
+                            yaml.dump(configs, new_f)
+
+                        command = ["agbench", "run", new_config_path]
+                        subprocess.run(command)  # waits for the command to finish; its return code is not checked
+
diff --git a/sample_configs/cloud_configs/custom_metrics/sample_metrics.py b/sample_configs/cloud_configs/custom_metrics/sample_metrics.py
new file mode 100644
index 00000000..460af376
--- /dev/null
+++ b/sample_configs/cloud_configs/custom_metrics/sample_metrics.py
@@ -0,0 +1,11 @@
+def f1_score(y_true, y_pred):  # F1 score with label 1 as the positive class and label 0 as the negative class
+    assert len(y_true) == len(y_pred)  # both inputs must have the same length
+    # NOTE(review): the elementwise ==, & and .sum() below presume array-like inputs (e.g. numpy/pandas) - confirm with callers.
+    TP = ((y_true == 1) & (y_pred == 1)).sum()  # true positives
+    FP = ((y_true == 0) & (y_pred == 1)).sum()  # false positives
+    FN = ((y_true == 1) & (y_pred == 0)).sum()  # false negatives
+
+    precision = TP / (TP + FP) if TP + FP != 0 else 0  # 0 instead of dividing by zero
+    recall = TP / (TP + FN) if TP + FN != 0 else 0  # 0 instead of dividing by zero
+
+    return 2 * precision * recall / (precision + recall) if precision + recall != 0 else 0  # harmonic mean; 0 when both terms are 0
diff --git a/sample_configs/cloud_configs/dataloaders/paper_image_datasets.yaml b/sample_configs/cloud_configs/dataloaders/paper_image_datasets.yaml
new file mode 100644
index 00000000..043aaedc
--- /dev/null
+++ b/sample_configs/cloud_configs/dataloaders/paper_image_datasets.yaml
@@ -0,0 +1,119 @@
+# data = custom_class(dataset_name=dataset_name, split=split, **kwargs)
+# custom_class: custom dataloader class, e.g. VisionDataLoader in sample_configs/dataloaders/vision_dataloader.py
+
+
+base: &base
+  url: s3://zs-models/datasets/{name}.zip
+  splits:
+    - train
+    - test
+  image_columns:
+    - ImageID
+  text_columns:
+  label_columns:
+    - LabelName
+  columns_to_drop:
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{name}/{value}"
+  metric: acc
+  problem_type: multiclass
+  
+fashion_mnist:
+  <<: *base
+  image_path: "{split}/{value}"
+
+casting:
+  <<: *base
+  image_path: "{value}"
+  problem_type: binary
+  metric: roc_auc
+
+food101:
+  <<: *base
+
+oxfordflowers:
+  <<: *base
+  image_path: "{name}/{split}/{value}"
+
+OxfordIIITPet:
+  <<: *base
+  splits:
+    - train
+    - validation
+    - test
+  annotation: "{name}_{split}_anno.csv"
+  image_path: "{split}/{value}"
+
+europeanflooddepth:
+  <<: *base
+  problem_type: binary
+  metric: roc_auc
+
+magnetictiledefects:
+  <<: *base
+
+stanfordcars:
+  <<: *base
+
+cub200:
+  <<: *base
+
+
+petfinder:
+  <<: *base
+  splits:
+    - train
+  text_columns:
+    - Description 
+  label_columns:
+    - AdoptionSpeed
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{value}"
+  metric: acc
+  problem_type: multiclass
+
+
+ham10000:
+  <<: *base
+  splits:
+    - train
+  label_columns:
+    - dx
+  image_path: "{split}/{value}"
+
+cd18:
+  <<: *base
+  splits:
+    - train
+  label_columns:
+    - Price
+  image_path: "{split}/{value}"
+  metric: rmse
+  problem_type: regression
+
+    
+hateful_meme:
+  <<: *base
+  splits:
+    - train
+  text_columns:
+    - text    
+  image_path: "{value}"
+  metric: roc_auc
+  problem_type: binary
+
+
+memotion:
+  <<: *base
+  splits:
+    - train
+  text_columns:
+    - text_corrected  
+  columns_to_drop:
+    - text_ocr
+  label_columns:
+    - overall_sentiment
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{split}/{value}"
+  metric: acc
+  problem_type: multiclass
diff --git a/sample_configs/cloud_configs/dataloaders/paper_text_datasets.yaml b/sample_configs/cloud_configs/dataloaders/paper_text_datasets.yaml
new file mode 100644
index 00000000..067e12d1
--- /dev/null
+++ b/sample_configs/cloud_configs/dataloaders/paper_text_datasets.yaml
@@ -0,0 +1,123 @@
+# supports both regular and fewshot datasets
+# for fewshot:
+# custom_dataloader:
+#     dataloader_file: sample_configs/dataloaders/text_dataloader.py   # relative path to WORKDIR
+#     class_name: TextDataLoader
+#     lang: en
+#     fewshot: True
+#     shot: 10
+#     seed: 8
+#     dataset_config_file: test/automm_text_datasets.yaml
+# 
+# When running in AWS mode, DATA_BUCKET: <dataset_bucket> needs to be specified in the run config
+
+base: &base
+  url: s3://automl-mm-bench/comprehend_benchmarks/{name}/{lang}/{subsample_path}{split}.csv
+  splits:
+    - train
+    - validation
+    - test
+  langs:
+    - en
+    - de
+    - es
+    - fr
+    - it
+
+  data_columns:
+    - label
+    - text
+  image_columns:
+  text_columns:
+    - text
+  label_columns:
+    - label
+  columns_to_drop:
+  data_sep: ","
+  metric: acc
+  problem_type: multiclass
+  subsample_path: subsampling/{shot}_shot/seed{seed}/
+  subsample_splits:
+    - train
+    - validation
+  shots:
+    - 1
+    - 5
+    - 10
+    - 20
+    - 50
+    - 100
+    - 500
+    - 1000
+  seeds:
+    - 0
+    - 1
+    - 2
+    - 3
+    - 4
+    - 5
+    - 6
+    - 7
+    - 8
+    - 9
+
+
+financial_news:
+  <<: *base
+  url: s3://zs-models/datasets/financial_news/{lang}/{split}.csv
+  splits:
+    - train
+  langs:
+    - en
+
+MLDoc-11000:
+  <<: *base
+
+MultiATIS-5000:
+  <<: *base
+  langs:
+    - en
+    - es
+    - fr
+    - pt
+
+amazon_reviews:
+  <<: *base
+  langs:
+    - en
+  problem_type: binary
+  metric: roc_auc
+
+
+gnad10:
+  <<: *base
+  langs:
+    - de
+
+fb_dialog:
+  <<: *base
+  langs:
+    - en
+    - es
+
+yahoo_anwsers:
+  <<: *base
+  langs:
+    - en
+
+french_tweets_sentiment:
+  <<: *base
+  langs:
+    - fr
+  problem_type: binary
+  metric: roc_auc
+
+ag_news:
+  <<: *base
+  langs:
+    - en
+
+SNIPS:
+  <<: *base
+  langs:
+    - en
diff --git a/sample_configs/cloud_configs/dataloaders/paper_text_tabular_datasets.yaml b/sample_configs/cloud_configs/dataloaders/paper_text_tabular_datasets.yaml
new file mode 100644
index 00000000..f4ddfd70
--- /dev/null
+++ b/sample_configs/cloud_configs/dataloaders/paper_text_tabular_datasets.yaml
@@ -0,0 +1,90 @@
+base: &base
+  url: s3://zs-models/{name}/{split}.csv
+  test_split_name: test
+  splits:
+    - train
+    - test
+  image_columns:
+  text_columns:
+  columns_to_drop:
+  label_columns:
+    - LabelName
+  metric: acc
+  problem_type: multiclass
+  
+
+airbnb:
+  <<: *base
+  url: s3://automl-mm-bench/airbnb_melbourne/{split}.pq
+  text_columns:
+    - summary
+    - amenities
+  label_columns:
+    - price_label
+  columns_to_drop:
+    - id
+    - listing_url
+    - scrape_id
+    - last_scraped
+    - picture_url
+    - host_id
+    - host_url
+    - host_name
+    - host_thumbnail_url
+    - host_picture_url
+    - calendar_last_scraped
+
+
+kick_start:
+  <<: *base
+  url: s3://automl-mm-bench/kick_starter_funding/{split}.csv
+  text_columns:
+    - name
+    - desc
+    - keywords
+  label_columns:
+    - final_status
+  metric: roc_auc
+  problem_type: binary
+
+
+cloth_review:
+  <<: *base
+  url: s3://automl-mm-bench/women_clothing_review/{split}.pq
+  text_columns:
+    - Title
+    - Review Text
+  label_columns:
+    - Rating
+  metric: rmse
+  problem_type: regression
+
+
+news_popularity:
+  <<: *base
+  url: s3://automl-mm-bench/news_popularity2/{split}.csv
+  text_columns: 
+    - article_title
+  label_columns:
+    - log_shares
+  image_columns:
+  metric: rmse
+  problem_type: regression
+
+
+cal_house:
+  <<: *base
+  url: s3://automl-mm-bench/kaggle-california-house-prices/{split}.csv
+  text_columns:
+    - Summary
+    - Appliances included
+    - Laundry features
+    - Parking features
+    - Flooring
+    - Elementary School
+    - Middle School
+    - High School
+  label_columns:
+    - Sold Price
+  metric: rmse
+  problem_type: regression
diff --git a/sample_configs/cloud_configs/dataloaders/text_dataloader.py b/sample_configs/cloud_configs/dataloaders/text_dataloader.py
new file mode 100644
index 00000000..452a9808
--- /dev/null
+++ b/sample_configs/cloud_configs/dataloaders/text_dataloader.py
@@ -0,0 +1,71 @@
+import logging
+import os
+
+import pandas as pd
+import yaml
+
+from autogluon.bench.utils.dataset_utils import get_data_home_dir
+from autogluon.common.loaders._utils import download
+
+logger = logging.getLogger(__name__)
+
+
+class TextDataLoader:  # Loads one split of a text dataset described in a YAML dataset config into self.data
+    def __init__(
+        self,
+        dataset_name: str,  # top-level key of the dataset entry inside dataset_config_file
+        dataset_config_file: str,  # path of the YAML file holding the dataset entries
+        split: str = "train",  # "val" is accepted as an alias of "validation"
+        fewshot: bool = False,  # when True, subsampled files are used for the splits listed in "subsample_splits"
+        shot: int = 50,  # substituted into the "subsample_path" template
+        seed: int = 0,  # substituted into the "subsample_path" template
+    ):
+        with open(dataset_config_file, "r") as f:
+            config = yaml.safe_load(f)
+        self.dataset_config = config[dataset_name]
+        if split == "val":
+            split = "validation"
+        if split not in self.dataset_config["splits"]:
+            logger.warning(f"Data split {split} not available.")
+            self.data = None  # the attributes assigned further down are left unset in this case
+            return
+        # NOTE(review): the keys below are indexed directly, so a config lacking them (e.g. one that only defines feature_columns) raises KeyError.
+        self.name = dataset_name
+        self.split = split
+        self.image_columns = self.dataset_config["image_columns"] or []  # "or []" maps an empty YAML value (None) to []
+        self.text_columns = self.dataset_config["text_columns"] or []
+        self.columns_to_drop = self.dataset_config["columns_to_drop"] or []
+        self.label_columns = self.dataset_config["label_columns"]
+        # Only the first entry of "langs" is used.
+        lang = self.dataset_config["langs"][0]
+        subsample_path = self.dataset_config["subsample_path"].format(shot=shot, seed=seed)
+        url = self.dataset_config["url"].format(
+            name=self.name,
+            lang=lang,
+            subsample_path=subsample_path if fewshot and self.split in self.dataset_config["subsample_splits"] else "",  # empty unless few-shot applies to this split
+            split=self.split,
+        )
+        base_dir = get_data_home_dir()
+        data_dir = os.path.join(self.name, lang)
+        if fewshot:
+            data_dir = os.path.join(data_dir, "subsampling", f"{shot}_shot-seed{seed}")  # separate local directory per (shot, seed)
+        self.dataset_dir = os.path.join(base_dir, data_dir)
+        data_path = os.path.join(self.dataset_dir, f"{split}.csv")
+        download(url, path=data_path)  # presumably fetches url to data_path - confirm against autogluon.common
+
+        self.data = pd.read_csv(
+            data_path,
+            header=None,  # the file is read as header-less; column names come from "data_columns"
+            names=self.dataset_config["data_columns"],
+            sep=self.dataset_config.get("data_sep", ","),
+            on_bad_lines="warn",
+        )
+
+    @property
+    def problem_type(self):  # "problem_type" value of the dataset entry
+        return self.dataset_config["problem_type"]
+
+    @property
+    def metric(self):  # "metric" value of the dataset entry
+        return self.dataset_config["metric"]
+
diff --git a/sample_configs/cloud_configs/dataloaders/text_datasets.yaml b/sample_configs/cloud_configs/dataloaders/text_datasets.yaml
new file mode 100644
index 00000000..8960f4af
--- /dev/null
+++ b/sample_configs/cloud_configs/dataloaders/text_datasets.yaml
@@ -0,0 +1,73 @@
+# supports both regular and fewshot datasets
+# for fullshot:
+# custom_dataloader:
+#     dataloader_file: sample_configs/dataloaders/text_dataloader.py   # relative path to WORKDIR
+#     class_name: TextDataLoader
+#     lang: en
+#     dataset_config_file: test/automm_text_datasets.yaml
+
+# for fewshot:
+# custom_dataloader:
+#     dataloader_file: sample_configs/dataloaders/text_dataloader.py   # relative path to WORKDIR
+#     class_name: TextDataLoader
+#     lang: en
+#     fewshot: True
+#     shot: 10
+#     seed: 8
+#     dataset_config_file: test/automm_text_datasets.yaml
+# 
+# When running in AWS mode, DATA_BUCKET: <dataset_bucket> needs to be specified in the run config
+
+base: &base
+  url: s3://<data_bucket>/{name}/{lang}/{subsample_path}{split}.csv
+  splits:
+    - train
+    - validation
+    - test
+  langs:
+    - de
+    - en
+  data_columns:
+    - label
+    - text
+  feature_columns:
+    - text
+  label_columns:
+    - label
+  data_sep: ","
+  metric: acc
+  problem_type: multiclass
+  subsample_path: subsampling/{shot}_shot/seed{seed}/
+  subsample_splits:
+    - train
+    - validation
+  shots:
+    - 1
+    - 5
+    - 10
+    - 20
+  seeds:
+    - 0
+    - 1
+    - 2
+    - 3
+    - 4
+    - 5
+    - 6
+    - 7
+    - 8
+    - 9
+
+
+dataset_1:
+  <<: *base
+  langs:
+    - en
+  problem_type: binary
+
+dataset_2:
+  <<: *base
+  langs:
+    - fr
+  problem_type: binary
+
diff --git a/sample_configs/cloud_configs/dataloaders/text_tabular_dataloader.py b/sample_configs/cloud_configs/dataloaders/text_tabular_dataloader.py
new file mode 100644
index 00000000..c587af89
--- /dev/null
+++ b/sample_configs/cloud_configs/dataloaders/text_tabular_dataloader.py
@@ -0,0 +1,69 @@
+import logging
+import os
+
+import pandas as pd
+import yaml
+
+from autogluon.bench.utils.dataset_utils import get_data_home_dir
+from autogluon.common.loaders import load_zip
+from autogluon.common.loaders._utils import download
+
+
+def path_expander(path, base_folder):  # resolve each ";"-separated entry of path against base_folder
+    path_l = path.split(";")  # a single value may hold several paths separated by ";"
+    return ";".join([os.path.abspath(os.path.join(base_folder, path)) for path in path_l])  # absolute paths, re-joined with ";"
+
+
+logger = logging.getLogger(__name__)
+
+
+class TextTabularDataLoader:
+    def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"):
+        with open(dataset_config_file, "r") as f:
+            config = yaml.safe_load(f)
+
+        self.dataset_config = config[dataset_name]
+        if split not in self.dataset_config["splits"]:
+            logger.warning(f"Data split {split} not available.")
+            self.data = None
+            return
+        if split == "test" and self.dataset_config["test_split_name"] == "dev":
+            split = "dev"
+
+        self.name = dataset_name
+        self.split = split
+        self.image_columns = []
+        self.text_columns = self.dataset_config["text_columns"] or []
+        self.label_columns = self.dataset_config["label_columns"]
+        self.columns_to_drop = self.dataset_config["columns_to_drop"] or []
+
+        # url = self.dataset_config["url"].format(name=self.name)
+        # base_dir = get_data_home_dir()
+        # load_zip.unzip(url, unzip_dir=base_dir)
+        # self.dataset_dir = os.path.join(base_dir, self.name)
+
+        url = self.dataset_config["url"].format(split=self.split)
+        file_extention = os.path.splitext(url)[-1]
+        base_dir = get_data_home_dir()
+
+        self.data_path = os.path.join(base_dir, self.name, f"{split}{file_extention}")
+        download(url, path=self.data_path)
+        if file_extention == ".csv":
+            self.data = pd.read_csv(self.data_path)
+        elif file_extention == ".pq":
+            self.data = pd.read_parquet(self.data_path)
+        else:
+            raise NotImplementedError("Unsupported data type.")
+
+        if self.columns_to_drop is not None:
+            self.data.drop(columns=self.columns_to_drop, inplace=True)
+
+    @property
+    def problem_type(self):
+        return self.dataset_config["problem_type"]
+
+    @property
+    def metric(self):
+        return self.dataset_config["metric"]
+
+
diff --git a/sample_configs/cloud_configs/dataloaders/vision_dataloader.py b/sample_configs/cloud_configs/dataloaders/vision_dataloader.py
new file mode 100644
index 00000000..84ab5d91
--- /dev/null
+++ b/sample_configs/cloud_configs/dataloaders/vision_dataloader.py
@@ -0,0 +1,67 @@
+import logging
+import os
+
+import pandas as pd
+import yaml
+
+from autogluon.bench.utils.dataset_utils import get_data_home_dir
+from autogluon.common.loaders import load_zip
+
+
+def path_expander(path, base_folder):  # resolve each ";"-separated entry of path against base_folder
+    path_l = path.split(";")  # a single value may hold several paths separated by ";"
+    return ";".join([os.path.abspath(os.path.join(base_folder, path)) for path in path_l])  # absolute paths, re-joined with ";"
+
+
+logger = logging.getLogger(__name__)
+
+
+class VisionDataLoader:  # Loads one split's annotation CSV of an image dataset into self.data, with image paths made absolute
+    def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"):
+        with open(dataset_config_file, "r") as f:
+            config = yaml.safe_load(f)
+        # dataset_name is the top-level key of the dataset entry inside dataset_config_file.
+        self.dataset_config = config[dataset_name]
+        if split == "val":
+            split = "validation"  # "val" is accepted as an alias of "validation"
+        if split not in self.dataset_config["splits"]:
+            logger.warning(f"Data split {split} not available.")
+            self.data = None  # the attributes assigned further down are left unset in this case
+            return
+        # NOTE(review): the keys below are indexed directly, so a config lacking them (e.g. one that only defines feature_columns) raises KeyError.
+        self.name = dataset_name
+        self.split = split
+        self.image_columns = self.dataset_config["image_columns"] or []  # "or []" maps an empty YAML value (None) to []
+        self.text_columns = self.dataset_config["text_columns"] or []
+        self.columns_to_drop = self.dataset_config["columns_to_drop"] or []
+        self.label_columns = self.dataset_config["label_columns"]
+
+        url = self.dataset_config["url"].format(name=self.name)
+        base_dir = get_data_home_dir()
+        load_zip.unzip(url, unzip_dir=base_dir)  # presumably fetches and extracts the archive into base_dir - confirm against autogluon.common
+        self.dataset_dir = os.path.join(base_dir, self.name)
+
+        annotation_filename = self.dataset_config["annotation"].format(name=self.name, split=self.split)
+        image_path_pattern = self.dataset_config["image_path"]
+
+        self.data = pd.read_csv(os.path.join(self.dataset_dir, annotation_filename))
+        self.tabular_columns = self.data.columns.difference(self.image_columns + self.text_columns + self.label_columns + self.columns_to_drop)  # every column not claimed by one of the other lists
+        print("Image columns: ", self.image_columns)
+        print("Text columns: ", self.text_columns)
+        print("Tabular columns: ", self.tabular_columns)
+        self.data.drop(columns=self.columns_to_drop, inplace=True)
+        self.data.dropna(inplace=True)  # drops every row that still has a missing value in any remaining column
+
+        image_base_path = image_path_pattern.format(name=self.name, split=self.split, value="")  # empty value leaves only the directory part of the pattern
+        for col in self.image_columns:
+            self.data[col] = self.data[col].apply(
+                lambda ele: path_expander(ele, base_folder=os.path.join(self.dataset_dir, image_base_path))
+            )
+
+    @property
+    def problem_type(self):  # "problem_type" value of the dataset entry
+        return self.dataset_config["problem_type"]
+
+    @property
+    def metric(self):  # "metric" value of the dataset entry
+        return self.dataset_config["metric"]
diff --git a/sample_configs/cloud_configs/dataloaders/vision_datasets.yaml b/sample_configs/cloud_configs/dataloaders/vision_datasets.yaml
new file mode 100644
index 00000000..a095403e
--- /dev/null
+++ b/sample_configs/cloud_configs/dataloaders/vision_datasets.yaml
@@ -0,0 +1,31 @@
+# data = custom_class(dataset_name=dataset_name, split=split, **kwargs)
+# custom_class: custom dataloader class, e.g. VisionDataLoader in sample_configs/dataloaders/vision_dataloader.py
+# When running in AWS mode, DATA_BUCKET: <dataset_bucket> needs to be specified in the run config
+
+base: &base
+  url: s3://<dataset_bucket>/vision_datasets/{name}.zip
+  splits:
+    - train
+    - test
+  feature_columns:
+    - ImageID
+  label_columns:
+    - LabelName
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{name}/{value}"
+  metric: acc
+  problem_type: multiclass
+  
+
+dataset_1:
+  <<: *base
+  image_path: "{split}/{value}"
+  problem_type: binary
+
+dataset_2:
+  <<: *base
+
+dataset_3:
+  <<: *base
+  image_path: "{name}/{split}/{value}"
+
diff --git a/sample_configs/cloud_configs/multimodal_cloud_configs.yaml b/sample_configs/cloud_configs/multimodal_cloud_configs.yaml
new file mode 100644
index 00000000..1b4bb707
--- /dev/null
+++ b/sample_configs/cloud_configs/multimodal_cloud_configs.yaml
@@ -0,0 +1,36 @@
+# Infra configurations
+cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
+  CDK_DEPLOY_ACCOUNT: dummy  # required, update with your AWS account
+  CDK_DEPLOY_REGION: dummy  # required, update with your desired region
+  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
+  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
+  # DATA_BUCKET: existing-s3-bucket  # optional, S3 bucket to download your private datasets
+  # MAX_MACHINE_NUM: 20   # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 100   # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 15000  # optional, default 15000MB
+  # VPC_NAME: existing-vpc-name  # optional
+
+module: multimodal  # required
+mode: aws  # required
+benchmark_name: ag_bench  # required
+root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
+
+constraint: test  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
+framework: AutoGluon_stable  # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
+dataset_name:  # required
+  - shopee
+  - melbourne_airbnb
+
+### Customizations ####
+# custom_resource_dir: sample_configs/resources/  # path to custom multimodal_constraints.yaml and multimodal_frameworks.yaml
+# custom_dataloader:
+#     dataloader_file: sample_configs/dataloaders/vision_dataloader.py   # relative path to WORKDIR
+#     class_name: VisionDataLoader
+#     dataset_config_file: sample_configs/dataloaders/vision_datasets.yaml 
+
+# custom_metrics:
+#     metrics_path: sample_configs/custom_metrics/sample_metrics.py
+#     function_name: f1_score
+#     # Other optional parameters can be set, ref: https://auto.gluon.ai/stable/tutorials/tabular/advanced/tabular-custom-metric.html
+#     optimum: 1
+#     greater_is_better: true
diff --git a/sample_configs/cloud_configs/multimodal_local_configs.yaml b/sample_configs/cloud_configs/multimodal_local_configs.yaml
new file mode 100644
index 00000000..0bfadcd3
--- /dev/null
+++ b/sample_configs/cloud_configs/multimodal_local_configs.yaml
@@ -0,0 +1,24 @@
+# Benchmark configurations
+module: multimodal  # required
+mode: local  # required
+benchmark_name: ag_bench  # required
+root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
+# METRICS_BUCKET: autogluon-benchmark-metrics  # optional, required only if you want to upload metrics to S3
+
+# Multimodal specific
+framework: AutoGluon_stable  # required
+dataset_name:  # required
+  melbourne_airbnb  
+
+#### Customizations ####
+# custom_resource_dir: sample_configs/resources/  # path to custom multimodal_frameworks.yaml and multimodal_constraints.yaml
+# custom_dataloader:
+#     dataloader_file: sample_configs/dataloaders/vision_dataloader.py   # relative path to WORKDIR
+#     class_name: VisionDataLoader
+#     dataset_config_file: sample_configs/dataloaders/vision_datasets.yaml 
+# custom_metrics:
+#     metrics_path: sample_configs/custom_metrics/sample_metrics.py
+#     function_name: f1_score
+#     # Other optional parameters can be set, ref: https://auto.gluon.ai/stable/tutorials/tabular/advanced/tabular-custom-metric.html
+#     optimum: 1
+#     greater_is_better: true
diff --git a/sample_configs/cloud_configs/paper_image_local_configs.yaml b/sample_configs/cloud_configs/paper_image_local_configs.yaml
new file mode 100644
index 00000000..2cabc1e7
--- /dev/null
+++ b/sample_configs/cloud_configs/paper_image_local_configs.yaml
@@ -0,0 +1,50 @@
+# Infra configurations
+cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
+  CDK_DEPLOY_ACCOUNT: "097403188315"  # required, update with your AWS account; quoted so the ID stays a string with its leading zero
+  CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
+  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
+  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
+  DATA_BUCKET: zs-models  # optional, S3 bucket to download your private datasets
+  MAX_MACHINE_NUM: 1000   # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 300   # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 60000  # optional, default 15000MB
+  # VPC_NAME: existing-vpc-name  # optional
+
+module: multimodal  # required
+mode: aws  # required
+benchmark_name: ag_bench_image  # required
+root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
+
+constraint: g4_12x  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
+framework:   # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
+  # AutoGluon_medium_master
+  # AutoGluon_high_master
+  AutoGluon_best_master
+  # torch_compile_medium
+  # torch_compile_high
+  # torch_compile_best
+dataset_name:
+  # image
+  - fashion_mnist
+  - casting
+  - food101
+  - stanfordcars
+  - magnetictiledefects
+  - europeanflooddepth
+  - oxfordflowers
+  - OxfordIIITPet
+  # image-tabular
+  - ham10000
+  - cd18
+  # image-text
+  - hateful_meme
+  # image-text-tabular
+  - petfinder
+  - memotion
+## Customizations ####
+custom_dataloader:
+    dataloader_file: sample_configs/dataloaders/vision_dataloader.py   # relative path to WORKDIR
+    class_name: VisionDataLoader
+    dataset_config_file: sample_configs/dataloaders/paper_image_datasets.yaml 
+custom_resource_dir: sample_configs/resources/
+
diff --git a/sample_configs/cloud_configs/paper_text_local_configs.yaml b/sample_configs/cloud_configs/paper_text_local_configs.yaml
new file mode 100644
index 00000000..bebe3bbd
--- /dev/null
+++ b/sample_configs/cloud_configs/paper_text_local_configs.yaml
@@ -0,0 +1,40 @@
+# Infra configurations
+cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
+  CDK_DEPLOY_ACCOUNT: "097403188315"  # required, update with your AWS account; quoted so the ID stays a string with its leading zero
+  CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
+  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
+  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
+  DATA_BUCKET: zs-models  # optional, S3 bucket to download your private datasets
+  MAX_MACHINE_NUM: 1000   # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 300   # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 60000  # optional, default 15000MB
+  # VPC_NAME: existing-vpc-name  # optional
+
+module: multimodal  # required
+mode: aws  # required
+benchmark_name: ag_bench_text  # required
+root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
+
+constraint: g4_12x  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
+framework:   # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
+  # AutoGluon_medium_master
+  # AutoGluon_high_master
+  AutoGluon_best_master
+  # torch_compile_medium
+  # torch_compile_high
+  # torch_compile_best
+dataset_name:
+    - financial_news
+    - MLDoc-11000
+    - gnad10
+    - MultiATIS-5000
+    - fb_dialog
+    - SNIPS
+    - ag_news
+
+## Customizations ####
+custom_dataloader:
+    dataloader_file: sample_configs/dataloaders/text_dataloader.py   # relative path to WORKDIR
+    class_name: TextDataLoader
+    dataset_config_file: sample_configs/dataloaders/paper_text_datasets.yaml
+custom_resource_dir: sample_configs/resources/
diff --git a/sample_configs/cloud_configs/paper_text_tabular_local_configs.yaml b/sample_configs/cloud_configs/paper_text_tabular_local_configs.yaml
new file mode 100644
index 00000000..d0719d18
--- /dev/null
+++ b/sample_configs/cloud_configs/paper_text_tabular_local_configs.yaml
@@ -0,0 +1,37 @@
+# Infra configurations
+cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
+  CDK_DEPLOY_ACCOUNT: "097403188315"  # required, update with your AWS account; quoted so the ID stays a string with its leading zero
+  CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
+  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
+  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
+  DATA_BUCKET: automl-mm-bench  # optional, S3 bucket to download your private datasets
+  MAX_MACHINE_NUM: 1000   # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 300   # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 60000  # optional, default 15000MB
+  # VPC_NAME: existing-vpc-name  # optional
+
+module: multimodal  # required
+mode: aws  # required
+benchmark_name: ag_bench_text_tabular  # required
+root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
+
+constraint: g4_12x  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
+framework:   # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
+  # torch_compile_medium
+  # torch_compile_high
+  # torch_compile_best
+  # AutoGluon_medium_master
+  # AutoGluon_high_master
+  AutoGluon_best_master
+dataset_name:
+  - airbnb
+  - kick_start
+  - cloth_review
+  - news_popularity
+  - cal_house
+custom_dataloader:
+    dataloader_file: sample_configs/dataloaders/text_tabular_dataloader.py   # relative path to WORKDIR
+    class_name: TextTabularDataLoader
+    dataset_config_file: sample_configs/dataloaders/paper_text_tabular_datasets.yaml 
+custom_resource_dir: sample_configs/resources/
+
diff --git a/sample_configs/cloud_configs/resources/multimodal_constraints.yaml b/sample_configs/cloud_configs/resources/multimodal_constraints.yaml
new file mode 100644
index 00000000..940345c5
--- /dev/null
+++ b/sample_configs/cloud_configs/resources/multimodal_constraints.yaml
@@ -0,0 +1,13 @@
+10m4x:
+  TIME_LIMIT: 500
+  INSTANCE: g4dn.4xlarge
+  # MAX_MACHINE_NUM: 20   # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 100   # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 15000  # optional, default 15000MB
+
+g4_12x:
+  INSTANCE: g4dn.12xlarge
+  MAX_MACHINE_NUM: 1000   # optional, default 20
+  BLOCK_DEVICE_VOLUME: 1000   # optional, default 100GB
+  RESERVED_MEMORY_SIZE: 12000  # optional, default 15000MB
+
diff --git a/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml b/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
new file mode 100644
index 00000000..7a2765f5
--- /dev/null
+++ b/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
@@ -0,0 +1,69 @@
+AutoGluon_branch:
+  repo: https://github.com/autogluon/autogluon.git
+  version: stable_GA4_update
+  params:  # MultimodalPredictor.fit(params)
+    presets: medium_quality
+    time_limit:  90
+    hyperparameters:
+      optimization.max_epochs: 1
+      optimization.learning_rate: 0.005
+
+AutoGluon_best_master:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+
+ablation_greedy_soup:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.top_k_average_method: best
+
+ablation_gradient_clip:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.gradient_clip_algorithm: value
+
+ablation_warmup_steps:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.warmup_steps: 0.0
+
+ablation_cosine_decay:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.lr_schedule: polynomial_decay
+
+ablation_weight_decay:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.weight_decay: 0.0
+
+ablation_lr_decay:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.lr_decay: 0.0 
+
+
+autokeras_master:
+  # NOTE(review): previously pointed at keras-tuner.git, which does not match the framework name - confirm.
+  repo: https://github.com/keras-team/autokeras.git
+  version: master
diff --git a/sample_configs/cloud_configs/tabular_cloud_configs.yaml b/sample_configs/cloud_configs/tabular_cloud_configs.yaml
new file mode 100644
index 00000000..a73a95d1
--- /dev/null
+++ b/sample_configs/cloud_configs/tabular_cloud_configs.yaml
@@ -0,0 +1,40 @@
+# Infra configurations
+cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
+  CDK_DEPLOY_ACCOUNT: dummy  # required, update with your AWS account
+  CDK_DEPLOY_REGION: dummy  # required, update with your desired region
+  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
+  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
+  # DATA_BUCKET: existing-s3-bucket  # optional, S3 bucket to download your private datasets
+  # MAX_MACHINE_NUM: 20   # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 100   # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 15000  # optional, default 15000MB
+  # INSTANCE: g4dn.2xlarge  # optional, default g4dn.2xlarge
+  # TIME_LIMIT: 3600  # optional, EC2 timeout, default 3600s
+  # VPC_NAME: existing-vpc-name  # optional
+
+# Benchmark configurations
+module: tabular  # required
+mode: aws  # required
+benchmark_name: ag_bench  # required
+root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
+
+git_uri#branch:  # required, can be any AMLB custom fork and branch, only one value allowed
+  https://github.com/openml/automlbenchmark.git#master
+framework:  # required, only one value allowed
+  AutoGluon:stable
+amlb_constraint:  # optional, only one value allowed, default: test
+  test
+amlb_benchmark:  # required, a list of amlb benchmarks to run, ref: https://github.com/openml/automlbenchmark/tree/master/resources/benchmarks
+  - small
+amlb_task:  # optional, value of each key is a subset of dataset names in each 'amlb_benchmark', ref: https://github.com/openml/automlbenchmark/blob/master/resources/benchmarks/small.yaml
+  small:
+    - credit-g
+    - vehicle
+
+# fold_to_run:  # optional, capped by `folds` in amlb_constraint, ref: https://github.com/openml/automlbenchmark/blob/master/resources/constraints.yaml
+#   small:
+#     credit-g:
+#       - 3
+#       - 6
+# amlb_user_dir:  # optional, local/s3 path where all the amlb customizations are stored, only one value allowed
+#   sample_configs/amlb_configs
diff --git a/sample_configs/cloud_configs/tabular_local_configs.yaml b/sample_configs/cloud_configs/tabular_local_configs.yaml
new file mode 100644
index 00000000..62196d54
--- /dev/null
+++ b/sample_configs/cloud_configs/tabular_local_configs.yaml
@@ -0,0 +1,15 @@
+# Benchmark configurations
+module: tabular  # required
+mode: local  # required
+benchmark_name: ag_bench  # required
+root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
+# METRICS_BUCKET: autogluon-benchmark-metrics  # optional, required only if you want to upload metrics to S3
+
+# Tabular specific
+git_uri#branch: https://github.com/openml/automlbenchmark.git#stable  # required, can be any AMLB custom fork and branch
+framework: AutoGluon:stable  # required
+amlb_benchmark: small  # required
+amlb_task: vehicle # optional
+amlb_constraint: test  # optional
+fold_to_run: 1  # optional, the specific data fold to run
+amlb_user_dir: sample_configs/amlb_configs  # optional, local/s3 path where all the amlb customizations are stored
diff --git a/sample_configs/cloud_configs/timeseries_cloud_configs.yaml b/sample_configs/cloud_configs/timeseries_cloud_configs.yaml
new file mode 100644
index 00000000..6cb9441c
--- /dev/null
+++ b/sample_configs/cloud_configs/timeseries_cloud_configs.yaml
@@ -0,0 +1,34 @@
+# Infra configurations
+cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
+  CDK_DEPLOY_ACCOUNT: dummy  # required, update with your AWS account
+  CDK_DEPLOY_REGION: dummy  # required, update with your desired region
+  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
+  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
+  # DATA_BUCKET: existing-s3-bucket  # optional, S3 bucket to download your private datasets
+  # MAX_MACHINE_NUM: 20   # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 100   # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 15000  # optional, default 15000MB
+  # INSTANCE: g4dn.2xlarge  # optional, default g4dn.2xlarge
+  # TIME_LIMIT: 3600  # optional, EC2 timeout, default 3600s
+  # VPC_NAME: existing-vpc-name  # optional
+
+# Benchmark configurations
+module: timeseries  # required
+mode: aws  # required
+benchmark_name: ag_bench  # required
+root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
+
+git_uri#branch:  # required, can be any AMLB custom fork and branch, only one value allowed
+  https://github.com/openml/automlbenchmark.git#master
+framework:  # required, only one value allowed
+  AutoGluon_dev:example
+amlb_constraint:  # optional, only one value allowed, default: test
+  test
+amlb_benchmark:  # required, a list of amlb benchmarks to run, ref: https://github.com/openml/automlbenchmark/tree/master/resources/benchmarks
+  - timeseries_test
+amlb_task:  # optional, value of each key is a subset of dataset names in each 'amlb_benchmark', ref: https://github.com/openml/automlbenchmark/blob/master/resources/benchmarks/small.yaml
+  timeseries_test:
+    - m4_hourly_2
+# fold_to_run:  # optional, capped by `folds` in amlb_constraint, ref: https://github.com/openml/automlbenchmark/blob/master/resources/constraints.yaml
+amlb_user_dir:  # optional, local/s3 path where all the amlb customizations are stored, only one value allowed
+  sample_configs/amlb_configs
diff --git a/sample_configs/cloud_configs/timeseries_local_configs.yaml b/sample_configs/cloud_configs/timeseries_local_configs.yaml
new file mode 100644
index 00000000..838ad9b3
--- /dev/null
+++ b/sample_configs/cloud_configs/timeseries_local_configs.yaml
@@ -0,0 +1,15 @@
+# Benchmark configurations
+module: timeseries  # required
+mode: local  # required
+benchmark_name: ag_bench  # required
+root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
+# METRICS_BUCKET: autogluon-benchmark-metrics  # optional, required only if you want to upload metrics to S3
+
+# Timeseries specific
+git_uri#branch: https://github.com/openml/automlbenchmark.git#stable  # required, can be any AMLB custom fork and branch
+framework: AutoGluon  # required
+amlb_benchmark: timeseries_test  # required
+amlb_task: m4_hourly_2 # optional
+amlb_constraint: test  # optional
+fold_to_run: 1  # optional, the specific data fold to run
+amlb_user_dir: sample_configs/amlb_configs  # optional, local/s3 path where all the amlb customizations are stored
diff --git a/sample_configs/dataloaders/paper_image_datasets.yaml b/sample_configs/dataloaders/paper_image_datasets.yaml
new file mode 100644
index 00000000..043aaedc
--- /dev/null
+++ b/sample_configs/dataloaders/paper_image_datasets.yaml
@@ -0,0 +1,119 @@
+# data = custom_class(dataset_name=dataset_name, split=split, **kwargs)
+# custom_class: custom dataloader class, e.g. VisionDataLoader in sample_configs/dataloaders/vision_dataloader.py
+
+
+base: &base
+  url: s3://zs-models/datasets/{name}.zip
+  splits:
+    - train
+    - test
+  image_columns:
+    - ImageID
+  text_columns:
+  label_columns:
+    - LabelName
+  columns_to_drop:
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{name}/{value}"
+  metric: acc
+  problem_type: multiclass
+  
+fashion_mnist:
+  <<: *base
+  image_path: "{split}/{value}"
+
+casting:
+  <<: *base
+  image_path: "{value}"
+  problem_type: binary
+  metric: roc_auc
+
+food101:
+  <<: *base
+
+oxfordflowers:
+  <<: *base
+  image_path: "{name}/{split}/{value}"
+
+OxfordIIITPet:
+  <<: *base
+  splits:
+    - train
+    - validation
+    - test
+  annotation: "{name}_{split}_anno.csv"
+  image_path: "{split}/{value}"
+
+europeanflooddepth:
+  <<: *base
+  problem_type: binary
+  metric: roc_auc
+
+magnetictiledefects:
+  <<: *base
+
+stanfordcars:
+  <<: *base
+
+cub200:
+  <<: *base
+
+
+petfinder:
+  <<: *base
+  splits:
+    - train
+  text_columns:
+    - Description 
+  label_columns:
+    - AdoptionSpeed
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{value}"
+  metric: acc
+  problem_type: multiclass
+
+
+ham10000:
+  <<: *base
+  splits:
+    - train
+  label_columns:
+    - dx
+  image_path: "{split}/{value}"
+
+cd18:
+  <<: *base
+  splits:
+    - train
+  label_columns:
+    - Price
+  image_path: "{split}/{value}"
+  metric: rmse
+  problem_type: regression
+
+    
+hateful_meme:
+  <<: *base
+  splits:
+    - train
+  text_columns:
+    - text    
+  image_path: "{value}"
+  metric: roc_auc
+  problem_type: binary
+
+
+memotion:
+  <<: *base
+  splits:
+    - train
+  text_columns:
+    - text_corrected  
+  columns_to_drop:
+    - text_ocr
+  label_columns:
+    - overall_sentiment
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{split}/{value}"
+  metric: acc
+  problem_type: multiclass
diff --git a/sample_configs/dataloaders/paper_text_datasets.yaml b/sample_configs/dataloaders/paper_text_datasets.yaml
new file mode 100644
index 00000000..d3113109
--- /dev/null
+++ b/sample_configs/dataloaders/paper_text_datasets.yaml
@@ -0,0 +1,124 @@
+# supports both regular and fewshot datasets
+# for fewshot:
+# custom_dataloader:
+#     dataloader_file: sample_configs/dataloaders/text_dataloader.py   # relative path to WORKDIR
+#     class_name: TextDataLoader
+#     # no `lang` option: the loader uses the first entry of the dataset's `langs` list
+#     fewshot: True
+#     shot: 10
+#     seed: 8
+#     dataset_config_file: sample_configs/dataloaders/paper_text_datasets.yaml
+# 
+# When running in AWS mode, DATA_BUCKET: <dataset_bucket> needs to be specified in the run config
+
+base: &base
+  url: s3://automl-mm-bench/comprehend_benchmarks/{name}/{lang}/{subsample_path}{split}.csv
+  splits:
+    - train
+    - validation
+    - test
+  langs:
+    - en
+    - de
+    - es
+    - fr
+    - it
+
+  data_columns:
+    - label
+    - text
+  image_columns:
+  text_columns:
+    - text
+  label_columns:
+    - label
+  columns_to_drop:
+  data_sep: ","
+  metric: acc
+  problem_type: multiclass
+  subsample_path: subsampling/{shot}_shot/seed{seed}/
+  subsample_splits:
+    - train
+    - validation
+  shots:
+    - 1
+    - 5
+    - 10
+    - 20
+    - 50
+    - 100
+    - 500
+    - 1000
+  seeds:
+    - 0
+    - 1
+    - 2
+    - 3
+    - 4
+    - 5
+    - 6
+    - 7
+    - 8
+    - 9
+
+
+# NOTE(review): inherits the default `label`/`text` columns from base - confirm against the CSV.
+financial_news:
+  <<: *base
+  url: s3://zs-models/datasets/financial_news/{lang}/{split}.csv
+  splits: [train]
+  langs: [en]
+  metric: accuracy
+  problem_type: classification
+
+MLDoc-11000:
+  <<: *base
+
+MultiATIS-5000:
+  <<: *base
+  langs:
+    - en
+    - es
+    - fr
+    - pt
+
+amazon_reviews:
+  <<: *base
+  langs:
+    - en
+  problem_type: binary
+  metric: roc_auc
+
+
+gnad10:
+  <<: *base
+  langs:
+    - de
+
+fb_dialog:
+  <<: *base
+  langs:
+    - en
+    - es
+
+yahoo_anwsers:
+  <<: *base
+  langs:
+    - en
+
+french_tweets_sentiment:
+  <<: *base
+  langs:
+    - fr
+  problem_type: binary
+  metric: roc_auc
+
+ag_news:
+  <<: *base
+  langs:
+    - en
+
+SNIPS:
+  <<: *base
+  langs:
+    - en
diff --git a/sample_configs/dataloaders/paper_text_tabular_datasets.yaml b/sample_configs/dataloaders/paper_text_tabular_datasets.yaml
new file mode 100644
index 00000000..b43fbab6
--- /dev/null
+++ b/sample_configs/dataloaders/paper_text_tabular_datasets.yaml
@@ -0,0 +1,94 @@
+base: &base
+  url: s3://zs-models/{name}/{split}.csv
+  test_split_name: test
+  splits:
+    - train
+    - test
+  image_columns:
+  text_columns:
+  columns_to_drop:
+  label_columns:
+    - LabelName
+  metric: acc
+  problem_type: multiclass
+  
+
+airbnb:
+  <<: *base
+  url: s3://automl-mm-bench/airbnb_melbourne/{split}.pq
+  text_columns:
+    - summary
+    - amenities
+    - description
+    - notes
+    - name
+    - neighborhood
+  label_columns:
+    - price_label
+  columns_to_drop:
+    - id
+    - listing_url
+    - scrape_id
+    - last_scraped
+    - picture_url
+    - host_id
+    - host_url
+    - host_name
+    - host_thumbnail_url
+    - host_picture_url
+    - calendar_last_scraped
+
+
+kick_start:
+  <<: *base
+  url: s3://automl-mm-bench/kick_starter_funding/{split}.csv
+  text_columns:
+    - name
+    - desc
+    - keywords
+  label_columns:
+    - final_status
+  metric: roc_auc
+  problem_type: binary
+
+
+cloth_review:
+  <<: *base
+  url: s3://automl-mm-bench/women_clothing_review/{split}.pq
+  text_columns:
+    - Title
+    - Review Text
+  label_columns:
+    - Rating
+  metric: rmse
+  problem_type: regression
+
+
+news_popularity:
+  <<: *base
+  url: s3://automl-mm-bench/news_popularity2/{split}.csv
+  text_columns: 
+    - article_title
+  label_columns:
+    - log_shares
+  image_columns:
+  metric: rmse
+  problem_type: regression
+
+
+cal_house:
+  <<: *base
+  url: s3://automl-mm-bench/kaggle-california-house-prices/{split}.csv
+  text_columns:
+    - Summary
+    - Appliances included
+    - Laundry features
+    - Parking features
+    - Flooring
+    - Elementary School
+    - Middle School
+    - High School
+  label_columns:
+    - Sold Price
+  metric: rmse
+  problem_type: regression
diff --git a/sample_configs/dataloaders/text_dataloader.py b/sample_configs/dataloaders/text_dataloader.py
index 0c699d43..452a9808 100644
--- a/sample_configs/dataloaders/text_dataloader.py
+++ b/sample_configs/dataloaders/text_dataloader.py
@@ -10,13 +10,12 @@
 logger = logging.getLogger(__name__)
 
 
-class TextDataLoaer:
+class TextDataLoader:
     def __init__(
         self,
         dataset_name: str,
         dataset_config_file: str,
         split: str = "train",
-        lang: str = "en",
         fewshot: bool = False,
         shot: int = 50,
         seed: int = 0,
@@ -33,7 +32,12 @@ def __init__(
 
         self.name = dataset_name
         self.split = split
-
+        self.image_columns = self.dataset_config["image_columns"] or []
+        self.text_columns = self.dataset_config["text_columns"] or []
+        self.columns_to_drop = self.dataset_config["columns_to_drop"] or []
+        self.label_columns = self.dataset_config["label_columns"]
+        
+        lang = self.dataset_config["langs"][0]
         subsample_path = self.dataset_config["subsample_path"].format(shot=shot, seed=seed)
         url = self.dataset_config["url"].format(
             name=self.name,
@@ -65,10 +69,3 @@ def problem_type(self):
     def metric(self):
         return self.dataset_config["metric"]
 
-    @property
-    def feature_columns(self):
-        return self.dataset_config["feature_columns"]
-
-    @property
-    def label_columns(self):
-        return self.dataset_config["label_columns"]
diff --git a/sample_configs/dataloaders/text_tabular_dataloader.py b/sample_configs/dataloaders/text_tabular_dataloader.py
new file mode 100644
index 00000000..c587af89
--- /dev/null
+++ b/sample_configs/dataloaders/text_tabular_dataloader.py
@@ -0,0 +1,69 @@
+import logging
+import os
+
+import pandas as pd
+import yaml
+
+from autogluon.bench.utils.dataset_utils import get_data_home_dir
+from autogluon.common.loaders import load_zip
+from autogluon.common.loaders._utils import download
+
+
+def path_expander(path, base_folder):
+    """Resolve each ';'-separated entry of `path` against `base_folder` to an absolute path."""
+    return ";".join(os.path.abspath(os.path.join(base_folder, part)) for part in path.split(";"))
+
+
+logger = logging.getLogger(__name__)
+
+
+class TextTabularDataLoader:
+    """Load one split of a text+tabular dataset described in a YAML config.
+
+    ``self.data`` is None when ``split`` is not among the dataset's ``splits``.
+    """
+
+    def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"):
+        with open(dataset_config_file, "r") as f:
+            config = yaml.safe_load(f)
+
+        self.dataset_config = config[dataset_name]
+        if split not in self.dataset_config["splits"]:
+            logger.warning(f"Data split {split} not available.")
+            self.data = None
+            return
+        # The config may name its test split "dev"; map a "test" request onto it.
+        if split == "test" and self.dataset_config["test_split_name"] == "dev":
+            split = "dev"
+
+        self.name = dataset_name
+        self.split = split
+        self.image_columns = []
+        self.text_columns = self.dataset_config["text_columns"] or []
+        self.label_columns = self.dataset_config["label_columns"]
+        self.columns_to_drop = self.dataset_config["columns_to_drop"] or []
+
+        # Pass `name` as well: a url template in the config may contain a {name} placeholder.
+        url = self.dataset_config["url"].format(name=self.name, split=self.split)
+        file_extension = os.path.splitext(url)[-1]
+        self.data_path = os.path.join(get_data_home_dir(), self.name, f"{split}{file_extension}")
+        download(url, path=self.data_path)
+        if file_extension == ".csv":
+            self.data = pd.read_csv(self.data_path)
+        elif file_extension == ".pq":
+            self.data = pd.read_parquet(self.data_path)
+        else:
+            raise NotImplementedError("Unsupported data type.")
+        self.data.drop(columns=self.columns_to_drop, inplace=True)
+
+    @property
+    def problem_type(self):
+        """Problem type string from the dataset config."""
+        return self.dataset_config["problem_type"]
+
+    @property
+    def metric(self):
+        """Metric name from the dataset config."""
+        return self.dataset_config["metric"]
+
+
diff --git a/sample_configs/dataloaders/vision_dataloader.py b/sample_configs/dataloaders/vision_dataloader.py
index 8a629706..84ab5d91 100644
--- a/sample_configs/dataloaders/vision_dataloader.py
+++ b/sample_configs/dataloaders/vision_dataloader.py
@@ -16,7 +16,7 @@ def path_expander(path, base_folder):
 logger = logging.getLogger(__name__)
 
 
-class VisionDataLoaer:
+class VisionDataLoader:
     def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"):
         with open(dataset_config_file, "r") as f:
             config = yaml.safe_load(f)
@@ -31,7 +31,9 @@ def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "tr
 
         self.name = dataset_name
         self.split = split
-        self.feature_columns = self.dataset_config["feature_columns"]
+        self.image_columns = self.dataset_config["image_columns"] or []
+        self.text_columns = self.dataset_config["text_columns"] or []
+        self.columns_to_drop = self.dataset_config["columns_to_drop"] or []
         self.label_columns = self.dataset_config["label_columns"]
 
         url = self.dataset_config["url"].format(name=self.name)
@@ -43,10 +45,15 @@ def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "tr
         image_path_pattern = self.dataset_config["image_path"]
 
         self.data = pd.read_csv(os.path.join(self.dataset_dir, annotation_filename))
-        _columns_to_drop = self.data.columns.difference(self.feature_columns + self.label_columns)
-        self.data.drop(columns=_columns_to_drop, inplace=True)
+        self.tabular_columns = self.data.columns.difference(self.image_columns + self.text_columns + self.label_columns + self.columns_to_drop)
+        print("Image columns: ", self.image_columns)
+        print("Text columns: ", self.text_columns)
+        print("Tabular columns: ", self.tabular_columns)
+        self.data.drop(columns=self.columns_to_drop, inplace=True)
+        self.data.dropna(inplace=True)
+
         image_base_path = image_path_pattern.format(name=self.name, split=self.split, value="")
-        for col in self.feature_columns:
+        for col in self.image_columns:
             self.data[col] = self.data[col].apply(
                 lambda ele: path_expander(ele, base_folder=os.path.join(self.dataset_dir, image_base_path))
             )
diff --git a/sample_configs/paper_image_local_configs.yaml b/sample_configs/paper_image_local_configs.yaml
new file mode 100644
index 00000000..a3e11928
--- /dev/null
+++ b/sample_configs/paper_image_local_configs.yaml
@@ -0,0 +1,51 @@
+# Infra configurations
+cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
+  CDK_DEPLOY_ACCOUNT: "097403188315"  # required, update with your AWS account; keep quoted so the leading zero survives
+  CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
+  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
+  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
+  DATA_BUCKET: zs-models  # optional, S3 bucket to download your private datasets
+  MAX_MACHINE_NUM: 1000   # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 300   # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 60000  # optional, default 15000MB
+  # VPC_NAME: existing-vpc-name  # optional
+
+module: autokeras  # required
+mode: local  # required
+benchmark_name: ag_bench_image  # required
+root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
+
+constraint: g4_12x  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
+framework:   # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
+  # AutoGluon_medium_master
+  # AutoGluon_high_master
+  AutoGluon_best_master
+  # torch_compile_medium
+  # torch_compile_high
+  # torch_compile_best
+dataset_name: fashion_mnist 
+dataset_names:
+  # image
+  - fashion_mnist
+  - casting
+  - food101
+  - stanfordcars
+  - magnetictiledefects
+  - europeanflooddepth
+  - oxfordflowers
+  - OxfordIIITPet
+  # image-tabular
+  - ham10000
+  - cd18
+  # image-text
+  - hateful_meme
+  # image-text-tabular
+  - petfinder
+  - memotion
+## Customizations ####
+custom_dataloader:
+    dataloader_file: sample_configs/dataloaders/vision_dataloader.py   # relative path to WORKDIR
+    class_name: VisionDataLoader
+    dataset_config_file: sample_configs/dataloaders/paper_image_datasets.yaml 
+custom_resource_dir: sample_configs/resources/
+
diff --git a/sample_configs/paper_text_local_configs.yaml b/sample_configs/paper_text_local_configs.yaml
new file mode 100644
index 00000000..5b38ae0e
--- /dev/null
+++ b/sample_configs/paper_text_local_configs.yaml
@@ -0,0 +1,32 @@
+# Infra configurations
+cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
+  CDK_DEPLOY_ACCOUNT: "097403188315"  # required, update with your AWS account; keep quoted so the leading zero survives
+  CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
+  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
+  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
+  DATA_BUCKET: zs-models  # optional, S3 bucket to download your private datasets
+  MAX_MACHINE_NUM: 1000   # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 300   # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 60000  # optional, default 15000MB
+  # VPC_NAME: existing-vpc-name  # optional
+
+module: multimodal  # required
+mode: local  # required
+benchmark_name: ag_bench_text  # required
+root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
+
+constraint: g4_12x  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
+framework:   # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
+  # AutoGluon_medium_master
+  # AutoGluon_high_master
+  AutoGluon_best_master
+  # torch_compile_medium
+  # torch_compile_high
+  # torch_compile_best
+dataset_name: ag_news 
+## Customizations ####
+custom_dataloader:
+    dataloader_file: sample_configs/dataloaders/text_dataloader.py   # relative path to WORKDIR
+    class_name: TextDataLoader
+    dataset_config_file: sample_configs/dataloaders/paper_text_datasets.yaml
+custom_resource_dir: sample_configs/resources/
diff --git a/sample_configs/paper_text_tabular_local_configs.yaml b/sample_configs/paper_text_tabular_local_configs.yaml
new file mode 100644
index 00000000..212b6092
--- /dev/null
+++ b/sample_configs/paper_text_tabular_local_configs.yaml
@@ -0,0 +1,32 @@
+# Infra configurations
+cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
+  CDK_DEPLOY_ACCOUNT: "097403188315"  # required, update with your AWS account; keep quoted so the leading zero survives
+  CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
+  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
+  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
+  DATA_BUCKET: automl-mm-bench  # optional, S3 bucket to download your private datasets
+  MAX_MACHINE_NUM: 1000   # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 300   # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 60000  # optional, default 15000MB
+  # VPC_NAME: existing-vpc-name  # optional
+
+module: autokeras  # required
+mode: local  # required
+benchmark_name: ag_bench_text_tabular  # required
+root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
+
+constraint: g4_12x  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
+framework:   # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
+  # torch_compile_medium
+  # torch_compile_high
+  # torch_compile_best
+  # AutoGluon_medium_master
+  # AutoGluon_high_master
+  AutoGluon_best_master
+dataset_name: airbnb 
+custom_dataloader:
+    dataloader_file: sample_configs/dataloaders/text_tabular_dataloader.py   # relative path to WORKDIR
+    class_name: TextTabularDataLoader
+    dataset_config_file: sample_configs/dataloaders/paper_text_tabular_datasets.yaml 
+custom_resource_dir: sample_configs/resources/
+
diff --git a/sample_configs/resources/multimodal_constraints.yaml b/sample_configs/resources/multimodal_constraints.yaml
index 22defb3c..940345c5 100644
--- a/sample_configs/resources/multimodal_constraints.yaml
+++ b/sample_configs/resources/multimodal_constraints.yaml
@@ -4,3 +4,10 @@
   # MAX_MACHINE_NUM: 20   # optional, default 20
   # BLOCK_DEVICE_VOLUME: 100   # optional, default 100GB
   # RESERVED_MEMORY_SIZE: 15000  # optional, default 15000MB
+
+g4_12x:
+  INSTANCE: g4dn.12xlarge
+  MAX_MACHINE_NUM: 1000   # optional, default 20
+  BLOCK_DEVICE_VOLUME: 1000   # optional, default 100GB
+  RESERVED_MEMORY_SIZE: 12000  # optional, default 15000MB
+
diff --git a/sample_configs/resources/multimodal_frameworks.yaml b/sample_configs/resources/multimodal_frameworks.yaml
index 0c384263..f39799de 100644
--- a/sample_configs/resources/multimodal_frameworks.yaml
+++ b/sample_configs/resources/multimodal_frameworks.yaml
@@ -7,3 +7,62 @@ AutoGluon_branch:
     hyperparameters:
       optimization.max_epochs: 1
       optimization.learning_rate: 0.005
+
+AutoGluon_best_master:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    time_limit: 90
+
+ablation_greedy_soup:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.top_k_average_method: best
+
+ablation_gradient_clip:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.gradient_clip_algorithm: value
+
+ablation_warmup_steps:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.warmup_steps: 0.0
+
+ablation_cosine_decay:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.lr_schedule: polynomial_decay
+
+ablation_weight_decay:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.weight_decay: 0.0
+
+ablation_lr_decay:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.lr_decay: 0.0
+
+
+AutoKeras:
+
diff --git a/src/autogluon/bench/Dockerfile b/src/autogluon/bench/Dockerfile
index 200fb03d..43059d1a 100644
--- a/src/autogluon/bench/Dockerfile
+++ b/src/autogluon/bench/Dockerfile
@@ -62,7 +62,7 @@ RUN chmod +x entrypoint.sh \
         else \
             bash ${AG_BENCH_BASE_DIR}/${FRAMEWORK_PATH}/setup.sh $GIT_URI $GIT_BRANCH $VENV_BASE_DIR $AMLB_FRAMEWORK; \
         fi; \
-    elif echo "$FRAMEWORK_PATH" | grep -q "multimodal"; then \
+    elif echo "$FRAMEWORK_PATH" | grep -q -E "multimodal|autokeras"; then \
         bash ${AG_BENCH_BASE_DIR}/${FRAMEWORK_PATH}/setup.sh $GIT_URI $GIT_BRANCH $VENV_BASE_DIR $AG_BENCH_VERSION; \
     fi \
     && echo "CDK_DEPLOY_REGION=$CDK_DEPLOY_REGION" >> /etc/environment
diff --git a/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py b/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py
index c460d1bd..51c4bfef 100644
--- a/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py
+++ b/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py
@@ -216,10 +216,10 @@ def generate_config_combinations(config, metrics_bucket, batch_job_queue, batch_
     job_configs = []
     if config["module"] in AMLB_DEPENDENT_MODULES:
         job_configs = generate_amlb_module_config_combinations(config)
-    elif config["module"] == "multimodal":
+    elif config["module"] in ["multimodal", "autokeras"]:
         job_configs = generate_multimodal_config_combinations(config)
     else:
-        raise ValueError("Invalid module. Choose either 'tabular', 'timeseries', or 'multimodal'.")
+        raise ValueError("Invalid module. Choose either 'tabular', 'timeseries', 'autokeras', or 'multimodal'.")
 
     if len(job_configs) == 0:
         return {parent_job_id: "No job submitted"}
diff --git a/src/autogluon/bench/custom_configs/dataloaders/paper_image_datasets.yaml b/src/autogluon/bench/custom_configs/dataloaders/paper_image_datasets.yaml
new file mode 100644
index 00000000..043aaedc
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/paper_image_datasets.yaml
@@ -0,0 +1,119 @@
+# data = custom_class(dataset_name=dataset_name, split=split, **kwargs)
+# custom_class: custom dataloader class, e.g. VisionDataLoader in sample_configs/dataloaders/vision_dataloader.py
+
+
+base: &base
+  url: s3://zs-models/datasets/{name}.zip
+  splits:
+    - train
+    - test
+  image_columns:
+    - ImageID
+  text_columns:
+  label_columns:
+    - LabelName
+  columns_to_drop:
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{name}/{value}"
+  metric: acc
+  problem_type: multiclass
+  
+fashion_mnist:
+  <<: *base
+  image_path: "{split}/{value}"
+
+casting:
+  <<: *base
+  image_path: "{value}"
+  problem_type: binary
+  metric: roc_auc
+
+food101:
+  <<: *base
+
+oxfordflowers:
+  <<: *base
+  image_path: "{name}/{split}/{value}"
+
+OxfordIIITPet:
+  <<: *base
+  splits:
+    - train
+    - validation
+    - test
+  annotation: "{name}_{split}_anno.csv"
+  image_path: "{split}/{value}"
+
+europeanflooddepth:
+  <<: *base
+  problem_type: binary
+  metric: roc_auc
+
+magnetictiledefects:
+  <<: *base
+
+stanfordcars:
+  <<: *base
+
+cub200:
+  <<: *base
+
+
+petfinder:
+  <<: *base
+  splits:
+    - train
+  text_columns:
+    - Description 
+  label_columns:
+    - AdoptionSpeed
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{value}"
+  metric: acc
+  problem_type: multiclass
+
+
+ham10000:
+  <<: *base
+  splits:
+    - train
+  label_columns:
+    - dx
+  image_path: "{split}/{value}"
+
+cd18:
+  <<: *base
+  splits:
+    - train
+  label_columns:
+    - Price
+  image_path: "{split}/{value}"
+  metric: rmse
+  problem_type: regression
+
+    
+hateful_meme:
+  <<: *base
+  splits:
+    - train
+  text_columns:
+    - text    
+  image_path: "{value}"
+  metric: roc_auc
+  problem_type: binary
+
+
+memotion:
+  <<: *base
+  splits:
+    - train
+  text_columns:
+    - text_corrected  
+  columns_to_drop:
+    - text_ocr
+  label_columns:
+    - overall_sentiment
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{split}/{value}"
+  metric: acc
+  problem_type: multiclass
diff --git a/src/autogluon/bench/custom_configs/dataloaders/paper_text_datasets.yaml b/src/autogluon/bench/custom_configs/dataloaders/paper_text_datasets.yaml
new file mode 100644
index 00000000..067e12d1
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/paper_text_datasets.yaml
@@ -0,0 +1,123 @@
+# supports both regular and fewshot datasets
+# for fewshot:
+# custom_dataloader:
+#     dataloader_file: sample_configs/dataloaders/text_dataloader.py   # relative path to WORKDIR
+#     class_name: TextDataLoader
+#     lang: en
+#     fewshot: True
+#     shot: 10
+#     seed: 8
+#     dataset_config_file: test/automm_text_datasets.yaml
+# 
+# When running in AWS mode, DATA_BUCKET: <dataset_bucket> needs to be specified in the run config
+
+base: &base
+  url: s3://automl-mm-bench/comprehend_benchmarks/{name}/{lang}/{subsample_path}{split}.csv
+  splits:
+    - train
+    - validation
+    - test
+  langs:
+    - en
+    - de
+    - es
+    - fr
+    - it
+
+  data_columns:
+    - label
+    - text
+  image_columns:
+  text_columns:
+    - text
+  label_columns:
+    - label
+  columns_to_drop:
+  data_sep: ","
+  metric: acc
+  problem_type: multiclass
+  subsample_path: subsampling/{shot}_shot/seed{seed}/
+  subsample_splits:
+    - train
+    - validation
+  shots:
+    - 1
+    - 5
+    - 10
+    - 20
+    - 50
+    - 100
+    - 500
+    - 1000
+  seeds:
+    - 0
+    - 1
+    - 2
+    - 3
+    - 4
+    - 5
+    - 6
+    - 7
+    - 8
+    - 9
+
+
+financial_news:
+  <<: *base
+  url: s3://zs-models/datasets/financial_news/{lang}/{split}.csv
+  splits:
+    - train
+  langs:
+    - en
+
+MLDoc-11000:
+  <<: *base
+
+MultiATIS-5000:
+  <<: *base
+  langs:
+    - en
+    - es
+    - fr
+    - pt
+
+amazon_reviews:
+  <<: *base
+  langs:
+    - en
+  problem_type: binary
+  metric: roc_auc
+
+
+gnad10:
+  <<: *base
+  langs:
+    - de
+
+fb_dialog:
+  <<: *base
+  langs:
+    - en
+    - es
+
+yahoo_anwsers:
+  <<: *base
+  langs:
+    - en
+
+french_tweets_sentiment:
+  <<: *base
+  langs:
+    - fr
+  problem_type: binary
+  metric: roc_auc
+
+ag_news:
+  <<: *base
+  langs:
+    - en
+
+SNIPS:
+  <<: *base
+  langs:
+    - en
diff --git a/src/autogluon/bench/custom_configs/dataloaders/paper_text_tabular_datasets.yaml b/src/autogluon/bench/custom_configs/dataloaders/paper_text_tabular_datasets.yaml
new file mode 100644
index 00000000..f4ddfd70
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/paper_text_tabular_datasets.yaml
@@ -0,0 +1,90 @@
+base: &base
+  url: s3://zs-models/{name}/{split}.csv
+  test_split_name: test
+  splits:
+    - train
+    - test
+  image_columns:
+  text_columns:
+  columns_to_drop:
+  label_columns:
+    - LabelName
+  metric: acc
+  problem_type: multiclass
+  
+
+airbnb:
+  <<: *base
+  url: s3://automl-mm-bench/airbnb_melbourne/{split}.pq
+  text_columns:
+    - summary
+    - amenities
+  label_columns:
+    - price_label
+  columns_to_drop:
+    - id
+    - listing_url
+    - scrape_id
+    - last_scraped
+    - picture_url
+    - host_id
+    - host_url
+    - host_name
+    - host_thumbnail_url
+    - host_picture_url
+    - calendar_last_scraped
+
+
+kick_start:
+  <<: *base
+  url: s3://automl-mm-bench/kick_starter_funding/{split}.csv
+  text_columns:
+    - name
+    - desc
+    - keywords
+  label_columns:
+    - final_status
+  metric: roc_auc
+  problem_type: binary
+
+
+cloth_review:
+  <<: *base
+  url: s3://automl-mm-bench/women_clothing_review/{split}.pq
+  text_columns:
+    - Title
+    - Review Text
+  label_columns:
+    - Rating
+  metric: rmse
+  problem_type: regression
+
+
+news_popularity:
+  <<: *base
+  url: s3://automl-mm-bench/news_popularity2/{split}.csv
+  text_columns: 
+    - article_title
+  label_columns:
+    - log_shares
+  image_columns:
+  metric: rmse
+  problem_type: regression
+
+
+cal_house:
+  <<: *base
+  url: s3://automl-mm-bench/kaggle-california-house-prices/{split}.csv
+  text_columns:
+    - Summary
+    - Appliances included
+    - Laundry features
+    - Parking features
+    - Flooring
+    - Elementary School
+    - Middle School
+    - High School
+  label_columns:
+    - Sold Price
+  metric: rmse
+  problem_type: regression
diff --git a/src/autogluon/bench/custom_configs/dataloaders/text_dataloader.py b/src/autogluon/bench/custom_configs/dataloaders/text_dataloader.py
new file mode 100644
index 00000000..452a9808
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/text_dataloader.py
@@ -0,0 +1,108 @@
+import logging
+import os
+from typing import Optional
+
+import pandas as pd
+import yaml
+
+from autogluon.bench.utils.dataset_utils import get_data_home_dir
+from autogluon.common.loaders._utils import download
+
+logger = logging.getLogger(__name__)
+
+
+class TextDataLoader:
+    """Load one split of a text dataset described in a YAML config.
+
+    The entry for ``dataset_name`` in ``dataset_config_file`` supplies the URL
+    template, the available splits and languages, and the column layout. The
+    CSV is downloaded below ``get_data_home_dir()`` and read into ``self.data``.
+    When the requested split is not listed in the config, ``self.data`` is set
+    to ``None`` and no other attribute is populated.
+    """
+
+    def __init__(
+        self,
+        dataset_name: str,
+        dataset_config_file: str,
+        split: str = "train",
+        fewshot: bool = False,
+        shot: int = 50,
+        seed: int = 0,
+        lang: Optional[str] = None,
+    ):
+        """
+        Args:
+            dataset_name: key of the dataset entry in the config file.
+            dataset_config_file: path to the YAML dataset config.
+            split: split to load; "val" is treated as "validation".
+            fewshot: use the subsampled files for splits listed under
+                ``subsample_splits``.
+            shot: shot count used to format ``subsample_path``.
+            seed: seed used to format ``subsample_path``.
+            lang: language to load; defaults to the first entry of ``langs``.
+
+        Raises:
+            ValueError: if ``lang`` is given but not listed under ``langs``.
+        """
+        with open(dataset_config_file, "r") as f:
+            config = yaml.safe_load(f)
+        self.dataset_config = config[dataset_name]
+        if split == "val":
+            split = "validation"
+        if split not in self.dataset_config["splits"]:
+            logger.warning(f"Data split {split} not available.")
+            self.data = None
+            return
+
+        self.name = dataset_name
+        self.split = split
+        # Optional keys may be absent or empty (null) in the config; both mean "no columns".
+        # ``feature_columns`` is accepted as a fallback for configs that do not define ``text_columns``.
+        self.image_columns = self.dataset_config.get("image_columns") or []
+        self.text_columns = (
+            self.dataset_config.get("text_columns") or self.dataset_config.get("feature_columns") or []
+        )
+        self.columns_to_drop = self.dataset_config.get("columns_to_drop") or []
+        self.label_columns = self.dataset_config["label_columns"]
+
+        available_langs = self.dataset_config["langs"]
+        if lang is None:
+            lang = available_langs[0]
+        elif lang not in available_langs:
+            raise ValueError(f"Language {lang} not available for {dataset_name}; choose from {available_langs}.")
+        # The subsampled path only applies to few-shot runs on splits that have subsamples.
+        use_subsample = fewshot and self.split in self.dataset_config["subsample_splits"]
+        subsample_path = self.dataset_config["subsample_path"].format(shot=shot, seed=seed) if use_subsample else ""
+        url = self.dataset_config["url"].format(
+            name=self.name,
+            lang=lang,
+            subsample_path=subsample_path,
+            split=self.split,
+        )
+        base_dir = get_data_home_dir()
+        data_dir = os.path.join(self.name, lang)
+        if fewshot:
+            data_dir = os.path.join(data_dir, "subsampling", f"{shot}_shot-seed{seed}")
+        self.dataset_dir = os.path.join(base_dir, data_dir)
+        data_path = os.path.join(self.dataset_dir, f"{split}.csv")
+        download(url, path=data_path)
+
+        # Read without a header row; column names come from ``data_columns``.
+        self.data = pd.read_csv(
+            data_path,
+            header=None,
+            names=self.dataset_config["data_columns"],
+            sep=self.dataset_config.get("data_sep", ","),
+            on_bad_lines="warn",
+        )
+
+    @property
+    def problem_type(self):
+        """Problem type string taken from the dataset config."""
+        return self.dataset_config["problem_type"]
+
+    @property
+    def metric(self):
+        """Evaluation metric name taken from the dataset config."""
+        return self.dataset_config["metric"]
diff --git a/src/autogluon/bench/custom_configs/dataloaders/text_datasets.yaml b/src/autogluon/bench/custom_configs/dataloaders/text_datasets.yaml
new file mode 100644
index 00000000..8960f4af
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/text_datasets.yaml
@@ -0,0 +1,75 @@
+# supports both regular and fewshot datasets
+# for fullshot:
+# custom_dataloader:
+#     dataloader_file: sample_configs/dataloaders/text_dataloader.py   # relative path to WORKDIR
+#     class_name: TextDataLoader
+#     lang: en
+#     dataset_config_file: test/automm_text_datasets.yaml
+
+# for fewshot:
+# custom_dataloader:
+#     dataloader_file: sample_configs/dataloaders/text_dataloader.py   # relative path to WORKDIR
+#     class_name: TextDataLoader
+#     lang: en
+#     fewshot: True
+#     shot: 10
+#     seed: 8
+#     dataset_config_file: test/automm_text_datasets.yaml
+#
+# When running in AWS mode, DATA_BUCKET: <dataset_bucket> needs to be specified in the run config
+
+base: &base
+  url: s3://<data_bucket>/{name}/{lang}/{subsample_path}{split}.csv
+  splits:
+    - train
+    - validation
+    - test
+  langs:
+    - de
+    - en
+  data_columns:
+    - label
+    - text
+  image_columns: []
+  text_columns:
+    - text
+  label_columns:
+    - label
+  columns_to_drop: []
+  data_sep: ","
+  metric: acc
+  problem_type: multiclass
+  subsample_path: subsampling/{shot}_shot/seed{seed}/
+  subsample_splits:
+    - train
+    - validation
+  shots:
+    - 1
+    - 5
+    - 10
+    - 20
+  seeds:
+    - 0
+    - 1
+    - 2
+    - 3
+    - 4
+    - 5
+    - 6
+    - 7
+    - 8
+    - 9
+
+
+dataset_1:
+  <<: *base
+  langs:
+    - en
+  problem_type: binary
+
+dataset_2:
+  <<: *base
+  langs:
+    - fr
+  problem_type: binary
+
diff --git a/src/autogluon/bench/custom_configs/dataloaders/text_tabular_dataloader.py b/src/autogluon/bench/custom_configs/dataloaders/text_tabular_dataloader.py
new file mode 100644
index 00000000..c587af89
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/text_tabular_dataloader.py
@@ -0,0 +1,86 @@
+import logging
+import os
+
+import pandas as pd
+import yaml
+
+from autogluon.bench.utils.dataset_utils import get_data_home_dir
+from autogluon.common.loaders import load_zip
+from autogluon.common.loaders._utils import download
+
+
+def path_expander(path, base_folder):
+    """Turn a ";"-separated list of relative paths into absolute paths under ``base_folder``."""
+    path_l = path.split(";")
+    return ";".join([os.path.abspath(os.path.join(base_folder, path)) for path in path_l])
+
+
+logger = logging.getLogger(__name__)
+
+
+class TextTabularDataLoader:
+    """Load one split of a text + tabular dataset described in a YAML config.
+
+    The file behind the dataset's ``url`` (CSV or ``.pq`` parquet) is downloaded
+    below ``get_data_home_dir()`` and read into ``self.data``; columns listed
+    under ``columns_to_drop`` are removed. When the requested split is not
+    listed in the config, ``self.data`` is set to ``None`` and no other
+    attribute is populated.
+    """
+
+    def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"):
+        """
+        Args:
+            dataset_name: key of the dataset entry in the config file.
+            dataset_config_file: path to the YAML dataset config.
+            split: split to load; "test" is read from the "dev" file when the
+                config sets ``test_split_name: dev``.
+
+        Raises:
+            NotImplementedError: if the URL extension is neither ".csv" nor ".pq".
+        """
+        with open(dataset_config_file, "r") as f:
+            config = yaml.safe_load(f)
+
+        self.dataset_config = config[dataset_name]
+        if split not in self.dataset_config["splits"]:
+            logger.warning(f"Data split {split} not available.")
+            self.data = None
+            return
+        if split == "test" and self.dataset_config.get("test_split_name") == "dev":
+            split = "dev"
+
+        self.name = dataset_name
+        self.split = split
+        self.image_columns = []
+        # Optional keys may be absent or empty (null) in the config; both mean "no columns".
+        self.text_columns = self.dataset_config.get("text_columns") or []
+        self.label_columns = self.dataset_config["label_columns"]
+        self.columns_to_drop = self.dataset_config.get("columns_to_drop") or []
+
+        # Pass ``name`` as well so URL templates such as ".../{name}/{split}.csv" resolve.
+        url = self.dataset_config["url"].format(name=self.name, split=self.split)
+        file_extension = os.path.splitext(url)[-1]
+        base_dir = get_data_home_dir()
+
+        self.data_path = os.path.join(base_dir, self.name, f"{split}{file_extension}")
+        download(url, path=self.data_path)
+        if file_extension == ".csv":
+            self.data = pd.read_csv(self.data_path)
+        elif file_extension == ".pq":
+            self.data = pd.read_parquet(self.data_path)
+        else:
+            raise NotImplementedError("Unsupported data type.")
+
+        if self.columns_to_drop:
+            self.data.drop(columns=self.columns_to_drop, inplace=True)
+
+    @property
+    def problem_type(self):
+        """Problem type string taken from the dataset config."""
+        return self.dataset_config["problem_type"]
+
+    @property
+    def metric(self):
+        """Evaluation metric name taken from the dataset config."""
+        return self.dataset_config["metric"]
diff --git a/src/autogluon/bench/custom_configs/dataloaders/vision_dataloader.py b/src/autogluon/bench/custom_configs/dataloaders/vision_dataloader.py
new file mode 100644
index 00000000..84ab5d91
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/vision_dataloader.py
@@ -0,0 +1,92 @@
+import logging
+import os
+
+import pandas as pd
+import yaml
+
+from autogluon.bench.utils.dataset_utils import get_data_home_dir
+from autogluon.common.loaders import load_zip
+
+
+def path_expander(path, base_folder):
+    """Turn a ";"-separated list of relative paths into absolute paths under ``base_folder``."""
+    path_l = path.split(";")
+    return ";".join([os.path.abspath(os.path.join(base_folder, path)) for path in path_l])
+
+
+logger = logging.getLogger(__name__)
+
+
+class VisionDataLoader:
+    """Load one split of an image (optionally image + text/tabular) dataset from a YAML config.
+
+    The dataset archive is unzipped below ``get_data_home_dir()``, the split's
+    annotation CSV is read into ``self.data``, columns listed under
+    ``columns_to_drop`` and rows containing missing values are removed, and
+    image columns are rewritten to absolute paths. When the requested split is
+    not listed in the config, ``self.data`` is set to ``None`` and no other
+    attribute is populated.
+    """
+
+    def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"):
+        """
+        Args:
+            dataset_name: key of the dataset entry in the config file.
+            dataset_config_file: path to the YAML dataset config.
+            split: split to load; "val" is treated as "validation".
+        """
+        with open(dataset_config_file, "r") as f:
+            config = yaml.safe_load(f)
+
+        self.dataset_config = config[dataset_name]
+        if split == "val":
+            split = "validation"
+        if split not in self.dataset_config["splits"]:
+            logger.warning(f"Data split {split} not available.")
+            self.data = None
+            return
+
+        self.name = dataset_name
+        self.split = split
+        # Optional keys may be absent or empty (null) in the config; both mean "no columns".
+        # ``feature_columns`` is accepted as a fallback for configs that do not define ``image_columns``.
+        self.image_columns = (
+            self.dataset_config.get("image_columns") or self.dataset_config.get("feature_columns") or []
+        )
+        self.text_columns = self.dataset_config.get("text_columns") or []
+        self.columns_to_drop = self.dataset_config.get("columns_to_drop") or []
+        self.label_columns = self.dataset_config["label_columns"]
+
+        url = self.dataset_config["url"].format(name=self.name)
+        base_dir = get_data_home_dir()
+        load_zip.unzip(url, unzip_dir=base_dir)
+        self.dataset_dir = os.path.join(base_dir, self.name)
+
+        annotation_filename = self.dataset_config["annotation"].format(name=self.name, split=self.split)
+        image_path_pattern = self.dataset_config["image_path"]
+
+        self.data = pd.read_csv(os.path.join(self.dataset_dir, annotation_filename))
+        # Every column that is not image, text, label, or dropped is treated as tabular.
+        self.tabular_columns = self.data.columns.difference(
+            self.image_columns + self.text_columns + self.label_columns + self.columns_to_drop
+        )
+        logger.info("Image columns: %s", self.image_columns)
+        logger.info("Text columns: %s", self.text_columns)
+        logger.info("Tabular columns: %s", list(self.tabular_columns))
+        self.data.drop(columns=self.columns_to_drop, inplace=True)
+        self.data.dropna(inplace=True)
+
+        image_base_path = image_path_pattern.format(name=self.name, split=self.split, value="")
+        image_dir = os.path.join(self.dataset_dir, image_base_path)
+        for col in self.image_columns:
+            self.data[col] = self.data[col].apply(lambda ele: path_expander(ele, base_folder=image_dir))
+
+    @property
+    def problem_type(self):
+        """Problem type string taken from the dataset config."""
+        return self.dataset_config["problem_type"]
+
+    @property
+    def metric(self):
+        """Evaluation metric name taken from the dataset config."""
+        return self.dataset_config["metric"]
diff --git a/src/autogluon/bench/custom_configs/dataloaders/vision_datasets.yaml b/src/autogluon/bench/custom_configs/dataloaders/vision_datasets.yaml
new file mode 100644
index 00000000..a095403e
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/vision_datasets.yaml
@@ -0,0 +1,33 @@
+# data = custom_class(dataset_name=dataset_name, split=split, **kwargs)
+# custom_class: custom dataloader class, e.g. VisionDataLoader in sample_configs/dataloaders/vision_dataloader.py
+# When running in AWS mode, DATA_BUCKET: <dataset_bucket> needs to be specified in the run config
+
+base: &base
+  url: s3://<dataset_bucket>/vision_datasets/{name}.zip
+  splits:
+    - train
+    - test
+  image_columns:
+    - ImageID
+  text_columns: []
+  label_columns:
+    - LabelName
+  columns_to_drop: []
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{name}/{value}"
+  metric: acc
+  problem_type: multiclass
+  
+
+dataset_1:
+  <<: *base
+  image_path: "{split}/{value}"
+  problem_type: binary
+
+dataset_2:
+  <<: *base
+
+dataset_3:
+  <<: *base
+  image_path: "{name}/{split}/{value}"
+
diff --git a/src/autogluon/bench/custom_configs/resources/multimodal_constraints.yaml b/src/autogluon/bench/custom_configs/resources/multimodal_constraints.yaml
new file mode 100644
index 00000000..940345c5
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/resources/multimodal_constraints.yaml
@@ -0,0 +1,13 @@
+10m4x:
+  TIME_LIMIT: 500
+  INSTANCE: g4dn.4xlarge
+  # MAX_MACHINE_NUM: 20   # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 100   # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 15000  # optional, default 15000MB
+
+g4_12x:
+  INSTANCE: g4dn.12xlarge
+  MAX_MACHINE_NUM: 1000   # optional, default 20
+  BLOCK_DEVICE_VOLUME: 1000   # optional, default 100GB
+  RESERVED_MEMORY_SIZE: 12000  # optional, default 15000MB
+
diff --git a/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml b/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
new file mode 100644
index 00000000..7a2765f5
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
@@ -0,0 +1,69 @@
+AutoGluon_branch:
+  repo: https://github.com/autogluon/autogluon.git
+  version: stable_GA4_update
+  params:  # MultimodalPredictor.fit(params)
+    presets: medium_quality
+    time_limit:  90
+    hyperparameters:
+      optimization.max_epochs: 1
+      optimization.learning_rate: 0.005
+
+AutoGluon_best_master:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+
+ablation_greedy_soup:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.top_k_average_method: best
+
+ablation_gradient_clip:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.gradient_clip_algorithm: value
+
+ablation_warmup_steps:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.warmup_steps: 0.0
+
+ablation_cosine_decay:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.lr_schedule: polynomial_decay
+
+ablation_weight_decay:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.weight_decay: 0.0
+
+ablation_lr_decay:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.lr_decay: 0.0 
+
+
+autokeras_master:
+  repo: https://github.com/keras-team/keras-tuner.git 
+  version: master
+  
diff --git a/src/autogluon/bench/datasets/multimodal_dataset.py b/src/autogluon/bench/datasets/multimodal_dataset.py
index 39ce15ac..fffac121 100644
--- a/src/autogluon/bench/datasets/multimodal_dataset.py
+++ b/src/autogluon/bench/datasets/multimodal_dataset.py
@@ -33,7 +33,6 @@
     "AEPricePrediction",
     "IMDBGenrePrediction",
     "JCPennyCategory",
-    "NewsPopularity",
     "NewsChannel",
 ]
 
@@ -651,56 +650,6 @@ def problem_type(self):
         return _REGRESSION
 
 
-class NewsPopularity(BaseMultiModalDataset):
-    _SOURCE = "https://archive.ics.uci.edu/ml/datasets/online+news+popularity"
-    _INFO = {
-        "train": {
-            "url": get_repo_url() + "news_popularity2/train.csv",
-            "sha1sum": "390b15e77fa77a2722ce2d459a977034a9565f46",
-        },
-        "test": {
-            "url": get_repo_url() + "news_popularity2/test.csv",
-            "sha1sum": "297253bdca18f6aafbaee0262be430126c1f9044",
-        },
-    }
-    _registry_name = "news_popularity"
-
-    def __init__(self, split="train"):
-        super().__init__(split=split, dataset_name=self._registry_name, data_info=self._INFO)
-
-    @property
-    def data(self):
-        return self._data
-
-    @classmethod
-    def splits(cls):
-        return cls._INFO.keys()
-
-    @property
-    def data(self):
-        return self._data
-
-    @property
-    def label_columns(self):
-        return ["log_shares"]
-
-    @property
-    def label_types(self):
-        return [_NUMERICAL]
-
-    @property
-    def feature_columns(self):
-        return [col for col in list(self.data.columns) if col not in self.label_columns]
-
-    @property
-    def metric(self):
-        return "r2"
-
-    @property
-    def problem_type(self):
-        return _REGRESSION
-
-
 class NewsChannel(BaseMultiModalDataset):
     _SOURCE = "https://archive.ics.uci.edu/ml/datasets/online+news+popularity"
     _INFO = {
diff --git a/src/autogluon/bench/eval/scripts/run_evaluation_openml.py b/src/autogluon/bench/eval/scripts/run_evaluation_openml.py
index 1da146fb..249c3c54 100644
--- a/src/autogluon/bench/eval/scripts/run_evaluation_openml.py
+++ b/src/autogluon/bench/eval/scripts/run_evaluation_openml.py
@@ -265,7 +265,7 @@ def evaluate(
         raise ValueError(f"Unexpected value for frameworks_compare_vs_all: {frameworks_compare_vs_all}")
 
     if verbose:
-        print("frameworks = [")
+        print("frameworks = [")
         for i in range(len(frameworks_run)):
             print(f'\t"{frameworks_run[i]}",')
         print("]")
diff --git a/src/autogluon/bench/eval/scripts/run_generate_clean_openml.py b/src/autogluon/bench/eval/scripts/run_generate_clean_openml.py
index de1085e0..e5cbe9d1 100644
--- a/src/autogluon/bench/eval/scripts/run_generate_clean_openml.py
+++ b/src/autogluon/bench/eval/scripts/run_generate_clean_openml.py
@@ -8,6 +8,16 @@
 import typer
 from typing_extensions import Annotated
 
+from autogluon.bench.eval.evaluation.constants import (
+    DATASET,
+    FOLD,
+    FRAMEWORK,
+    METRIC,
+    METRIC_ERROR,
+    PROBLEM_TYPE,
+    TIME_INFER_S,
+    TIME_TRAIN_S,
+)
 from autogluon.bench.eval.evaluation.constants import (
     DATASET,
     FOLD,
@@ -42,6 +52,7 @@
 def clean_amlb_results(
     benchmark_name: str = typer.Argument(
         None, help="Benchmark name populated by benchmark run, in format <benchmark_name>_<timestamp>"
+        # A default of None makes this positional CLI argument optional (Typer treats arguments with defaults as optional).
     ),
     results_dir: str = typer.Option("data/results/", help="Root directory of raw and prepared results."),
     results_dir_input: str = typer.Option(
@@ -115,6 +126,7 @@ def clean_and_save_results(
     run_name_in_output_path: bool = True,
     save: bool = True,
     save_minimal: bool = True,
+    constraints: List[str] | None = None,
     out_path_prefix: str = "openml_ag_",
     out_path_suffix: str = "",
     framework_suffix_column: str = "constraint",
@@ -139,9 +151,34 @@ def clean_and_save_results(
         results_list.append(results)
     results_raw = pd.concat(results_list, ignore_index=True, sort=True)
 
+    if "framework_parent" in results_raw.columns:
+        results_raw[FRAMEWORK] = results_raw["framework_parent"] + "_" + run_name + "_" + results_raw[FRAMEWORK]
+    else:
+        results_raw[FRAMEWORK] = results_raw[FRAMEWORK] + "_" + run_name
+
+    minimal_columns = [
+        DATASET,
+        FOLD,
+        FRAMEWORK,
+        "constraint",
+        METRIC,
+        METRIC_ERROR,
+        TIME_TRAIN_S,
+        TIME_INFER_S,
+        PROBLEM_TYPE,
+        "tid",
+    ]
+
+    results_raw_columns = list(results_raw.columns)
+    results_raw_columns = [c for c in results_raw_columns if c in minimal_columns] + [
+        c for c in results_raw_columns if c not in minimal_columns
+    ]
+    results_raw = results_raw[results_raw_columns]
+
     if save:
         if run_name_in_output_path:
             save_path = os.path.join(results_dir_output, f"{out_path_prefix}{run_name}{out_path_suffix}")
+            # save_path is an extension-less stem; ".csv", ".parquet" and "_min.csv" are appended to it below.
         else:
             save_path = os.path.join(results_dir_output, f"{out_path_prefix}{out_path_suffix}")
         save_path_file = f"{save_path}.csv"
@@ -151,7 +188,7 @@ def clean_and_save_results(
         save_path_file_pq = f"{save_path}.parquet"
         save_pd.save(path=save_path_file_pq, df=results_raw)
         if save_minimal:
-            results_raw_minimal = results_raw[MINIMAL_COLUMNS]
+            results_raw_minimal = results_raw[minimal_columns]
 
             save_path_file_minimum = f"{save_path}_min.csv"
             save_pd.save(path=save_path_file_minimum, df=results_raw_minimal)
diff --git a/src/autogluon/bench/frameworks/autokeras/autokeras_benchmark.py b/src/autogluon/bench/frameworks/autokeras/autokeras_benchmark.py
new file mode 100644
index 00000000..e3015eb0
--- /dev/null
+++ b/src/autogluon/bench/frameworks/autokeras/autokeras_benchmark.py
@@ -0,0 +1,124 @@
+import json
+import logging
+import os
+import subprocess
+import sys
+from typing import Optional
+
+from autogluon.bench import __version__ as agbench_version
+from autogluon.bench.frameworks.benchmark import Benchmark
+
+logger = logging.getLogger(__name__)
+
+
+class AutoKerasBenchmark(Benchmark):
+    """
+    A benchmark class that runs AutoKeras through the sibling setup.sh and exec.py scripts.
+
+    Attributes:
+        benchmark_name (str): The name of the benchmark (used in the completion log message).
+        benchmark_dir (str): Directory handed to setup.sh and exec.py; presumably set by Benchmark - TODO confirm.
+        metrics_dir (str): Directory handed to exec.py as --metrics_dir; presumably set by Benchmark - TODO confirm.
+
+    Methods:
+        setup(): Sets up the virtual environment for running the benchmark.
+        run(): Runs the benchmark on a given dataset.
+    """
+
+    def setup(
+        self,
+        git_uri: str = "https://github.com/keras-team/keras-tuner.git",
+        git_branch: str = "master",
+    ):
+        """
+        Sets up the virtual environment for running the benchmark by invoking setup.sh.
+
+        Args:
+            git_uri (str): The URI of the Git repository to clone (default: "https://github.com/keras-team/keras-tuner.git").
+            git_branch (str): The branch of the Git repository to clone (default: "master").
+
+        Returns:
+            None. The process exits with status 1 if setup.sh returns a non-zero code.
+        """
+        setup_script_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "setup.sh")
+        command = [setup_script_path, git_uri, git_branch, self.benchmark_dir, agbench_version]
+        result = subprocess.run(command)
+        if result.returncode != 0:
+            sys.exit(1)
+        else:
+            logger.info("Successfully set up the environment under %s/.venv.", self.benchmark_dir)
+
+    def run(
+        self,
+        dataset_name: str,
+        framework: str,
+        constraint: Optional[str] = None,
+        params: Optional[dict] = None,
+        custom_dataloader: Optional[dict] = None,
+        custom_metrics: Optional[dict] = None,
+    ):
+        """
+        Runs exec.py with the virtual environment's interpreter on a given dataset.
+
+        Args:
+            dataset_name (str): Dataset name, can be registered with multimodal_dataset_registry or a custom dataset.
+
+                                To get a list of datasets:
+                                from autogluon.bench.datasets.dataset_registry import multimodal_dataset_registry
+                                multimodal_dataset_registry.list_keys()
+            framework (str): The name of the framework to use for the benchmark.
+            constraint (str): The resource constraint used by benchmarking during AWS mode.
+            params (Optional[dict]): Framework params; JSON-encoded and passed to exec.py as --params.
+            custom_dataloader (Optional[dict], None): A dictionary containing information about a custom dataloader to use. Defaults to None.
+                                To define a custom dataloader in the config file:
+
+                                custom_dataloader:
+                                    dataloader_file: path_to/dataloader.py   # relative path to WORKDIR
+                                    class_name: DataLoaderClass
+                                    dataset_config_file: path_to/dataset_config.yaml
+                                    **kwargs (of DataLoaderClass)
+            custom_metrics (Optional[dict], None): A dictionary containing information about a custom metrics to use. Defaults to None.
+                                To define a custom metrics in the config file:
+
+                                custom_metrics:
+                                    metrics_path: path_to/metrics.py   # relative path to WORKDIR
+                                    function_name: custom_metrics_function
+                                    **kwargs (recipient was left unspecified in the original text - TODO confirm)
+
+        Returns:
+            None. The process exits with status 1 if exec.py returns a non-zero code.
+        """
+        if os.environ.get("RUNNING_IN_DOCKER", "false") == "true":
+            venv_base_dir = os.environ["VENV_BASE_DIR"]  # raises KeyError if the variable is unset
+        else:
+            venv_base_dir = self.benchmark_dir
+        PY_EXC_PATH = os.path.join(venv_base_dir, ".venv/bin/python")
+
+        exec_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "exec.py")
+        logger.info(f"Executing {exec_path} under {PY_EXC_PATH}")
+        command = [
+            PY_EXC_PATH,
+            exec_path,
+            "--dataset_name",
+            dataset_name,
+            "--framework",
+            framework,
+            "--benchmark_dir",
+            self.benchmark_dir,
+            "--metrics_dir",
+            self.metrics_dir,
+        ]
+        if constraint is not None:
+            command += ["--constraint", constraint]
+        if params is not None:
+            command += ["--params", json.dumps(params)]  # dict options cross the process boundary as JSON strings
+        if custom_dataloader is not None:
+            command += ["--custom_dataloader", json.dumps(custom_dataloader)]
+        if custom_metrics is not None:
+            command += ["--custom_metrics", json.dumps(custom_metrics)]
+        result = subprocess.run(command)
+        if result.returncode != 0:
+            sys.exit(1)
+        else:
+            logger.info(f"Benchmark {self.benchmark_name} on dataset {dataset_name} is complete.")
+
diff --git a/src/autogluon/bench/frameworks/autokeras/exec.py b/src/autogluon/bench/frameworks/autokeras/exec.py
new file mode 100644
index 00000000..8b3a0df3
--- /dev/null
+++ b/src/autogluon/bench/frameworks/autokeras/exec.py
@@ -0,0 +1,405 @@
+import argparse
+import csv
+import importlib
+import json
+import logging
+import os
+import time
+import tensorflow as tf
+import random
+from datetime import datetime
+from typing import Optional, Union
+import autokeras as ak
+from PIL import Image
+import numpy as np
+from sklearn.model_selection import train_test_split
+from autogluon.bench.datasets.dataset_registry import multimodal_dataset_registry
+import pandas as pd
+
+import tensorflow as tf
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
+def _flatten_dict(data):
+    """Return a single-level copy of ``data``; nested dict keys are hoisted and later duplicates win."""
+    flat = {}
+    for name, item in data.items():
+        # Leaves become a one-entry dict so both cases go through the same update().
+        piece = _flatten_dict(item) if isinstance(item, dict) else {name: item}
+        flat.update(piece)
+    return flat
+
+
+def get_args():
+    """Parse the CLI flags exec.py is launched with; JSON-valued flags are returned as raw strings."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--dataset_name",
+        type=str,
+        help="Dataset that has been registered with multimodal_dataset_registry.",
+    )
+    parser.add_argument("--framework", type=str, help="Framework (and) branch/version.")
+    parser.add_argument("--benchmark_dir", type=str, help="Directory to save benchmarking run.")
+    parser.add_argument("--metrics_dir", type=str, help="Directory to save benchmarking metrics.")
+    parser.add_argument("--constraint", type=str, default=None, help="AWS resources constraint setting.")
+    # --params used to repeat the --constraint help text; it actually carries a JSON object.
+    parser.add_argument("--params", type=str, default=None, help="JSON-encoded dict of framework params.")
+    parser.add_argument(
+        "--custom_dataloader", type=str, default=None, help="Custom dataloader to use in the benchmark."
+    )
+    parser.add_argument("--custom_metrics", type=str, default=None, help="Custom metrics to use in the benchmark.")
+
+    return parser.parse_args()
+
+
+def load_dataset(dataset_name: str, custom_dataloader: dict = None):  # dataset name
+    """Creates the "train", "val" and "test" dataset objects for a dataset.
+
+    Args:
+        dataset_name (str): The name of the dataset to load.
+        custom_dataloader (dict): A dictionary containing information about a custom dataloader to use. Defaults to None.
+
+    Returns:
+        dict_values: The train, val and test dataset objects, in that order (dataset objects, not DataFrames).
+    """
+    splits = ["train", "val", "test"]
+    data = {}
+    if dataset_name in multimodal_dataset_registry.list_keys():
+        logger.info(f"Loading dataset {dataset_name} from multimodal_dataset_registry")
+        for split in splits:
+            data[split] = multimodal_dataset_registry.create(dataset_name, split)
+    elif custom_dataloader is not None:
+        logger.info(f"Loading dataset {dataset_name} from custom dataloader {custom_dataloader}.")
+        custom_dataloader_file = custom_dataloader.pop("dataloader_file")  # NOTE: pop() mutates the caller's dict
+        class_name = custom_dataloader.pop("class_name")  # remaining keys are forwarded as constructor kwargs below
+        spec = importlib.util.spec_from_file_location(class_name, custom_dataloader_file)  # NOTE(review): file only does `import importlib`; confirm importlib.util is loaded
+        module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(module)  # executes the user-supplied dataloader file
+        custom_class = getattr(module, class_name)
+        for split in splits:
+            data[split] = custom_class(dataset_name=dataset_name, split=split, **custom_dataloader)
+    else:
+        raise ModuleNotFoundError(f"Dataset Loader for dataset {dataset_name} is not available.")
+
+    return data.values()  # insertion order follows `splits`, so callers can unpack train, val, test
+
+
+def save_metrics(metrics_path: str, metrics: dict):
+    """Saves evaluation metrics as a one-row CSV file named results.csv.
+
+    Args:
+        metrics_path (str): Directory to write results.csv into; created if missing.
+        metrics (dict): Metrics to save; nested dicts are flattened first. None is logged and skipped.
+
+    Returns:
+        None
+    """
+    if metrics is None:
+        logger.warning("No metrics were created.")
+        return
+
+    # exist_ok avoids the check-then-create race of os.path.exists() followed by os.makedirs().
+    os.makedirs(metrics_path, exist_ok=True)
+    file = os.path.join(metrics_path, "results.csv")
+    flat_metrics = _flatten_dict(metrics)
+    field_names = flat_metrics.keys()
+
+    # The with-block closes the file on exit, so no explicit close() is needed afterwards.
+    with open(file, "w", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=field_names)
+        writer.writeheader()
+        writer.writerow(flat_metrics)
+    logger.info("Metrics saved to %s.", file)
+
+
+def find_average_image_size(image_paths, max_size=(224, 224), min_size=(32, 32)):
+    """
+    Computes a (width, height) target from the floor-mean dimensions of the images that can be opened.
+    Each component is clamped between min_size and max_size; max_size is returned if nothing loads.
+    """
+    sizes = []
+
+    for path in image_paths:
+        try:
+            with Image.open(path) as img:
+                sizes.append((img.width, img.height))
+        except Exception as e:
+            print(f"Error loading image {path}: {e}")
+
+    if not sizes:
+        # Nothing could be opened, so there is no average to compute.
+        return max_size
+
+    def _clamp(value, low, high):
+        return max(low, min(high, value))
+
+    # Integer (floor) means of the collected widths and heights.
+    avg_width = sum(w for w, _ in sizes) // len(sizes)
+    avg_height = sum(h for _, h in sizes) // len(sizes)
+
+    target_width = _clamp(avg_width, min_size[0], max_size[0])
+    target_height = _clamp(avg_height, min_size[1], max_size[1])
+    return (target_width, target_height)
+
+def resize_images_to_target_size(image_paths, target_size, mode='RGB'):
+    """
+    Resizes images to target_size=(width, height); unreadable ones become zero (height, width, 3) arrays.
+    """
+    width, height = target_size
+    resized_images = []
+    for path in image_paths:
+        try:
+            with Image.open(path) as img:
+                img = img.convert(mode)
+                img = img.resize(target_size)
+                resized_images.append(np.array(img))
+        except Exception as e:
+            print(f"Error processing image {path}: {e}")
+            # np.array(PIL image) is (height, width, channels); match that layout. Assuming RGB mode.
+            resized_images.append(np.zeros((height, width, 3), dtype=np.uint8))
+    return np.array(resized_images)
+
+def load_image(image_path, target_size=(224, 224)):
+    """Load an image as an RGB (height, width, 3) array resized to target_size=(width, height); zeros on failure."""
+    try:
+        with Image.open(image_path) as img:
+            return np.array(img.convert("RGB").resize(target_size))
+    except Exception as e:
+        print(f"Error loading image {image_path}: {e}")
+        # The placeholder must use the same (height, width, 3) layout as np.array(PIL image).
+        return np.zeros((target_size[1], target_size[0], 3), dtype=np.uint8)
+
+def create_zero_image(target_size=(224, 224)):
+    """Return an all-zero uint8 array of shape (target_size[0], target_size[1], 3)."""
+    return np.zeros(tuple(target_size[:2]) + (3,), dtype=np.uint8)
+
+def average_images(image_paths, target_size=(224, 224)):
+    """Return the pixel-wise mean, cast to uint8, of the first two images in image_paths."""
+    first_two = list(map(lambda p: load_image(p, target_size), image_paths[:2]))
+    # Average over the stacking axis, then cast back to an image dtype.
+    return np.mean(first_two, axis=0).astype(np.uint8)
+
+
+def decode_img(img):
+    """Decode a compressed JPEG string into a 3-channel float32 tensor."""
+    # decode_jpeg yields a 3D uint8 tensor; convert_image_dtype turns it into floats in the [0,1] range.
+    decoded = tf.image.decode_jpeg(img, channels=3)
+    return tf.image.convert_image_dtype(decoded, tf.float32)
+
+def process_path(file_path):
+    """Read the file at file_path and return it decoded by decode_img."""
+    # tf.io.read_file gives the raw file contents as a string; decoding is delegated to decode_img.
+    raw_bytes = tf.io.read_file(file_path)
+    return decode_img(raw_bytes)
+
+def preprocess_data(features, image_columns, text_columns, target_size):
+    """Split a feature frame into per-modality NumPy inputs.
+
+    Returns (image_data, tabular_data, text_data); a modality with no columns is None. Note that
+    the first image column of ``features`` is rewritten in place to hold a single path per row.
+    """
+    image_data = None
+    if image_columns is not None and len(image_columns) > 0:
+        # Only the first image column is used, and only the first path of a ';'-separated list.
+        features.loc[:, image_columns[0]] = features[image_columns[0]].apply(lambda x: x.split(';')[0] if pd.notnull(x) else x)
+        image_paths = features[image_columns[0]].values
+        image_data = np.array([load_image(path, target_size) for path in image_paths])
+
+    # Process text data: all text columns of a row are joined into one space-separated string
+    text_data = None
+    if text_columns is not None and len(text_columns) > 0:
+        text_data = features.apply(lambda row: " ".join((str(row[col]) if row[col] is not None else "") for col in text_columns), axis=1)
+        text_data = text_data.to_numpy(dtype=str)
+        print("Text data is: ", text_data)
+
+    # Process tabular data: every column that is neither an image nor a text column.
+    # Parenthesised on purpose: `a or [] + b or []` parses as `a or ([] + b) or []`, which dropped the text columns.
+    all_image_text_columns = (image_columns or []) + (text_columns or [])
+    tabular_columns = features.columns.difference(all_image_text_columns)
+    tabular_data = None
+    if len(tabular_columns) > 0:
+        tabular_data = features[tabular_columns].to_numpy()
+
+    return image_data, tabular_data, text_data
+
+
+def run(
+    dataset_name: Union[str, dict],
+    framework: str,
+    benchmark_dir: str,
+    metrics_dir: str,
+    constraint: Optional[str] = None,
+    params: Optional[dict] = None,
+    custom_dataloader: Optional[dict] = None,
+    custom_metrics: Optional[dict] = None,
+):
+    """Runs the AutoKeras benchmark on a given dataset and saves one row of results.
+
+    Args:
+        dataset_name (Union[str, dict]): Dataset that has been registered with multimodal_dataset_registry.
+                            To get a list of datasets:
+
+                            from autogluon.bench.datasets.dataset_registry import multimodal_dataset_registry
+                            multimodal_dataset_registry.list_keys()
+        framework (str): Framework name, recorded in the results row and in the output sub-directory name.
+        benchmark_dir (str): The path to the directory where benchmarking artifacts should be saved (not read by this function).
+        metrics_dir (str): Root directory; results go to <framework>.<dataset_name>.<constraint>.local/scores under it.
+        constraint (str): The resource constraint used by benchmarking during AWS mode, default: None.
+        params (dict): Run parameters; only "seed" (default 42) is read here. None is treated as empty.
+        custom_dataloader (dict): A dictionary containing information about a custom dataloader to use. Defaults to None.
+                                To define a custom dataloader in the config file:
+
+                                custom_dataloader:
+                                    dataloader_file: path_to/dataloader.py   # relative path to WORKDIR
+                                    class_name: DataLoaderClass
+                                    dataset_config_file: path_to/dataset_config.yaml
+                                    **kwargs (of DataLoaderClass)
+        custom_metrics (dict): A dictionary containing information about a custom metrics to use. Defaults to None.
+                                To define a custom metrics in the config file:
+
+                                custom_metrics:
+                                    metrics_path: path_to/metrics.py   # relative path to WORKDIR
+                                    function_name: custom_metrics_function
+                                    **kwargs (of autogluon.core.metrics.make_scorer)
+    Returns:
+        None
+    """
+    seed = (params or {}).pop("seed", 42)  # params is None when --params is not passed
+    tf.random.set_seed(seed)
+    np.random.seed(seed)
+    random.seed(seed)
+
+    train_data, val_data, test_data = load_dataset(dataset_name=dataset_name, custom_dataloader=custom_dataloader)
+    image_columns = train_data.image_columns
+    text_columns = train_data.text_columns
+    tabular_columns = list(set(train_data.data.columns) - set(image_columns) - set(text_columns) - set(train_data.columns_to_drop) - set(train_data.label_columns))
+    feature_columns = tabular_columns + image_columns + text_columns
+
+    features_train, labels_train = train_data.data[feature_columns], train_data.data[train_data.label_columns]
+    if test_data.data is None:
+        print("No test data found, splitting test data from train data")
+        features_train, features_test, labels_train, labels_test = train_test_split(features_train, labels_train, test_size=0.2, random_state=seed)
+    else:
+        features_test, labels_test = test_data.data[feature_columns], test_data.data[train_data.label_columns]
+
+    features_val, labels_val = None, None
+    if val_data.data is not None:
+        features_val, labels_val = val_data.data[feature_columns], val_data.data[train_data.label_columns]
+
+    target_size = None
+    if image_columns is not None and len(image_columns) > 0:
+        # A cell may hold several ';'-separated paths; size the images from the first one, as preprocess_data does.
+        image_paths = [p.split(";")[0] for p in features_train[image_columns[0]].tolist() if isinstance(p, str)]
+        target_size = find_average_image_size(image_paths, max_size=(224, 224), min_size=(32, 32))
+
+    image_data_train, tabular_data_train, text_data_train = preprocess_data(features_train, image_columns, text_columns, target_size)
+    image_data_test, tabular_data_test, text_data_test = preprocess_data(features_test, image_columns, text_columns, target_size)
+
+    image_data_val, tabular_data_val, text_data_val = (None, None, None)
+
+    if features_val is not None and labels_val is not None:
+        image_data_val, tabular_data_val, text_data_val = preprocess_data(features_val, image_columns, text_columns, target_size)
+
+    inputs = []
+    if image_data_train is not None:
+        print("has image_data")
+        inputs.append(ak.ImageInput())
+    if tabular_data_train is not None:
+        print("has tabular_data")
+        inputs.append(ak.StructuredDataInput())
+    if text_data_train is not None:
+        print("has text_data")
+        inputs.append(ak.TextInput())
+
+    if train_data.problem_type == "regression":
+        output_node = ak.RegressionHead(metrics=[tf.keras.metrics.RootMeanSquaredError()])
+    elif train_data.problem_type in ["multiclass", "classification"]:
+        output_node = ak.ClassificationHead(metrics=["accuracy"])
+    elif train_data.problem_type == "binary":
+        output_node = ak.ClassificationHead(metrics=["auc"])
+    else:
+        raise ValueError(f"Unsupported problem_type: {train_data.problem_type}")
+
+    # Combine the data into a list for the model
+    train_data_list = [data for data in [image_data_train, tabular_data_train, text_data_train] if data is not None]
+
+    # Same modality order (image, tabular, text) for the test inputs
+    test_data_list = [data for data in [image_data_test, tabular_data_test, text_data_test] if data is not None]
+
+    auto_model = ak.AutoModel(
+        inputs=inputs,
+        outputs=output_node,
+        overwrite=True,
+        max_trials=1
+    )
+
+    utc_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")
+    start_time = time.time()
+    if features_val is not None and labels_val is not None:
+        # Combine the data into a list for the model
+        val_data_list = [data for data in [image_data_val, tabular_data_val, text_data_val] if data is not None]
+
+        auto_model.fit(
+            train_data_list,
+            labels_train,
+            validation_data=(val_data_list, labels_val),
+            epochs=10
+        )
+    else:
+        auto_model.fit(
+            train_data_list,
+            labels_train,
+            epochs=10
+        )
+    end_time = time.time()
+    training_duration = round(end_time - start_time, 1)
+
+    start_time = time.time()
+    metrics = auto_model.evaluate(test_data_list, labels_test)
+    end_time = time.time()
+    predict_duration = round(end_time - start_time, 1)
+
+    metric_name = train_data.metric
+    version = "master"
+    metrics = {
+        "id": "id/0",  # dummy id to make it align with amlb benchmark output
+        "task": dataset_name,
+        "framework": framework,
+        "constraint": constraint,
+        "version": version,
+        "fold": 0,
+        "type": train_data.problem_type,
+        "result": metrics[1],  # NOTE(review): assumes evaluate() returns [loss, head metric] - confirm
+        "metric": metric_name,
+        "utc": utc_time,
+        "training_duration": training_duration,
+        "predict_duration": predict_duration,
+        "scores": metrics[1],
+    }
+    subdir = f"{framework}.{dataset_name}.{constraint}.local"
+    save_metrics(os.path.join(metrics_dir, subdir, "scores"), metrics)
+    
+
+if __name__ == "__main__":
+    args = get_args()
+    # --params, --custom_dataloader and --custom_metrics arrive as JSON strings; decode the ones that were given.
+    for json_flag in ("params", "custom_dataloader", "custom_metrics"):
+        raw_value = getattr(args, json_flag)
+        if raw_value is not None:
+            setattr(args, json_flag, json.loads(raw_value))
+
+    # Forward every parsed flag to run() by keyword.
+    run(
+        dataset_name=args.dataset_name,
+        framework=args.framework,
+        benchmark_dir=args.benchmark_dir,
+        metrics_dir=args.metrics_dir,
+        constraint=args.constraint,
+        params=args.params,
+        custom_dataloader=args.custom_dataloader,
+        custom_metrics=args.custom_metrics,
+    )
+
diff --git a/src/autogluon/bench/frameworks/autokeras/setup.sh b/src/autogluon/bench/frameworks/autokeras/setup.sh
new file mode 100755
index 00000000..0951f3e9
--- /dev/null
+++ b/src/autogluon/bench/frameworks/autokeras/setup.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Usage: setup.sh <git_uri> <branch> <venv_base_dir> <ag_bench_version>
+set -eo pipefail
+
+GIT_URI=$1
+BRANCH=$2
+venv_base_dir=$3  # from root of benchmark run
+AG_BENCH_VERSION=$4
+
+if [ ! -d "$venv_base_dir" ]; then
+  mkdir -p "$venv_base_dir"
+fi
+
+echo "Cloning $GIT_URI#$BRANCH..."
+repo_name=$(basename -s .git "$GIT_URI")
+git clone --depth 1 --single-branch --branch "${BRANCH}" --recurse-submodules "${GIT_URI}" "$venv_base_dir/$repo_name"
+
+# create virtual env
+python3 -m venv "$venv_base_dir/.venv"
+source "$venv_base_dir/.venv/bin/activate"
+
+python3 -m pip install --upgrade pip
+python3 -m pip install --upgrade setuptools wheel
+python3 -m pip install scikit-learn
+
+if echo "$AG_BENCH_VERSION" | grep -q "dev"; then
+  # install from local source or docker (NOTE(review): relies on the caller's cwd being the package root - confirm)
+  python3 -m pip install .
+else
+  python3 -m pip install "autogluon.bench==$AG_BENCH_VERSION"
+fi
+
+cd "$venv_base_dir"
+python3 -m pip install autokeras pyarrow fastparquet
+python3 -m pip install "tensorflow[and-cuda]"  # quoted so the shell never treats [and-cuda] as a glob
+
diff --git a/src/autogluon/bench/frameworks/autokeras/test.py b/src/autogluon/bench/frameworks/autokeras/test.py
new file mode 100644
index 00000000..511a2e77
--- /dev/null
+++ b/src/autogluon/bench/frameworks/autokeras/test.py
@@ -0,0 +1,336 @@
+import argparse
+import csv
+import importlib
+import json
+import logging
+import os
+import time
+from datetime import datetime
+from typing import Optional, Union
+import autokeras as ak
+from PIL import Image
+import numpy as np
+from sklearn.model_selection import train_test_split
+from autogluon.bench.datasets.dataset_registry import multimodal_dataset_registry
+import pandas as pd
+
+import tensorflow as tf
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
+def _flatten_dict(data):
+    """Merge all leaf entries of a nested dict into one flat dict (later duplicate keys win)."""
+    merged = {}
+    for name, item in data.items():
+        # A nested dict contributes its own flattened leaves; any other value is one entry.
+        leaves = _flatten_dict(item) if isinstance(item, dict) else {name: item}
+        merged.update(leaves)
+    return merged
+
+
+def get_args():
+    """Parse the command-line options of this benchmark script and return the argparse namespace."""
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--dataset_name",
+        type=str,
+        help="Dataset that has been registered with multimodal_dataset_registry.",
+    )
+    parser.add_argument("--framework", type=str, help="Framework (and) branch/version.")
+    parser.add_argument("--benchmark_dir", type=str, help="Directory to save benchmarking run.")
+    parser.add_argument("--metrics_dir", type=str, help="Directory to save benchmarking metrics.")
+    parser.add_argument("--constraint", type=str, default=None, help="AWS resources constraint setting.")
+    parser.add_argument("--params", type=str, default=None, help="Framework params, passed as a JSON string.")
+    parser.add_argument(
+        "--custom_dataloader", type=str, default=None, help="Custom dataloader to use in the benchmark."
+    )
+    parser.add_argument("--custom_metrics", type=str, default=None, help="Custom metrics to use in the benchmark.")
+
+    return parser.parse_args()
+
+
+def load_dataset(dataset_name: str, custom_dataloader: dict = None):  # dataset name
+    """Loads the train, validation and test splits of a dataset.
+
+    Args:
+        dataset_name (str): The name of the dataset to load.
+        custom_dataloader (dict): A dictionary containing information about a custom dataloader to use. Defaults to None.
+
+    Returns:
+        tuple: The dataset objects for the "train", "val" and "test" splits, in that order.
+    """
+    import importlib.util  # a bare "import importlib" does not guarantee the util submodule is loaded
+
+    splits = ["train", "val", "test"]
+    # A dataset registered in multimodal_dataset_registry takes precedence over a custom dataloader.
+    if dataset_name in multimodal_dataset_registry.list_keys():
+        logger.info(f"Loading dataset {dataset_name} from multimodal_dataset_registry")
+        return tuple(multimodal_dataset_registry.create(dataset_name, split) for split in splits)
+    if custom_dataloader is not None:
+        logger.info(f"Loading dataset {dataset_name} from custom dataloader {custom_dataloader}.")
+        loader_kwargs = dict(custom_dataloader)  # copy, so pop() does not empty the caller's dict
+        custom_dataloader_file = loader_kwargs.pop("dataloader_file")
+        class_name = loader_kwargs.pop("class_name")
+        # Import the dataloader module directly from its file path and look the class up by name.
+        spec = importlib.util.spec_from_file_location(class_name, custom_dataloader_file)
+        module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(module)
+        custom_class = getattr(module, class_name)
+        return tuple(custom_class(dataset_name=dataset_name, split=split, **loader_kwargs) for split in splits)
+
+    raise ModuleNotFoundError(f"Dataset Loader for dataset {dataset_name} is not available.")
+
+
+def save_metrics(metrics_path: str, metrics: dict):
+    """Saves evaluation metrics to a CSV file named results.csv.
+
+    Args:
+        metrics_path (str): The path to the directory where the metrics should be saved.
+        metrics: The evaluation metrics to save; nested dicts are flattened into a single row.
+
+    Returns:
+        None
+    """
+    if metrics is None:
+        logger.warning("No metrics were created.")
+        return
+
+    # exist_ok avoids the race between checking for the directory and creating it.
+    os.makedirs(metrics_path, exist_ok=True)
+    file = os.path.join(metrics_path, "results.csv")
+    flat_metrics = _flatten_dict(metrics)
+    field_names = flat_metrics.keys()
+
+    # The with-block closes the file on exit, so no explicit close() is needed afterwards.
+    with open(file, "w", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=field_names)
+        writer.writeheader()
+        writer.writerow(flat_metrics)
+    logger.info("Metrics saved to %s.", file)
+
+
+def load_image(image_path, target_size=(224, 224)):
+    """Read an image as an RGB NumPy array; on any error return a zero uint8 array sized by `target_size`."""
+    try:
+        with Image.open(image_path) as handle:
+            # No resize is applied here, so a loaded image keeps its own dimensions.
+            return np.array(handle.convert("RGB"))
+    except Exception as e:
+        print(f"Error loading image {image_path}: {e}")
+        return np.zeros((target_size[0], target_size[1], 3), dtype=np.uint8)  # Placeholder for an invalid image
+
+def create_zero_image(target_size=(224, 224)):
+    blank_shape = (target_size[0], target_size[1], 3)  # two sizes from target_size plus 3 channels
+    return np.zeros(blank_shape, dtype=np.uint8)
+
+def average_images(image_paths, target_size=(224, 224)):
+    # Only the first two paths are used; each one is read through load_image.
+    loaded = [load_image(single_path, target_size) for single_path in image_paths[:2]]
+    # Element-wise mean across the loaded images, cast back to uint8.
+    return np.mean(loaded, axis=0).astype(np.uint8)
+
+
+def preprocess_data(features, image_columns, text_columns):
+    """Split a feature frame into per-modality NumPy arrays.
+
+    Returns (image_data, tabular_data, text_data); a modality with no columns is returned as None.
+    """
+    # Process image data (only the first image column, and only the first ';'-separated path per row)
+    image_data = None
+    if image_columns is not None and len(image_columns) > 0:
+        # Derive the paths without writing back into `features`, which may be a slice of the caller's frame.
+        image_paths = features[image_columns[0]].apply(lambda x: x.split(";")[0] if pd.notnull(x) else x)
+        # Convert column image data to a NumPy array
+        image_data = np.array([load_image(path) for path in image_paths])
+
+    # Process text data
+    text_data = None
+    if text_columns is not None and len(text_columns) > 0:
+        text_data = features.apply(lambda row: " ".join((str(row[col]) if row[col] is not None else "") for col in text_columns), axis=1)
+        text_data = text_data.to_numpy(dtype=str)
+        print("Text data is: ", text_data)
+
+    # Process tabular data
+    tabular_data = None
+    # Parenthesised on purpose: "+" binds tighter than "or", so the bare form dropped text_columns or hit [] + None.
+    all_image_text_columns = (image_columns or []) + (text_columns or [])
+    tabular_columns = features.columns.difference(all_image_text_columns)
+    print("tabular column is: ", tabular_columns)
+    if len(tabular_columns) > 0:
+        tabular_data = features[tabular_columns].to_numpy()
+        print(tabular_data[0])
+
+    return image_data, tabular_data, text_data
+
+
+def run(
+    dataset_name: Union[str, dict],
+    framework: str,
+    benchmark_dir: str,
+    metrics_dir: str,
+    constraint: Optional[str] = None,
+    params: Optional[dict] = None,
+    custom_dataloader: Optional[dict] = None,
+    custom_metrics: Optional[dict] = None,
+):
+    """Runs the AutoKeras benchmark on a given dataset.
+
+    Args:
+        dataset_name (Union[str, dict]): Dataset that has been registered with multimodal_dataset_registry.
+
+                            To get a list of datasets:
+
+                            from autogluon.bench.datasets.dataset_registry import multimodal_dataset_registry
+                            multimodal_dataset_registry.list_keys()
+
+        framework (str): Framework name; written to the results and used in the metrics sub-directory name.
+        benchmark_dir (str): The path to the directory where benchmarking artifacts should be saved.
+        metrics_dir (str): Directory under which "<framework>.<dataset_name>.<constraint>.local/scores" is created.
+        constraint (str): The resource constraint used by benchmarking during AWS mode, default: None.
+        params (dict): The framework params, default: None (not read by this function).
+        custom_dataloader (dict): A dictionary containing information about a custom dataloader to use. Defaults to None.
+                                To define a custom dataloader in the config file:
+
+                                custom_dataloader:
+                                    dataloader_file: path_to/dataloader.py   # relative path to WORKDIR
+                                    class_name: DataLoaderClass
+                                    dataset_config_file: path_to/dataset_config.yaml
+                                    **kwargs (of DataLoaderClass)
+        custom_metrics (dict): A dictionary containing information about a custom metrics to use. Defaults to None.
+                                To define a custom metrics in the config file:
+
+                                custom_metrics:
+                                    metrics_path: path_to/metrics.py   # relative path to WORKDIR
+                                    function_name: custom_metrics_function
+                                    **kwargs (of autogluon.core.metrics.make_scorer)
+    Returns:
+        None
+    Raises:
+        ValueError: If the dataset's problem type is not regression, binary, multiclass or classification.
+    """
+    train_data, val_data, test_data = load_dataset(dataset_name=dataset_name, custom_dataloader=custom_dataloader)
+    image_columns = train_data.image_columns
+    text_columns = train_data.text_columns
+    tabular_columns = list(set(train_data.data.columns) - set(image_columns) - set(text_columns) - set(train_data.columns_to_drop) - set(train_data.label_columns))
+    feature_columns = tabular_columns + image_columns + text_columns
+    print("Label column: ", train_data.label_columns, train_data.data[train_data.label_columns])
+
+    features_train, labels_train = train_data.data[feature_columns], train_data.data[train_data.label_columns]
+    if test_data.data is None:
+        print("No test data found, splitting test data from train data")
+        features_train, features_test, labels_train, labels_test = train_test_split(features_train, labels_train, test_size=0.2, random_state=42)
+    else:
+        features_test, labels_test = test_data.data[feature_columns], test_data.data[train_data.label_columns]
+
+    features_val, labels_val = None, None
+    if val_data.data is not None:
+        features_val, labels_val = val_data.data[feature_columns], val_data.data[train_data.label_columns]
+
+    image_data_train, tabular_data_train, text_data_train = preprocess_data(features_train, image_columns, text_columns)
+    image_data_test, tabular_data_test, text_data_test = preprocess_data(features_test, image_columns, text_columns)
+
+    image_data_val, tabular_data_val, text_data_val = (None, None, None)
+    if features_val is not None and labels_val is not None:
+        image_data_val, tabular_data_val, text_data_val = preprocess_data(features_val, image_columns, text_columns)
+
+    inputs = []
+    if image_data_train is not None:
+        print("has image_data")
+        inputs.append(ak.ImageInput())
+    if tabular_data_train is not None:
+        print("has tabular_data")
+        inputs.append(ak.StructuredDataInput())
+    if text_data_train is not None:
+        print("has text_data")
+        inputs.append(ak.TextInput())
+
+    # Pick the output head, loss and Keras metric that match the dataset's problem type.
+    if train_data.problem_type == "regression":
+        output_node = ak.RegressionHead(metrics=[tf.keras.metrics.RootMeanSquaredError()])
+    elif train_data.problem_type in ["multiclass", "classification"]:
+        output_node = ak.ClassificationHead(loss="categorical_crossentropy", metrics=["accuracy"])  # not Accuracy(): it tests exact equality
+    elif train_data.problem_type == "binary":
+        output_node = ak.ClassificationHead(loss="binary_crossentropy", metrics=[tf.keras.metrics.AUC(curve="ROC")])
+    else:
+        raise ValueError(f"Unknown problem type: {train_data.problem_type}")
+
+    # Combine the data into a list for the model
+    train_data_list = [data for data in [image_data_train, tabular_data_train, text_data_train] if data is not None]
+
+    # Same modality order (image, tabular, text) as the inputs list built above
+    test_data_list = [data for data in [image_data_test, tabular_data_test, text_data_test] if data is not None]
+
+    auto_model = ak.AutoModel(
+        inputs=inputs,
+        outputs=output_node,
+        overwrite=True,
+    )
+
+    utc_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")
+    start_time = time.time()
+    if features_val is not None and labels_val is not None:
+        val_data_list = [data for data in [image_data_val, tabular_data_val, text_data_val] if data is not None]
+
+        auto_model.fit(
+            train_data_list,
+            labels_train,
+            validation_data=(val_data_list, labels_val),
+        )
+    else:
+        auto_model.fit(
+            train_data_list,
+            labels_train,
+        )
+    end_time = time.time()
+    training_duration = round(end_time - start_time, 1)
+
+    start_time = time.time()
+    eval_results = auto_model.evaluate(test_data_list, labels_test)
+    end_time = time.time()
+    predict_duration = round(end_time - start_time, 1)
+
+    metric_name = train_data.metric
+    version = "master"
+    metrics = {
+        "id": "id/0",  # dummy id to make it align with amlb benchmark output
+        "task": dataset_name,
+        "framework": framework,
+        "constraint": constraint,
+        "version": version,
+        "fold": 0,
+        "type": train_data.problem_type,
+        "result": eval_results[1],  # NOTE(review): assumes evaluate() returns [loss, metric] - confirm
+        "metric": metric_name,
+        "utc": utc_time,
+        "training_duration": training_duration,
+        "predict_duration": predict_duration,
+        "scores": eval_results[1],
+    }
+    subdir = f"{framework}.{dataset_name}.{constraint}.local"
+    save_metrics(os.path.join(metrics_dir, subdir, "scores"), metrics)
+    
+
+if __name__ == "__main__":
+    args = get_args()
+    # These three options arrive as JSON strings; decode each one that was supplied
+    # and leave the others as None.
+    for option in ("params", "custom_dataloader", "custom_metrics"):
+        raw_value = getattr(args, option)
+        if raw_value is not None:
+            setattr(args, option, json.loads(raw_value))
+
+    run(
+        dataset_name=args.dataset_name,
+        framework=args.framework,
+        benchmark_dir=args.benchmark_dir,
+        metrics_dir=args.metrics_dir,
+        constraint=args.constraint,
+        params=args.params,
+        custom_dataloader=args.custom_dataloader,
+        custom_metrics=args.custom_metrics,
+    )
+
diff --git a/src/autogluon/bench/frameworks/benchmark.py b/src/autogluon/bench/frameworks/benchmark.py
index a3699de9..61eab3c4 100644
--- a/src/autogluon/bench/frameworks/benchmark.py
+++ b/src/autogluon/bench/frameworks/benchmark.py
@@ -10,6 +10,10 @@ class Benchmark(ABC):
     def __init__(self, benchmark_name: str, benchmark_dir: str):
         self.benchmark_name = benchmark_name
         self.benchmark_dir = benchmark_dir
+        # self.benchmark_name = "ag_bench_image_20240225T084239"
+        # self.benchmark_dir = "ag_bench_runs/multimodal/ag_bench_image_20240225T084239"
+        # self.benchmark_name = "ag_bench_text_tabular_20240227T144413"
+        # self.benchmark_dir = "ag_bench_runs/autokeras/ag_bench_text_tabular_20240227T144413"
         self.metrics_dir = os.path.join(self.benchmark_dir, "results")
         self.benchmark_dir_s3 = None
 
diff --git a/src/autogluon/bench/frameworks/multimodal/exec.py b/src/autogluon/bench/frameworks/multimodal/exec.py
index b0f53847..c6019d89 100644
--- a/src/autogluon/bench/frameworks/multimodal/exec.py
+++ b/src/autogluon/bench/frameworks/multimodal/exec.py
@@ -1,10 +1,13 @@
 import argparse
 import csv
+import copy
 import importlib
 import json
 import logging
 import os
 import time
+import random
+import numpy as np
 from datetime import datetime
 from typing import Optional, Union
 
@@ -13,6 +16,7 @@
 from autogluon.multimodal import MultiModalPredictor
 from autogluon.multimodal import __version__ as ag_version
 from autogluon.multimodal.constants import IMAGE_SIMILARITY, IMAGE_TEXT_SIMILARITY, OBJECT_DETECTION, TEXT_SIMILARITY
+from sklearn.model_selection import train_test_split
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -139,6 +143,11 @@ def save_metrics(metrics_path: str, metrics: dict):
     logger.info("Metrics saved to %s.", file)
     f.close()
 
+def set_seed(seed):  # seeds the torch, NumPy and Python `random` generators with the same value
+    import torch as th  # imported inside the function rather than at module level
+    th.manual_seed(seed)
+    np.random.seed(seed)
+    random.seed(seed)
 
 def run(
     dataset_name: Union[str, dict],
@@ -146,7 +155,7 @@ def run(
     benchmark_dir: str,
     metrics_dir: str,
     constraint: Optional[str] = None,
-    params: Optional[dict] = None,
+    params: Optional[dict] = {},
     custom_dataloader: Optional[dict] = None,
     custom_metrics: Optional[dict] = None,
 ):
@@ -181,13 +190,22 @@ def run(
     Returns:
         None
     """
+    seed = params.get("seed", 42)
+    set_seed(seed)
+
     train_data, val_data, test_data = load_dataset(dataset_name=dataset_name, custom_dataloader=custom_dataloader)
+    if test_data.data is None:
+        print("No test data found, splitting test data from train data")
+        train_set, test_set = train_test_split(train_data.data, test_size=0.2, random_state=seed)
+        train_data.data = train_set
+        test_data.data = test_set
     try:
         label_column = train_data.label_columns[0]
     except (AttributeError, IndexError):  # Object Detection does not have label columns
         label_column = None
-    if params is None:
-        params = {}
+
+    print("train_data: ", train_data, train_data.problem_type)
+
     predictor_args = {
         "label": label_column,
         "problem_type": train_data.problem_type,
@@ -217,6 +235,7 @@ def run(
     if custom_metrics is not None and custom_metrics["function_name"] == train_data.metric:
         metrics_func = load_custom_metrics(custom_metrics=custom_metrics)
 
+    print("predictor args: !!! ", predictor_args)
     predictor = MultiModalPredictor(**predictor_args)
 
     fit_args = {"train_data": train_data.data, "tuning_data": val_data.data, **params}
diff --git a/src/autogluon/bench/runbenchmark.py b/src/autogluon/bench/runbenchmark.py
index 1ed7b639..a577ebed 100644
--- a/src/autogluon/bench/runbenchmark.py
+++ b/src/autogluon/bench/runbenchmark.py
@@ -19,6 +19,7 @@
 from autogluon.bench.eval.hardware_metrics.hardware_metrics import get_hardware_metrics
 from autogluon.bench.frameworks.multimodal.multimodal_benchmark import MultiModalBenchmark
 from autogluon.bench.frameworks.tabular.tabular_benchmark import TabularBenchmark
+from autogluon.bench.frameworks.autokeras.autokeras_benchmark import AutoKerasBenchmark
 from autogluon.bench.frameworks.timeseries.timeseries_benchmark import TimeSeriesBenchmark
 from autogluon.bench.utils.general_utils import (
     download_dir_from_s3,
@@ -48,7 +49,7 @@ def get_kwargs(module: str, configs: dict):
         A dictionary containing the keyword arguments to be used for setting up and running the benchmark.
     """
 
-    if module == "multimodal":
+    if module in ["multimodal", "autokeras"]:
         framework_configs = get_framework_configs(configs=configs)
         return {
             "setup_kwargs": {
@@ -110,6 +111,7 @@ def run_benchmark(
         "multimodal": MultiModalBenchmark,
         "tabular": TabularBenchmark,
         "timeseries": TimeSeriesBenchmark,
+        "autokeras": AutoKerasBenchmark,
     }
     module_name = configs["module"]
 
@@ -352,6 +354,9 @@ def get_framework_configs(configs: dict):
     framework_name = configs.get("framework", "stable")
     frameworks = get_resource(configs=configs, resource_name="multimodal_frameworks")
     framework_configs = frameworks[framework_name]
+    if "params" not in framework_configs:
+        framework_configs["params"] = {}
+    framework_configs["params"]["seed"] = configs.get("seed", 42)
     return framework_configs
 
 
@@ -430,7 +435,7 @@ def run(
                         _mount_dir(orig_path=original_path, new_path=path)
                     os.environ["AMLB_USER_DIR"] = default_user_dir  # For Docker build
                     configs["amlb_user_dir"] = default_user_dir  # For Lambda job config
-            elif module == "multimodal":
+            elif module in ["multimodal", "autokeras"]:
                 if configs.get("custom_dataloader") is not None:
                     original_path, custom_dataloader_path = update_custom_dataloader(configs=configs)
                     paths.append(custom_dataloader_path)

From 9964a1f4a40c39b6b4420792807a6c05c1944863 Mon Sep 17 00:00:00 2001
From: Su Zhou <zhousu@amazon.com>
Date: Thu, 29 Feb 2024 05:29:48 +0000
Subject: [PATCH 02/13] fix metrics

---
 src/autogluon/bench/frameworks/autokeras/exec.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/autogluon/bench/frameworks/autokeras/exec.py b/src/autogluon/bench/frameworks/autokeras/exec.py
index 8b3a0df3..47b0605e 100644
--- a/src/autogluon/bench/frameworks/autokeras/exec.py
+++ b/src/autogluon/bench/frameworks/autokeras/exec.py
@@ -320,7 +320,7 @@ def run(
     elif train_data.problem_type in ["multiclass", "classification"]:
         output_node = ak.ClassificationHead(metrics=["accuracy"])#metrics=[tf.keras.metrics.Accuracy()])
     elif train_data.problem_type == "binary":
-        output_node = ak.ClassificationHead(metrics=["auc"])#[tf.keras.metrics.AUC(curve="ROC")])
+        output_node = ak.ClassificationHead(metrics=["AUC"])#[tf.keras.metrics.AUC(curve="ROC")])
 
     # Combine the data into a list for the model
     train_data_list = [data for data in [image_data_train, tabular_data_train, text_data_train] if data is not None]

From c38295a949954e620faef31bbb5521afc4305502 Mon Sep 17 00:00:00 2001
From: Su Zhou <zhousu@amazon.com>
Date: Thu, 29 Feb 2024 06:19:39 +0000
Subject: [PATCH 03/13] update

---
 sample_configs/cloud_configs/bench_all.py     | 41 ++++++++++---------
 .../resources/multimodal_frameworks.yaml      | 11 ++++-
 2 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/sample_configs/cloud_configs/bench_all.py b/sample_configs/cloud_configs/bench_all.py
index d24a12ef..fa3ca559 100644
--- a/sample_configs/cloud_configs/bench_all.py
+++ b/sample_configs/cloud_configs/bench_all.py
@@ -5,10 +5,11 @@
     seeds.append(random.randint(0, 100))
 print(seeds)
 
+seeds = [22, 92, 54, 86, 41]
 config_paths = [
     "sample_configs/paper_text_tabular_local_configs.yaml",
-    "sample_configs/paper_text_local_configs.yaml",
-    "sample_configs/paper_image_local_configs.yaml",
+    # "sample_configs/paper_text_local_configs.yaml",
+    # "sample_configs/paper_image_local_configs.yaml",
     # "sample_configs/multimodal_cloud_text_configs.yaml",
     # "sample_configs/multimodal_cloud_text_fs_configs.yaml",
     # "sample_configs/multimodal_cloud_text_tabular_configs.yaml",
@@ -16,14 +17,14 @@
     # "sample_configs/multimodal_cloud_text_tabular_image_standard_configs.yaml"
 ]
 frameworks = [
-    "AutoGluon_best_master",
-    "ablation_greedy_soup",
-    "ablation_gradient_clip",
-    "ablation_warmup_steps",
-    "ablation_cosine_decay",
-    "ablation_weight_decay",
-    "ablation_lr_decay",
-    # "autokeras_master",
+    # "AutoGluon_best_master",
+    # "ablation_greedy_soup",
+    # "ablation_gradient_clip",
+    # "ablation_warmup_steps",
+    # "ablation_cosine_decay",
+    # "ablation_weight_decay",
+    # "ablation_lr_decay",
+    "autokeras_master",
     # "torch_compile_best",
     # "AutoGluon_best_master",
     # "AutoGluon_high_master",
@@ -46,8 +47,8 @@
     5,
     10
 ]
-# module = "autokeras"
-module = "multimodal"
+module = "autokeras"
+# module = "multimodal"
 
 import yaml
 import os
@@ -56,15 +57,15 @@
 config_root = "./temp_configs"
 os.makedirs(config_root, exist_ok=True)
 
-for constraint in constraints:
-    os.makedirs(f"{config_root}/{constraint}", exist_ok=True)
-    for framework in frameworks:
-        # for shot in fs:
-            config_dir = f"{config_root}/{constraint}/{framework}"
-            os.makedirs(config_dir, exist_ok=True)
+for seed in seeds:
+    for constraint in constraints:
+        os.makedirs(f"{config_root}/{constraint}", exist_ok=True)
+        for framework in frameworks:
+            # for shot in fs:
+                config_dir = f"{config_root}/{constraint}/{framework}"
+                os.makedirs(config_dir, exist_ok=True)
 
-            for config_path in config_paths:
-                for seed in seeds:
+                for config_path in config_paths:
                     with open(config_path, "r") as f:
                         configs = yaml.safe_load(f)
                         if constraint == "g4_12x":
diff --git a/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml b/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
index 7a2765f5..d1a1770f 100644
--- a/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
+++ b/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
@@ -13,6 +13,8 @@ AutoGluon_best_master:
   version: master
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
+    # hyperparameters:
+    #         optimization.gradient_clip_val: 0.1
 
 ablation_greedy_soup:
   repo: https://github.com/autogluon/autogluon.git
@@ -20,7 +22,8 @@ ablation_greedy_soup:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.top_k_average_method: best
+        optimization.top_k_average_method: best
+        #    optimization.gradient_clip_val: 0.1
 
 ablation_gradient_clip:
   repo: https://github.com/autogluon/autogluon.git
@@ -28,7 +31,7 @@ ablation_gradient_clip:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.gradient_clip_algorithm: value
+      optimization.gradient_clip_val: 10000
 
 ablation_warmup_steps:
   repo: https://github.com/autogluon/autogluon.git
@@ -37,6 +40,7 @@ ablation_warmup_steps:
     presets: best_quality
     hyperparameters:
       optimization.warmup_steps: 0.0
+      # optimization.gradient_clip_val: 0.1
 
 ablation_cosine_decay:
   repo: https://github.com/autogluon/autogluon.git
@@ -45,6 +49,7 @@ ablation_cosine_decay:
     presets: best_quality
     hyperparameters:
       optimization.lr_schedule: polynomial_decay
+      # optimization.gradient_clip_val: 0.1
 
 ablation_weight_decay:
   repo: https://github.com/autogluon/autogluon.git
@@ -53,6 +58,7 @@ ablation_weight_decay:
     presets: best_quality
     hyperparameters:
       optimization.weight_decay: 0.0
+      # optimization.gradient_clip_val: 0.1
 
 ablation_lr_decay:
   repo: https://github.com/autogluon/autogluon.git
@@ -61,6 +67,7 @@ ablation_lr_decay:
     presets: best_quality
     hyperparameters:
       optimization.lr_decay: 0.0 
+      optimization.gradient_clip_val: 0.1
 
 
 autokeras_master:

From 28a86827f22ad7b3b3f26e641745f853b6850315 Mon Sep 17 00:00:00 2001
From: Su Zhou <zhousu@amazon.com>
Date: Thu, 29 Feb 2024 08:20:33 +0000
Subject: [PATCH 04/13] update

---
 .../cloud_configs/resources/bench_all.py      | 87 +++++++++++++++++++
 .../resources/multimodal_frameworks.yaml      | 45 +++++++---
 .../custom_configs/dataloaders/__init__.py    |  0
 .../custom_configs/resources/__init__.py      |  0
 .../resources/multimodal_frameworks.yaml      | 38 ++++++--
 .../eval/scripts/run_generate_clean_openml.py | 39 +--------
 6 files changed, 150 insertions(+), 59 deletions(-)
 create mode 100644 sample_configs/cloud_configs/resources/bench_all.py
 delete mode 100644 src/autogluon/bench/custom_configs/dataloaders/__init__.py
 delete mode 100644 src/autogluon/bench/custom_configs/resources/__init__.py

diff --git a/sample_configs/cloud_configs/resources/bench_all.py b/sample_configs/cloud_configs/resources/bench_all.py
new file mode 100644
index 00000000..f098db57
--- /dev/null
+++ b/sample_configs/cloud_configs/resources/bench_all.py
@@ -0,0 +1,87 @@
+import random
+n_experiments = 5  # number of random seeds drawn by the loop below
+seeds = []
+for i in range(n_experiments):
+    seeds.append(random.randint(0, 100))
+
+seeds = [22, 92, 54, 86, 41]  # replaces the randomly drawn list above with a fixed set of seeds
+
+config_paths = [  # base configs; each is loaded, patched and re-written once per run below
+    "sample_configs/paper_text_tabular_local_configs.yaml",
+    "sample_configs/paper_text_local_configs.yaml",
+    "sample_configs/paper_image_local_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_fs_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_tabular_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_tabular_image_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_tabular_image_standard_configs.yaml"
+]
+frameworks = [  # framework names written into the "framework" key of each generated config
+    # "AutoGluon_best_master",
+    # "ablation_base",
+    # "ablation_greedy_soup",
+    # "ablation_gradient_clip",
+    "ablation_warmup_steps",
+    "ablation_cosine_decay",
+    "ablation_weight_decay",
+    # "ablation_lr_decay",
+    # "autokeras_master",
+    # "torch_compile_best",
+    # "AutoGluon_best_master",
+    # "AutoGluon_high_master",
+    # "AutoGluon_medium_master",
+    # "AutoGluon_high_vitlarge",
+    # "AutoGluon_medium_vitlarge",
+    # "AutoGluon_best_vitlarge",
+    # "AutoGluon_best_caformer",
+    # "AutoGluon_best_beit",
+    # "AutoGluon_best_swinv2"
+    # "AutoGluon_high_0_8",
+    # "AutoGluon_medium_0_8",
+    # "AutoGluon_best_0_8",
+]
+constraints = [  # constraint names written into the "constraint" key of each generated config
+    "g4_12x"
+]
+fs = [  # only referenced by the commented-out "for shot in fs" loop below
+    1,
+    5,
+    10
+]
+# module = "autokeras"
+module = "multimodal"
+
+import yaml
+import os
+import subprocess
+
+config_root = "./temp_configs"  # generated configs are written under this directory
+os.makedirs(config_root, exist_ok=True)
+
+for seed in seeds:  # one benchmark run per (seed, constraint, framework, config file)
+    print("Seed: ", seed)
+    for constraint in constraints:
+        os.makedirs(f"{config_root}/{constraint}", exist_ok=True)
+        for framework in frameworks:
+            # for shot in fs:
+                config_dir = f"{config_root}/{constraint}/{framework}"
+                os.makedirs(config_dir, exist_ok=True)
+
+                for config_path in config_paths:
+                    with open(config_path, "r") as f:
+                        configs = yaml.safe_load(f)
+                        if constraint == "g4_12x":
+                            configs["cdk_context"]["PREFIX"] = f"{configs['cdk_context']['PREFIX']}-multi"
+                        configs["constraint"] = constraint
+                        configs["framework"] = framework
+                        configs["module"] = module
+                        configs["seed"] = seed  # stored in the generated config alongside the other overrides
+                        # configs["custom_dataloader"]["shot"] = shot
+                        configs["benchmark_name"] = f"{configs['benchmark_name']}-{seed}"  # suffix the name with the seed
+                        new_config_path = os.path.join(config_dir, os.path.basename(config_path))  # same path for every seed, so it is overwritten
+                        with open(new_config_path, "w") as new_f:
+                            yaml.dump(configs, new_f)
+                        print("Running config: ", new_config_path)
+                        command = ["agbench", "run", new_config_path]
+                        subprocess.run(command)  # waits for the run to finish; the exit status is not checked
diff --git a/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml b/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
index d1a1770f..514879a4 100644
--- a/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
+++ b/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
@@ -13,17 +13,32 @@ AutoGluon_best_master:
   version: master
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
-    # hyperparameters:
-    #         optimization.gradient_clip_val: 0.1
 
-ablation_greedy_soup:
+ablation_base:
   repo: https://github.com/autogluon/autogluon.git
   version: master
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
         optimization.top_k_average_method: best
-        #    optimization.gradient_clip_val: 0.1
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
+
+
+ablation_greedy_soup:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_gradient_clip:
   repo: https://github.com/autogluon/autogluon.git
@@ -31,7 +46,10 @@ ablation_gradient_clip:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.gradient_clip_val: 10000
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_warmup_steps:
   repo: https://github.com/autogluon/autogluon.git
@@ -39,8 +57,9 @@ ablation_warmup_steps:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.warmup_steps: 0.0
-      # optimization.gradient_clip_val: 0.1
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_cosine_decay:
   repo: https://github.com/autogluon/autogluon.git
@@ -48,8 +67,9 @@ ablation_cosine_decay:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.lr_schedule: polynomial_decay
-      # optimization.gradient_clip_val: 0.1
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
+
 
 ablation_weight_decay:
   repo: https://github.com/autogluon/autogluon.git
@@ -57,17 +77,14 @@ ablation_weight_decay:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.weight_decay: 0.0
-      # optimization.gradient_clip_val: 0.1
+        optimization.lr_decay: 0
+
 
 ablation_lr_decay:
   repo: https://github.com/autogluon/autogluon.git
   version: master
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
-    hyperparameters:
-      optimization.lr_decay: 0.0 
-      optimization.gradient_clip_val: 0.1
 
 
 autokeras_master:
diff --git a/src/autogluon/bench/custom_configs/dataloaders/__init__.py b/src/autogluon/bench/custom_configs/dataloaders/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/autogluon/bench/custom_configs/resources/__init__.py b/src/autogluon/bench/custom_configs/resources/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml b/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
index 7a2765f5..514879a4 100644
--- a/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
+++ b/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
@@ -14,13 +14,31 @@ AutoGluon_best_master:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
 
+ablation_base:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+        optimization.top_k_average_method: best
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
+
+
 ablation_greedy_soup:
   repo: https://github.com/autogluon/autogluon.git
   version: master
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.top_k_average_method: best
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_gradient_clip:
   repo: https://github.com/autogluon/autogluon.git
@@ -28,7 +46,10 @@ ablation_gradient_clip:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.gradient_clip_algorithm: value
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_warmup_steps:
   repo: https://github.com/autogluon/autogluon.git
@@ -36,7 +57,9 @@ ablation_warmup_steps:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.warmup_steps: 0.0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_cosine_decay:
   repo: https://github.com/autogluon/autogluon.git
@@ -44,7 +67,9 @@ ablation_cosine_decay:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.lr_schedule: polynomial_decay
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
+
 
 ablation_weight_decay:
   repo: https://github.com/autogluon/autogluon.git
@@ -52,15 +77,14 @@ ablation_weight_decay:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.weight_decay: 0.0
+        optimization.lr_decay: 0
+
 
 ablation_lr_decay:
   repo: https://github.com/autogluon/autogluon.git
   version: master
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
-    hyperparameters:
-      optimization.lr_decay: 0.0 
 
 
 autokeras_master:
diff --git a/src/autogluon/bench/eval/scripts/run_generate_clean_openml.py b/src/autogluon/bench/eval/scripts/run_generate_clean_openml.py
index e5cbe9d1..de1085e0 100644
--- a/src/autogluon/bench/eval/scripts/run_generate_clean_openml.py
+++ b/src/autogluon/bench/eval/scripts/run_generate_clean_openml.py
@@ -8,16 +8,6 @@
 import typer
 from typing_extensions import Annotated
 
-from autogluon.bench.eval.evaluation.constants import (
-    DATASET,
-    FOLD,
-    FRAMEWORK,
-    METRIC,
-    METRIC_ERROR,
-    PROBLEM_TYPE,
-    TIME_INFER_S,
-    TIME_TRAIN_S,
-)
 from autogluon.bench.eval.evaluation.constants import (
     DATASET,
     FOLD,
@@ -52,7 +42,6 @@
 def clean_amlb_results(
     benchmark_name: str = typer.Argument(
         None, help="Benchmark name populated by benchmark run, in format <benchmark_name>_<timestamp>"
-        None, help="Benchmark name populated by benchmark run, in format <benchmark_name>_<timestamp>"
     ),
     results_dir: str = typer.Option("data/results/", help="Root directory of raw and prepared results."),
     results_dir_input: str = typer.Option(
@@ -126,7 +115,6 @@ def clean_and_save_results(
     run_name_in_output_path: bool = True,
     save: bool = True,
     save_minimal: bool = True,
-    constraints: List[str] | None = None,
     out_path_prefix: str = "openml_ag_",
     out_path_suffix: str = "",
     framework_suffix_column: str = "constraint",
@@ -151,34 +139,9 @@ def clean_and_save_results(
         results_list.append(results)
     results_raw = pd.concat(results_list, ignore_index=True, sort=True)
 
-    if "framework_parent" in results_raw.columns:
-        results_raw[FRAMEWORK] = results_raw["framework_parent"] + "_" + run_name + "_" + results_raw[FRAMEWORK]
-    else:
-        results_raw[FRAMEWORK] = results_raw[FRAMEWORK] + "_" + run_name
-
-    minimal_columns = [
-        DATASET,
-        FOLD,
-        FRAMEWORK,
-        "constraint",
-        METRIC,
-        METRIC_ERROR,
-        TIME_TRAIN_S,
-        TIME_INFER_S,
-        PROBLEM_TYPE,
-        "tid",
-    ]
-
-    results_raw_columns = list(results_raw.columns)
-    results_raw_columns = [c for c in results_raw_columns if c in minimal_columns] + [
-        c for c in results_raw_columns if c not in minimal_columns
-    ]
-    results_raw = results_raw[results_raw_columns]
-
     if save:
         if run_name_in_output_path:
             save_path = os.path.join(results_dir_output, f"{out_path_prefix}{run_name}{out_path_suffix}")
-            save_path = os.path.join(results_dir_output, f"{out_path_prefix}{run_name}{out_path_suffix}")
         else:
             save_path = os.path.join(results_dir_output, f"{out_path_prefix}{out_path_suffix}")
         save_path_file = f"{save_path}.csv"
@@ -188,7 +151,7 @@ def clean_and_save_results(
         save_path_file_pq = f"{save_path}.parquet"
         save_pd.save(path=save_path_file_pq, df=results_raw)
         if save_minimal:
-            results_raw_minimal = results_raw[minimal_columns]
+            results_raw_minimal = results_raw[MINIMAL_COLUMNS]
 
             save_path_file_minimum = f"{save_path}_min.csv"
             save_pd.save(path=save_path_file_minimum, df=results_raw_minimal)

From b21c952036aa34b7e1802f94093d5b4d4446174d Mon Sep 17 00:00:00 2001
From: Su Zhou <zhousu@amazon.com>
Date: Thu, 29 Feb 2024 08:20:33 +0000
Subject: [PATCH 05/13] update

---
 sample_configs/cloud_configs/bench_all.py     | 24 ++---
 .../cloud_configs/resources/bench_all.py      | 87 +++++++++++++++++++
 .../resources/multimodal_frameworks.yaml      | 45 +++++++---
 .../custom_configs/dataloaders/__init__.py    |  0
 .../custom_configs/resources/__init__.py      |  0
 .../resources/multimodal_frameworks.yaml      | 38 ++++++--
 .../eval/scripts/run_generate_clean_openml.py | 39 +--------
 7 files changed, 163 insertions(+), 70 deletions(-)
 create mode 100644 sample_configs/cloud_configs/resources/bench_all.py
 delete mode 100644 src/autogluon/bench/custom_configs/dataloaders/__init__.py
 delete mode 100644 src/autogluon/bench/custom_configs/resources/__init__.py

diff --git a/sample_configs/cloud_configs/bench_all.py b/sample_configs/cloud_configs/bench_all.py
index fa3ca559..f098db57 100644
--- a/sample_configs/cloud_configs/bench_all.py
+++ b/sample_configs/cloud_configs/bench_all.py
@@ -3,13 +3,13 @@
 seeds = []
 for i in range(n_experiments):
     seeds.append(random.randint(0, 100))
-print(seeds)
 
 seeds = [22, 92, 54, 86, 41]
+
 config_paths = [
     "sample_configs/paper_text_tabular_local_configs.yaml",
-    # "sample_configs/paper_text_local_configs.yaml",
-    # "sample_configs/paper_image_local_configs.yaml",
+    "sample_configs/paper_text_local_configs.yaml",
+    "sample_configs/paper_image_local_configs.yaml",
     # "sample_configs/multimodal_cloud_text_configs.yaml",
     # "sample_configs/multimodal_cloud_text_fs_configs.yaml",
     # "sample_configs/multimodal_cloud_text_tabular_configs.yaml",
@@ -18,13 +18,14 @@
 ]
 frameworks = [
     # "AutoGluon_best_master",
+    # "ablation_base",
     # "ablation_greedy_soup",
     # "ablation_gradient_clip",
-    # "ablation_warmup_steps",
-    # "ablation_cosine_decay",
-    # "ablation_weight_decay",
+    "ablation_warmup_steps",
+    "ablation_cosine_decay",
+    "ablation_weight_decay",
     # "ablation_lr_decay",
-    "autokeras_master",
+    # "autokeras_master",
     # "torch_compile_best",
     # "AutoGluon_best_master",
     # "AutoGluon_high_master",
@@ -47,8 +48,8 @@
     5,
     10
 ]
-module = "autokeras"
-# module = "multimodal"
+# module = "autokeras"
+module = "multimodal"
 
 import yaml
 import os
@@ -58,6 +59,7 @@
 os.makedirs(config_root, exist_ok=True)
 
 for seed in seeds:
+    print("Seed: ", seed)
     for constraint in constraints:
         os.makedirs(f"{config_root}/{constraint}", exist_ok=True)
         for framework in frameworks:
@@ -75,11 +77,11 @@
                         configs["module"] = module
                         configs["seed"] = seed 
                         # configs["custom_dataloader"]["shot"] = shot
-                        # configs["benchmark_name"] = f"{configs['benchmark_name']}-{shot}"
+                        configs["benchmark_name"] = f"{configs['benchmark_name']}-{seed}"
                         new_config_path = os.path.join(config_dir, os.path.basename(config_path))
                         with open(new_config_path, "w") as new_f:
                             yaml.dump(configs, new_f)
-
+                        print("Running config: ", new_config_path)
                         command = ["agbench", "run", new_config_path]
                         subprocess.run(command)
 
diff --git a/sample_configs/cloud_configs/resources/bench_all.py b/sample_configs/cloud_configs/resources/bench_all.py
new file mode 100644
index 00000000..f098db57
--- /dev/null
+++ b/sample_configs/cloud_configs/resources/bench_all.py
@@ -0,0 +1,87 @@
+import random
+n_experiments = 5
+seeds = []
+for i in range(n_experiments):
+    seeds.append(random.randint(0, 100))
+
+seeds = [22, 92, 54, 86, 41]
+
+config_paths = [
+    "sample_configs/paper_text_tabular_local_configs.yaml",
+    "sample_configs/paper_text_local_configs.yaml",
+    "sample_configs/paper_image_local_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_fs_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_tabular_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_tabular_image_configs.yaml",
+    # "sample_configs/multimodal_cloud_text_tabular_image_standard_configs.yaml"
+]
+frameworks = [
+    # "AutoGluon_best_master",
+    # "ablation_base",
+    # "ablation_greedy_soup",
+    # "ablation_gradient_clip",
+    "ablation_warmup_steps",
+    "ablation_cosine_decay",
+    "ablation_weight_decay",
+    # "ablation_lr_decay",
+    # "autokeras_master",
+    # "torch_compile_best",
+    # "AutoGluon_best_master",
+    # "AutoGluon_high_master",
+    # "AutoGluon_medium_master",
+    # "AutoGluon_high_vitlarge",
+    # "AutoGluon_medium_vitlarge",
+    # "AutoGluon_best_vitlarge",
+    # "AutoGluon_best_caformer",
+    # "AutoGluon_best_beit",
+    # "AutoGluon_best_swinv2"
+    # "AutoGluon_high_0_8",
+    # "AutoGluon_medium_0_8",
+    # "AutoGluon_best_0_8",
+]
+constraints = [
+    "g4_12x"
+]
+fs = [
+    1,
+    5,
+    10
+]
+# module = "autokeras"
+module = "multimodal"
+
+import yaml
+import os
+import subprocess
+
+config_root = "./temp_configs"
+os.makedirs(config_root, exist_ok=True)
+
+for seed in seeds:
+    print("Seed: ", seed)
+    for constraint in constraints:
+        os.makedirs(f"{config_root}/{constraint}", exist_ok=True)
+        for framework in frameworks:
+            # for shot in fs:
+                config_dir = f"{config_root}/{constraint}/{framework}"
+                os.makedirs(config_dir, exist_ok=True)
+
+                for config_path in config_paths:
+                    with open(config_path, "r") as f:
+                        configs = yaml.safe_load(f)
+                        if constraint == "g4_12x":
+                            configs["cdk_context"]["PREFIX"] = f"{configs['cdk_context']['PREFIX']}-multi"
+                        configs["constraint"] = constraint
+                        configs["framework"] = framework
+                        configs["module"] = module
+                        configs["seed"] = seed 
+                        # configs["custom_dataloader"]["shot"] = shot
+                        configs["benchmark_name"] = f"{configs['benchmark_name']}-{seed}"
+                        new_config_path = os.path.join(config_dir, os.path.basename(config_path))
+                        with open(new_config_path, "w") as new_f:
+                            yaml.dump(configs, new_f)
+                        print("Running config: ", new_config_path)
+                        command = ["agbench", "run", new_config_path]
+                        subprocess.run(command)
+
diff --git a/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml b/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
index d1a1770f..514879a4 100644
--- a/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
+++ b/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
@@ -13,17 +13,32 @@ AutoGluon_best_master:
   version: master
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
-    # hyperparameters:
-    #         optimization.gradient_clip_val: 0.1
 
-ablation_greedy_soup:
+ablation_base:
   repo: https://github.com/autogluon/autogluon.git
   version: master
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
         optimization.top_k_average_method: best
-        #    optimization.gradient_clip_val: 0.1
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
+
+
+ablation_greedy_soup:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_gradient_clip:
   repo: https://github.com/autogluon/autogluon.git
@@ -31,7 +46,10 @@ ablation_gradient_clip:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.gradient_clip_val: 10000
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_warmup_steps:
   repo: https://github.com/autogluon/autogluon.git
@@ -39,8 +57,9 @@ ablation_warmup_steps:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.warmup_steps: 0.0
-      # optimization.gradient_clip_val: 0.1
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_cosine_decay:
   repo: https://github.com/autogluon/autogluon.git
@@ -48,8 +67,9 @@ ablation_cosine_decay:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.lr_schedule: polynomial_decay
-      # optimization.gradient_clip_val: 0.1
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
+
 
 ablation_weight_decay:
   repo: https://github.com/autogluon/autogluon.git
@@ -57,17 +77,14 @@ ablation_weight_decay:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.weight_decay: 0.0
-      # optimization.gradient_clip_val: 0.1
+        optimization.lr_decay: 0
+
 
 ablation_lr_decay:
   repo: https://github.com/autogluon/autogluon.git
   version: master
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
-    hyperparameters:
-      optimization.lr_decay: 0.0 
-      optimization.gradient_clip_val: 0.1
 
 
 autokeras_master:
diff --git a/src/autogluon/bench/custom_configs/dataloaders/__init__.py b/src/autogluon/bench/custom_configs/dataloaders/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/autogluon/bench/custom_configs/resources/__init__.py b/src/autogluon/bench/custom_configs/resources/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml b/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
index 7a2765f5..514879a4 100644
--- a/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
+++ b/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
@@ -14,13 +14,31 @@ AutoGluon_best_master:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
 
+ablation_base:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+        optimization.top_k_average_method: best
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
+
+
 ablation_greedy_soup:
   repo: https://github.com/autogluon/autogluon.git
   version: master
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.top_k_average_method: best
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_gradient_clip:
   repo: https://github.com/autogluon/autogluon.git
@@ -28,7 +46,10 @@ ablation_gradient_clip:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.gradient_clip_algorithm: value
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_warmup_steps:
   repo: https://github.com/autogluon/autogluon.git
@@ -36,7 +57,9 @@ ablation_warmup_steps:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.warmup_steps: 0.0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_cosine_decay:
   repo: https://github.com/autogluon/autogluon.git
@@ -44,7 +67,9 @@ ablation_cosine_decay:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.lr_schedule: polynomial_decay
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
+
 
 ablation_weight_decay:
   repo: https://github.com/autogluon/autogluon.git
@@ -52,15 +77,14 @@ ablation_weight_decay:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.weight_decay: 0.0
+        optimization.lr_decay: 0
+
 
 ablation_lr_decay:
   repo: https://github.com/autogluon/autogluon.git
   version: master
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
-    hyperparameters:
-      optimization.lr_decay: 0.0 
 
 
 autokeras_master:
diff --git a/src/autogluon/bench/eval/scripts/run_generate_clean_openml.py b/src/autogluon/bench/eval/scripts/run_generate_clean_openml.py
index e5cbe9d1..de1085e0 100644
--- a/src/autogluon/bench/eval/scripts/run_generate_clean_openml.py
+++ b/src/autogluon/bench/eval/scripts/run_generate_clean_openml.py
@@ -8,16 +8,6 @@
 import typer
 from typing_extensions import Annotated
 
-from autogluon.bench.eval.evaluation.constants import (
-    DATASET,
-    FOLD,
-    FRAMEWORK,
-    METRIC,
-    METRIC_ERROR,
-    PROBLEM_TYPE,
-    TIME_INFER_S,
-    TIME_TRAIN_S,
-)
 from autogluon.bench.eval.evaluation.constants import (
     DATASET,
     FOLD,
@@ -52,7 +42,6 @@
 def clean_amlb_results(
     benchmark_name: str = typer.Argument(
         None, help="Benchmark name populated by benchmark run, in format <benchmark_name>_<timestamp>"
-        None, help="Benchmark name populated by benchmark run, in format <benchmark_name>_<timestamp>"
     ),
     results_dir: str = typer.Option("data/results/", help="Root directory of raw and prepared results."),
     results_dir_input: str = typer.Option(
@@ -126,7 +115,6 @@ def clean_and_save_results(
     run_name_in_output_path: bool = True,
     save: bool = True,
     save_minimal: bool = True,
-    constraints: List[str] | None = None,
     out_path_prefix: str = "openml_ag_",
     out_path_suffix: str = "",
     framework_suffix_column: str = "constraint",
@@ -151,34 +139,9 @@ def clean_and_save_results(
         results_list.append(results)
     results_raw = pd.concat(results_list, ignore_index=True, sort=True)
 
-    if "framework_parent" in results_raw.columns:
-        results_raw[FRAMEWORK] = results_raw["framework_parent"] + "_" + run_name + "_" + results_raw[FRAMEWORK]
-    else:
-        results_raw[FRAMEWORK] = results_raw[FRAMEWORK] + "_" + run_name
-
-    minimal_columns = [
-        DATASET,
-        FOLD,
-        FRAMEWORK,
-        "constraint",
-        METRIC,
-        METRIC_ERROR,
-        TIME_TRAIN_S,
-        TIME_INFER_S,
-        PROBLEM_TYPE,
-        "tid",
-    ]
-
-    results_raw_columns = list(results_raw.columns)
-    results_raw_columns = [c for c in results_raw_columns if c in minimal_columns] + [
-        c for c in results_raw_columns if c not in minimal_columns
-    ]
-    results_raw = results_raw[results_raw_columns]
-
     if save:
         if run_name_in_output_path:
             save_path = os.path.join(results_dir_output, f"{out_path_prefix}{run_name}{out_path_suffix}")
-            save_path = os.path.join(results_dir_output, f"{out_path_prefix}{run_name}{out_path_suffix}")
         else:
             save_path = os.path.join(results_dir_output, f"{out_path_prefix}{out_path_suffix}")
         save_path_file = f"{save_path}.csv"
@@ -188,7 +151,7 @@ def clean_and_save_results(
         save_path_file_pq = f"{save_path}.parquet"
         save_pd.save(path=save_path_file_pq, df=results_raw)
         if save_minimal:
-            results_raw_minimal = results_raw[minimal_columns]
+            results_raw_minimal = results_raw[MINIMAL_COLUMNS]
 
             save_path_file_minimum = f"{save_path}_min.csv"
             save_pd.save(path=save_path_file_minimum, df=results_raw_minimal)

From 75ccfc2ad4e8f37781e6aa4faba97a4d5febe919 Mon Sep 17 00:00:00 2001
From: Su Zhou <zhousu@amazon.com>
Date: Thu, 29 Feb 2024 08:39:09 +0000
Subject: [PATCH 06/13] update frameworks

---
 .../resources/multimodal_frameworks.yaml      | 75 ++++++++++++-------
 1 file changed, 50 insertions(+), 25 deletions(-)

diff --git a/sample_configs/resources/multimodal_frameworks.yaml b/sample_configs/resources/multimodal_frameworks.yaml
index f39799de..38dc46f1 100644
--- a/sample_configs/resources/multimodal_frameworks.yaml
+++ b/sample_configs/resources/multimodal_frameworks.yaml
@@ -9,60 +9,85 @@ AutoGluon_branch:
       optimization.learning_rate: 0.005
 
 AutoGluon_best_master:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git 
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
-    time_limit: 90
+
+ablation_base:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+        optimization.top_k_average_method: best
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
+
 
 ablation_greedy_soup:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.top_k_average_method: best
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_gradient_clip:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.gradient_clip_algorithm: value
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_warmup_steps:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.warmup_steps: 0.0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_cosine_decay:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.lr_schedule: polynomial_decay
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
+
 
 ablation_weight_decay:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.weight_decay: 0.0
+        optimization.lr_decay: 0
 
-ablation_cosine_decay:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+
+ablation_lr_decay:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
-    hyperparameters:
-      optimization.lr_decay: 0.0 
-
 
-AutoKeras:
 
+autokeras_master:
+  repo: https://github.com/keras-team/keras-tuner.git 
+  version: master
+  

From dfd0f3fa8833431cdfef1675b3c62bb78bd79904 Mon Sep 17 00:00:00 2001
From: Su Zhou <zhousu@amazon.com>
Date: Thu, 29 Feb 2024 08:39:09 +0000
Subject: [PATCH 07/13] update frameworks

---
 .../resources/multimodal_frameworks.yaml      | 32 ++++----
 .../resources/multimodal_frameworks.yaml      | 75 ++++++++++++-------
 2 files changed, 66 insertions(+), 41 deletions(-)

diff --git a/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml b/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
index 514879a4..38dc46f1 100644
--- a/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
+++ b/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
@@ -9,14 +9,14 @@ AutoGluon_branch:
       optimization.learning_rate: 0.005
 
 AutoGluon_best_master:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git 
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
 
 ablation_base:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
@@ -29,8 +29,8 @@ ablation_base:
 
 
 ablation_greedy_soup:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
@@ -41,8 +41,8 @@ ablation_greedy_soup:
         optimization.lr_decay: 0
 
 ablation_gradient_clip:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
@@ -52,8 +52,8 @@ ablation_gradient_clip:
         optimization.lr_decay: 0
 
 ablation_warmup_steps:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
@@ -62,8 +62,8 @@ ablation_warmup_steps:
         optimization.lr_decay: 0
 
 ablation_cosine_decay:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
@@ -72,8 +72,8 @@ ablation_cosine_decay:
 
 
 ablation_weight_decay:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
@@ -81,8 +81,8 @@ ablation_weight_decay:
 
 
 ablation_lr_decay:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
 
diff --git a/sample_configs/resources/multimodal_frameworks.yaml b/sample_configs/resources/multimodal_frameworks.yaml
index f39799de..38dc46f1 100644
--- a/sample_configs/resources/multimodal_frameworks.yaml
+++ b/sample_configs/resources/multimodal_frameworks.yaml
@@ -9,60 +9,85 @@ AutoGluon_branch:
       optimization.learning_rate: 0.005
 
 AutoGluon_best_master:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git 
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
-    time_limit: 90
+
+ablation_base:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+        optimization.top_k_average_method: best
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
+
 
 ablation_greedy_soup:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.top_k_average_method: best
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_gradient_clip:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.gradient_clip_algorithm: value
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_warmup_steps:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.warmup_steps: 0.0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
 
 ablation_cosine_decay:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.lr_schedule: polynomial_decay
+        optimization.weight_decay: 0
+        optimization.lr_decay: 0
+
 
 ablation_weight_decay:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.weight_decay: 0.0
+        optimization.lr_decay: 0
 
-ablation_cosine_decay:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+
+ablation_lr_decay:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
-    hyperparameters:
-      optimization.lr_decay: 0.0 
-
 
-AutoKeras:
 
+autokeras_master:
+  repo: https://github.com/keras-team/keras-tuner.git 
+  version: master
+  

From fc5f31b67b78f62021db5d94f959298a258854e2 Mon Sep 17 00:00:00 2001
From: Su Zhou <zhousu@amazon.com>
Date: Tue, 5 Mar 2024 00:05:22 +0000
Subject: [PATCH 08/13] update ablation configs

---
 .../resources/multimodal_frameworks.yaml      | 38 ++++---------------
 1 file changed, 7 insertions(+), 31 deletions(-)

diff --git a/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml b/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
index 514879a4..7a2765f5 100644
--- a/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
+++ b/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
@@ -14,31 +14,13 @@ AutoGluon_best_master:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
 
-ablation_base:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
-  params:  # MultimodalPredictor.fit(params)
-    presets: best_quality
-    hyperparameters:
-        optimization.top_k_average_method: best
-        optimization.gradient_clip_val: 0
-        optimization.warmup_steps: 0
-        optimization.lr_schedule: constant
-        optimization.weight_decay: 0
-        optimization.lr_decay: 0
-
-
 ablation_greedy_soup:
   repo: https://github.com/autogluon/autogluon.git
   version: master
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-        optimization.gradient_clip_val: 0
-        optimization.warmup_steps: 0
-        optimization.lr_schedule: constant
-        optimization.weight_decay: 0
-        optimization.lr_decay: 0
+      optimization.top_k_average_method: best
 
 ablation_gradient_clip:
   repo: https://github.com/autogluon/autogluon.git
@@ -46,10 +28,7 @@ ablation_gradient_clip:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-        optimization.warmup_steps: 0
-        optimization.lr_schedule: constant
-        optimization.weight_decay: 0
-        optimization.lr_decay: 0
+      optimization.gradient_clip_algorithm: value
 
 ablation_warmup_steps:
   repo: https://github.com/autogluon/autogluon.git
@@ -57,9 +36,7 @@ ablation_warmup_steps:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-        optimization.lr_schedule: constant
-        optimization.weight_decay: 0
-        optimization.lr_decay: 0
+      optimization.warmup_steps: 0.0
 
 ablation_cosine_decay:
   repo: https://github.com/autogluon/autogluon.git
@@ -67,9 +44,7 @@ ablation_cosine_decay:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-        optimization.weight_decay: 0
-        optimization.lr_decay: 0
-
+      optimization.lr_schedule: polynomial_decay
 
 ablation_weight_decay:
   repo: https://github.com/autogluon/autogluon.git
@@ -77,14 +52,15 @@ ablation_weight_decay:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-        optimization.lr_decay: 0
-
+      optimization.weight_decay: 0.0
 
 ablation_lr_decay:
   repo: https://github.com/autogluon/autogluon.git
   version: master
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
+    hyperparameters:
+      optimization.lr_decay: 0.0 
 
 
 autokeras_master:

From c3134447094d1d43a8b093b848d8af3d5152cf24 Mon Sep 17 00:00:00 2001
From: Su Zhou <zhousu@amazon.com>
Date: Tue, 5 Mar 2024 02:38:06 +0000
Subject: [PATCH 09/13] update

---
 ...sion_dataloader.py => image_dataloader.py} |   2 +-
 ...sion_datasets.yaml => image_datasets.yaml} |   0
 sample_configs/multimodal_cloud_configs.yaml  |  36 --
 sample_configs/multimodal_local_configs.yaml  |  24 --
 ...gs.yaml => paper_image_cloud_configs.yaml} |  23 +-
 sample_configs/paper_image_local_configs.yaml |  69 ++--
 ...igs.yaml => paper_text_cloud_configs.yaml} |  17 +-
 sample_configs/paper_text_local_configs.yaml  |  28 +-
 ... => paper_text_tabular_cloud_configs.yaml} |  15 +-
 .../paper_text_tabular_local_configs.yaml     |  27 +-
 sample_configs/tabular_cloud_configs.yaml     |  40 ---
 sample_configs/tabular_local_configs.yaml     |  15 -
 sample_configs/timeseries_cloud_configs.yaml  |  34 --
 sample_configs/timeseries_local_configs.yaml  |  15 -
 .../bench/frameworks/autokeras/exec.py        |  45 +--
 .../bench/frameworks/autokeras/test.py        | 336 ------------------
 16 files changed, 84 insertions(+), 642 deletions(-)
 rename sample_configs/dataloaders/{vision_dataloader.py => image_dataloader.py} (99%)
 rename sample_configs/dataloaders/{vision_datasets.yaml => image_datasets.yaml} (100%)
 delete mode 100644 sample_configs/multimodal_cloud_configs.yaml
 delete mode 100644 sample_configs/multimodal_local_configs.yaml
 rename sample_configs/{cloud_configs/paper_image_local_configs.yaml => paper_image_cloud_configs.yaml} (79%)
 rename sample_configs/{cloud_configs/paper_text_local_configs.yaml => paper_text_cloud_configs.yaml} (83%)
 rename sample_configs/{cloud_configs/paper_text_tabular_local_configs.yaml => paper_text_tabular_cloud_configs.yaml} (86%)
 delete mode 100644 sample_configs/tabular_cloud_configs.yaml
 delete mode 100644 sample_configs/tabular_local_configs.yaml
 delete mode 100644 sample_configs/timeseries_cloud_configs.yaml
 delete mode 100644 sample_configs/timeseries_local_configs.yaml
 delete mode 100644 src/autogluon/bench/frameworks/autokeras/test.py

diff --git a/sample_configs/dataloaders/vision_dataloader.py b/sample_configs/dataloaders/image_dataloader.py
similarity index 99%
rename from sample_configs/dataloaders/vision_dataloader.py
rename to sample_configs/dataloaders/image_dataloader.py
index 84ab5d91..21d6a25f 100644
--- a/sample_configs/dataloaders/vision_dataloader.py
+++ b/sample_configs/dataloaders/image_dataloader.py
@@ -16,7 +16,7 @@ def path_expander(path, base_folder):
 logger = logging.getLogger(__name__)
 
 
-class VisionDataLoader:
+class ImageDataLoader:
     def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"):
         with open(dataset_config_file, "r") as f:
             config = yaml.safe_load(f)
diff --git a/sample_configs/dataloaders/vision_datasets.yaml b/sample_configs/dataloaders/image_datasets.yaml
similarity index 100%
rename from sample_configs/dataloaders/vision_datasets.yaml
rename to sample_configs/dataloaders/image_datasets.yaml
diff --git a/sample_configs/multimodal_cloud_configs.yaml b/sample_configs/multimodal_cloud_configs.yaml
deleted file mode 100644
index 1b4bb707..00000000
--- a/sample_configs/multimodal_cloud_configs.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-# Infra configurations
-cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
-  CDK_DEPLOY_ACCOUNT: dummy  # required, update with your AWS account
-  CDK_DEPLOY_REGION: dummy  # required, update with your desired region
-  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
-  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
-  # DATA_BUCKET: existing-s3-bucket  # optional, S3 bucket to download your private datasets
-  # MAX_MACHINE_NUM: 20   # optional, default 20
-  # BLOCK_DEVICE_VOLUME: 100   # optional, default 100GB
-  # RESERVED_MEMORY_SIZE: 15000  # optional, default 15000MB
-  # VPC_NAME: existing-vpc-name  # optional
-
-module: multimodal  # required
-mode: aws  # required
-benchmark_name: ag_bench  # required
-root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
-
-constraint: test  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
-framework: AutoGluon_stable  # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
-dataset_name:  # required
-  - shopee
-  - melbourne_airbnb
-
-### Customizations ####
-# custom_resource_dir: sample_configs/resources/  # path to custom multimodal_constraints.yaml and multimodal_frameworks.yaml
-# custom_dataloader:
-#     dataloader_file: sample_configs/dataloaders/vision_dataloader.py   # relative path to WORKDIR
-#     class_name: VisionDataLoader
-#     dataset_config_file: sample_configs/dataloaders/vision_datasets.yaml 
-
-# custom_metrics:
-#     metrics_path: sample_configs/custom_metrics/sample_metrics.py
-#     function_name: f1_score
-#     # Other optional parameters can be set, ref: https://auto.gluon.ai/stable/tutorials/tabular/advanced/tabular-custom-metric.html
-#     optimum: 1
-#     greater_is_better: true
diff --git a/sample_configs/multimodal_local_configs.yaml b/sample_configs/multimodal_local_configs.yaml
deleted file mode 100644
index 0bfadcd3..00000000
--- a/sample_configs/multimodal_local_configs.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-# Benchmark configurations
-module: multimodal  # required
-mode: local  # required
-benchmark_name: ag_bench  # required
-root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
-# METRICS_BUCKET: autogluon-benchmark-metrics  # optional, required only if you want to upload metrics to S3
-
-# Multimodal specific
-framework: AutoGluon_stable  # required
-dataset_name:  # required
-  melbourne_airbnb  
-
-#### Customizations ####
-# custom_resource_dir: sample_configs/resources/  # path to custom multimodal_frameworks.yaml and multimodal_constraints.yaml
-# custom_dataloader:
-#     dataloader_file: sample_configs/dataloaders/vision_dataloader.py   # relative path to WORKDIR
-#     class_name: VisionDataLoader
-#     dataset_config_file: sample_configs/dataloaders/vision_datasets.yaml 
-# custom_metrics:
-#     metrics_path: sample_configs/custom_metrics/sample_metrics.py
-#     function_name: f1_score
-#     # Other optional parameters can be set, ref: https://auto.gluon.ai/stable/tutorials/tabular/advanced/tabular-custom-metric.html
-#     optimum: 1
-#     greater_is_better: true
diff --git a/sample_configs/cloud_configs/paper_image_local_configs.yaml b/sample_configs/paper_image_cloud_configs.yaml
similarity index 79%
rename from sample_configs/cloud_configs/paper_image_local_configs.yaml
rename to sample_configs/paper_image_cloud_configs.yaml
index 2cabc1e7..d9d49386 100644
--- a/sample_configs/cloud_configs/paper_image_local_configs.yaml
+++ b/sample_configs/paper_image_cloud_configs.yaml
@@ -1,4 +1,5 @@
 # Infra configurations
+# Infra configurations
 cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
   CDK_DEPLOY_ACCOUNT: 097403188315  # required, update with your AWS account
   CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
@@ -10,19 +11,23 @@ cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS C
   # RESERVED_MEMORY_SIZE: 60000  # optional, default 15000MB
   # VPC_NAME: existing-vpc-name  # optional
 
-module: multimodal  # required
+module: multimodal  # required, choice of 'multimodal' or 'autokeras'
 mode: aws  # required
 benchmark_name: ag_bench_image  # required
 root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
 
 constraint: g4_12x  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
 framework:   # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
-  # AutoGluon_medium_master
-  # AutoGluon_high_master
   AutoGluon_best_master
-  # torch_compile_medium
-  # torch_compile_high
-  # torch_compile_best
+  # autokeras_master
+  # ablation_base
+  # ablation_greedy_soup
+  # ablation_gradient_clip
+  # ablation_warmup_steps
+  # ablation_cosine_decay
+  # ablation_weight_decay
+  # ablation_lr_decay
+
 dataset_name: 
   # image
    - fashion_mnist
@@ -34,7 +39,7 @@ dataset_name:
    - oxfordflowers
    - OxfordIIITPet
   # image-tabular
-   -  ham10000
+   - ham10000
    - cd18
   # image-text
    - hateful_meme
@@ -43,8 +48,8 @@ dataset_name:
    - memotion
 ## Customizations ####
 custom_dataloader:
-    dataloader_file: sample_configs/dataloaders/vision_dataloader.py   # relative path to WORKDIR
-    class_name: VisionDataLoader
+    dataloader_file: sample_configs/dataloaders/image_dataloader.py   # relative path to WORKDIR
+    class_name: ImageDataLoader
     dataset_config_file: sample_configs/dataloaders/paper_image_datasets.yaml 
 custom_resource_dir: sample_configs/resources/
 
diff --git a/sample_configs/paper_image_local_configs.yaml b/sample_configs/paper_image_local_configs.yaml
index a3e11928..c1cee4cc 100644
--- a/sample_configs/paper_image_local_configs.yaml
+++ b/sample_configs/paper_image_local_configs.yaml
@@ -1,51 +1,40 @@
-# Infra configurations
-cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
-  CDK_DEPLOY_ACCOUNT: 097403188315  # required, update with your AWS account
-  CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
-  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
-  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
-  DATA_BUCKET: zs-models  # optional, S3 bucket to download your private datasets
-  MAX_MACHINE_NUM: 1000   # optional, default 20
-  # BLOCK_DEVICE_VOLUME: 300   # optional, default 100GB
-  # RESERVED_MEMORY_SIZE: 60000  # optional, default 15000MB
-  # VPC_NAME: existing-vpc-name  # optional
-
-module: autokeras  # required
+module: multimodal  # required, choice of 'multimodal' or 'autokeras'
 mode: local  # required
 benchmark_name: ag_bench_image  # required
 root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
 
-constraint: g4_12x  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
 framework:   # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
-  # AutoGluon_medium_master
-  # AutoGluon_high_master
   AutoGluon_best_master
-  # torch_compile_medium
-  # torch_compile_high
-  # torch_compile_best
-dataset_name: fashion_mnist 
-dataset_names:  
-  # image
-   - fashion_mnist
-   - casting
-   - food101
-   - stanfordcars
-   - magnetictiledefects
-   - europeanflooddepth
-   - oxfordflowers
-   - OxfordIIITPet
-  # image-tabular
-   -  ham10000
-   - cd18
-  # image-text
-   - hateful_meme
-  # image-text-tabular
-   - petfinder
-   - memotion
+  # autokeras_master
+  # ablation_base
+  # ablation_greedy_soup
+  # ablation_gradient_clip
+  # ablation_warmup_steps
+  # ablation_cosine_decay
+  # ablation_weight_decay
+  # ablation_lr_decay
+dataset_name: fashion_mnist
+  # # image
+  #  - fashion_mnist
+  #  - casting
+  #  - food101
+  #  - stanfordcars
+  #  - magnetictiledefects
+  #  - europeanflooddepth
+  #  - oxfordflowers
+  #  - OxfordIIITPet
+  # # image-tabular
+  #  - ham10000
+  #  - cd18
+  # # image-text
+  #  - hateful_meme
+  # # image-text-tabular
+  #  - petfinder
+  #  - memotion
 ## Customizations ####
 custom_dataloader:
-    dataloader_file: sample_configs/dataloaders/vision_dataloader.py   # relative path to WORKDIR
-    class_name: VisionDataLoader
+    dataloader_file: sample_configs/dataloaders/image_dataloader.py   # relative path to WORKDIR
+    class_name: ImageDataLoader
     dataset_config_file: sample_configs/dataloaders/paper_image_datasets.yaml 
 custom_resource_dir: sample_configs/resources/
 
diff --git a/sample_configs/cloud_configs/paper_text_local_configs.yaml b/sample_configs/paper_text_cloud_configs.yaml
similarity index 83%
rename from sample_configs/cloud_configs/paper_text_local_configs.yaml
rename to sample_configs/paper_text_cloud_configs.yaml
index bebe3bbd..25996146 100644
--- a/sample_configs/cloud_configs/paper_text_local_configs.yaml
+++ b/sample_configs/paper_text_cloud_configs.yaml
@@ -1,4 +1,4 @@
-# Infra configurations
+# Infra configurations
 cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
   CDK_DEPLOY_ACCOUNT: 097403188315  # required, update with your AWS account
   CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
@@ -10,19 +10,22 @@ cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS C
   # RESERVED_MEMORY_SIZE: 60000  # optional, default 15000MB
   # VPC_NAME: existing-vpc-name  # optional
 
-module: multimodal  # required
+module: multimodal  # required, choice of 'multimodal' or 'autokeras'
 mode: aws  # required
 benchmark_name: ag_bench_text  # required
 root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
 
 constraint: g4_12x  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
 framework:   # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
-  # AutoGluon_medium_master
-  # AutoGluon_high_master
   AutoGluon_best_master
-  # torch_compile_medium
-  # torch_compile_high
-  # torch_compile_best
+  # autokeras_master
+  # ablation_base
+  # ablation_greedy_soup
+  # ablation_gradient_clip
+  # ablation_warmup_steps
+  # ablation_cosine_decay
+  # ablation_weight_decay
+  # ablation_lr_decay
 dataset_name:
     - financial_news
     - MLDoc-11000
diff --git a/sample_configs/paper_text_local_configs.yaml b/sample_configs/paper_text_local_configs.yaml
index 5b38ae0e..7764d2ff 100644
--- a/sample_configs/paper_text_local_configs.yaml
+++ b/sample_configs/paper_text_local_configs.yaml
@@ -1,28 +1,18 @@
-# Infra configurations
-cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
-  CDK_DEPLOY_ACCOUNT: 097403188315  # required, update with your AWS account
-  CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
-  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
-  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
-  DATA_BUCKET: zs-models  # optional, S3 bucket to download your private datasets
-  MAX_MACHINE_NUM: 1000   # optional, default 20
-  # BLOCK_DEVICE_VOLUME: 300   # optional, default 100GB
-  # RESERVED_MEMORY_SIZE: 60000  # optional, default 15000MB
-  # VPC_NAME: existing-vpc-name  # optional
-
-module: multimodal  # required
+module: multimodal  # required, choice of 'multimodal' or 'autokeras'
 mode: local  # required
 benchmark_name: ag_bench_text  # required
 root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
 
-constraint: g4_12x  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
 framework:   # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
-  # AutoGluon_medium_master
-  # AutoGluon_high_master
   AutoGluon_best_master
-  # torch_compile_medium
-  # torch_compile_high
-  # torch_compile_best
+  # autokeras_master
+  # ablation_base
+  # ablation_greedy_soup
+  # ablation_gradient_clip
+  # ablation_warmup_steps
+  # ablation_cosine_decay
+  # ablation_weight_decay
+  # ablation_lr_decay
 dataset_name: ag_news 
 ## Customizations ####
 custom_dataloader:
diff --git a/sample_configs/cloud_configs/paper_text_tabular_local_configs.yaml b/sample_configs/paper_text_tabular_cloud_configs.yaml
similarity index 86%
rename from sample_configs/cloud_configs/paper_text_tabular_local_configs.yaml
rename to sample_configs/paper_text_tabular_cloud_configs.yaml
index d0719d18..16ba00c1 100644
--- a/sample_configs/cloud_configs/paper_text_tabular_local_configs.yaml
+++ b/sample_configs/paper_text_tabular_cloud_configs.yaml
@@ -10,19 +10,22 @@ cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS C
   # RESERVED_MEMORY_SIZE: 60000  # optional, default 15000MB
   # VPC_NAME: existing-vpc-name  # optional
 
-module: multimodal  # required
+module: multimodal  # required, choice of 'multimodal' or 'autokeras'
 mode: aws  # required
 benchmark_name: ag_bench_text_tabular  # required
 root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
 
 constraint: g4_12x  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
 framework:   # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
-  # torch_compile_medium
-  # torch_compile_high
-  # torch_compile_best
-  # AutoGluon_medium_master
-  # AutoGluon_high_master
   AutoGluon_best_master
+  # autokeras_master
+  # ablation_base
+  # ablation_greedy_soup
+  # ablation_gradient_clip
+  # ablation_warmup_steps
+  # ablation_cosine_decay
+  # ablation_weight_decay
+  # ablation_lr_decay
 dataset_name:
         - airbnb
         - kick_start
diff --git a/sample_configs/paper_text_tabular_local_configs.yaml b/sample_configs/paper_text_tabular_local_configs.yaml
index 212b6092..86f82d75 100644
--- a/sample_configs/paper_text_tabular_local_configs.yaml
+++ b/sample_configs/paper_text_tabular_local_configs.yaml
@@ -1,28 +1,19 @@
-# Infra configurations
-cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
-  CDK_DEPLOY_ACCOUNT: 097403188315  # required, update with your AWS account
-  CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
-  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
-  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
-  DATA_BUCKET: automl-mm-bench  # optional, S3 bucket to download your private datasets
-  MAX_MACHINE_NUM: 1000   # optional, default 20
-  # BLOCK_DEVICE_VOLUME: 300   # optional, default 100GB
-  # RESERVED_MEMORY_SIZE: 60000  # optional, default 15000MB
-  # VPC_NAME: existing-vpc-name  # optional
-
-module: autokeras  # required
+module: autokeras  # required, choice of 'multimodal' or 'autokeras'
 mode: local  # required
 benchmark_name: ag_bench_text_tabular  # required
 root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
 
 constraint: g4_12x  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
 framework:   # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
-  # torch_compile_medium
-  # torch_compile_high
-  # torch_compile_best
-  # AutoGluon_medium_master
-  # AutoGluon_high_master
   AutoGluon_best_master
+  # autokeras_master
+  # ablation_base
+  # ablation_greedy_soup
+  # ablation_gradient_clip
+  # ablation_warmup_steps
+  # ablation_cosine_decay
+  # ablation_weight_decay
+  # ablation_lr_decay
 dataset_name: airbnb 
 custom_dataloader:
     dataloader_file: sample_configs/dataloaders/text_tabular_dataloader.py   # relative path to WORKDIR
diff --git a/sample_configs/tabular_cloud_configs.yaml b/sample_configs/tabular_cloud_configs.yaml
deleted file mode 100644
index a73a95d1..00000000
--- a/sample_configs/tabular_cloud_configs.yaml
+++ /dev/null
@@ -1,40 +0,0 @@
-# Infra configurations
-cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
-  CDK_DEPLOY_ACCOUNT: dummy  # required, update with your AWS account
-  CDK_DEPLOY_REGION: dummy  # required, update with your desired region
-  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
-  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
-  # DATA_BUCKET: existing-s3-bucket  # optional, S3 bucket to download your private datasets
-  # MAX_MACHINE_NUM: 20   # optional, default 20
-  # BLOCK_DEVICE_VOLUME: 100   # optional, default 100GB
-  # RESERVED_MEMORY_SIZE: 15000  # optional, default 15000MB
-  # INSTANCE: g4dn.2xlarge  # optional, default g4dn.2xlarge
-  # TIME_LIMIT: 3600  # optional, EC2 timeout, default 3600s
-  # VPC_NAME: existing-vpc-name  # optional
-
-# Benchmark configurations
-module: tabular  # required
-mode: aws  # required
-benchmark_name: ag_bench  # required
-root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
-
-git_uri#branch:  # required, can be any AMLB custom fork and branch, only one value allowed
-  https://github.com/openml/automlbenchmark.git#master
-framework:  # required, only one value allowed
-  AutoGluon:stable
-amlb_constraint:  # optional, only one value allowed, default: test
-  test
-amlb_benchmark:  # required, a list of amlb benchmarks to run, ref: https://github.com/openml/automlbenchmark/tree/master/resources/benchmarks
-  - small
-amlb_task:  # optional, value of each key is a subset of dataset names in each 'amlb_benchmark', ref: https://github.com/openml/automlbenchmark/blob/master/resources/benchmarks/small.yaml
-  small:
-    - credit-g
-    - vehicle
-
-# fold_to_run:  # optional, capped by `folds` in amlb_constraint, ref: https://github.com/openml/automlbenchmark/blob/master/resources/constraints.yaml
-#   small:
-#     credit-g:
-#       - 3
-#       - 6
-# amlb_user_dir:  # optional, local/s3 path where all the amlb customizations are stored, only one value allowed
-#   sample_configs/amlb_configs
diff --git a/sample_configs/tabular_local_configs.yaml b/sample_configs/tabular_local_configs.yaml
deleted file mode 100644
index 62196d54..00000000
--- a/sample_configs/tabular_local_configs.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-# Benchmark configurations
-module: tabular  # required
-mode: local  # required
-benchmark_name: ag_bench  # required
-root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
-# METRICS_BUCKET: autogluon-benchmark-metrics  # optional, required only if you want to upload metrics to S3
-
-# Tabular specific
-git_uri#branch: https://github.com/openml/automlbenchmark.git#stable  # required, can be any AMLB custom fork and branch
-framework: AutoGluon:stable  # required
-amlb_benchmark: small  # required
-amlb_task: vehicle # optional
-amlb_constraint: test  # optional
-fold_to_run: 1  # optional, the specific data fold to run
-amlb_user_dir: sample_configs/amlb_configs  # optional, local/s3 path where all the amlb customizations are stored
diff --git a/sample_configs/timeseries_cloud_configs.yaml b/sample_configs/timeseries_cloud_configs.yaml
deleted file mode 100644
index 6cb9441c..00000000
--- a/sample_configs/timeseries_cloud_configs.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# Infra configurations
-cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
-  CDK_DEPLOY_ACCOUNT: dummy  # required, update with your AWS account
-  CDK_DEPLOY_REGION: dummy  # required, update with your desired region
-  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
-  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
-  # DATA_BUCKET: existing-s3-bucket  # optional, S3 bucket to download your private datasets
-  # MAX_MACHINE_NUM: 20   # optional, default 20
-  # BLOCK_DEVICE_VOLUME: 100   # optional, default 100GB
-  # RESERVED_MEMORY_SIZE: 15000  # optional, default 15000MB
-  # INSTANCE: g4dn.2xlarge  # optional, default g4dn.2xlarge
-  # TIME_LIMIT: 3600  # optional, EC2 timeout, default 3600s
-  # VPC_NAME: existing-vpc-name  # optional
-
-# Benchmark configurations
-module: timeseries  # required
-mode: aws  # required
-benchmark_name: ag_bench  # required
-root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
-
-git_uri#branch:  # required, can be any AMLB custom fork and branch, only one value allowed
-  https://github.com/openml/automlbenchmark.git#master
-framework:  # required, only one value allowed
-  AutoGluon_dev:example
-amlb_constraint:  # optional, only one value allowed, default: test
-  test
-amlb_benchmark:  # required, a list of amlb benchmarks to run, ref: https://github.com/openml/automlbenchmark/tree/master/resources/benchmarks
-  - timeseries_test
-amlb_task:  # optional, value of each key is a subset of dataset names in each 'amlb_benchmark', ref: https://github.com/openml/automlbenchmark/blob/master/resources/benchmarks/small.yaml
-  timeseries_test:
-    - m4_hourly_2
-# fold_to_run:  # optional, capped by `folds` in amlb_constraint, ref: https://github.com/openml/automlbenchmark/blob/master/resources/constraints.yaml
-amlb_user_dir:  # optional, local/s3 path where all the amlb customizations are stored, only one value allowed
-  sample_configs/amlb_configs
diff --git a/sample_configs/timeseries_local_configs.yaml b/sample_configs/timeseries_local_configs.yaml
deleted file mode 100644
index 838ad9b3..00000000
--- a/sample_configs/timeseries_local_configs.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-# Benchmark configurations
-module: timeseries  # required
-mode: local  # required
-benchmark_name: ag_bench  # required
-root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
-# METRICS_BUCKET: autogluon-benchmark-metrics  # optional, required only if you want to upload metrics to S3
-
-# Tabular specific
-git_uri#branch: https://github.com/openml/automlbenchmark.git#stable  # required, can be any AMLB custom fork and branch
-framework: AutoGluon  # required
-amlb_benchmark: timeseries_test  # required
-amlb_task: m4_hourly_2 # optional
-amlb_constraint: test  # optional
-fold_to_run: 1  # optional, the specific data fold to run
-amlb_user_dir: sample_configs/amlb_configs  # optional, local/s3 path where all the amlb customizations are stored
diff --git a/src/autogluon/bench/frameworks/autokeras/exec.py b/src/autogluon/bench/frameworks/autokeras/exec.py
index 47b0605e..78ede744 100644
--- a/src/autogluon/bench/frameworks/autokeras/exec.py
+++ b/src/autogluon/bench/frameworks/autokeras/exec.py
@@ -144,23 +144,6 @@ def find_average_image_size(image_paths, max_size=(224, 224), min_size=(32, 32))
     
     return (target_width, target_height)
 
-def resize_images_to_target_size(image_paths, target_size, mode='RGB'):
-    """
-    Resizes images to a specified target size.
-    """
-    resized_images = []
-    
-    for path in image_paths:
-        try:
-            with Image.open(path) as img:
-                img = img.convert(mode)
-                img = img.resize(target_size)
-                resized_images.append(np.array(img))
-        except Exception as e:
-            print(f"Error processing image {path}: {e}")
-            resized_images.append(np.zeros(target_size + (3,), dtype=np.uint8))  # Assuming RGB mode
-            
-    return np.array(resized_images)
 
 def load_image(image_path, target_size=(224, 224)):
     try:
@@ -172,28 +155,6 @@ def load_image(image_path, target_size=(224, 224)):
         print(f"Error loading image {image_path}: {e}")
         return np.zeros((target_size[0], target_size[1], 3), dtype=np.uint8)  # Placeholder for an invalid image
 
-def create_zero_image(target_size=(224, 224)):
-    # Create a zero (blank) image
-    return np.zeros((target_size[0], target_size[1], 3), dtype=np.uint8)
-
-def average_images(image_paths, target_size=(224, 224)):
-    images = [load_image(path, target_size) for path in image_paths[:2]]  # Load the first two images
-    # Calculate the average of the images
-    average_img = np.mean(images, axis=0).astype(np.uint8)
-    return average_img
-
-
-def decode_img(img):
-    # Convert the compressed string to a 3D uint8 tensor
-    img = tf.image.decode_jpeg(img, channels=3)
-    # Use `convert_image_dtype` to convert to floats in the [0,1] range.
-    return tf.image.convert_image_dtype(img, tf.float32)
-
-def process_path(file_path):
-    # Load the raw data from the file as a string
-    img = tf.io.read_file(file_path)
-    img = decode_img(img)
-    return img
 
 def preprocess_data(features, image_columns, text_columns, target_size):
     # Process image data
@@ -316,11 +277,11 @@ def run(
     
  
     if train_data.problem_type == "regression":
-        output_node = ak.RegressionHead(metrics=[tf.keras.metrics.RootMeanSquaredError()])#metrics=[tf.keras.metrics.RootMeanSquaredError()])
+        output_node = ak.RegressionHead(metrics=[tf.keras.metrics.RootMeanSquaredError()])
     elif train_data.problem_type in ["multiclass", "classification"]:
-        output_node = ak.ClassificationHead(metrics=["accuracy"])#metrics=[tf.keras.metrics.Accuracy()])
+        output_node = ak.ClassificationHead(metrics=["accuracy"])
     elif train_data.problem_type == "binary":
-        output_node = ak.ClassificationHead(metrics=["AUC"])#[tf.keras.metrics.AUC(curve="ROC")])
+        output_node = ak.ClassificationHead(metrics=["AUC"])
 
     # Combine the data into a list for the model
     train_data_list = [data for data in [image_data_train, tabular_data_train, text_data_train] if data is not None]
diff --git a/src/autogluon/bench/frameworks/autokeras/test.py b/src/autogluon/bench/frameworks/autokeras/test.py
deleted file mode 100644
index 511a2e77..00000000
--- a/src/autogluon/bench/frameworks/autokeras/test.py
+++ /dev/null
@@ -1,336 +0,0 @@
-import argparse
-import csv
-import importlib
-import json
-import logging
-import os
-import time
-from datetime import datetime
-from typing import Optional, Union
-import autokeras as ak
-from PIL import Image
-import numpy as np
-from sklearn.model_selection import train_test_split
-from autogluon.bench.datasets.dataset_registry import multimodal_dataset_registry
-import pandas as pd
-
-import tensorflow as tf
-
-
-logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.INFO)
-
-
-def _flatten_dict(data):
-    flattened = {}
-    for key, value in data.items():
-        if isinstance(value, dict):
-            flattened.update(_flatten_dict(value))
-        else:
-            flattened[key] = value
-    return flattened
-
-
-def get_args():
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument(
-        "--dataset_name",
-        type=str,
-        help="Dataset that has been registered with multimodal_dataset_registry.",
-    )
-    parser.add_argument("--framework", type=str, help="Framework (and) branch/version.")
-    parser.add_argument("--benchmark_dir", type=str, help="Directory to save benchmarking run.")
-    parser.add_argument("--metrics_dir", type=str, help="Directory to save benchmarking metrics.")
-    parser.add_argument("--constraint", type=str, default=None, help="AWS resources constraint setting.")
-    parser.add_argument("--params", type=str, default=None, help="AWS resources constraint setting.")
-    parser.add_argument(
-        "--custom_dataloader", type=str, default=None, help="Custom dataloader to use in the benchmark."
-    )
-    parser.add_argument("--custom_metrics", type=str, default=None, help="Custom metrics to use in the benchmark.")
-
-    args = parser.parse_args()
-    return args
-
-
-def load_dataset(dataset_name: str, custom_dataloader: dict = None):  # dataset name
-    """Loads and preprocesses a dataset.
-
-    Args:
-        dataset_name (str): The name of the dataset to load.
-        custom_dataloader (dict): A dictionary containing information about a custom dataloader to use. Defaults to None.
-
-    Returns:
-        Tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the training and test datasets.
-    """
-    splits = ["train", "val", "test"]
-    data = {}
-    if dataset_name in multimodal_dataset_registry.list_keys():
-        logger.info(f"Loading dataset {dataset_name} from multimodal_dataset_registry")
-        for split in splits:
-            data[split] = multimodal_dataset_registry.create(dataset_name, split)
-    elif custom_dataloader is not None:
-        logger.info(f"Loading dataset {dataset_name} from custom dataloader {custom_dataloader}.")
-        custom_dataloader_file = custom_dataloader.pop("dataloader_file")
-        class_name = custom_dataloader.pop("class_name")
-        spec = importlib.util.spec_from_file_location(class_name, custom_dataloader_file)
-        module = importlib.util.module_from_spec(spec)
-        spec.loader.exec_module(module)
-        custom_class = getattr(module, class_name)
-        for split in splits:
-            data[split] = custom_class(dataset_name=dataset_name, split=split, **custom_dataloader)
-    else:
-        raise ModuleNotFoundError(f"Dataset Loader for dataset {dataset_name} is not available.")
-
-    return data.values()
-
-
-def save_metrics(metrics_path: str, metrics: dict):
-    """Saves evaluation metrics to a JSON file.
-
-    Args:
-        metrics_path (str): The path to the directory where the metrics should be saved.
-        metrics: The evaluation metrics to save.
-
-    Returns:
-        None
-    """
-    if metrics is None:
-        logger.warning("No metrics were created.")
-        return
-
-    if not os.path.exists(metrics_path):
-        os.makedirs(metrics_path)
-    file = os.path.join(metrics_path, "results.csv")
-    flat_metrics = _flatten_dict(metrics)
-    field_names = flat_metrics.keys()
-
-    with open(file, "w", newline="") as f:
-        writer = csv.DictWriter(f, fieldnames=field_names)
-        writer.writeheader()
-        writer.writerow(flat_metrics)
-    logger.info("Metrics saved to %s.", file)
-    f.close()
-
-
-def load_image(image_path, target_size=(224, 224)):
-    try:
-        with Image.open(image_path) as img:
-            img = img.convert("RGB")
-            #img = img.resize(target_size)
-            return np.array(img)
-    except Exception as e:
-        print(f"Error loading image {image_path}: {e}")
-        return np.zeros((target_size[0], target_size[1], 3), dtype=np.uint8)  # Placeholder for an invalid image
-
-def create_zero_image(target_size=(224, 224)):
-    # Create a zero (blank) image
-    return np.zeros((target_size[0], target_size[1], 3), dtype=np.uint8)
-
-def average_images(image_paths, target_size=(224, 224)):
-    images = [load_image(path, target_size) for path in image_paths[:2]]  # Load the first two images
-    # Calculate the average of the images
-    average_img = np.mean(images, axis=0).astype(np.uint8)
-    return average_img
-
-
-def preprocess_data(features, image_columns, text_columns):
-    # Process image data
-    image_data = None
-    if image_columns is not None and len(image_columns) > 0:
-        image_data = []
-        features.loc[:, image_columns[0]] = features[image_columns[0]].apply(lambda x: x.split(';')[0] if pd.notnull(x) else x)
-        image_paths = features[image_columns[0]].values
-        for path in image_paths:
-            img = load_image(path)
-            image_data.append(img)
-    
-        # Convert column image data to a NumPy array and normalize
-        image_data = np.array(image_data)
-
-    # Process text data
-    text_data = None
-    if text_columns is not None and len(text_columns) > 0:
-        text_data = features.apply(lambda row: " ".join((str(row[col]) if row[col] is not None else "") for col in text_columns), axis=1) 
-        text_data = text_data.to_numpy(dtype=str)
-        print("Text data is: ", text_data)
-    
-    # Process tabular data
-    tabular_data = None
-    all_image_text_columns = image_columns or [] + text_columns or [] 
-    tabular_columns = features.columns.difference(all_image_text_columns)
-    print("tabular column is: ", tabular_columns) 
-    if len(tabular_columns) > 0:
-        tabular_data = features[tabular_columns].to_numpy()
-        print(tabular_data[0])
-
-    return image_data, tabular_data, text_data
-
-
-def run(
-    dataset_name: Union[str, dict],
-    framework: str,
-    benchmark_dir: str,
-    metrics_dir: str,
-    constraint: Optional[str] = None,
-    params: Optional[dict] = None,
-    custom_dataloader: Optional[dict] = None,
-    custom_metrics: Optional[dict] = None,
-):
-    """Runs the AutoGluon multimodal benchmark on a given dataset.
-
-    Args:
-        dataset_name (Union[str, dict]): Dataset that has been registered with multimodal_dataset_registry.
-
-                            To get a list of datasets:
-
-                            from autogluon.bench.datasets.dataset_registry import multimodal_dataset_registry
-                            multimodal_dataset_registry.list_keys()
-
-        benchmark_dir (str): The path to the directory where benchmarking artifacts should be saved.
-        constraint (str): The resource constraint used by benchmarking during AWS mode, default: None.
-        params (str): The multimodal params, default: {}.
-        custom_dataloader (dict): A dictionary containing information about a custom dataloader to use. Defaults to None.
-                                To define a custom dataloader in the config file:
-
-                                custom_dataloader:
-                                    dataloader_file: path_to/dataloader.py   # relative path to WORKDIR
-                                    class_name: DataLoaderClass
-                                    dataset_config_file: path_to/dataset_config.yaml
-                                    **kwargs (of DataLoaderClass)
-        custom_metrics (dict): A dictionary containing information about a custom metrics to use. Defaults to None.
-                                To define a custom metrics in the config file:
-
-                                custom_metrics:
-                                    metrics_path: path_to/metrics.py   # relative path to WORKDIR
-                                    function_name: custom_metrics_function
-                                    **kwargs (of autogluon.core.metrics.make_scorer)
-    Returns:
-        None
-    """
-    train_data, val_data, test_data = load_dataset(dataset_name=dataset_name, custom_dataloader=custom_dataloader)
-    image_columns = train_data.image_columns
-    text_columns = train_data.text_columns
-    tabular_columns = list(set(train_data.data.columns) - set(image_columns) - set(text_columns) - set(train_data.columns_to_drop) - set(train_data.label_columns))
-    feature_columns = tabular_columns + image_columns + text_columns
-    print("Label column: ", train_data.label_columns, train_data.data[train_data.label_columns])
-
-    features_train, labels_train = train_data.data[feature_columns], train_data.data[train_data.label_columns]
-    if test_data.data is None:
-        print("No test data found, splitting test data from train data")
-        features_train, features_test, labels_train, labels_test = train_test_split(features_train, labels_train, test_size=0.2, random_state=42)
-    else:
-        features_test, labels_test = test_data.data[feature_columns], test_data.data[train_data.label_columns]
-
-    features_val, labels_val = None, None 
-    if val_data.data is not None:
-        features_val, labels_val = val_data.data[feature_columns], val_data.data[train_data.label_columns]
-
-    image_data_train, tabular_data_train, text_data_train = preprocess_data(features_train, image_columns, text_columns)
-    image_data_test, tabular_data_test, text_data_test = preprocess_data(features_test, image_columns, text_columns)
-
-    image_data_val, tabular_data_val, text_data_val = (None, None, None)
-    
-    if features_val is not None and labels_val is not None:
-        image_data_val, tabular_data_val, text_data_val = preprocess_data(features_val, image_columns, text_columns)
-
-
-    inputs = []
-    if image_data_train is not None:
-        print("has image_data")
-        inputs.append(ak.ImageInput())
-    if tabular_data_train is not None:
-        print("has tabular_data")
-        inputs.append(ak.StructuredDataInput())
-    if text_data_train is not None:
-        print("has text_data")
-        inputs.append(ak.TextInput())
-    
-    import tensorflow as tf
-    if train_data.problem_type == "regression":
-        output_node = ak.RegressionHead(metrics=[tf.keras.metrics.RootMeanSquaredError()])
-    elif train_data.problem_type in ["multiclass", "classification"]:
-        output_node = ak.ClassificationHead(loss="categorical_crossentropy",metrics=[tf.keras.metrics.Accuracy()])
-    elif train_data.problem_type == "binary":
-        output_node = ak.ClassificationHead(loss="binary_crossentropy",metrics=[tf.keras.metrics.AUC(curve="ROC")])
-    else:
-        print("Warning: problem type unknown").
-
-    # Combine the data into a list for the model
-    train_data_list = [data for data in [image_data_train, tabular_data_train, text_data_train] if data is not None]
-
-    # Combine the data into a list for the model
-    test_data_list = [data for data in [image_data_test, tabular_data_test, text_data_test] if data is not None]
-
-
-    auto_model = ak.AutoModel(
-        inputs=inputs,
-        outputs=output_node,
-        overwrite=True,
-    )
-
-    utc_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")
-    start_time = time.time()
-    if features_val is not None and labels_val is not None:
-        # Combine the data into a list for the model
-        val_data_list = [data for data in [image_data_val, tabular_data_val, text_data_val] if data is not None]
-
-        auto_model.fit(
-            train_data_list,
-            labels_train,
-            validation_data=(val_data_list, labels_val),
-        )
-    else:
-        auto_model.fit(
-            train_data_list,
-            labels_train,
-        )
-    end_time = time.time()
-    training_duration = round(end_time - start_time, 1)
-
-    start_time = time.time()
-    metrics = auto_model.evaluate(test_data_list, labels_test)
-    end_time = time.time()
-    predict_duration = round(end_time - start_time, 1)
-
-    metric_name = train_data.metric
-    version = "master"
-    metrics = {
-        "id": "id/0",  # dummy id to make it align with amlb benchmark output
-        "task": dataset_name,
-        "framework": framework,
-        "constraint": constraint,
-        "version": version,
-        "fold": 0,
-        "type": train_data.problem_type,
-        "result": metrics[1],
-        "metric": metric_name,
-        "utc": utc_time,
-        "training_duration": training_duration,
-        "predict_duration": predict_duration,
-        "scores": metrics[1],
-    }
-    subdir = f"{framework}.{dataset_name}.{constraint}.local"
-    save_metrics(os.path.join(metrics_dir, subdir, "scores"), metrics)
-    
-
-if __name__ == "__main__":
-    args = get_args()
-    if args.params is not None:
-        args.params = json.loads(args.params)
-    if args.custom_dataloader is not None:
-        args.custom_dataloader = json.loads(args.custom_dataloader)
-    if args.custom_metrics is not None:
-        args.custom_metrics = json.loads(args.custom_metrics)
-
-    run(
-        dataset_name=args.dataset_name,
-        framework=args.framework,
-        benchmark_dir=args.benchmark_dir,
-        metrics_dir=args.metrics_dir,
-        constraint=args.constraint,
-        params=args.params,
-        custom_dataloader=args.custom_dataloader,
-        custom_metrics=args.custom_metrics,
-    )
-

From cefa4747b5fbc847c81e681333c07a0422ff8c70 Mon Sep 17 00:00:00 2001
From: Su Zhou <zhousu@amazon.com>
Date: Tue, 5 Mar 2024 03:28:04 +0000
Subject: [PATCH 10/13] update scripts

---
 .../bench_all.py => bench_all.py              |  25 +---
 process_ablation.py                           |  41 ++++++
 process_results.py                            |  34 +++++
 .../benchmarks/timeseries_test.yaml           |  13 --
 .../cloud_configs/amlb_configs/config.yaml    |  14 --
 .../amlb_configs/constraints.yaml             |   7 -
 .../amlb_configs/frameworks_example.yaml      |  18 ---
 .../custom_metrics/sample_metrics.py          |  11 --
 .../dataloaders/paper_image_datasets.yaml     | 119 -----------------
 .../dataloaders/paper_text_datasets.yaml      | 123 ------------------
 .../paper_text_tabular_datasets.yaml          |  90 -------------
 .../dataloaders/text_dataloader.py            |  71 ----------
 .../dataloaders/text_datasets.yaml            |  73 -----------
 .../dataloaders/text_tabular_dataloader.py    |  69 ----------
 .../dataloaders/vision_dataloader.py          |  67 ----------
 .../dataloaders/vision_datasets.yaml          |  31 -----
 .../multimodal_cloud_configs.yaml             |  36 -----
 .../multimodal_local_configs.yaml             |  24 ----
 .../cloud_configs/resources/bench_all.py      |  87 -------------
 .../resources/multimodal_constraints.yaml     |  13 --
 .../resources/multimodal_frameworks.yaml      |  93 -------------
 .../cloud_configs/tabular_cloud_configs.yaml  |  40 ------
 .../cloud_configs/tabular_local_configs.yaml  |  15 ---
 .../timeseries_cloud_configs.yaml             |  34 -----
 .../timeseries_local_configs.yaml             |  15 ---
 sample_configs/paper_image_local_configs.yaml |   6 +-
 26 files changed, 79 insertions(+), 1090 deletions(-)
 rename sample_configs/cloud_configs/bench_all.py => bench_all.py (73%)
 create mode 100644 process_ablation.py
 create mode 100644 process_results.py
 delete mode 100644 sample_configs/cloud_configs/amlb_configs/benchmarks/timeseries_test.yaml
 delete mode 100644 sample_configs/cloud_configs/amlb_configs/config.yaml
 delete mode 100644 sample_configs/cloud_configs/amlb_configs/constraints.yaml
 delete mode 100644 sample_configs/cloud_configs/amlb_configs/frameworks_example.yaml
 delete mode 100644 sample_configs/cloud_configs/custom_metrics/sample_metrics.py
 delete mode 100644 sample_configs/cloud_configs/dataloaders/paper_image_datasets.yaml
 delete mode 100644 sample_configs/cloud_configs/dataloaders/paper_text_datasets.yaml
 delete mode 100644 sample_configs/cloud_configs/dataloaders/paper_text_tabular_datasets.yaml
 delete mode 100644 sample_configs/cloud_configs/dataloaders/text_dataloader.py
 delete mode 100644 sample_configs/cloud_configs/dataloaders/text_datasets.yaml
 delete mode 100644 sample_configs/cloud_configs/dataloaders/text_tabular_dataloader.py
 delete mode 100644 sample_configs/cloud_configs/dataloaders/vision_dataloader.py
 delete mode 100644 sample_configs/cloud_configs/dataloaders/vision_datasets.yaml
 delete mode 100644 sample_configs/cloud_configs/multimodal_cloud_configs.yaml
 delete mode 100644 sample_configs/cloud_configs/multimodal_local_configs.yaml
 delete mode 100644 sample_configs/cloud_configs/resources/bench_all.py
 delete mode 100644 sample_configs/cloud_configs/resources/multimodal_constraints.yaml
 delete mode 100644 sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
 delete mode 100644 sample_configs/cloud_configs/tabular_cloud_configs.yaml
 delete mode 100644 sample_configs/cloud_configs/tabular_local_configs.yaml
 delete mode 100644 sample_configs/cloud_configs/timeseries_cloud_configs.yaml
 delete mode 100644 sample_configs/cloud_configs/timeseries_local_configs.yaml

diff --git a/sample_configs/cloud_configs/bench_all.py b/bench_all.py
similarity index 73%
rename from sample_configs/cloud_configs/bench_all.py
rename to bench_all.py
index f098db57..72c7d6d2 100644
--- a/sample_configs/cloud_configs/bench_all.py
+++ b/bench_all.py
@@ -4,17 +4,12 @@
 for i in range(n_experiments):
     seeds.append(random.randint(0, 100))
 
-seeds = [22, 92, 54, 86, 41]
+# seeds = [22, 92, 54, 86, 41]
 
 config_paths = [
     "sample_configs/paper_text_tabular_local_configs.yaml",
     "sample_configs/paper_text_local_configs.yaml",
     "sample_configs/paper_image_local_configs.yaml",
-    # "sample_configs/multimodal_cloud_text_configs.yaml",
-    # "sample_configs/multimodal_cloud_text_fs_configs.yaml",
-    # "sample_configs/multimodal_cloud_text_tabular_configs.yaml",
-    # "sample_configs/multimodal_cloud_text_tabular_image_configs.yaml",
-    # "sample_configs/multimodal_cloud_text_tabular_image_standard_configs.yaml"
 ]
 frameworks = [
     # "AutoGluon_best_master",
@@ -26,28 +21,10 @@
     "ablation_weight_decay",
     # "ablation_lr_decay",
     # "autokeras_master",
-    # "torch_compile_best",
-    # "AutoGluon_best_master",
-    # "AutoGluon_high_master",
-    # "AutoGluon_medium_master",
-    # "AutoGluon_high_vitlarge",
-    # "AutoGluon_medium_vitlarge",
-    # "AutoGluon_best_vitlarge",
-    # "AutoGluon_best_caformer",
-    # "AutoGluon_best_beit",
-    # "AutoGluon_best_swinv2"
-    # "AutoGluon_high_0_8",
-    # "AutoGluon_medium_0_8",
-    # "AutoGluon_best_0_8",
 ]
 constraints = [
     "g4_12x"
 ]
-fs = [
-    1,
-    5,
-    10
-]
 # module = "autokeras"
 module = "multimodal"
 
diff --git a/process_ablation.py b/process_ablation.py
new file mode 100644
index 00000000..e7ca7d34
--- /dev/null
+++ b/process_ablation.py
@@ -0,0 +1,41 @@
+"""Reshape per-run benchmark results into a task x framework table."""
+import pandas as pd
+
+# Long-format results; the columns read below are 'task', 'framework', 'result'.
+df = pd.read_csv('classification_regression.csv')
+
+# Row (task) order of the output table; names must match the CSV 'task' values.
+task_order = [
+    "fashion_mnist", "food101", "stanfordcars", "magnetictiledefects",
+    "europeanflooddepth", "oxfordflowers", "OxfordIIITPet", "cd18", "ham10000",
+    "hateful_meme", "petfinder", "memotion", "financial_news", "MLDoc-11000",
+    "MultiATIS-5000", "fb_dialog", "SNIPS", "ag_news", "airbnb", "kick_start",
+    "cloth_review", "news_popularity", "cal_house"
+]
+
+# Column (framework) order of the output table.
+column_order = [
+    "autokeras_master",
+    "ablation_base",
+    "ablation_greedy_soup",
+    "ablation_gradient_clip",
+    "ablation_warmup_steps",
+    "ablation_cosine_decay",
+    "ablation_weight_decay",
+    "ablation_lr_decay",
+]
+
+# pivot_table (unlike pivot) tolerates several rows per (task, framework),
+# e.g. one per seed, and averages them instead of raising ValueError.
+pivoted_df = df.pivot_table(
+    index='task', columns='framework', values='result', aggfunc='mean'
+)
+
+# reindex on both axes: tasks or frameworks absent from the CSV become NaN
+# rows/columns instead of raising KeyError, and the requested order is applied.
+pivoted_df = pivoted_df.reindex(index=task_order, columns=column_order)
+
+# Save the reformatted DataFrame to a new CSV file
+pivoted_df.to_csv('reformatted_results.csv')
+
+print("Reformatted results saved to 'reformatted_results.csv'.")
diff --git a/process_results.py b/process_results.py
new file mode 100644
index 00000000..d7b5ad1b
--- /dev/null
+++ b/process_results.py
@@ -0,0 +1,34 @@
+"""Summarize benchmark results as mean(95% CI half-width) per framework/task."""
+import pandas as pd
+import numpy as np
+from scipy.stats import sem  # Import the sem function for standard error of mean calculation
+
+input_file = 'classification_regression.csv'
+output_file = 'result_file.csv'
+Z_95 = 1.96  # normal-approximation multiplier for a 95% confidence interval
+
+df = pd.read_csv(input_file)
+
+rows = []
+# One summary row per (framework, task); NaN results are ignored.
+for (framework, task), group in df.groupby(['framework', 'task']):
+    scores = group['result'].dropna()
+    # A standard error needs at least two observations; report nan for smaller
+    # groups explicitly instead of relying on sem()'s small-sample behaviour.
+    half_width = sem(scores) * Z_95 if len(scores) > 1 else np.nan
+    rows.append({
+        'Framework': framework,
+        'Task': task,
+        'Result': f"{scores.mean():.3f}({half_width:.3f})"
+    })
+
+# Passing columns explicitly keeps the header (and the sort below) valid even
+# when the input has no rows, where DataFrame([]) would have no columns at all.
+results_df = pd.DataFrame(rows, columns=['Framework', 'Task', 'Result'])
+
+results_df = results_df.sort_values(by=['Framework', 'Task'])
+
+results_df.to_csv(output_file, index=False)
+
+print(f"Results have been saved to {output_file}")
+
diff --git a/sample_configs/cloud_configs/amlb_configs/benchmarks/timeseries_test.yaml b/sample_configs/cloud_configs/amlb_configs/benchmarks/timeseries_test.yaml
deleted file mode 100644
index fd607a09..00000000
--- a/sample_configs/cloud_configs/amlb_configs/benchmarks/timeseries_test.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-- name: m4_hourly_2
-  dataset:
-    path: https://autogluon.s3.amazonaws.com/datasets/timeseries/m4_hourly/test.csv
-    type: timeseries
-    freq: H
-    forecast_horizon_in_steps: 48
-    seasonality: 24
-    target: target
-    id_column: item_id
-    timestamp_column: timestamp
-  metric: [mase, smape, mape, rmse, mql, wql, sql]
-  quantile_levels: [0.05, 0.5, 0.95]
-  folds: 2
diff --git a/sample_configs/cloud_configs/amlb_configs/config.yaml b/sample_configs/cloud_configs/amlb_configs/config.yaml
deleted file mode 100644
index 646d805a..00000000
--- a/sample_configs/cloud_configs/amlb_configs/config.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-frameworks:              # configuration namespace for the frameworks definitions.
-  definition_file:       # list of yaml files describing the frameworks base definitions.
-    - '{root}/resources/frameworks.yaml'
-    - '{user}/frameworks.yaml'
-  allow_duplicates: true     # if true, the last definition is used.
-  tags: ['stable', 'latest', '2020Q2', '2021Q3', '2023Q2', 'example']  # the list of supported tags when looking up frameworks:
-                              # for example frmwk:latest will look for framework frmwk in a frameworks_latest.yaml file if present.
-benchmarks:                     # configuration namespace for the benchmarks definitions.
-  definition_dir:               
-    - '{root}/resources/benchmarks'
-    - '{user}/benchmarks'
-  constraints_file:             # list of yaml files describing the benchmarks runtime constraints.
-    - '{root}/resources/constraints.yaml'
-    - '{user}/constraints.yaml'
diff --git a/sample_configs/cloud_configs/amlb_configs/constraints.yaml b/sample_configs/cloud_configs/amlb_configs/constraints.yaml
deleted file mode 100644
index 747faecf..00000000
--- a/sample_configs/cloud_configs/amlb_configs/constraints.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
----
-
-test101:
-  folds: 3
-  max_runtime_seconds: 600
-  cores: 4
-  min_vol_size_mb: 100000
diff --git a/sample_configs/cloud_configs/amlb_configs/frameworks_example.yaml b/sample_configs/cloud_configs/amlb_configs/frameworks_example.yaml
deleted file mode 100644
index f3127079..00000000
--- a/sample_configs/cloud_configs/amlb_configs/frameworks_example.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
----
-
-#########################
-### AutoML frameworks ###
-#########################
-
-######### Do Not Remove #########
-AutoGluon:
-  version: "latest"
-######### Do Not Remove #########
-
-
-AutoGluon_dev:
-  extends: AutoGluon
-  repo: https://github.com/autogluon/autogluon.git
-  version: stable_GA4_update  # branch name
-  params:  # TabularPredictor.fit(params)
-    presets: best_quality
diff --git a/sample_configs/cloud_configs/custom_metrics/sample_metrics.py b/sample_configs/cloud_configs/custom_metrics/sample_metrics.py
deleted file mode 100644
index 460af376..00000000
--- a/sample_configs/cloud_configs/custom_metrics/sample_metrics.py
+++ /dev/null
@@ -1,11 +0,0 @@
-def f1_score(y_true, y_pred):
-    assert len(y_true) == len(y_pred)
-
-    TP = ((y_true == 1) & (y_pred == 1)).sum()
-    FP = ((y_true == 0) & (y_pred == 1)).sum()
-    FN = ((y_true == 1) & (y_pred == 0)).sum()
-
-    precision = TP / (TP + FP) if TP + FP != 0 else 0
-    recall = TP / (TP + FN) if TP + FN != 0 else 0
-
-    return 2 * precision * recall / (precision + recall) if precision + recall != 0 else 0
diff --git a/sample_configs/cloud_configs/dataloaders/paper_image_datasets.yaml b/sample_configs/cloud_configs/dataloaders/paper_image_datasets.yaml
deleted file mode 100644
index 043aaedc..00000000
--- a/sample_configs/cloud_configs/dataloaders/paper_image_datasets.yaml
+++ /dev/null
@@ -1,119 +0,0 @@
-# data = custom_class(dataset_name=dataset_name, split=split, **kwargs)
-# custom_class: custom dataloader class, e.g. sample_configs/vision_dataset.py
-
-
-base: &base
-  url: s3://zs-models/datasets/{name}.zip
-  splits:
-    - train
-    - test
-  image_columns:
-    - ImageID
-  text_columns:
-  label_columns:
-    - LabelName
-  columns_to_drop:
-  annotation: "{name}_{split}_annotations.csv"
-  image_path: "{name}/{value}"
-  metric: acc
-  problem_type: multiclass
-  
-fashion_mnist:
-  <<: *base
-  image_path: "{split}/{value}"
-
-casting:
-  <<: *base
-  image_path: "{value}"
-  problem_type: binary
-  metric: roc_auc
-
-food101:
-  <<: *base
-
-oxfordflowers:
-  <<: *base
-  image_path: "{name}/{split}/{value}"
-
-OxfordIIITPet:
-  <<: *base
-  splits:
-    - train
-    - validation
-    - test
-  annotation: "{name}_{split}_anno.csv"
-  image_path: "{split}/{value}"
-
-europeanflooddepth:
-  <<: *base
-  problem_type: binary
-  metric: roc_auc
-
-magnetictiledefects:
-  <<: *base
-
-stanfordcars:
-  <<: *base
-
-cub200:
-  <<: *base
-
-
-petfinder:
-  <<: *base
-  splits:
-    - train
-  text_columns:
-    - Description 
-  label_columns:
-    - AdoptionSpeed
-  annotation: "{name}_{split}_annotations.csv"
-  image_path: "{value}"
-  metric: acc
-  problem_type: multiclass
-
-
-ham10000:
-  <<: *base
-  splits:
-    - train
-  label_columns:
-    - dx
-  image_path: "{split}/{value}"
-
-cd18:
-  <<: *base
-  splits:
-    - train
-  label_columns:
-    - Price
-  image_path: "{split}/{value}"
-  metric: rmse
-  problem_type: regression
-
-    
-hateful_meme:
-  <<: *base
-  splits:
-    - train
-  text_columns:
-    - text    
-  image_path: "{value}"
-  metric: roc_auc
-  problem_type: binary
-
-
-memotion:
-  <<: *base
-  splits:
-    - train
-  text_columns:
-    - text_corrected  
-  columns_to_drop:
-    - text_ocr
-  label_columns:
-    - overall_sentiment
-  annotation: "{name}_{split}_annotations.csv"
-  image_path: "{split}/{value}"
-  metric: acc
-  problem_type: multiclass
diff --git a/sample_configs/cloud_configs/dataloaders/paper_text_datasets.yaml b/sample_configs/cloud_configs/dataloaders/paper_text_datasets.yaml
deleted file mode 100644
index 067e12d1..00000000
--- a/sample_configs/cloud_configs/dataloaders/paper_text_datasets.yaml
+++ /dev/null
@@ -1,123 +0,0 @@
-# supports both regular and fewshot datasets
-# for fewshot:
-# custom_dataloader:
-#     dataloader_file: sample_configs/text_dataset.py   # relative path to WORKDIR
-#     class_name: TextDataset
-#     lang: en
-#     fewshot: True
-#     shot: 10
-#     seed: 8
-#     dataset_config_file: test/automm_text_datasets.yaml
-# 
-# When running in AWS mode, DATA_BUCKET: <dataset_bucket> needs to be specified in the run config
-
-base: &base
-  url: s3://automl-mm-bench/comprehend_benchmarks/{name}/{lang}/{subsample_path}{split}.csv
-  splits:
-    - train
-    - validation
-    - test
-  langs:
-    - en
-    - de
-    - es
-    - fr
-    - it
-
-  data_columns:
-    - label
-    - text
-  image_columns:
-  text_columns:
-    - text
-  label_columns:
-    - label
-  columns_to_drop:
-  data_sep: ","
-  metric: acc
-  problem_type: multiclass
-  subsample_path: subsampling/{shot}_shot/seed{seed}/
-  subsample_splits:
-    - train
-    - validation
-  shots:
-    - 1
-    - 5
-    - 10
-    - 20
-    - 50
-    - 100
-    - 500
-    - 1000
-  seeds:
-    - 0
-    - 1
-    - 2
-    - 3
-    - 4
-    - 5
-    - 6
-    - 7
-    - 8
-    - 9
-
-
-financial_news:
-  <<: *base
-  url: s3://zs-models/datasets/financial_news/{lang}/{split}.csv
-  splits:
-    - train
-  langs:
-    - en
-
-MLDoc-11000:
-  <<: *base
-
-MultiATIS-5000:
-  <<: *base
-  langs:
-    - en
-    - es
-    - fr
-    - pt
-
-amazon_reviews:
-  <<: *base
-  langs:
-    - en
-  problem_type: binary
-  metric: roc_auc
-
-
-gnad10:
-  <<: *base
-  langs:
-    - de
-
-fb_dialog:
-  <<: *base
-  langs:
-    - en
-    - es
-
-yahoo_anwsers:
-  <<: *base
-  langs:
-    - en
-
-french_tweets_sentiment:
-  <<: *base
-  langs:
-    - fr
-  problem_type: binary
-  metric: roc_auc
-
-ag_news:
-  <<: *base
-  langs:
-    - en
-
-SNIPS:
-  <<: *base
-  langs:
-    - en
diff --git a/sample_configs/cloud_configs/dataloaders/paper_text_tabular_datasets.yaml b/sample_configs/cloud_configs/dataloaders/paper_text_tabular_datasets.yaml
deleted file mode 100644
index f4ddfd70..00000000
--- a/sample_configs/cloud_configs/dataloaders/paper_text_tabular_datasets.yaml
+++ /dev/null
@@ -1,90 +0,0 @@
-base: &base
-  url: s3://zs-models/{name}/{split}.csv
-  test_split_name: test
-  splits:
-    - train
-    - test
-  image_columns:
-  text_columns:
-  columns_to_drop:
-  label_columns:
-    - LabelName
-  metric: acc
-  problem_type: multiclass
-  
-
-airbnb:
-  <<: *base
-  url: s3://automl-mm-bench/airbnb_melbourne/{split}.pq
-  text_columns:
-    - summary
-    - amenities
-  label_columns:
-    - price_label
-  columns_to_drop:
-    - id
-    - listing_url
-    - scrape_id
-    - last_scraped
-    - picture_url
-    - host_id
-    - host_url
-    - host_name
-    - host_thumbnail_url
-    - host_picture_url
-    - calendar_last_scraped
-
-
-kick_start:
-  <<: *base
-  url: s3://automl-mm-bench/kick_starter_funding/{split}.csv
-  text_columns:
-    - name
-    - desc
-    - keywords
-  label_columns:
-    - final_status
-  metric: roc_auc
-  problem_type: binary
-
-
-cloth_review:
-  <<: *base
-  url: s3://automl-mm-bench/women_clothing_review/{split}.pq
-  text_columns:
-    - Title
-    - Review Text
-  label_columns:
-    - Rating
-  metric: rmse
-  problem_type: regression
-
-
-news_popularity:
-  <<: *base
-  url: s3://automl-mm-bench/news_popularity2/{split}.csv
-  text_columns: 
-    - article_title
-  label_columns:
-    - log_shares
-  image_columns:
-  metric: rmse
-  problem_type: regression
-
-
-cal_house:
-  <<: *base
-  url: s3://automl-mm-bench/kaggle-california-house-prices/{split}.csv
-  text_columns:
-    - Summary
-    - Appliances included
-    - Laundry features
-    - Parking features
-    - Flooring
-    - Elementary School
-    - Middle School
-    - High School
-  label_columns:
-    - Sold Price
-  metric: rmse
-  problem_type: regression
diff --git a/sample_configs/cloud_configs/dataloaders/text_dataloader.py b/sample_configs/cloud_configs/dataloaders/text_dataloader.py
deleted file mode 100644
index 452a9808..00000000
--- a/sample_configs/cloud_configs/dataloaders/text_dataloader.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import logging
-import os
-
-import pandas as pd
-import yaml
-
-from autogluon.bench.utils.dataset_utils import get_data_home_dir
-from autogluon.common.loaders._utils import download
-
-logger = logging.getLogger(__name__)
-
-
-class TextDataLoader:
-    def __init__(
-        self,
-        dataset_name: str,
-        dataset_config_file: str,
-        split: str = "train",
-        fewshot: bool = False,
-        shot: int = 50,
-        seed: int = 0,
-    ):
-        with open(dataset_config_file, "r") as f:
-            config = yaml.safe_load(f)
-        self.dataset_config = config[dataset_name]
-        if split == "val":
-            split = "validation"
-        if split not in self.dataset_config["splits"]:
-            logger.warning(f"Data split {split} not available.")
-            self.data = None
-            return
-
-        self.name = dataset_name
-        self.split = split
-        self.image_columns = self.dataset_config["image_columns"] or []
-        self.text_columns = self.dataset_config["text_columns"] or []
-        self.columns_to_drop = self.dataset_config["columns_to_drop"] or []
-        self.label_columns = self.dataset_config["label_columns"]
-        
-        lang = self.dataset_config["langs"][0]
-        subsample_path = self.dataset_config["subsample_path"].format(shot=shot, seed=seed)
-        url = self.dataset_config["url"].format(
-            name=self.name,
-            lang=lang,
-            subsample_path=subsample_path if fewshot and self.split in self.dataset_config["subsample_splits"] else "",
-            split=self.split,
-        )
-        base_dir = get_data_home_dir()
-        data_dir = os.path.join(self.name, lang)
-        if fewshot:
-            data_dir = os.path.join(data_dir, "subsampling", f"{shot}_shot-seed{seed}")
-        self.dataset_dir = os.path.join(base_dir, data_dir)
-        data_path = os.path.join(self.dataset_dir, f"{split}.csv")
-        download(url, path=data_path)
-
-        self.data = pd.read_csv(
-            data_path,
-            header=None,
-            names=self.dataset_config["data_columns"],
-            sep=self.dataset_config.get("data_sep", ","),
-            on_bad_lines="warn",
-        )
-
-    @property
-    def problem_type(self):
-        return self.dataset_config["problem_type"]
-
-    @property
-    def metric(self):
-        return self.dataset_config["metric"]
-
diff --git a/sample_configs/cloud_configs/dataloaders/text_datasets.yaml b/sample_configs/cloud_configs/dataloaders/text_datasets.yaml
deleted file mode 100644
index 8960f4af..00000000
--- a/sample_configs/cloud_configs/dataloaders/text_datasets.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-# supports both regular and fewshot datasets
-# for fullshot:
-# custom_dataloader:
-#     dataloader_file: sample_configs/text_dataset.py   # relative path to WORKDIR
-#     class_name: TextDataset
-#     lang: en
-#     dataset_config_file: test/automm_text_datasets.yaml
-
-# for fewshot:
-# custom_dataloader:
-#     dataloader_file: sample_configs/text_dataset.py   # relative path to WORKDIR
-#     class_name: TextDataset
-#     lang: en
-#     fewshot: True
-#     shot: 10
-#     seed: 8
-#     dataset_config_file: test/automm_text_datasets.yaml
-# 
-# When running in AWS mode, DATA_BUCKET: <dataset_bucket> needs to be specified in the run config
-
-base: &base
-  url: s3://<data_bucket>/{name}/{lang}/{subsample_path}{split}.csv
-  splits:
-    - train
-    - validation
-    - test
-  langs:
-    - de
-    - en
-  data_columns:
-    - label
-    - text
-  feature_columns:
-    - text
-  label_columns:
-    - label
-  data_sep: ","
-  metric: acc
-  problem_type: multiclass
-  subsample_path: subsampling/{shot}_shot/seed{seed}/
-  subsample_splits:
-    - train
-    - validation
-  shots:
-    - 1
-    - 5
-    - 10
-    - 20
-  seeds:
-    - 0
-    - 1
-    - 2
-    - 3
-    - 4
-    - 5
-    - 6
-    - 7
-    - 8
-    - 9
-
-
-dataset_1:
-  <<: *base
-  langs:
-    - en
-  problem_type: binary
-
-dataset_2:
-  <<: *base
-  langs:
-    - fr
-  problem_type: binary
-
diff --git a/sample_configs/cloud_configs/dataloaders/text_tabular_dataloader.py b/sample_configs/cloud_configs/dataloaders/text_tabular_dataloader.py
deleted file mode 100644
index c587af89..00000000
--- a/sample_configs/cloud_configs/dataloaders/text_tabular_dataloader.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import logging
-import os
-
-import pandas as pd
-import yaml
-
-from autogluon.bench.utils.dataset_utils import get_data_home_dir
-from autogluon.common.loaders import load_zip
-from autogluon.common.loaders._utils import download
-
-
-def path_expander(path, base_folder):
-    path_l = path.split(";")
-    return ";".join([os.path.abspath(os.path.join(base_folder, path)) for path in path_l])
-
-
-logger = logging.getLogger(__name__)
-
-
-class TextTabularDataLoader:
-    def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"):
-        with open(dataset_config_file, "r") as f:
-            config = yaml.safe_load(f)
-
-        self.dataset_config = config[dataset_name]
-        if split not in self.dataset_config["splits"]:
-            logger.warning(f"Data split {split} not available.")
-            self.data = None
-            return
-        if split == "test" and self.dataset_config["test_split_name"] == "dev":
-            split = "dev"
-
-        self.name = dataset_name
-        self.split = split
-        self.image_columns = []
-        self.text_columns = self.dataset_config["text_columns"] or []
-        self.label_columns = self.dataset_config["label_columns"]
-        self.columns_to_drop = self.dataset_config["columns_to_drop"] or []
-
-        # url = self.dataset_config["url"].format(name=self.name)
-        # base_dir = get_data_home_dir()
-        # load_zip.unzip(url, unzip_dir=base_dir)
-        # self.dataset_dir = os.path.join(base_dir, self.name)
-
-        url = self.dataset_config["url"].format(split=self.split)
-        file_extention = os.path.splitext(url)[-1]
-        base_dir = get_data_home_dir()
-
-        self.data_path = os.path.join(base_dir, self.name, f"{split}{file_extention}")
-        download(url, path=self.data_path)
-        if file_extention == ".csv":
-            self.data = pd.read_csv(self.data_path)
-        elif file_extention == ".pq":
-            self.data = pd.read_parquet(self.data_path)
-        else:
-            raise NotImplementedError("Unsupported data type.")
-
-        if self.columns_to_drop is not None:
-            self.data.drop(columns=self.columns_to_drop, inplace=True)
-
-    @property
-    def problem_type(self):
-        return self.dataset_config["problem_type"]
-
-    @property
-    def metric(self):
-        return self.dataset_config["metric"]
-
-
diff --git a/sample_configs/cloud_configs/dataloaders/vision_dataloader.py b/sample_configs/cloud_configs/dataloaders/vision_dataloader.py
deleted file mode 100644
index 84ab5d91..00000000
--- a/sample_configs/cloud_configs/dataloaders/vision_dataloader.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import logging
-import os
-
-import pandas as pd
-import yaml
-
-from autogluon.bench.utils.dataset_utils import get_data_home_dir
-from autogluon.common.loaders import load_zip
-
-
-def path_expander(path, base_folder):
-    path_l = path.split(";")
-    return ";".join([os.path.abspath(os.path.join(base_folder, path)) for path in path_l])
-
-
-logger = logging.getLogger(__name__)
-
-
-class VisionDataLoader:
-    def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"):
-        with open(dataset_config_file, "r") as f:
-            config = yaml.safe_load(f)
-
-        self.dataset_config = config[dataset_name]
-        if split == "val":
-            split = "validation"
-        if split not in self.dataset_config["splits"]:
-            logger.warning(f"Data split {split} not available.")
-            self.data = None
-            return
-
-        self.name = dataset_name
-        self.split = split
-        self.image_columns = self.dataset_config["image_columns"] or []
-        self.text_columns = self.dataset_config["text_columns"] or []
-        self.columns_to_drop = self.dataset_config["columns_to_drop"] or []
-        self.label_columns = self.dataset_config["label_columns"]
-
-        url = self.dataset_config["url"].format(name=self.name)
-        base_dir = get_data_home_dir()
-        load_zip.unzip(url, unzip_dir=base_dir)
-        self.dataset_dir = os.path.join(base_dir, self.name)
-
-        annotation_filename = self.dataset_config["annotation"].format(name=self.name, split=self.split)
-        image_path_pattern = self.dataset_config["image_path"]
-
-        self.data = pd.read_csv(os.path.join(self.dataset_dir, annotation_filename))
-        self.tabular_columns = self.data.columns.difference(self.image_columns + self.text_columns + self.label_columns + self.columns_to_drop)
-        print("Image columns: ", self.image_columns)
-        print("Text columns: ", self.text_columns)
-        print("Tabular columns: ", self.tabular_columns)
-        self.data.drop(columns=self.columns_to_drop, inplace=True)
-        self.data.dropna(inplace=True)
-
-        image_base_path = image_path_pattern.format(name=self.name, split=self.split, value="")
-        for col in self.image_columns:
-            self.data[col] = self.data[col].apply(
-                lambda ele: path_expander(ele, base_folder=os.path.join(self.dataset_dir, image_base_path))
-            )
-
-    @property
-    def problem_type(self):
-        return self.dataset_config["problem_type"]
-
-    @property
-    def metric(self):
-        return self.dataset_config["metric"]
diff --git a/sample_configs/cloud_configs/dataloaders/vision_datasets.yaml b/sample_configs/cloud_configs/dataloaders/vision_datasets.yaml
deleted file mode 100644
index a095403e..00000000
--- a/sample_configs/cloud_configs/dataloaders/vision_datasets.yaml
+++ /dev/null
@@ -1,31 +0,0 @@
-# data = custom_class(dataset_name=dataset_name, split=split, **kwargs)
-# custom_class: custom dataloader class, e.g. sample_configs/vision_dataset.py
-# When running in AWS mode, DATA_BUCKET: <dataset_bucket> needs to be specified in the run config
-
-base: &base
-  url: s3://<dataset_bucket>/vision_datasets/{name}.zip
-  splits:
-    - train
-    - test
-  feature_columns:
-    - ImageID
-  label_columns:
-    - LabelName
-  annotation: "{name}_{split}_annotations.csv"
-  image_path: "{name}/{value}"
-  metric: acc
-  problem_type: multiclass
-  
-
-dataset_1:
-  <<: *base
-  image_path: "{split}/{value}"
-  problem_type: binary
-
-dataset_2:
-  <<: *base
-
-dataset_3:
-  <<: *base
-  image_path: "{name}/{split}/{value}"
-
diff --git a/sample_configs/cloud_configs/multimodal_cloud_configs.yaml b/sample_configs/cloud_configs/multimodal_cloud_configs.yaml
deleted file mode 100644
index 1b4bb707..00000000
--- a/sample_configs/cloud_configs/multimodal_cloud_configs.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-# Infra configurations
-cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
-  CDK_DEPLOY_ACCOUNT: dummy  # required, update with your AWS account
-  CDK_DEPLOY_REGION: dummy  # required, update with your desired region
-  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
-  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
-  # DATA_BUCKET: existing-s3-bucket  # optional, S3 bucket to download your private datasets
-  # MAX_MACHINE_NUM: 20   # optional, default 20
-  # BLOCK_DEVICE_VOLUME: 100   # optional, default 100GB
-  # RESERVED_MEMORY_SIZE: 15000  # optional, default 15000MB
-  # VPC_NAME: existing-vpc-name  # optional
-
-module: multimodal  # required
-mode: aws  # required
-benchmark_name: ag_bench  # required
-root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
-
-constraint: test  # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
-framework: AutoGluon_stable  # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
-dataset_name:  # required
-  - shopee
-  - melbourne_airbnb
-
-### Customizations ####
-# custom_resource_dir: sample_configs/resources/  # path to custom multimodal_constraints.yaml and multimodal_frameworks.yaml
-# custom_dataloader:
-#     dataloader_file: sample_configs/dataloaders/vision_dataloader.py   # relative path to WORKDIR
-#     class_name: VisionDataLoader
-#     dataset_config_file: sample_configs/dataloaders/vision_datasets.yaml 
-
-# custom_metrics:
-#     metrics_path: sample_configs/custom_metrics/sample_metrics.py
-#     function_name: f1_score
-#     # Other optional parameters can be set, ref: https://auto.gluon.ai/stable/tutorials/tabular/advanced/tabular-custom-metric.html
-#     optimum: 1
-#     greater_is_better: true
diff --git a/sample_configs/cloud_configs/multimodal_local_configs.yaml b/sample_configs/cloud_configs/multimodal_local_configs.yaml
deleted file mode 100644
index 0bfadcd3..00000000
--- a/sample_configs/cloud_configs/multimodal_local_configs.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-# Benchmark configurations
-module: multimodal  # required
-mode: local  # required
-benchmark_name: ag_bench  # required
-root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
-# METRICS_BUCKET: autogluon-benchmark-metrics  # optional, required only if you want to upload metrics to S3
-
-# Multimodal specific
-framework: AutoGluon_stable  # required
-dataset_name:  # required
-  melbourne_airbnb  
-
-#### Customizations ####
-# custom_resource_dir: sample_configs/resources/  # path to custom multimodal_frameworks.yaml and multimodal_constraints.yaml
-# custom_dataloader:
-#     dataloader_file: sample_configs/dataloaders/vision_dataloader.py   # relative path to WORKDIR
-#     class_name: VisionDataLoader
-#     dataset_config_file: sample_configs/dataloaders/vision_datasets.yaml 
-# custom_metrics:
-#     metrics_path: sample_configs/custom_metrics/sample_metrics.py
-#     function_name: f1_score
-#     # Other optional parameters can be set, ref: https://auto.gluon.ai/stable/tutorials/tabular/advanced/tabular-custom-metric.html
-#     optimum: 1
-#     greater_is_better: true
diff --git a/sample_configs/cloud_configs/resources/bench_all.py b/sample_configs/cloud_configs/resources/bench_all.py
deleted file mode 100644
index f098db57..00000000
--- a/sample_configs/cloud_configs/resources/bench_all.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import random
-n_experiments = 5
-seeds = []
-for i in range(n_experiments):
-    seeds.append(random.randint(0, 100))
-
-seeds = [22, 92, 54, 86, 41]
-
-config_paths = [
-    "sample_configs/paper_text_tabular_local_configs.yaml",
-    "sample_configs/paper_text_local_configs.yaml",
-    "sample_configs/paper_image_local_configs.yaml",
-    # "sample_configs/multimodal_cloud_text_configs.yaml",
-    # "sample_configs/multimodal_cloud_text_fs_configs.yaml",
-    # "sample_configs/multimodal_cloud_text_tabular_configs.yaml",
-    # "sample_configs/multimodal_cloud_text_tabular_image_configs.yaml",
-    # "sample_configs/multimodal_cloud_text_tabular_image_standard_configs.yaml"
-]
-frameworks = [
-    # "AutoGluon_best_master",
-    # "ablation_base",
-    # "ablation_greedy_soup",
-    # "ablation_gradient_clip",
-    "ablation_warmup_steps",
-    "ablation_cosine_decay",
-    "ablation_weight_decay",
-    # "ablation_lr_decay",
-    # "autokeras_master",
-    # "torch_compile_best",
-    # "AutoGluon_best_master",
-    # "AutoGluon_high_master",
-    # "AutoGluon_medium_master",
-    # "AutoGluon_high_vitlarge",
-    # "AutoGluon_medium_vitlarge",
-    # "AutoGluon_best_vitlarge",
-    # "AutoGluon_best_caformer",
-    # "AutoGluon_best_beit",
-    # "AutoGluon_best_swinv2"
-    # "AutoGluon_high_0_8",
-    # "AutoGluon_medium_0_8",
-    # "AutoGluon_best_0_8",
-]
-constraints = [
-    "g4_12x"
-]
-fs = [
-    1,
-    5,
-    10
-]
-# module = "autokeras"
-module = "multimodal"
-
-import yaml
-import os
-import subprocess
-
-config_root = "./temp_configs"
-os.makedirs(config_root, exist_ok=True)
-
-for seed in seeds:
-    print("Seed: ", seed)
-    for constraint in constraints:
-        os.makedirs(f"{config_root}/{constraint}", exist_ok=True)
-        for framework in frameworks:
-            # for shot in fs:
-                config_dir = f"{config_root}/{constraint}/{framework}"
-                os.makedirs(config_dir, exist_ok=True)
-
-                for config_path in config_paths:
-                    with open(config_path, "r") as f:
-                        configs = yaml.safe_load(f)
-                        if constraint == "g4_12x":
-                            configs["cdk_context"]["PREFIX"] = f"{configs['cdk_context']['PREFIX']}-multi"
-                        configs["constraint"] = constraint
-                        configs["framework"] = framework
-                        configs["module"] = module
-                        configs["seed"] = seed 
-                        # configs["custom_dataloader"]["shot"] = shot
-                        configs["benchmark_name"] = f"{configs['benchmark_name']}-{seed}"
-                        new_config_path = os.path.join(config_dir, os.path.basename(config_path))
-                        with open(new_config_path, "w") as new_f:
-                            yaml.dump(configs, new_f)
-                        print("Running config: ", new_config_path)
-                        command = ["agbench", "run", new_config_path]
-                        subprocess.run(command)
-
diff --git a/sample_configs/cloud_configs/resources/multimodal_constraints.yaml b/sample_configs/cloud_configs/resources/multimodal_constraints.yaml
deleted file mode 100644
index 940345c5..00000000
--- a/sample_configs/cloud_configs/resources/multimodal_constraints.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-10m4x:
-  TIME_LIMIT: 500
-  INSTANCE: g4dn.4xlarge
-  # MAX_MACHINE_NUM: 20   # optional, default 20
-  # BLOCK_DEVICE_VOLUME: 100   # optional, default 100GB
-  # RESERVED_MEMORY_SIZE: 15000  # optional, default 15000MB
-
-g4_12x:
-  INSTANCE: g4dn.12xlarge
-  MAX_MACHINE_NUM: 1000   # optional, default 20
-  BLOCK_DEVICE_VOLUME: 1000   # optional, default 100GB
-  RESERVED_MEMORY_SIZE: 12000  # optional, default 15000MB
-
diff --git a/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml b/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
deleted file mode 100644
index 38dc46f1..00000000
--- a/sample_configs/cloud_configs/resources/multimodal_frameworks.yaml
+++ /dev/null
@@ -1,93 +0,0 @@
-AutoGluon_branch:
-  repo: https://github.com/autogluon/autogluon.git
-  version: stable_GA4_update
-  params:  # MultimodalPredictor.fit(params)
-    presets: medium_quality
-    time_limit:  90
-    hyperparameters:
-      optimization.max_epochs: 1
-      optimization.learning_rate: 0.005
-
-AutoGluon_best_master:
-  repo: https://github.com/suzhoum/autogluon.git 
-  version: add_constant_lr_decay
-  params:  # MultimodalPredictor.fit(params)
-    presets: best_quality
-
-ablation_base:
-  repo: https://github.com/suzhoum/autogluon.git
-  version: add_constant_lr_decay
-  params:  # MultimodalPredictor.fit(params)
-    presets: best_quality
-    hyperparameters:
-        optimization.top_k_average_method: best
-        optimization.gradient_clip_val: 0
-        optimization.warmup_steps: 0
-        optimization.lr_schedule: constant
-        optimization.weight_decay: 0
-        optimization.lr_decay: 0
-
-
-ablation_greedy_soup:
-  repo: https://github.com/suzhoum/autogluon.git
-  version: add_constant_lr_decay
-  params:  # MultimodalPredictor.fit(params)
-    presets: best_quality
-    hyperparameters:
-        optimization.gradient_clip_val: 0
-        optimization.warmup_steps: 0
-        optimization.lr_schedule: constant
-        optimization.weight_decay: 0
-        optimization.lr_decay: 0
-
-ablation_gradient_clip:
-  repo: https://github.com/suzhoum/autogluon.git
-  version: add_constant_lr_decay
-  params:  # MultimodalPredictor.fit(params)
-    presets: best_quality
-    hyperparameters:
-        optimization.warmup_steps: 0
-        optimization.lr_schedule: constant
-        optimization.weight_decay: 0
-        optimization.lr_decay: 0
-
-ablation_warmup_steps:
-  repo: https://github.com/suzhoum/autogluon.git
-  version: add_constant_lr_decay
-  params:  # MultimodalPredictor.fit(params)
-    presets: best_quality
-    hyperparameters:
-        optimization.lr_schedule: constant
-        optimization.weight_decay: 0
-        optimization.lr_decay: 0
-
-ablation_cosine_decay:
-  repo: https://github.com/suzhoum/autogluon.git
-  version: add_constant_lr_decay
-  params:  # MultimodalPredictor.fit(params)
-    presets: best_quality
-    hyperparameters:
-        optimization.weight_decay: 0
-        optimization.lr_decay: 0
-
-
-ablation_weight_decay:
-  repo: https://github.com/suzhoum/autogluon.git
-  version: add_constant_lr_decay
-  params:  # MultimodalPredictor.fit(params)
-    presets: best_quality
-    hyperparameters:
-        optimization.lr_decay: 0
-
-
-ablation_lr_decay:
-  repo: https://github.com/suzhoum/autogluon.git
-  version: add_constant_lr_decay
-  params:  # MultimodalPredictor.fit(params)
-    presets: best_quality
-
-
-autokeras_master:
-  repo: https://github.com/keras-team/keras-tuner.git 
-  version: master
-  
diff --git a/sample_configs/cloud_configs/tabular_cloud_configs.yaml b/sample_configs/cloud_configs/tabular_cloud_configs.yaml
deleted file mode 100644
index a73a95d1..00000000
--- a/sample_configs/cloud_configs/tabular_cloud_configs.yaml
+++ /dev/null
@@ -1,40 +0,0 @@
-# Infra configurations
-cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
-  CDK_DEPLOY_ACCOUNT: dummy  # required, update with your AWS account
-  CDK_DEPLOY_REGION: dummy  # required, update with your desired region
-  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
-  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
-  # DATA_BUCKET: existing-s3-bucket  # optional, S3 bucket to download your private datasets
-  # MAX_MACHINE_NUM: 20   # optional, default 20
-  # BLOCK_DEVICE_VOLUME: 100   # optional, default 100GB
-  # RESERVED_MEMORY_SIZE: 15000  # optional, default 15000MB
-  # INSTANCE: g4dn.2xlarge  # optional, default g4dn.2xlarge
-  # TIME_LIMIT: 3600  # optional, EC2 timeout, default 3600s
-  # VPC_NAME: existing-vpc-name  # optional
-
-# Benchmark configurations
-module: tabular  # required
-mode: aws  # required
-benchmark_name: ag_bench  # required
-root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
-
-git_uri#branch:  # required, can be any AMLB custom fork and branch, only one value allowed
-  https://github.com/openml/automlbenchmark.git#master
-framework:  # required, only one value allowed
-  AutoGluon:stable
-amlb_constraint:  # optional, only one value allowed, default: test
-  test
-amlb_benchmark:  # required, a list of amlb benchmarks to run, ref: https://github.com/openml/automlbenchmark/tree/master/resources/benchmarks
-  - small
-amlb_task:  # optional, value of each key is a subset of dataset names in each 'amlb_benchmark', ref: https://github.com/openml/automlbenchmark/blob/master/resources/benchmarks/small.yaml
-  small:
-    - credit-g
-    - vehicle
-
-# fold_to_run:  # optional, capped by `folds` in amlb_constraint, ref: https://github.com/openml/automlbenchmark/blob/master/resources/constraints.yaml
-#   small:
-#     credit-g:
-#       - 3
-#       - 6
-# amlb_user_dir:  # optional, local/s3 path where all the amlb customizations are stored, only one value allowed
-#   sample_configs/amlb_configs
diff --git a/sample_configs/cloud_configs/tabular_local_configs.yaml b/sample_configs/cloud_configs/tabular_local_configs.yaml
deleted file mode 100644
index 62196d54..00000000
--- a/sample_configs/cloud_configs/tabular_local_configs.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-# Benchmark configurations
-module: tabular  # required
-mode: local  # required
-benchmark_name: ag_bench  # required
-root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
-# METRICS_BUCKET: autogluon-benchmark-metrics  # optional, required only if you want to upload metrics to S3
-
-# Tabular specific
-git_uri#branch: https://github.com/openml/automlbenchmark.git#stable  # required, can be any AMLB custom fork and branch
-framework: AutoGluon:stable  # required
-amlb_benchmark: small  # required
-amlb_task: vehicle # optional
-amlb_constraint: test  # optional
-fold_to_run: 1  # optional, the specific data fold to run
-amlb_user_dir: sample_configs/amlb_configs  # optional, local/s3 path where all the amlb customizations are stored
diff --git a/sample_configs/cloud_configs/timeseries_cloud_configs.yaml b/sample_configs/cloud_configs/timeseries_cloud_configs.yaml
deleted file mode 100644
index 6cb9441c..00000000
--- a/sample_configs/cloud_configs/timeseries_cloud_configs.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# Infra configurations
-cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
-  CDK_DEPLOY_ACCOUNT: dummy  # required, update with your AWS account
-  CDK_DEPLOY_REGION: dummy  # required, update with your desired region
-  PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
-  METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
-  # DATA_BUCKET: existing-s3-bucket  # optional, S3 bucket to download your private datasets
-  # MAX_MACHINE_NUM: 20   # optional, default 20
-  # BLOCK_DEVICE_VOLUME: 100   # optional, default 100GB
-  # RESERVED_MEMORY_SIZE: 15000  # optional, default 15000MB
-  # INSTANCE: g4dn.2xlarge  # optional, default g4dn.2xlarge
-  # TIME_LIMIT: 3600  # optional, EC2 timeout, default 3600s
-  # VPC_NAME: existing-vpc-name  # optional
-
-# Benchmark configurations
-module: timeseries  # required
-mode: aws  # required
-benchmark_name: ag_bench  # required
-root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
-
-git_uri#branch:  # required, can be any AMLB custom fork and branch, only one value allowed
-  https://github.com/openml/automlbenchmark.git#master
-framework:  # required, only one value allowed
-  AutoGluon_dev:example
-amlb_constraint:  # optional, only one value allowed, default: test
-  test
-amlb_benchmark:  # required, a list of amlb benchmarks to run, ref: https://github.com/openml/automlbenchmark/tree/master/resources/benchmarks
-  - timeseries_test
-amlb_task:  # optional, value of each key is a subset of dataset names in each 'amlb_benchmark', ref: https://github.com/openml/automlbenchmark/blob/master/resources/benchmarks/small.yaml
-  timeseries_test:
-    - m4_hourly_2
-# fold_to_run:  # optional, capped by `folds` in amlb_constraint, ref: https://github.com/openml/automlbenchmark/blob/master/resources/constraints.yaml
-amlb_user_dir:  # optional, local/s3 path where all the amlb customizations are stored, only one value allowed
-  sample_configs/amlb_configs
diff --git a/sample_configs/cloud_configs/timeseries_local_configs.yaml b/sample_configs/cloud_configs/timeseries_local_configs.yaml
deleted file mode 100644
index 838ad9b3..00000000
--- a/sample_configs/cloud_configs/timeseries_local_configs.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-# Benchmark configurations
-module: timeseries  # required
-mode: local  # required
-benchmark_name: ag_bench  # required
-root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
-# METRICS_BUCKET: autogluon-benchmark-metrics  # optional, required only if you want to upload metrics to S3
-
-# Tabular specific
-git_uri#branch: https://github.com/openml/automlbenchmark.git#stable  # required, can be any AMLB custom fork and branch
-framework: AutoGluon  # required
-amlb_benchmark: timeseries_test  # required
-amlb_task: m4_hourly_2 # optional
-amlb_constraint: test  # optional
-fold_to_run: 1  # optional, the specific data fold to run
-amlb_user_dir: sample_configs/amlb_configs  # optional, local/s3 path where all the amlb customizations are stored
diff --git a/sample_configs/paper_image_local_configs.yaml b/sample_configs/paper_image_local_configs.yaml
index c1cee4cc..3942c89f 100644
--- a/sample_configs/paper_image_local_configs.yaml
+++ b/sample_configs/paper_image_local_configs.yaml
@@ -1,11 +1,11 @@
-module: multimodal  # required, choise of 'multimodal' pr 'autokeras'
+module: autokeras  # required, choice of 'multimodal' or 'autokeras'
 mode: local  # required
 benchmark_name: ag_bench_image  # required
 root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
 
 framework:   # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
-  AutoGluon_best_master
-  # autokeras_master
+  # AutoGluon_best_master
+  autokeras_master
   # ablation_base
   # ablation_greedy_soup
   # ablation_gradient_clip

From e8a52d427da5a4f064cf500e0e11180432e878b1 Mon Sep 17 00:00:00 2001
From: Su Zhou <zhousu@amazon.com>
Date: Tue, 5 Mar 2024 16:12:21 +0000
Subject: [PATCH 11/13] update

---
 bench_all.py                                    | 17 ++++++++---------
 sample_configs/paper_image_cloud_configs.yaml   |  1 -
 sample_configs/paper_text_cloud_configs.yaml    |  2 +-
 .../resources/multimodal_frameworks.yaml        | 12 ++++++------
 4 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/bench_all.py b/bench_all.py
index 72c7d6d2..734495b9 100644
--- a/bench_all.py
+++ b/bench_all.py
@@ -4,22 +4,21 @@
 for i in range(n_experiments):
     seeds.append(random.randint(0, 100))
 
-# seeds = [22, 92, 54, 86, 41]
+seeds = [22, 92, 54, 86, 41]
 
 config_paths = [
-    "sample_configs/paper_text_tabular_local_configs.yaml",
-    "sample_configs/paper_text_local_configs.yaml",
-    "sample_configs/paper_image_local_configs.yaml",
+    "sample_configs/paper_text_tabular_cloud_configs.yaml",
+    "sample_configs/paper_text_cloud_configs.yaml",
+    "sample_configs/paper_image_cloud_configs.yaml",
 ]
 frameworks = [
-    # "AutoGluon_best_master",
-    # "ablation_base",
-    # "ablation_greedy_soup",
-    # "ablation_gradient_clip",
+    "ablation_base",
+    "ablation_greedy_soup",
+    "ablation_gradient_clip",
     "ablation_warmup_steps",
     "ablation_cosine_decay",
     "ablation_weight_decay",
-    # "ablation_lr_decay",
+    "ablation_lr_decay",
     # "autokeras_master",
 ]
 constraints = [
diff --git a/sample_configs/paper_image_cloud_configs.yaml b/sample_configs/paper_image_cloud_configs.yaml
index d9d49386..a3c5331f 100644
--- a/sample_configs/paper_image_cloud_configs.yaml
+++ b/sample_configs/paper_image_cloud_configs.yaml
@@ -1,5 +1,4 @@
 # Infra configurations
-# Infra configurations
 cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
   CDK_DEPLOY_ACCOUNT: 097403188315  # required, update with your AWS account
   CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
diff --git a/sample_configs/paper_text_cloud_configs.yaml b/sample_configs/paper_text_cloud_configs.yaml
index 25996146..2f98216d 100644
--- a/sample_configs/paper_text_cloud_configs.yaml
+++ b/sample_configs/paper_text_cloud_configs.yaml
@@ -1,4 +1,4 @@
-sample_configs/paper_text_# Infra configurations
+# Infra configurations
 cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
   CDK_DEPLOY_ACCOUNT: 097403188315  # required, update with your AWS account
   CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
diff --git a/sample_configs/resources/multimodal_frameworks.yaml b/sample_configs/resources/multimodal_frameworks.yaml
index 38dc46f1..623a6e9c 100644
--- a/sample_configs/resources/multimodal_frameworks.yaml
+++ b/sample_configs/resources/multimodal_frameworks.yaml
@@ -25,7 +25,7 @@ ablation_base:
         optimization.warmup_steps: 0
         optimization.lr_schedule: constant
         optimization.weight_decay: 0
-        optimization.lr_decay: 0
+        optimization.lr_decay: 1
 
 
 ablation_greedy_soup:
@@ -38,7 +38,7 @@ ablation_greedy_soup:
         optimization.warmup_steps: 0
         optimization.lr_schedule: constant
         optimization.weight_decay: 0
-        optimization.lr_decay: 0
+        optimization.lr_decay: 1
 
 ablation_gradient_clip:
   repo: https://github.com/suzhoum/autogluon.git
@@ -49,7 +49,7 @@ ablation_gradient_clip:
         optimization.warmup_steps: 0
         optimization.lr_schedule: constant
         optimization.weight_decay: 0
-        optimization.lr_decay: 0
+        optimization.lr_decay: 1
 
 ablation_warmup_steps:
   repo: https://github.com/suzhoum/autogluon.git
@@ -59,7 +59,7 @@ ablation_warmup_steps:
     hyperparameters:
         optimization.lr_schedule: constant
         optimization.weight_decay: 0
-        optimization.lr_decay: 0
+        optimization.lr_decay: 1
 
 ablation_cosine_decay:
   repo: https://github.com/suzhoum/autogluon.git
@@ -68,7 +68,7 @@ ablation_cosine_decay:
     presets: best_quality
     hyperparameters:
         optimization.weight_decay: 0
-        optimization.lr_decay: 0
+        optimization.lr_decay: 1
 
 
 ablation_weight_decay:
@@ -77,7 +77,7 @@ ablation_weight_decay:
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-        optimization.lr_decay: 0
+        optimization.lr_decay: 1
 
 
 ablation_lr_decay:

From 8bc504afe4a26167224ad4740a87db9844e25494 Mon Sep 17 00:00:00 2001
From: Su Zhou <zhousu@amazon.com>
Date: Tue, 5 Mar 2024 16:50:16 +0000
Subject: [PATCH 12/13] update

---
 bench_all.py                                  | 12 ++--
 ...sion_dataloader.py => image_dataloader.py} |  2 +-
 ...sion_datasets.yaml => image_datasets.yaml} |  0
 .../dataloaders/paper_text_datasets.yaml      |  3 +-
 .../paper_text_tabular_datasets.yaml          |  4 ++
 .../resources/multimodal_frameworks.yaml      | 66 +++++++++++++------
 6 files changed, 59 insertions(+), 28 deletions(-)
 rename src/autogluon/bench/custom_configs/dataloaders/{vision_dataloader.py => image_dataloader.py} (99%)
 rename src/autogluon/bench/custom_configs/dataloaders/{vision_datasets.yaml => image_datasets.yaml} (100%)

diff --git a/bench_all.py b/bench_all.py
index 734495b9..8fe1f5a8 100644
--- a/bench_all.py
+++ b/bench_all.py
@@ -7,19 +7,21 @@
 seeds = [22, 92, 54, 86, 41]
 
 config_paths = [
+    "sample_configs/paper_image_cloud_configs.yaml",
     "sample_configs/paper_text_tabular_cloud_configs.yaml",
     "sample_configs/paper_text_cloud_configs.yaml",
-    "sample_configs/paper_image_cloud_configs.yaml",
 ]
 frameworks = [
+    # "AutoGluon_best_master",
+    # "autokeras_master",
     "ablation_base",
     "ablation_greedy_soup",
     "ablation_gradient_clip",
     "ablation_warmup_steps",
-    "ablation_cosine_decay",
-    "ablation_weight_decay",
-    "ablation_lr_decay",
-    # "autokeras_master",
+    # "ablation_cosine_decay",
+    # "ablation_weight_decay",
+    # "ablation_lr_decay",
+    
 ]
 constraints = [
     "g4_12x"
diff --git a/src/autogluon/bench/custom_configs/dataloaders/vision_dataloader.py b/src/autogluon/bench/custom_configs/dataloaders/image_dataloader.py
similarity index 99%
rename from src/autogluon/bench/custom_configs/dataloaders/vision_dataloader.py
rename to src/autogluon/bench/custom_configs/dataloaders/image_dataloader.py
index 84ab5d91..21d6a25f 100644
--- a/src/autogluon/bench/custom_configs/dataloaders/vision_dataloader.py
+++ b/src/autogluon/bench/custom_configs/dataloaders/image_dataloader.py
@@ -16,7 +16,7 @@ def path_expander(path, base_folder):
 logger = logging.getLogger(__name__)
 
 
-class VisionDataLoader:
+class ImageDataLoader:
     def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"):
         with open(dataset_config_file, "r") as f:
             config = yaml.safe_load(f)
diff --git a/src/autogluon/bench/custom_configs/dataloaders/vision_datasets.yaml b/src/autogluon/bench/custom_configs/dataloaders/image_datasets.yaml
similarity index 100%
rename from src/autogluon/bench/custom_configs/dataloaders/vision_datasets.yaml
rename to src/autogluon/bench/custom_configs/dataloaders/image_datasets.yaml
diff --git a/src/autogluon/bench/custom_configs/dataloaders/paper_text_datasets.yaml b/src/autogluon/bench/custom_configs/dataloaders/paper_text_datasets.yaml
index 067e12d1..d3113109 100644
--- a/src/autogluon/bench/custom_configs/dataloaders/paper_text_datasets.yaml
+++ b/src/autogluon/bench/custom_configs/dataloaders/paper_text_datasets.yaml
@@ -63,12 +63,13 @@ base: &base
 
 
 financial_news:
-  <<: *base
   url: s3://zs-models/datasets/financial_news/{lang}/{split}.csv
   splits:
     - train
   langs:
     - en
+  metric: accuracy
+  problem_type: classification
 
 MLDoc-11000:
   <<: *base
diff --git a/src/autogluon/bench/custom_configs/dataloaders/paper_text_tabular_datasets.yaml b/src/autogluon/bench/custom_configs/dataloaders/paper_text_tabular_datasets.yaml
index f4ddfd70..b43fbab6 100644
--- a/src/autogluon/bench/custom_configs/dataloaders/paper_text_tabular_datasets.yaml
+++ b/src/autogluon/bench/custom_configs/dataloaders/paper_text_tabular_datasets.yaml
@@ -19,6 +19,10 @@ airbnb:
   text_columns:
     - summary
     - amenities
+    - description
+    - notes
+    - name
+    - neighborhood
   label_columns:
     - price_label
   columns_to_drop:
diff --git a/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml b/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
index 7a2765f5..623a6e9c 100644
--- a/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
+++ b/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
@@ -9,58 +9,82 @@ AutoGluon_branch:
       optimization.learning_rate: 0.005
 
 AutoGluon_best_master:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git 
+  version: add_constant_lr_decay
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+
+ablation_base:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
+    hyperparameters:
+        optimization.top_k_average_method: best
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 1
+
 
 ablation_greedy_soup:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.top_k_average_method: best
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 1
 
 ablation_gradient_clip:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.gradient_clip_algorithm: value
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 1
 
 ablation_warmup_steps:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.warmup_steps: 0.0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 1
 
 ablation_cosine_decay:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.lr_schedule: polynomial_decay
+        optimization.weight_decay: 0
+        optimization.lr_decay: 1
+
 
 ablation_weight_decay:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
     hyperparameters:
-      optimization.weight_decay: 0.0
+        optimization.lr_decay: 1
+
 
 ablation_lr_decay:
-  repo: https://github.com/autogluon/autogluon.git
-  version: master
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
   params:  # MultimodalPredictor.fit(params)
     presets: best_quality
-    hyperparameters:
-      optimization.lr_decay: 0.0 
 
 
 autokeras_master:

From 8b46e8a40bc27df2b9a28b12c19bde83d7e413f3 Mon Sep 17 00:00:00 2001
From: Su Zhou <zhousu@amazon.com>
Date: Thu, 14 Mar 2024 21:35:26 +0000
Subject: [PATCH 13/13] update

---
 bench_all.py => sample_configs/bench_all.py   |  28 +-
 .../dataloaders/paper_text_datasets.yaml      |   3 +-
 .../paper_text_tabular_datasets.yaml          | 278 ++++++++++++++++++
 .../dataloaders/text_tabular_datasets.yaml    | 278 ++++++++++++++++++
 sample_configs/paper_image_cloud_configs.yaml |   2 +-
 sample_configs/paper_text_cloud_configs.yaml  |   2 +-
 .../paper_text_tabular_cloud_configs.yaml     |  20 +-
 .../resources/multimodal_frameworks.yaml      | 134 +++++++++
 8 files changed, 724 insertions(+), 21 deletions(-)
 rename bench_all.py => sample_configs/bench_all.py (75%)
 create mode 100644 sample_configs/dataloaders/text_tabular_datasets.yaml

diff --git a/bench_all.py b/sample_configs/bench_all.py
similarity index 75%
rename from bench_all.py
rename to sample_configs/bench_all.py
index 8fe1f5a8..94d7a0f3 100644
--- a/bench_all.py
+++ b/sample_configs/bench_all.py
@@ -5,24 +5,25 @@
     seeds.append(random.randint(0, 100))
 
 seeds = [22, 92, 54, 86, 41]
-
+seeds = [22]  # overrides the five-seed list above with a single seed
 config_paths = [
     "sample_configs/paper_image_cloud_configs.yaml",
     "sample_configs/paper_text_tabular_cloud_configs.yaml",
     "sample_configs/paper_text_cloud_configs.yaml",
 ]
-frameworks = [
-    # "AutoGluon_best_master",
-    # "autokeras_master",
-    "ablation_base",
-    "ablation_greedy_soup",
-    "ablation_gradient_clip",
-    "ablation_warmup_steps",
-    # "ablation_cosine_decay",
-    # "ablation_weight_decay",
-    # "ablation_lr_decay",
-    
-]
+
+frameworks = ['AutoGluon_best_master', 'ablation_base', 'ablation_add_greedy', 'ablation_add_grad_clip', 'ablation_add_warmup_steps', 'ablation_add_cosine_decay', 'ablation_add_weight_decay', 'ablation_add_lr_decay', 'AutoGluon_del_greedy', 'AutoGluon_del_grad_clip', 'AutoGluon_del_warmup_steps', 'AutoGluon_del_cosine_decay', 'AutoGluon_del_weight_decay', 'AutoGluon_del_lr_decay']
+
+#frameworks = [
+   # "ablation_base",
+   # "ablation_greedy_soup",
+   # "ablation_gradient_clip",
+   # "ablation_warmup_steps",
+   # "ablation_cosine_decay",
+   # "ablation_weight_decay",
+   # "ablation_lr_decay",
+#    "autokeras_master",
+#]
 constraints = [
     "g4_12x"
 ]
diff --git a/sample_configs/dataloaders/paper_text_datasets.yaml b/sample_configs/dataloaders/paper_text_datasets.yaml
index d3113109..067e12d1 100644
--- a/sample_configs/dataloaders/paper_text_datasets.yaml
+++ b/sample_configs/dataloaders/paper_text_datasets.yaml
@@ -63,13 +63,12 @@ base: &base
 
 
 financial_news:
+  <<: *base
   url: s3://zs-models/datasets/financial_news/{lang}/{split}.csv
   splits:
     - train
   langs:
     - en
-  metric: accuracy
-  problem_type: classification
 
 MLDoc-11000:
   <<: *base
diff --git a/sample_configs/dataloaders/paper_text_tabular_datasets.yaml b/sample_configs/dataloaders/paper_text_tabular_datasets.yaml
index b43fbab6..d2c9c79f 100644
--- a/sample_configs/dataloaders/paper_text_tabular_datasets.yaml
+++ b/sample_configs/dataloaders/paper_text_tabular_datasets.yaml
@@ -92,3 +92,281 @@ cal_house:
     - Sold Price
   metric: rmse
   problem_type: regression
+base: &base
+  url: s3://automl-mm-bench/{name}/{split}.csv
+  test_split_name: test
+  splits:
+    - train
+    - test
+  feature_columns:
+    - ImageID
+  label_columns:
+    - LabelName
+  image_columns:
+  text_columns:
+  columns_to_drop:
+  metric: acc
+  problem_type: multiclass
+  
+
+prod:
+  <<: *base
+  url: s3://automl-mm-bench/machine_hack_product_sentiment/{split}.csv
+  test_split_name: dev
+  feature_columns:
+    - Product_Description
+    - Product_Type
+  label_columns:
+    - Sentiment
+
+airbnb:  # NOTE(review): this file already has a top-level `airbnb` key earlier on; most YAML loaders silently keep only the last duplicate - confirm which definition is intended
+  <<: *base
+  url: s3://automl-mm-bench/airbnb_melbourne/{split}.pq
+  feature_columns:
+    null
+  label_columns:
+    - price_label
+  ignore_columns:
+    - id
+    - listing_url
+    - scrape_id
+    - last_scraped
+    - picture_url
+    - host_id
+    - host_url
+    - host_name
+    - host_thumbnail_url
+    - host_picture_url
+    - monthly_price
+    - weekly_price
+    - price
+    - calendar_last_scraped
+
+channel:
+  <<: *base
+  url: s3://automl-mm-bench/news_channel/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - channel
+  ignore_columns:
+    null
+
+wine:
+  <<: *base
+  url: s3://automl-mm-bench/wine_reviews/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - variety
+  ignore_columns:
+    null
+
+imdb:
+  <<: *base
+  url: s3://automl-mm-bench/imdb_genre_prediction/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - Genre_is_Drama
+  ignore_columns:
+    null
+  metric: roc_auc
+  problem_type: binary
+
+jigsaw:
+  <<: *base
+  url: s3://automl-mm-bench/jigsaw_unintended_bias100K/{split}.pq
+  feature_columns:
+    - comment_text
+    - asian
+    - atheist
+    - bisexual
+    - black
+    - buddhist
+    - christian
+    - female
+    - heterosexual
+    - hindu
+    - homosexual_gay_or_lesbian
+    - intellectual_or_learning_disability
+    - jewish
+    - latino
+    - male
+    - muslim
+    - other_disability
+    - other_gender
+    - other_race_or_ethnicity
+    - other_religion
+    - other_sexual_orientation
+    - physical_disability
+    - psychiatric_or_mental_illness
+    - transgender
+    - white
+    - funny
+    - wow
+    - sad
+    - likes
+    - disagree
+  label_columns:
+    - target
+  metric: roc_auc
+  problem_type: binary
+
+fake:
+  <<: *base
+  url: s3://automl-mm-bench/fake_job_postings2/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - fraudulent
+  ignore_columns:
+    null
+  metric: roc_auc
+  problem_type: binary
+
+kick:
+  <<: *base
+  url: s3://automl-mm-bench/kick_starter_funding/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - final_status
+  ignore_columns:
+    null
+  metric: roc_auc
+  problem_type: binary
+
+ae:
+  <<: *base
+  url: s3://automl-mm-bench/ae_price_prediction/{split}.pq
+  feature_columns:
+    null
+  label_columns:
+    - price
+  ignore_columns:
+    - mrp
+    - pdp_url
+  metric: r2
+  problem_type: regression
+
+qaa:
+  <<: *base
+  url: s3://automl-mm-bench/google_quest_qa/{split}.pq
+  test_split_name: dev
+  feature_columns:
+    - question_title
+    - question_body
+    - answer
+    - category
+  label_columns:
+    - answer_type_reason_explanation
+  metric: r2
+  problem_type: regression
+
+qaq:
+  <<: *base
+  url: s3://automl-mm-bench/google_quest_qa/{split}.pq
+  test_split_name: dev
+  feature_columns:
+    - question_title
+    - question_body
+    - answer
+    - category
+  label_columns:
+    - question_type_reason_explanation
+  metric: r2
+  problem_type: regression
+
+cloth:
+  <<: *base
+  url: s3://automl-mm-bench/women_clothing_review/{split}.pq
+  feature_columns:
+    - Title
+    - Review Text
+    - Age
+    - Division Name
+    - Department Name
+    - Class Name
+  label_columns:
+    - Rating
+  metric: r2
+  problem_type: regression
+
+mercari:
+  <<: *base
+  url: s3://automl-mm-bench/mercari_price_suggestion100K/{split}.pq
+  feature_columns:
+    null
+  label_columns:
+    - log_price
+  ignore_columns:
+    - train_id
+    - price
+  metric: r2
+  problem_type: regression
+
+jc:
+  <<: *base
+  url: s3://automl-mm-bench/jc_penney_products/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - sale_price
+  ignore_columns:
+    null
+  metric: r2
+  problem_type: regression
+
+pop:
+  <<: *base
+  url: s3://automl-mm-bench/news_popularity2/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - log_shares
+  ignore_columns:
+    null
+  metric: r2
+  problem_type: regression
+
+book:
+  <<: *base
+  url: s3://automl-mm-bench/machine_hack_competitions/predict_the_price_of_books/{split}.csv
+  feature_columns:
+    - Title
+    - Author
+    - Edition
+    - Reviews
+    - Ratings
+    - Synopsis
+    - Genre
+    - BookCategory
+  label_columns:
+    - Price
+  ignore_columns:
+    null
+  metric: r2
+  problem_type: regression
+
+salary:
+  <<: *base
+  url: s3://automl-mm-bench/machine_hack_competitions/predict_the_data_scientists_salary_in_india_hackathon/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - salary
+  ignore_columns:
+    null
+  metric: acc
+  problem_type: multiclass
+
+house:
+  <<: *base
+  url: s3://automl-mm-bench/kaggle-california-house-prices/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - Sold Price
+  metric: r2
+  problem_type: regression
diff --git a/sample_configs/dataloaders/text_tabular_datasets.yaml b/sample_configs/dataloaders/text_tabular_datasets.yaml
new file mode 100644
index 00000000..9f8e4d6c
--- /dev/null
+++ b/sample_configs/dataloaders/text_tabular_datasets.yaml
@@ -0,0 +1,278 @@
+base: &base  # shared defaults; entries that use `<<: *base` inherit these keys and may override them
+  url: s3://automl-mm-bench/{name}/{split}.csv
+  test_split_name: test
+  splits:
+    - train
+    - test
+  feature_columns:
+    - ImageID
+  label_columns:
+    - LabelName
+  image_columns: null
+  text_columns: null
+  columns_to_drop: null
+  metric: acc
+  problem_type: multiclass
+
+
+prod:
+  <<: *base
+  url: s3://automl-mm-bench/machine_hack_product_sentiment/{split}.csv
+  test_split_name: dev
+  feature_columns:
+    - Product_Description
+    - Product_Type
+  label_columns:
+    - Sentiment
+
+airbnb:
+  <<: *base
+  url: s3://automl-mm-bench/airbnb_melbourne/{split}.pq
+  feature_columns:
+    null
+  label_columns:
+    - price_label
+  ignore_columns:
+    - id
+    - listing_url
+    - scrape_id
+    - last_scraped
+    - picture_url
+    - host_id
+    - host_url
+    - host_name
+    - host_thumbnail_url
+    - host_picture_url
+    - monthly_price
+    - weekly_price
+    - price
+    - calendar_last_scraped
+
+channel:
+  <<: *base
+  url: s3://automl-mm-bench/news_channel/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - channel
+  ignore_columns:
+    null
+
+wine:
+  <<: *base
+  url: s3://automl-mm-bench/wine_reviews/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - variety
+  ignore_columns:
+    null
+
+imdb:
+  <<: *base
+  url: s3://automl-mm-bench/imdb_genre_prediction/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - Genre_is_Drama
+  ignore_columns:
+    null
+  metric: roc_auc
+  problem_type: binary
+
+jigsaw:
+  <<: *base
+  url: s3://automl-mm-bench/jigsaw_unintended_bias100K/{split}.pq
+  feature_columns:
+    - comment_text
+    - asian
+    - atheist
+    - bisexual
+    - black
+    - buddhist
+    - christian
+    - female
+    - heterosexual
+    - hindu
+    - homosexual_gay_or_lesbian
+    - intellectual_or_learning_disability
+    - jewish
+    - latino
+    - male
+    - muslim
+    - other_disability
+    - other_gender
+    - other_race_or_ethnicity
+    - other_religion
+    - other_sexual_orientation
+    - physical_disability
+    - psychiatric_or_mental_illness
+    - transgender
+    - white
+    - funny
+    - wow
+    - sad
+    - likes
+    - disagree
+  label_columns:
+    - target
+  metric: roc_auc
+  problem_type: binary
+
+fake:
+  <<: *base
+  url: s3://automl-mm-bench/fake_job_postings2/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - fraudulent
+  ignore_columns:
+    null
+  metric: roc_auc
+  problem_type: binary
+
+kick:
+  <<: *base
+  url: s3://automl-mm-bench/kick_starter_funding/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - final_status
+  ignore_columns:
+    null
+  metric: roc_auc
+  problem_type: binary
+
+ae:
+  <<: *base
+  url: s3://automl-mm-bench/ae_price_prediction/{split}.pq
+  feature_columns:
+    null
+  label_columns:
+    - price
+  ignore_columns:
+    - mrp
+    - pdp_url
+  metric: r2
+  problem_type: regression
+
+qaa:
+  <<: *base
+  url: s3://automl-mm-bench/google_quest_qa/{split}.pq
+  test_split_name: dev
+  feature_columns:
+    - question_title
+    - question_body
+    - answer
+    - category
+  label_columns:
+    - answer_type_reason_explanation
+  metric: r2
+  problem_type: regression
+
+qaq:
+  <<: *base
+  url: s3://automl-mm-bench/google_quest_qa/{split}.pq
+  test_split_name: dev
+  feature_columns:
+    - question_title
+    - question_body
+    - answer
+    - category
+  label_columns:
+    - question_type_reason_explanation
+  metric: r2
+  problem_type: regression
+
+cloth:
+  <<: *base
+  url: s3://automl-mm-bench/women_clothing_review/{split}.pq
+  feature_columns:
+    - Title
+    - Review Text
+    - Age
+    - Division Name
+    - Department Name
+    - Class Name
+  label_columns:
+    - Rating
+  metric: r2
+  problem_type: regression
+
+mercari:
+  <<: *base
+  url: s3://automl-mm-bench/mercari_price_suggestion100K/{split}.pq
+  feature_columns:
+    null
+  label_columns:
+    - log_price
+  ignore_columns:
+    - train_id
+    - price
+  metric: r2
+  problem_type: regression
+
+jc:
+  <<: *base
+  url: s3://automl-mm-bench/jc_penney_products/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - sale_price
+  ignore_columns:
+    null
+  metric: r2
+  problem_type: regression
+
+pop:
+  <<: *base
+  url: s3://automl-mm-bench/news_popularity2/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - log_shares
+  ignore_columns:
+    null
+  metric: r2
+  problem_type: regression
+
+book:
+  <<: *base
+  url: s3://automl-mm-bench/machine_hack_competitions/predict_the_price_of_books/{split}.csv
+  feature_columns:
+    - Title
+    - Author
+    - Edition
+    - Reviews
+    - Ratings
+    - Synopsis
+    - Genre
+    - BookCategory
+  label_columns:
+    - Price
+  ignore_columns:
+    null
+  metric: r2
+  problem_type: regression
+
+salary:
+  <<: *base
+  url: s3://automl-mm-bench/machine_hack_competitions/predict_the_data_scientists_salary_in_india_hackathon/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - salary
+  ignore_columns:
+    null
+  metric: acc
+  problem_type: multiclass
+
+house:
+  <<: *base
+  url: s3://automl-mm-bench/kaggle-california-house-prices/{split}.csv
+  feature_columns:
+    null
+  label_columns:
+    - Sold Price
+  metric: r2
+  problem_type: regression
diff --git a/sample_configs/paper_image_cloud_configs.yaml b/sample_configs/paper_image_cloud_configs.yaml
index a3c5331f..904d3c1f 100644
--- a/sample_configs/paper_image_cloud_configs.yaml
+++ b/sample_configs/paper_image_cloud_configs.yaml
@@ -1,7 +1,7 @@
 # Infra configurations
 cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
   CDK_DEPLOY_ACCOUNT: 097403188315  # required, update with your AWS account
-  CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
+  CDK_DEPLOY_REGION: us-west-2  # required, update with your desired region
   PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
   METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
   DATA_BUCKET: zs-models  # optional, S3 bucket to download your private datasets
diff --git a/sample_configs/paper_text_cloud_configs.yaml b/sample_configs/paper_text_cloud_configs.yaml
index 2f98216d..a1173526 100644
--- a/sample_configs/paper_text_cloud_configs.yaml
+++ b/sample_configs/paper_text_cloud_configs.yaml
@@ -1,7 +1,7 @@
 # Infra configurations
 cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
   CDK_DEPLOY_ACCOUNT: 097403188315  # required, update with your AWS account
-  CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
+  CDK_DEPLOY_REGION: us-west-2  # required, update with your desired region
   PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
   METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
   DATA_BUCKET: zs-models  # optional, S3 bucket to download your private datasets
diff --git a/sample_configs/paper_text_tabular_cloud_configs.yaml b/sample_configs/paper_text_tabular_cloud_configs.yaml
index 16ba00c1..a42ad6f5 100644
--- a/sample_configs/paper_text_tabular_cloud_configs.yaml
+++ b/sample_configs/paper_text_tabular_cloud_configs.yaml
@@ -1,16 +1,16 @@
 # Infra configurations
 cdk_context:  # AWS infra configs used to setup AWS Batch environment with AWS CDK
   CDK_DEPLOY_ACCOUNT: 097403188315  # required, update with your AWS account
-  CDK_DEPLOY_REGION: us-east-1  # required, update with your desired region
+  CDK_DEPLOY_REGION: us-west-2 # required, update with your desired region
   PREFIX: ag-bench  # Used to identify infra resources created, optional, default = ag-bench
   METRICS_BUCKET: autogluon-benchmark-metrics  # required, has to be a globally unique name
-  DATA_BUCKET: automl-mm-bench  # optional, S3 bucket to download your private datasets
+  DATA_BUCKET: zs-models  # optional, S3 bucket to download your private datasets
   MAX_MACHINE_NUM: 1000   # optional, default 20
   # BLOCK_DEVICE_VOLUME: 300   # optional, default 100GB
   # RESERVED_MEMORY_SIZE: 60000  # optional, default 15000MB
   # VPC_NAME: existing-vpc-name  # optional
 
-module: multimodal  # required, choise of 'multimodal' pr 'autokeras'
+module: autokeras  # required, choice of 'multimodal' or 'autokeras'
 mode: aws  # required
 benchmark_name: ag_bench_text_tabular  # required
 root_dir: ag_bench_runs  # optional, default = "ag_bench_runs"
@@ -27,6 +27,20 @@ framework:   # framework options can be found at resources/multimodal_frameworks
   # ablation_weight_decay
   # ablation_lr_decay
 dataset_name:
+        #        - prod
+        #- channel
+        #- wine
+        #- imdb
+        #- jigsaw
+        ##- fake
+        #- ae
+        #- qaa
+        #- qaq
+        #- mercari
+        #- jc
+        #- pop
+        #- book
+        #- salary
         - airbnb
         - kick_start
         - cloth_review
diff --git a/sample_configs/resources/multimodal_frameworks.yaml b/sample_configs/resources/multimodal_frameworks.yaml
index 623a6e9c..fd2e29e9 100644
--- a/sample_configs/resources/multimodal_frameworks.yaml
+++ b/sample_configs/resources/multimodal_frameworks.yaml
@@ -28,6 +28,140 @@ ablation_base:
         optimization.lr_decay: 1
 
 
+ablation_add_greedy:  # the only ablation_add_* entry that does not set optimization.top_k_average_method
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay  # ref in the repo above (presumably a branch name; confirm)
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:  # dotted-key overrides; top_k_average_method is not overridden in this entry
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 1
+
+
+ablation_add_grad_clip:  # the only ablation_add_* entry that does not set optimization.gradient_clip_val
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay  # ref in the repo above (presumably a branch name; confirm)
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:  # dotted-key overrides; gradient_clip_val is not overridden in this entry
+        optimization.top_k_average_method: best
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 1
+
+
+ablation_add_warmup_steps:  # the only ablation_add_* entry that does not set optimization.warmup_steps
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay  # ref in the repo above (presumably a branch name; confirm)
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:  # dotted-key overrides; warmup_steps is not overridden in this entry
+        optimization.top_k_average_method: best
+        optimization.gradient_clip_val: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+        optimization.lr_decay: 1
+
+
+
+ablation_add_cosine_decay:  # the only ablation_add_* entry that does not set optimization.lr_schedule
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay  # ref in the repo above (presumably a branch name; confirm)
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:  # dotted-key overrides; lr_schedule is not forced to constant in this entry
+        optimization.top_k_average_method: best
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.weight_decay: 0
+        optimization.lr_decay: 1
+
+
+ablation_add_weight_decay:  # the only ablation_add_* entry that does not set optimization.weight_decay
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay  # ref in the repo above (presumably a branch name; confirm)
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:  # dotted-key overrides; weight_decay is not overridden in this entry
+        optimization.top_k_average_method: best
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.lr_decay: 1
+
+
+ablation_add_lr_decay:  # the only ablation_add_* entry that does not set optimization.lr_decay
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay  # ref in the repo above (presumably a branch name; confirm)
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:  # dotted-key overrides; lr_decay is not overridden in this entry
+        optimization.top_k_average_method: best
+        optimization.gradient_clip_val: 0
+        optimization.warmup_steps: 0
+        optimization.lr_schedule: constant
+        optimization.weight_decay: 0
+
+
+AutoGluon_best_master:  # upstream repo at master with the best_quality preset and no further overrides
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+
+
+AutoGluon_del_greedy:  # upstream master + best_quality with a single override: top_k_average_method
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters: {optimization.top_k_average_method: best}  # overrides go under hyperparameters, as in the ablation_* entries
+
+AutoGluon_del_grad_clip:  # upstream master + best_quality with a single override: gradient_clip_val
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters: {optimization.gradient_clip_val: 0}  # overrides go under hyperparameters, as in the ablation_* entries
+
+
+AutoGluon_del_warmup_steps:  # upstream master + best_quality with a single override: warmup_steps
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters: {optimization.warmup_steps: 0}  # overrides go under hyperparameters, as in the ablation_* entries
+
+
+AutoGluon_del_cosine_decay:  # upstream master + best_quality with a single override: lr_schedule
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters: {optimization.lr_schedule: constant}  # overrides go under hyperparameters, as in the ablation_* entries
+
+AutoGluon_del_weight_decay:  # upstream master + best_quality with a single override: weight_decay
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters: {optimization.weight_decay: 0}  # overrides go under hyperparameters, as in the ablation_* entries
+
+
+
+AutoGluon_del_lr_decay:  # upstream master + best_quality with a single override: lr_decay
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params:  # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters: {optimization.lr_decay: 1}  # overrides go under hyperparameters, as in the ablation_* entries
+
+
+
 ablation_greedy_soup:
   repo: https://github.com/suzhoum/autogluon.git
   version: add_constant_lr_decay