Changed weights from tensors to lists to consume less memory

jcapp4 · jcapp4 · commit 0b46ab883adc · 2025-11-25T11:51:51.000+01:00
diff --git a/chebai/models/base.py b/chebai/models/base.py
@@ -10,6 +10,8 @@
 
 import extras.adamh as f
 
+import extras.weight_loader as e
+
 logging.getLogger("pysmiles").setLevel(logging.CRITICAL)
 
 _MODEL_REGISTRY = dict()
@@ -271,6 +273,7 @@ def _execute(
                     loss_kwargs = loss_kwargs_candidates
                 #torch.save(loss_data,"loss_data.pt")
                 loss_kwargs['weights'] = f.create_data_weights(batchsize=len(data['idents']),dim=data['labels'].size(dim=1),weights=data["loss_kwargs"],idents=data["idents"])
+                #loss_kwargs['weights'] = e.create_data_weights(batchsize=len(data['idents']),dim=data['labels'].size(dim=1),weights=data["loss_kwargs"],idents=data["idents"])
 
                 loss_kwargs["current_epoch"] = self.trainer.current_epoch
                 loss = self.criterion(loss_data, loss_labels, **loss_kwargs)
diff --git a/chebai/preprocessing/datasets/base.py b/chebai/preprocessing/datasets/base.py
@@ -1182,7 +1182,7 @@ def load_processed_data(
                 data_df = self.dynamic_split_dfs[kind]
                 data = data_df.to_dict(orient="records")
                 if kind == "train" :
-                    #f.init_weights()
+                    #       f.init_weights()
                     data = f.add_train_weights(data)
                 if kind == "validation" :
                     data = f.add_val_weights(data)
diff --git a/extras/adamh.py b/extras/adamh.py
@@ -1,11 +1,12 @@
 import torch
 import csv
+import numpy
 
 
 train = 0
 
 
-def create_weight(path_to_split="../../split/splits.csv"):
+def create_weight(path_to_split="/home/programmer/Bachelorarbeit/split/splits.csv"):
     weights = {}
     with open(path_to_split, 'r') as csvfile:
         reader = csv.reader(csvfile)
@@ -17,13 +18,27 @@ def create_weight(path_to_split="../../split/splits.csv"):
                 #print(row[0])
             i = i +1
         print(len(weights))
-    torch.save(weights,"../../weights/init_mh.pt")
+    torch.save(weights,"/home/programmer/Bachelorarbeit/weights/init_mh.pt")
+
+
+
+def new_create_weight(path_to_split="/home/programmer/Bachelorarbeit/split/splits.csv"):
+    weights = {}
+    with open(path_to_split, 'r') as csvfile:
+        reader = csv.reader(csvfile)
+        i = 0
+        for row in reader:
+            if (row[1] == "train") and i > 0:
+                # print(row[0])
+                weights[row[0]] = [int(row[0])]* 1528
+                # print(row[0])
+            i = i + 1
+        print(len(weights))
+    torch.save(weights, "../../weights/init_mh.pt")
+
 
 def add_train_weights(ids):
     d = torch.load("/home/programmer/Bachelorarbeit/weights/init_mh.pt",weights_only=False)
-    global train
-    train = train + 1
-    print(train)
     it = 0
     for i in ids:
         if it % 10000 == 0:
@@ -36,19 +51,23 @@ def add_train_weights(ids):
 def add_val_weights(ids):
     for i in ids:
         weight = 1
-        i["weight"] = torch.full((1,1528),1)
+        #i["weight"] = torch.full((1,1528),1)
+        i["weight"] = [1]*1528
+
     return ids
 
-def create_data_weights(batchsize:int,dim:int,weights:dict[str,torch.Tensor],idents:tuple[int,...])-> torch.tensor:
+def create_data_weights(batchsize:int,dim:int,weights:dict[str,list[float,...]],idents:tuple[int,...])-> torch.tensor:
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
     weight = None
     index = 0
     for i in idents:
-        w = weights[str(i)]
+        w = torch.Tensor([weights[str(i)],]).to(device)
         if weight == None:
             weight = w
         else:
             weight = torch.cat((weight,w),0)
         index = index + 1
     return weight
 
+#new_create_weight()
+#create_weight()
diff --git a/extras/weight_loader.py b/extras/weight_loader.py
@@ -110,9 +110,9 @@ def add_train_weights(ids):
         it = it +1
     return ids
 
-def check_weights(data):
-    for i in data:
-        print(f"({i["ident"]} , {i["weight"]}")
+#def check_weights(data):
+#    for i in data:
+#        print(f"({i["ident"]} , {i["weight"]}")
 
 
 def init_class_weights(class_path:str,weight_path:str,weight:float):
@@ -154,5 +154,5 @@ def create_weight_class_tensor(batch_size:int)-> torch.Tensor:
 
 
 #init_class_weights("../../data/chebi_v241/ChEBI50/processed/classes.txt","../../weights/class_first_it.csv",1)
-create_class_tensor("../../weights/test.pt")
-create_weight_class_tensor(32)
+#create_class_tensor("../../weights/test.pt")
+#create_weight_class_tensor(32)