From e5ed3b824932e80c95240eb6a8e513816a114cde Mon Sep 17 00:00:00 2001
From: ArthurTlprt <arthur@symbiome.eu>
Date: Fri, 30 Jan 2026 12:00:44 +0100
Subject: [PATCH 1/2] fix Segmentation: Add skewness and kurtosis calculation
 #103

---
 segmenter/planktoscope/segmenter/__init__.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/segmenter/planktoscope/segmenter/__init__.py b/segmenter/planktoscope/segmenter/__init__.py
index 3eb124ac6..0975d146a 100644
--- a/segmenter/planktoscope/segmenter/__init__.py
+++ b/segmenter/planktoscope/segmenter/__init__.py
@@ -37,6 +37,7 @@
 import numpy as np
 import PIL.Image
 import skimage.exposure
+from scipy.stats import skew, kurtosis
 
 ################################################################################
 # Other image processing Libraries
@@ -268,9 +269,14 @@ def _get_color_info(self, bgr_img, mask):
         h_stddev = np.std(h_channel, where=mask)
         s_stddev = np.std(s_channel, where=mask)
         v_stddev = np.std(v_channel, where=mask)
-        # TODO #103 Add skewness and kurtosis calculation (with scipy) here
-        # using https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.skew.html#scipy.stats.skew
-        # and https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kurtosis.html#scipy.stats.kurtosis
+        # Skewness/kurtosis of the masked pixels only, matching the std computation above
+        h_skewness = skew(h_channel[mask], bias=False, axis=None)
+        s_skewness = skew(s_channel[mask], bias=False, axis=None)
+        v_skewness = skew(v_channel[mask], bias=False, axis=None)
+        h_kurtosis = kurtosis(h_channel[mask], bias=False, axis=None)
+        s_kurtosis = kurtosis(s_channel[mask], bias=False, axis=None)
+        v_kurtosis = kurtosis(v_channel[mask], bias=False, axis=None)
+
         # h_quartiles = np.quantile(h_channel, quartiles)
         # s_quartiles = np.quantile(s_channel, quartiles)
         # v_quartiles = np.quantile(v_channel, quartiles)
@@ -308,6 +314,12 @@ def _get_color_info(self, bgr_img, mask):
             "StdHue": h_stddev,
             "StdSaturation": s_stddev,
             "StdValue": v_stddev,
+            "SkewnessHue": h_skewness,
+            "SkewnessSaturation": s_skewness,
+            "SkewnessValue": v_skewness,
+            "KurtosisHue": h_kurtosis,
+            "KurtosisSaturation": s_kurtosis,
+            "KurtosisValue": v_kurtosis,
             # "object_minHue": h_quartiles[0],
             # "object_Q05Hue": h_quartiles[1],
             # "object_Q25Hue": h_quartiles[2],

From e26106785e9cc62590a33bda0599ddf3ea21e560 Mon Sep 17 00:00:00 2001
From: ArthurTlprt <arthur@symbiome.eu>
Date: Fri, 30 Jan 2026 12:07:57 +0100
Subject: [PATCH 2/2] fix: replace pandas with Python's csv module in the
 EcoTaxa export to shave off pandas's 60 MB of unnecessary disk space usage #846

---
 segmenter/planktoscope/segmenter/ecotaxa.py | 53 +++++++++++++++------
 1 file changed, 39 insertions(+), 14 deletions(-)

diff --git a/segmenter/planktoscope/segmenter/ecotaxa.py b/segmenter/planktoscope/segmenter/ecotaxa.py
index ab3ece883..d5ad81085 100644
--- a/segmenter/planktoscope/segmenter/ecotaxa.py
+++ b/segmenter/planktoscope/segmenter/ecotaxa.py
@@ -20,7 +20,7 @@
 
 
 import numpy
-import pandas  # FIXME: just use python's csv library, to shave off pandas's 60 MB of unnecessary disk space usage
+import csv
 import zipfile
 import os
 import io
@@ -262,25 +262,50 @@ def ecotaxa_export(archive_filepath, metadata, image_base_path, keep_files=False
                 # we remove the image file if we don't want to keep it!
                 os.remove(image_path)
 
-        tsv_content = pandas.DataFrame(tsv_content)
-
-        tsv_type_header = [dtype_to_ecotaxa(dt) for dt in tsv_content.dtypes]
-        tsv_content.columns = pandas.MultiIndex.from_tuples(
-            list(zip(tsv_content.columns, tsv_type_header))
-        )
-
+        # Nothing to export: report it and stop before building the TSV
+        if not tsv_content:
+            logger.error("No TSV content to export")
+            return 0
+
+        # Union of every row's keys in first-seen order, as pandas.DataFrame built its columns
+        column_names = list(dict.fromkeys(key for row in tsv_content for key in row))
+
+        # EcoTaxa type row, decided by the first non-None value of each column:
+        # "[f]" for numbers (numpy scalars included; booleans excluded because they
+        # are written as "True"/"False"), "[t]" for everything else
+        tsv_type_header = []
+        for col in column_names:
+            sample_value = next((row[col] for row in tsv_content if row.get(col) is not None), None)
+            is_number = isinstance(sample_value, (int, float, numpy.number))
+            tsv_type_header.append("[f]" if is_number and not isinstance(sample_value, bool) else "[t]")
+
         # create the filename with the acquisition ID
         acquisition_id = metadata.get("acq_id")
         acquisition_id = acquisition_id.replace(" ", "_")
         tsv_filename = f"ecotaxa_{acquisition_id}.tsv"
-
+        
+        # Build TSV content as string
+        tsv_output = io.StringIO()
+        writer = csv.writer(tsv_output, delimiter='\t', lineterminator='\n')
+        
+        # Write header row (column names)
+        writer.writerow(column_names)
+        
+        # Write type header row
+        writer.writerow(tsv_type_header)
+        
+        # Write data rows; NaN floats become empty cells, as pandas' to_csv wrote them
+        for row in tsv_content:
+            cells = (row.get(col) for col in column_names)
+            writer.writerow(["" if isinstance(v, (float, numpy.floating)) and v != v else v for v in cells])
+        tsv_string = tsv_output.getvalue()
+        
         # add the tsv to the archive
-        archive.writestr(
-            tsv_filename,
-            io.BytesIO(tsv_content.to_csv(sep="\t", encoding="utf-8", index=False).encode()).read(),
-        )
+        archive.writestr(tsv_filename, tsv_string.encode('utf-8'))
+        
         if keep_files:
             tsv_file = os.path.join(image_base_path, tsv_filename)
-            tsv_content.to_csv(path_or_buf=tsv_file, sep="\t", encoding="utf-8", index=False)
+            with open(tsv_file, 'w', encoding='utf-8', newline='') as f:
+                f.write(tsv_string)
     logger.success("Ecotaxa archive is ready!")
     return 1