Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions segmenter/planktoscope/segmenter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import numpy as np
import PIL.Image
import skimage.exposure
from scipy.stats import skew, kurtosis

################################################################################
# Other image processing Libraries
Expand Down Expand Up @@ -268,9 +269,14 @@ def _get_color_info(self, bgr_img, mask):
h_stddev = np.std(h_channel, where=mask)
s_stddev = np.std(s_channel, where=mask)
v_stddev = np.std(v_channel, where=mask)
# TODO #103 Add skewness and kurtosis calculation (with scipy) here
# using https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.skew.html#scipy.stats.skew
# and https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kurtosis.html#scipy.stats.kurtosis
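# Skewness and kurtosis of each HSV channel (bias-corrected, computed over the
# whole flattened channel via axis=None).
# NOTE(review): unlike the stddev lines above, `mask` is not applied here, so
# pixels outside the mask also contribute — confirm this is intended.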
h_skewness = skew(h_channel, bias=False, axis=None)
s_skewness = skew(s_channel, bias=False, axis=None)
v_skewness = skew(v_channel, bias=False, axis=None)
h_kurtosis = kurtosis(h_channel, bias=False, axis=None)
s_kurtosis = kurtosis(s_channel, bias=False, axis=None)
v_kurtosis = kurtosis(v_channel, bias=False, axis=None)

# h_quartiles = np.quantile(h_channel, quartiles)
# s_quartiles = np.quantile(s_channel, quartiles)
# v_quartiles = np.quantile(v_channel, quartiles)
Expand Down Expand Up @@ -308,6 +314,12 @@ def _get_color_info(self, bgr_img, mask):
"StdHue": h_stddev,
"StdSaturation": s_stddev,
"StdValue": v_stddev,
"SkewnessHue": h_skewness,
"SkewnessSaturation": s_skewness,
"SkewnessValue": v_skewness,
"KurtosisHue": h_kurtosis,
"KurtosisSaturation": s_kurtosis,
"KurtosisValue": v_kurtosis
# "object_minHue": h_quartiles[0],
# "object_Q05Hue": h_quartiles[1],
# "object_Q25Hue": h_quartiles[2],
Expand Down
53 changes: 39 additions & 14 deletions segmenter/planktoscope/segmenter/ecotaxa.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@


import numpy
import pandas # FIXME: just use python's csv library, to shave off pandas's 60 MB of unnecessary disk space usage
import csv
import zipfile
import os
import io
Expand Down Expand Up @@ -262,25 +262,50 @@ def ecotaxa_export(archive_filepath, metadata, image_base_path, keep_files=False
# we remove the image file if we don't want to keep it!
os.remove(image_path)

tsv_content = pandas.DataFrame(tsv_content)

tsv_type_header = [dtype_to_ecotaxa(dt) for dt in tsv_content.dtypes]
tsv_content.columns = pandas.MultiIndex.from_tuples(
list(zip(tsv_content.columns, tsv_type_header))
)

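# Nothing to export if no rows were collected; otherwise take the column names
# (sorted) from the first row.
# NOTE(review): keys that appear only in later rows are not picked up as columns.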
if not tsv_content:
logger.error("No TSV content to export")
return 0

column_names = sorted(tsv_content[0].keys())

# Determine data types for each column
tsv_type_header = []
for col in column_names:
# Use the first non-None value in this column as the type sample:
# int/float -> "[f]", anything else (including an all-None column) -> "[t]"
sample_value = next((row[col] for row in tsv_content if row.get(col) is not None), None)
if sample_value is not None and isinstance(sample_value, (int, float)):
tsv_type_header.append("[f]")
else:
tsv_type_header.append("[t]")

# create the filename with the acquisition ID
acquisition_id = metadata.get("acq_id")
acquisition_id = acquisition_id.replace(" ", "_")
tsv_filename = f"ecotaxa_{acquisition_id}.tsv"


# Build TSV content as string
tsv_output = io.StringIO()
writer = csv.writer(tsv_output, delimiter='\t', lineterminator='\n')

# Write header row (column names)
writer.writerow(column_names)

# Write type header row
writer.writerow(tsv_type_header)

# Write data rows
for row in tsv_content:
writer.writerow([row.get(col, '') for col in column_names])

tsv_string = tsv_output.getvalue()

# add the tsv to the archive
archive.writestr(
tsv_filename,
io.BytesIO(tsv_content.to_csv(sep="\t", encoding="utf-8", index=False).encode()).read(),
)
archive.writestr(tsv_filename, tsv_string.encode('utf-8'))

if keep_files:
tsv_file = os.path.join(image_base_path, tsv_filename)
tsv_content.to_csv(path_or_buf=tsv_file, sep="\t", encoding="utf-8", index=False)
with open(tsv_file, 'w', encoding='utf-8', newline='') as f:
f.write(tsv_string)
logger.success("Ecotaxa archive is ready!")
return 1