forked from mahmoodlab/CLAM
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgen_pre_feat_ext_csv.py
More file actions
73 lines (56 loc) · 2.44 KB
/
gen_pre_feat_ext_csv.py
File metadata and controls
73 lines (56 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import pandas as pd
import os
"""
This script is used to process the process list CSV autogenerated by `create_patches_fp.py`.
It removes the .svs file extension from the 'slide_id' column and filters out rows whose 'status' is 'failed_seg' (slides that failed segmentation).
It also sets the 'status' column to 'tbp' (to be processed).
The output CSV is used for feature extraction.
"""
def process_csv_for_feature_extraction(csv_path):
    """
    Prepare the patching process-list CSV for feature extraction.

    Loads the CSV, strips a trailing '.svs' extension from every 'slide_id',
    drops the rows whose 'status' is 'failed_seg', and sets the 'status' of
    all remaining rows to 'tbp' (to be processed).

    Args:
        csv_path (str): Path to the input CSV file.

    Returns:
        pd.DataFrame | None: Processed DataFrame ready for feature extraction,
        or None if the CSV could not be loaded (the error is printed).

    Raises:
        KeyError: If the CSV has no 'slide_id' or 'status' column.
    """
    # Keep the try body minimal: only the load itself is expected to fail.
    try:
        df = pd.read_csv(csv_path)
    except Exception as e:
        # Report and signal failure with None so the caller decides how to
        # terminate, instead of killing the interpreter from library code.
        print(f"Error loading {csv_path}: {e}")
        return None
    print(f"Loaded CSV with {len(df)} rows.")

    # Strip only a trailing '.svs'; a plain substring replace would also
    # mangle IDs that contain '.svs' somewhere in the middle of the name.
    df['slide_id'] = df['slide_id'].str.replace(r'\.svs$', '', regex=True)
    print("Removed .svs extensions from slide IDs")

    # Drop slides that failed segmentation. The explicit copy makes the
    # result an independent frame, so the assignment below does not write
    # into a slice of the original (no SettingWithCopyWarning).
    df = df[df['status'] != 'failed_seg'].copy()
    print(f"Filtered out slides that failed segmentation, remaining rows: {len(df)}")

    # Mark every remaining slide as "to be processed".
    df['status'] = 'tbp'
    return df
def write_processed_csv(df, output_dir):
    """
    Write the processed DataFrame to 'pre_feat_ext.csv' inside output_dir.

    The output directory is created (including parents) if it does not exist.
    The CSV is written without the DataFrame index.

    Args:
        df (pd.DataFrame): Processed DataFrame.
        output_dir (str): Directory in which to save 'pre_feat_ext.csv'.
    """
    # An empty string means "current directory"; os.makedirs('') would raise,
    # so only create the directory when a non-empty path was given.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    output_path = os.path.join(output_dir, 'pre_feat_ext.csv')
    print(f"Writing to {output_path}...")
    df.to_csv(output_path, index=False)
    print(f"Processed CSV saved to {output_path}")
if __name__ == "__main__":
    # Command-line entry point: read the segmentation process list and write
    # the CSV consumed by feature extraction.
    import argparse

    parser = argparse.ArgumentParser(description="Process auto-generated segmentation CSV into a CSV for feature extraction.")
    # Both arguments are mandatory: without them the script would pass None
    # on as a path and fail later with a far less helpful error.
    parser.add_argument('--input_csv', type=str, required=True, help='Path to the input CSV file.')
    parser.add_argument('--output_dir', type=str, required=True, help='Directory in which to save the processed CSV file.')
    args = parser.parse_args()

    # Process the input CSV
    processed_df = process_csv_for_feature_extraction(args.input_csv)
    if processed_df is None:
        # Nothing to write; exit non-zero so calling scripts can detect it.
        print("No processed CSV file was created.")
        raise SystemExit(1)

    # Write the processed DataFrame to a new CSV file
    write_processed_csv(processed_df, args.output_dir)