From c9d3c1aa5d16f8538b75507dcfa88daf014caaae Mon Sep 17 00:00:00 2001 From: Matt Mallory Date: Tue, 24 Jun 2025 16:09:13 -0700 Subject: [PATCH 1/2] allow None as input to proj mat mask_method --- .../distance_between_nodes_for_directory.py | 3 +- .../projection_matrix_from_swc_directory.py | 20 ++--- morph_utils/proj_mat_utils.py | 82 +++++++++++++++++++ setup.cfg | 1 + 4 files changed, 90 insertions(+), 16 deletions(-) diff --git a/morph_utils/executable_scripts/distance_between_nodes_for_directory.py b/morph_utils/executable_scripts/distance_between_nodes_for_directory.py index 855f3a6..69d7917 100644 --- a/morph_utils/executable_scripts/distance_between_nodes_for_directory.py +++ b/morph_utils/executable_scripts/distance_between_nodes_for_directory.py @@ -10,7 +10,7 @@ class IO_Schema(ags.ArgSchema): swc_input_directory = ags.fields.InputDir(description='directory with swc files') output_file = ags.fields.OutputFile(descripion='output csv with distances between files') - compartment_types = ags.fields.List(default=[3, 4], cls_or_instance=ags.fields.Int) + compartment_types = ags.fields.List(default=[2, 3, 4], cls_or_instance=ags.fields.Int) use_multiprocessing = ags.fields.Boolean(default=True) @@ -29,6 +29,7 @@ def main(swc_input_directory, output_file, compartment_types, use_multiprocessin # all_combinations = [c for c in all_combinations] # if c[0] != c[1]] print("{} Comparisons to analyze".format(len(all_combinations))) + print(f"Comparing nodes of type: {compartment_types}") reslist = [] for combo in all_combinations: file_1 = os.path.join(swc_input_directory, combo[0]) diff --git a/morph_utils/executable_scripts/projection_matrix_from_swc_directory.py b/morph_utils/executable_scripts/projection_matrix_from_swc_directory.py index 683bcdd..856721c 100644 --- a/morph_utils/executable_scripts/projection_matrix_from_swc_directory.py +++ b/morph_utils/executable_scripts/projection_matrix_from_swc_directory.py @@ -5,7 +5,7 @@ import time import subprocess from morph_utils.ccf import projection_matrix_for_swc -from morph_utils.proj_mat_utils import roll_up_proj_mat +from morph_utils.proj_mat_utils import roll_up_proj_mat, normalize_projection_columns_per_cell class IO_Schema(ags.ArgSchema): @@ -24,19 +24,6 @@ class IO_Schema(ags.ArgSchema): output_projection_csv = ags.fields.OutputFile(description="output projection csv, when running local only") -def normalize_projection_columns_per_cell(input_df, projection_column_identifiers=['ipsi', 'contra']): - """ - :param input_df: input projection df - :param projection_column_identifiers: list of identifiers for projection columns. i.e. strings that identify projection columns from metadata columns - :return: normalized projection matrix - """ - proj_cols = [c for c in input_df.columns if any([ider in c for ider in projection_column_identifiers])] - input_df[proj_cols] = input_df[proj_cols].fillna(0) - - res = input_df[proj_cols].T / input_df[proj_cols].sum(axis=1) - input_df[proj_cols] = res.T - - return input_df def main(ccf_swc_directory, @@ -56,7 +43,10 @@ def main(ccf_swc_directory, if run_host not in ['local','hpc']: raise ValueError(f"Invalid run_host parameter entered ({run_host})") - if mask_method not in [None,'tip_and_branch', 'branch', 'tip', 'tip_or_branch']: + + if mask_method is None: + mask_method = "None" + if mask_method not in ["None",'tip_and_branch', 'branch', 'tip', 'tip_or_branch']: raise ValueError(f"Invalid mask_method provided {mask_method}") if annotation_path == "": diff --git a/morph_utils/proj_mat_utils.py b/morph_utils/proj_mat_utils.py index e69de29..4f2dadf 100644 --- a/morph_utils/proj_mat_utils.py +++ b/morph_utils/proj_mat_utils.py @@ -0,0 +1,82 @@ +import os +import numpy as np +import pandas as pd + +from morph_utils.ccf import STRUCTURE_DESCENDANTS_ACRONYM + + +def de_layer(st): + CTX_STRUCTS = STRUCTURE_DESCENDANTS_ACRONYM['CTX'] + sub_st = st.replace("ipsi_","").replace("contra","") + if sub_st in CTX_STRUCTS: + + for l in ["1","2/3","4","5","6a","6b"]: + st = st.replace(l,"") + + if "ENT" in st: + for l in ["2", "3","6"]: + st = st.replace(l,"") + + return st + else: + return st + + +def roll_up_proj_mat(infile, outfile): + + df = pd.read_csv(infile, index_col=0) + df.index = df.index.map(os.path.basename) + + non_proj_cols = [f for f in df.columns if not any([i in f for i in ["ipsi","contra"]])] + new_df = df[non_proj_cols].copy() + + proj_cols = [f for f in df.columns if any([i in f for i in ["ipsi","contra"]])] + de_layer_dict = {p:de_layer(p) for p in proj_cols} + + parent_names = list(de_layer_dict.values()) + unique_parent_names = np.unique(parent_names) + unique_parent_names = sorted(unique_parent_names, key=lambda x:parent_names.index(x)) + + roll_up_records = {} + for low_res_struct in unique_parent_names: + children = [k for k,v in de_layer_dict.items() if v==low_res_struct ] + roll_up_records[low_res_struct] = children + + + + # for parent, child_list in roll_up_records.items(): + # new_df[parent] = df[child_list].sum(axis=1) + new_cols = { + parent: df[child_list].sum(axis=1) + for parent, child_list in roll_up_records.items() + } + new_cols_df = pd.DataFrame(new_cols) + new_df = pd.concat([new_df, new_cols_df], axis=1) + + # sanity check + for n_struct,old_list in roll_up_records.items(): + sum_old = df[old_list].sum(axis=1) + sum_new = new_df[n_struct] + assert sum(sum_old==sum_new) == len(df) + + + + # print(outfile) + # print() + assert os.path.abspath(outfile) != os.path.abspath(infile) + new_df.to_csv(outfile) + + +def normalize_projection_columns_per_cell(input_df, projection_column_identifiers=['ipsi', 'contra']): + """ + :param input_df: input projection df + :param projection_column_identifiers: list of identifiers for projection columns. i.e. strings that identify projection columns from metadata columns + :return: normalized projection matrix + """ + proj_cols = [c for c in input_df.columns if any([ider in c for ider in projection_column_identifiers])] + input_df[proj_cols] = input_df[proj_cols].fillna(0) + + res = input_df[proj_cols].T / input_df[proj_cols].sum(axis=1) + input_df[proj_cols] = res.T + + return input_df diff --git a/setup.cfg b/setup.cfg index c56fcad..f8cc2ea 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,3 +29,4 @@ console_scripts = morph_utils_aggregate_single_cell_projs = morph_utils.executable_scripts.aggregate_single_cell_projection_csvs:console_script morph_utils_move_somas_left_hemisphere = morph_utils.executable_scripts.move_somas_to_left_hemisphere_swc_directory:console_script morph_utils_local_crop_ccf_swcs = morph_utils.executable_scripts.local_crop_ccf_swc_directory:console_script + morph_utils_dsit_btwn_nodes_directory = morph_utils.executable_scripts.distance_between_nodes_for_directory:console_script \ No newline at end of file From f9331fdb950af545b0c30045996c55445593dbce Mon Sep 17 00:00:00 2001 From: Matt Mallory Date: Wed, 16 Jul 2025 09:09:21 -0700 Subject: [PATCH 2/2] add: option to apply mask-method at cortical parent structure level --- morph_utils/ccf.py | 46 +++++++++++++++++-- .../projection_matrix_for_single_cell.py | 15 ++++-- .../projection_matrix_from_swc_directory.py | 13 ++++-- morph_utils/proj_mat_utils.py | 19 +------- 4 files changed, 61 insertions(+), 32 deletions(-) diff --git a/morph_utils/ccf.py b/morph_utils/ccf.py index 73bfd13..a68be78 100644 --- a/morph_utils/ccf.py +++ b/morph_utils/ccf.py @@ -102,6 +102,30 @@ def load_structure_graph(): df = df.set_index('acronym') return df +def de_layer(st): + """de-layer cortical projection targets + + Args: + st (str): e.g. ipsi_VISal2/3 + + Returns: + str: e.g. ipsi_VISal + """ + CTX_STRUCTS = STRUCTURE_DESCENDANTS_ACRONYM['CTX'] + sub_st = st.replace("ipsi_","").replace("contra_","") + if sub_st in CTX_STRUCTS: + + for l in ["1","2/3","4","5","6a","6b"]: + st = st.replace(l,"") + + if "ENT" in st: + for l in ["2", "3", "5/6", "6"]: + st = st.replace(l,"") + + return st + else: + return st + def process_pin_jblob( slide_specimen_id, jblob, annotation, structures, prints=False) : """ @@ -303,6 +327,7 @@ def get_ccf_structure(voxel, name_map=None, annotation=None, coordinate_to_voxel return name_map[structure_id] def projection_matrix_for_swc(input_swc_file, mask_method = "tip_and_branch", + apply_mask_at_cortical_parent_level=False, count_method = "node", annotation=None, annotation_path = None, volume_shape=(1320, 800, 1140), resolution=10, node_type_list=[2], @@ -316,6 +341,9 @@ def projection_matrix_for_swc(input_swc_file, mask_method = "tip_and_branch", 'tip_and_branch' will return a projection matrix masking only structures with tip and branch nodes. If 'tip' will only look at structures with tip nodes. And last, if 'branch' will only look at structures with branch nodes. + apply_mask_at_cortical_parent_level (bool): If True, the `mask_method` will be applied to aggregated cortical + regions. E.g. if `mask_method`='tip_and_branch' and apply_mask_at_cortical_parent_level = True, then + the tip-and-branch mask will be enforced at the (e.g.) VISp level, instead of in VISp1, VISp2/3 etc. independantly count_method (str): ['node','tip','branch']. When 'node', will measure axon length directly. Otherwise will return the count of tip or branch nodes in each structure annotation (array, optional): 3 dimensional ccf annotation array. Defaults to None. @@ -402,11 +430,21 @@ def node_ider(morph,i): # determine ipsi/contra projections morph_df["ccf_structure_sided"] = morph_df.apply(lambda row: "ipsi_{}".format(row.ccf_structure) if row.z