Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 46 additions & 1 deletion pySingleCellNet/scn_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def scn_train(aTrain,dLevel,nTopGenes = 100,nTopGenePairs = 100,nRand = 100, nTr
pdTrain= query_transform(expRaw.loc[:,cgenesA], xpairs)
print("Finished pair transforming the data\n")
tspRF=sc_makeClassifier(pdTrain.loc[:, xpairs], genes=xpairs, groups=grps, nRand = nRand, ntrees = nTrees, stratify=stratify)
return [cgenesA, xpairs, tspRF]
return [cgenesA, xpairs, tspRF, cgenes_list]

def scn_classify(adata, cgenes, xpairs, rf_tsp, nrand = 0 ):
classRes = scn_predict(cgenes, xpairs, rf_tsp, adata, nrand = nrand)
Expand Down Expand Up @@ -118,3 +118,48 @@ def rf_classPredict(rfObj,expQuery,numRand=50):
expQuery=pd.concat([expQuery, randDat])
xpreds= pd.DataFrame(rfObj.predict_proba(expQuery), columns= rfObj.classes_, index=expQuery.index)
return xpreds

def scn_train_edit(aTrain,dLevel,nTopGenes = 100,nTopGenePairs = 100,nRand = 100, nTrees = 1000,stratify=False,counts_per_cell_after=1e4, scaleMax=10, limitToHVG=True, normalization = True, include_all_genes = False):
warnings.filterwarnings('ignore')
stTrain= aTrain.obs

expRaw = aTrain.to_df()
expRaw = expRaw.loc[stTrain.index.values]

adNorm = aTrain.copy()
if normalization == True:
sc.pp.normalize_per_cell(adNorm, counts_per_cell_after=counts_per_cell_after)
sc.pp.log1p(adNorm)

print("HVG")
if limitToHVG:
sc.pp.highly_variable_genes(adNorm, min_mean=0.0125, max_mean=4, min_disp=0.5)
adNorm = adNorm[:, adNorm.var.highly_variable]

sc.pp.scale(adNorm, max_value=scaleMax)

expTnorm = adNorm.to_df()
expTnorm=expTnorm.loc[stTrain.index.values]

### expTnorm= pd.DataFrame(data=aTrain.X, index= aTrain.obs.index.values, columns= aTrain.var.index.values)
### expTnorm=expTnorm.loc[stTrain.index.values]
print("Matrix normalized")
### cgenesA, grps, cgenes_list =findClassyGenes(expTnorm,stTrain, dLevel = dLevel, topX = nTopGenes)
if include_all_genes == False:
cgenesA, grps, cgenes_list =findClassyGenes_edit(adNorm, dLevel = dLevel, topX = nTopGenes)
else:
cgenesA = np.array(aTrain.var.index)
grps = aTrain.obs[dLevel]
cgenes_list = dict()
for g in np.unique(grps):
cgenes_list[g] = cgenesA

print("There are ", len(cgenesA), " classification genes\n")
### xpairs= ptGetTop(expTnorm.loc[:,cgenesA], grps, cgenes_list, topX=nTopGenePairs, sliceSize=5000)
xpairs= ptGetTop(expTnorm.loc[:,cgenesA], grps, cgenes_list, topX=nTopGenePairs, sliceSize=5000)

print("There are", len(xpairs), "top gene pairs\n")
pdTrain= query_transform(expRaw.loc[:,cgenesA], xpairs)
print("Finished pair transforming the data\n")
tspRF=sc_makeClassifier(pdTrain.loc[:, xpairs], genes=xpairs, groups=grps, nRand = nRand, ntrees = nTrees, stratify=stratify)
return [cgenesA, xpairs, tspRF, cgenes_list]
20 changes: 19 additions & 1 deletion pySingleCellNet/tsp_rf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd
import numpy as np
import scanpy as sc
from sklearn import linear_model
from itertools import combinations
from .stats import *
Expand Down Expand Up @@ -223,4 +224,21 @@ def findClassyGenes(expDat, sampTab,dLevel, topX=25, dThresh=0, alpha1=0.05,alph
cgenes2=np.unique(np.array(res).flatten())
return [cgenes2, grps, cgenes]


def findClassyGenes_edit(adDat, dLevel, topX=25):
adTemp = adDat.copy()
grps = adDat.obs[dLevel]
groups = np.unique(grps)

sc.tl.rank_genes_groups(adTemp, dLevel, use_raw=False, method='wilcoxon')
tempTab = pd.DataFrame(adTemp.uns['rank_genes_groups']['names']).head(topX)

res = []
cgenes = {}

for g in groups:
temp = tempTab[g]
res.append(temp)
cgenes[g] = temp.to_numpy()
cgenes2 = np.unique(np.array(res).flatten())

return [cgenes2, grps, cgenes]