Skip to content
This repository was archived by the owner on Mar 5, 2024. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Python virtual env.
.venv/

# Python cache files
*.pyc

Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,9 @@ pyComBat 0.3.1
pyComBat 0.3.2

* Enhanced error handling for confounded variables

## [0.3.3] - 29-11-2022

pyComBat 0.3.3

* Loose dependencies versions constrains handling last releases
24 changes: 11 additions & 13 deletions combat/pycombat.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@


import numpy as np
from math import exp
from multiprocessing import Pool, cpu_count
from functools import partial
import mpmath as mp
import pandas as pd
Expand Down Expand Up @@ -144,8 +142,7 @@ def it_sol(sdat, g_hat, d_hat, g_bar, t2, a, b, conv=0.0001, exit_iteration=10e5
Returns:
array list -- estimated additive and multiplicative batch effect
"""

n = [len(i) for i in np.asarray(sdat)]
n = [len(i) for i in sdat]
t2_n = np.multiply(t2, n)
t2_n_g_hat = np.multiply(t2_n, g_hat)
g_old = np.ndarray.copy(g_hat)
Expand All @@ -163,7 +160,7 @@ def it_sol(sdat, g_hat, d_hat, g_bar, t2, a, b, conv=0.0001, exit_iteration=10e5
g_old = np.ndarray.copy(g_new) # save value for g
d_old = np.ndarray.copy(d_new) # save value for d
count += 1
adjust = np.asarray([g_new, d_new])
adjust = np.asarray([g_new, d_new], dtype=object)
return(adjust) # remove parenthesis in returns

# int_eprior - Monte Carlo integration function to find nonparametric adjustments
Expand Down Expand Up @@ -191,10 +188,10 @@ def int_eprior(sdat, g_hat, d_hat, precision):
test_approximation = 0
for i in range(len(sdat)):
# additive batch effect
g = np.asarray(np.delete(np.transpose(g_hat), i))
g = np.delete(np.transpose(g_hat), i)
# multiplicative batch effect
d = np.asarray(np.delete(np.transpose(d_hat), i))
x = np.asarray(np.transpose(sdat[i]))
x = np.expand_dims(np.transpose(sdat[i]), axis=1)
n = len(x)
j = [1]*n
dat = np.repeat(x, len(np.transpose(g)), axis=1)
Expand Down Expand Up @@ -247,14 +244,15 @@ def param_fun(i, s_data, batches, mean_only, gamma_hat, gamma_bar, delta_hat, t2
Returns:
array list -- estimated adjusted additive and multiplicative batch effect
"""
g_hat = np.expand_dims(gamma_hat[i], axis=0)
if mean_only: # if mean_only, no need for complex method: batch effect is immediately calculated
t2_n = np.multiply(t2[i], 1)
t2_n_g_hat = np.multiply(t2_n, gamma_hat[i])
t2_n_g_hat = np.multiply(t2_n, g_hat)
gamma_star = postmean(gamma_bar[i], 1, t2_n, t2_n_g_hat) # additive batch effect
delta_star = [1]*len(s_data) # multiplicative batch effect
else: # if not(mean_only) then use it_solve
temp = it_sol(np.transpose(np.transpose(s_data)[
batches[i]]), gamma_hat[i], delta_hat[i], gamma_bar[i], t2[i], a_prior[i], b_prior[i])
batches[i]]), g_hat, delta_hat[i], gamma_bar[i], t2[i], a_prior[i], b_prior[i])
gamma_star = temp[0] # additive batch effect
delta_star = temp[1] # multiplicative batch effect
return [gamma_star, delta_star]
Expand All @@ -278,7 +276,7 @@ def nonparam_fun(i, mean_only, delta_hat, s_data, batches, gamma_hat, precision)
delta_hat[i] = [1]*len(delta_hat[i])
# use int_eprior for non-parametric estimation
temp = int_eprior(np.transpose(np.transpose(s_data)[
batches[i]]), gamma_hat[i], delta_hat[i], precision)
batches[i]]), np.expand_dims(gamma_hat[i], axis=0), delta_hat[i], precision)
return [temp[0], temp[1]]

############
Expand Down Expand Up @@ -475,7 +473,7 @@ def calculate_stand_mean(grand_mean, n_array, design, n_batch, B_hat):
Returns:
stand_mean {matrix} -- standardised mean
"""
stand_mean = np.dot(np.transpose(np.mat(grand_mean)), np.mat([1]*n_array))
stand_mean = np.dot(np.transpose(np.expand_dims(grand_mean, axis=0)), np.array([[1]*n_array]))
# corrects the mean with design matrix information
if design is not None:
tmp = np.ndarray.copy(design)
Expand All @@ -497,8 +495,8 @@ def standardise_data(dat, stand_mean, var_pooled, n_array):
Returns:
s_data {matrix} -- standardised data matrix
"""
s_data = (dat - stand_mean) / \
np.dot(np.transpose(np.mat(np.sqrt(var_pooled))), np.mat([1]*n_array))
s_data = np.divide((dat - stand_mean), \
np.dot(np.transpose(np.expand_dims(np.sqrt(var_pooled), axis=0)), np.array([[1]*n_array])))
return(s_data)


Expand Down
6 changes: 3 additions & 3 deletions combat/test_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
# import function used for unit testing
from .pycombat import model_matrix, all_1
#covariate_model_matrix
from .pycombat import compute_prior, postmean, postvar, it_sol, int_eprior
from .pycombat import compute_prior, postmean, postvar
from .pycombat import check_mean_only, define_batchmod, check_ref_batch, treat_batches, treat_covariates, check_NAs
from .pycombat import calculate_mean_var, calculate_stand_mean
from .pycombat import standardise_data, fit_model, adjust_data
Expand Down Expand Up @@ -71,8 +71,8 @@
# local tests before unit testing

def test_means():
print("mean matrix:",np.mean(matrix))
print("mean matrix (adjusted):",np.mean(matrix_adjusted))
print("mean matrix:",matrix.mean())
print("mean matrix (adjusted):",matrix_adjusted.mean())
print("**********")
print("var matrix:",np.var(matrix))
print("var matrix (adjusted):",np.var(matrix_adjusted))
Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@

setuptools.setup(
install_requires=[
"numpy>=1.18.5,<=1.19.5",
"numpy>=1.18.5",
"mpmath>=1.1.0",
"pandas>=0.24.2,<=1.1.5",
"patsy==0.5.1"
"pandas>=0.24.2",
"patsy>=0.5.1"
],
name="combat",
version="0.3.2",
version="0.3.3",
author="Abdelkader Behdenna",
author_email="abdelkader@epigenelabs.com",
description="pyComBat, a Python tool for batch effects correction in high-throughput molecular data using empirical Bayes methods",
Expand Down