diff --git a/.gitignore b/.gitignore
index 434598a..cf1ee9e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,12 +1,147 @@
-# Python cache files
-*.pyc
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
# misc.
*.DS_Store
*.vscode
*.bash_history
misc/
-.coverage
# data files
/data/*.csv
@@ -33,9 +168,3 @@ misc/
*.out
*.gz
*.toc
-
-# build files
-build/
-pyComBat_test.egg-info/
-ComBat.egg-info/
-dist/
\ No newline at end of file
diff --git a/combat/__init__.py b/combat/__init__.py
index e69de29..f4bbe06 100644
--- a/combat/__init__.py
+++ b/combat/__init__.py
@@ -0,0 +1 @@
+from .pycombat import PyCombat, pycombat
diff --git a/combat/pycombat.py b/combat/pycombat.py
index cfc4801..0c2dcad 100644
--- a/combat/pycombat.py
+++ b/combat/pycombat.py
@@ -1,4 +1,4 @@
-#-----------------------------------------------------------------------------
+# -----------------------------------------------------------------------------
# Copyright (C) 2019-2020 A. Behdenna, A. Nordor, J. Haziza and A. Gema
# This program is free software: you can redistribute it and/or modify
@@ -18,656 +18,239 @@
# file pycombat.py
# author A. Behdenna, J. Haziza, A. Gema, A. Nordor
-# date Sept 2020
-#-----------------------------------------------------------------------------
+# date Sept 2020
+# -----------------------------------------------------------------------------
+from __future__ import annotations
import numpy as np
-from math import exp
-from multiprocessing import Pool, cpu_count
-from functools import partial
-import mpmath as mp
import pandas as pd
-#import unittest
-
-
-def model_matrix(info, intercept=True, drop_first=True):
- """Creates the model_matrix from batch list
-
- Arguments:
- info {list} -- list info with batch or covariates data
- intercept {bool} -- boolean for intercept in model matrix
-
- Returns:
- matrix -- model matrix generate from batch list
- """
- if not isinstance(info[0],list) :
- info = [info]
- else:
- info = info
- info_dict = {}
- for i in range(len(info)):
- info_dict[f"col{str(i)}"] = list(map(str,info[i]))
- df = pd.get_dummies(pd.DataFrame(info_dict), drop_first=drop_first, dtype=float)
- if intercept:
- df["intercept"] = 1.0
- return df.to_numpy()
-
-
-def all_1(list_of_elements):
- """checks if all elements in a list are 1s
-
- Arguments:
- list_of_elements {list} -- list of elements
-
- Returns:
- bool -- True iff all elements of the list are 1s
- """
- return((list_of_elements == 1).all())
-
-
-# aprior and bprior are useful to compute "hyper-prior values"
-# -> prior parameters used to estimate the prior gamma distribution for multiplicative batch effect
-# aprior - calculates empirical hyper-prior values
-
-def compute_prior(prior, gamma_hat, mean_only):
- """[summary]
-
- Arguments:
- prior {char} -- 'a' or 'b' depending of the prior to be calculated
- gamma_hat {matrix} -- matrix of additive batch effect
- mean_only {bool} -- True iff mean_only selected
-
- Returns:
- float -- [the prior calculated (aprior or bprior)
- """
- if mean_only:
- return 1
- m = np.mean(gamma_hat)
- s2 = np.var(gamma_hat)
- if prior == 'a':
- return (2*s2+m*m)/s2
- elif prior == 'b':
- return (m*s2+m*m*m)/s2
-
-
-def postmean(g_bar, d_star, t2_n, t2_n_g_hat):
- """estimates additive batch effect
-
- Arguments:
- g_bar {matrix} -- additive batch effect
- d_star {matrix} -- multiplicative batch effect
- t2_n {matrix} --
- t2_n_g_hat {matrix} --
-
- Returns:
- matrix -- estimated additive batch effect
- """
- return np.divide(t2_n_g_hat+d_star*g_bar, np.asarray(t2_n+d_star))
-
-
-def postvar(sum2, n, a, b):
- """estimates multiplicative batch effect
-
- Arguments:
- sum2 {vector} --
- n {[type]} --
- a {float} -- aprior
- b {float} -- bprior
-
- Returns:
- matrix -- estimated multiplicative batch effect
- """
- return(np.divide((np.multiply(0.5, sum2)+b), (np.multiply(0.5, n)+a-1)))
-
-
-def it_sol(sdat, g_hat, d_hat, g_bar, t2, a, b, conv=0.0001, exit_iteration=10e5):
- """iterative solution for Empirical Bayesian method
-
- Arguments:
- sdat {matrix} --
- g_hat {matrix} -- average additive batch effect
- d_hat {matrix} -- average multiplicative batch effect
- g_bar {matrix} -- additive batch effect
- t2 {matrix} --
- a {float} -- aprior
- b {float} -- bprior
-
- Keyword Arguments:
- conv {float} -- convergence criterion (default: {0.0001})
- exit_iteration {float} -- maximum number of iterations before exit (default: {10e5})
-
- Returns:
- array list -- estimated additive and multiplicative batch effect
- """
-
- n = [len(i) for i in np.asarray(sdat)]
- t2_n = np.multiply(t2, n)
- t2_n_g_hat = np.multiply(t2_n, g_hat)
- g_old = np.ndarray.copy(g_hat)
- d_old = np.ndarray.copy(d_hat)
- change = 1
- count = 0 # number of steps needed (for diagnostic only)
- # convergence criteria, if new-old < conv, then stop
- while (change > conv) and (count < exit_iteration):
- g_new = postmean(g_bar, d_old, t2_n, t2_n_g_hat) # updated additive batch effect
- sum2 = np.sum(np.asarray(np.square(
- sdat-np.outer(g_new[0][0], np.ones(np.ma.size(sdat, axis=1))))), axis=1)
- d_new = postvar(sum2, n, a, b) # updated multiplicative batch effect
- change = max(np.amax(np.absolute(g_new-np.asarray(g_old))/np.asarray(g_old)), np.amax(
- np.absolute(d_new-d_old)/d_old)) # maximum difference between new and old estimate
- g_old = np.ndarray.copy(g_new) # save value for g
- d_old = np.ndarray.copy(d_new) # save value for d
- count += 1
- adjust = np.asarray([g_new, d_new])
- return(adjust) # remove parenthesis in returns
-
-# int_eprior - Monte Carlo integration function to find nonparametric adjustments
-# Johnson et al (Biostatistics 2007, supp.mat.) show that we can estimate the multiplicative and additive batch effects with an integral
-# This integral is numerically computed through Monte Carlo inegration (iterative method)
-
-
-def int_eprior(sdat, g_hat, d_hat, precision):
- """ int_eprior - Monte Carlo integration function to find nonparametric adjustments
- Johnson et al (Biostatistics 2007, supp.mat.) show that we can estimate the multiplicative and additive batch effects with an integral
- This integral is numerically computed through Monte Carlo inegration (iterative method)
-
- Arguments:
- sdat {matrix} -- data matrix
- g_hat {matrix} -- average additive batch effect
- d_hat {matrix} -- average multiplicative batch effect
- precision {float} -- level of precision for precision computing
-
- Returns:
- array list -- estimated additive and multiplicative batch effect
- """
- g_star = []
- d_star = []
- # use this variable to only print error message once if approximation used
- test_approximation = 0
- for i in range(len(sdat)):
- # additive batch effect
- g = np.asarray(np.delete(np.transpose(g_hat), i))
- # multiplicative batch effect
- d = np.asarray(np.delete(np.transpose(d_hat), i))
- x = np.asarray(np.transpose(sdat[i]))
- n = len(x)
- j = [1]*n
- dat = np.repeat(x, len(np.transpose(g)), axis=1)
- resid2 = np.square(dat-g)
- sum2 = np.dot(np.transpose(resid2), j)
- # /begin{handling high precision computing}
- temp_2d = 2*d
- if (precision == None):
- LH = np.power(1/(np.pi*temp_2d), n/2)*np.exp(np.negative(sum2)/(temp_2d))
-
- else: # only if precision parameter informed
- # increase the precision of the computing (if negative exponential too close to 0)
- mp.dps = precision
- buf_exp = list(map(mp.exp, np.negative(sum2)/(temp_2d)))
- buf_pow = list(map(partial(mp.power, y=n/2), 1/(np.pi*temp_2d)))
- LH = buf_pow*buf_exp # likelihood
- # /end{handling high precision computing}
- LH = np.nan_to_num(LH) # corrects NaNs in likelihood
- if np.sum(LH) == 0 and test_approximation == 0: # correction for LH full of 0.0
- test_approximation = 1 # this message won't appear again
- print("###\nValues too small, approximation applied to avoid division by 0.\nPrecision mode can correct this problem, but increases computation time.\n###")
- LH[LH == 0] = np.exp(-745)
- g_star.append(np.sum(g*LH)/np.sum(LH))
- d_star.append(np.sum(d*LH)/np.sum(LH))
- else:
- g_star.append(np.sum(g*LH)/np.sum(LH))
- d_star.append(np.sum(d*LH)/np.sum(LH))
- adjust = np.asarray([np.asarray(g_star), np.asarray(d_star)])
- return(adjust)
-
-
-def param_fun(i, s_data, batches, mean_only, gamma_hat, gamma_bar, delta_hat, t2, a_prior, b_prior):
- """parametric estimation of batch effects
-
- Arguments:
- i {int} -- column index
- s_data {matrix} --
- batches {list list} -- list of list of batches' elements
- mean_only {bool} -- True iff mean_only selected
- gamma_hat {matrix} -- average additive batch effect
- gamma_bar {matrix} -- estimated additive batch effect
- delta_hat {matrix} -- average multiplicative batch effect
- t2 {matrix} --
- a_prior {float} -- aprior
- b_prior {float} -- bprior
-
- Returns:
- array list -- estimated adjusted additive and multiplicative batch effect
- """
- if mean_only: # if mean_only, no need for complex method: batch effect is immediately calculated
- t2_n = np.multiply(t2[i], 1)
- t2_n_g_hat = np.multiply(t2_n, gamma_hat[i])
- gamma_star = postmean(gamma_bar[i], 1, t2_n, t2_n_g_hat) # additive batch effect
- delta_star = [1]*len(s_data) # multiplicative batch effect
- else: # if not(mean_only) then use it_solve
- temp = it_sol(np.transpose(np.transpose(s_data)[
- batches[i]]), gamma_hat[i], delta_hat[i], gamma_bar[i], t2[i], a_prior[i], b_prior[i])
- gamma_star = temp[0] # additive batch effect
- delta_star = temp[1] # multiplicative batch effect
- return [gamma_star, delta_star]
-
-def nonparam_fun(i, mean_only, delta_hat, s_data, batches, gamma_hat, precision):
- """non-parametric estimation
-
- Arguments:
- i {int} -- column index
- mean_only {bool} -- True iff mean_only selected
- delta_hat {matrix} -- estimated multiplicative batch effect
- s_data {matrix} --
- batches {list list} -- list of list of batches' elements
- gamma_hat {matrix} -- estimated additive batch effect
- precision {float} -- level of precision for precision computing
-
- Returns:
- array list -- estimated adjusted additive and multiplicative batch effect
- """
- if mean_only: # if mean only, change delta_hat to vector of 1s
- delta_hat[i] = [1]*len(delta_hat[i])
- # use int_eprior for non-parametric estimation
- temp = int_eprior(np.transpose(np.transpose(s_data)[
- batches[i]]), gamma_hat[i], delta_hat[i], precision)
- return [temp[0], temp[1]]
-
-############
-# pyComBat #
-############
-
-
-def check_mean_only(mean_only):
- """checks mean_only option
-
- Arguments:
- mean_only {boolean} -- user's choice about mean_only
-
- Returns:
- ()
- """
- if mean_only == True:
- print("Using mean only version")
-
-
-def define_batchmod(batch):
- """generates model matrix
-
- Arguments:
- batch {list} -- list of batch id
-
- Returns:
- batchmod {matrix} -- model matrix for batches
- """
- batchmod = model_matrix(list(batch), intercept=False, drop_first=False)
- return(batchmod)
-
-
-def check_ref_batch(ref_batch, batch, batchmod):
- """check ref_batch option and treat it if needed
-
- Arguments:
- ref_batch {int} -- the reference batch
- batch {list} -- list of batch id
- batchmod {matrix} -- model matrix related to batches
-
- Returns:
- ref {int list} -- the corresponding positions of the reference batch in the batch list
- batchmod {matrix} -- updated model matrix related to batches, with reference
- """
- if ref_batch is not None:
- if ref_batch not in batch:
- print("Reference level ref.batch must be one of the levels of batch.")
- exit(0)
- print("Using batch "+str(ref_batch) +
- " as a reference batch.")
- # ref keeps in memory the columns concerned by the reference batch
- ref = np.where(np.unique(batch) == ref_batch)[0][0]
- # updates batchmod with reference
- batchmod[:,ref] = 1
- else:
- ref = None # default settings
- return(ref, batchmod)
-
-
-def treat_batches(batch):
- """treat batches
-
- Arguments:
- batch {list} -- batch list
-
- Returns:
- n_batch {int} -- number of batches
- batches {int list} -- list of unique batches
- n_batches {int list} -- list of batches lengths
- n_array {int} -- total size of dataset
- """
- n_batch = len(np.unique(batch)) # number of batches
- print("Found "+str(n_batch)+" batches.")
- batches = [] # list of lists, contains the list of position for each batch
- for i in range(n_batch):
- batches.append(np.where(batch == np.unique(batch)[i])[0])
- batches = np.asarray(batches)
- n_batches = list(map(len, batches))
- if 1 in n_batches:
- #mean_only = True # no variance if only one sample in a batch - mean_only has to be used
- print("\nOne batch has only one sample, try setting mean_only=True.\n")
- n_array = sum(n_batches)
- return(n_batch, batches, n_batches, n_array)
-
-
-def treat_covariates(batchmod, mod, ref, n_batch):
- """treat covariates
-
- Arguments:
- batchmod {matrix} -- model matrix for batch
- mod {matrix} -- model matrix for other covariates
- ref {int} -- reference batch
- n_batch {int} -- number of batches
-
- Returns:
- check {bool list} -- a list characterising all covariates
- design {matrix} -- model matrix for all covariates, including batch
- """
- # design matrix for sample conditions
- if mod == []:
- design = batchmod
- else:
- mod_matrix = model_matrix(mod, intercept=True)
- design = np.concatenate((batchmod, mod_matrix), axis=1)
- check = list(map(all_1, np.transpose(design)))
- if ref is not None: # if ref
- check[ref] = False # the reference in not considered as a covariate
- design = design[:, ~np.array(check)]
- design = np.transpose(design)
-
- print("Adjusting for "+str(len(design)-len(np.transpose(batchmod))) +
- " covariate(s) or covariate level(s).")
-
- # if matrix cannot be invertible, different cases
- if np.linalg.matrix_rank(design) < len(design):
- if len(design) == n_batch + 1: # case 1: covariate confunded with a batch
- print(
- "The covariate is confunded with batch. Remove the covariate and rerun pyComBat.")
- exit(0)
- if len(design) > n_batch + 1: # case 2: multiple covariates confunded with a batch
- if np.linalg.matrix_rank(np.transpose(design)[:n_batch]) < len(design):
- print(
- "The covariates are confounded! Please remove one or more of the covariates so the design is not confounded.")
- exit(0)
- else: # case 3: at least a covariate confunded with a batch
- print(
- "At least one covariate is confounded with batch. Please remove confounded covariates and rerun pyComBat")
- exit(0)
- return(design)
-
-
-def check_NAs(dat):
- """check if NaNs - in theory, we construct the data without NAs
-
- Arguments:
- dat {matrix} -- the data matrix
-
- Returns:
- NAs {bool} -- boolean characterising the presence of NaNs in the data matrix
- """
- # NAs = True in (np.isnan(dat))
- NAs = np.isnan(np.sum(dat)) # Check if NaN exists
- if NAs:
- print("Found missing data values. Please remove all missing values before proceeding with pyComBat.")
- return(NAs)
-
-
-def calculate_mean_var(design, batches, ref, dat, NAs, ref_batch, n_batches, n_batch, n_array):
- """ calculates the Normalisation factors
-
- Arguments:
- design {matrix} -- model matrix for all covariates
- batches {int list} -- list of unique batches
- dat {matrix} -- data matrix
- NAs {bool} -- presence of NaNs in the data matrix
- ref_batch {int} -- reference batch
- n_batches {int list} -- list of batches lengths
- n_array {int} -- total size of dataset
-
- Returns:
- B_hat {matrix} -- regression coefficients corresponding to the design matrix
- grand_mean {matrix} -- Mean for each gene and each batch
- var_pooled {matrix} -- Variance for each gene and each batch
- """
- print("Standardizing Data across genes.")
- if not(NAs): # NAs not supported
- # B_hat is the vector of regression coefficients corresponding to the design matrix
- B_hat = np.linalg.solve(np.dot(design, np.transpose(
- design)), np.dot(design, np.transpose(dat)))
-
- # Calculates the general mean
- if ref_batch is not None:
- grand_mean = np.transpose(B_hat[ref])
- else:
- grand_mean = np.dot(np.transpose(
- [i / n_array for i in n_batches]), B_hat[0:n_batch])
- # Calculates the general variance
- if not NAs: # NAs not supported
- if ref_batch is not None: # depending on ref batch
- ref_dat = np.transpose(np.transpose(dat)[batches[ref]])
- var_pooled = np.dot(np.square(ref_dat - np.transpose(np.dot(np.transpose(
- design)[batches[ref]], B_hat))), [1/n_batches[ref]]*n_batches[ref])
+from sklearn.base import BaseEstimator, TransformerMixin
+
+from .utils.combat_utils import (
+ model_matrix,
+ check_ref_batch,
+ treat_covariates,
+ treat_batches,
+ calculate_mean_var,
+ calculate_stand_mean,
+ standardise_data,
+ fit_model,
+)
+from .utils.common_utils import (
+ check_NAs,
+ check_mean_only,
+)
+
+
+class PyCombat(BaseEstimator, TransformerMixin):
+ def __init__(self):
+ """
+ Corrects batch effect in microarray expression data.
+        Takes a gene expression file and a list of known batches corresponding to each sample.
+ """
+ self.ref = None
+
+ self.ref_batch = None
+
+ self.gamma_star = None
+ self.delta_star = None
+ self.batch_design = None
+ self.n_batches = None
+ self.var_pooled = None
+ self.stand_mean = None
+ self.n_array = None
+ self.ref_batch = None
+ self.ref = None
+ self.batches = None
+
+ def check_if_fitted(self):
+ error_message = "is not initialized, run .fit_transform() or .fit()"
+
+ assert (
+ self.gamma_star is not None
+ ), f"gamma_star (estimated additive batch effect) {error_message}"
+ assert (
+ self.delta_star is not None
+ ), f"delta_star (estimated multiplicative batch effect) {error_message}"
+ assert (
+ self.batch_design is not None
+ ), f"batch_design (information about batches in design matrix) {error_message}"
+ assert (
+ self.n_batches is not None
+        ), f"n_batches (list of batch lengths) {error_message}"
+ assert (
+ self.var_pooled is not None
+        ), f"var_pooled (variance for each gene and each batch) {error_message}"
+        assert (
+            self.stand_mean is not None
+        ), f"stand_mean (standardised mean) {error_message}"
+ assert (
+ self.n_array is not None
+ ), f"n_array (total size of dataset) {error_message}"
+ assert (
+ self.batches is not None
+ ), f"batches (list of unique batches) {error_message}"
+
+ def fit(
+ self,
+ data: pd.DataFrame,
+ batch: list,
+ mod: list = [],
+ par_prior: bool = True,
+ prior_plots: bool = False,
+ mean_only: bool = False,
+ ref_batch: int = None,
+ precision: float = None,
+ ) -> PyCombat:
+ """
+        Initializes the important variables for the batch effect correction
+
+ Args:
+ data (pd.DataFrame): The expression matrix (dataframe). It contains the information about the gene expression (rows) for each sample (columns).
+ batch (list): List of batch indexes. The batch list describes the batch for each sample. The batches list has as many elements as the number of columns in the expression matrix.
+ mod (list, optional): List (or list of lists) of covariate(s) indexes. The mod list describes the covariate(s) for each sample. Each mod list has as many elements as the number of columns in the expression matrix. Defaults to [].
+ par_prior (bool, optional): False for non-parametric estimation of batch effects. Defaults to True.
+ prior_plots (bool, optional): True if requires to plot the priors. Defaults to False.
+ mean_only (bool, optional): True if just adjusting the means and not individual batch effects. Defaults to False.
+ ref_batch (int, optional): reference batch selected. Defaults to None.
+ precision (float, optional): level of precision for precision computing. Defaults to None.
+
+ Raises:
+            ValueError: Error will be triggered if NaN is found in the data
+
+ Returns:
+ PyCombat: The fitted object with all needed components for adjusting the data
+ """
+ self.list_samples = data.columns
+ self.list_genes = data.index
+ self.data_value = data.values
+
+ self.ref_batch = ref_batch
+
+ check_mean_only(mean_only)
+
+ # Generate model matrix
+ batchmod = model_matrix(list(batch), intercept=False, drop_first=False)
+
+ # self.ref (List[int]): the corresponding positions of the reference batch in the batch list
+ self.ref, batchmod = check_ref_batch(self.ref_batch, batch, batchmod)
+
+ # self.batches (List[int]): list of unique batches
+ # self.n_batches (List[int]): list of batches lengths
+ # self.n_array (int): total size of dataset
+ n_batch, self.batches, self.n_batches, self.n_array = treat_batches(batch)
+
+ design = treat_covariates(batchmod, mod, self.ref, n_batch)
+
+ NAs = check_NAs(self.data_value)
+ if not NAs:
+ B_hat, grand_mean, self.var_pooled = calculate_mean_var(
+ design,
+ self.batches,
+ self.ref,
+ self.data_value,
+ NAs,
+ self.ref_batch,
+ self.n_batches,
+ n_batch,
+ self.n_array,
+ )
+ self.stand_mean = calculate_stand_mean(
+ grand_mean, self.n_array, design, n_batch, B_hat
+ )
+ self.standardised_data = standardise_data(
+ self.data_value, self.stand_mean, self.var_pooled, self.n_array
+ )
+
+ # self.gamma_star (np.array): estimated additive batch effect
+ # self.delta_star (np.array): estimated multiplicative batch effect
+ # self.batch_design (np.array): information about batches in design matrix
+ self.gamma_star, self.delta_star, self.batch_design = fit_model(
+ design,
+ n_batch,
+ self.standardised_data,
+ self.batches,
+ mean_only,
+ par_prior,
+ precision,
+ self.ref_batch,
+ self.ref,
+ NAs,
+ )
else:
- var_pooled = np.dot(np.square(
- dat - np.transpose(np.dot(np.transpose(design), B_hat))), [1/n_array]*n_array)
-
- return(B_hat, grand_mean, var_pooled)
-
-
-def calculate_stand_mean(grand_mean, n_array, design, n_batch, B_hat):
- """ transform the format of the mean for substraction
-
- Arguments:
- grand_mean {matrix} -- Mean for each gene and each batch
- n_array {int} -- total size of dataset
- design {[type]} -- design matrix for all covariates including batch
- n_batch {int} -- number of batches
- B_hat {matrix} -- regression coefficients corresponding to the design matrix
-
- Returns:
- stand_mean {matrix} -- standardised mean
- """
- stand_mean = np.dot(np.transpose(np.mat(grand_mean)), np.mat([1]*n_array))
- # corrects the mean with design matrix information
- if design is not None:
- tmp = np.ndarray.copy(design)
- tmp[0:n_batch] = 0
- stand_mean = stand_mean + \
- np.transpose(np.dot(np.transpose(tmp), B_hat))
- return(stand_mean)
-
-
-def standardise_data(dat, stand_mean, var_pooled, n_array):
- """standardise the data: substract mean and divide by variance
-
- Arguments:
- dat {matrix} -- data matrix
- stand_mean {matrix} -- standardised mean
- var_pooled {matrix} -- Variance for each gene and each batch
- n_array {int} -- total size of dataset
-
- Returns:
- s_data {matrix} -- standardised data matrix
- """
- s_data = (dat - stand_mean) / \
- np.dot(np.transpose(np.mat(np.sqrt(var_pooled))), np.mat([1]*n_array))
- return(s_data)
-
-
-def fit_model(design, n_batch, s_data, batches, mean_only, par_prior, precision, ref_batch, ref, NAs):
- print("Fitting L/S model and finding priors.")
-
- # fraction of design matrix related to batches
- batch_design = design[0:n_batch]
-
- if not NAs: # CF SUPRA FOR NAs
- # gamma_hat is the vector of additive batch effect
- gamma_hat = np.linalg.solve(np.dot(batch_design, np.transpose(batch_design)),
- np.dot(batch_design, np.transpose(s_data)))
-
- delta_hat = [] # delta_hat is the vector of estimated multiplicative batch effect
-
- if (mean_only):
- # no variance if mean_only == True
- delta_hat = [np.asarray([1]*len(s_data))] * len(batches)
- else:
- for i in batches: # feed incrementally delta_hat
- list_map = np.transpose(np.transpose(s_data)[i]).var(
- axis=1) # variance for each row
- delta_hat.append(np.squeeze(np.asarray(list_map)))
-
- gamma_bar = list(map(np.mean, gamma_hat)) # vector of means for gamma_hat
- t2 = list(map(np.var, gamma_hat)) # vector of variances for gamma_hat
-
- # calculates hyper priors for gamma (additive batch effect)
- a_prior = list(
- map(partial(compute_prior, 'a', mean_only=mean_only), delta_hat))
- b_prior = list(
- map(partial(compute_prior, 'b', mean_only=mean_only), delta_hat))
-
- # initialise gamma and delta for parameters estimation
- gamma_star = np.empty((n_batch, len(s_data)))
- delta_star = np.empty((n_batch, len(s_data)))
-
- if par_prior:
- # use param_fun function for parametric adjustments (cf. function definition)
- print("Finding parametric adjustments.")
- results = list(map(partial(param_fun,
- s_data=s_data,
- batches=batches,
- mean_only=mean_only,
- gamma_hat=gamma_hat,
- gamma_bar=gamma_bar,
- delta_hat=delta_hat,
- t2=t2,
- a_prior=a_prior,
- b_prior=b_prior), range(n_batch)))
- else:
- # use nonparam_fun for non-parametric adjustments (cf. function definition)
- print("Finding nonparametric adjustments")
- results = list(map(partial(nonparam_fun, mean_only=mean_only, delta_hat=delta_hat,
- s_data=s_data, batches=batches, gamma_hat=gamma_hat, precision=precision), range(n_batch)))
-
- for i in range(n_batch): # store the results in gamma/delta_star
- results_i = results[i]
- gamma_star[i], delta_star[i] = results_i[0], results_i[1]
-
- # update if reference batch (the reference batch is not supposed to be modified)
- if ref_batch:
- len_gamma_star_ref = len(gamma_star[ref])
- gamma_star[ref] = [0] * len_gamma_star_ref
- delta_star[ref] = [1] * len_gamma_star_ref
-
- return(gamma_star, delta_star, batch_design)
-
-
-def adjust_data(s_data, gamma_star, delta_star, batch_design, n_batches, var_pooled, stand_mean, n_array, ref_batch, ref, batches, dat):
- """Adjust the data -- corrects for estimated batch effects
-
- Arguments:
- s_data {matrix} -- standardised data matrix
- gamma_star {matrix} -- estimated additive batch effect
- delta_star {matrix} -- estimated multiplicative batch effect
- batch_design {matrix} -- information about batches in design matrix
- n_batches {int list} -- list of batches lengths
- stand_mean {matrix} -- standardised mean
- var_pooled {matrix} -- Variance for each gene and each batch
- n_array {int} -- total size of dataset
- ref_batch {int} -- reference batch
- ref {int list} -- the corresponding positions of the reference batch in the batch list
- batches {int list} -- list of unique batches
- dat
-
- Returns:
- bayes_data [matrix] -- data adjusted for correction of batch effects
- """
- # Now we adjust the data:
- # 1. substract additive batch effect (gamma_star)
- # 2. divide by multiplicative batch effect (delta_star)
- print("Adjusting the Data")
- bayes_data = np.transpose(s_data)
- j = 0
- for i in batches: # for each batch, specific correction
- bayes_data[i] = (bayes_data[i] - np.dot(np.transpose(batch_design)[i], gamma_star)) / \
- np.transpose(
- np.outer(np.sqrt(delta_star[j]), np.asarray([1]*n_batches[j])))
- j += 1
-
- # renormalise the data after correction:
- # 1. multiply by variance
- # 2. add mean
- bayes_data = np.multiply(np.transpose(bayes_data), np.outer(
- np.sqrt(var_pooled), np.asarray([1]*n_array))) + stand_mean
-
- # correction for reference batch
- if ref_batch:
- bayes_data[batches[ref]] = dat[batches[ref]]
-
- # returns the data corrected for batch effects
- return bayes_data
-
-
-def pycombat(data, batch, mod=[], par_prior=True, prior_plots=False, mean_only=False, ref_batch=None, precision=None, **kwargs):
- """Corrects batch effect in microarray expression data. Takes an gene expression file and a list of known batches corresponding to each sample.
-
- Arguments:
- data {matrix} -- The expression matrix (dataframe). It contains the information about the gene expression (rows) for each sample (columns).
-
- batch {list} -- List of batch indexes. The batch list describes the batch for each sample. The batches list has as many elements as the number of columns in the expression matrix.
-
- Keyword Arguments:
- mod {list} -- List (or list of lists) of covariate(s) indexes. The mod list describes the covariate(s) for each sample. Each mod list has as many elements as the number of columns in the expression matrix (default: {[]}).
-
- par_prior {bool} -- False for non-parametric estimation of batch effects (default: {True}).
-
- prior_plots {bool} -- True if requires to plot the priors (default: {False} -- Not implemented yet!).
-
- mean_only {bool} -- True iff just adjusting the means and not individual batch effects (default: {False}).
-
- ref_batch {int} -- reference batch selected (default: {None}).
-
- precision {float} -- level of precision for precision computing (default: {None}).
-
- Returns:
- bayes_data_df -- The expression dataframe adjusted for batch effects.
- """
-
- list_samples = data.columns
- list_genes = data.index
- dat = data.values
-
- check_mean_only(mean_only)
-
- batchmod = define_batchmod(batch)
- ref, batchmod = check_ref_batch(ref_batch, batch, batchmod)
- n_batch, batches, n_batches, n_array = treat_batches(batch)
- design = treat_covariates(batchmod, mod, ref, n_batch)
- NAs = check_NAs(dat)
- if not(NAs):
- B_hat, grand_mean, var_pooled = calculate_mean_var(
- design, batches, ref, dat, NAs, ref_batch, n_batches, n_batch, n_array)
- stand_mean = calculate_stand_mean(
- grand_mean, n_array, design, n_batch, B_hat)
- s_data = standardise_data(dat, stand_mean, var_pooled, n_array)
- gamma_star, delta_star, batch_design = fit_model(
- design, n_batch, s_data, batches, mean_only, par_prior, precision, ref_batch, ref, NAs)
- bayes_data = adjust_data(s_data, gamma_star, delta_star, batch_design,
- n_batches, var_pooled, stand_mean, n_array, ref_batch, ref, batches, dat)
-
- bayes_data_df = pd.DataFrame(bayes_data,
- columns = list_samples,
- index = list_genes)
-
- return(bayes_data_df)
- else:
- raise ValueError("NaN value is not accepted")
\ No newline at end of file
+ raise ValueError("NaN value is not accepted")
+
+ return self
+
+ def transform(self, data: pd.DataFrame) -> pd.DataFrame:
+ """
+ Adjust the data, correct the data given the estimated batch effects
+
+ Args:
+ data (pd.DataFrame): The expression matrix (dataframe). It contains the information about the gene expression (rows) for each sample (columns).
+
+ Returns:
+ pd.DataFrame: The expression dataframe adjusted given the batch effects.
+ """
+ # Check if the fit function has been initiated before transform
+ self.check_if_fitted()
+
+ print("Adjusting the Data")
+ bayes_data = np.transpose(self.standardised_data)
+ for i, batch_index in enumerate(
+ self.batches
+ ): # for each batch, specific correction
+ bayes_data[batch_index] = (
+ bayes_data[batch_index]
+ - np.dot(np.transpose(self.batch_design)[batch_index], self.gamma_star)
+ ) / np.transpose(
+ np.outer(
+ np.sqrt(self.delta_star[i]), np.asarray([1] * self.n_batches[i])
+ )
+ )
+
+ # renormalise the data after correction:
+ # 1. multiply by variance
+ # 2. add mean
+ bayes_data = (
+ np.multiply(
+ np.transpose(bayes_data),
+ np.outer(np.sqrt(self.var_pooled), np.asarray([1] * self.n_array)),
+ )
+ + self.stand_mean
+ )
+
+ # correction for reference batch
+ if self.ref_batch:
+ bayes_data[self.batches[self.ref]] = self.data_value[self.batches[self.ref]]
+
+ # returns the data corrected for batch effects
+ return pd.DataFrame(
+ bayes_data, columns=self.list_samples, index=self.list_genes
+ )
+
+
+def pycombat(
+ data: pd.DataFrame,
+ batch: list,
+ mod: list = [],
+ par_prior: bool = True,
+ prior_plots: bool = False,
+ mean_only: bool = False,
+ ref_batch: int = None,
+ precision: float = None,
+ **kwargs,
+):
+ return PyCombat().fit_transform(
+ data,
+ batch,
+ mod=mod,
+ par_prior=par_prior,
+ prior_plots=prior_plots,
+ mean_only=mean_only,
+ ref_batch=ref_batch,
+ precision=precision,
+ )
diff --git a/combat/test_unit.py b/combat/test_unit.py
deleted file mode 100644
index 00a1dfb..0000000
--- a/combat/test_unit.py
+++ /dev/null
@@ -1,211 +0,0 @@
-#-----------------------------------------------------------------------------
-# Copyright (C) 2019-2020 A. Behdenna, A. Nordor, J. Haziza and A. Gema
-
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
-# For more information, please contact Abdelkader Behdenna /
-
-# file pycombat.py
-# author A. Behdenna, J. Haziza, A. Gema, A. Nordor
-# date Sept 2020
-#-----------------------------------------------------------------------------
-
-
-
-
-# this file is only used for unit testing
-# We import the function that will be tested one by one, incrementally
-# Generates a report about the functions tested
-
-import numpy as np
-import pandas as pd
-#from patsy import dmatrix
-
-##########
-# import function used for unit testing
-from .pycombat import model_matrix, all_1
-#covariate_model_matrix
-from .pycombat import compute_prior, postmean, postvar, it_sol, int_eprior
-from .pycombat import check_mean_only, define_batchmod, check_ref_batch, treat_batches, treat_covariates, check_NAs
-from .pycombat import calculate_mean_var, calculate_stand_mean
-from .pycombat import standardise_data, fit_model, adjust_data
-from .pycombat import pycombat
-
-##########
-print("\n#### Unit Testing for pyComBat ####\n")
-
-##########
-# Define constants for unit testing
-
-batch = np.asarray([1,1,1,2,2,3,3,3,3])
-# matrix = np.transpose(np.asmatrix([np.random.normal(size=1000,loc=3,scale=1),np.random.normal(size=1000,loc=3,scale=1),np.random.normal(size=1000,loc=3,scale=1),
-# np.random.normal(size=1000,loc=2,scale=0.6),np.random.normal(size=1000,loc=2,scale=0.6),
-# np.random.normal(size=1000,loc=4,scale=1),np.random.normal(size=1000,loc=4,scale=1),np.random.normal(size=1000,loc=4,scale=1),np.random.normal(size=1000,loc=4,scale=1)]))
-matrix = np.transpose([np.random.normal(size=1000,loc=3,scale=1),np.random.normal(size=1000,loc=3,scale=1),np.random.normal(size=1000,loc=3,scale=1),
- np.random.normal(size=1000,loc=2,scale=0.6),np.random.normal(size=1000,loc=2,scale=0.6),
- np.random.normal(size=1000,loc=4,scale=1),np.random.normal(size=1000,loc=4,scale=1),np.random.normal(size=1000,loc=4,scale=1),np.random.normal(size=1000,loc=4,scale=1)])
-
-matrix = pd.DataFrame(data=matrix,columns=["sample_"+str(i+1) for i in range(9)],index=["gene_"+str(i+1) for i in range(1000)])
-
-print("Matrix and batch generated.")
-
-matrix_adjusted = pycombat(matrix,batch)
-
-list_samples = matrix_adjusted.columns
-list_genes = matrix_adjusted.index
-
-print("Adjusted matrix generated.")
-
-##########
-# local tests before unit testing
-
-def test_means():
- print("mean matrix:",np.mean(matrix))
- print("mean matrix (adjusted):",np.mean(matrix_adjusted))
- print("**********")
- print("var matrix:",np.var(matrix))
- print("var matrix (adjusted):",np.var(matrix_adjusted))
-
-###########
-# useful constants for unit testing
-ref_batch = None
-mean_only = False
-par_prior = False
-precision = None
-mod = []
-dat = matrix.values
-#batchmod = define_batchmod(batch)
-batchmod = model_matrix(list(batch), intercept=False, drop_first=False)
-ref,batchmod = check_ref_batch(ref_batch,batch,batchmod)
-n_batch, batches, n_batches, n_array = treat_batches(batch)
-design = treat_covariates(batchmod, mod, ref, n_batch)
-NAs = check_NAs(dat)
-B_hat, grand_mean, var_pooled = calculate_mean_var(design, batches, ref, dat, NAs, ref_batch, n_batches, n_batch, n_array)
-stand_mean = calculate_stand_mean(grand_mean, n_array, design, n_batch,B_hat)
-s_data = standardise_data(dat, stand_mean, var_pooled, n_array)
-gamma_star, delta_star, batch_design = fit_model(design,n_batch,s_data, batches, mean_only, par_prior, precision, ref_batch, ref, NAs)
-bayes_data = adjust_data(s_data, gamma_star, delta_star, batch_design, n_batches, var_pooled, stand_mean, n_array, ref_batch, ref, batches, dat)
-
-
-##########
-# Unit tests
-
-# test for compute_prior
-def test_compute_prior():
- print("aprior",compute_prior("a",gamma_star,False))
- assert compute_prior("a",gamma_star,True) == 1
- print("bprior",compute_prior("b",gamma_star,False))
- assert compute_prior("b",gamma_star,True) == 1
-
-# test for postmean
-def test_postmean():
- assert np.shape(postmean(gamma_star,delta_star,gamma_star,delta_star)) == np.shape(gamma_star)
-
-# test for postvar
-def test_postvar():
- assert np.sum(postvar([2,4,6],2,1,1) - [2,3,4]) == 0
-
-# test for it_sol
-def test_it_sol():
- ()
-
-# test for int_eprior
-def test_int_eprior():
- ()
-
-# test for model_matrix
-def test_model_matrix():
- model_matrix_test = model_matrix([1,1,0,1,0])
- assert np.shape(model_matrix_test) == (5,2)
- assert list(model_matrix_test[0]) == [1.0,1.0]
-
-# tests for all_1 function
-def test_all_1():
- assert all_1(np.array([1,1,1,1,1])) == True
-
- assert all_1(np.array([1,1,1,1,0])) == False
- assert all_1(np.array([0,0,0,0,0])) == False
-
- assert all_1(np.array([1.5,0.5,1,1,1])) == False # This test to show the limit of the method we use
-
-
-# test for check_mean_only
-def test_check_mean_only():
- check_mean_only(True)
- check_mean_only(False)
- print("Only one line of text should have been printed above.")
-
-# test for define_batchmode
-def test_define_batchmod():
- assert np.shape(define_batchmod(batch)) == (9,3)
-
-# test for check_ref_batch
-def test_check_ref_batch():
- assert check_ref_batch(1,batch,batchmod) == (0, batchmod)
- assert check_ref_batch(2,batch,batchmod) == (1, batchmod)
- print("Using batch 1 then 2. Above lines should inform on that.")
- assert check_ref_batch(None,batch,batchmod) == (None, batchmod)
-
-# test for treat_batches
-def test_treat_batches():
- assert n_batch == 3
- assert batches[0].tolist() == [0,1,2]
- assert batches[1].tolist() == [3,4]
- assert batches[2].tolist() == [5,6,7,8]
- assert n_batches == [3,2,4]
- assert n_array == 9
-
-# test for treat_covariates
-def test_treat_covariates():
- batchmod = define_batchmod(batch)
- assert np.sum(design - np.transpose(batchmod)) == 0
-
-# test for check_NAs
-def test_check_NAs():
- assert check_NAs([0,1,2]) == False
-
-# test for calculate_mean_var
-def test_calculate_mean_var():
- assert np.shape(B_hat)[0] == np.shape(design)[0]
- assert np.shape(grand_mean)[0] == np.shape(dat)[0]
- assert np.shape(var_pooled)[0] == np.shape(dat)[0]
-
-# test for calculate_stand_mean
-def test_calculate_stand_mean():
- assert np.shape(stand_mean) == np.shape(dat)
-
-# test for standardise_data
-def test_standardise_data():
- assert np.shape(s_data) == np.shape(dat)
-
-# test for fit_model
-def test_fit_model():
- assert np.shape(gamma_star)[1] == np.shape(dat)[0]
- assert np.shape(delta_star)[1] == np.shape(dat)[0]
- assert np.shape(batch_design) == np.shape(design)
-
-#test for adjust_data
-def test_adjust_data():
- assert np.shape(bayes_data) == np.shape(dat)
-
-# general tests on pyComBat
-def test_pycombat():
- assert np.shape(matrix) == np.shape(matrix_adjusted)
- assert abs(np.mean(matrix.values)-np.mean(matrix_adjusted.values)) <= np.mean(matrix.values)*0.05
- assert np.var(matrix_adjusted.values) <= np.var(matrix.values)
-
-if __name__ == '__main__':
- test_means()
- print("\n*** UNIT TESTING ***\n")
- # unittest.main()
\ No newline at end of file
diff --git a/combat/utils/__init__.py b/combat/utils/__init__.py
new file mode 100644
index 0000000..0ee28e5
--- /dev/null
+++ b/combat/utils/__init__.py
@@ -0,0 +1,2 @@
+from .combat_utils import *
+from .common_utils import *
diff --git a/combat/utils/combat_utils.py b/combat/utils/combat_utils.py
new file mode 100644
index 0000000..cab48f6
--- /dev/null
+++ b/combat/utils/combat_utils.py
@@ -0,0 +1,674 @@
+from typing_extensions import Literal, Tuple
+from typing import List, Optional
+
+import numpy as np
+from functools import partial
+import mpmath as mp
+import pandas as pd
+
+from .common_utils import check_all_ones
+
+
def model_matrix(
    info: list, intercept: bool = True, drop_first: bool = True
) -> np.array:
    """
    Build a design (model) matrix from batch/covariate assignments.

    Args:
        info (list): a single flat list of labels (one covariate), or a list
            of such lists (one per covariate).
        intercept (bool, optional): append a trailing intercept column of 1s.
            Defaults to True.
        drop_first (bool, optional): drop the first dummy column of each
            covariate (reference encoding). Defaults to True.

    Returns:
        np.array: one-hot design matrix, one row per sample.
    """
    # normalise a flat list (single covariate) to a list of lists
    if not isinstance(info[0], list):
        info = [info]
    # one string-labelled column per covariate, then one-hot encode
    info_dict = {f"col{i}": list(map(str, column)) for i, column in enumerate(info)}
    df = pd.get_dummies(pd.DataFrame(info_dict), drop_first=drop_first, dtype=float)
    if intercept:
        df["intercept"] = 1.0
    return df.to_numpy()
+
+
def compute_prior(
    prior: Literal["a", "b"], gamma_hat: np.array, mean_only: bool
) -> float:
    """
    Compute an empirical hyper-prior value for the prior gamma distribution
    of the multiplicative batch effect.

    Args:
        prior (str): "a" or "b", selecting which hyper-prior to compute.
        gamma_hat (np.array): estimated batch-effect values.
        mean_only (bool): when True, no prior is needed and 1 is returned.

    Returns:
        float: the requested hyper-prior value.
    """
    # mean-only mode needs no hyper-prior estimation
    if mean_only:
        return 1

    mean_val = np.mean(gamma_hat)
    var_val = np.var(gamma_hat)

    # moment-based estimators of the inverse-gamma hyper-parameters
    if prior == "a":
        return (2 * var_val + mean_val ** 2) / var_val
    elif prior == "b":
        return (mean_val * var_val + mean_val ** 3) / var_val
+
+
def postmean(
    g_bar: np.array, d_star: np.array, t2_n: np.array, t2_n_g_hat: np.array
) -> np.array:
    """
    Posterior-mean estimate of the additive batch effect.

    Args:
        g_bar (np.array): matrix of additive batch effect
        d_star (np.array): matrix of multiplicative batch effect
        t2_n (np.array): prior variance scaled by sample counts
        t2_n_g_hat (np.array): t2_n weighted by the estimated additive effect

    Returns:
        np.array: matrix of estimated additive batch effect
    """
    numerator = t2_n_g_hat + d_star * g_bar
    denominator = np.asarray(t2_n + d_star)
    return np.divide(numerator, denominator)
+
+
def postvar(sum2: np.array, n: float, a: float, b: float) -> np.array:
    """
    Posterior estimate of the multiplicative batch effect.

    Args:
        sum2 (np.array): per-row residual sums of squares
        n (float): number of samples contributing to each sum
        a (float): aprior
        b (float): bprior

    Returns:
        np.array: matrix of estimated multiplicative batch effect
    """
    numerator = np.multiply(0.5, sum2) + b
    denominator = np.multiply(0.5, n) + a - 1
    return np.divide(numerator, denominator)
+
+
def it_sol(
    sdat: np.array,
    g_hat: np.array,
    d_hat: np.array,
    g_bar: np.array,
    t2: np.array,
    a: float,
    b: float,
    conv: float = 0.0001,
    exit_iteration: float = 10e5,
) -> np.array:
    """
    Iterative solution for the parametric Empirical Bayes method: alternately
    re-estimates the additive (postmean) and multiplicative (postvar) batch
    effects until the relative change drops below ``conv``.

    Args:
        sdat (np.array): data matrix (rows: genes, columns: this batch's samples)
        g_hat (np.array): matrix of the average additive batch effect
        d_hat (np.array): matrix of the average multiplicative batch effect
        g_bar (np.array): matrix of the additive batch effect
        t2 (np.array): prior variances of the additive batch effect
        a (float): aprior
        b (float): bprior
        conv (float, optional): convergence criterion. Defaults to 0.0001.
        exit_iteration (float, optional): maximum number of iterations before exit. Defaults to 10e5.

    Returns:
        np.array: An array of array with the estimated additive and multiplicative batch effect
    """

    # number of samples in each row of sdat
    n = [len(i) for i in np.asarray(sdat)]
    t2_n = np.multiply(t2, n)
    t2_n_g_hat = np.multiply(t2_n, g_hat)
    g_old = np.ndarray.copy(g_hat)
    d_old = np.ndarray.copy(d_hat)
    change = 1
    count = 0  # number of steps needed (for diagnostic only)

    # convergence criteria, if new-old < conv, then stop
    while (change > conv) and (count < exit_iteration):
        g_new = postmean(
            g_bar, d_old, t2_n, t2_n_g_hat
        )  # updated additive batch effect
        # per-row residual sum of squares against the updated additive effect.
        # NOTE(review): g_new[0][0] extracts a single scalar — presumably
        # relies on sdat/g_new being matrix-typed; confirm against callers.
        sum2 = np.sum(
            np.asarray(
                np.square(
                    sdat - np.outer(g_new[0][0], np.ones(np.ma.size(sdat, axis=1)))
                )
            ),
            axis=1,
        )
        d_new = postvar(sum2, n, a, b)  # updated multiplicative batch effect
        change = max(
            np.amax(np.absolute(g_new - np.asarray(g_old)) / np.asarray(g_old)),
            np.amax(np.absolute(d_new - d_old) / d_old),
        )  # maximum difference between new and old estimate
        g_old = np.ndarray.copy(g_new)  # save value for g
        d_old = np.ndarray.copy(d_new)  # save value for d
        count += 1
    # stack both effects into a single 2-row result
    adjust = np.asarray([g_new, d_new])

    return adjust
+
+
def int_eprior(
    sdat: np.array, g_hat: np.array, d_hat: np.array, precision: float
) -> np.array:
    """
    int_eprior - Monte Carlo integration function to find nonparametric adjustments
    Johnson et al (Biostatistics 2007, supp.mat.) show that we can estimate the multiplicative and additive batch effects with an integral
    This integral is numerically computed through Monte Carlo integration (iterative method)

    Args:
        sdat (np.array): data matrix (rows: genes, columns: this batch's samples)
        g_hat (np.array): matrix of the average additive batch effect
        d_hat (np.array): matrix of the average multiplicative batch effect
        precision (float): level of precision for precision computing
            (mpmath decimal places); None for plain NumPy floats

    Returns:
        np.array: An array of array with the estimated additive and multiplicative batch effect
    """
    g_star = []
    d_star = []
    # use this variable to only print error message once if approximation used
    test_approximation = 0
    for i in range(len(sdat)):
        # leave-one-out: row i is evaluated against the effects of all other rows
        # additive batch effect
        g = np.asarray(np.delete(np.transpose(g_hat), i))

        # multiplicative batch effect
        d = np.asarray(np.delete(np.transpose(d_hat), i))
        x = np.asarray(np.transpose(sdat[i]))
        n = len(x)
        j = [1] * n
        # replicate row i once per remaining effect, then residuals vs each g
        dat = np.repeat(x, len(np.transpose(g)), axis=1)
        resid2 = np.square(dat - g)
        sum2 = np.dot(np.transpose(resid2), j)

        # /begin{handling high precision computing}
        temp_2d = 2 * d
        if precision == None:
            # Gaussian likelihood of row i under each candidate (g, d)
            LH = np.power(1 / (np.pi * temp_2d), n / 2) * np.exp(
                np.negative(sum2) / (temp_2d)
            )
        else:
            # only if precision parameter informed
            # increase the precision of the computing (if negative exponential too close to 0)
            mp.dps = precision
            buf_exp = np.array(list(map(mp.exp, np.negative(sum2)/(temp_2d))))
            buf_pow = np.array(list(map(partial(mp.power, y=n/2), 1/(np.pi*temp_2d))))
            LH = buf_pow * buf_exp  # likelihood
        # /end{handling high precision computing}
        LH = np.nan_to_num(LH)  # corrects NaNs in likelihood
        if np.sum(LH) == 0 and test_approximation == 0:
            test_approximation = 1  # this message won't appear again
            print(
                "###\nValues too small, approximation applied to avoid division by 0.\nPrecision mode can correct this problem, but increases computation time.\n###"
            )

        # likelihood-weighted averages of the candidate effects
        if np.sum(LH) == 0:  # correction for LH full of 0.0
            # floor the likelihood at the smallest positive double (~exp(-745))
            LH[LH == 0] = np.exp(-745)
            g_star.append(np.sum(g * LH) / np.sum(LH))
            d_star.append(np.sum(d * LH) / np.sum(LH))
        else:
            g_star.append(np.sum(g * LH) / np.sum(LH))
            d_star.append(np.sum(d * LH) / np.sum(LH))
    adjust = np.asarray([np.asarray(g_star), np.asarray(d_star)])

    return adjust
+
+
def param_fun(
    i: int,
    standardised_data: np.array,
    batches: List[list],
    mean_only: bool,
    gamma_hat: np.array,
    gamma_bar: np.array,
    delta_hat: np.array,
    t2: np.array,
    a_prior: float,
    b_prior: float,
) -> Tuple[np.array, np.array]:
    """
    Parametric estimation of batch effects for batch ``i``.

    Args:
        i (int): column index
        standardised_data (np.array): data matrix
        batches (List[list]): list of list of batches' elements
        mean_only (bool): True if mean_only selected
        gamma_hat (np.array): average additive batch effect
        gamma_bar (np.array): estimated additive batch effect
        delta_hat (np.array): average multiplicative batch effect
        t2 (np.array): prior variances of the additive batch effect
        a_prior (float): aprior
        b_prior (float): bprior

    Returns:
        Tuple[np.array, np.array]: estimated adjusted additive (gamma_star) and multiplicative (delta_star) batch effect
    """
    if mean_only:
        # mean-only: no multiplicative effect to solve for — the additive
        # effect comes directly from the posterior mean, delta is fixed at 1
        t2_n = np.multiply(t2[i], 1)
        t2_n_g_hat = np.multiply(t2_n, gamma_hat[i])
        gamma_star = postmean(gamma_bar[i], 1, t2_n, t2_n_g_hat)
        delta_star = [1] * len(standardised_data)
        return (gamma_star, delta_star)

    # otherwise run the iterative Empirical Bayes solver on this batch's columns
    batch_columns = np.transpose(np.transpose(standardised_data)[batches[i]])
    estimates = it_sol(
        batch_columns,
        gamma_hat[i],
        delta_hat[i],
        gamma_bar[i],
        t2[i],
        a_prior[i],
        b_prior[i],
    )
    return (estimates[0], estimates[1])
+
+
+# FIXME: parameter order differs from param_fun (mean_only/delta_hat come first);
+# align the shared parameters across both estimation functions for consistency.
def nonparam_fun(
    i: int,
    mean_only: bool,
    delta_hat: np.array,
    standardised_data: np.array,
    batches: List[list],
    gamma_hat: np.array,
    precision: float,
) -> Tuple[np.array, np.array]:
    """
    Non-parametric estimation of batch effects for batch ``i``.

    Args:
        i (int): column index
        mean_only (bool): True if mean_only selected
        delta_hat (np.array): estimated multiplicative batch effect
        standardised_data (np.array): data matrix
        batches (List[list]): list of list of batches' elements
        gamma_hat (np.array): estimated additive batch effect
        precision (float): level of precision for precision computing

    Returns:
        Tuple[np.array, np.array]: estimated adjusted additive and multiplicative batch effect
    """
    if mean_only:
        # mean-only: flatten the multiplicative effect to 1s (mutates delta_hat in place)
        delta_hat[i] = [1] * len(delta_hat[i])

    # Monte Carlo integration on this batch's columns
    batch_columns = np.transpose(np.transpose(standardised_data)[batches[i]])
    estimates = int_eprior(batch_columns, gamma_hat[i], delta_hat[i], precision)
    return (estimates[0], estimates[1])
+
+
def check_ref_batch(
    ref_batch: int, batch: List[int], batchmod: np.array
) -> Tuple[Optional[List[int]], np.array]:
    """
    Validate the ref_batch option and apply it to the batch model matrix.

    Args:
        ref_batch (int): the reference batch (or None for no reference)
        batch (List[int]): list of batch id
        batchmod (np.array): model matrix related to batches

    Raises:
        ValueError: Reference level ref_batch must be one of the levels of batch

    Returns:
        Tuple[Optional[List[int]], np.array]:
            - the corresponding positions of the reference batch in the batch list
            - updated model matrix related to batches, with reference
    """
    if ref_batch is None:
        # default settings: no reference batch
        return (None, batchmod)

    if ref_batch not in batch:
        raise ValueError(
            "Reference level ref_batch must be one of the levels of batch."
        )

    print(f"Using batch {str(ref_batch)} as a reference batch.")
    # position of the reference batch among the sorted unique batch ids
    ref = np.where(np.unique(batch) == ref_batch)[0][0]
    # mark the reference column with 1s (updates batchmod in place)
    batchmod[:, ref] = 1
    return (ref, batchmod)
+
+
def treat_covariates(
    batchmod: np.array, mod: np.array, ref: int, n_batch: int
) -> np.array:
    """
    Build the full design matrix (batch + covariates) and verify that it is
    not confounded.

    Args:
        batchmod (np.array): model matrix for batch
        mod (np.array): model matrix for other covariates ([] if none)
        ref (int): index of reference batch (None if no reference)
        n_batch (int): number of batches

    Raises:
        ValueError: The covariate is confounded with batch. Remove the covariate and rerun pyComBat.
        ValueError: The covariates are confounded! Please remove one or more of the covariates so the design is not confounded.
        ValueError: At least one covariate is confounded with batch. Please remove confounded covariates and rerun pyComBat.

    Returns:
        np.array: model matrix for all covariates, including batch
            (transposed: one row per design column)
    """
    # design matrix for sample conditions
    if mod == []:
        design = batchmod
    else:
        mod_matrix = model_matrix(mod, intercept=True)
        design = np.concatenate((batchmod, mod_matrix), axis=1)
    # drop constant (all-ones) columns — they are redundant with batch columns
    check = list(map(check_all_ones, np.transpose(design)))
    if ref is not None:
        check[ref] = False  # the reference is not considered as a covariate
    design = design[:, ~np.array(check)]
    design = np.transpose(design)

    print(
        f"Adjusting for {str(len(design)-len(np.transpose(batchmod)))} covariate(s) or covariate level(s)."
    )

    # if the matrix is rank-deficient it cannot be solved; diagnose which case
    if np.linalg.matrix_rank(design) < len(design):
        if len(design) == n_batch + 1:  # case 1: covariate confounded with a batch
            raise ValueError(
                "The covariate is confounded with batch. Remove the covariate and rerun pyComBat."
            )
        if (
            len(design) > n_batch + 1
        ):  # case 2: multiple covariates confounded with a batch
            if np.linalg.matrix_rank(np.transpose(design)[:n_batch]) < len(design):
                raise ValueError(
                    "The covariates are confounded! Please remove one or more of the covariates so the design is not confounded."
                )
            else:  # case 3: at least a covariate confounded with a batch
                raise ValueError(
                    "At least one covariate is confounded with batch. Please remove confounded covariates and rerun pyComBat."
                )
    return design
+
+
def treat_batches(batch: list) -> Tuple[int, List[int], List[int], int]:
    """
    Derive batch bookkeeping values from the per-sample batch labels.

    Args:
        batch (list): per-sample batch ids (array-like)

    Returns:
        Tuple[int, List[int], List[int], int]:
            - number of batches
            - list of index arrays, one per batch, with the positions of its samples
            - list of batches lengths
            - total size of dataset
    """
    unique_batches = np.unique(batch)
    n_batch = len(unique_batches)  # number of batches
    print(f"Found {n_batch} batches.")

    # one index array per batch with the positions of its samples.
    # NOTE: kept as a plain list — np.asarray(..., dtype=np.int32) on ragged
    # (unequal-size) batches raises a ValueError on modern NumPy, and callers
    # only ever index/iterate this collection.
    batches = [np.where(batch == b)[0] for b in unique_batches]
    n_batches = list(map(len, batches))
    if 1 in n_batches:
        # no variance can be estimated from a single sample - mean_only has to be used
        print("\nOne batch has only one sample, try setting mean_only=True.\n")
    n_array = sum(n_batches)
    return (n_batch, batches, n_batches, n_array)
+
+
def calculate_mean_var(
    design: np.array,
    batches: List[int],
    ref: int,
    dat: np.array,
    NAs: bool,
    ref_batch: int,
    n_batches: List[int],
    n_batch: int,
    n_array: int,
) -> Tuple[np.array, np.array, np.array]:
    """
    Calculate the normalisation factors: regression coefficients, grand mean
    and pooled variance of the data.

    Args:
        design (np.array): model matrix for all covariates
        batches (List[int]): list of unique batches
        ref (int): index of reference batch
        dat (np.array): data matrix
        NAs (bool): presence of NaNs in the data matrix
        ref_batch (int): reference batch
        n_batches (List[int]): list of batches lengths
        n_batch (int): number of batches
        n_array (int): total size of dataset

    Returns:
        Tuple[np.array, np.array, np.array]:
            - regression coefficients corresponding to the design matrix
            - Mean for each gene and each batch
            - Variance for each gene and each batch
    """
    print("Standardizing Data across genes.")
    if not (NAs):  # NAs not supported
        # B_hat is the vector of regression coefficients corresponding to the design matrix
        # (solves the normal equations (design @ design.T) @ B = design @ dat.T)
        # NOTE(review): if NAs is True, B_hat/var_pooled are never assigned and
        # the code below raises NameError — NaNs must be rejected upstream.
        B_hat = np.linalg.solve(
            np.dot(design, np.transpose(design)), np.dot(design, np.transpose(dat))
        )

    # Calculates the general mean
    if ref_batch is not None:
        # with a reference batch, the grand mean is the reference batch's mean
        grand_mean = np.transpose(B_hat[ref])
    else:
        # otherwise: mean of the batch means, weighted by batch sizes
        grand_mean = np.dot(
            np.transpose([i / n_array for i in n_batches]), B_hat[0:n_batch]
        )

    # Calculates the general variance
    if not NAs:  # NAs not supported
        if ref_batch is not None:  # depending on ref batch
            # variance pooled over the reference batch samples only
            ref_dat = np.transpose(np.transpose(dat)[batches[ref]])
            var_pooled = np.dot(
                np.square(
                    ref_dat
                    - np.transpose(np.dot(np.transpose(design)[batches[ref]], B_hat))
                ),
                [1 / n_batches[ref]] * n_batches[ref],
            )
        else:
            # variance of the residuals, pooled over all samples
            var_pooled = np.dot(
                np.square(dat - np.transpose(np.dot(np.transpose(design), B_hat))),
                [1 / n_array] * n_array,
            )

    return (B_hat, grand_mean, var_pooled)
+
+
def calculate_stand_mean(
    grand_mean: np.array, n_array: int, design: np.array, n_batch: int, B_hat: np.array
) -> np.array:
    """
    Transform the format of the mean for subtraction: tile the per-gene grand
    mean across every sample column, then add the covariate contribution.

    Args:
        grand_mean (np.array): Mean for each gene and each batch
        n_array (int): total size of dataset
        design (np.array): design matrix for all covariates including batch
        n_batch (int): number of batches
        B_hat (np.array): regression coefficients corresponding to the design matrix

    Returns:
        np.array: standardised mean (genes x samples)
    """
    # np.outer replaces the former np.mat-based expression:
    # np.matrix/np.mat is deprecated and removed in NumPy 2.0, and
    # outer(grand_mean, ones) produces the same genes-x-samples tiling.
    stand_mean = np.outer(np.asarray(grand_mean).ravel(), np.ones(n_array))
    # corrects the mean with design matrix information: only the covariate
    # rows contribute (the batch rows of the design are zeroed out)
    if design is not None:
        tmp = np.ndarray.copy(design)
        tmp[0:n_batch] = 0
        stand_mean = stand_mean + np.transpose(np.dot(np.transpose(tmp), B_hat))
    return stand_mean
+
+
def standardise_data(
    dat: np.array, stand_mean: np.array, var_pooled: np.array, n_array: int
) -> np.array:
    """
    Standardise the data: subtract the mean and divide by the pooled
    standard deviation.

    Args:
        dat (np.array): data matrix
        stand_mean (np.array): standardised mean
        var_pooled (np.array): Variance for each gene and each batch
        n_array (int): total size of dataset

    Returns:
        np.array: standardised data matrix
    """
    # tile the per-gene standard deviation across all sample columns.
    # np.outer replaces the former np.mat-based expression: np.matrix/np.mat
    # is deprecated and removed in NumPy 2.0.
    pooled_sd = np.outer(np.sqrt(np.asarray(var_pooled).ravel()), np.ones(n_array))
    return (dat - stand_mean) / pooled_sd
+
+
def fit_model(
    design: np.array,
    n_batch: int,
    standardised_data: np.array,
    batches: List[list],
    mean_only: bool,
    par_prior: bool,
    precision: float,
    ref_batch: int,
    ref: List[int],
    NAs: bool,
) -> Tuple[np.array, np.array, np.array]:
    """
    Fitting L/S model and finding priors.

    Args:
        design (np.array): model matrix for all covariates, including batch
        n_batch (int): number of batches
        standardised_data (np.array): standardised data matrix
        batches (List[list]): list of list of batches' elements
        mean_only (bool): True if just adjusting the means and not individual batch effects
        par_prior (bool): False for non-parametric estimation of batch effects
        precision (float): level of precision for precision computing
        ref_batch (int): reference batch selected
        ref (List[int]): the corresponding positions of the reference batch in the batch list
        NAs (bool): boolean characterising the presence of NaNs in the data matrix

    Returns:
        Tuple[np.array, np.array, np.array]:
            - estimated additive batch effect
            - estimated multiplicative batch effect
            - information about batches in design matrix
    """
    print("Fitting L/S model and finding priors.")

    # fraction of design matrix related to batches
    batch_design = design[0:n_batch]

    if not NAs:  # CF SUPRA FOR NAs
        # gamma_hat is the vector of additive batch effect
        # NOTE(review): if NAs is True, gamma_hat is never assigned and the
        # code below raises NameError — NaNs must be rejected upstream.
        gamma_hat = np.linalg.solve(
            np.dot(batch_design, np.transpose(batch_design)),
            np.dot(batch_design, np.transpose(standardised_data)),
        )

    delta_hat = []  # delta_hat is the vector of estimated multiplicative batch effect

    if mean_only:
        # no variance if mean_only == True
        delta_hat = [np.asarray([1] * len(standardised_data))] * len(batches)
    else:
        for i in batches:  # feed incrementally delta_hat
            list_map = np.transpose(np.transpose(standardised_data)[i]).var(
                axis=1
            )  # variance for each row
            delta_hat.append(np.squeeze(np.asarray(list_map)))

    gamma_bar = list(map(np.mean, gamma_hat))  # vector of means for gamma_hat
    t2 = list(map(np.var, gamma_hat))  # vector of variances for gamma_hat

    # calculates hyper priors for gamma (additive batch effect)
    a_prior = list(map(partial(compute_prior, "a", mean_only=mean_only), delta_hat))
    b_prior = list(map(partial(compute_prior, "b", mean_only=mean_only), delta_hat))

    # initialise gamma and delta for parameters estimation
    gamma_star = np.empty((n_batch, len(standardised_data)))
    delta_star = np.empty((n_batch, len(standardised_data)))

    if par_prior:
        # use param_fun function for parametric adjustments (cf. function definition)
        print("Finding parametric adjustments.")
        results = list(
            map(
                partial(
                    param_fun,
                    standardised_data=standardised_data,
                    batches=batches,
                    mean_only=mean_only,
                    gamma_hat=gamma_hat,
                    gamma_bar=gamma_bar,
                    delta_hat=delta_hat,
                    t2=t2,
                    a_prior=a_prior,
                    b_prior=b_prior,
                ),
                range(n_batch),
            )
        )
    else:
        # use nonparam_fun for non-parametric adjustments (cf. function definition)
        print("Finding nonparametric adjustments")
        results = list(
            map(
                partial(
                    nonparam_fun,
                    mean_only=mean_only,
                    delta_hat=delta_hat,
                    standardised_data=standardised_data,
                    batches=batches,
                    gamma_hat=gamma_hat,
                    precision=precision,
                ),
                range(n_batch),
            )
        )

    for i in range(n_batch):  # store the results in gamma/delta_star
        results_i = results[i]
        gamma_star[i], delta_star[i] = results_i[0], results_i[1]

    # update if reference batch (the reference batch is not supposed to be modified).
    # Explicit None check: the previous truthiness test skipped a valid
    # reference batch id of 0 (0 is falsy).
    if ref_batch is not None:
        len_gamma_star_ref = len(gamma_star[ref])
        gamma_star[ref] = [0] * len_gamma_star_ref
        delta_star[ref] = [1] * len_gamma_star_ref

    return (gamma_star, delta_star, batch_design)
diff --git a/combat/utils/common_utils.py b/combat/utils/common_utils.py
new file mode 100644
index 0000000..d1905fc
--- /dev/null
+++ b/combat/utils/common_utils.py
@@ -0,0 +1,43 @@
+import numpy as np
+
+
def check_all_ones(list_of_elements: list) -> bool:
    """
    Check whether every element of an array equals 1.

    Args:
        list_of_elements (list): array-like of elements to test

    Returns:
        bool: True if every element compares equal to 1
    """
    # element-wise comparison for numpy arrays, then reduce
    comparison = list_of_elements == 1
    return all(comparison)
+
+
def check_mean_only(mean_only: bool) -> None:
    """
    Inform the user when the mean-only variant of ComBat is active.

    Args:
        mean_only (bool): user's choice regarding the mean_only option
    """
    # idiomatic truth test instead of comparing against True with ==
    if mean_only:
        print("Using mean only version")
+
+
def check_NAs(data) -> bool:
    """
    Check whether the data matrix contains NaNs — in theory, the data is
    constructed without any.

    Args:
        data: the data matrix (array-like of numbers)

    Returns:
        bool: True if at least one NaN is present in the data matrix
    """
    # a single NaN propagates through the sum, so one reduction detects any
    NAs = np.isnan(np.sum(data))  # Check if NaN exists
    if NAs:
        print(
            "Found missing data values. Please remove all missing values before proceeding with pyComBat."
        )
    return NAs
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 7fe46f6..324c180 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -12,20 +12,21 @@
#
import os
import sys
-sys.path.insert(0, os.path.abspath('../../ComBat'))
+
+sys.path.insert(0, os.path.abspath("../../ComBat"))
sys.setrecursionlimit(1500)
# -- Project information -----------------------------------------------------
-project = 'pyComBat'
-copyright = '2020, Abdelkader Behdenna'
-author = 'Abdelkader Behdenna'
+project = "pyComBat"
+copyright = "2020, Abdelkader Behdenna"
+author = "Abdelkader Behdenna"
# The short version
-version = ''
+version = ""
# The full version, including alpha/beta/rc tags
-release = '0.4.4'
+release = "0.4.4"
# -- General configuration ---------------------------------------------------
@@ -39,26 +40,27 @@
# ones.
import sphinx_rtd_theme
-extensions = ['sphinx.ext.autodoc',
- 'sphinx.ext.intersphinx',
- 'sphinx.ext.ifconfig',
- 'sphinx.ext.viewcode',
- 'sphinx.ext.githubpages',
- 'rinoh.frontend.sphinx',
+extensions = [
+ "sphinx.ext.autodoc",
+ "sphinx.ext.intersphinx",
+ "sphinx.ext.ifconfig",
+ "sphinx.ext.viewcode",
+ "sphinx.ext.githubpages",
+ "rinoh.frontend.sphinx",
"sphinx_rtd_theme",
]
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+source_suffix = ".rst"
# The master toctree document.
-master_doc = 'index'
+master_doc = "index"
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
@@ -66,48 +68,44 @@
exclude_patterns = []
# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
-#html_theme = 'alabaster'
+# html_theme = 'alabaster'
html_theme = "sphinx_rtd_theme"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
# -- LaTeX elements ----------------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
- 'papersize': 'letterpaper',
-
+ "papersize": "letterpaper",
# The font size ('10pt', '11pt' or '12pt').
#
- 'pointsize': '10pt',
-
+ "pointsize": "10pt",
# Additional stuff for the LaTeX preamble.
#
- 'preamble': '',
-
+ "preamble": "",
# Latex figure (float) alignment
#
- 'figure_align': 'htbp',
- 'extraclassoptions': 'openany,oneside',
+ "figure_align": "htbp",
+ "extraclassoptions": "openany,oneside",
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
- (master_doc, 'pyComBat.tex', 'pyComBat',
- 'Abdelkader Behdenna', 'manual'),
+ (master_doc, "pyComBat.tex", "pyComBat", "Abdelkader Behdenna", "manual"),
]
# -- Extension configuration -------------------------------------------------
@@ -115,4 +113,4 @@
# -- Options for intersphinx extension ---------------------------------------
# Example configuration for intersphinx: refer to the Python standard library.
-intersphinx_mapping = {'https://docs.python.org/': None}
\ No newline at end of file
+intersphinx_mapping = {"https://docs.python.org/": None}
diff --git a/poetry.lock b/poetry.lock
new file mode 100644
index 0000000..e69ddf7
--- /dev/null
+++ b/poetry.lock
@@ -0,0 +1,713 @@
+[[package]]
+name = "appdirs"
+version = "1.4.4"
+description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
+category = "dev"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "atomicwrites"
+version = "1.4.0"
+description = "Atomic file writes."
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+
+[[package]]
+name = "attrs"
+version = "21.2.0"
+description = "Classes Without Boilerplate"
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+
+[package.extras]
+dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit"]
+docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
+tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface"]
+tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins"]
+
+[[package]]
+name = "black"
+version = "21.7b0"
+description = "The uncompromising code formatter."
+category = "dev"
+optional = false
+python-versions = ">=3.6.2"
+
+[package.dependencies]
+appdirs = "*"
+click = ">=7.1.2"
+mypy-extensions = ">=0.4.3"
+pathspec = ">=0.8.1,<1"
+regex = ">=2020.1.8"
+tomli = ">=0.2.6,<2.0.0"
+typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\""}
+typing-extensions = {version = ">=3.7.4", markers = "python_version < \"3.8\""}
+
+[package.extras]
+colorama = ["colorama (>=0.4.3)"]
+d = ["aiohttp (>=3.6.0)", "aiohttp-cors (>=0.4.0)"]
+python2 = ["typed-ast (>=1.4.2)"]
+uvloop = ["uvloop (>=0.15.2)"]
+
+[[package]]
+name = "click"
+version = "8.0.1"
+description = "Composable command line interface toolkit"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
+
+[[package]]
+name = "colorama"
+version = "0.4.4"
+description = "Cross-platform colored terminal text."
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+
+[[package]]
+name = "coverage"
+version = "5.5"
+description = "Code coverage measurement for Python"
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
+
+[package.extras]
+toml = ["toml"]
+
+[[package]]
+name = "importlib-metadata"
+version = "1.7.0"
+description = "Read metadata from Python packages"
+category = "dev"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
+
+[package.dependencies]
+zipp = ">=0.5"
+
+[package.extras]
+docs = ["sphinx", "rst.linker"]
+testing = ["packaging", "pep517", "importlib-resources (>=1.3)"]
+
+[[package]]
+name = "iniconfig"
+version = "1.1.1"
+description = "iniconfig: brain-dead simple config-ini parsing"
+category = "dev"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "joblib"
+version = "1.0.1"
+description = "Lightweight pipelining with Python functions"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[[package]]
+name = "mpmath"
+version = "1.2.1"
+description = "Python library for arbitrary-precision floating-point arithmetic"
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.extras]
+develop = ["pytest (>=4.6)", "pycodestyle", "pytest-cov", "codecov", "wheel"]
+tests = ["pytest (>=4.6)"]
+
+[[package]]
+name = "mypy-extensions"
+version = "0.4.3"
+description = "Experimental type system extensions for programs checked with the mypy typechecker."
+category = "dev"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "numpy"
+version = "1.21.1"
+description = "NumPy is the fundamental package for array computing with Python."
+category = "main"
+optional = false
+python-versions = ">=3.7"
+
+[[package]]
+name = "packaging"
+version = "21.0"
+description = "Core utilities for Python packages"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+pyparsing = ">=2.0.2"
+
+[[package]]
+name = "pandas"
+version = "1.1.5"
+description = "Powerful data structures for data analysis, time series, and statistics"
+category = "main"
+optional = false
+python-versions = ">=3.6.1"
+
+[package.dependencies]
+numpy = ">=1.15.4"
+python-dateutil = ">=2.7.3"
+pytz = ">=2017.2"
+
+[package.extras]
+test = ["pytest (>=4.0.2)", "pytest-xdist", "hypothesis (>=3.58)"]
+
+[[package]]
+name = "pathspec"
+version = "0.9.0"
+description = "Utility library for gitignore style pattern matching of file paths."
+category = "dev"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
+
+[[package]]
+name = "patsy"
+version = "0.5.1"
+description = "A Python package for describing statistical models and for building design matrices."
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+numpy = ">=1.4"
+six = "*"
+
+[[package]]
+name = "pluggy"
+version = "0.13.1"
+description = "plugin and hook calling mechanisms for python"
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+
+[package.dependencies]
+importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
+
+[package.extras]
+dev = ["pre-commit", "tox"]
+
+[[package]]
+name = "py"
+version = "1.10.0"
+description = "library with cross-python path, ini-parsing, io, code, log facilities"
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+
+[[package]]
+name = "pyparsing"
+version = "2.4.7"
+description = "Python parsing module"
+category = "dev"
+optional = false
+python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
+
+[[package]]
+name = "pytest"
+version = "6.2.4"
+description = "pytest: simple powerful testing with Python"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
+attrs = ">=19.2.0"
+colorama = {version = "*", markers = "sys_platform == \"win32\""}
+importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
+iniconfig = "*"
+packaging = "*"
+pluggy = ">=0.12,<1.0.0a1"
+py = ">=1.8.2"
+toml = "*"
+
+[package.extras]
+testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"]
+
+[[package]]
+name = "pytest-cov"
+version = "2.12.1"
+description = "Pytest plugin for measuring coverage."
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+
+[package.dependencies]
+coverage = ">=5.2.1"
+pytest = ">=4.6"
+toml = "*"
+
+[package.extras]
+testing = ["fields", "hunter", "process-tests", "six", "pytest-xdist", "virtualenv"]
+
+[[package]]
+name = "python-dateutil"
+version = "2.8.2"
+description = "Extensions to the standard Python datetime module"
+category = "main"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+
+[package.dependencies]
+six = ">=1.5"
+
+[[package]]
+name = "pytz"
+version = "2021.1"
+description = "World timezone definitions, modern and historical"
+category = "main"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "regex"
+version = "2021.7.6"
+description = "Alternative regular expression module, to replace re."
+category = "dev"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "scikit-learn"
+version = "0.23.1"
+description = "A set of python modules for machine learning and data mining"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+joblib = ">=0.11"
+numpy = ">=1.13.3"
+scipy = ">=0.19.1"
+threadpoolctl = ">=2.0.0"
+
+[package.extras]
+alldeps = ["numpy (>=1.13.3)", "scipy (>=0.19.1)"]
+
+[[package]]
+name = "scipy"
+version = "1.6.1"
+description = "SciPy: Scientific Library for Python"
+category = "main"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+numpy = ">=1.16.5"
+
+[[package]]
+name = "six"
+version = "1.16.0"
+description = "Python 2 and 3 compatibility utilities"
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
+
+[[package]]
+name = "threadpoolctl"
+version = "2.2.0"
+description = "threadpoolctl"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[[package]]
+name = "toml"
+version = "0.10.2"
+description = "Python Library for Tom's Obvious, Minimal Language"
+category = "dev"
+optional = false
+python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
+
+[[package]]
+name = "tomli"
+version = "1.1.0"
+description = "A lil' TOML parser"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[[package]]
+name = "typed-ast"
+version = "1.4.3"
+description = "a fork of Python 2 and 3 ast modules with type comment support"
+category = "dev"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "typing-extensions"
+version = "3.10.0.0"
+description = "Backported and Experimental Type Hints for Python 3.5+"
+category = "main"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "zipp"
+version = "3.5.0"
+description = "Backport of pathlib-compatible object wrapper for zip files"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[package.extras]
+docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"]
+testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy"]
+
+[metadata]
+lock-version = "1.1"
+python-versions = "^3.7"
+content-hash = "462ae7d5a53adc665574ba480401a2d692d281ec83e2f34f50a030e959ec6506"
+
+[metadata.files]
+appdirs = [
+ {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"},
+ {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"},
+]
+atomicwrites = [
+ {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"},
+ {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"},
+]
+attrs = [
+ {file = "attrs-21.2.0-py2.py3-none-any.whl", hash = "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1"},
+ {file = "attrs-21.2.0.tar.gz", hash = "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb"},
+]
+black = [
+ {file = "black-21.7b0-py3-none-any.whl", hash = "sha256:1c7aa6ada8ee864db745b22790a32f94b2795c253a75d6d9b5e439ff10d23116"},
+ {file = "black-21.7b0.tar.gz", hash = "sha256:c8373c6491de9362e39271630b65b964607bc5c79c83783547d76c839b3aa219"},
+]
+click = [
+ {file = "click-8.0.1-py3-none-any.whl", hash = "sha256:fba402a4a47334742d782209a7c79bc448911afe1149d07bdabdf480b3e2f4b6"},
+ {file = "click-8.0.1.tar.gz", hash = "sha256:8c04c11192119b1ef78ea049e0a6f0463e4c48ef00a30160c704337586f3ad7a"},
+]
+colorama = [
+ {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
+ {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
+]
+coverage = [
+ {file = "coverage-5.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:b6d534e4b2ab35c9f93f46229363e17f63c53ad01330df9f2d6bd1187e5eaacf"},
+ {file = "coverage-5.5-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:b7895207b4c843c76a25ab8c1e866261bcfe27bfaa20c192de5190121770672b"},
+ {file = "coverage-5.5-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:c2723d347ab06e7ddad1a58b2a821218239249a9e4365eaff6649d31180c1669"},
+ {file = "coverage-5.5-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:900fbf7759501bc7807fd6638c947d7a831fc9fdf742dc10f02956ff7220fa90"},
+ {file = "coverage-5.5-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:004d1880bed2d97151facef49f08e255a20ceb6f9432df75f4eef018fdd5a78c"},
+ {file = "coverage-5.5-cp27-cp27m-win32.whl", hash = "sha256:06191eb60f8d8a5bc046f3799f8a07a2d7aefb9504b0209aff0b47298333302a"},
+ {file = "coverage-5.5-cp27-cp27m-win_amd64.whl", hash = "sha256:7501140f755b725495941b43347ba8a2777407fc7f250d4f5a7d2a1050ba8e82"},
+ {file = "coverage-5.5-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:372da284cfd642d8e08ef606917846fa2ee350f64994bebfbd3afb0040436905"},
+ {file = "coverage-5.5-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:8963a499849a1fc54b35b1c9f162f4108017b2e6db2c46c1bed93a72262ed083"},
+ {file = "coverage-5.5-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:869a64f53488f40fa5b5b9dcb9e9b2962a66a87dab37790f3fcfb5144b996ef5"},
+ {file = "coverage-5.5-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:4a7697d8cb0f27399b0e393c0b90f0f1e40c82023ea4d45d22bce7032a5d7b81"},
+ {file = "coverage-5.5-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:8d0a0725ad7c1a0bcd8d1b437e191107d457e2ec1084b9f190630a4fb1af78e6"},
+ {file = "coverage-5.5-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:51cb9476a3987c8967ebab3f0fe144819781fca264f57f89760037a2ea191cb0"},
+ {file = "coverage-5.5-cp310-cp310-win_amd64.whl", hash = "sha256:c0891a6a97b09c1f3e073a890514d5012eb256845c451bd48f7968ef939bf4ae"},
+ {file = "coverage-5.5-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:3487286bc29a5aa4b93a072e9592f22254291ce96a9fbc5251f566b6b7343cdb"},
+ {file = "coverage-5.5-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:deee1077aae10d8fa88cb02c845cfba9b62c55e1183f52f6ae6a2df6a2187160"},
+ {file = "coverage-5.5-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:f11642dddbb0253cc8853254301b51390ba0081750a8ac03f20ea8103f0c56b6"},
+ {file = "coverage-5.5-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:6c90e11318f0d3c436a42409f2749ee1a115cd8b067d7f14c148f1ce5574d701"},
+ {file = "coverage-5.5-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:30c77c1dc9f253283e34c27935fded5015f7d1abe83bc7821680ac444eaf7793"},
+ {file = "coverage-5.5-cp35-cp35m-win32.whl", hash = "sha256:9a1ef3b66e38ef8618ce5fdc7bea3d9f45f3624e2a66295eea5e57966c85909e"},
+ {file = "coverage-5.5-cp35-cp35m-win_amd64.whl", hash = "sha256:972c85d205b51e30e59525694670de6a8a89691186012535f9d7dbaa230e42c3"},
+ {file = "coverage-5.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:af0e781009aaf59e25c5a678122391cb0f345ac0ec272c7961dc5455e1c40066"},
+ {file = "coverage-5.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:74d881fc777ebb11c63736622b60cb9e4aee5cace591ce274fb69e582a12a61a"},
+ {file = "coverage-5.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:92b017ce34b68a7d67bd6d117e6d443a9bf63a2ecf8567bb3d8c6c7bc5014465"},
+ {file = "coverage-5.5-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:d636598c8305e1f90b439dbf4f66437de4a5e3c31fdf47ad29542478c8508bbb"},
+ {file = "coverage-5.5-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:41179b8a845742d1eb60449bdb2992196e211341818565abded11cfa90efb821"},
+ {file = "coverage-5.5-cp36-cp36m-win32.whl", hash = "sha256:040af6c32813fa3eae5305d53f18875bedd079960822ef8ec067a66dd8afcd45"},
+ {file = "coverage-5.5-cp36-cp36m-win_amd64.whl", hash = "sha256:5fec2d43a2cc6965edc0bb9e83e1e4b557f76f843a77a2496cbe719583ce8184"},
+ {file = "coverage-5.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:18ba8bbede96a2c3dde7b868de9dcbd55670690af0988713f0603f037848418a"},
+ {file = "coverage-5.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:2910f4d36a6a9b4214bb7038d537f015346f413a975d57ca6b43bf23d6563b53"},
+ {file = "coverage-5.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:f0b278ce10936db1a37e6954e15a3730bea96a0997c26d7fee88e6c396c2086d"},
+ {file = "coverage-5.5-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:796c9c3c79747146ebd278dbe1e5c5c05dd6b10cc3bcb8389dfdf844f3ead638"},
+ {file = "coverage-5.5-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:53194af30d5bad77fcba80e23a1441c71abfb3e01192034f8246e0d8f99528f3"},
+ {file = "coverage-5.5-cp37-cp37m-win32.whl", hash = "sha256:184a47bbe0aa6400ed2d41d8e9ed868b8205046518c52464fde713ea06e3a74a"},
+ {file = "coverage-5.5-cp37-cp37m-win_amd64.whl", hash = "sha256:2949cad1c5208b8298d5686d5a85b66aae46d73eec2c3e08c817dd3513e5848a"},
+ {file = "coverage-5.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:217658ec7187497e3f3ebd901afdca1af062b42cfe3e0dafea4cced3983739f6"},
+ {file = "coverage-5.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1aa846f56c3d49205c952d8318e76ccc2ae23303351d9270ab220004c580cfe2"},
+ {file = "coverage-5.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:24d4a7de75446be83244eabbff746d66b9240ae020ced65d060815fac3423759"},
+ {file = "coverage-5.5-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:d1f8bf7b90ba55699b3a5e44930e93ff0189aa27186e96071fac7dd0d06a1873"},
+ {file = "coverage-5.5-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:970284a88b99673ccb2e4e334cfb38a10aab7cd44f7457564d11898a74b62d0a"},
+ {file = "coverage-5.5-cp38-cp38-win32.whl", hash = "sha256:01d84219b5cdbfc8122223b39a954820929497a1cb1422824bb86b07b74594b6"},
+ {file = "coverage-5.5-cp38-cp38-win_amd64.whl", hash = "sha256:2e0d881ad471768bf6e6c2bf905d183543f10098e3b3640fc029509530091502"},
+ {file = "coverage-5.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d1f9ce122f83b2305592c11d64f181b87153fc2c2bbd3bb4a3dde8303cfb1a6b"},
+ {file = "coverage-5.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:13c4ee887eca0f4c5a247b75398d4114c37882658300e153113dafb1d76de529"},
+ {file = "coverage-5.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:52596d3d0e8bdf3af43db3e9ba8dcdaac724ba7b5ca3f6358529d56f7a166f8b"},
+ {file = "coverage-5.5-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:2cafbbb3af0733db200c9b5f798d18953b1a304d3f86a938367de1567f4b5bff"},
+ {file = "coverage-5.5-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:44d654437b8ddd9eee7d1eaee28b7219bec228520ff809af170488fd2fed3e2b"},
+ {file = "coverage-5.5-cp39-cp39-win32.whl", hash = "sha256:d314ed732c25d29775e84a960c3c60808b682c08d86602ec2c3008e1202e3bb6"},
+ {file = "coverage-5.5-cp39-cp39-win_amd64.whl", hash = "sha256:13034c4409db851670bc9acd836243aeee299949bd5673e11844befcb0149f03"},
+ {file = "coverage-5.5-pp36-none-any.whl", hash = "sha256:f030f8873312a16414c0d8e1a1ddff2d3235655a2174e3648b4fa66b3f2f1079"},
+ {file = "coverage-5.5-pp37-none-any.whl", hash = "sha256:2a3859cb82dcbda1cfd3e6f71c27081d18aa251d20a17d87d26d4cd216fb0af4"},
+ {file = "coverage-5.5.tar.gz", hash = "sha256:ebe78fe9a0e874362175b02371bdfbee64d8edc42a044253ddf4ee7d3c15212c"},
+]
+importlib-metadata = [
+ {file = "importlib_metadata-1.7.0-py2.py3-none-any.whl", hash = "sha256:dc15b2969b4ce36305c51eebe62d418ac7791e9a157911d58bfb1f9ccd8e2070"},
+ {file = "importlib_metadata-1.7.0.tar.gz", hash = "sha256:90bb658cdbbf6d1735b6341ce708fc7024a3e14e99ffdc5783edea9f9b077f83"},
+]
+iniconfig = [
+ {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
+ {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
+]
+joblib = [
+ {file = "joblib-1.0.1-py3-none-any.whl", hash = "sha256:feeb1ec69c4d45129954f1b7034954241eedfd6ba39b5e9e4b6883be3332d5e5"},
+ {file = "joblib-1.0.1.tar.gz", hash = "sha256:9c17567692206d2f3fb9ecf5e991084254fe631665c450b443761c4186a613f7"},
+]
+mpmath = [
+ {file = "mpmath-1.2.1-py3-none-any.whl", hash = "sha256:604bc21bd22d2322a177c73bdb573994ef76e62edd595d17e00aff24b0667e5c"},
+ {file = "mpmath-1.2.1.tar.gz", hash = "sha256:79ffb45cf9f4b101a807595bcb3e72e0396202e0b1d25d689134b48c4216a81a"},
+]
+mypy-extensions = [
+ {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"},
+ {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"},
+]
+numpy = [
+ {file = "numpy-1.21.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:38e8648f9449a549a7dfe8d8755a5979b45b3538520d1e735637ef28e8c2dc50"},
+ {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fd7d7409fa643a91d0a05c7554dd68aa9c9bb16e186f6ccfe40d6e003156e33a"},
+ {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a75b4498b1e93d8b700282dc8e655b8bd559c0904b3910b144646dbbbc03e062"},
+ {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1412aa0aec3e00bc23fbb8664d76552b4efde98fb71f60737c83efbac24112f1"},
+ {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e46ceaff65609b5399163de5893d8f2a82d3c77d5e56d976c8b5fb01faa6b671"},
+ {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c6a2324085dd52f96498419ba95b5777e40b6bcbc20088fddb9e8cbb58885e8e"},
+ {file = "numpy-1.21.1-cp37-cp37m-win32.whl", hash = "sha256:73101b2a1fef16602696d133db402a7e7586654682244344b8329cdcbbb82172"},
+ {file = "numpy-1.21.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7a708a79c9a9d26904d1cca8d383bf869edf6f8e7650d85dbc77b041e8c5a0f8"},
+ {file = "numpy-1.21.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95b995d0c413f5d0428b3f880e8fe1660ff9396dcd1f9eedbc311f37b5652e16"},
+ {file = "numpy-1.21.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:635e6bd31c9fb3d475c8f44a089569070d10a9ef18ed13738b03049280281267"},
+ {file = "numpy-1.21.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4a3d5fb89bfe21be2ef47c0614b9c9c707b7362386c9a3ff1feae63e0267ccb6"},
+ {file = "numpy-1.21.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8a326af80e86d0e9ce92bcc1e65c8ff88297de4fa14ee936cb2293d414c9ec63"},
+ {file = "numpy-1.21.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:791492091744b0fe390a6ce85cc1bf5149968ac7d5f0477288f78c89b385d9af"},
+ {file = "numpy-1.21.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0318c465786c1f63ac05d7c4dbcecd4d2d7e13f0959b01b534ea1e92202235c5"},
+ {file = "numpy-1.21.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a513bd9c1551894ee3d31369f9b07460ef223694098cf27d399513415855b68"},
+ {file = "numpy-1.21.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:91c6f5fc58df1e0a3cc0c3a717bb3308ff850abdaa6d2d802573ee2b11f674a8"},
+ {file = "numpy-1.21.1-cp38-cp38-win32.whl", hash = "sha256:978010b68e17150db8765355d1ccdd450f9fc916824e8c4e35ee620590e234cd"},
+ {file = "numpy-1.21.1-cp38-cp38-win_amd64.whl", hash = "sha256:9749a40a5b22333467f02fe11edc98f022133ee1bfa8ab99bda5e5437b831214"},
+ {file = "numpy-1.21.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d7a4aeac3b94af92a9373d6e77b37691b86411f9745190d2c351f410ab3a791f"},
+ {file = "numpy-1.21.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d9e7912a56108aba9b31df688a4c4f5cb0d9d3787386b87d504762b6754fbb1b"},
+ {file = "numpy-1.21.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:25b40b98ebdd272bc3020935427a4530b7d60dfbe1ab9381a39147834e985eac"},
+ {file = "numpy-1.21.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8a92c5aea763d14ba9d6475803fc7904bda7decc2a0a68153f587ad82941fec1"},
+ {file = "numpy-1.21.1-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05a0f648eb28bae4bcb204e6fd14603de2908de982e761a2fc78efe0f19e96e1"},
+ {file = "numpy-1.21.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f01f28075a92eede918b965e86e8f0ba7b7797a95aa8d35e1cc8821f5fc3ad6a"},
+ {file = "numpy-1.21.1-cp39-cp39-win32.whl", hash = "sha256:88c0b89ad1cc24a5efbb99ff9ab5db0f9a86e9cc50240177a571fbe9c2860ac2"},
+ {file = "numpy-1.21.1-cp39-cp39-win_amd64.whl", hash = "sha256:01721eefe70544d548425a07c80be8377096a54118070b8a62476866d5208e33"},
+ {file = "numpy-1.21.1-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2d4d1de6e6fb3d28781c73fbde702ac97f03d79e4ffd6598b880b2d95d62ead4"},
+ {file = "numpy-1.21.1.zip", hash = "sha256:dff4af63638afcc57a3dfb9e4b26d434a7a602d225b42d746ea7fe2edf1342fd"},
+]
+packaging = [
+ {file = "packaging-21.0-py3-none-any.whl", hash = "sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14"},
+ {file = "packaging-21.0.tar.gz", hash = "sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7"},
+]
+pandas = [
+ {file = "pandas-1.1.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:bf23a3b54d128b50f4f9d4675b3c1857a688cc6731a32f931837d72effb2698d"},
+ {file = "pandas-1.1.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:5a780260afc88268a9d3ac3511d8f494fdcf637eece62fb9eb656a63d53eb7ca"},
+ {file = "pandas-1.1.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:b61080750d19a0122469ab59b087380721d6b72a4e7d962e4d7e63e0c4504814"},
+ {file = "pandas-1.1.5-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:0de3ddb414d30798cbf56e642d82cac30a80223ad6fe484d66c0ce01a84d6f2f"},
+ {file = "pandas-1.1.5-cp36-cp36m-win32.whl", hash = "sha256:70865f96bb38fec46f7ebd66d4b5cfd0aa6b842073f298d621385ae3898d28b5"},
+ {file = "pandas-1.1.5-cp36-cp36m-win_amd64.whl", hash = "sha256:19a2148a1d02791352e9fa637899a78e371a3516ac6da5c4edc718f60cbae648"},
+ {file = "pandas-1.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:26fa92d3ac743a149a31b21d6f4337b0594b6302ea5575b37af9ca9611e8981a"},
+ {file = "pandas-1.1.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:c16d59c15d946111d2716856dd5479221c9e4f2f5c7bc2d617f39d870031e086"},
+ {file = "pandas-1.1.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:3be7a7a0ca71a2640e81d9276f526bca63505850add10206d0da2e8a0a325dae"},
+ {file = "pandas-1.1.5-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:573fba5b05bf2c69271a32e52399c8de599e4a15ab7cec47d3b9c904125ab788"},
+ {file = "pandas-1.1.5-cp37-cp37m-win32.whl", hash = "sha256:21b5a2b033380adbdd36b3116faaf9a4663e375325831dac1b519a44f9e439bb"},
+ {file = "pandas-1.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:24c7f8d4aee71bfa6401faeba367dd654f696a77151a8a28bc2013f7ced4af98"},
+ {file = "pandas-1.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2860a97cbb25444ffc0088b457da0a79dc79f9c601238a3e0644312fcc14bf11"},
+ {file = "pandas-1.1.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:5008374ebb990dad9ed48b0f5d0038124c73748f5384cc8c46904dace27082d9"},
+ {file = "pandas-1.1.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:2c2f7c670ea4e60318e4b7e474d56447cf0c7d83b3c2a5405a0dbb2600b9c48e"},
+ {file = "pandas-1.1.5-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:0a643bae4283a37732ddfcecab3f62dd082996021b980f580903f4e8e01b3c5b"},
+ {file = "pandas-1.1.5-cp38-cp38-win32.whl", hash = "sha256:5447ea7af4005b0daf695a316a423b96374c9c73ffbd4533209c5ddc369e644b"},
+ {file = "pandas-1.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:4c62e94d5d49db116bef1bd5c2486723a292d79409fc9abd51adf9e05329101d"},
+ {file = "pandas-1.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:731568be71fba1e13cae212c362f3d2ca8932e83cb1b85e3f1b4dd77d019254a"},
+ {file = "pandas-1.1.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:c61c043aafb69329d0f961b19faa30b1dab709dd34c9388143fc55680059e55a"},
+ {file = "pandas-1.1.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:2b1c6cd28a0dfda75c7b5957363333f01d370936e4c6276b7b8e696dd500582a"},
+ {file = "pandas-1.1.5-cp39-cp39-win32.whl", hash = "sha256:c94ff2780a1fd89f190390130d6d36173ca59fcfb3fe0ff596f9a56518191ccb"},
+ {file = "pandas-1.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:edda9bacc3843dfbeebaf7a701763e68e741b08fccb889c003b0a52f0ee95782"},
+ {file = "pandas-1.1.5.tar.gz", hash = "sha256:f10fc41ee3c75a474d3bdf68d396f10782d013d7f67db99c0efbfd0acb99701b"},
+]
+pathspec = [
+ {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"},
+ {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"},
+]
+patsy = [
+ {file = "patsy-0.5.1-py2.py3-none-any.whl", hash = "sha256:5465be1c0e670c3a965355ec09e9a502bf2c4cbe4875e8528b0221190a8a5d40"},
+ {file = "patsy-0.5.1.tar.gz", hash = "sha256:f115cec4201e1465cd58b9866b0b0e7b941caafec129869057405bfe5b5e3991"},
+]
+pluggy = [
+ {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"},
+ {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"},
+]
+py = [
+ {file = "py-1.10.0-py2.py3-none-any.whl", hash = "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"},
+ {file = "py-1.10.0.tar.gz", hash = "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3"},
+]
+pyparsing = [
+ {file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"},
+ {file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"},
+]
+pytest = [
+ {file = "pytest-6.2.4-py3-none-any.whl", hash = "sha256:91ef2131a9bd6be8f76f1f08eac5c5317221d6ad1e143ae03894b862e8976890"},
+ {file = "pytest-6.2.4.tar.gz", hash = "sha256:50bcad0a0b9c5a72c8e4e7c9855a3ad496ca6a881a3641b4260605450772c54b"},
+]
+pytest-cov = [
+ {file = "pytest-cov-2.12.1.tar.gz", hash = "sha256:261ceeb8c227b726249b376b8526b600f38667ee314f910353fa318caa01f4d7"},
+ {file = "pytest_cov-2.12.1-py2.py3-none-any.whl", hash = "sha256:261bb9e47e65bd099c89c3edf92972865210c36813f80ede5277dceb77a4a62a"},
+]
+python-dateutil = [
+ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
+ {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
+]
+pytz = [
+ {file = "pytz-2021.1-py2.py3-none-any.whl", hash = "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"},
+ {file = "pytz-2021.1.tar.gz", hash = "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da"},
+]
+regex = [
+ {file = "regex-2021.7.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e6a1e5ca97d411a461041d057348e578dc344ecd2add3555aedba3b408c9f874"},
+ {file = "regex-2021.7.6-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:6afe6a627888c9a6cfbb603d1d017ce204cebd589d66e0703309b8048c3b0854"},
+ {file = "regex-2021.7.6-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:ccb3d2190476d00414aab36cca453e4596e8f70a206e2aa8db3d495a109153d2"},
+ {file = "regex-2021.7.6-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:ed693137a9187052fc46eedfafdcb74e09917166362af4cc4fddc3b31560e93d"},
+ {file = "regex-2021.7.6-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:99d8ab206a5270c1002bfcf25c51bf329ca951e5a169f3b43214fdda1f0b5f0d"},
+ {file = "regex-2021.7.6-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:b85ac458354165405c8a84725de7bbd07b00d9f72c31a60ffbf96bb38d3e25fa"},
+ {file = "regex-2021.7.6-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:3f5716923d3d0bfb27048242a6e0f14eecdb2e2a7fac47eda1d055288595f222"},
+ {file = "regex-2021.7.6-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5983c19d0beb6af88cb4d47afb92d96751fb3fa1784d8785b1cdf14c6519407"},
+ {file = "regex-2021.7.6-cp36-cp36m-win32.whl", hash = "sha256:c92831dac113a6e0ab28bc98f33781383fe294df1a2c3dfd1e850114da35fd5b"},
+ {file = "regex-2021.7.6-cp36-cp36m-win_amd64.whl", hash = "sha256:791aa1b300e5b6e5d597c37c346fb4d66422178566bbb426dd87eaae475053fb"},
+ {file = "regex-2021.7.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:59506c6e8bd9306cd8a41511e32d16d5d1194110b8cfe5a11d102d8b63cf945d"},
+ {file = "regex-2021.7.6-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:564a4c8a29435d1f2256ba247a0315325ea63335508ad8ed938a4f14c4116a5d"},
+ {file = "regex-2021.7.6-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:59c00bb8dd8775473cbfb967925ad2c3ecc8886b3b2d0c90a8e2707e06c743f0"},
+ {file = "regex-2021.7.6-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:9a854b916806c7e3b40e6616ac9e85d3cdb7649d9e6590653deb5b341a736cec"},
+ {file = "regex-2021.7.6-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:db2b7df831c3187a37f3bb80ec095f249fa276dbe09abd3d35297fc250385694"},
+ {file = "regex-2021.7.6-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:173bc44ff95bc1e96398c38f3629d86fa72e539c79900283afa895694229fe6a"},
+ {file = "regex-2021.7.6-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:15dddb19823f5147e7517bb12635b3c82e6f2a3a6b696cc3e321522e8b9308ad"},
+ {file = "regex-2021.7.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ddeabc7652024803666ea09f32dd1ed40a0579b6fbb2a213eba590683025895"},
+ {file = "regex-2021.7.6-cp37-cp37m-win32.whl", hash = "sha256:f080248b3e029d052bf74a897b9d74cfb7643537fbde97fe8225a6467fb559b5"},
+ {file = "regex-2021.7.6-cp37-cp37m-win_amd64.whl", hash = "sha256:d8bbce0c96462dbceaa7ac4a7dfbbee92745b801b24bce10a98d2f2b1ea9432f"},
+ {file = "regex-2021.7.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:edd1a68f79b89b0c57339bce297ad5d5ffcc6ae7e1afdb10f1947706ed066c9c"},
+ {file = "regex-2021.7.6-cp38-cp38-manylinux1_i686.whl", hash = "sha256:422dec1e7cbb2efbbe50e3f1de36b82906def93ed48da12d1714cabcd993d7f0"},
+ {file = "regex-2021.7.6-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cbe23b323988a04c3e5b0c387fe3f8f363bf06c0680daf775875d979e376bd26"},
+ {file = "regex-2021.7.6-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:0eb2c6e0fcec5e0f1d3bcc1133556563222a2ffd2211945d7b1480c1b1a42a6f"},
+ {file = "regex-2021.7.6-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:1c78780bf46d620ff4fff40728f98b8afd8b8e35c3efd638c7df67be2d5cddbf"},
+ {file = "regex-2021.7.6-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:bc84fb254a875a9f66616ed4538542fb7965db6356f3df571d783f7c8d256edd"},
+ {file = "regex-2021.7.6-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:598c0a79b4b851b922f504f9f39a863d83ebdfff787261a5ed061c21e67dd761"},
+ {file = "regex-2021.7.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:875c355360d0f8d3d827e462b29ea7682bf52327d500a4f837e934e9e4656068"},
+ {file = "regex-2021.7.6-cp38-cp38-win32.whl", hash = "sha256:e586f448df2bbc37dfadccdb7ccd125c62b4348cb90c10840d695592aa1b29e0"},
+ {file = "regex-2021.7.6-cp38-cp38-win_amd64.whl", hash = "sha256:2fe5e71e11a54e3355fa272137d521a40aace5d937d08b494bed4529964c19c4"},
+ {file = "regex-2021.7.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6110bab7eab6566492618540c70edd4d2a18f40ca1d51d704f1d81c52d245026"},
+ {file = "regex-2021.7.6-cp39-cp39-manylinux1_i686.whl", hash = "sha256:4f64fc59fd5b10557f6cd0937e1597af022ad9b27d454e182485f1db3008f417"},
+ {file = "regex-2021.7.6-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:89e5528803566af4df368df2d6f503c84fbfb8249e6631c7b025fe23e6bd0cde"},
+ {file = "regex-2021.7.6-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:2366fe0479ca0e9afa534174faa2beae87847d208d457d200183f28c74eaea59"},
+ {file = "regex-2021.7.6-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:f9392a4555f3e4cb45310a65b403d86b589adc773898c25a39184b1ba4db8985"},
+ {file = "regex-2021.7.6-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:2bceeb491b38225b1fee4517107b8491ba54fba77cf22a12e996d96a3c55613d"},
+ {file = "regex-2021.7.6-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:f98dc35ab9a749276f1a4a38ab3e0e2ba1662ce710f6530f5b0a6656f1c32b58"},
+ {file = "regex-2021.7.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:319eb2a8d0888fa6f1d9177705f341bc9455a2c8aca130016e52c7fe8d6c37a3"},
+ {file = "regex-2021.7.6-cp39-cp39-win32.whl", hash = "sha256:eaf58b9e30e0e546cdc3ac06cf9165a1ca5b3de8221e9df679416ca667972035"},
+ {file = "regex-2021.7.6-cp39-cp39-win_amd64.whl", hash = "sha256:4c9c3155fe74269f61e27617529b7f09552fbb12e44b1189cebbdb24294e6e1c"},
+ {file = "regex-2021.7.6.tar.gz", hash = "sha256:8394e266005f2d8c6f0bc6780001f7afa3ef81a7a2111fa35058ded6fce79e4d"},
+]
+scikit-learn = [
+ {file = "scikit-learn-0.23.1.tar.gz", hash = "sha256:e3fec1c8831f8f93ad85581ca29ca1bb88e2da377fb097cf8322aa89c21bc9b8"},
+ {file = "scikit_learn-0.23.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:058d213092de4384710137af1300ed0ff030b8c40459a6c6f73c31ccd274cc39"},
+ {file = "scikit_learn-0.23.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:ebe853e6f318f9d8b3b74dd17e553720d35646eff675a69eeaed12fbbbb07daa"},
+ {file = "scikit_learn-0.23.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:e9879ba9e64ec3add41bf201e06034162f853652ef4849b361d73b0deb3153ad"},
+ {file = "scikit_learn-0.23.1-cp36-cp36m-win32.whl", hash = "sha256:c2fa33d20408b513cf432505c80e6eb4bf4d71434f1ae36680765d4a2c2a16ec"},
+ {file = "scikit_learn-0.23.1-cp36-cp36m-win_amd64.whl", hash = "sha256:e585682e37f2faa81ad6cd4472fff646bf2fd0542147bec93697a905db8e6bd2"},
+ {file = "scikit_learn-0.23.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:244ca85d6eba17a1e6e8a66ab2f584be6a7784b5f59297e3d7ff8c7983af627c"},
+ {file = "scikit_learn-0.23.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:9e04c0811ea92931ee8490d638171b8cb2f21387efcfff526bbc8c2a3da60f1c"},
+ {file = "scikit_learn-0.23.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:0e7b55f73b35537ecd0d19df29dd39aa9e076dba78f3507b8136c819d84611fd"},
+ {file = "scikit_learn-0.23.1-cp37-cp37m-win32.whl", hash = "sha256:bded94236e16774385202cafd26190ce96db18e4dc21e99473848c61e4fdc400"},
+ {file = "scikit_learn-0.23.1-cp37-cp37m-win_amd64.whl", hash = "sha256:04799686060ecbf8992f26a35be1d99e981894c8c7860c1365cda4200f954a16"},
+ {file = "scikit_learn-0.23.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c3464e46ef8bd4f1bfa5c009648c6449412c8f7e9b3fc0c9e3d800139c48827"},
+ {file = "scikit_learn-0.23.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:93f56abd316d131645559ec0ab4f45e3391c2ccdd4eadaa4912f4c1e0a6f2c96"},
+ {file = "scikit_learn-0.23.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:3e6e92b495eee193a8fa12a230c9b7976ea0fc1263719338e35c986ea1e42cff"},
+ {file = "scikit_learn-0.23.1-cp38-cp38-win32.whl", hash = "sha256:5bcea4d6ee431c814261117281363208408aa4e665633655895feb059021aca6"},
+ {file = "scikit_learn-0.23.1-cp38-cp38-win_amd64.whl", hash = "sha256:16feae4361be6b299d4d08df5a30956b4bfc8eadf173fe9258f6d59630f851d4"},
+]
+scipy = [
+ {file = "scipy-1.6.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a15a1f3fc0abff33e792d6049161b7795909b40b97c6cc2934ed54384017ab76"},
+ {file = "scipy-1.6.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:e79570979ccdc3d165456dd62041d9556fb9733b86b4b6d818af7a0afc15f092"},
+ {file = "scipy-1.6.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:a423533c55fec61456dedee7b6ee7dce0bb6bfa395424ea374d25afa262be261"},
+ {file = "scipy-1.6.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:33d6b7df40d197bdd3049d64e8e680227151673465e5d85723b3b8f6b15a6ced"},
+ {file = "scipy-1.6.1-cp37-cp37m-win32.whl", hash = "sha256:6725e3fbb47da428794f243864f2297462e9ee448297c93ed1dcbc44335feb78"},
+ {file = "scipy-1.6.1-cp37-cp37m-win_amd64.whl", hash = "sha256:5fa9c6530b1661f1370bcd332a1e62ca7881785cc0f80c0d559b636567fab63c"},
+ {file = "scipy-1.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bd50daf727f7c195e26f27467c85ce653d41df4358a25b32434a50d8870fc519"},
+ {file = "scipy-1.6.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:f46dd15335e8a320b0fb4685f58b7471702234cba8bb3442b69a3e1dc329c345"},
+ {file = "scipy-1.6.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0e5b0ccf63155d90da576edd2768b66fb276446c371b73841e3503be1d63fb5d"},
+ {file = "scipy-1.6.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:2481efbb3740977e3c831edfd0bd9867be26387cacf24eb5e366a6a374d3d00d"},
+ {file = "scipy-1.6.1-cp38-cp38-win32.whl", hash = "sha256:68cb4c424112cd4be886b4d979c5497fba190714085f46b8ae67a5e4416c32b4"},
+ {file = "scipy-1.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:5f331eeed0297232d2e6eea51b54e8278ed8bb10b099f69c44e2558c090d06bf"},
+ {file = "scipy-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0c8a51d33556bf70367452d4d601d1742c0e806cd0194785914daf19775f0e67"},
+ {file = "scipy-1.6.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:83bf7c16245c15bc58ee76c5418e46ea1811edcc2e2b03041b804e46084ab627"},
+ {file = "scipy-1.6.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:794e768cc5f779736593046c9714e0f3a5940bc6dcc1dba885ad64cbfb28e9f0"},
+ {file = "scipy-1.6.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:5da5471aed911fe7e52b86bf9ea32fb55ae93e2f0fac66c32e58897cfb02fa07"},
+ {file = "scipy-1.6.1-cp39-cp39-win32.whl", hash = "sha256:8e403a337749ed40af60e537cc4d4c03febddcc56cd26e774c9b1b600a70d3e4"},
+ {file = "scipy-1.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:a5193a098ae9f29af283dcf0041f762601faf2e595c0db1da929875b7570353f"},
+ {file = "scipy-1.6.1.tar.gz", hash = "sha256:c4fceb864890b6168e79b0e714c585dbe2fd4222768ee90bc1aa0f8218691b11"},
+]
+six = [
+ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
+ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
+]
+threadpoolctl = [
+ {file = "threadpoolctl-2.2.0-py3-none-any.whl", hash = "sha256:e5a995e3ffae202758fa8a90082e35783b9370699627ae2733cd1c3a73553616"},
+ {file = "threadpoolctl-2.2.0.tar.gz", hash = "sha256:86d4b6801456d780e94681d155779058759eaef3c3564758b17b6c99db5f81cb"},
+]
+toml = [
+ {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
+ {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
+]
+tomli = [
+ {file = "tomli-1.1.0-py3-none-any.whl", hash = "sha256:f4a182048010e89cbec0ae4686b21f550a7f2903f665e34a6de58ec15424f919"},
+ {file = "tomli-1.1.0.tar.gz", hash = "sha256:33d7984738f8bb699c9b0a816eb646a8178a69eaa792d258486776a5d21b8ca5"},
+]
+typed-ast = [
+ {file = "typed_ast-1.4.3-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:2068531575a125b87a41802130fa7e29f26c09a2833fea68d9a40cf33902eba6"},
+ {file = "typed_ast-1.4.3-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:c907f561b1e83e93fad565bac5ba9c22d96a54e7ea0267c708bffe863cbe4075"},
+ {file = "typed_ast-1.4.3-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:1b3ead4a96c9101bef08f9f7d1217c096f31667617b58de957f690c92378b528"},
+ {file = "typed_ast-1.4.3-cp35-cp35m-win32.whl", hash = "sha256:dde816ca9dac1d9c01dd504ea5967821606f02e510438120091b84e852367428"},
+ {file = "typed_ast-1.4.3-cp35-cp35m-win_amd64.whl", hash = "sha256:777a26c84bea6cd934422ac2e3b78863a37017618b6e5c08f92ef69853e765d3"},
+ {file = "typed_ast-1.4.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f"},
+ {file = "typed_ast-1.4.3-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:52b1eb8c83f178ab787f3a4283f68258525f8d70f778a2f6dd54d3b5e5fb4341"},
+ {file = "typed_ast-1.4.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:01ae5f73431d21eead5015997ab41afa53aa1fbe252f9da060be5dad2c730ace"},
+ {file = "typed_ast-1.4.3-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:c190f0899e9f9f8b6b7863debfb739abcb21a5c054f911ca3596d12b8a4c4c7f"},
+ {file = "typed_ast-1.4.3-cp36-cp36m-win32.whl", hash = "sha256:398e44cd480f4d2b7ee8d98385ca104e35c81525dd98c519acff1b79bdaac363"},
+ {file = "typed_ast-1.4.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bff6ad71c81b3bba8fa35f0f1921fb24ff4476235a6e94a26ada2e54370e6da7"},
+ {file = "typed_ast-1.4.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0fb71b8c643187d7492c1f8352f2c15b4c4af3f6338f21681d3681b3dc31a266"},
+ {file = "typed_ast-1.4.3-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:760ad187b1041a154f0e4d0f6aae3e40fdb51d6de16e5c99aedadd9246450e9e"},
+ {file = "typed_ast-1.4.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5feca99c17af94057417d744607b82dd0a664fd5e4ca98061480fd8b14b18d04"},
+ {file = "typed_ast-1.4.3-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:95431a26309a21874005845c21118c83991c63ea800dd44843e42a916aec5899"},
+ {file = "typed_ast-1.4.3-cp37-cp37m-win32.whl", hash = "sha256:aee0c1256be6c07bd3e1263ff920c325b59849dc95392a05f258bb9b259cf39c"},
+ {file = "typed_ast-1.4.3-cp37-cp37m-win_amd64.whl", hash = "sha256:9ad2c92ec681e02baf81fdfa056fe0d818645efa9af1f1cd5fd6f1bd2bdfd805"},
+ {file = "typed_ast-1.4.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b36b4f3920103a25e1d5d024d155c504080959582b928e91cb608a65c3a49e1a"},
+ {file = "typed_ast-1.4.3-cp38-cp38-manylinux1_i686.whl", hash = "sha256:067a74454df670dcaa4e59349a2e5c81e567d8d65458d480a5b3dfecec08c5ff"},
+ {file = "typed_ast-1.4.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7538e495704e2ccda9b234b82423a4038f324f3a10c43bc088a1636180f11a41"},
+ {file = "typed_ast-1.4.3-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:af3d4a73793725138d6b334d9d247ce7e5f084d96284ed23f22ee626a7b88e39"},
+ {file = "typed_ast-1.4.3-cp38-cp38-win32.whl", hash = "sha256:f2362f3cb0f3172c42938946dbc5b7843c2a28aec307c49100c8b38764eb6927"},
+ {file = "typed_ast-1.4.3-cp38-cp38-win_amd64.whl", hash = "sha256:dd4a21253f42b8d2b48410cb31fe501d32f8b9fbeb1f55063ad102fe9c425e40"},
+ {file = "typed_ast-1.4.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f328adcfebed9f11301eaedfa48e15bdece9b519fb27e6a8c01aa52a17ec31b3"},
+ {file = "typed_ast-1.4.3-cp39-cp39-manylinux1_i686.whl", hash = "sha256:2c726c276d09fc5c414693a2de063f521052d9ea7c240ce553316f70656c84d4"},
+ {file = "typed_ast-1.4.3-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:cae53c389825d3b46fb37538441f75d6aecc4174f615d048321b716df2757fb0"},
+ {file = "typed_ast-1.4.3-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b9574c6f03f685070d859e75c7f9eeca02d6933273b5e69572e5ff9d5e3931c3"},
+ {file = "typed_ast-1.4.3-cp39-cp39-win32.whl", hash = "sha256:209596a4ec71d990d71d5e0d312ac935d86930e6eecff6ccc7007fe54d703808"},
+ {file = "typed_ast-1.4.3-cp39-cp39-win_amd64.whl", hash = "sha256:9c6d1a54552b5330bc657b7ef0eae25d00ba7ffe85d9ea8ae6540d2197a3788c"},
+ {file = "typed_ast-1.4.3.tar.gz", hash = "sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65"},
+]
+typing-extensions = [
+ {file = "typing_extensions-3.10.0.0-py2-none-any.whl", hash = "sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497"},
+ {file = "typing_extensions-3.10.0.0-py3-none-any.whl", hash = "sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84"},
+ {file = "typing_extensions-3.10.0.0.tar.gz", hash = "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342"},
+]
+zipp = [
+ {file = "zipp-3.5.0-py3-none-any.whl", hash = "sha256:957cfda87797e389580cb8b9e3870841ca991e2125350677b2ca83a0e99390a3"},
+ {file = "zipp-3.5.0.tar.gz", hash = "sha256:f5812b1e007e48cff63449a5e9f4e7ebea716b4111f9c4f9a645f91d579bf0c4"},
+]
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..b4fcbb3
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,30 @@
+[tool.poetry]
+name = "pycombat"
+version = "0.3.1"
+description = "pyComBat is a Python 3 implementation of ComBat, one of the most widely used tool for correcting technical biases, called batch effects, in microarray expression data."
+authors = ["Abdelkader Behdenna <abdelkader@epigenelabs.com>", "Guillaume Appé", "Aryo Gema"]
+license = "GPL-3.0-or-later"
+readme = "README.md"
+homepage = "https://github.com/epigenelabs/pyComBat"
+repository = "https://github.com/epigenelabs/pyComBat"
+include = [
+ "LICENSE",
+]
+
+[tool.poetry.dependencies]
+python = "^3.7"
+numpy = "^1.18.5"
+mpmath = "^1.1.0"
+pandas = "^1.1.5"
+patsy = "^0.5.1"
+scikit-learn = "0.23.1"
+typing-extensions = "^3.10.0"
+
+[tool.poetry.dev-dependencies]
+pytest = "^6.2.4"
+black = "^21.5b1"
+pytest-cov = "^2.12.1"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 773f09a..0000000
--- a/setup.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import setuptools
-
-with open("README.md", "r") as fh:
- long_description = fh.read()
-
-setuptools.setup(
- install_requires=[
- "numpy==1.18.5",
- "mpmath==1.1.0",
- "pandas==1.1.5",
- "patsy==0.5.1"
- ],
- name="combat",
- version="0.3.0",
- author="Abdelkader Behdenna",
- author_email="abdelkader@epigenelabs.com",
- description="pyComBat, a Python tool for batch effects correction in high-throughput molecular data using empirical Bayes methods",
- long_description=long_description,
- long_description_content_type="text/markdown",
- url="https://github.com/epigenelabs/pyComBat",
- packages=setuptools.find_packages(),
- classifiers=[
- "Programming Language :: Python :: 3",
- "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
- "Operating System :: OS Independent",
- ],
- python_requires='>=3.6',
-)
\ No newline at end of file
diff --git a/tests/test_combat_utils.py b/tests/test_combat_utils.py
new file mode 100644
index 0000000..2b09e07
--- /dev/null
+++ b/tests/test_combat_utils.py
@@ -0,0 +1,178 @@
+import os
+import sys
+
+sys.path.append(os.getcwd())
+
+import pytest
+
+import numpy as np
+import pandas as pd
+
+from combat.utils.combat_utils import (
+ model_matrix,
+ compute_prior,
+ postmean,
+ postvar,
+ it_sol,
+ int_eprior,
+ check_ref_batch,
+ treat_batches,
+ treat_covariates,
+ calculate_mean_var,
+ calculate_stand_mean,
+ standardise_data,
+ fit_model,
+)
+from combat.utils.common_utils import check_NAs
+
+"""
+Load the shared variables
+"""
+batch = np.asarray([1, 1, 1, 2, 2, 3, 3, 3, 3])
+matrix = np.transpose(
+ [
+ np.random.normal(size=1000, loc=3, scale=1),
+ np.random.normal(size=1000, loc=3, scale=1),
+ np.random.normal(size=1000, loc=3, scale=1),
+ np.random.normal(size=1000, loc=2, scale=0.6),
+ np.random.normal(size=1000, loc=2, scale=0.6),
+ np.random.normal(size=1000, loc=4, scale=1),
+ np.random.normal(size=1000, loc=4, scale=1),
+ np.random.normal(size=1000, loc=4, scale=1),
+ np.random.normal(size=1000, loc=4, scale=1),
+ ]
+)
+
+matrix = pd.DataFrame(
+ data=matrix,
+ columns=["sample_" + str(i + 1) for i in range(9)],
+ index=["gene_" + str(i + 1) for i in range(1000)],
+)
+ref_batch = None
+mean_only = False
+par_prior = False
+precision = None
+mod = []
+dat = matrix.values
+batchmod = model_matrix(list(batch), intercept=False, drop_first=False)
+ref, batchmod = check_ref_batch(ref_batch, batch, batchmod)
+n_batch, batches, n_batches, n_array = treat_batches(batch)
+design = treat_covariates(batchmod, mod, ref, n_batch)
+NAs = check_NAs(dat)
+B_hat, grand_mean, var_pooled = calculate_mean_var(
+ design, batches, ref, dat, NAs, ref_batch, n_batches, n_batch, n_array
+)
+stand_mean = calculate_stand_mean(grand_mean, n_array, design, n_batch, B_hat)
+s_data = standardise_data(dat, stand_mean, var_pooled, n_array)
+gamma_star, delta_star, batch_design = fit_model(
+ design,
+ n_batch,
+ s_data,
+ batches,
+ mean_only,
+ par_prior,
+ precision,
+ ref_batch,
+ ref,
+ NAs,
+)
+
+
+# test for compute_prior
+def test_compute_prior():
+ print("aprior", compute_prior("a", gamma_star, False))
+ assert compute_prior("a", gamma_star, True) == 1
+ print("bprior", compute_prior("b", gamma_star, False))
+ assert compute_prior("b", gamma_star, True) == 1
+
+
+# test for model_matrix
+def test_model_matrix():
+ model_matrix_test = model_matrix([1, 1, 0, 1, 0])
+ assert np.shape(model_matrix_test) == (5, 2)
+ assert list(model_matrix_test[0]) == [1.0, 1.0]
+
+
+# test for compute_prior
+def test_compute_prior():
+ print("aprior", compute_prior("a", gamma_star, False))
+ assert compute_prior("a", gamma_star, True) == 1
+ print("bprior", compute_prior("b", gamma_star, False))
+ assert compute_prior("b", gamma_star, True) == 1
+
+
+# test for postmean
+def test_postmean():
+ assert np.shape(
+ postmean(gamma_star, delta_star, gamma_star, delta_star)
+ ) == np.shape(gamma_star)
+
+
+# test for postvar
+def test_postvar():
+ assert np.sum(postvar([2, 4, 6], 2, 1, 1) - [2, 3, 4]) == 0
+
+
+# test for it_sol
+def test_it_sol():
+ assert 1 == 1
+
+
+# test for int_eprior
+def test_int_eprior():
+ assert 1 == 1
+
+
+# test for model_matrix
+def test_model_matrix():
+ model_matrix_test = model_matrix([1, 1, 0, 1, 0])
+ assert np.shape(model_matrix_test) == (5, 2)
+ assert list(model_matrix_test[0]) == [1.0, 1.0]
+
+
+# test for check_ref_batch
+def test_check_ref_batch():
+ assert check_ref_batch(1, batch, batchmod) == (0, batchmod)
+ assert check_ref_batch(2, batch, batchmod) == (1, batchmod)
+ print("Using batch 1 then 2. Above lines should inform on that.")
+ assert check_ref_batch(None, batch, batchmod) == (None, batchmod)
+
+
+# test for treat_batches
+def test_treat_batches():
+ assert n_batch == 3
+ assert batches[0].tolist() == [0, 1, 2]
+ assert batches[1].tolist() == [3, 4]
+ assert batches[2].tolist() == [5, 6, 7, 8]
+ assert n_batches == [3, 2, 4]
+ assert n_array == 9
+
+
+# test for treat_covariates
+def test_treat_covariates():
+ batchmod = model_matrix(list(batch), intercept=False, drop_first=False)
+ assert np.sum(design - np.transpose(batchmod)) == 0
+
+
+# test for calculate_mean_var
+def test_calculate_mean_var():
+ assert np.shape(B_hat)[0] == np.shape(design)[0]
+ assert np.shape(grand_mean)[0] == np.shape(dat)[0]
+ assert np.shape(var_pooled)[0] == np.shape(dat)[0]
+
+
+# test for calculate_stand_mean
+def test_calculate_stand_mean():
+ assert np.shape(stand_mean) == np.shape(dat)
+
+
+# test for standardise_data
+def test_standardise_data():
+ assert np.shape(s_data) == np.shape(dat)
+
+
+# test for fit_model
+def test_fit_model():
+ assert np.shape(gamma_star)[1] == np.shape(dat)[0]
+ assert np.shape(delta_star)[1] == np.shape(dat)[0]
+ assert np.shape(batch_design) == np.shape(design)
diff --git a/tests/test_common_utils.py b/tests/test_common_utils.py
new file mode 100644
index 0000000..c028156
--- /dev/null
+++ b/tests/test_common_utils.py
@@ -0,0 +1,36 @@
+import os
+import sys
+
+sys.path.append(os.getcwd())
+
+import pytest
+
+import numpy as np
+import pandas as pd
+
+from combat.utils.common_utils import check_all_ones, check_mean_only, check_NAs
+
+
+# tests for check_all_ones function
+def test_check_all_ones():
+ assert check_all_ones(np.array([1, 1, 1, 1, 1])) == True
+
+ assert check_all_ones(np.array([1, 1, 1, 1, 0])) == False
+ assert check_all_ones(np.array([0, 0, 0, 0, 0])) == False
+
+ assert (
+ check_all_ones(np.array([1.5, 0.5, 1, 1, 1])) == False
+ ) # This test to show the limit of the method we use
+
+
+# test for check_mean_only
+def test_check_mean_only():
+ check_mean_only(True)
+ check_mean_only(False)
+ print("Only one line of text should have been printed above.")
+
+
+# test for check_NAs
+def test_check_NAs():
+ assert check_NAs([0, 1, 2]) == False
+ assert check_NAs([0, np.nan, 2]) == True
diff --git a/tests/test_pycombat_method.py b/tests/test_pycombat_method.py
new file mode 100644
index 0000000..b998a1a
--- /dev/null
+++ b/tests/test_pycombat_method.py
@@ -0,0 +1,93 @@
+# -----------------------------------------------------------------------------
+# Copyright (C) 2019-2020 A. Behdenna, A. Nordor, J. Haziza and A. Gema
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# For more information, please contact Abdelkader Behdenna <abdelkader@epigenelabs.com>
+
+# file test_pycombat_method.py
+# author A. Behdenna, J. Haziza, A. Gema, A. Nordor
+# date Sept 2020
+# -----------------------------------------------------------------------------
+
+
+# this file is only used for unit testing
+# We import the function that will be tested one by one, incrementally
+# Generates a report about the functions tested
+
+import os
+import sys
+
+sys.path.append(os.getcwd())
+
+import numpy as np
+import pandas as pd
+
+##########
+# import function used for unit testing
+from combat import pycombat
+
+##########
+print("\n#### Unit Testing for pyComBat ####\n")
+
+##########
+# Define constants for unit testing
+
+batch = np.asarray([1, 1, 1, 2, 2, 3, 3, 3, 3])
+matrix = np.transpose(
+ [
+ np.random.normal(size=1000, loc=3, scale=1),
+ np.random.normal(size=1000, loc=3, scale=1),
+ np.random.normal(size=1000, loc=3, scale=1),
+ np.random.normal(size=1000, loc=2, scale=0.6),
+ np.random.normal(size=1000, loc=2, scale=0.6),
+ np.random.normal(size=1000, loc=4, scale=1),
+ np.random.normal(size=1000, loc=4, scale=1),
+ np.random.normal(size=1000, loc=4, scale=1),
+ np.random.normal(size=1000, loc=4, scale=1),
+ ]
+)
+
+matrix = pd.DataFrame(
+ data=matrix,
+ columns=["sample_" + str(i + 1) for i in range(9)],
+ index=["gene_" + str(i + 1) for i in range(1000)],
+)
+
+print("Matrix and batch generated.")
+
+matrix_adjusted = pycombat(matrix, batch)
+
+print("Adjusted matrix generated.")
+
+##########
+# local tests before unit testing
+
+
+def test_means():
+ print(f"mean matrix: {np.mean(matrix)}")
+ print(f"mean matrix (adjusted): {np.mean(matrix_adjusted)}")
+ print("**********")
+ print(f"var matrix: {np.var(matrix)}")
+ print(f"var matrix (adjusted): {np.var(matrix_adjusted)}")
+
+
+# general tests on pyComBat
+def test_pycombat():
+ assert np.shape(matrix) == np.shape(matrix_adjusted)
+ assert (
+ abs(np.mean(matrix.values) - np.mean(matrix_adjusted.values))
+ <= np.mean(matrix.values) * 0.05
+ )
+ assert np.var(matrix_adjusted.values) <= np.var(matrix.values)
diff --git a/tests/test_pycombat_object.py b/tests/test_pycombat_object.py
new file mode 100644
index 0000000..e95f78b
--- /dev/null
+++ b/tests/test_pycombat_object.py
@@ -0,0 +1,93 @@
+# -----------------------------------------------------------------------------
+# Copyright (C) 2019-2020 A. Behdenna, A. Nordor, J. Haziza and A. Gema
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# For more information, please contact Abdelkader Behdenna <abdelkader@epigenelabs.com>
+
+# file test_pycombat_object.py
+# author A. Behdenna, J. Haziza, A. Gema, A. Nordor
+# date Sept 2020
+# -----------------------------------------------------------------------------
+
+
+# this file is only used for unit testing
+# We import the function that will be tested one by one, incrementally
+# Generates a report about the functions tested
+
+import os
+import sys
+
+sys.path.append(os.getcwd())
+
+import numpy as np
+import pandas as pd
+
+##########
+# import function used for unit testing
+from combat import PyCombat
+
+##########
+print("\n#### Unit Testing for pyComBat ####\n")
+
+##########
+# Define constants for unit testing
+
+batch = np.asarray([1, 1, 1, 2, 2, 3, 3, 3, 3])
+matrix = np.transpose(
+ [
+ np.random.normal(size=1000, loc=3, scale=1),
+ np.random.normal(size=1000, loc=3, scale=1),
+ np.random.normal(size=1000, loc=3, scale=1),
+ np.random.normal(size=1000, loc=2, scale=0.6),
+ np.random.normal(size=1000, loc=2, scale=0.6),
+ np.random.normal(size=1000, loc=4, scale=1),
+ np.random.normal(size=1000, loc=4, scale=1),
+ np.random.normal(size=1000, loc=4, scale=1),
+ np.random.normal(size=1000, loc=4, scale=1),
+ ]
+)
+
+matrix = pd.DataFrame(
+ data=matrix,
+ columns=["sample_" + str(i + 1) for i in range(9)],
+ index=["gene_" + str(i + 1) for i in range(1000)],
+)
+
+print("Matrix and batch generated.")
+
+matrix_adjusted = PyCombat().fit_transform(matrix, batch)
+
+print("Adjusted matrix generated.")
+
+##########
+# local tests before unit testing
+
+
+def test_means():
+ print(f"mean matrix: {np.mean(matrix)}")
+ print(f"mean matrix (adjusted): {np.mean(matrix_adjusted)}")
+ print("**********")
+ print(f"var matrix: {np.var(matrix)}")
+ print(f"var matrix (adjusted): {np.var(matrix_adjusted)}")
+
+
+# general tests on pyComBat
+def test_pycombat():
+ assert np.shape(matrix) == np.shape(matrix_adjusted)
+ assert (
+ abs(np.mean(matrix.values) - np.mean(matrix_adjusted.values))
+ <= np.mean(matrix.values) * 0.05
+ )
+ assert np.var(matrix_adjusted.values) <= np.var(matrix.values)