-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprepareAdmixture.py
More file actions
132 lines (100 loc) · 4.84 KB
/
prepareAdmixture.py
File metadata and controls
132 lines (100 loc) · 4.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# -*- coding: utf-8 -*-
"""
Created on Wed May 15 14:08:02 2024
@author: PEIXOTT
"""
import os
import gzip
import argparse
def readBim(bimFileOld, bimFileNew, dictBim, fileKeepName):
bimOutput = open(bimFileNew, "w")
bimInput = open(bimFileOld)
if dictBim: #if it has variants
fill = False
fileKeep = open(fileKeepName, "w")
else:
fill = True
for line in bimInput:
chrom, ID, poscM, pos, ref, alt = line.strip().split()
chrom = chrom.replace("chr", "")
IDVar = f"{chrom}:{pos}:{ref}:{alt}"
bimOutput.write(f"{chrom}\t{IDVar}\t{poscM}\t{pos}\t{ref}\t{alt}\n")
if fill:
if IDVar in dictBim:
dictBim[IDVar] = False
else:
dictBim[IDVar] = True
else:
if IDVar in dictBim:
fileKeep.write(f"{IDVar}\n")
if not fill:
fileKeep.close()
bimOutput.close()
bimInput.close()
return dictBim
def convertPFiles(prefix, outFolder, index, plink2, covar = False):
if covar:
toRemove = []
psam = open(f"{prefix}.psam")
header = True
for line in psam:
if header:
header = False
headerLine = line.strip().split()
else:
split = line.strip().split()
for i in range(len(split)):
if split[i] == "NA" or split[i] == "-9":
print(f"ID {split[0]} -> {headerLine[i]} {split[i]} ")
if split[0] not in toRemove:
toRemove.append(split[0])
fileRemove = open(f"{outFolder}/toRemove_NA.txt", "w")
for ID in toRemove:
fileRemove.write(f"{ID}\n")
fileRemove.close()
os.system(f"{plink2} --pfile {prefix} --make-bed --out {outFolder}/Bfile{index} --remove {outFolder}/toRemove_NA.txt --chr 1-22")
else:
os.system(f"{plink2} --pfile {prefix} --make-bed --out {outFolder}/Bfile{index} --chr 1-22")
os.system(f"mv {outFolder}/Bfile{index}.bim {outFolder}/OLD_Bfile{index}.bim")
return f"{outFolder}/OLD_Bfile{index}.bim"
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Merge PFiles to Admixture')
requiredGeneral = parser.add_argument_group("Required arguments for all steps")
requiredGeneral.add_argument('-i', '--input1', required=True,
help='First pfile. We recommend this file be with the lower number of variants')
requiredGeneral.add_argument('-I', '--input2', required=True,
help='Secong pfile.')
requiredGeneral.add_argument('-o', '--output', help='Name of output folder', required=True)
requiredGeneral.add_argument('-F', '--parentalsFolder', help='Folder with files with parentals ID', required=True)
requiredGeneral.add_argument('-P', '--plink2', help='Plink2 executable', required=True)
requiredGeneral.add_argument('-p', '--plink', help='Plink executable', required=True)
requiredGeneral = parser.add_argument_group("Optional arguments")
requiredGeneral.add_argument('-c', '--covar', help='Remove NA covar', required=False, action="store_true")
args = parser.parse_args()
os.system(f"mkdir {args.output}")
bimOld1 = convertPFiles(args.input1, args.output, 1, args.plink2, args.covar)
bimOld2 = convertPFiles(args.input2, args.output, 2, args.plink2)
dictBim = {}
dictBim = readBim(bimOld1, f"{args.output}/Bfile1.bim", dictBim, "")
dictBim = readBim(bimOld2, f"{args.output}/Bfile2.bim", dictBim, f"{args.output}/ListInCommon.txt")
os.system(f"{args.plink} --bfile {args.output}/Bfile1 --extract {args.output}/ListInCommon.txt --make-bed --out {args.output}/Common_Bfile1")
os.system(f"{args.plink} --bfile {args.output}/Bfile2 --extract {args.output}/ListInCommon.txt --make-bed --out {args.output}/Common_Bfile2")
os.system(f"{args.plink} --bfile {args.output}/Common_Bfile1 --bmerge {args.output}/Common_Bfile2 --make-bed --out {args.output}/Common_Bfile1And2")
os.system(f"{args.plink} --bfile {args.output}/Common_Bfile1And2 --indep-pairwise 200 50 0.2 --out LD")
os.system(f"{args.plink} --bfile {args.output}/Common_Bfile1And2 --extract LD.prune.in --make-bed --out {args.output}/LD_Bfile1And2")
parentals = ["AFR", "AMR", "EAS", "EUR", "SAS"]
dictPop = {}
for pop in parentals:
filePop = open(f"{args.parentalsFolder}/{pop}.txt")
for line in filePop:
dictPop[line.strip()] = pop
filePop.close()
famFile = open(f"{args.output}/LD_Bfile1And2.fam")
fileAdmixture = open(f"{args.output}/LD_Bfile1And2.pop", "w")
for line in famFile:
ID = line.strip().split()[1]
if ID in dictPop:
fileAdmixture.write(f"{dictPop[ID]}\n")
else:
fileAdmixture.write(f"-\n")
fileAdmixture.close()