Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added Arabidopsis/Lasky2016/FullData.rds
Binary file not shown.
56 changes: 56 additions & 0 deletions Arabidopsis/Lasky2016/Lasky2016Processing.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# ---- Load raw inputs ----
# NOTE: every path below is relative to the author's home directory
# (~/Downloads/SOO); adjust them before running on another machine.

# Accession climate table. ecotype_id gets an "X" prefix so it can be compared
# with the column names of the SNP table further down (presumably because
# read.csv() prefixes numeric column names with "X" -- confirm against the file).
AccClim <- read.csv("~/Downloads/SOO/Lasky2012/AccessionClimateData.csv")
AccClim$ecotype_id <- paste0("X", AccClim$ecotype_id)

# Common-garden climate table; it is not modified before being saved.
CommClim <- read.csv("~/Downloads/SOO/Lasky2012/CommonGard_Clim.csv")

# Phenotype table, with the same "X" prefix applied to ecotype_id.
Pheno <- read.csv("~/Downloads/SOO/Fournier-Level.csv")
Pheno$ecotype_id <- paste0("X", Pheno$ecotype_id)

# SNP calls. skip = 1 discards the first line of the file, so the header is
# read from the second line. TRUE is spelled out because T can be reassigned.
SNPs <- read.csv(
  "~/Downloads/SOO/call_method_75/call_method_75_TAIR9.csv",
  header = TRUE,
  skip = 1
)

# ---- Restrict every table to the shared set of accessions ----
# Keep climate rows that have a phenotype record, then phenotype rows that
# have a climate record.
AccClim <- AccClim[AccClim$ecotype_id %in% Pheno$ecotype_id, ] #114
Pheno <- Pheno[Pheno$ecotype_id %in% AccClim$ecotype_id, ] #137 accessions

# Columns of the SNP table whose names are retained accession IDs.
SeqIndx <- which(colnames(SNPs) %in% Pheno$ecotype_id)

# Fail early with a clear message: with no matching columns, 3:ncol(SNPs)
# below would count down (3:2) and stop with an obscure subsetting error.
if (length(SeqIndx) == 0) {
  stop("No SNP columns match the accession IDs in Pheno$ecotype_id",
       call. = FALSE)
}

# Columns 1 and 2 are kept in front of the accession columns.
SNPs <- SNPs[c(1, 2, SeqIndx)]

# Order the phenotypes and climate data by ecotype ID so that their rows
# follow the order of the accession columns in the SNP table.
acc_ids <- colnames(SNPs)[3:ncol(SNPs)]
Pheno <- Pheno[match(acc_ids, Pheno$ecotype_id), ]
AccClim <- AccClim[match(acc_ids, AccClim$ecotype_id), ]

# Sanity check: all three tables must now list the same accessions in the
# same order.
stopifnot(
  identical(as.character(Pheno$ecotype_id), acc_ids),
  identical(as.character(AccClim$ecotype_id), acc_ids)
)

dim(Pheno); dim(AccClim); dim(SNPs)


# ---- Recode SNPs ----
# Split the first two columns from the genotype calls, then transpose the
# calls so that rows are accessions and columns are SNPs.
SNPinfo <- SNPs[1:2]
SNPs <- t(SNPs[3:ncol(SNPs)])

# Recode the allele calls numerically: the major (most frequent) allele at
# each SNP is coded 2 and every other call is coded 0.
#
# Exactly one major allele is chosen per SNP with which.max(). Comparing the
# counts against max() and min() separately breaks when the counts are tied
# (or the site has a single allele): both sets then contain every allele, all
# calls end up 0, and a perfectly balanced SNP looks monomorphic.
# On a tie, which.max() takes the first allele in table() order.
SNPmat <- matrix(0, ncol = ncol(SNPs), nrow = nrow(SNPs))
for (i in seq_len(ncol(SNPs))) {
  calls <- SNPs[, i]
  allele_counts <- table(calls)
  # character(0) when the column has no non-missing calls; the column then
  # stays all 0.
  major_allele <- names(allele_counts)[which.max(allele_counts)]
  # Calls that are not the major allele (including NA) keep the initial 0.
  SNPmat[calls %in% major_allele, i] <- 2
}
colnames(SNPmat) <- colnames(SNPs)
row.names(SNPmat) <- row.names(SNPs)

# ---- Filter low-MAF SNPs (MAF < 0.1) ----
# Genotypes are coded 0/2, so colMeans() / 2 is the frequency of the allele
# coded 2; the minor allele frequency is the smaller of that and its
# complement.
freq <- colMeans(SNPmat) / 2
maf <- pmin(freq, 1 - freq)
maf.index <- which(maf < 0.1)
length(maf.index)

# Select the columns to keep by positive index. Negative indexing with
# -maf.index would drop EVERY SNP when no SNP falls below the threshold,
# because -integer(0) selects nothing. drop = FALSE keeps the matrix shape
# even if a single SNP survives.
keep.index <- setdiff(seq_len(ncol(SNPmat)), maf.index)
SNPmat <- SNPmat[, keep.index, drop = FALSE]
SNPinfo <- SNPinfo[keep.index, , drop = FALSE]

# NOTE(review): earlier notes on these lines recorded 137 x 200682 and
# 114 x 168407, which disagree with each other; verify against this output.
dim(SNPmat); dim(SNPinfo)

# Bundle the processed tables into a single named list and serialise it.
FullData <- list(
  SNPmat = SNPmat,                  # recoded, MAF-filtered genotype matrix
  MAP = SNPinfo,                    # first two SNP-table columns, same filter
  AccessionClimData = AccClim,      # accession climate table
  CommonGardenClimData = CommClim,  # common-garden climate table
  Phenotypes = Pheno                # phenotype table
)

saveRDS(object = FullData, file = "~/Downloads/SOO/FullData.rds")
4 changes: 4 additions & 0 deletions Arabidopsis/Lasky2016/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Lasky 2016
This is a dataset from Jesse Lasky's 2016 [paper](https://onlinelibrary.wiley.com/doi/abs/10.1111/1755-0998.12714), which describes an approach for synthesizing data from common gardens and genome–environment associations to infer SNP effects on local adaptation. The paper uses data from [Fournier-Level et al. (2011)](https://science.sciencemag.org/content/334/6052/86), [Hancock et al. (2011)](https://science.sciencemag.org/content/334/6052/83), and [Horton et al. (2012)](https://www.nature.com/articles/ng.1042). I have taken these data and selected the accessions that have phenotypic information, climate data at their collection sites, and genotypic data. The `FullData.rds` file contains all of these data.
SNPs were obtained from the [call_method_75 archive](http://bergelson.uchicago.edu/wp-content/uploads/2015/04/call_method_75.tar.gz).
Phenotypic and climate data were accessed from the [Lasky paper](https://onlinelibrary.wiley.com/doi/abs/10.1111/1755-0998.12714).
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ The directories and contents are listed below.
* NGR
* Olatoye_2018

* Arabidopsis
* Lasky 2016