-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathipf_analysis.R
More file actions
151 lines (85 loc) · 4.53 KB
/
ipf_analysis.R
File metadata and controls
151 lines (85 loc) · 4.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#clear environment
rm(list=ls())
data <-read.table('GSE150910_gene-level_count_file.csv',header= TRUE, sep = ',', row.names=1)
data <- read.table(
file = "C:/Users/modles/Downloads/Transcriptomics/GSE150910_gene-level_count_file.csv",
header = TRUE, # First row contains column names
sep = ",", # Comma-separated (CSV format)
row.names = 1 # Use first column as row names
)
samples <- colnames(data)
genes <- row.names(data)
classes <- c()
for(sample in samples){
tmp <- unlist(strsplit(sample,"_"))
classes <- append(classes, tmp[1])
}
filtered_data <- data[, classes %in% c("control","ipf") ]
filtered_classes <- classes[classes %in% c("control","ipf") ]
dir.create("C:/Users/modles/Downloads/Transcriptomics/rds_objects", recursive = TRUE)
saveRDS(filtered_data, file = "C:/Users/modles/Downloads/Transcriptomics/rds_objects/filtered_data.RDS")
saveRDS(filtered_classes, file = "C:/Users/modles/Downloads/Transcriptomics/rds_objects/filtered_classes.RDS")
#filtered_data <- readRDS("C:/Users/modles/Downloads/Transcriptomics/rds_objects/filtered_data.RDS")
#filtered_classes <- readRDS("C:/Users/modles/Downloads/Transcriptomics/rds_objects/filtered_classes.RDS")
# used to manage Bioconductor packages
#install.packages("BiocManager")
BiocManager::install("DESeq2")
library(DESeq2)
data <- readRDS("C:/Users/modles/Downloads/Transcriptomics/rds_objects/filtered_data.RDS")
classes <- readRDS("C:/Users/modles/Downloads/Transcriptomics/rds_objects/filtered_classes.RDS")
samples_info <- data.frame(
condition = factor(classes, levels = c("control","ipf"))
)
rownames(samples_info) <- colnames(data)
dds <- DESeqDataSetFromMatrix(countData = data,
colData = samples_info,
design = ~ condition)
dds <- DESeq(dds)
results <- results(dds)
head(results[order(results$padj),])
significance_threshold <- 0.01
significant_results <- results[which(results$padj < significance_threshold), ]
upregulated <- rownames(significant_results[significant_results$log2FoldChange > 0,])
downregulated <- rownames(significant_results[significant_results$log2FoldChange < 0,])
#install.packages("pheatmap")
library(pheatmap)
N <- 100
top_genes <- head(rownames(significant_results[order(significant_results$padj),]), N)
norm_counts <- counts(dds, normalized=TRUE)
heatmap_data <- norm_counts[top_genes, ]
# We use the full, absolute path here.
if (!dir.exists("C:/Users/modles/Downloads/Transcriptomics/plots")) {
dir.create("C:/Users/modles/Downloads/Transcriptomics/plots", recursive = TRUE)
}
# 2. Generate PDF
# Now, specify the full path to the PDF file, including the directory you just ensured exists.
pdf("C:/Users/modles/Downloads/Transcriptomics/plots/heatmap_plot.pdf", width = 10, height = 8)
#Create color palette (blue-white-red gradient with 50 shades)
my_colors <- colorRampPalette(c("royalblue3", "white", "firebrick3"))(50)
#Define annotation colors for conditions
annotation_colors <- list(
condition = c(control = "darkolivegreen3", # Changed to green for better contrast
ipf = "coral2") # Changed to coral for better visibility
)
#Generate the heatmap with enhanced parameters
pheatmap(heatmap_data,
scale = "row", # Z-score normalization by row (genes)
clustering_distance_rows = "euclidean",
clustering_distance_cols = "euclidean",
clustering_method = "complete", # Hierarchical clustering
annotation_col = samples_info, # Sample metadata
annotation_colors = annotation_colors,
color = my_colors, # Custom color gradient
fontsize_row = 6, # Gene name size
fontsize_col = 8, # Sample name size (increased)
fontsize = 9, # General font size
border_color = NA, # No border around cells
show_rownames = TRUE, # Show gene names
show_colnames = TRUE, # Show sample names
treeheight_row = 30, # Dendrogram height for rows
treeheight_col = 30, # Dendrogram height for columns
cellwidth = 12, # Cell dimensions
cellheight = 8,
main = "Top Differentially Expressed Genes\n(IPF vs Control)") # Title
# 5. Close the PDF device
dev.off()