F1 Read counts and BED files of F0 variants are imported.
SNPs intersecting \(n > 2\) genes, \(n = 2\) genes annotated on the same strand, or within mitochondrial, miRNA, or tRNA genes are discarded.
SNPs with \(n < 10\) read counts in any Lineage; SNPs where the distance between the nearest SNP is shorter than the average read length (thus the read counts are exactly the same for both SNPs), and genes with \(n < 2\) SNPs are discarded.
To adjust for differences in sequencing depth between libraries, read counts are normalized using the median of ratios normalization (MRN) method from DESeq2.
For each SNP, a Storer-Kim binomial exact test of two proportions is conducted using the MRN normalized counts to test the hypothesis that the proportion of maternal read counts is significantly different from the proportion of paternal read counts.
Previously established cutoffs for p1 (the proportion of “A” allele counts in individuals from the “BxA” Lineage) and p2 (the proportion of “A” allele counts in individuals from the “AxB” Lineage) are required for a gene to be considered as showing parent or lineage bias.
A general linear mixed-effects model (GLIMMIX) is fit for each gene to assess the effects of parent, lineage, and their interaction on the quantity of raw read counts at each SNP, using the library size factors estimated during MRN normalization as an offset to adjust for variation in sequencing depth between samples. Genes exhibiting significant parent∗lineage effects are considered unbiased.
For a gene to be considered as showing parent or lineage biases, all SNPs are required to exhibit the same directional bias in both tests.
This analysis utilizes several custom functions. See here for documentation.
# Packages
library(tidyverse)
library(plyr)
library(Rfast)
library(tryCatchLog)
library(lmerTest)
library(lme4)
library(car)
library(gridExtra)
library(kableExtra)
library(doParallel)
library(ggpubr)
library(grid)
library(tagcloud)
library(DESeq2)
library(ggprism)
# Custom functions
source("PSGE_functions.R")
metadata <- read.csv("metadata.csv")
metadata <- metadata[metadata$block=="Y12xO20",]
kbl(metadata) %>% kable_styling()
sample.id | parent | phenotype | individual | lineage | block | |
---|---|---|---|---|---|---|
13 | Y12D_SRR24049740 | D | responsive | Y12-R1 | A | Y12xO20 |
14 | Y12D_SRR24049739 | D | responsive | Y12-R2 | A | Y12xO20 |
15 | Y12D_SRR24049738 | D | responsive | Y12-R3 | A | Y12xO20 |
16 | Y12D_SRR24049737 | D | unresponsive | Y12-U1 | A | Y12xO20 |
17 | Y12D_SRR24049735 | D | unresponsive | Y12-U2 | A | Y12xO20 |
18 | Y12D_SRR24049734 | D | unresponsive | Y12-U3 | A | Y12xO20 |
19 | O20D_SRR24049753 | D | responsive | O20-R1 | B | Y12xO20 |
20 | O20D_SRR24049752 | D | responsive | O20-R2 | B | Y12xO20 |
21 | O20D_SRR24049751 | D | responsive | O20-R3 | B | Y12xO20 |
22 | O20D_SRR24049750 | D | unresponsive | O20-U1 | B | Y12xO20 |
23 | O20D_SRR24049749 | D | unresponsive | O20-U2 | B | Y12xO20 |
24 | O20D_SRR24049748 | D | unresponsive | O20-U3 | B | Y12xO20 |
49 | Y12Q_SRR24049740 | Q | responsive | Y12-R1 | A | Y12xO20 |
50 | Y12Q_SRR24049739 | Q | responsive | Y12-R2 | A | Y12xO20 |
51 | Y12Q_SRR24049738 | Q | responsive | Y12-R3 | A | Y12xO20 |
52 | Y12Q_SRR24049737 | Q | unresponsive | Y12-U1 | A | Y12xO20 |
53 | Y12Q_SRR24049735 | Q | unresponsive | Y12-U2 | A | Y12xO20 |
54 | Y12Q_SRR24049734 | Q | unresponsive | Y12-U3 | A | Y12xO20 |
55 | O20Q_SRR24049753 | Q | responsive | O20-R1 | B | Y12xO20 |
56 | O20Q_SRR24049752 | Q | responsive | O20-R2 | B | Y12xO20 |
57 | O20Q_SRR24049751 | Q | responsive | O20-R3 | B | Y12xO20 |
58 | O20Q_SRR24049750 | Q | unresponsive | O20-U1 | B | Y12xO20 |
59 | O20Q_SRR24049749 | Q | unresponsive | O20-U2 | B | Y12xO20 |
60 | O20Q_SRR24049748 | Q | unresponsive | O20-U3 | B | Y12xO20 |
filter_SNPs
]filterlist <- read.csv("miRNA_tRNA_genes.csv",header=F)[,c(1)]
SNPs <- filter_SNPs("Y12xO20_SNPs_for_analysis_sorted.bed",filterlist,2)
write.table(SNPs,"Y12xO20_SNP_gene_overlaps.txt",sep="\t",quote=F,row.names=F)
SNPs <- SNPs[,c(3,4)]
make_ASE_counts_matrix
]SNP_counts <- make_ASE_counts_matrix("counts",metadata,12)
write.csv(SNP_counts,"Y12xO20_SNP_gene_counts.csv")
Here, we normalize the read counts to adjust for variation in sequencing depth between samples. We will conduct the Storer-Kim binomial exact test of two proportions at each SNP using the normalized read counts, and then fit gene-wise general linear mixed-effects models to the raw read counts at each SNP, adjusting for library size by including the library size factors calculated during the normalization step as an offset term in the model.
calcSizeFactors
]normalizeASReadCounts
]size_factors <- calcSizeFactors(SNP_counts,metadata)
SNP_counts_normalized <- normalizeASReadCounts(SNP_counts,metadata,size_factors)
write.csv(SNP_counts_normalized,"Y12xO20_SNP_gene_counts_normalized.csv")
unresponsive.IDs <- metadata[metadata$phenotype=="unresponsive","sample.id"]
responsive.IDs <- metadata[metadata$phenotype=="responsive","sample.id"]
unresponsive_counts_normalized <- SNP_counts_normalized[,names(SNP_counts_normalized)%in%unresponsive.IDs]
unresponsive_counts <- SNP_counts[row.names(SNP_counts)%in%row.names(unresponsive_counts_normalized),
names(SNP_counts)%in%unresponsive.IDs]
responsive_counts_normalized <- SNP_counts_normalized[,names(SNP_counts_normalized)%in%responsive.IDs]
responsive_counts <- SNP_counts[row.names(SNP_counts)%in%row.names(responsive_counts_normalized),
names(SNP_counts)%in%responsive.IDs]
filter_counts
]unresponsive_counts_normalized <- filter_counts(unresponsive_counts_normalized,metadata,9)
write.csv(unresponsive_counts_normalized,"Y12xO20_unresponsive_counts_normalized.csv")
unresponsive_counts <- unresponsive_counts[row.names(unresponsive_counts)%in%row.names(unresponsive_counts_normalized),]
write.csv(unresponsive_counts,"Y12xO20_unresponsive_counts.csv")
responsive_counts_normalized <- filter_counts(responsive_counts_normalized,metadata,9)
write.csv(responsive_counts_normalized,"Y12xO20_responsive_counts_normalized.csv")
responsive_counts <- responsive_counts[row.names(responsive_counts)%in%row.names(responsive_counts_normalized),]
write.csv(responsive_counts,"Y12xO20_responsive_counts.csv")
PSGE.SK
]unresponsive.SK <- PSGE.SK(unresponsive_counts_normalized,metadata,"unresponsive",12)
write.csv(unresponsive.SK,"Y12xO20_unresponsiveSK.csv", row.names=F)
responsive.SK <- PSGE.SK(responsive_counts_normalized,metadata,"responsive",12)
write.csv(responsive.SK,"Y12xO20_responsiveSK.csv", row.names=F)
PSGE.GLIMMIX
]unresponsive.GLIMMIX <- PSGE.GLIMMIX(unresponsive_counts,metadata,
size_factors,12)
write.csv(unresponsive.GLIMMIX,"Y12xO20_unresponsiveGLIMMIX.csv", row.names=F)
responsive.GLIMMIX <- PSGE.GLIMMIX(responsive_counts,metadata,
size_factors,12)
write.csv(responsive.GLIMMIX,"Y12xO20_responsiveGLIMMIX.csv", row.names=F)
PSGE.analysis
]unresponsive.plot <- PSGE.analysis(unresponsive_counts_normalized,
"unresponsive",metadata,
unresponsive.SK,unresponsive.GLIMMIX)
write.csv(unresponsive.plot,"Y12xO20_unresponsive_PSGE.csv",row.names=F)
responsive.plot <- PSGE.analysis(responsive_counts_normalized,
"responsive",metadata,
responsive.SK,responsive.GLIMMIX)
write.csv(responsive.plot,"Y12xO20_responsive_PSGE.csv",row.names=F)
PSGE.collapse.avgExp
]PSGE.plot.tx
]triplot.plot
]unresponsive_counts_normalized <- read.csv("Y12xO20_unresponsive_counts_normalized.csv",
row.names=1)
unresponsive.plot <- read.csv("Y12xO20_unresponsive_PSGE.csv")
unresponsive.plot[is.na(unresponsive.plot$xbias),"xbias"] <- "NA"
unresponsive.plot[is.na(unresponsive.plot$bias),"bias"] <- "NA"
unresponsive.plot[is.na(unresponsive.plot$bias.plot),"bias.plot"] <- "NA"
unresponsive.plot <- rbind(unresponsive.plot[unresponsive.plot$
bias.plot%in%c("NA"),],
unresponsive.plot[unresponsive.plot$bias.plot%in%c(
"mat", "Lineage A", "Lineage B", "pat"),])
unresponsive.plot$bias.plot <- factor(unresponsive.plot$bias.plot,
levels = c("NA","mat", "Lineage A", "Lineage B", "pat"))
unresponsive.collapse <- PSGE.collapse.avgExp(unresponsive.plot,
unresponsive_counts_normalized,
metadata,"unresponsive")
g1.avgExp <- PSGE.plot.tx(unresponsive.collapse,"Unresponsive",
LineageA.color.dark="#b697f7",LineageA.color.light="#bfc1ff",
LineageB.color.dark="#f7af09",LineageB.color.light="#f7c95e")
responsive_counts_normalized <- read.csv("Y12xO20_responsive_counts_normalized.csv",
row.names=1)
responsive.plot <- read.csv("Y12xO20_responsive_PSGE.csv")
responsive.plot[is.na(responsive.plot$xbias),"xbias"] <- "NA"
responsive.plot[is.na(responsive.plot$bias),"bias"] <- "NA"
responsive.plot[is.na(responsive.plot$bias.plot),"bias.plot"] <- "NA"
responsive.plot <- rbind(responsive.plot[responsive.plot$bias.plot%in%c("NA"),],
responsive.plot[responsive.plot$bias.plot%in%c(
"mat", "Lineage A", "Lineage B", "pat"),])
responsive.plot$bias.plot <- factor(responsive.plot$bias.plot,
levels = c("NA","mat", "Lineage A", "Lineage B", "pat"))
responsive.collapse <- PSGE.collapse.avgExp(responsive.plot,
responsive_counts_normalized,
metadata,"responsive")
g2.avgExp <- PSGE.plot.tx(responsive.collapse,"Responsive",
LineageA.color.dark="#b697f7",LineageA.color.light="#bfc1ff",
LineageB.color.dark="#f7af09",LineageB.color.light="#f7c95e")
allgenes <- read.table("Amel_HAv3.1_genes.bed",header=F)[,c(4)]
triplot.avgExp <- triplot.plot(unresponsive.collapse,responsive.collapse,
"Unresponsive","Responsive",allgenes,
LineageA.color="#b697f7",LineageB.color="#f7af09")
fig1.avgExp <- arrangeGrob(g1.avgExp, triplot.avgExp,
g2.avgExp, widths=c(5,2.5,5))
ggsave(file="Y12xO20_avgExp.png", plot=fig1.avgExp, width=15, height=6)
export_PSGE_results
]export_PSGE_results(unresponsive.plot,responsive.plot,"Y12xO20_PSGEs.csv")
metadata <- read.csv("metadata.csv")
metadata <- metadata[metadata$block=="B4xW36",]
kbl(metadata) %>% kable_styling()
sample.id | parent | phenotype | individual | lineage | block | |
---|---|---|---|---|---|---|
25 | B4D_SRR24049729 | D | responsive | B4-R1 | A | B4xW36 |
26 | B4D_SRR24049728 | D | responsive | B4-R2 | A | B4xW36 |
27 | B4D_SRR24049757 | D | responsive | B4-R3 | A | B4xW36 |
28 | B4D_SRR24049756 | D | unresponsive | B4-U1 | A | B4xW36 |
29 | B4D_SRR24049755 | D | unresponsive | B4-U2 | A | B4xW36 |
30 | B4D_SRR24049754 | D | unresponsive | B4-U3 | A | B4xW36 |
31 | W36D_SRR24049746 | D | responsive | W36-R1 | B | B4xW36 |
32 | W36D_SRR24049745 | D | responsive | W36-R2 | B | B4xW36 |
33 | W36D_SRR24049744 | D | responsive | W36-R3 | B | B4xW36 |
34 | W36D_SRR24049743 | D | unresponsive | W36-U1 | B | B4xW36 |
35 | W36D_SRR24049742 | D | unresponsive | W36-U2 | B | B4xW36 |
36 | W36D_SRR24049741 | D | unresponsive | W36-U3 | B | B4xW36 |
61 | B4Q_SRR24049729 | Q | responsive | B4-R1 | A | B4xW36 |
62 | B4Q_SRR24049728 | Q | responsive | B4-R2 | A | B4xW36 |
63 | B4Q_SRR24049757 | Q | responsive | B4-R3 | A | B4xW36 |
64 | B4Q_SRR24049756 | Q | unresponsive | B4-U1 | A | B4xW36 |
65 | B4Q_SRR24049755 | Q | unresponsive | B4-U2 | A | B4xW36 |
66 | B4Q_SRR24049754 | Q | unresponsive | B4-U3 | A | B4xW36 |
67 | W36Q_SRR24049746 | Q | responsive | W36-R1 | B | B4xW36 |
68 | W36Q_SRR24049745 | Q | responsive | W36-R2 | B | B4xW36 |
69 | W36Q_SRR24049744 | Q | responsive | W36-R3 | B | B4xW36 |
70 | W36Q_SRR24049743 | Q | unresponsive | W36-U1 | B | B4xW36 |
71 | W36Q_SRR24049742 | Q | unresponsive | W36-U2 | B | B4xW36 |
72 | W36Q_SRR24049741 | Q | unresponsive | W36-U3 | B | B4xW36 |
filter_SNPs
]filterlist <- read.csv("miRNA_tRNA_genes.csv",header=F)[,c(1)]
SNPs <- filter_SNPs("B4xW36_SNPs_for_analysis_sorted.bed",filterlist,2)
write.table(SNPs,"B4xW36_SNP_gene_overlaps.txt",sep="\t",quote=F,row.names=F)
SNPs <- SNPs[,c(3,4)]
make_ASE_counts_matrix
]SNP_counts <- make_ASE_counts_matrix("counts",metadata,12)
write.csv(SNP_counts,"B4xW36_SNP_gene_counts.csv")
Here, we normalize the read counts to adjust for variation in sequencing depth between samples. We will conduct the Storer-Kim binomial exact test of two proportions at each SNP using the normalized read counts, and then fit gene-wise general linear mixed-effects models to the raw read counts at each SNP, adjusting for library size by including the library size factors calculated during the normalization step as an offset term in the model.
calcSizeFactors
]normalizeASReadCounts
]size_factors <- calcSizeFactors(SNP_counts,metadata)
SNP_counts_normalized <- normalizeASReadCounts(SNP_counts,metadata,size_factors)
write.csv(SNP_counts_normalized,"B4xW36_SNP_gene_counts_normalized.csv")
unresponsive.IDs <- metadata[metadata$phenotype=="unresponsive","sample.id"]
responsive.IDs <- metadata[metadata$phenotype=="responsive","sample.id"]
unresponsive_counts_normalized <- SNP_counts_normalized[,names(SNP_counts_normalized)%in%unresponsive.IDs]
unresponsive_counts <- SNP_counts[row.names(SNP_counts)%in%row.names(unresponsive_counts_normalized),
names(SNP_counts)%in%unresponsive.IDs]
responsive_counts_normalized <- SNP_counts_normalized[,names(SNP_counts_normalized)%in%responsive.IDs]
responsive_counts <- SNP_counts[row.names(SNP_counts)%in%row.names(responsive_counts_normalized),
names(SNP_counts)%in%responsive.IDs]
filter_counts
]unresponsive_counts_normalized <- filter_counts(unresponsive_counts_normalized,metadata,9)
write.csv(unresponsive_counts_normalized,"B4xW36_unresponsive_counts_normalized.csv")
unresponsive_counts <- unresponsive_counts[row.names(unresponsive_counts)%in%row.names(unresponsive_counts_normalized),]
write.csv(unresponsive_counts,"B4xW36_unresponsive_counts.csv")
responsive_counts_normalized <- filter_counts(responsive_counts_normalized,metadata,9)
write.csv(responsive_counts_normalized,"B4xW36_responsive_counts_normalized.csv")
responsive_counts <- responsive_counts[row.names(responsive_counts)%in%row.names(responsive_counts_normalized),]
write.csv(responsive_counts,"B4xW36_responsive_counts.csv")
PSGE.SK
]unresponsive.SK <- PSGE.SK(unresponsive_counts_normalized,metadata,"unresponsive",14)
write.csv(unresponsive.SK,"B4xW36_unresponsiveSK.csv", row.names=F)
responsive.SK <- PSGE.SK(responsive_counts_normalized,metadata,"responsive",14)
write.csv(responsive.SK,"B4xW36_responsiveSK.csv", row.names=F)
PSGE.GLIMMIX
]unresponsive.GLIMMIX <- PSGE.GLIMMIX(unresponsive_counts,metadata,
size_factors,14)
write.csv(unresponsive.GLIMMIX,"B4xW36_unresponsiveGLIMMIX.csv", row.names=F)
responsive.GLIMMIX <- PSGE.GLIMMIX(responsive_counts,metadata,
size_factors,14)
write.csv(responsive.GLIMMIX,"B4xW36_responsiveGLIMMIX.csv", row.names=F)
PSGE.analysis
]unresponsive.plot <- PSGE.analysis(unresponsive_counts_normalized,
"unresponsive",metadata,
unresponsive.SK,unresponsive.GLIMMIX)
write.csv(unresponsive.plot,"B4xW36_unresponsive_PSGE.csv",row.names=F)
responsive.plot <- PSGE.analysis(responsive_counts_normalized,
"responsive",metadata,
responsive.SK,responsive.GLIMMIX)
write.csv(responsive.plot,"B4xW36_responsive_PSGE.csv",row.names=F)
PSGE.collapse.avgExp
]PSGE.plot.tx
]triplot.plot
]unresponsive_counts_normalized <- read.csv("B4xW36_unresponsive_counts_normalized.csv",
row.names=1)
unresponsive.plot <- read.csv("B4xW36_unresponsive_PSGE.csv")
unresponsive.plot[is.na(unresponsive.plot$xbias),"xbias"] <- "NA"
unresponsive.plot[is.na(unresponsive.plot$bias),"bias"] <- "NA"
unresponsive.plot[is.na(unresponsive.plot$bias.plot),"bias.plot"] <- "NA"
unresponsive.plot <- rbind(unresponsive.plot[unresponsive.plot$
bias.plot%in%c("NA"),],
unresponsive.plot[unresponsive.plot$bias.plot%in%c(
"mat", "Lineage A", "Lineage B", "pat"),])
unresponsive.plot$bias.plot <- factor(unresponsive.plot$bias.plot,
levels = c("NA","mat", "Lineage A", "Lineage B", "pat"))
unresponsive.collapse <- PSGE.collapse.avgExp(unresponsive.plot,
unresponsive_counts_normalized,
metadata,"unresponsive")
g1.avgExp <- PSGE.plot.tx(unresponsive.collapse,"Unresponsive",
LineageA.color.dark="#ff7e79",LineageA.color.light="#fc9895",
LineageB.color.dark="#f7af09",LineageB.color.light="#f7c95e")
responsive_counts_normalized <- read.csv("B4xW36_responsive_counts_normalized.csv",
row.names=1)
responsive.plot <- read.csv("B4xW36_responsive_PSGE.csv")
responsive.plot[is.na(responsive.plot$xbias),"xbias"] <- "NA"
responsive.plot[is.na(responsive.plot$bias),"bias"] <- "NA"
responsive.plot[is.na(responsive.plot$bias.plot),"bias.plot"] <- "NA"
responsive.plot <- rbind(responsive.plot[responsive.plot$bias.plot%in%c("NA"),],
responsive.plot[responsive.plot$bias.plot%in%c(
"mat", "Lineage A", "Lineage B", "pat"),])
responsive.plot$bias.plot <- factor(responsive.plot$bias.plot,
levels = c("NA","mat", "Lineage A", "Lineage B", "pat"))
responsive.collapse <- PSGE.collapse.avgExp(responsive.plot,
responsive_counts_normalized,
metadata,"responsive")
g2.avgExp <- PSGE.plot.tx(responsive.collapse,"Responsive",
LineageA.color.dark="#ff7e79",LineageA.color.light="#fc9895",
LineageB.color.dark="#f7af09",LineageB.color.light="#f7c95e")
allgenes <- read.table("Amel_HAv3.1_genes.bed",header=F)[,c(4)]
triplot.avgExp <- triplot.plot(unresponsive.collapse,responsive.collapse,
"Unresponsive","Responsive",allgenes,
LineageA.color="#ff7e79",LineageB.color="#f7af09")
fig1.avgExp <- arrangeGrob(g1.avgExp, triplot.avgExp,
g2.avgExp, widths=c(5,2.5,5))
ggsave(file="B4xW36_avgExp.png", plot=fig1.avgExp, width=15, height=6)
export_PSGE_results(unresponsive.plot,responsive.plot,"B4xW36_PSGEs.csv")
metadata <- read.csv("metadata.csv")
metadata <- metadata[metadata$block=="LB11xW4",]
kbl(metadata) %>% kable_styling()
sample.id | parent | phenotype | individual | lineage | block | |
---|---|---|---|---|---|---|
1 | LB11D_SRR14654185 | D | responsive | LB11-R1 | A | LB11xW4 |
2 | LB11D_SRR14654181 | D | responsive | LB11-R2 | A | LB11xW4 |
3 | LB11D_SRR14654179 | D | responsive | LB11-R3 | A | LB11xW4 |
4 | LB11D_SRR14654184 | D | unresponsive | LB11-U1 | A | LB11xW4 |
5 | LB11D_SRR14654180 | D | unresponsive | LB11-U2 | A | LB11xW4 |
6 | LB11D_SRR14654178 | D | unresponsive | LB11-U3 | A | LB11xW4 |
7 | W4D_SRR14654177 | D | responsive | W4-R1 | B | LB11xW4 |
8 | W4D_SRR14654175 | D | responsive | W4-R2 | B | LB11xW4 |
9 | W4D_SRR14654183 | D | responsive | W4-R3 | B | LB11xW4 |
10 | W4D_SRR14654176 | D | unresponsive | W4-U1 | B | LB11xW4 |
11 | W4D_SRR14654174 | D | unresponsive | W4-U2 | B | LB11xW4 |
12 | W4D_SRR14654182 | D | unresponsive | W4-U3 | B | LB11xW4 |
37 | LB11Q_SRR14654185 | Q | responsive | LB11-R1 | A | LB11xW4 |
38 | LB11Q_SRR14654181 | Q | responsive | LB11-R2 | A | LB11xW4 |
39 | LB11Q_SRR14654179 | Q | responsive | LB11-R3 | A | LB11xW4 |
40 | LB11Q_SRR14654184 | Q | unresponsive | LB11-U1 | A | LB11xW4 |
41 | LB11Q_SRR14654180 | Q | unresponsive | LB11-U2 | A | LB11xW4 |
42 | LB11Q_SRR14654178 | Q | unresponsive | LB11-U3 | A | LB11xW4 |
43 | W4Q_SRR14654177 | Q | responsive | W4-R1 | B | LB11xW4 |
44 | W4Q_SRR14654175 | Q | responsive | W4-R2 | B | LB11xW4 |
45 | W4Q_SRR14654183 | Q | responsive | W4-R3 | B | LB11xW4 |
46 | W4Q_SRR14654176 | Q | unresponsive | W4-U1 | B | LB11xW4 |
47 | W4Q_SRR14654174 | Q | unresponsive | W4-U2 | B | LB11xW4 |
48 | W4Q_SRR14654182 | Q | unresponsive | W4-U3 | B | LB11xW4 |
filter_SNPs
]filterlist <- read.csv("miRNA_tRNA_genes.csv",header=F)[,c(1)]
SNPs <- filter_SNPs("LB11xW4_SNPs_for_analysis_sorted.bed",filterlist,12)
write.table(SNPs,"LB11xW4_SNP_gene_overlaps.txt",sep="\t",quote=F,row.names=F)
SNPs <- SNPs[,c(3,4)]
make_ASE_counts_matrix
]SNP_counts <- make_ASE_counts_matrix("counts",metadata,12)
write.csv(SNP_counts,"LB11xW4_SNP_gene_counts.csv")
Here, we normalize the read counts to adjust for variation in sequencing depth between samples. We will conduct the Storer-Kim binomial exact test of two proportions at each SNP using the normalized read counts, and then fit gene-wise general linear mixed-effects models to the raw read counts at each SNP, adjusting for library size by including the library size factors calculated during the normalization step as an offset term in the model.
calcSizeFactors
]normalizeASReadCounts
]size_factors <- calcSizeFactors(SNP_counts,metadata)
SNP_counts_normalized <- normalizeASReadCounts(SNP_counts,metadata,size_factors)
write.csv(SNP_counts_normalized,"LB11xW4_SNP_gene_counts_normalized.csv")
unresponsive.IDs <- metadata[metadata$phenotype=="unresponsive","sample.id"]
responsive.IDs <- metadata[metadata$phenotype=="responsive","sample.id"]
unresponsive_counts_normalized <- SNP_counts_normalized[,names(SNP_counts_normalized)%in%unresponsive.IDs]
unresponsive_counts <- SNP_counts[row.names(SNP_counts)%in%row.names(unresponsive_counts_normalized),
names(SNP_counts)%in%unresponsive.IDs]
responsive_counts_normalized <- SNP_counts_normalized[,names(SNP_counts_normalized)%in%responsive.IDs]
responsive_counts <- SNP_counts[row.names(SNP_counts)%in%row.names(responsive_counts_normalized),
names(SNP_counts)%in%responsive.IDs]
filter_counts
]unresponsive_counts_normalized <- filter_counts(unresponsive_counts_normalized,metadata,9)
write.csv(unresponsive_counts_normalized,"LB11xW4_unresponsive_counts_normalized.csv")
unresponsive_counts <- unresponsive_counts[row.names(unresponsive_counts)%in%row.names(unresponsive_counts_normalized),]
write.csv(unresponsive_counts,"LB11xW4_unresponsive_counts.csv")
responsive_counts_normalized <- filter_counts(responsive_counts_normalized,metadata,9)
write.csv(responsive_counts_normalized,"LB11xW4_responsive_counts_normalized.csv")
responsive_counts <- responsive_counts[row.names(responsive_counts)%in%row.names(responsive_counts_normalized),]
write.csv(responsive_counts,"LB11xW4_responsive_counts.csv")
PSGE.SK
]unresponsive.SK <- PSGE.SK(unresponsive_counts_normalized,metadata,"unresponsive",8)
write.csv(unresponsive.SK,"LB11xW4_unresponsiveSK.csv", row.names=F)
responsive.SK <- PSGE.SK(responsive_counts_normalized,metadata,"responsive",8)
write.csv(responsive.SK,"LB11xW4_responsiveSK.csv", row.names=F)
PSGE.GLIMMIX
]unresponsive.GLIMMIX <- PSGE.GLIMMIX(unresponsive_counts,metadata,
size_factors,12)
write.csv(unresponsive.GLIMMIX,"LB11xW4_unresponsiveGLIMMIX.csv", row.names=F)
responsive.GLIMMIX <- PSGE.GLIMMIX(responsive_counts,metadata,
size_factors,12)
write.csv(responsive.GLIMMIX,"LB11xW4_responsiveGLIMMIX.csv", row.names=F)
PSGE.analysis
]unresponsive.plot <- PSGE.analysis(unresponsive_counts_normalized,
"unresponsive",metadata,
unresponsive.SK,unresponsive.GLIMMIX)
write.csv(unresponsive.plot,"LB11xW4_unresponsive_PSGE.csv",row.names=F)
responsive.plot <- PSGE.analysis(responsive_counts_normalized,
"responsive",metadata,
responsive.SK,responsive.GLIMMIX)
write.csv(responsive.plot,"LB11xW4_responsive_PSGE.csv",row.names=F)
PSGE.collapse.avgExp
]PSGE.plot.tx
]triplot.plot
]unresponsive_counts_normalized <- read.csv("LB11xW4_unresponsive_counts_normalized.csv",
row.names=1)
unresponsive.plot <- read.csv("LB11xW4_unresponsive_PSGE.csv")
unresponsive.plot[is.na(unresponsive.plot$xbias),"xbias"] <- "NA"
unresponsive.plot[is.na(unresponsive.plot$bias),"bias"] <- "NA"
unresponsive.plot[is.na(unresponsive.plot$bias.plot),"bias.plot"] <- "NA"
unresponsive.plot <- rbind(unresponsive.plot[unresponsive.plot$
bias.plot%in%c("NA"),],
unresponsive.plot[unresponsive.plot$bias.plot%in%c(
"mat", "Lineage A", "Lineage B", "pat"),])
unresponsive.plot$bias.plot <- factor(unresponsive.plot$bias.plot,
levels = c("NA","mat", "Lineage A", "Lineage B", "pat"))
unresponsive.collapse <- PSGE.collapse.avgExp(unresponsive.plot,
unresponsive_counts_normalized,
metadata,"unresponsive")
write.csv(unresponsive.collapse,"LB11xW4_unresponsive_collapsed.csv",row.names=F)
g1.avgExp <- PSGE.plot.tx(unresponsive.collapse,"Unresponsive",
LineageA.color.dark="#2d9874",LineageA.color.light="#d5eae3",
LineageB.color.dark="#cb6c37",LineageB.color.light="#f5e2d7")
responsive_counts_normalized <- read.csv("LB11xW4_responsive_counts_normalized.csv",
row.names=1)
responsive.plot <- read.csv("LB11xW4_responsive_PSGE.csv")
responsive.plot[is.na(responsive.plot$xbias),"xbias"] <- "NA"
responsive.plot[is.na(responsive.plot$bias),"bias"] <- "NA"
responsive.plot[is.na(responsive.plot$bias.plot),"bias.plot"] <- "NA"
responsive.plot <- rbind(responsive.plot[responsive.plot$bias.plot%in%c("NA"),],
responsive.plot[responsive.plot$bias.plot%in%c(
"mat", "Lineage A", "Lineage B", "pat"),])
responsive.plot$bias.plot <- factor(responsive.plot$bias.plot,
levels = c("NA","mat", "Lineage A", "Lineage B", "pat"))
responsive.collapse <- PSGE.collapse.avgExp(responsive.plot,
responsive_counts_normalized,
metadata,"responsive")
write.csv(responsive.collapse,"LB11xW4_responsive_collapsed.csv",row.names=F)
g2.avgExp <- PSGE.plot.tx(responsive.collapse,"Responsive",
LineageA.color.dark="#2d9874",LineageA.color.light="#d5eae3",
LineageB.color.dark="#cb6c37",LineageB.color.light="#f5e2d7")
allgenes <- read.table("Amel_HAv3.1_genes.bed",header=F)[,c(4)]
triplot.avgExp <- triplot.plot(unresponsive.collapse,responsive.collapse,
"Unresponsive","Responsive",allgenes,
LineageA.color="#2d9874",LineageB.color="#cb6c37")
fig1.avgExp <- arrangeGrob(g1.avgExp, triplot.avgExp,
g2.avgExp, widths=c(5,2.5,5))
ggsave(file="LB11xW4_avgExp.png", plot=fig1.avgExp, width=15, height=6)
export_PSGE_results
]export_PSGE_results(unresponsive.plot,responsive.plot,"LB11xW4_PSGEs.csv")
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 grid parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] ggprism_1.0.4 DESeq2_1.38.3
## [3] SummarizedExperiment_1.28.0 Biobase_2.58.0
## [5] MatrixGenerics_1.10.0 matrixStats_0.63.0
## [7] GenomicRanges_1.50.2 GenomeInfoDb_1.34.9
## [9] IRanges_2.32.0 S4Vectors_0.36.1
## [11] BiocGenerics_0.44.0 tagcloud_0.6
## [13] ggpubr_0.5.0 doParallel_1.0.17
## [15] iterators_1.0.14 foreach_1.5.2
## [17] kableExtra_1.3.4 gridExtra_2.3
## [19] car_3.1-1 carData_3.0-5
## [21] lmerTest_3.1-3 lme4_1.1-31
## [23] Matrix_1.5-3 tryCatchLog_1.3.1
## [25] Rfast_2.0.6 RcppZiggurat_0.1.6
## [27] Rcpp_1.0.9 plyr_1.8.8
## [29] forcats_0.5.2 stringr_1.5.0
## [31] dplyr_1.0.10 purrr_1.0.1
## [33] readr_2.1.3 tidyr_1.2.1
## [35] tibble_3.1.8 ggplot2_3.4.0
## [37] tidyverse_1.3.2
##
## loaded via a namespace (and not attached):
## [1] readxl_1.4.1 backports_1.4.1 systemfonts_1.0.4
## [4] splines_4.2.2 BiocParallel_1.32.5 digest_0.6.31
## [7] htmltools_0.5.4 fansi_1.0.3 magrittr_2.0.3
## [10] memoise_2.0.1 googlesheets4_1.0.1 tzdb_0.3.0
## [13] Biostrings_2.66.0 annotate_1.76.0 modelr_0.1.10
## [16] svglite_2.1.1 timechange_0.2.0 colorspace_2.0-3
## [19] blob_1.2.3 rvest_1.0.3 haven_2.5.1
## [22] xfun_0.36 crayon_1.5.2 RCurl_1.98-1.9
## [25] jsonlite_1.8.4 glue_1.6.2 gtable_0.3.1
## [28] gargle_1.2.1 zlibbioc_1.44.0 XVector_0.38.0
## [31] webshot_0.5.4 DelayedArray_0.24.0 abind_1.4-5
## [34] scales_1.2.1 futile.options_1.0.1 DBI_1.1.3
## [37] rstatix_0.7.2 viridisLite_0.4.1 xtable_1.8-4
## [40] bit_4.0.5 httr_1.4.4 RColorBrewer_1.1-3
## [43] ellipsis_0.3.2 pkgconfig_2.0.3 XML_3.99-0.13
## [46] sass_0.4.4 dbplyr_2.2.1 locfit_1.5-9.7
## [49] utf8_1.2.2 tidyselect_1.2.0 rlang_1.0.6
## [52] AnnotationDbi_1.60.0 munsell_0.5.0 cellranger_1.1.0
## [55] tools_4.2.2 cachem_1.0.6 cli_3.6.0
## [58] generics_0.1.3 RSQLite_2.2.20 broom_1.0.2
## [61] evaluate_0.19 fastmap_1.1.0 yaml_2.3.6
## [64] knitr_1.41 bit64_4.0.5 fs_1.5.2
## [67] KEGGREST_1.38.0 nlme_3.1-160 formatR_1.14
## [70] xml2_1.3.3 compiler_4.2.2 rstudioapi_0.14
## [73] png_0.1-8 ggsignif_0.6.4 reprex_2.0.2
## [76] geneplotter_1.76.0 bslib_0.4.2 stringi_1.7.12
## [79] highr_0.10 futile.logger_1.4.3 lattice_0.20-45
## [82] nloptr_2.0.3 vctrs_0.5.1 pillar_1.8.1
## [85] lifecycle_1.0.3 jquerylib_0.1.4 bitops_1.0-7
## [88] R6_2.5.1 codetools_0.2-18 lambda.r_1.2.4
## [91] boot_1.3-28 MASS_7.3-58.1 assertthat_0.2.1
## [94] withr_2.5.0 GenomeInfoDbData_1.2.9 hms_1.1.2
## [97] minqa_1.2.5 rmarkdown_2.19 googledrive_2.0.0
## [100] numDeriv_2016.8-1.1 lubridate_1.9.0