1 Find the hyphal specificity of Khd4-bound mRNA targets
1.1 Khd4-bound mRNAs responds to hyphae morphological cue
Show the code
#Load the differential expression value#resultstable_Fi_wtvskd<- readRDS("resultstable_Fi_wtvskd.rds")#resultstable_kd_YevFi<- readRDS("resultstable_kd_YevFi.rds")#resultstable_wt_YevFi<- readRDS("resultstable_wt_YevFi.rds")#resultstable_Ye_wtvkd<- readRDS("resultstable_Ye_wtvkd.rds")#re-enter the high-conf results#high.conf#check the number of genes to cross check the data.frame#n_distinct(high.conf$gene.id)# separate gene.Ids from high confident target list for venndiagramhcGenes=unique(high.conf$gene.id)hcGenes=data.frame(hcGenes)names(hcGenes)[1] <-"gene.Id"# select all gene.Ids from wt_yvsF#colnames(wt_yevFi)hy.spe=dplyr::select(wt_yevFi,c("gene.ID","regulated", "up_down")) # select rows of specific columnsnhy.spe=hy.spe %>%filter(grepl('up|down', up_down))# subset only those genes that are differentially expressedvenn.hyphae=list(hyspe=nhy.spe$gene.ID, kspe=high.conf$gene.id) # make a list. 1. hyphal specific genes 2. high conf Khd4 targets#make venndiagramggvenn(venn.hyphae, c("hyspe", "kspe"),fill_color =c("gray32", "darkred")) +ggtitle("Overlap between DEGs (wt:hyphevs yeast) and Khd4-bound mRNAs")#+
Show the code
#myTheme1
1.2 Analysing the AUACCC distribution in overlapping Khd4-bound mRNAs
Although Vma21 is not differentially regulated, I have added them in the following analysis as they play an important role in vacuole biogenesis.
Show the code
# collect the overlapping geneshy.spe_TRIBE =list(shared = nhy.spe$gene.ID[nhy.spe$gene.ID %in%intersect(nhy.spe$gene.ID, high.conf$gene.id)])df_hy.spe_TRIBE=as_tibble(hy.spe_TRIBE) # instead of as_tibble one can also use data.frame. ############### Diagnose our shared dataset# doing vlookup like function to identify the gene ids in highconfident targets that are overlapping with regulated gene ids in hyphal specific data (wt_yevFi)#lookup function is "the column value to lookup" followedby the "the reference column" followed by the " column value to be entered"#high.conf$hyphae_specific=lookup(high.conf$gene.id, nhy.spe$gene.ID, nhy.spe$gene.ID)# Count how many value in the column hyphae specific is not NA#n_distinct(high.conf$hyphae_specific)## looks good!!########################################################### make columns with the differential gene expression datadf_hy.spe_TRIBE$log2FC_hyphae=lookup(df_hy.spe_TRIBE$shared, wt_yevFi$gene.ID, wt_yevFi$log2FoldChange.schrink)df_hy.spe_TRIBE$log2FC_hyphae_wtvkD=lookup(df_hy.spe_TRIBE$shared, Fi_wtvskd_DEG$gene.ID, Fi_wtvskd_DEG$log2FoldChange.schrink)# Add AUACCC position informationUTR3_AUACCC = auaccc.mot[auaccc.mot$Position =="3UTR",]No_UTR3_AUACCC = auaccc.mot[!auaccc.mot$Gene_ID %in% UTR3_AUACCC$Gene_ID,]# Add auaccc informationdf_hy.spe_TRIBE= df_hy.spe_TRIBE %>%mutate(UTR3auaccc = shared %in% UTR3_AUACCC$Gene_ID)df_hy.spe_TRIBE$UTR3auaccc =as.factor(df_hy.spe_TRIBE$UTR3auaccc)# manually add vma21 to the plotdf_hy.spe_TRIBE_man=rbind(df_hy.spe_TRIBE, data.frame("shared"="UMAG_11418", "log2FC_hyphae"=0.209896155, "log2FC_hyphae_wtvkD"=0.833623476, "UTR3auaccc"=TRUE))#scatterplot diagramscattertest_all=ggplot()+geom_point(aes(x=log2FC_hyphae_wtvkD, y=log2FC_hyphae),df_hy.spe_TRIBE_man, size=2, color="grey")+stat_quadrant_counts(aes(x=log2FC_hyphae_wtvkD, y=log2FC_hyphae),df_hy.spe_TRIBE_man,size=6,color="darkgrey") +geom_point(aes(x=log2FC_hyphae_wtvkD, y=log2FC_hyphae),df_hy.spe_TRIBE_man[df_hy.spe_TRIBE_man$UTR3auaccc==TRUE,], size=2,color="#7AB650")+# stat_quadrant_counts(aes(x=log2FC_hyphae_wtvkD, y=log2FC_hyphae),df_hy.spe_TRIBE_man[df_hy.spe_TRIBE_man$UTR3auaccc==TRUE,],size=6, position = "jitter", color="#7AB650") +#scale_fill_manual(values=c("TRUE" = "darkgreen", "FALSE" = "grey"))+coord_cartesian(xlim=c(-2,2), ylim=c(-4,4))+geom_hline(yintercept=0, size=0.75, color="dark grey")+geom_vline(xintercept=0, size=0.5, color="dark grey")+theme_bw()+stat_cor(method ="pearson", label.x =-10, label.y=5, size=8) + myTheme1scattertest_all
Note: In the publication, the quadrant count of all data points in the plot was calculated excluding the manually added vma21 to maintain the count of 122 overlapping targets and to avoid potential confusion. It is explicitly stated in the figure legends that vma21 was manually included in the analysis.
Next I would like to check the enrichment of the AUACCC motif in mRNA targets responsive to morphological changes. For this i want to use the data frames in which target mRNAs are segregated based on the position of the AUACCC motif. Before using them, i need to change the datastructure in a way that the column names are same across different data frames.
Show the code
# all genes influenced by hyphal induction"# all genes that are diff. expressed upon hyphal inductionall_GGE_hvsY = wt_yevFi[!wt_yevFi$up_down =="not",] auaccc.mot$DEG_hvy=lookup(auaccc.mot$Gene_ID, all_GGE_hvsY$gene.ID, all_GGE_hvsY$gene.ID)auaccc_DEG_all = auaccc.mot[!is.na(auaccc.mot$DEG_hvy),]auaccc_DEG_all$L2FC_DGE_hvy =lookup(auaccc_DEG_all$Gene_ID,all_GGE_hvsY$gene.ID,all_GGE_hvsY$up_down)auaccc_DEG_all = auaccc_DEG_all[,c(1,19,23,24)]all_hyphae = auaccc_DEG_all[auaccc_DEG_all$L2FC_DGE_hvy=="up",]all_yeast = auaccc_DEG_all[auaccc_DEG_all$L2FC_DGE_hvy=="down",]
Show the code
df_hy.spe_TRIBE_man <-mutate(df_hy.spe_TRIBE_man, up_down =case_when(log2FC_hyphae >log2(1.5) ~"up",log2FC_hyphae <-log2(1.5) ~"down",TRUE~"not"))#make it a factor df_hy.spe_TRIBE_man$up_down <-factor(df_hy.spe_TRIBE_man$up_down)
Show the code
# get gene ids of ORF containing genesuo =df_hy.spe_TRIBE_manuo$orf = uo$shared %in% auaccc.mot[auaccc.mot$Position =="ORF",]$Gene_IDuo = uo[uo$orf,]# get gene ids of 5´UTR containing genesu5 =df_hy.spe_TRIBE_manu5$utr5 = u5$shared %in% auaccc.mot[auaccc.mot$Position =="5UTR",]$Gene_IDu5 = u5[u5$utr5,]# get gene ids of 3´UTR containing genesu3 =df_hy.spe_TRIBE_manu3$utr3 = u3$shared %in% auaccc.mot[auaccc.mot$Position =="3UTR",]$Gene_IDu3 = u3[u3$utr3,]
Show the code
# df changes#i will try making a function#targets that are regulated ### hyphal specificgetregtargets =function(targ.list, posit, morpho){ targ.list = targ.list[,c(1:3,5)]targ.list$position = posit # make a new columntarg_hy = targ.list[targ.list$up_down == morpho,] #targ_hy_UR = targ_hy[targ_hy$log2FC_hyphae_wtvkD >0,]targ_hy_DR = targ_hy[targ_hy$log2FC_hyphae_wtvkD <0,]return(list(targ_hy_UR,targ_hy_DR))}# 3UTRu3_hy =getregtargets(u3, "3UTR", "up")u3_hy_UR = u3_hy[[1]] # only UR genesu3_hy_DR = u3_hy[[2]] # only DR genes# 5UTRu5_hy =getregtargets(u5, "5UTR", "up")u5_hy_UR = u5_hy[[1]] # only UR genesu5_hy_DR = u5_hy[[2]] # only DR genes#ORFuo_hy =getregtargets(uo, "ORF", "up")uo_hy_UR = uo_hy[[1]] # only UR genesuo_hy_DR = uo_hy[[2]] # only DR genes### yeast-specific# 3UTRu3_ye =getregtargets(u3, "3UTR", "down")u3_ye_UR = u3_ye[[1]] # only UR genesu3_ye_DR = u3_ye[[2]] # only DR genes# 5UTRu5_ye =getregtargets(u5, "5UTR", "down")u5_ye_UR = u5_ye[[1]] # only UR genesu5_ye_DR = u5_ye[[2]] # only DR genes#ORFuo_ye =getregtargets(uo, "ORF", "down")uo_ye_UR = uo_ye[[1]] # only UR genesuo_ye_DR = uo_ye[[2]] # only DR genes#rbind UR_hyphaeur_hyphae =rbind(u3_hy_UR,u5_hy_UR, uo_hy_UR)dr_hyphae =rbind(u3_hy_DR, u5_hy_DR, uo_hy_DR )ur_yeast =rbind(u3_ye_UR,u5_ye_UR, uo_ye_UR)dr_yeast =rbind(u3_ye_DR, u5_ye_DR, uo_ye_DR )
Fisher's Exact Test for Count Data
data: df_3UTR[, c(1, 3)]
p-value = 0.006893
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
1.198096 4.078972
sample estimates:
odds ratio
2.219172
Show the code
chiURY =fisher.test(df_3UTR[,c(4,6)])chiURY
Fisher's Exact Test for Count Data
data: df_3UTR[, c(4, 6)]
p-value = 0.05975
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
0.9373129 4.0316139
sample estimates:
odds ratio
1.965331
Fisher's Exact Test for Count Data
data: df_5UTR[, c(2, 3)]
p-value = 0.02618
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
1.069824 4.184467
sample estimates:
odds ratio
2.144929
Show the code
chiDRy5 =fisher.test(df_5UTR[,c(5,6)])chiDRy5
Fisher's Exact Test for Count Data
data: df_5UTR[, c(5, 6)]
p-value = 0.03898
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
0.8869458 9.6220041
sample estimates:
odds ratio
3.060614
1.3 normalized gene counts of mRNA targets
Conducting a DESeq2 analyse to determine the mRNA levels of arl1, vma21, hok1 and mob1 in wt yeast, wt hyphae and khd4D hyphae cells.
Show the code
# make file pathmydir<-(Path="C:/Users/Sri/Documents/Khd4/DGE_analysis_SM_PhD/DGE-bioinfofolder/20220919_yeastvhyphae/")myfiles<-list.files(path=mydir,pattern="tabular")myfiles
## make sampleTable all# collect information for sampleTable sampleFiles<- myfilesmycondition<-substr(myfiles,1,1) # extract substring in myfiles from position 1 to 2myreplicates <-substr(myfiles, 4,4) # extract substring in myfiles from the position 4mysamplename<-sub(".tabular", "", myfiles)# Make sampleTablesampleTable<-data.frame(sampleName= mysamplename,fileName= sampleFiles,condition=mycondition,replicate=myreplicates)#look at the sampleTablesampleTable
sampleName fileName condition replicate
1 FA_1 FA_1.tabular F 1
2 FA_2 FA_2.tabular F 2
3 FA_3 FA_3.tabular F 3
4 FK_1 FK_1.tabular F 1
5 FK_2 FK_2.tabular F 2
6 FK_3 FK_3.tabular F 3
7 YA_1 YA_1.tabular Y 1
8 YA_2 YA_2.tabular Y 2
9 YA_3 YA_3.tabular Y 3
10 YK_1 YK_1.tabular Y 1
11 YK_2 YK_2.tabular Y 2
12 YK_3 YK_3.tabular Y 3
Show the code
#assign pvalmypval=0.05
1.3.1 make DEseq2 object
Show the code
# make dds object for alldds<-DESeqDataSetFromHTSeqCount(sampleTable= sampleTable,directory = mydir,design=~condition )# inspect DEseqDataSet (dds)dds
---title: "Khd4 regulates subcellular protein levels of membrane-trafficking regulators"date: last-modifiedauthor: - name: "Srimeenakshi Sankaranarayanan"title-block-banner: trueformat: html: theme: flatly self-contained: true code-fold: true code-tools: true code-summary: "Show the code" toc: true number-sections: true anchor-sections: trueeditor: visualexecute: echo: true error: false warning: false message: false---```{r}# Librarieslibrary(BSgenome.Umaydis.ENSMBL.UM1) # forged ustilago genomelibrary(ggplot2)library(forcats)library(tidyr)library(dplyr)library(tidyverse)library(magrittr)library(ggpp)library(gginnards)library(ggrepel)library(ggpubr)library(ggforce)library(ggrastr)library(viridis)library(reshape2)library(gprofiler2)library(ggvenn)library(lookup)library(DESeq2)``````{r}load("C:/Users/Sri/Documents/Khd4/DGE_analysis_SM_PhD/Kathis_lab/hyperTRIBE/hyperTRIBE.rds")```# Find the hyphal specificity of Khd4-bound mRNA targets## Khd4-bound mRNAs responds to hyphae morphological cue```{r}#Load the differential expression value#resultstable_Fi_wtvskd<- readRDS("resultstable_Fi_wtvskd.rds")#resultstable_kd_YevFi<- readRDS("resultstable_kd_YevFi.rds")#resultstable_wt_YevFi<- readRDS("resultstable_wt_YevFi.rds")#resultstable_Ye_wtvkd<- readRDS("resultstable_Ye_wtvkd.rds")#re-enter the high-conf results#high.conf#check the number of genes to cross check the data.frame#n_distinct(high.conf$gene.id)# separate gene.Ids from high confident target list for venndiagramhcGenes=unique(high.conf$gene.id)hcGenes=data.frame(hcGenes)names(hcGenes)[1] <-"gene.Id"# select all gene.Ids from wt_yvsF#colnames(wt_yevFi)hy.spe=dplyr::select(wt_yevFi,c("gene.ID","regulated", "up_down")) # select rows of specific columnsnhy.spe=hy.spe %>%filter(grepl('up|down', up_down))# subset only those genes that are differentially expressedvenn.hyphae=list(hyspe=nhy.spe$gene.ID, kspe=high.conf$gene.id) # make a list. 1. hyphal specific genes 2. high conf Khd4 targets#make venndiagramggvenn(venn.hyphae, c("hyspe", "kspe"),fill_color =c("gray32", "darkred")) +ggtitle("Overlap between DEGs (wt:hyphevs yeast) and Khd4-bound mRNAs")#+#myTheme1```## Analysing the AUACCC distribution in overlapping Khd4-bound mRNAs Although Vma21 is not differentially regulated, I have added them in the following analysis as they play an important role in vacuole biogenesis.```{r}# collect the overlapping geneshy.spe_TRIBE =list(shared = nhy.spe$gene.ID[nhy.spe$gene.ID %in%intersect(nhy.spe$gene.ID, high.conf$gene.id)])df_hy.spe_TRIBE=as_tibble(hy.spe_TRIBE) # instead of as_tibble one can also use data.frame. ############### Diagnose our shared dataset# doing vlookup like function to identify the gene ids in highconfident targets that are overlapping with regulated gene ids in hyphal specific data (wt_yevFi)#lookup function is "the column value to lookup" followedby the "the reference column" followed by the " column value to be entered"#high.conf$hyphae_specific=lookup(high.conf$gene.id, nhy.spe$gene.ID, nhy.spe$gene.ID)# Count how many value in the column hyphae specific is not NA#n_distinct(high.conf$hyphae_specific)## looks good!!########################################################### make columns with the differential gene expression datadf_hy.spe_TRIBE$log2FC_hyphae=lookup(df_hy.spe_TRIBE$shared, wt_yevFi$gene.ID, wt_yevFi$log2FoldChange.schrink)df_hy.spe_TRIBE$log2FC_hyphae_wtvkD=lookup(df_hy.spe_TRIBE$shared, Fi_wtvskd_DEG$gene.ID, Fi_wtvskd_DEG$log2FoldChange.schrink)# Add AUACCC position informationUTR3_AUACCC = auaccc.mot[auaccc.mot$Position =="3UTR",]No_UTR3_AUACCC = auaccc.mot[!auaccc.mot$Gene_ID %in% UTR3_AUACCC$Gene_ID,]# Add auaccc informationdf_hy.spe_TRIBE= df_hy.spe_TRIBE %>%mutate(UTR3auaccc = shared %in% UTR3_AUACCC$Gene_ID)df_hy.spe_TRIBE$UTR3auaccc =as.factor(df_hy.spe_TRIBE$UTR3auaccc)# manually add vma21 to the plotdf_hy.spe_TRIBE_man=rbind(df_hy.spe_TRIBE, data.frame("shared"="UMAG_11418", "log2FC_hyphae"=0.209896155, "log2FC_hyphae_wtvkD"=0.833623476, "UTR3auaccc"=TRUE))#scatterplot diagramscattertest_all=ggplot()+geom_point(aes(x=log2FC_hyphae_wtvkD, y=log2FC_hyphae),df_hy.spe_TRIBE_man, size=2, color="grey")+stat_quadrant_counts(aes(x=log2FC_hyphae_wtvkD, y=log2FC_hyphae),df_hy.spe_TRIBE_man,size=6,color="darkgrey") +geom_point(aes(x=log2FC_hyphae_wtvkD, y=log2FC_hyphae),df_hy.spe_TRIBE_man[df_hy.spe_TRIBE_man$UTR3auaccc==TRUE,], size=2,color="#7AB650")+# stat_quadrant_counts(aes(x=log2FC_hyphae_wtvkD, y=log2FC_hyphae),df_hy.spe_TRIBE_man[df_hy.spe_TRIBE_man$UTR3auaccc==TRUE,],size=6, position = "jitter", color="#7AB650") +#scale_fill_manual(values=c("TRUE" = "darkgreen", "FALSE" = "grey"))+coord_cartesian(xlim=c(-2,2), ylim=c(-4,4))+geom_hline(yintercept=0, size=0.75, color="dark grey")+geom_vline(xintercept=0, size=0.5, color="dark grey")+theme_bw()+stat_cor(method ="pearson", label.x =-10, label.y=5, size=8) + myTheme1scattertest_allscattertest_3UTRauaccc=ggplot()+geom_point(aes(x=log2FC_hyphae_wtvkD, y=log2FC_hyphae),df_hy.spe_TRIBE_man, size=2, color="grey")+#stat_quadrant_counts(aes(x=log2FC_hyphae_wtvkD, y=log2FC_hyphae),df_hy.spe_TRIBE_man,size=6,color="darkgrey") +geom_point(aes(x=log2FC_hyphae_wtvkD, y=log2FC_hyphae),df_hy.spe_TRIBE_man[df_hy.spe_TRIBE_man$UTR3auaccc==TRUE,], size=2,color="#7AB650")+stat_quadrant_counts(aes(x=log2FC_hyphae_wtvkD, y=log2FC_hyphae),df_hy.spe_TRIBE_man[df_hy.spe_TRIBE_man$UTR3auaccc==TRUE,],size=6, color="#7AB650") +#scale_fill_manual(values=c("TRUE" = "darkgreen", "FALSE" = "grey"))+coord_cartesian(xlim=c(-2,2), ylim=c(-4,4))+geom_hline(yintercept=0, size=0.75, color="dark grey")+geom_vline(xintercept=0, size=0.5, color="dark grey")+theme_bw()+stat_cor(method ="pearson", label.x =-10, label.y=5, size=8) + myTheme1scattertest_3UTRauaccc```Note: In the publication, the quadrant count of all data points in the plot was calculated excluding the manually added vma21 to maintain the count of 122 overlapping targets and to avoid potential confusion. It is explicitly stated in the figure legends that vma21 was manually included in the analysis.Next I would like to check the enrichment of the AUACCC motif in mRNA targets responsive to morphological changes. For this i want to use the data frames in which target mRNAs are segregated based on the position of the AUACCC motif. Before using them, i need to change the datastructure in a way that the column names are same across different data frames. ```{r}# all genes influenced by hyphal induction"# all genes that are diff. expressed upon hyphal inductionall_GGE_hvsY = wt_yevFi[!wt_yevFi$up_down =="not",] auaccc.mot$DEG_hvy=lookup(auaccc.mot$Gene_ID, all_GGE_hvsY$gene.ID, all_GGE_hvsY$gene.ID)auaccc_DEG_all = auaccc.mot[!is.na(auaccc.mot$DEG_hvy),]auaccc_DEG_all$L2FC_DGE_hvy =lookup(auaccc_DEG_all$Gene_ID,all_GGE_hvsY$gene.ID,all_GGE_hvsY$up_down)auaccc_DEG_all = auaccc_DEG_all[,c(1,19,23,24)]all_hyphae = auaccc_DEG_all[auaccc_DEG_all$L2FC_DGE_hvy=="up",]all_yeast = auaccc_DEG_all[auaccc_DEG_all$L2FC_DGE_hvy=="down",]``````{r}df_hy.spe_TRIBE_man <-mutate(df_hy.spe_TRIBE_man, up_down =case_when(log2FC_hyphae >log2(1.5) ~"up",log2FC_hyphae <-log2(1.5) ~"down",TRUE~"not"))#make it a factor df_hy.spe_TRIBE_man$up_down <-factor(df_hy.spe_TRIBE_man$up_down)``````{r}# get gene ids of ORF containing genesuo =df_hy.spe_TRIBE_manuo$orf = uo$shared %in% auaccc.mot[auaccc.mot$Position =="ORF",]$Gene_IDuo = uo[uo$orf,]# get gene ids of 5´UTR containing genesu5 =df_hy.spe_TRIBE_manu5$utr5 = u5$shared %in% auaccc.mot[auaccc.mot$Position =="5UTR",]$Gene_IDu5 = u5[u5$utr5,]# get gene ids of 3´UTR containing genesu3 =df_hy.spe_TRIBE_manu3$utr3 = u3$shared %in% auaccc.mot[auaccc.mot$Position =="3UTR",]$Gene_IDu3 = u3[u3$utr3,]``````{r}# df changes#i will try making a function#targets that are regulated ### hyphal specificgetregtargets =function(targ.list, posit, morpho){ targ.list = targ.list[,c(1:3,5)]targ.list$position = posit # make a new columntarg_hy = targ.list[targ.list$up_down == morpho,] #targ_hy_UR = targ_hy[targ_hy$log2FC_hyphae_wtvkD >0,]targ_hy_DR = targ_hy[targ_hy$log2FC_hyphae_wtvkD <0,]return(list(targ_hy_UR,targ_hy_DR))}# 3UTRu3_hy =getregtargets(u3, "3UTR", "up")u3_hy_UR = u3_hy[[1]] # only UR genesu3_hy_DR = u3_hy[[2]] # only DR genes# 5UTRu5_hy =getregtargets(u5, "5UTR", "up")u5_hy_UR = u5_hy[[1]] # only UR genesu5_hy_DR = u5_hy[[2]] # only DR genes#ORFuo_hy =getregtargets(uo, "ORF", "up")uo_hy_UR = uo_hy[[1]] # only UR genesuo_hy_DR = uo_hy[[2]] # only DR genes### yeast-specific# 3UTRu3_ye =getregtargets(u3, "3UTR", "down")u3_ye_UR = u3_ye[[1]] # only UR genesu3_ye_DR = u3_ye[[2]] # only DR genes# 5UTRu5_ye =getregtargets(u5, "5UTR", "down")u5_ye_UR = u5_ye[[1]] # only UR genesu5_ye_DR = u5_ye[[2]] # only DR genes#ORFuo_ye =getregtargets(uo, "ORF", "down")uo_ye_UR = uo_ye[[1]] # only UR genesuo_ye_DR = uo_ye[[2]] # only DR genes#rbind UR_hyphaeur_hyphae =rbind(u3_hy_UR,u5_hy_UR, uo_hy_UR)dr_hyphae =rbind(u3_hy_DR, u5_hy_DR, uo_hy_DR )ur_yeast =rbind(u3_ye_UR,u5_ye_UR, uo_ye_UR)dr_yeast =rbind(u3_ye_DR, u5_ye_DR, uo_ye_DR )``````{r}df_ur_hy =table(ur_hyphae$position) %>%as.data.frame() %>%mutate(set ="Khd4_bound_increase") %>%mutate(stage ="hyphae") %>%set_colnames(.,c("postn", "value", "set", "stage"))df_dr_hy =table(dr_hyphae$position) %>%as.data.frame() %>%mutate(set ="Khd4_bound_decrease") %>%mutate(stage ="hyphae") %>%set_colnames(.,c("postn", "value", "set", "stage"))df_all_hy =table(all_hyphae$Position) %>%as.data.frame() %>%mutate(set ="all") %>%mutate(stage ="hyphae") %>%set_colnames(.,c("postn", "value", "set", "stage"))df_ur_ye =table(ur_yeast$position) %>%as.data.frame() %>%mutate(set ="Khd4_bound_increase") %>%mutate(stage ="yeast") %>%set_colnames(.,c("postn", "value", "set", "stage"))df_dr_ye =table(dr_yeast$position) %>%as.data.frame() %>%mutate(set ="Khd4_bound_decrease") %>%mutate(stage ="yeast") %>%set_colnames(.,c("postn", "value", "set", "stage"))df_all_ye =table(all_yeast$Position) %>%as.data.frame() %>%mutate(set ="all") %>%mutate(stage ="yeast") %>%set_colnames(.,c("postn", "value", "set", "stage"))df_postn =rbind(df_ur_hy, df_dr_hy, df_all_hy, df_ur_ye, df_dr_ye, df_all_ye)pl_postn =ggplot(df_postn, aes(x=factor(set, levels=c("Khd4_bound_increase","Khd4_bound_decrease", "all")), y=value, fill=factor(postn, levels=c("5UTR", "ORF", "3UTR"))))+geom_bar(position ="fill",stat ="identity")+scale_fill_manual(values=c("#C8E1D3", "#7AB695", "#20854E"))+facet_wrap(~ stage)+labs(title ="position of auaccc motif in targets influenced by hyphal induction",x="",y="% transcripts with AUACCC",fill ="")+ myTheme1+theme(axis.text.x =element_text(angle=45, hjust=1, vjust=1))pl_postnggsave(pl_postn, path="C:/Users/Sri/Documents/Khd4/DGE_analysis_SM_PhD/TRIBE_data/Editing_events_all/Khd4/bedgraphs", filename="target_hyphae_yeast_spe_motif.pdf", width=9, height=6, unit="cm")``````{r}#pvalue calculationdf_3UTR =data.frame(URH=c(24,29),DRH=c(11,35),allH=c(162,435),URY =c(16,23),DRY =c(2,14),allY=c(123,348)) %>%set_rownames(.,c("with3UTRmot", "without3UTRmot"))chiURH =fisher.test(df_3UTR[,c(1,3)])chiURHchiURY =fisher.test(df_3UTR[,c(4,6)])chiURY``````{r}df_5UTR =data.frame(URH=c(11,42),DRH=c(17,29),allH=c(128,469),URY =c(8,31),DRY =c(6,10),allY=c(77,394)) %>%set_rownames(.,c("with5UTRmot", "without5UTRmot"))chiDRH5 =fisher.test(df_5UTR[,c(2,3)])chiDRH5chiDRy5 =fisher.test(df_5UTR[,c(5,6)])chiDRy5```## normalized gene counts of mRNA targetsConducting a DESeq2 analyse to determine the mRNA levels of arl1, vma21, hok1 and mob1 in wt yeast, wt hyphae and khd4D hyphae cells. ```{r}# make file pathmydir<-(Path="C:/Users/Sri/Documents/Khd4/DGE_analysis_SM_PhD/DGE-bioinfofolder/20220919_yeastvhyphae/")myfiles<-list.files(path=mydir,pattern="tabular")myfiles## make sampleTable all# collect information for sampleTable sampleFiles<- myfilesmycondition<-substr(myfiles,1,1) # extract substring in myfiles from position 1 to 2myreplicates <-substr(myfiles, 4,4) # extract substring in myfiles from the position 4mysamplename<-sub(".tabular", "", myfiles)# Make sampleTablesampleTable<-data.frame(sampleName= mysamplename,fileName= sampleFiles,condition=mycondition,replicate=myreplicates)#look at the sampleTablesampleTable#assign pvalmypval=0.05```### make DEseq2 object```{r}# make dds object for alldds<-DESeqDataSetFromHTSeqCount(sampleTable= sampleTable,directory = mydir,design=~condition )# inspect DEseqDataSet (dds)dds#building DEseq2 modeldds <-DESeq(dds)## estimating size factors## estimating dispersions## gene-wise dispersion estimates## mean-dispersion relationship## final dispersion estimates## fitting model and te8stingsizeFactors(dds)```### Count plots```{r normalised counts}cnts=counts(dds, normalized=TRUE) # get normalized counts#colnames(cnts) <- c("FA","FA","FA","FK","FK","FK","YA","YA","YA","YK","YK","YK")dat.count=as.data.frame(subset(cnts, select=-c(10:12))) # drop yeast khd4d RNA seq datadat.count$gene.id=rownames(dat.count)cnts.df=melt(dat.count)colnames(cnts.df) <-c("gene.ID", "condition", "norm.count")my.genes<-c(arl1="UMAG_10313", vma21="UMAG_11418", hok1="UMAG_11790")cnts.genes=subset(cnts.df, gene.ID %in% my.genes)cnts.genes$name=names(my.genes)[match(cnts.genes$gene.ID, my.genes)]cnts.test1=cnts.genes %>%mutate(across("condition", str_replace, "_1", ""))cnts.test2=cnts.test1 %>%mutate(across("condition", str_replace, "_2", ""))cnts.test3=cnts.test2 %>%mutate(across("condition", str_replace, "_3", ""))# change the condition namecnts.test3 = cnts.test3 %>%mutate(condition_name =case_when( condition =="FA"~"wildtype_hyphae", condition =="FK"~"khd4D_hyphae", condition =="YA"~"Wiltype_yeast"))pln=ggplot(cnts.test3, aes(x=factor(condition_name, levels =c("Wiltype_yeast", "wildtype_hyphae", "khd4D_hyphae")), y=norm.count)) +geom_point(col="darkgreen") +scale_y_log10()+facet_wrap(~name, scales="free")+labs(x="",y ="mRNA expression levels")+ myTheme1+theme(axis.text.x =element_text(angle =45, hjust=1, vjust=1 )) #facet_wrap to make adjacent plots for multiple genes.pln#ggsave(pln, path="C:/Users/Srimeenakshi/Desktop/DGE_analysis_SM_PhD/TRIBE data/Editing events_all/Khd4/bedgraphs", #filename="pln.pdf", width=16, height=6, unit="cm")``````{r}sessionInfo()```