getwd() ## Gets the present working directory that you are in
setwd("C:/Users/Daniel/Documents/edgeR") ## sets the working directory to be output in a certain area of your files
counts<-read.table("C:/Users/Daniel/Documents/RSEM outputs/ep.counts.matrix") WRONG
counts<-("C:/Users/Daniel/Desktop/RSEM outputs/ep.counts.matrix") WRONG
head(counts) LABELED
counts<-read.table("C:/Users/Daniel/Desktop/RSEM outputs/ep.counts.matrix") ## assigns the varible counts to read the matrix found from ep.counts.matrix in a table
head(counts) ## gives the first 6 data counts of your matrix
tail(counts) ## gives the last 6 data counts of your matrix
dim(counts) ## gives you the number of rows by columns (for our matrix: rows= specific genes, columns:specific transcriptome)
design<-read.csv("C:/Users/Daniel/Downloads/design/csv") WRONG
design<-read.csv("C:/Users/Daniel/Downloads/design.csv") ## Imports the design.csv (comma-separated file) that was made in excel to classify the samples(i.e x10A, X10A, etc.) vs groups (i.e active.lo, active.eye,etc.) and it called design.
design ## calls the variable design to show contents of the variable
install.packages("edgeR") ## downloads the edgeR package?
source("https://bioconductor.org/biocLite.R") ##??
biocLite("edgeR") ##??
groups<-design$Groups ## renames the column in the design.csv to just the name groups
groups ## calls the variable groups to show the content of it
library(edgeR) ##??
dge<-DGEList(counts, group=groups) ## sets variable dge to list the counts and their lib.size, and norm.factors
help(DGEList) ## is a help page for using DGEList
str(dge) ## States the parameters of the variable dge
dge<-calcno WRONG
dge<-calcNormFactoes(dge) WRONG
dge<-calcNormFactors(dge) ##??
str(dge)
ls() ##lists contents of the present working directory
head(counts) ##done
class(dge) ## classifies the class (i.e matrix, data.frame)
class(counts) ## DONE
norm<-cpm(dge, normalize.lib.sizes=TRUE) ## normalizes the sequencing data to compare data and use in plots
PLOTmds(norm) WRONG
plotMDS(norm) ## makes a simple component graph
des<-model.matrix(~groups) ##makes a matrix that assigns a 1 or 0 to the group that are similar to the intercept
dge<-estimateDisp(dge, des) ## has a class called "DGEList" and the estimation of dispersion compared to the dge and des matrices
et<-exactTest(dge, pair=c("active.eye", "inact.eye")) ## simulates a t-test in statistics and is used for comparative analysis
tags<-topTags(et) ##?
tags ## calls the variable tags to display the contents
tags<-topTags(et, n=1000) ## this makes the number of genes that are shown to become 1000
head(tags) ## shows the first 6 genes in the tags variable
write.table(file="tags_test.eyes.txt", tags) ## makes a table to output to a txt files that will be used to export into a excel file and analyze the genes and their significant differences between two libraries.
tags<-topTags(et, n=1000) ## modifies the variable tags to have 1000 genes from the et test
......................................................................................................................................................+--++...................................
etALO.IALO<-exactTest(dge, pair=c("active.lo", "inact.lo") ## etALO.IALO is the variable name that will compare the genes from the libraries of active.lo and inact.lo using the exact test
tags<-topTags(et, n=1000)
tags<-topTags(et, n=1000)
tags<-topTags(etALO.IALO, n=1000)
ls()
etaloilo<-exactTest(dge, pair=c("active.lo", "inact.lo")
etaloilo<-exactTest(dge, pair=c("active.lo", "inact.lo"))
etaloilo<-exactTest(dge, pair=c("active.lo", "inact.lo"))
tags<-topTags(etaloilo, n=1000)
write.table(file="tags_test.lo.txt", tags)
etaloae<-exactTest(dge,pair=c("active.lo", "inact.lo"))
tags<-topTags(etaloae, n=1000)
write.table(file="tags_test.elo.txt", tags)
x <- read.delim("TableOfCounts.txt",row.names="Symbol")
group <- factor(c(1,1,2,2))
y <- DGEList(counts=x,group=group)
y <- calcNormFactors(y)
design <- model.matrix(~group)
y <- estimateDisp(y,design)
The process to analyze the genes yielded:
look into edgeR file on the desktop
make sure the tags_test.*.txt is available
open excel and open and browse under ALL FILES to find the specific file
on A1 insert a cell that shifts to the right and have correct group head names
Highlight/bold the columns that have an FDR value less than 0.05
open new excel and place all these data points into there and manually analyze the specific genes that have shown a significant difference.
et<-exactTest(dge, pair=c("EA", "EI")) ## groups are EA, EI, LA, LI
tags<-topTags(et)
tags
tags<-topTags(et, n=1000)
write.table(file="testEA-EI.txt", tags)
tags<-topTags(et, n=1000)
et2<-exactTest(dge, pair=c("LA", "LI")) ## groups are EA, EI, LA, LI
tags<-topTags(et2)
tags
tags<-topTags(et2, n=1000)
write.table(file="testLA-LI.txt", tags)
tags<-topTags(et2, n=1000)
design<-read.csv("groups.csv")
gene1<-data["PPYR_10486", ]
master<-as.data.frame(t(gene1))
master$tissue<-design$tissue
master$active<-design$active
colnames(master)<-c("expression", "tissue", "active")
library(plyr)
Warning message:
package ‘plyr’ was built under R version 3.4.4
library(ggplot2)
max(summary$mean)
Error in summary$mean : object of type 'closure' is not subsettable
summary<-ddply(master, c("tissue", "active"), summarise, mean=mean(expression), sd=sd(expression), sem=sd(expression)/sqrt(length(expression)))
max(summary$mean)
[1] 14.004
Do this for gene 2 and then for each one go back and add the + scale_y_continuous(limits=(0,24))
The testLO + Eyes.txt files that are not including the shield tissue and is a matrix
et<-exactTest(dge, pair=c("EA", "EI")) ## groups are EA, EI, LA, LI
tags<-topTags(et)
tags
tags<-topTags(et, n=1000)
write.table(file="testEA-EI.txt", tags)
tags<-topTags(et, n=1000)
et2<-exactTest(dge, pair=c("LA", "LI")) ## groups are EA, EI, LA, LI
tags<-topTags(et2)
tags
tags<-topTags(et2, n=1000)
write.table(file="testLA-LI.txt", tags)
tags<-topTags(et2, n=1000)
R Computational Statistical Analysis + COMMANDS
getwd() ## Gets the present working directory that you are in setwd("C:/Users/Daniel/Documents/edgeR") ## sets the working directory to be output in a certain area of your files counts<-read.table("C:/Users/Daniel/Documents/RSEM outputs/ep.counts.matrix") WRONG counts<-("C:/Users/Daniel/Desktop/RSEM outputs/ep.counts.matrix") WRONG head(counts) LABELED counts<-read.table("C:/Users/Daniel/Desktop/RSEM outputs/ep.counts.matrix") ## assigns the varible counts to read the matrix found from ep.counts.matrix in a table head(counts) ## gives the first 6 data counts of your matrix tail(counts) ## gives the last 6 data counts of your matrix dim(counts) ## gives you the number of rows by columns (for our matrix: rows= specific genes, columns:specific transcriptome) design<-read.csv("C:/Users/Daniel/Downloads/design/csv") WRONG design<-read.csv("C:/Users/Daniel/Downloads/design.csv") ## Imports the design.csv (comma-separated file) that was made in excel to classify the samples(i.e x10A, X10A, etc.) vs groups (i.e active.lo, active.eye,etc.) and it called design. design ## calls the variable design to show contents of the variable install.packages("edgeR") ## downloads the edgeR package? source("https://bioconductor.org/biocLite.R") ##?? biocLite("edgeR") ##?? groups<-design$Groups ## renames the column in the design.csv to just the name groups groups ## calls the variable groups to show the content of it library(edgeR) ##?? dge<-DGEList(counts, group=groups) ## sets variable dge to list the counts and their lib.size, and norm.factors help(DGEList) ## is a help page for using DGEList str(dge) ## States the parameters of the variable dge dge<-calcno WRONG
dge<-calcNormFactoes(dge) WRONG dge<-calcNormFactors(dge) ##?? str(dge) ls() ##lists contents of the present working directory head(counts) ##done class(dge) ## classifies the class (i.e matrix, data.frame) class(counts) ## DONE norm<-cpm(dge, normalize.lib.sizes=TRUE) ## normalizes the sequencing data to compare data and use in plots PLOTmds(norm) WRONG plotMDS(norm) ## makes a simple component graph des<-model.matrix(~groups) ##makes a matrix that assigns a 1 or 0 to the group that are similar to the intercept dge<-estimateDisp(dge, des) ## has a class called "DGEList" and the estimation of dispersion compared to the dge and des matrices et<-exactTest(dge, pair=c("active.eye", "inact.eye")) ## simulates a t-test in statistics and is used for comparative analysis tags<-topTags(et) ##? tags ## calls the variable tags to display the contents tags<-topTags(et, n=1000) ## this makes the number of genes that are shown to become 1000 head(tags) ## shows the first 6 genes in the tags variable write.table(file="tags_test.eyes.txt", tags) ## makes a table to output to a txt files that will be used to export into a excel file and analyze the genes and their significant differences between two libraries. tags<-topTags(et, n=1000) ## modifies the variable tags to have 1000 genes from the et test ......................................................................................................................................................+--++................................... etALO.IALO<-exactTest(dge, pair=c("active.lo", "inact.lo") ## etALO.IALO is the variable name that will compare the genes from the libraries of active.lo and inact.lo using the exact test tags<-topTags(et, n=1000) tags<-topTags(et, n=1000) tags<-topTags(etALO.IALO, n=1000) ls() etaloilo<-exactTest(dge, pair=c("active.lo", "inact.lo") etaloilo<-exactTest(dge, pair=c("active.lo", "inact.lo")) etaloilo<-exactTest(dge, pair=c("active.lo", "inact.lo")) tags<-topTags(etaloilo, n=1000) write.table(file="tags_test.lo.txt", tags) etaloae<-exactTest(dge,pair=c("active.lo", "inact.lo")) tags<-topTags(etaloae, n=1000) write.table(file="tags_test.elo.txt", tags)
The process to analyze the genes yielded:
Run_blast.sh run_blast.zip
CLEANEST version of R comparison of Active vs. Inactive
getwd() setwd("C:/Users/Daniel/Desktop/Routputs") counts<-read.table("C:/Users/Daniel/Desktop/RSEM outputs/ep.counts.matrix") design<-read.csv("C:/Users/Daniel/Desktop/ModelsForGlm.csv") library("egdeR") ## this is install.packages("edgeR") source("https://bioconductor.org/biocLite.R") biocLite("edgeR") keeps<-c("X10A", "X10B", "X19A", "X19B", "X20A", "X20B", "X21A", "X21B", "X8A", "X8B", "X9A", "X9B") noshield<-counts[,colnames(counts) %in% keeps] keeps<-c("X10A", "X10B", "X19A", "X19B", "X20A", "X20B", "X21A", "X21B", "X8A", "X8B", "X9A", "X9B") noshield<-counts[,colnames(counts) %in% keeps] groups<-design$group groups dge<-DGEList(noshield, group=groups) str(dge) dge<-calcNormFactors(dge) str(dge) head(noshield) ##optional norm<-cpm(dge, normalize.lib.sizes=TRUE) plotMDS(norm) des<-model.matrix(~groups) dge<-estimateDisp(dge, des)
et<-exactTest(dge, pair=c("EA", "EI")) ## groups are EA, EI, LA, LI tags<-topTags(et) tags tags<-topTags(et, n=1000) write.table(file="testEA-EI.txt", tags) tags<-topTags(et, n=1000)
et2<-exactTest(dge, pair=c("LA", "LI")) ## groups are EA, EI, LA, LI tags<-topTags(et2) tags tags<-topTags(et2, n=1000) write.table(file="testLA-LI.txt", tags) tags<-topTags(et2, n=1000)
CONVERTING TO SVG
svg("Carnation1.svg")
FINDING MAX/MIN:
Do this for gene 2 and then for each one go back and add the + scale_y_continuous(limits=(0,24))
The testLO + Eyes.txt files that are not including the shield tissue and is a matrix
getwd() setwd("C:/Users/Daniel/Desktop/Routputs") counts<-read.table("C:/Users/Daniel/Desktop/RSEM outputs/ep.counts.matrix") design<-read.csv("C:/Users/Daniel/Desktop/ModelsForGlm.csv") library("egdeR") ## this is install.packages("edgeR") source("https://bioconductor.org/biocLite.R") biocLite("edgeR") keeps<-c("X10A", "X10B", "X19A", "X19B", "X20A", "X20B", "X21A", "X21B", "X8A", "X8B", "X9A", "X9B") noshield<-counts[,colnames(counts) %in% keeps] keeps<-c("X10A", "X10B", "X19A", "X19B", "X20A", "X20B", "X21A", "X21B", "X8A", "X8B", "X9A", "X9B") noshield<-counts[,colnames(counts) %in% keeps] groups<-design$group groups dge<-DGEList(noshield, group=groups) str(dge) dge<-calcNormFactors(dge) str(dge) head(noshield) ##optional norm<-cpm(dge, normalize.lib.sizes=TRUE) plotMDS(norm) des<-model.matrix(~groups) dge<-estimateDisp(dge, des)
et<-exactTest(dge, pair=c("EA", "EI")) ## groups are EA, EI, LA, LI tags<-topTags(et) tags tags<-topTags(et, n=1000) write.table(file="testEA-EI.txt", tags) tags<-topTags(et, n=1000)
et2<-exactTest(dge, pair=c("LA", "LI")) ## groups are EA, EI, LA, LI tags<-topTags(et2) tags tags<-topTags(et2, n=1000) write.table(file="testLA-LI.txt", tags) tags<-topTags(et2, n=1000)
save.image("C:\Users\Daniel\Desktop\Rwithnoshield")
CHANGING THE Y SCALE: