yctuga / pop.gen.rolfsii

0 stars 0 forks source link

poppr script for gacrc cluster #1

Open yctuga opened 1 month ago

yctuga commented 1 month ago

the script follow the tutorial https://grunwaldlab.github.io/Population_Genetics_in_R/gbs_analysis.html

script used the dataset provided on the above website and it worked on the gacrc cluster

the following script is to run shell script; it prompts the R script

save the script with .sh suffix


!/bin/bash

SBATCH --partition=batch

SBATCH --job-name=popper.text

SBATCH --ntasks=1

SBATCH --time=30:00

SBATCH --mem=25gb

SBATCH --mail-user=yct@uga.edu

SBATCH --mail-type=BEGIN,END,FAIL

cd /scratch/yct/work_PacBio/rubi.test

all dataset was saved under above directory

ref: https://grunwaldlab.github.io/Population_Genetics_in_R/gbs_analysis.html

need to install packages ('vcfR') ('poppr') ('RColorBrewer') in personal sapelo2 firstly

module load R/3.6.2-foss-2019b R < rubi.test.R --save

yctuga commented 1 month ago

save the following script with a seperate file with .R suffix

after running this script, four plots (same as the tutorial) should generated under the specified directory

setwd('/scratch/yct/work_PacBio/rubi.test') getwd()

library(vcfR) library(poppr) library(ape) library(RColorBrewer)

rubi.VCF <- read.vcfR("prubi_gbs.vcf.gz") rubi.VCF

pop.data <- read.table("population_data.gbs.txt", sep ="\t", header = TRUE) all(colnames(rubi.VCF@gt)[-1]==pop.data$AccessID)

gl.rubi <- vcfR2genlight(rubi.VCF) ploidy(gl.rubi) <-2 pop(gl.rubi) <-pop.data$State gl.rubi

par(c("mar", "mai"))

par(mar=c(1,1,1,1)) jpeg("DT.plot.jpg", width=1200, height = 1200)

tree <- aboot(gl.rubi, tree="upgma", distance = bitwise.dist, sample = 100, showtree= TRUE, cutoff = 50, quite=T) cols <-brewer.pal(n=nPop(gl.rubi), name = "Dark2") plot.phylo(tree, cex=0.8, font = 2, adj = 0, tip.color = cols[pop(gl.rubi)]) nodelabels(tree$node.label, adj =c(1.3, -0.5), frame ="n", cex =.8, font=3, xpd= TRUE) legend('topleft', legend = c("CA", "OR", "WA"), fill= cols, border =FALSE, bty="n", cex=.8) axis(side=1) title(xlab="genetic distance")

dev.off()

library(igraph) par(mar=c(1,1,1,1)) jpeg("MSN.plot.jpg", width=1200, height = 1200)

rubi.dist <-bitwise.dist(gl.rubi) rubi.msn <- poppr.msn(gl.rubi, rubi.dist, showplot = FALSE, include.ties=T) node.size <- rep(2, times =nInd(gl.rubi)) names(node.size) <- indNames(gl.rubi) vertex.attributes(rubi.msn$graph)$size <-node.size

set.seed(9) plot_poppr_msn(gl.rubi, rubi.msn, palette = brewer.pal(n= nPop(gl.rubi), name ="Dark2"), gadj =70) dev.off()

par(mar=c(1,1,1,1)) jpeg("PCA.plot.jpg", width=1200, height = 1200)

rubi.pca <- glPca(gl.rubi, nf =3) barplot(100*rubi.pca$eig/sum(rubi.pca$eig), col = heat.colors(50), main ="PCA Eigenvalues") title(ylab="Percent of variance\nexplained", line =2) title(xlab ="Eigenvalues", line =1) dev.off()

library(ggplot2)

jpeg("PCA2.plot.jpg", width=1200, height = 1200) rubi.pca.scores <- as.data.frame(rubi.pca$scores) rubi.pca.scores$pop <- pop(gl.rubi) set.seed(9) p <- ggplot(rubi.pca.scores, aes(x=PC1, y=PC2, colour=pop)) p <- p + geom_point(size=2) p <- p + stat_ellipse(level=0.95, size =1) p <- p + scale_color_manual(values = cols) p <- p + geom_hline(yintercept = 0) p <- p + geom_vline(xintercept = 0) p <- p + theme_bw() p dev.off()