Open utterances-bot opened 8 months ago
En la terminal WSL ejecutar lo siguiente:
## moverse a referencia
cd data/referencia
## Instalar bowtie2
sudo apt install bowtie2
## bowtie2-build
bowtie2-build GCF_000085865.1_ASM8586v1_genomic.fna index/myco
source("install.R", local = TRUE)
# Source scripts
library(R.utils) # Linux
source("scripts/sratoolkit.R", local = TRUE)
source("scripts/listfastq.R", local = TRUE)
source("scripts/filtereads.R", local = TRUE)
source("scripts/consensus.R", local = TRUE)
############### Download genomes from NCBI ###############
##########################################################
url <- fromJSON(file = "data/reference/ref.json")
filename <- "GCF_000085865.1_ASM8586v1_genomic.fna.gz"
download_genome(url$urlref,filename, "data/reference/")
############### Download files from SRA - NCBI ###############
##############################################################
download_sra_files(id = "SRR15616380")
download_sra_files(id = "SRR15616379")
############### Quality of fastq ###############
################################################
lecturas <- list_fastq(pattern = c("SRR15616379_1.fastq","SRR15616379_2.fastq"))
plotQualityProfile(c(lecturas$lf,lecturas$lr))
## Adicional
library(fastqcr)
fastqc(fq.dir = "data/rawdata/", # Directorio de archivos FASTQ
qc.dir = "results/", # Directorio de resultados
threads = 4,
fastqc.path = "FastQC/fastqc"
)
qc.dir = "results/"
qc_report(qc.dir, result.file = "results/multiqc/",
experiment = "Mycoplasma")
############### Filter reads from fastq files ###############
#############################################################
log_filter <- filter_reads(name = lecturas$name, lf = lecturas$lf,
lr = lecturas$lr, trunc = 250)
############### Assembly genomes ###############
################################################
# uncompressed reference genome
gunzip("data/reference/GCF_000085865.1_ASM8586v1_genomic.fna.gz")
# reference genome bowtie2 index
bowtie2_build("data/reference/GCF_000085865.1_ASM8586v1_genomic.fna",
bt2Index = "data/reference/index/myco" , overwrite = TRUE)
# aling fastq files to reference
bowtie2(bt2Index = "data/reference/index/myco",
samOutput = "results/SRR15616379.sam",
seq1 = "data/processed_data/filtered_F/SRR15616379_filt_1.fastq",
seq2 = "data/processed_data/filtered_R/SRR15616379_filt_2.fastq",
"--threads=3")
############### Alignment files manipulation ###############
############################################################
# Convert to BAM files
asBam("results/SRR15616379.sam")
# Read BAM file
bamFile <- BamFile("results/SRR15616379.bam")
# Statistics of alignment
bam <- countBam(bamFile)
quickBamFlagSummary(bamFile)
seqinfo(bamFile)
# Open large size of BAM files
yieldSize(bamFile) <- 1
open(bamFile)
scanBam(bamFile)[[1]]$seq
close(bamFile)
yieldSize(bamFile) <- NA
Buenas noches profesor, cuando intento correr este comando me sale error y no se porque:
library(fastqcr) fastqc(fq.dir="data/rawdata/",
- qc.dir="results/",
- threads = 4,
- fastqc.path = "FastQC/fastqc") Error in .check_if_unix() : Unix system (MAC OSX or Linux) required.
@CarolaMP Que tal Carola, ese error lo mencione en clase que podia ser posible, por ello revisa como realizar ese analisis pero desde la terminal.
#sudo apt install fastqc
fastqc -t 4 -o results/ data/rawdata/*fastq.gz
Hola Francisco, tienes una solución para el siguiente error al momento de alinear con Bowtie2 los índices del genoma de referencia con los filtrados de los reads?: (¿tal vez una alternativa en la terminal?)
bowtie2(bt2Index = "data/reference/index/myco",
- samOutput = "results/SRR15616379.sam",
- seq1 = "data/processed_data/filtered_F/SRR15616379_filt_1.fastq",
- seq2 = "data/processed_data/filtered_R/SRR15616379_filt_2.fastq",
- "--threads=3") Error in system(call, intern = TRUE, show.output.on.console = TRUE) : 'perl' not found
Aní me pasa igual,
Error in system(call, intern = TRUE, show.output.on.console = TRUE) : 'perl' not found
ggplot(cover, aes(x = pos, y = count, color = count)) + geom_point(shape = ".", size = 1) + scale_color_gradient(low = "lightblue", high = "darkred") + labs(title = "Gráfico de Cobertura", subtitle = "Gráfico de Cobertura de Genoma 'Metamycoplasma hominis'", x = "Posición", y = "Cobertura") + theme_minimal() + theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5)) + annotate("text", x = Inf, y = -Inf, hjust = 1, vjust = -1, label = "Autor: Piero Martín Araujo Chávez", size = 3)
Genomics web - Pipeline de Genómica en R
https://franciscoascue.github.io/Rgenomics/modulo8.1.html