I am running this workflow on my 40 samples. My issue is that every row of seqtab.nochim is NA except for the first row, which has one of the fastq.gz file names. I think this is what caused the error when I tried to construct a phyloseq object (towards the end). All of the previous steps appeared to run successfully. What went wrong? I may have mixed up the order of the steps. Thank you so much.
Here is the full console output, in case it is useful:
> path <- "C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered"
> path <- "C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered"
>
> list.files(path)
[1] "LFGCM-8276-01-0-1_F_filt.fastq.gz" "LFGCM-8276-01-0-1_R_filt.fastq.gz" "LFGCM-8276-02-0-1_F_filt.fastq.gz"
[4] "LFGCM-8276-02-0-1_R_filt.fastq.gz" "LFGCM-8276-03-0-1_F_filt.fastq.gz" "LFGCM-8276-03-0-1_R_filt.fastq.gz"
[7] "LFGCM-8276-04-0-1_F_filt.fastq.gz" "LFGCM-8276-04-0-1_R_filt.fastq.gz" "LFGCM-8276-05-0-1_F_filt.fastq.gz"
[10] "LFGCM-8276-05-0-1_R_filt.fastq.gz" "LFGCM-8276-06-0-1_F_filt.fastq.gz" "LFGCM-8276-06-0-1_R_filt.fastq.gz"
[13] "LFGCM-8276-07-0-1_F_filt.fastq.gz" "LFGCM-8276-07-0-1_R_filt.fastq.gz" "LFGCM-8276-08-0-1_F_filt.fastq.gz"
[16] "LFGCM-8276-08-0-1_R_filt.fastq.gz" "LFGCM-8276-09-0-1_F_filt.fastq.gz" "LFGCM-8276-09-0-1_R_filt.fastq.gz"
[19] "LFGCM-8276-10-0-1_F_filt.fastq.gz" "LFGCM-8276-10-0-1_R_filt.fastq.gz" "LFGCM-8276-11-0-1_F_filt.fastq.gz"
[22] "LFGCM-8276-11-0-1_R_filt.fastq.gz" "LFGCM-8276-12-0-1_F_filt.fastq.gz" "LFGCM-8276-12-0-1_R_filt.fastq.gz"
[25] "LFGCM-8276-13-0-1_F_filt.fastq.gz" "LFGCM-8276-13-0-1_R_filt.fastq.gz" "LFGCM-8276-14-0-1_F_filt.fastq.gz"
[28] "LFGCM-8276-14-0-1_R_filt.fastq.gz" "LFGCM-8276-15-0-1_F_filt.fastq.gz" "LFGCM-8276-15-0-1_R_filt.fastq.gz"
[31] "LFGCM-8276-16-0-1_F_filt.fastq.gz" "LFGCM-8276-16-0-1_R_filt.fastq.gz" "LFGCM-8276-17-0-1_F_filt.fastq.gz"
[34] "LFGCM-8276-17-0-1_R_filt.fastq.gz" "LFGCM-8276-18-0-1_F_filt.fastq.gz" "LFGCM-8276-18-0-1_R_filt.fastq.gz"
[37] "LFGCM-8276-19-0-1_F_filt.fastq.gz" "LFGCM-8276-19-0-1_R_filt.fastq.gz" "LFGCM-8276-20-0-1_F_filt.fastq.gz"
[40] "LFGCM-8276-20-0-1_R_filt.fastq.gz" "LFGCM-8276-21-0-1_F_filt.fastq.gz" "LFGCM-8276-21-0-1_R_filt.fastq.gz"
[43] "LFGCM-8276-22-0-1_F_filt.fastq.gz" "LFGCM-8276-22-0-1_R_filt.fastq.gz" "LFGCM-8276-23-0-1_F_filt.fastq.gz"
[46] "LFGCM-8276-23-0-1_R_filt.fastq.gz" "LFGCM-8276-24-0-1_F_filt.fastq.gz" "LFGCM-8276-24-0-1_R_filt.fastq.gz"
[49] "LFGCM-8276-25-0-1_F_filt.fastq.gz" "LFGCM-8276-25-0-1_R_filt.fastq.gz" "LFGCM-8276-26-0-1_F_filt.fastq.gz"
[52] "LFGCM-8276-26-0-1_R_filt.fastq.gz" "LFGCM-8276-27-0-1_F_filt.fastq.gz" "LFGCM-8276-27-0-1_R_filt.fastq.gz"
[55] "LFGCM-8276-28-0-1_F_filt.fastq.gz" "LFGCM-8276-28-0-1_R_filt.fastq.gz" "LFGCM-8276-29-0-1_F_filt.fastq.gz"
[58] "LFGCM-8276-29-0-1_R_filt.fastq.gz" "LFGCM-8276-30-0-1_F_filt.fastq.gz" "LFGCM-8276-30-0-1_R_filt.fastq.gz"
[61] "LFGCM-8276-31-0-1_F_filt.fastq.gz" "LFGCM-8276-31-0-1_R_filt.fastq.gz" "LFGCM-8276-32-0-1_F_filt.fastq.gz"
[64] "LFGCM-8276-32-0-1_R_filt.fastq.gz" "LFGCM-8276-33-0-1_F_filt.fastq.gz" "LFGCM-8276-33-0-1_R_filt.fastq.gz"
[67] "LFGCM-8276-34-0-1_F_filt.fastq.gz" "LFGCM-8276-34-0-1_R_filt.fastq.gz" "LFGCM-8276-35-0-1_F_filt.fastq.gz"
[70] "LFGCM-8276-35-0-1_R_filt.fastq.gz" "LFGCM-8276-36-0-1_F_filt.fastq.gz" "LFGCM-8276-36-0-1_R_filt.fastq.gz"
[73] "LFGCM-8276-37-0-1_F_filt.fastq.gz" "LFGCM-8276-37-0-1_R_filt.fastq.gz" "LFGCM-8276-38-0-1_F_filt.fastq.gz"
[76] "LFGCM-8276-38-0-1_R_filt.fastq.gz" "LFGCM-8276-39-0-1_F_filt.fastq.gz" "LFGCM-8276-39-0-1_R_filt.fastq.gz"
[79] "LFGCM-8276-40-0-1_F_filt.fastq.gz" "LFGCM-8276-40-0-1_R_filt.fastq.gz"
>
> # Filtered forward and reverse fastq filenames have format: SAMPLENAME_F_filt.fastq.gz and SAMPLENAME_R_filt.fastq.gz
> filtFs <- sort(list.files(path, pattern="_F_filt.fastq", full.names = TRUE))
> filtRs <- sort(list.files(path, pattern="_R_filt.fastq", full.names = TRUE))
>
>
> errF <- learnErrors(filtFs, multithread=FALSE)
100215718 total bases in 553678 reads from 10 samples will be used for learning the error rates.
> errR <- learnErrors(filtRs, multithread=FALSE)
109806300 total bases in 610035 reads from 11 samples will be used for learning the error rates.
>
> #plotErrors(errF, nominalQ=TRUE)
> #plotErrors(errR, nominalQ=TRUE)
>
> derepFs <- derepFastq(filtFs, verbose=TRUE)
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-01-0-1_F_filt.fastq.gz
Encountered 11324 unique sequences from 49913 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-02-0-1_F_filt.fastq.gz
Encountered 14689 unique sequences from 64763 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-03-0-1_F_filt.fastq.gz
Encountered 11171 unique sequences from 46929 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-04-0-1_F_filt.fastq.gz
Encountered 15334 unique sequences from 65275 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-05-0-1_F_filt.fastq.gz
Encountered 12178 unique sequences from 51001 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-06-0-1_F_filt.fastq.gz
Encountered 8352 unique sequences from 38571 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-07-0-1_F_filt.fastq.gz
Encountered 11872 unique sequences from 61124 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-08-0-1_F_filt.fastq.gz
Encountered 12802 unique sequences from 58565 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-09-0-1_F_filt.fastq.gz
Encountered 16631 unique sequences from 70502 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-10-0-1_F_filt.fastq.gz
Encountered 12401 unique sequences from 47035 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-11-0-1_F_filt.fastq.gz
Encountered 11544 unique sequences from 56357 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-12-0-1_F_filt.fastq.gz
Encountered 11846 unique sequences from 55522 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-13-0-1_F_filt.fastq.gz
Encountered 13588 unique sequences from 61777 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-14-0-1_F_filt.fastq.gz
Encountered 13811 unique sequences from 61618 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-15-0-1_F_filt.fastq.gz
Encountered 11049 unique sequences from 49054 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-16-0-1_F_filt.fastq.gz
Encountered 9566 unique sequences from 42973 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-17-0-1_F_filt.fastq.gz
Encountered 10279 unique sequences from 44923 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-18-0-1_F_filt.fastq.gz
Encountered 13409 unique sequences from 53541 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-19-0-1_F_filt.fastq.gz
Encountered 10203 unique sequences from 46136 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-20-0-1_F_filt.fastq.gz
Encountered 9322 unique sequences from 48954 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-21-0-1_F_filt.fastq.gz
Encountered 11573 unique sequences from 56657 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-22-0-1_F_filt.fastq.gz
Encountered 10096 unique sequences from 53720 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-23-0-1_F_filt.fastq.gz
Encountered 10212 unique sequences from 54924 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-24-0-1_F_filt.fastq.gz
Encountered 9613 unique sequences from 50198 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-25-0-1_F_filt.fastq.gz
Encountered 13164 unique sequences from 56316 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-26-0-1_F_filt.fastq.gz
Encountered 9773 unique sequences from 43324 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-27-0-1_F_filt.fastq.gz
Encountered 16464 unique sequences from 57600 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-28-0-1_F_filt.fastq.gz
Encountered 9136 unique sequences from 46749 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-29-0-1_F_filt.fastq.gz
Encountered 11284 unique sequences from 55671 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-30-0-1_F_filt.fastq.gz
Encountered 12523 unique sequences from 56419 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-31-0-1_F_filt.fastq.gz
Encountered 8987 unique sequences from 44141 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-32-0-1_F_filt.fastq.gz
Encountered 8347 unique sequences from 48613 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-33-0-1_F_filt.fastq.gz
Encountered 8909 unique sequences from 46631 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-34-0-1_F_filt.fastq.gz
Encountered 11522 unique sequences from 54211 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-35-0-1_F_filt.fastq.gz
Encountered 10692 unique sequences from 48188 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-36-0-1_F_filt.fastq.gz
Encountered 10637 unique sequences from 50799 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-37-0-1_F_filt.fastq.gz
Encountered 12142 unique sequences from 56713 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-38-0-1_F_filt.fastq.gz
Encountered 11612 unique sequences from 56370 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-39-0-1_F_filt.fastq.gz
Encountered 9472 unique sequences from 46448 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-40-0-1_F_filt.fastq.gz
Encountered 12780 unique sequences from 56446 total sequences read.
> derepRs <- derepFastq(filtRs, verbose=TRUE)
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-01-0-1_R_filt.fastq.gz
Encountered 10029 unique sequences from 49913 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-02-0-1_R_filt.fastq.gz
Encountered 13381 unique sequences from 64763 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-03-0-1_R_filt.fastq.gz
Encountered 9070 unique sequences from 46929 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-04-0-1_R_filt.fastq.gz
Encountered 13577 unique sequences from 65275 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-05-0-1_R_filt.fastq.gz
Encountered 11166 unique sequences from 51001 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-06-0-1_R_filt.fastq.gz
Encountered 7891 unique sequences from 38571 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-07-0-1_R_filt.fastq.gz
Encountered 11720 unique sequences from 61124 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-08-0-1_R_filt.fastq.gz
Encountered 11419 unique sequences from 58565 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-09-0-1_R_filt.fastq.gz
Encountered 15471 unique sequences from 70502 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-10-0-1_R_filt.fastq.gz
Encountered 10683 unique sequences from 47035 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-11-0-1_R_filt.fastq.gz
Encountered 10739 unique sequences from 56357 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-12-0-1_R_filt.fastq.gz
Encountered 11227 unique sequences from 55522 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-13-0-1_R_filt.fastq.gz
Encountered 12768 unique sequences from 61777 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-14-0-1_R_filt.fastq.gz
Encountered 12614 unique sequences from 61618 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-15-0-1_R_filt.fastq.gz
Encountered 10235 unique sequences from 49054 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-16-0-1_R_filt.fastq.gz
Encountered 8373 unique sequences from 42973 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-17-0-1_R_filt.fastq.gz
Encountered 10706 unique sequences from 44923 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-18-0-1_R_filt.fastq.gz
Encountered 12048 unique sequences from 53541 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-19-0-1_R_filt.fastq.gz
Encountered 10199 unique sequences from 46136 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-20-0-1_R_filt.fastq.gz
Encountered 10374 unique sequences from 48954 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-21-0-1_R_filt.fastq.gz
Encountered 12530 unique sequences from 56657 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-22-0-1_R_filt.fastq.gz
Encountered 11702 unique sequences from 53720 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-23-0-1_R_filt.fastq.gz
Encountered 11846 unique sequences from 54924 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-24-0-1_R_filt.fastq.gz
Encountered 10668 unique sequences from 50198 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-25-0-1_R_filt.fastq.gz
Encountered 12595 unique sequences from 56316 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-26-0-1_R_filt.fastq.gz
Encountered 9379 unique sequences from 43324 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-27-0-1_R_filt.fastq.gz
Encountered 17431 unique sequences from 57600 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-28-0-1_R_filt.fastq.gz
Encountered 10053 unique sequences from 46749 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-29-0-1_R_filt.fastq.gz
Encountered 11617 unique sequences from 55671 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-30-0-1_R_filt.fastq.gz
Encountered 12022 unique sequences from 56419 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-31-0-1_R_filt.fastq.gz
Encountered 10372 unique sequences from 44141 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-32-0-1_R_filt.fastq.gz
Encountered 10500 unique sequences from 48613 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-33-0-1_R_filt.fastq.gz
Encountered 9433 unique sequences from 46631 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-34-0-1_R_filt.fastq.gz
Encountered 11362 unique sequences from 54211 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-35-0-1_R_filt.fastq.gz
Encountered 10841 unique sequences from 48188 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-36-0-1_R_filt.fastq.gz
Encountered 10398 unique sequences from 50799 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-37-0-1_R_filt.fastq.gz
Encountered 11797 unique sequences from 56713 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-38-0-1_R_filt.fastq.gz
Encountered 11635 unique sequences from 56370 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-39-0-1_R_filt.fastq.gz
Encountered 10146 unique sequences from 46448 total sequences read.
Dereplicating sequence entries in Fastq file: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/primerfiltered/LFGCM-8276-40-0-1_R_filt.fastq.gz
Encountered 11731 unique sequences from 56446 total sequences read.
> # Name the derep-class objects by the sample names
> names(derepFs) <- sample.names
> names(derepRs) <- sample.names
>
> dadaFs <- dada(derepFs, err=errF, multithread=TRUE)
Sample 1 - 49913 reads in 11324 unique sequences.
Sample 2 - 64763 reads in 14689 unique sequences.
Sample 3 - 46929 reads in 11171 unique sequences.
Sample 4 - 65275 reads in 15334 unique sequences.
Sample 5 - 51001 reads in 12178 unique sequences.
Sample 6 - 38571 reads in 8352 unique sequences.
Sample 7 - 61124 reads in 11872 unique sequences.
Sample 8 - 58565 reads in 12802 unique sequences.
Sample 9 - 70502 reads in 16631 unique sequences.
Sample 10 - 47035 reads in 12401 unique sequences.
Sample 11 - 56357 reads in 11544 unique sequences.
Sample 12 - 55522 reads in 11846 unique sequences.
Sample 13 - 61777 reads in 13588 unique sequences.
Sample 14 - 61618 reads in 13811 unique sequences.
Sample 15 - 49054 reads in 11049 unique sequences.
Sample 16 - 42973 reads in 9566 unique sequences.
Sample 17 - 44923 reads in 10279 unique sequences.
Sample 18 - 53541 reads in 13409 unique sequences.
Sample 19 - 46136 reads in 10203 unique sequences.
Sample 20 - 48954 reads in 9322 unique sequences.
Sample 21 - 56657 reads in 11573 unique sequences.
Sample 22 - 53720 reads in 10096 unique sequences.
Sample 23 - 54924 reads in 10212 unique sequences.
Sample 24 - 50198 reads in 9613 unique sequences.
Sample 25 - 56316 reads in 13164 unique sequences.
Sample 26 - 43324 reads in 9773 unique sequences.
Sample 27 - 57600 reads in 16464 unique sequences.
Sample 28 - 46749 reads in 9136 unique sequences.
Sample 29 - 55671 reads in 11284 unique sequences.
Sample 30 - 56419 reads in 12523 unique sequences.
Sample 31 - 44141 reads in 8987 unique sequences.
Sample 32 - 48613 reads in 8347 unique sequences.
Sample 33 - 46631 reads in 8909 unique sequences.
Sample 34 - 54211 reads in 11522 unique sequences.
Sample 35 - 48188 reads in 10692 unique sequences.
Sample 36 - 50799 reads in 10637 unique sequences.
Sample 37 - 56713 reads in 12142 unique sequences.
Sample 38 - 56370 reads in 11612 unique sequences.
Sample 39 - 46448 reads in 9472 unique sequences.
Sample 40 - 56446 reads in 12780 unique sequences.
> dadaRs <- dada(derepRs, err=errR, multithread=TRUE)
Sample 1 - 49913 reads in 10029 unique sequences.
Sample 2 - 64763 reads in 13381 unique sequences.
Sample 3 - 46929 reads in 9070 unique sequences.
Sample 4 - 65275 reads in 13577 unique sequences.
Sample 5 - 51001 reads in 11166 unique sequences.
Sample 6 - 38571 reads in 7891 unique sequences.
Sample 7 - 61124 reads in 11720 unique sequences.
Sample 8 - 58565 reads in 11419 unique sequences.
Sample 9 - 70502 reads in 15471 unique sequences.
Sample 10 - 47035 reads in 10683 unique sequences.
Sample 11 - 56357 reads in 10739 unique sequences.
Sample 12 - 55522 reads in 11227 unique sequences.
Sample 13 - 61777 reads in 12768 unique sequences.
Sample 14 - 61618 reads in 12614 unique sequences.
Sample 15 - 49054 reads in 10235 unique sequences.
Sample 16 - 42973 reads in 8373 unique sequences.
Sample 17 - 44923 reads in 10706 unique sequences.
Sample 18 - 53541 reads in 12048 unique sequences.
Sample 19 - 46136 reads in 10199 unique sequences.
Sample 20 - 48954 reads in 10374 unique sequences.
Sample 21 - 56657 reads in 12530 unique sequences.
Sample 22 - 53720 reads in 11702 unique sequences.
Sample 23 - 54924 reads in 11846 unique sequences.
Sample 24 - 50198 reads in 10668 unique sequences.
Sample 25 - 56316 reads in 12595 unique sequences.
Sample 26 - 43324 reads in 9379 unique sequences.
Sample 27 - 57600 reads in 17431 unique sequences.
Sample 28 - 46749 reads in 10053 unique sequences.
Sample 29 - 55671 reads in 11617 unique sequences.
Sample 30 - 56419 reads in 12022 unique sequences.
Sample 31 - 44141 reads in 10372 unique sequences.
Sample 32 - 48613 reads in 10500 unique sequences.
Sample 33 - 46631 reads in 9433 unique sequences.
Sample 34 - 54211 reads in 11362 unique sequences.
Sample 35 - 48188 reads in 10841 unique sequences.
Sample 36 - 50799 reads in 10398 unique sequences.
Sample 37 - 56713 reads in 11797 unique sequences.
Sample 38 - 56370 reads in 11635 unique sequences.
Sample 39 - 46448 reads in 10146 unique sequences.
Sample 40 - 56446 reads in 11731 unique sequences.
>
> dadaFs[[1]]
dada-class: object describing DADA2 denoising results
166 sequence variants were inferred from 11324 input unique sequences.
Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
> dadaRs[[1]]
dada-class: object describing DADA2 denoising results
181 sequence variants were inferred from 10029 input unique sequences.
Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
>
> mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose=TRUE)
48198 paired-reads (in 169 unique pairings) successfully merged out of 48832 (in 485 pairings) input.
62082 paired-reads (in 175 unique pairings) successfully merged out of 63080 (in 579 pairings) input.
45018 paired-reads (in 93 unique pairings) successfully merged out of 45905 (in 307 pairings) input.
60765 paired-reads (in 421 unique pairings) successfully merged out of 63419 (in 1200 pairings) input.
48912 paired-reads (in 270 unique pairings) successfully merged out of 49919 (in 693 pairings) input.
37027 paired-reads (in 184 unique pairings) successfully merged out of 37728 (in 447 pairings) input.
59367 paired-reads (in 140 unique pairings) successfully merged out of 59900 (in 443 pairings) input.
56375 paired-reads (in 144 unique pairings) successfully merged out of 57339 (in 485 pairings) input.
66882 paired-reads (in 541 unique pairings) successfully merged out of 68591 (in 1273 pairings) input.
44460 paired-reads (in 272 unique pairings) successfully merged out of 45771 (in 790 pairings) input.
53272 paired-reads (in 241 unique pairings) successfully merged out of 55281 (in 581 pairings) input.
53700 paired-reads (in 204 unique pairings) successfully merged out of 54428 (in 554 pairings) input.
59230 paired-reads (in 192 unique pairings) successfully merged out of 60020 (in 641 pairings) input.
59379 paired-reads (in 297 unique pairings) successfully merged out of 60381 (in 648 pairings) input.
46942 paired-reads (in 225 unique pairings) successfully merged out of 47857 (in 614 pairings) input.
41449 paired-reads (in 101 unique pairings) successfully merged out of 42053 (in 290 pairings) input.
43243 paired-reads (in 471 unique pairings) successfully merged out of 44031 (in 870 pairings) input.
51027 paired-reads (in 242 unique pairings) successfully merged out of 52408 (in 718 pairings) input.
43763 paired-reads (in 187 unique pairings) successfully merged out of 45203 (in 547 pairings) input.
47386 paired-reads (in 181 unique pairings) successfully merged out of 48167 (in 415 pairings) input.
55228 paired-reads (in 314 unique pairings) successfully merged out of 55721 (in 626 pairings) input.
52243 paired-reads (in 318 unique pairings) successfully merged out of 52830 (in 595 pairings) input.
53644 paired-reads (in 234 unique pairings) successfully merged out of 54013 (in 467 pairings) input.
48680 paired-reads (in 198 unique pairings) successfully merged out of 49392 (in 448 pairings) input.
53846 paired-reads (in 285 unique pairings) successfully merged out of 55029 (in 819 pairings) input.
41440 paired-reads (in 257 unique pairings) successfully merged out of 42428 (in 590 pairings) input.
50279 paired-reads (in 1106 unique pairings) successfully merged out of 53877 (in 2367 pairings) input.
45533 paired-reads (in 296 unique pairings) successfully merged out of 46024 (in 538 pairings) input.
53822 paired-reads (in 278 unique pairings) successfully merged out of 54626 (in 607 pairings) input.
54384 paired-reads (in 188 unique pairings) successfully merged out of 55116 (in 514 pairings) input.
42559 paired-reads (in 458 unique pairings) successfully merged out of 43122 (in 741 pairings) input.
47627 paired-reads (in 107 unique pairings) successfully merged out of 47929 (in 256 pairings) input.
44827 paired-reads (in 148 unique pairings) successfully merged out of 45790 (in 350 pairings) input.
52496 paired-reads (in 195 unique pairings) successfully merged out of 53031 (in 487 pairings) input.
46405 paired-reads (in 211 unique pairings) successfully merged out of 46999 (in 516 pairings) input.
49271 paired-reads (in 251 unique pairings) successfully merged out of 49759 (in 551 pairings) input.
54877 paired-reads (in 164 unique pairings) successfully merged out of 55421 (in 465 pairings) input.
54387 paired-reads (in 257 unique pairings) successfully merged out of 55111 (in 558 pairings) input.
44808 paired-reads (in 288 unique pairings) successfully merged out of 45289 (in 541 pairings) input.
54303 paired-reads (in 361 unique pairings) successfully merged out of 55253 (in 769 pairings) input.
> # Inspect the merger data.frame from the first sample
> head(mergers[[1]])
sequence
1 GACGGAGGATGCAAGTGTTATCCGGAATCACTGGGCGTAAAGCGTCTGTAGGTGGCCTAATAAGTCAACTGTTAAATCTTGAGGCTCAACTTCAAAATCGCAGTCGAAACTATTAGACTAGAGTATAGTAGAGGTAAAGGGAATTTCCAGTGGAGCGGTGAAATGCGTAGATATTGGAAAGAACACCGATGGCGAAAGCACTTTACTGGGCTATTACTAACACTCAGAGACGAAAGCTAGGGTAGCAAATGGG
2 TACGTAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGCAGGCGGTTATGTAAGACAGAGGTGAAATCCCCGGGCTCAACCTGGGAACTGCCTTTGTGACTGCATAGCTAGAGTACGGTAGAGGGGGATGGAATTCCGCGTGTAGCAGTGAAATGCGTAGATATGCGGAGGAACACCGATGGCGAAGGCAATCCCCTGGACCTGTACTGACGCTCATGCACGAAAGCGTGGGGAGCAAACAGG
3 TACAGAGACTGCAAGCGTTACTCGGATTCACTGGGCGTAAAGGGAGCGCAGGCGGACTCGTGTGTCGGACGTGAAATACCGGGGCTTAACCCCGGTGCTGCGTTCGAAACTACGAGTCTAGAGACTTGGAGGGGTAAGCGGAATTCTTGGTGGAGCAGTGAAATGCGTAGATATCAAGAGGAACACCAACGGCGAAGGCAGCTTACTGGACAAGATCTGACGCTCAGGCTCGAAAGCGTGGGGAGCAAAAGGG
4 TACGGAGGGTGCAAGCGTTGTCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCTGATTAAGTCAGCGGTGAAAGACTTCGGCTTAACCGGAGCAGTGCCGTTGATACTGATTAGCTTGAGTGTTGGAGGGGTACATGGAATTGATGGTGTAGCGGTGAAATGCATAGATACCATCAGGAACACCGATAGCGAAGGCATTGTACTGGCCAACAACTGACGCTGAGGCACGAAAGTGTGGGGATCGAACAGG
5 TACGTAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGCAGGCGGTTATACAAGACAGGCGTGAAATCCCCGGGCTTAACCTGGGAATGGCGTCTGTGACTGTATGACTAGAGTGTGTCAGAGGGGGGTAGAATTCCACGTGTAGCAGTGAAATGCGTAGATATGTGGAGGAATACCAATGGCGAAGGCAGCCCCCTGGGATAACACTGACGCTCATGCACGAAAGCGTGGGGAGCAAACAGG
6 TACGGAGGATTCGAGCGTTGTCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGATTTTTAAGTCAGTGGTGAAAGCCTGCAGCTCAACTGTAGAATTGCCATTGAAACTGAAAATCTTGAATTTGGTTAAAGTAGGCGGAATGTATCATGTAGCGGTGAAATGCTTAGATATGATACAGAACACCGATAGCGAAGGCAGCTTGCTGAACCAATATTGACGCTGAGGCACGAAAGCGTGGGGAGCAAACAGG
abundance forward reverse nmatch nmismatch nindel prefer accept
1 7548 1 1 108 0 0 1 TRUE
2 6429 2 2 108 0 0 2 TRUE
3 4191 3 3 108 0 0 1 TRUE
4 3256 4 4 108 0 0 1 TRUE
5 1557 10 5 108 0 0 2 TRUE
6 1230 5 7 108 0 0 1 TRUE
>
> seqtab <- makeSequenceTable(mergers)
> dim(seqtab)
[1] 40 6084
> # Inspect distribution of sequence lengths
> table(nchar(getSequences(seqtab)))
202 203 205 215 219 220 221 229 242 244 247 248 251 252 253 254 255 256 257 261 271 272
73 1 1 1 1 1 2 1 1 1 1 1 4 162 5373 392 41 8 6 1 1 1
282 285 288 289 292 299 309 336
1 2 1 1 1 1 2 1
>
> seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE)
Identified 627 bimeras out of 6084 input sequences.
> dim(seqtab.nochim)
[1] 40 5457
> sum(seqtab.nochim)/sum(seqtab)
[1] 0.9916625
>
> getN <- function(x) sum(getUniques(x))
> track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.nochim))
Error: object 'out' not found
> track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.nochim))
Error: object 'out' not found
> getN <- function(x) sum(getUniques(x))
> library(phyloseq); packageVersion("phyloseq")
[1] ‘1.46.0’
> library(ggplot2); packageVersion("ggplot2")
Keep up to date with changes at https://www.tidyverse.org/blog/
[1] ‘3.4.4’
> track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.nochim))
Error: object 'out' not found
> filtFs <- file.path(path_filtered, "primerfiltered", paste0(sample.names, "_F_filt.fastq.gz"))
Error: object 'path_filtered' not found
> path_filtered <- "C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/filtered"
>
> list.files(path_filtered)
character(0)
>
> # Forward and reverse fastq filenames have format: SAMPLENAME_R1_001.fastq and SAMPLENAME_R2_001.fastq
> fnFs <- sort(list.files(path_filtered, pattern="_F_filt.fastq", full.names = TRUE))
> fnRs <- sort(list.files(path_filtered, pattern="_R_filt.fastq", full.names = TRUE))
> # Extract sample names, assuming filenames have format: SAMPLENAME_XXX.fastq
> sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)
>
> #plotQualityProfile(fnFs[1:40])
> #plotQualityProfile(fnRs[1:40])
>
> filtFs <- file.path(path_filtered, "primerfiltered", paste0(sample.names, "_F_filt.fastq.gz"))
> filtRs <- file.path(path_filtered, "primerfiltered", paste0(sample.names, "_R_filt.fastq.gz"))
>
> out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, trimLeft=c(19, 20), maxN=0, maxEE=c(2, 2), truncQ=2,
+
+ rm.phix=TRUE, compress=TRUE, multithread=FALSE)
Error in filterAndTrim(fnFs, filtFs, fnRs, filtRs, trimLeft = c(19, 20), :
Every input file must have a corresponding output file.
> path <- "C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation"
> list.files(path)
[1] "HMP_MOCK.v35.fasta" "LFGCM-8276-01-0-1_S1_L001_R1_001.fastq.gz"
[3] "LFGCM-8276-01-0-1_S1_L001_R2_001.fastq.gz" "LFGCM-8276-02-0-1_S2_L001_R1_001.fastq.gz"
[5] "LFGCM-8276-02-0-1_S2_L001_R2_001.fastq.gz" "LFGCM-8276-03-0-1_S3_L001_R1_001.fastq.gz"
[7] "LFGCM-8276-03-0-1_S3_L001_R2_001.fastq.gz" "LFGCM-8276-04-0-1_S4_L001_R1_001.fastq.gz"
[9] "LFGCM-8276-04-0-1_S4_L001_R2_001.fastq.gz" "LFGCM-8276-05-0-1_S5_L001_R1_001.fastq.gz"
[11] "LFGCM-8276-05-0-1_S5_L001_R2_001.fastq.gz" "LFGCM-8276-06-0-1_S6_L001_R1_001.fastq.gz"
[13] "LFGCM-8276-06-0-1_S6_L001_R2_001.fastq.gz" "LFGCM-8276-07-0-1_S7_L001_R1_001.fastq.gz"
[15] "LFGCM-8276-07-0-1_S7_L001_R2_001.fastq.gz" "LFGCM-8276-08-0-1_S8_L001_R1_001.fastq.gz"
[17] "LFGCM-8276-08-0-1_S8_L001_R2_001.fastq.gz" "LFGCM-8276-09-0-1_S9_L001_R1_001.fastq.gz"
[19] "LFGCM-8276-09-0-1_S9_L001_R2_001.fastq.gz" "LFGCM-8276-10-0-1_S10_L001_R1_001.fastq.gz"
[21] "LFGCM-8276-10-0-1_S10_L001_R2_001.fastq.gz" "LFGCM-8276-11-0-1_S11_L001_R1_001.fastq.gz"
[23] "LFGCM-8276-11-0-1_S11_L001_R2_001.fastq.gz" "LFGCM-8276-12-0-1_S12_L001_R1_001.fastq.gz"
[25] "LFGCM-8276-12-0-1_S12_L001_R2_001.fastq.gz" "LFGCM-8276-13-0-1_S13_L001_R1_001.fastq.gz"
[27] "LFGCM-8276-13-0-1_S13_L001_R2_001.fastq.gz" "LFGCM-8276-14-0-1_S14_L001_R1_001.fastq.gz"
[29] "LFGCM-8276-14-0-1_S14_L001_R2_001.fastq.gz" "LFGCM-8276-15-0-1_S15_L001_R1_001.fastq.gz"
[31] "LFGCM-8276-15-0-1_S15_L001_R2_001.fastq.gz" "LFGCM-8276-16-0-1_S16_L001_R1_001.fastq.gz"
[33] "LFGCM-8276-16-0-1_S16_L001_R2_001.fastq.gz" "LFGCM-8276-17-0-1_S17_L001_R1_001.fastq.gz"
[35] "LFGCM-8276-17-0-1_S17_L001_R2_001.fastq.gz" "LFGCM-8276-18-0-1_S18_L001_R1_001.fastq.gz"
[37] "LFGCM-8276-18-0-1_S18_L001_R2_001.fastq.gz" "LFGCM-8276-19-0-1_S19_L001_R1_001.fastq.gz"
[39] "LFGCM-8276-19-0-1_S19_L001_R2_001.fastq.gz" "LFGCM-8276-20-0-1_S20_L001_R1_001.fastq.gz"
[41] "LFGCM-8276-20-0-1_S20_L001_R2_001.fastq.gz" "LFGCM-8276-21-0-1_S21_L001_R1_001.fastq.gz"
[43] "LFGCM-8276-21-0-1_S21_L001_R2_001.fastq.gz" "LFGCM-8276-22-0-1_S22_L001_R1_001.fastq.gz"
[45] "LFGCM-8276-22-0-1_S22_L001_R2_001.fastq.gz" "LFGCM-8276-23-0-1_S23_L001_R1_001.fastq.gz"
[47] "LFGCM-8276-23-0-1_S23_L001_R2_001.fastq.gz" "LFGCM-8276-24-0-1_S24_L001_R1_001.fastq.gz"
[49] "LFGCM-8276-24-0-1_S24_L001_R2_001.fastq.gz" "LFGCM-8276-25-0-1_S25_L001_R1_001.fastq.gz"
[51] "LFGCM-8276-25-0-1_S25_L001_R2_001.fastq.gz" "LFGCM-8276-26-0-1_S26_L001_R1_001.fastq.gz"
[53] "LFGCM-8276-26-0-1_S26_L001_R2_001.fastq.gz" "LFGCM-8276-27-0-1_S27_L001_R1_001.fastq.gz"
[55] "LFGCM-8276-27-0-1_S27_L001_R2_001.fastq.gz" "LFGCM-8276-28-0-1_S28_L001_R1_001.fastq.gz"
[57] "LFGCM-8276-28-0-1_S28_L001_R2_001.fastq.gz" "LFGCM-8276-29-0-1_S29_L001_R1_001.fastq.gz"
[59] "LFGCM-8276-29-0-1_S29_L001_R2_001.fastq.gz" "LFGCM-8276-30-0-1_S30_L001_R1_001.fastq.gz"
[61] "LFGCM-8276-30-0-1_S30_L001_R2_001.fastq.gz" "LFGCM-8276-31-0-1_S31_L001_R1_001.fastq.gz"
[63] "LFGCM-8276-31-0-1_S31_L001_R2_001.fastq.gz" "LFGCM-8276-32-0-1_S32_L001_R1_001.fastq.gz"
[65] "LFGCM-8276-32-0-1_S32_L001_R2_001.fastq.gz" "LFGCM-8276-33-0-1_S33_L001_R1_001.fastq.gz"
[67] "LFGCM-8276-33-0-1_S33_L001_R2_001.fastq.gz" "LFGCM-8276-34-0-1_S34_L001_R1_001.fastq.gz"
[69] "LFGCM-8276-34-0-1_S34_L001_R2_001.fastq.gz" "LFGCM-8276-35-0-1_S35_L001_R1_001.fastq.gz"
[71] "LFGCM-8276-35-0-1_S35_L001_R2_001.fastq.gz" "LFGCM-8276-36-0-1_S36_L001_R1_001.fastq.gz"
[73] "LFGCM-8276-36-0-1_S36_L001_R2_001.fastq.gz" "LFGCM-8276-37-0-1_S37_L001_R1_001.fastq.gz"
[75] "LFGCM-8276-37-0-1_S37_L001_R2_001.fastq.gz" "LFGCM-8276-38-0-1_S38_L001_R1_001.fastq.gz"
[77] "LFGCM-8276-38-0-1_S38_L001_R2_001.fastq.gz" "LFGCM-8276-39-0-1_S39_L001_R1_001.fastq.gz"
[79] "LFGCM-8276-39-0-1_S39_L001_R2_001.fastq.gz" "LFGCM-8276-40-0-1_S40_L001_R1_001.fastq.gz"
[81] "LFGCM-8276-40-0-1_S40_L001_R2_001.fastq.gz" "Mock_S280_L001_R1_001.fastq"
[83] "Mock_S280_L001_R2_001.fastq" "mouse.dpw.metadata"
[85] "mouse.time.design" "primerfiltered"
[87] "silva_nr_v128_train_set.fa.gz" "stability.batch"
[89] "stability.files"
> fnFs <- sort(list.files(path, pattern="_R1_001.fastq", full.names = TRUE))
> fnRs <- sort(list.files(path, pattern="_R2_001.fastq", full.names = TRUE))
> # Extract sample names, assuming filenames have format: SAMPLENAME_XXX.fastq
> sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)
> filtFs <- file.path(path, "filtered", paste0(sample.names, "_F_filt.fastq.gz"))
> filtRs <- file.path(path, "filtered", paste0(sample.names, "_R_filt.fastq.gz"))
>
>
>
>
>
> out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(200, 200), trimLeft=c(19, 20), maxN=0, maxEE=c(2, 2), truncQ=2,
+
+ rm.phix=TRUE, compress=TRUE, multithread=FALSE)
Creating output directory: C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/filtered
>
> head(out)
reads.in reads.out
LFGCM-8276-01-0-1_S1_L001_R1_001.fastq.gz 195778 50036
LFGCM-8276-02-0-1_S2_L001_R1_001.fastq.gz 257943 64930
LFGCM-8276-03-0-1_S3_L001_R1_001.fastq.gz 186674 47056
LFGCM-8276-04-0-1_S4_L001_R1_001.fastq.gz 252689 65418
LFGCM-8276-05-0-1_S5_L001_R1_001.fastq.gz 203296 51117
LFGCM-8276-06-0-1_S6_L001_R1_001.fastq.gz 153113 38681
> track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.nochim))
Warning message:
In cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, :
number of rows of result is not a multiple of vector length (arg 2)
> colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim")
> rownames(track) <- sample.names
> head(track)
input filtered denoisedF denoisedR merged nonchim
LFGCM-8276-01-0-1 195778 50036 49227 49270 48198 48078
LFGCM-8276-02-0-1 257943 64930 63729 63731 62082 60944
LFGCM-8276-03-0-1 186674 47056 46183 46366 45018 44675
LFGCM-8276-04-0-1 252689 65418 64247 64087 60765 58927
LFGCM-8276-05-0-1 203296 51117 50285 50364 48912 48911
LFGCM-8276-06-0-1 153113 38681 38033 38082 37027 36932
> taxa.print <- taxa # Removing sequence rownames for display only
Error: object 'taxa' not found
> taxa <- assignTaxonomy(seqtab.nochim, "C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/silva_nr_v128_train_set.fa.gz", multithread=TRUE)
>
> taxa.print <- taxa # Removing sequence rownames for display only
> rownames(taxa.print) <- NULL
> head(taxa.print)
Kingdom Phylum Class Order Family
[1,] "Bacteria" "Proteobacteria" "Betaproteobacteria" "Burkholderiales" "Comamonadaceae"
[2,] "Bacteria" "Actinobacteria" "Actinobacteria" "Micrococcales" "Microbacteriaceae"
[3,] "Bacteria" "Actinobacteria" "Actinobacteria" "Frankiales" "Sporichthyaceae"
[4,] "Bacteria" "Cyanobacteria" "Chloroplast" NA NA
[5,] "Bacteria" "Bacteroidetes" "Flavobacteriia" "Flavobacteriales" "Flavobacteriaceae"
[6,] "Bacteria" "Bacteroidetes" "Flavobacteriia" "Flavobacteriales" "Cryomorphaceae"
Genus
[1,] NA
[2,] "Candidatus_Limnoluna"
[3,] "hgcI_clade"
[4,] NA
[5,] "Flavobacterium"
[6,] NA
> save(list = ls(all.names = TRUE), file = "C:/Users/radhi/Desktop/onefolder_1_to_40_FASTQ_Generation/variables.RData")
> unqs.mock <- seqtab.nochim["Mock",]
Error in seqtab.nochim["Mock", ] : subscript out of bounds
> library(phyloseq); packageVersion("phyloseq")
[1] ‘1.46.0’
> library(ggplot2); packageVersion("ggplot2")
[1] ‘3.4.4’
> theme_set(theme_bw())
> samples.out <- rownames(seqtab.nochim)
> subject <- sapply(strsplit(samples.out, "D"), `[`, 1)
> gender <- substr(subject, 1, 1)
> subject <- substr(subject, 2, 999)
> day <- as.integer(sapply(strsplit(samples.out, "D"), `[`, 2))
> samdf <- data.frame(Subject = subject, Gender = gender, Day = day)
> samdf$When <- "Early"
> samdf$When[samdf$Day > 100] <- "Late"
> # Create unique row names using paste0 and row numbers
> unique_row_names <- paste0(samples.out, "_", seq_len(nrow(samdf)))
> rownames(samdf) <- unique_row_names
> ps <- phyloseq(otu_table(seqtab.nochim, taxa_are_rows=FALSE),
+ sample_data(samdf),
+ tax_table(taxa))
Error in validObject(.Object) : invalid class “phyloseq” object:
Component sample names do not match.
Try sample_names()
> ps <- phyloseq(otu_table(seqtab.nochim, taxa_are_rows=FALSE),
+ sample_data(samdf),
+ tax_table(taxa))
Error in validObject(.Object) : invalid class “phyloseq” object:
Component sample names do not match.
Try sample_names()
> ps <- prune_samples(sample_names(ps) != "Mock", ps) # Remove mock sample
Error in h(simpleError(msg, call)) :
error in evaluating the argument 'samples' in selecting a method for function 'prune_samples': error in evaluating the argument 'physeq' in selecting a method for function 'sample_names': object 'ps' not found
> ps <- sample_names(otu_table(seqtab.nochim, taxa_are_rows=FALSE),
+ sample_data(samdf),
+ tax_table(taxa))
Error in sample_names(otu_table(seqtab.nochim, taxa_are_rows = FALSE), :
unused arguments (sample_data(samdf), tax_table(taxa))
> table(nchar(getSequences(seqtab)))
202 203 205 215 219 220 221 229 242 244 247 248 251 252 253 254 255 256 257 261 271 272
73 1 1 1 1 1 2 1 1 1 1 1 4 162 5373 392 41 8 6 1 1 1
282 285 288 289 292 299 309 336
1 2 1 1 1 1 2 1
> dim(seqtab.nochim)
[1] 40 5457
> load
function (file, envir = parent.frame(), verbose = FALSE)
{
if (is.character(file)) {
con <- gzfile(file)
on.exit(close(con))
magic <- readChar(con, 5L, useBytes = TRUE)
if (!length(magic))
stop("empty (zero-byte) input file")
if (!grepl("RD[ABX][2-9]\n", magic)) {
if (grepl("RD[ABX][2-9]\r", magic))
stop("input has been corrupted, with LF replaced by CR")
warning(sprintf("file %s has magic number '%s'\n",
sQuote(basename(file)), gsub("[\n\r]*", "", magic)),
" ", "Use of save versions prior to 2 is deprecated",
domain = NA, call. = FALSE)
return(.Internal(load(file, envir)))
}
}
else if (inherits(file, "connection")) {
con <- if (inherits(file, "gzfile") || inherits(file,
"gzcon"))
file
else gzcon(file)
}
else stop("bad 'file' argument")
if (verbose)
cat("Loading objects:\n")
.Internal(loadFromConn2(con, envir, verbose))
}
<bytecode: 0x0000021d5bde00b0>
<environment: namespace:base>
> sample_names()
NULL
> samples.out
[1] "LFGCM-8276-01-0-1" NA NA NA NA
[6] NA NA NA NA NA
[11] NA NA NA NA NA
[16] NA NA NA NA NA
[21] NA NA NA NA NA
[26] NA NA NA NA NA
[31] NA NA NA NA NA
[36] NA NA NA NA NA
> dim(seqtab.nochim)
[1] 40 5457
> sum(seqtab.nochim)/sum(seqtab)
[1] 0.9916625
> head(track)
input filtered denoisedF denoisedR merged nonchim
LFGCM-8276-01-0-1 195778 50036 49227 49270 48198 48078
LFGCM-8276-02-0-1 257943 64930 63729 63731 62082 60944
LFGCM-8276-03-0-1 186674 47056 46183 46366 45018 44675
LFGCM-8276-04-0-1 252689 65418 64247 64087 60765 58927
LFGCM-8276-05-0-1 203296 51117 50285 50364 48912 48911
LFGCM-8276-06-0-1 153113 38681 38033 38082 37027 36932
> cat("DADA2 inferred", length(unqs.mock), "sample sequences present in the Mock community.\n")
Error: object 'unqs.mock' not found
> rownames(seqtab.nochim)
[1] "LFGCM-8276-01-0-1" NA NA NA NA
[6] NA NA NA NA NA
[11] NA NA NA NA NA
[16] NA NA NA NA NA
[21] NA NA NA NA NA
[26] NA NA NA NA NA
[31] NA NA NA NA NA
[36] NA NA NA NA NA
I'm trying this on my 40 samples. My issue is that all the row names of seqtab.nochim are NA, except for the first row, which is named after one of my fastq.gz files (LFGCM-8276-01-0-1). I think this is what caused the "Component sample names do not match" error when trying to construct a phyloseq object (towards the end). All the previous steps seemed to run successfully. What went wrong? I might have muddled up the procedure. Thank you so much.
Here is all the code from the console, in case it is useful:
Here are my files in their folder: