tagdust segfaults when output files already exist.

ghuls commented 4 years ago

It crashes in the free_read_structure(param->read_structure) call for read_structure->numseq_in_segment[0]:

$ tagdust -t 8 -e 0.1 -1 O:N -2 S:ACTCGAGCCGAGCAGGCGCGCCGATC -3 R:NNNNNNNNNNNNNNNNN -4 S:GGACCGGG -5 O:N -o test test.fastq.gz

[2019-12-19 17:13:02]   Tagdust 2.32, Copyright (C) 2013-2019 Timo Lassmann <timolassmann@gmail.com>
[2019-12-19 17:13:02]   cmd: tagdust -t 8 -e 0.1 -1 O:N -2 S:ACTCGAGCCGAGCAGGCGCGCCGATC -3 R:NNNNNNNNNNNNNNNNN -4 S:GGACCGGG -5 O:N -o test test.fastq.gz
[2019-12-19 17:13:02]   Start Run
--------------------------------------------------
[2019-12-19 17:13:03]   Determining threshold for read0.
[2019-12-19 17:13:08]   Long sequence found. Need to realloc model...
[2019-12-19 17:13:47]   Selected Threshold:: 0.015063
1
[2019-12-19 17:14:39]   ERROR: some output files already exists.
2
free_param: 1
free_param: 2
free_read_structure: 1
free_read_structure: 1  loop: 0
free_read_structure: 1  loop: 0 read_structure->numseq_in_segment
free_read_structure: 1  loop: 0 0
free_read_structure: 1  loop: 0 0 freed
free_read_structure: 1  loop: 1
free_read_structure: 1  loop: 1 read_structure->numseq_in_segment
free_read_structure: 1  loop: 1 0
free_read_structure: 1  loop: 1 0 freed
free_read_structure: 1  loop: 1 1
free_read_structure: 1  loop: 1 1 freed
free_read_structure: 1  loop: 2
free_read_structure: 1  loop: 2 read_structure->numseq_in_segment
free_read_structure: 1  loop: 2 0
free_read_structure: 1  loop: 2 0 freed
free_read_structure: 1  loop: 3
free_read_structure: 1  loop: 3 read_structure->numseq_in_segment
free_read_structure: 1  loop: 3 0
free_read_structure: 1  loop: 3 0 freed
free_read_structure: 1  loop: 3 1
free_read_structure: 1  loop: 3 1 freed
free_read_structure: 1  loop: 4
free_read_structure: 1  loop: 4 read_structure->numseq_in_segment
free_read_structure: 1  loop: 4 0
free_read_structure: 1  loop: 4 0 freed
free_read_structure: 2
free_read_structure: 3
free_read_structure: 4
free_read_structure: 5
free_read_structure: 6
free_param: 3
free_param: 3.1
free_read_structure: 1
free_read_structure: 1  loop: 1
Segmentation fault (core dumped)

"Debugging" code:

diff --git a/src/interface.c b/src/interface.c
index e18a80d..98ad7d6 100644
--- a/src/interface.c
+++ b/src/interface.c
@@ -710,6 +710,7 @@ int free_param(struct parameters* param)
         char logfile[200];
         FILE* outfile = 0;
         int i,status;
+        printf("free_param: 1\n");
         //if(param->log){
         if(param->outfile){
                 sprintf (logfile, "%s_logfile.txt",param->outfile);
@@ -723,6 +724,7 @@ int free_param(struct parameters* param)
                 fclose(outfile);

         }
+        printf("free_param: 2\n");
         if(param->read_structures){
                 for(i = 0; i < param->infiles;i++){
                         if(param->read_structures[i]){
@@ -731,24 +733,35 @@ int free_param(struct parameters* param)
                 }
                 MFREE(param->read_structures);
         }
+        printf("free_param: 3\n");

         if(param->read_structure){
+            printf("free_param: 3.1\n");
                 free_read_structure(param->read_structure);
+            printf("free_param: 3.2\n");
         }
+        printf("free_param: 4\n");
         if(param->read_structure_R1){
                 free_read_structure(param->read_structure_R1);
         }
+        printf("free_param: 5\n");
         if(param->read_structure_R2){
                 free_read_structure(param->read_structure_R2);
         }
+        printf("free_param: 6\n");
         if(param->confidence_thresholds){
                 MFREE(param->confidence_thresholds);
         }
+        printf("free_param: 7\n");

         MFREE (param->infile);
+        printf("free_param: 8\n");
         MFREE(param->messages);
+        printf("free_param: 9\n");
         MFREE(param->buffer);
+        printf("free_param: 10\n");
         MFREE(param);
+        printf("free_param: 11\n");
         return kslOK;
 ERROR:
         return status;
@@ -861,20 +874,32 @@ ERROR:
 void free_read_structure(struct read_structure* read_structure)
 {
         int i,j;
+        printf("free_read_structure: 1\n");
         for(i = 0; i < 10;i++){
                 if(read_structure->sequence_matrix[i]){
+        printf("free_read_structure: 1  loop: %d\n", i);
+        if (read_structure->numseq_in_segment) {
+            printf("free_read_structure: 1  loop: %d read_structure->numseq_in_segment\n", i);
+        }
                         for(j = 0; j < read_structure->numseq_in_segment[i];j++){
+        printf("free_read_structure: 1  loop: %d %d\n", i, j);
                                 MFREE(read_structure->sequence_matrix[i][j]);
+        printf("free_read_structure: 1  loop: %d %d freed\n", i, j);
                         }

                         MFREE(read_structure->sequence_matrix[i]);
                 }
         }
+        printf("free_read_structure: 2\n");
         MFREE(read_structure->sequence_matrix);

+        printf("free_read_structure: 3\n");
         MFREE(read_structure->numseq_in_segment );
+        printf("free_read_structure: 4\n");
         MFREE(read_structure->type);
+        printf("free_read_structure: 5\n");
         MFREE(read_structure);
+        printf("free_read_structure: 6\n");
 }

diff --git a/src/io.c b/src/io.c
index 5381950..4381f46 100644
--- a/src/io.c
+++ b/src/io.c
@@ -799,10 +799,14 @@ int print_all(struct read_info*** read_info_container,struct parameters* param,
        i = check_for_existing_demultiplexed_files_multiple(param, num_out_reads);
        if(i){
            sprintf(param->buffer,"ERROR: some output files already exists.\n");
+           printf("1\n");
            param->messages = append_message(param->messages, param->buffer);
+           printf("2\n");
            free_param(param);
+           printf("3\n");
            exit(EXIT_FAILURE);
        }
+       printf("4\n");

        filemode[0] = 'w';
        filemode[1] = 0;

TimoLassmann commented 4 years ago

Thanks for reporting! I am currently rewriting tagdust to make it more robust and to include reading from and writing to compressed files.

ghuls commented 4 years ago

tagdust seems like a great tool. I have been looking for a program like this for many years. Doing something similar with cutadapt seems to be quite difficult (although it should have most of the basic building blocks implemented).

ghuls commented 4 years ago

For this specific project I wanted to extract a UMI-like 17bp sequence from the FASTQ file. When using -3 R:NNNNNNNNNNNNNNNNN, the extracted reads are not all 17bp (the rest of the read is useless). It would be nice if it were possible to specify the exact read length in this case (and ignore reads that didn't match). For now I can easily apply a simple post-filter to fix it manually.

When using -3 F:NNNNNNNNNNNNNNNNN with no R: specified, tagdust segfaults.

$ tagdust -t 8 -e 0.1 -1 O:N -2 S:ACTCGAGCCGAGCAGGCGCGCCGATC -3 F:NNNNNNNNNNNNNNNNN -4 S:GGACCGGG -5 O:N -o test test.fastq.gz

[2019-12-20 11:47:13]   Tagdust 2.32, Copyright (C) 2013-2019 Timo Lassmann <timolassmann@gmail.com>
[2019-12-20 11:47:13]   cmd: tagdust/src/tagdust -t 8 -e 0.1 -1 O:N -2 S:ACTCGAGCCGAGCAGGCGCGCCGATC -3 F:NNNNNNNNNNNNNNNNN -4 S:GGACCGGG -5 O:N -o test test.fastq.gz
[2019-12-20 11:47:13]   Start Run
--------------------------------------------------
[2019-12-20 11:47:14]   Determining threshold for read0.
[2019-12-20 11:47:18]   Long sequence found. Need to realloc model...
[2019-12-20 11:48:07]   Selected Threshold:: 0.013495
4
Fatal exception (source file io.c, line 853):
malloc of size 0 failed
Aborted (core dumped)

TimoLassmann / tagdust

tagdust segfaults when output files already exist. #2