Open tomoakin opened 3 years ago
Stack traces were collected for some of them. Calling Process_Olap (frag_len=65535 might be related in some cases?
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x0000003d44e677e4 in ?? ()
(gdb) info stack
#0 0x0000003d44e677e4 in ?? ()
#1 0x000000002ed7fc40 in ?? ()
#2 0x00007ffc32e52c40 in ?? ()
#3 0x00007ffc32e02470 in ?? ()
#4 0x0000000061746161 in ?? ()
#5 0x000000002ed7fc40 in ?? ()
#6 0x0000000000407984 in Write_OVL_Mesg (fout=0x1, vmesg=0x61746161) at src/AS_MSG/AS_MSG_pmesg1.C:1322
#7 0x00000000004070be in WriteProtoMesg_AS (fout=0x61746161, pmesg=0x5, pmesg@entry=0x7ffc32e02470)
at src/AS_MSG/AS_MSG_pmesg.C:481
#8 0x00000000004049d8 in Output_OVL (quality=0.011773121516497273, olap=0x2af50f2c2af4)
at src/AS_OVL/CorrectOlapsOVL.C:1474
#9 Process_Olap (frag_len=65535, adj_ct=<optimized out>, forw_adj=0x4d90f570,
b_seq=0x7ffc32e02c40 "tattctactataacttactttatttgattcgatttgattctattttactttatttaattctattttactctatttgattctattttactccatttggttctattttattaaccataattttctgtattatattactataaatattataaccataattttccataatttttcgtgctgtattactataaaagtgatgacca"...,
olap=0x2af50f2c2af4) at src/AS_OVL/CorrectOlapsOVL.C:1932
#10 Redo_Olaps () at src/AS_OVL/CorrectOlapsOVL.C:2235
#11 0x0000000000402d21 in main (argc=<optimized out>, argv=<optimized out>) at src/AS_OVL/CorrectOlapsOVL.C:343
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x0000003d44e677e4 in ?? ()
(gdb) info stack
#0 0x0000003d44e677e4 in ?? ()
#1 0x000000000116ace0 in ?? ()
#2 0x00007ffdce5737e0 in ?? ()
#3 0x00007ffdce523010 in ?? ()
#4 0x0000000061746161 in ?? ()
#5 0x000000000116ace0 in ?? ()
#6 0x0000000000407984 in Write_OVL_Mesg (fout=0x1, vmesg=0x61746161) at src/AS_MSG/AS_MSG_pmesg1.C:1322
#7 0x00000000004070be in WriteProtoMesg_AS (fout=0x61746161, pmesg=0x5, pmesg@entry=0x7ffdce523010)
at src/AS_MSG/AS_MSG_pmesg.C:481
#8 0x00000000004049d8 in Output_OVL (quality=0, olap=0x2b038a48fe14) at src/AS_OVL/CorrectOlapsOVL.C:1474
#9 Process_Olap (frag_len=65535, adj_ct=<optimized out>, forw_adj=0x24c4c10,
b_seq=0x7ffdce5237e0 "tattctactataacttactttatttgattcgatttgattctattttactttatttaattctattttactctatttgattctattttactccatttggttctattttattaaccataattttctgtattatattactataaatattataaccataattttccataatttttcgtgctgtattactataaaagtgatgacca"...,
olap=0x2b038a48fe14) at src/AS_OVL/CorrectOlapsOVL.C:1932
#10 Redo_Olaps () at src/AS_OVL/CorrectOlapsOVL.C:2235
#11 0x0000000000402d21 in main (argc=<optimized out>, argv=<optimized out>) at src/AS_OVL/CorrectOlapsOVL.C:343
another one was not informative...
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x0000003d44e661f4 in ?? ()
(gdb) info stack
#0 0x0000003d44e661f4 in ?? ()
#1 0x000000005d118810 in ?? ()
#2 0x000000000133bec3 in ?? ()
#3 0x00000000000186a0 in ?? ()
#4 0x0000000000402d32 in main (argc=<optimized out>, argv=<optimized out>) at src/AS_OVL/CorrectOlapsOVL.C:351
Another core (on a different system)
(gdb) info stack
#0 0x00002b63b47df438 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54
#1 0x00002b63b47e103a in __GI_abort () at abort.c:89
#2 0x00002b63b48217fa in __libc_message (do_abort=do_abort@entry=2,
fmt=fmt@entry=0x2b63b493955f "*** %s ***: %s terminated\n") at ../sysdeps/posix/libc_fatal.c:175
#3 0x00002b63b48c321c in __GI___fortify_fail (msg=<optimized out>,
msg@entry=0x2b63b49394f0 "buffer overflow detected") at fortify_fail.c:37
#4 0x00002b63b48c1220 in __GI___chk_fail () at chk_fail.c:28
#5 0x00002b63b48c0572 in __strcpy_chk (
dest=0x6a4740 <Get_Canonical_Olap_Region(Olap_Info_t*, int, char*, char*, Adjust_t*, int, int, char**, char**)::b_rev_seq> "ttttgttttttgtggaaagttcttccttggatttgggtgcatggcctgatatgctgatgtggccttctattccaactatttcggaagagagagtgaagaaaaaagaagtgaatacatttatgcatcaattatttcacttttattattattatttattcattttattattcgtttttttttacttttttttgtttattcat"...,
src=0x7ffe0f2e06b0 "gaaagggatcatatgaaataaaaaagaatttaacaaaaataaatgaataaataaacaaaaattacatgattgaaaaaatgattgaatgaataaatggataactatgcatataaatgatgggataaacaaaatgaatgaaaataaagtaatggatgaataaagaaataagaatgatgaataagaaaggaggaataataaag"..., destlen=65536) at strcpy_chk.c:30
#6 0x0000000000403c27 in strcpy (
__src=0x7ffe0f2e06b0 "gaaagggatcatatgaaataaaaaagaatttaacaaaaataaatgaataaataaacaaaaattacatgattgaaaaaatgattgaatgaataaatggataactatgcatataaatgatgggataaacaaaatgaatgaaaataaagtaatggatgaataaagaaataagaatgatgaataagaaaggaggaataataaag"...,
__dest=0x6a4740 <Get_Canonical_Olap_Region(Olap_Info_t*, int, char*, char*, Adjust_t*, int, int, char**, char**)::b_rev_seq> "ttttgttttttgtggaaagttcttccttggatttgggtgcatggcctgatatgctgatgtggccttctattccaactatttcggaagagagagtgaagaaaaaagaagtgaatacatttatgcatcaattatttcacttttattattattatttattcattttattattcgtttttttttacttttttttgtttattcat"...)
at /usr/include/x86_64-linux-gnu/bits/string3.h:110
#7 Get_Canonical_Olap_Region (b_part=<synthetic pointer>, a_part=<synthetic pointer>, frag_len=65535,
adj_ct=<optimized out>, forw_adj=0x395d260,
b_seq=0x7ffe0f2e06b0 "gaaagggatcatatgaaataaaaaagaatttaacaaaaataaatgaataaataaacaaaaattacatgattgaaaaaatgattgaatgaataaatggataactatgcatataaatgatgggataaacaaaatgaatgaaaataaagtaatggatgaataaagaaataagaatgatgaataagaaaggaggaataataaag"..., a_seq=<optimized out>, sub=<optimized out>, olap=0x2b63bef716c8) at src/AS_OVL/CorrectOlapsOVL.C:1032
#8 Process_Olap (frag_len=65535, adj_ct=<optimized out>, forw_adj=0x395d260,
b_seq=0x7ffe0f2e06b0 "gaaagggatcatatgaaataaaaaagaatttaacaaaaataaatgaataaataaacaaaaattacatgattgaaaaaatgattgaatgaataaatggataactatgcatataaatgatgggataaacaaaatgaatgaaaataaagtaatggatgaataaagaaataagaatgatgaataagaaaggaggaataataaag"..., olap=0x2b63bef716c8) at src/AS_OVL/CorrectOlapsOVL.C:1812
#9 Redo_Olaps () at src/AS_OVL/CorrectOlapsOVL.C:2235
#10 0x000000000040280a in main (argc=<optimized out>, argv=<optimized out>) at src/AS_OVL/CorrectOlapsOVL.C:343
Run the same dataset with MaSuRCa compiled with
in global-1/CA8/src/AS_global.H ended without producing cores. Note the max read length in the dataset was over 200 kb but less than 250 kb. When the DNA/library protocols is better a more will be necessary.
$ diff -u MaSuRCA-4.0.5/global-1/CA8/src/AS_global.H Masurca18b/MaSuRCA-4.0.5/global-1/CA8/src/AS_global.H
--- MaSuRCA-4.0.5/global-1/CA8/src/AS_global.H 2018-01-23 05:03:01.000000000 +0900
+++ Masurca18b/MaSuRCA-4.0.5/global-1/CA8/src/AS_global.H 2021-11-10 10:06:09.851397770 +0900
@@ -218,7 +218,7 @@
// amounts of memory too.
//
#define AS_READ_MAX_PACKED_LEN_BITS 8
-#define AS_READ_MAX_NORMAL_LEN_BITS 16
+#define AS_READ_MAX_NORMAL_LEN_BITS 18
// Do not define this.
#undef OVS_DANGEROUSLY_OVERSIZE
Because the fourth core https://github.com/alekseyzimin/masurca/issues/257#issuecomment-965074142 suggested a buffer overrun in Get_Canonical_Olap_Region, the context was looked in
static void Get_Canonical_Olap_Region
(Olap_Info_t * olap, int sub, char * a_seq, char * b_seq,
Adjust_t forw_adj [], int adj_ct,
int frag_len, char * * a_part, char * * b_part)
...
static char b_rev_seq [AS_READ_MAX_NORMAL_LEN + 1];
static Adjust_t b_rev_adj [AS_READ_MAX_NORMAL_LEN];
{
(* a_part) = a_seq;
if (olap -> a_hang > 0)
(* a_part) += Hang_Adjust (olap -> a_hang, Frag [sub] . adjust,
Frag [sub] . adjust_ct);
if (olap -> orient == 'N')
(* b_part) = b_seq;
else
{
if (b_rev_id != olap -> b_iid)
{
strcpy (b_rev_seq, b_seq);
reverseComplementSequence (b_rev_seq, 0);
The size of b_rev_seq is statically determined at compile time as 64 k and frag_len is consistent with that. However, buffer overrun was detected, implying that b_seq is infact not null terminated at the specified length. Using strncpy should prevent the observed buffer overrun. However, I am not sure if it is the right way. Should we get the 64 k at the beginning or the end? The code appears to compute reverse complement after copying to the buffer.
If AS_READ_MAX_NORMAL_LEN is large enough that all reads can be held in the buffer, the buffer overrun should not occur as well. Thus, I expected increasing AS_READ_MAX_NORMAL_LEN_BITS will hide the problem, and it seems the case (https://github.com/alekseyzimin/masurca/issues/257#issuecomment-965873035). However, as we get longer reads, the buffer overrun will occur again.
One solution may dynamically allocate the buffer based on the frag_len. But, there are perhaps many places AS_READ_MAX_NORMAL_LEN rules the buffer size.
During some runs with MaSuRCA-4.0.5 (compiled from release), a few core was produced.
Opening the core file with gdb indicated that