Closed mingodad closed 8 months ago
Also after that I'm getting this:
make
g++ -c -o gramexpl.o -MMD -g -std=c++11 -I../smbase -I../ast -I. -Ic gramexpl.cc
g++ -c -o asockind.o -MMD -g -std=c++11 -I../smbase -I../ast -I. -Ic asockind.cc
g++ -c -o grammar.o -MMD -g -std=c++11 -I../smbase -I../ast -I. -Ic grammar.cc
g++ -c -o emitcode.o -MMD -g -std=c++11 -I../smbase -I../ast -I. -Ic emitcode.cc
g++ -c -o mlsstr.o -MMD -g -std=c++11 -I../smbase -I../ast -I. -Ic mlsstr.cc
g++ -c -o genml.o -MMD -g -std=c++11 -I../smbase -I../ast -I. -Ic genml.cc
rm -f gramast.ast.gen.cc gramast.ast.gen.h
../ast/astgen.exe -ogramast.ast.gen gramast.ast
writing gramast.ast.gen.h...
writing gramast.ast.gen.cc...
chmod a-w gramast.ast.gen.h gramast.ast.gen.cc
g++ -c -o gramast.ast.gen.o -MMD -g -std=c++11 -I../smbase -I../ast -I. -Ic gramast.ast.gen.cc
../smflex/smflex -ogramlex.yy.cc gramlex.lex
g++ -c -o gramlex.yy.o -MMD -g -std=c++11 -I../smbase -I../ast -I. -Ic gramlex.yy.cc
g++ -c -o grampar.o -MMD -g -std=c++11 -I../smbase -I../ast -I. -Ic grampar.cc
g++ -c -o grampar.tab.o -MMD -g -std=c++11 -I../smbase -I../ast -I. -Ic grampar.tab.cc
g++ -c -o parsetables.o -MMD -g -std=c++11 -I../smbase -I../ast -I. -Ic parsetables.cc
g++ -o elkhound.exe -g -std=c++11 -I../smbase -I../ast -I. -Ic -DGRAMANL_MAIN gramanl.cc gramexpl.o asockind.o grammar.o emitcode.o mlsstr.o genml.o gramast.ast.gen.o gramlex.yy.o grampar.o grampar.tab.o parsetables.o ../ast/libast.a ../smbase/libsmbase.a
grammar.o: In function `VoidListMutator::~VoidListMutator()':
elkhound-smcpeak/elkhound/../smbase/flatutil.h:32: multiple definition of `xfer(Flatten&, char&)'
/tmp/ccK0CitS.o:elkhound-smcpeak/elkhound/../smbase/flatutil.h:32: first defined here
grammar.o: In function `xfer(Flatten&, bool&)':
elkhound-smcpeak/elkhound/../smbase/flatutil.h:37: multiple definition of `xfer(Flatten&, bool&)'
/tmp/ccK0CitS.o:elkhound-smcpeak/elkhound/../smbase/flatutil.h:37: first defined here
grammar.o: In function `xfer(Flatten&, int&)':
elkhound-smcpeak/elkhound/../smbase/flatutil.h:42: multiple definition of `xfer(Flatten&, int&)'
/tmp/ccK0CitS.o:elkhound-smcpeak/elkhound/../smbase/flatutil.h:42: first defined here
grammar.o: In function `xfer(Flatten&, unsigned int&)':
elkhound-smcpeak/elkhound/../smbase/flatutil.h:47: multiple definition of `xfer(Flatten&, unsigned int&)'
/tmp/ccK0CitS.o:elkhound-smcpeak/elkhound/../smbase/flatutil.h:47: first defined here
grammar.o: In function `xfer(Flatten&, long&)':
elkhound-smcpeak/elkhound/../smbase/flatutil.h:52: multiple definition of `xfer(Flatten&, long&)'
/tmp/ccK0CitS.o:elkhound-smcpeak/elkhound/../smbase/flatutil.h:52: first defined here
grammar.o: In function `xfer(Flatten&, unsigned long&)':
elkhound-smcpeak/elkhound/../smbase/flatutil.h:57: multiple definition of `xfer(Flatten&, unsigned long&)'
/tmp/ccK0CitS.o:elkhound-smcpeak/elkhound/../smbase/flatutil.h:57: first defined here
collect2: error: ld returned 1 exit status
Makefile:355: recipe for target 'elkhound.exe' failed
make: *** [elkhound.exe] Error 1
We need these changes in smbase/flatutil.h
to build it:
---------------------------------- flatutil.h ----------------------------------
index 7f6c296..65a70cb 100644
@@ -22,38 +22,38 @@ T *createForUnflat(Flatten &flat)
// Nominal way to flatten. Again, overload to override.
template <class T>
-void xfer(Flatten &flat, T &t)
+inline void xfer(Flatten &flat, T &t)
{
t.xfer(flat);
}
// Overloads for things Flatten knows how to do directly.
-void xfer(Flatten &flat, char &c)
+inline void xfer(Flatten &flat, char &c)
{
flat.xferChar(c);
}
-void xfer(Flatten &flat, bool &b)
+inline void xfer(Flatten &flat, bool &b)
{
flat.xferBool(b);
}
-void xfer(Flatten &flat, int32_t &i)
+inline void xfer(Flatten &flat, int32_t &i)
{
flat.xfer_int32_t(i);
}
-void xfer(Flatten &flat, uint32_t &i)
+inline void xfer(Flatten &flat, uint32_t &i)
{
flat.xfer_uint32_t(i);
}
-void xfer(Flatten &flat, int64_t &i)
+inline void xfer(Flatten &flat, int64_t &i)
{
flat.xfer_int64_t(i);
}
-void xfer(Flatten &flat, uint64_t &i)
+inline void xfer(Flatten &flat, uint64_t &i)
{
flat.xfer_uint64_t(i);
}
Valgrind also reports some new[]/delete mismatches (arrays allocated with new[] but freed with plain delete instead of delete[])
:
In elkhound/gramanl.cc
:
@@ -914,11 +914,11 @@ GrammarAnalysis::GrammarAnalysis()
GrammarAnalysis::~GrammarAnalysis()
{
if (indexedNonterms != NULL) {
- delete indexedNonterms;
+ delete[] indexedNonterms;
}
if (indexedTerms != NULL) {
- delete indexedTerms;
+ delete[] indexedTerms;
}
In smbase/bit2d.cc
:
----------------------------------- bit2d.cc -----------------------------------
index 39f4d7d..30ef38b 100644
@@ -22,7 +22,7 @@ Bit2d::Bit2d(point const &aSize)
Bit2d::~Bit2d()
{
if (owning) {
- delete data;
+ delete[] data;
}
}
By the way you've done a nice cleanup !
Also in smbase/strtable.cc
:
--------------------------------- strtable.cc ---------------------------------
index 819aad3..b3ea7cf 100644
@@ -43,7 +43,7 @@ void StringTable::clear()
while (longStrings != NULL) {
LongString *temp = longStrings;
longStrings = longStrings->next;
- delete temp->data;
+ delete[] temp->data;
delete temp;
}
}
Have you done any cleanup/fixes to elsa
http://scottmcpeak.com/elkhound/sources/elsa/index.html ?
Testing with valgrind c/lexer2
:
valgrind ./lexer2.exe ../c.in/c.in4 | more
==30151== Memcheck, a memory error detector
==30151== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==30151== Using Valgrind-3.21.0 and LibVEX; rerun with -h for copyright info
==30151== Command: ./lexer2.exe ../c.in/c.in4
==30151==
==30151== Invalid read of size 1
==30151== at 0x11B801: StringHash::coreHash(char const*) (strhash.cc:117)
==30151== by 0x11A5F2: hashFunction (hashtbl.cc:12)
==30151== by 0x11A5F2: getEntry (hashtbl.cc:42)
==30151== by 0x11A5F2: HashTable::get(void const*) const (hashtbl.cc:67)
==30151== by 0x115B82: get (strhash.h:41)
==30151== by 0x115B82: get (strtable.cc:95)
==30151== by 0x115B82: StringTable::add(char const*) (strtable.cc:55)
==30151== by 0x10EF4C: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:537)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151== Address 0x5d1e6f8 is 0 bytes after a block of size 8 alloc'd
==30151== at 0x4C34AE3: operator new[](unsigned long) (vg_replace_malloc.c:714)
==30151== by 0x117978: setAllocatedSize (array.h:196)
==30151== by 0x117978: GrowArray<char>::eidLoop(int) (array.h:236)
==30151== by 0x11742D: ensureIndexDoubler (array.h:128)
==30151== by 0x11742D: setIndexDoubler (array.h:132)
==30151== by 0x11742D: push (array.h:299)
==30151== by 0x11742D: decodeEscapes(ArrayStack<char>&, string const&, char, bool) (strutil.cc:248)
==30151== by 0x10EC45: quotedUnescape(ArrayStack<char>&, string const&, char, bool) (lexer2.cc:441)
==30151== by 0x10EEE4: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:527)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151==
==30151== Invalid read of size 1
==30151== at 0x4C392D4: strlen (vg_replace_strmem.c:501)
==30151== by 0x115BA7: StringTable::add(char const*) (strtable.cc:60)
==30151== by 0x10EF4C: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:537)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151== Address 0x5d1e6f8 is 0 bytes after a block of size 8 alloc'd
==30151== at 0x4C34AE3: operator new[](unsigned long) (vg_replace_malloc.c:714)
==30151== by 0x117978: setAllocatedSize (array.h:196)
==30151== by 0x117978: GrowArray<char>::eidLoop(int) (array.h:236)
==30151== by 0x11742D: ensureIndexDoubler (array.h:128)
==30151== by 0x11742D: setIndexDoubler (array.h:132)
==30151== by 0x11742D: push (array.h:299)
==30151== by 0x11742D: decodeEscapes(ArrayStack<char>&, string const&, char, bool) (strutil.cc:248)
==30151== by 0x10EC45: quotedUnescape(ArrayStack<char>&, string const&, char, bool) (lexer2.cc:441)
==30151== by 0x10EEE4: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:527)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151==
==30151== Invalid read of size 1
==30151== at 0x4C3D3DE: memmove (vg_replace_strmem.c:1400)
==30151== by 0x115BE0: memcpy (string_fortified.h:34)
==30151== by 0x115BE0: StringTable::add(char const*) (strtable.cc:82)
==30151== by 0x10EF4C: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:537)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151== Address 0x5d1e6f8 is 0 bytes after a block of size 8 alloc'd
==30151== at 0x4C34AE3: operator new[](unsigned long) (vg_replace_malloc.c:714)
==30151== by 0x117978: setAllocatedSize (array.h:196)
==30151== by 0x117978: GrowArray<char>::eidLoop(int) (array.h:236)
==30151== by 0x11742D: ensureIndexDoubler (array.h:128)
==30151== by 0x11742D: setIndexDoubler (array.h:132)
==30151== by 0x11742D: push (array.h:299)
==30151== by 0x11742D: decodeEscapes(ArrayStack<char>&, string const&, char, bool) (strutil.cc:248)
==30151== by 0x10EC45: quotedUnescape(ArrayStack<char>&, string const&, char, bool) (lexer2.cc:441)
==30151== by 0x10EEE4: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:527)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151==
==30151== Conditional jump or move depends on uninitialised value(s)
==30151== at 0x11B806: StringHash::coreHash(char const*) (strhash.cc:117)
==30151== by 0x11A5F2: hashFunction (hashtbl.cc:12)
==30151== by 0x11A5F2: getEntry (hashtbl.cc:42)
==30151== by 0x11A5F2: HashTable::get(void const*) const (hashtbl.cc:67)
==30151== by 0x115B82: get (strhash.h:41)
==30151== by 0x115B82: get (strtable.cc:95)
==30151== by 0x115B82: StringTable::add(char const*) (strtable.cc:55)
==30151== by 0x10EF4C: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:537)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151==
==30151== Conditional jump or move depends on uninitialised value(s)
==30151== at 0x4C392D8: strlen (vg_replace_strmem.c:501)
==30151== by 0x115BA7: StringTable::add(char const*) (strtable.cc:60)
==30151== by 0x10EF4C: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:537)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151==
==30151== Conditional jump or move depends on uninitialised value(s)
==30151== at 0x11B806: StringHash::coreHash(char const*) (strhash.cc:117)
==30151== by 0x11A88F: hashFunction (hashtbl.cc:12)
==30151== by 0x11A88F: getEntry (hashtbl.cc:42)
==30151== by 0x11A88F: HashTable::add(void const*, void*) (hashtbl.cc:128)
==30151== by 0x115BF5: add (strhash.h:46)
==30151== by 0x115BF5: StringTable::add(char const*) (strtable.cc:87)
==30151== by 0x10EF4C: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:537)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151==
==30151== Conditional jump or move depends on uninitialised value(s)
==30151== at 0x4C3A2DB: strcmp (vg_replace_strmem.c:939)
==30151== by 0x11B7C8: StringHash::keyCompare(char const*, char const*) (strhash.cc:268)
==30151== by 0x11A60E: getEntry (hashtbl.cc:49)
==30151== by 0x11A60E: HashTable::get(void const*) const (hashtbl.cc:67)
==30151== by 0x115B82: get (strhash.h:41)
==30151== by 0x115B82: get (strtable.cc:95)
==30151== by 0x115B82: StringTable::add(char const*) (strtable.cc:55)
==30151== by 0x10EF4C: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:537)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151==
==30151== Conditional jump or move depends on uninitialised value(s)
==30151== at 0x11A611: getEntry (hashtbl.cc:49)
==30151== by 0x11A611: HashTable::get(void const*) const (hashtbl.cc:67)
==30151== by 0x115B82: get (strhash.h:41)
==30151== by 0x115B82: get (strtable.cc:95)
==30151== by 0x115B82: StringTable::add(char const*) (strtable.cc:55)
==30151== by 0x10EF4C: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:537)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151==
==30151== Invalid read of size 1
==30151== at 0x4C3D3D0: memmove (vg_replace_strmem.c:1400)
==30151== by 0x115BE0: memcpy (string_fortified.h:34)
==30151== by 0x115BE0: StringTable::add(char const*) (strtable.cc:82)
==30151== by 0x10EF4C: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:537)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151== Address 0x5d34672 is 0 bytes after a block of size 2 alloc'd
==30151== at 0x4C34AE3: operator new[](unsigned long) (vg_replace_malloc.c:714)
==30151== by 0x117978: setAllocatedSize (array.h:196)
==30151== by 0x117978: GrowArray<char>::eidLoop(int) (array.h:236)
==30151== by 0x11742D: ensureIndexDoubler (array.h:128)
==30151== by 0x11742D: setIndexDoubler (array.h:132)
==30151== by 0x11742D: push (array.h:299)
==30151== by 0x11742D: decodeEscapes(ArrayStack<char>&, string const&, char, bool) (strutil.cc:248)
==30151== by 0x10EC45: quotedUnescape(ArrayStack<char>&, string const&, char, bool) (lexer2.cc:441)
==30151== by 0x10EEE4: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:527)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151==
==30151== Conditional jump or move depends on uninitialised value(s)
==30151== at 0x4C392D8: strlen (vg_replace_strmem.c:501)
==30151== by 0x114F34: stringBuilder::operator+=(char const*) (str.cc:291)
==30151== by 0x10F03A: operator<< (str.h:312)
==30151== by 0x10F03A: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:483)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151==
==30151== Conditional jump or move depends on uninitialised value(s)
==30151== at 0x11B806: StringHash::coreHash(char const*) (strhash.cc:117)
==30151== by 0x11A702: hashFunction (hashtbl.cc:12)
==30151== by 0x11A702: getEntry (hashtbl.cc:42)
==30151== by 0x11A702: HashTable::resizeTable(int) (hashtbl.cc:101)
==30151== by 0x11A92A: HashTable::add(void const*, void*) (hashtbl.cc:123)
==30151== by 0x115BF5: add (strhash.h:46)
==30151== by 0x115BF5: StringTable::add(char const*) (strtable.cc:87)
==30151== by 0x10EE7A: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:543)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151==
==30151== Invalid read of size 1
==30151== at 0x4C3A2C8: strcmp (vg_replace_strmem.c:939)
==30151== by 0x11B7C8: StringHash::keyCompare(char const*, char const*) (strhash.cc:268)
==30151== by 0x11A60E: getEntry (hashtbl.cc:49)
==30151== by 0x11A60E: HashTable::get(void const*) const (hashtbl.cc:67)
==30151== by 0x115B82: get (strhash.h:41)
==30151== by 0x115B82: get (strtable.cc:95)
==30151== by 0x115B82: StringTable::add(char const*) (strtable.cc:55)
==30151== by 0x10EF4C: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:537)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151== Address 0x5d43ac2 is 0 bytes after a block of size 2 alloc'd
==30151== at 0x4C34AE3: operator new[](unsigned long) (vg_replace_malloc.c:714)
==30151== by 0x117978: setAllocatedSize (array.h:196)
==30151== by 0x117978: GrowArray<char>::eidLoop(int) (array.h:236)
==30151== by 0x1174BF: ensureIndexDoubler (array.h:128)
==30151== by 0x1174BF: setIndexDoubler (array.h:132)
==30151== by 0x1174BF: push (array.h:299)
==30151== by 0x1174BF: decodeEscapes(ArrayStack<char>&, string const&, char, bool) (strutil.cc:263)
==30151== by 0x10EC45: quotedUnescape(ArrayStack<char>&, string const&, char, bool) (lexer2.cc:441)
==30151== by 0x10EEE4: lexer2_lex(Lexer2&, Lexer1 const&, char const*) (lexer2.cc:527)
==30151== by 0x10F41B: lexer2_gettoken (lexer2.cc:721)
==30151== by 0x10DCA0: main (lexer2.cc:772)
==30151==
[L2] Token at L2_TYPEDEF: typedef
[L2] Token at L2_VOID: void
[L2] Token at (L2_NAME RETSIGTYPE): NAME(RETSIGTYPE)
[L2] Token at L2_SEMICOLON: ;
[L2] Token at L2_TYPEDEF: typedef
[L2] Token at L2_INT: int
[L2] Token at (L2_NAME Boolean): NAME(Boolean)
[L2] Token at L2_SEMICOLON: ;
[L2] Token at L2_TYPEDEF: typedef
[L2] Token at L2_STRUCT: struct
[L2] Token at (L2_NAME FILE): NAME(FILE)
[L2] Token at L2_LBRACE: {
[L2] Token at L2_INT: int
[L2] Token at (L2_NAME foo): NAME(foo)
[L2] Token at L2_SEMICOLON: ;
[L2] Token at L2_RBRACE: }
[L2] Token at (L2_NAME FILE): NAME(FILE)
[L2] Token at L2_SEMICOLON: ;
[L2] Token at L2_VOID: void
[L2] Token at (L2_NAME tcsetattr): NAME(tcsetattr)
[L2] Token at L2_LPAREN: (
[L2] Token at L2_ELLIPSIS: ...
[L2] Token at L2_RPAREN: )
[L2] Token at L2_SEMICOLON: ;
[L2] Token at L2_INT: int
[L2] Token at (L2_NAME fileno): NAME(fileno)
--More--==30151== Conditional jump or move depends on uninitialised value(s)
==30151== at 0x4C392D8: strlen (vg_replace_strmem.c:501)
==30151== by 0x114F34: stringBuilder::operator+=(char const*) (str.cc:291)
==30151== by 0x10E817: operator<< (str.h:312)
==30151== by 0x10E817: Lexer2Token::toStringType(bool, Lexer2TokenType) const (lexer2.cc:392)
==30151== by 0x10EB76: toString (lexer2.cc:372)
==30151== by 0x10EB76: Lexer2Token::print() const (lexer2.cc:433)
==30151== by 0x10DC9B: main (lexer2.cc:773)
==30151==
==30151== Conditional jump or move depends on uninitialised value(s)
==30151== at 0x4C392D8: strlen (vg_replace_strmem.c:501)
==30151== by 0x114F34: stringBuilder::operator+=(char const*) (str.cc:291)
==30151== by 0x10E817: operator<< (str.h:312)
==30151== by 0x10E817: Lexer2Token::toStringType(bool, Lexer2TokenType) const (lexer2.cc:392)
==30151== by 0x10EB93: toString (lexer2.cc:372)
==30151== by 0x10EB93: Lexer2Token::print() const (lexer2.cc:433)
==30151== by 0x10DC9B: main (lexer2.cc:773)
==30151==
==30151==
==30151== HEAP SUMMARY:
==30151== in use at exit: 203,353 bytes in 10,360 blocks
==30151== total heap usage: 19,708 allocs, 9,348 frees, 438,539 bytes allocated
==30151==
==30151== LEAK SUMMARY:
==30151== definitely lost: 0 bytes in 0 blocks
==30151== indirectly lost: 0 bytes in 0 blocks
==30151== possibly lost: 0 bytes in 0 blocks
==30151== still reachable: 203,353 bytes in 10,360 blocks
==30151== suppressed: 0 bytes in 0 blocks
==30151== Rerun with --leak-check=full to see details of leaked memory
==30151==
==30151== Use --track-origins=yes to see where uninitialised values come from
==30151== For lists of detected and suppressed errors, rerun with: -s
==30151== ERROR SUMMARY: 107 errors from 14 contexts (sup
...
Thanks for the reports! I think all the issues are resolved as of commit c0e3c9c959.
Regarding elsa, I've just published my working repo at https://github.com/smcpeak/elsa . It's got some cleanup done, as well as a half-baked Clang AST importer.
Regarding the Valgrind reports about reading uninitialized data, I believe those are false positives due to compiling with optimization enabled. GCC will generate code for certain operations that reads a little more than it naively should, but which works correctly because it knows the x86 rules. In contrast, Valgrind is applying C/C++ language rules, which are more restrictive. Try without optimization and if there are still legit-looking issues go ahead and file a new issue in GH.
The problem reported by valgrind
is due to the use of a non-NUL-terminated string here: L2->strValue = dest.idTable.add(tmp.getArray());
where the tmp
array doesn't have a terminating zero; see my fix shown below:
--------------------------------- c/lexer2.cc ---------------------------------
index 3d2875e..d266ee6 100644
@@ -534,6 +534,7 @@ void lexer2_lex(Lexer2 &dest, Lexer1 const &src, char const *fname)
break;
}
}
+ tmp.push('\0');
L2->strValue = dest.idTable.add(tmp.getArray());
break;
And now valgrind
reports this:
valgrind ./cparse.exe ../c.in/c.in4
==7229== Memcheck, a memory error detector
==7229== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==7229== Using Valgrind-3.21.0 and LibVEX; rerun with -h for copyright info
==7229== Command: ./cparse.exe ../c.in/c.in4
==7229==
* beginning of output -*- outline -*-
%%% progress: 1ms: building parse tables from internal data
%%% progress: 17ms: lexical analysis...
==7229== Conditional jump or move depends on uninitialised value(s)
==7229== at 0x149925: GLR::duplicateSemanticValue(short, unsigned long) (glr.cc:243)
==7229== by 0x14A6F0: GLR::grabTopSval(StackNode*) (glr.cc:720)
==7229== by 0x14BC7E: GLR::cleanupAfterParse(unsigned long&) (glr.cc:1479)
==7229== by 0x14B6A5: GLR::innerGlrParse(GLR&, LexerInterface&, unsigned long&) (glr.cc:1326)
==7229== by 0x14A839: GLR::glrParse(LexerInterface&, unsigned long&) (glr.cc:818)
==7229== by 0x11A6A0: glrParseNamedFile(GLR&, Lexer2&, unsigned long&, char const*) (parssppt.cc:62)
==7229== by 0x11A7D9: toplevelParse(ParseTreeAndTokens&, char const*) (parssppt.cc:75)
==7229== by 0x11A9E2: treeMain(ParseTreeAndTokens&, int, char**, char const*) (parssppt.cc:168)
==7229== by 0x148EF8: doit(int, char**) (main.cc:59)
==7229== by 0x1494F2: main (main.cc:132)
==7229==
==7229== Conditional jump or move depends on uninitialised value(s)
==7229== at 0x1499F7: deallocateSemanticValue(short, UserActions*, unsigned long) (glr.cc:266)
==7229== by 0x149BDB: StackNode::deallocSemanticValues() (glr.cc:373)
==7229== by 0x14E10F: StackNode::deinit() (glr.cc:351)
==7229== by 0x14E72F: ObjectPool<StackNode>::dealloc(StackNode*) (objpool.h:143)
==7229== by 0x14E36A: StackNode::decRefCt() (glr.cc:460)
==7229== by 0x149F74: decParserList(ArrayStack<StackNode*>&) (glr.cc:526)
==7229== by 0x14BCC6: GLR::cleanupAfterParse(unsigned long&) (glr.cc:1498)
==7229== by 0x14B6A5: GLR::innerGlrParse(GLR&, LexerInterface&, unsigned long&) (glr.cc:1326)
==7229== by 0x14A839: GLR::glrParse(LexerInterface&, unsigned long&) (glr.cc:818)
==7229== by 0x11A6A0: glrParseNamedFile(GLR&, Lexer2&, unsigned long&, char const*) (parssppt.cc:62)
==7229== by 0x11A7D9: toplevelParse(ParseTreeAndTokens&, char const*) (parssppt.cc:75)
==7229== by 0x11A9E2: treeMain(ParseTreeAndTokens&, int, char**, char const*) (parssppt.cc:168)
==7229==
%%% progress: 309ms: done parsing (303 ms, 0_000000 cycles)
%%% progress: 330ms: type checking...
../c.in/c.in4:85:3: error: you have to mark the global saved_tio as thmprv_attr(addrtaken) if you take its address
../c.in/c.in4:273:9: error: you have to mark the global saved_tio as thmprv_attr(addrtaken) if you take its address
../c.in/c.in4:276:9: error: you have to mark the global saved_tio as thmprv_attr(addrtaken) if you take its address
../c.in/c.in4:297:7: error: you have to mark the global saved_tio as thmprv_attr(addrtaken) if you take its address
../c.in/c.in4:314:3: error: you have to mark the global saved_tio as thmprv_attr(addrtaken) if you take its address
../c.in/c.in4:235:5: warning: circular path
../c.in/c.in4:235:5: warning: circular path
../c.in/c.in4:188:3: warning: circular path
../c.in/c.in4:188:3: warning: circular path
../c.in/c.in4:188:3: warning: circular path
../c.in/c.in4:188:3: warning: circular path
../c.in/c.in4:188:3: warning: circular path
../c.in/c.in4:134:15: warning: circular path
../c.in/c.in4:134:15: warning: circular path
../c.in/c.in4:134:15: warning: circular path
../c.in/c.in4:134:15: warning: circular path
../c.in/c.in4:134:15: warning: circular path
../c.in/c.in4:134:15: warning: circular path
../c.in/c.in4:235:5: warning: circular path
../c.in/c.in4:235:5: warning: circular path
../c.in/c.in4:188:3: warning: circular path
../c.in/c.in4:188:3: warning: circular path
../c.in/c.in4:188:3: warning: circular path
../c.in/c.in4:188:3: warning: circular path
../c.in/c.in4:188:3: warning: circular path
../c.in/c.in4:366:3: warning: circular path
../c.in/c.in4:388:7: warning: circular path
../c.in/c.in4:385:7: warning: circular path
../c.in/c.in4:366:3: warning: circular path
../c.in/c.in4:388:7: warning: circular path
../c.in/c.in4:385:7: warning: circular path
there were 5 typechecking errors
==7229==
==7229== HEAP SUMMARY:
==7229== in use at exit: 104,358 bytes in 2,738 blocks
==7229== total heap usage: 17,073 allocs, 14,335 frees, 516,311 bytes allocated
==7229==
==7229== LEAK SUMMARY:
==7229== definitely lost: 0 bytes in 0 blocks
==7229== indirectly lost: 0 bytes in 0 blocks
==7229== possibly lost: 0 bytes in 0 blocks
==7229== still reachable: 104,358 bytes in 2,738 blocks
==7229== suppressed: 0 bytes in 0 blocks
==7229== Rerun with --leak-check=full to see details of leaked memory
==7229==
==7229== Use --track-origins=yes to see where uninitialised values come from
==7229== For lists of detected and suppressed errors, rerun with: -s
==7229== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
The problem reported by
valgrind
is due to the use of a non-NUL-terminated string here: L2->strValue = dest.idTable.add(tmp.getArray());
where the tmp
array doesn't have a terminating zero; see my fix shown below:
--------------------------------- c/lexer2.cc ---------------------------------
index 3d2875e..d266ee6 100644
@@ -534,6 +534,7 @@ void lexer2_lex(Lexer2 &dest, Lexer1 const &src, char const *fname)
break;
}
}
+ tmp.push('\0');
L2->strValue = dest.idTable.add(tmp.getArray());
break;
There are more places where the same logic happens and no zero termination is added!
To test elkhound
I converted a naked PostgreSQL-16
grammar (see attached) and here are some observations:
$ /usr/bin/time ../elkhound/elkhound.exe -tr bison postgresql16.gr
4 unreachable terminals
8.25user 0.21system 0:08.54elapsed 99%CPU (0avgtext+0avgdata 235400maxresident)k
0inputs+88104outputs (0major+79043minor)pagefaults 0swaps
$ /usr/bin/time bison-nb postgresql16.y
1.87user 0.02system 0:01.85elapsed 102%CPU (0avgtext+0avgdata 18748maxresident)k
0inputs+5280outputs (0major+11738minor)pagefaults 0swaps
elkhound
takes about 4 times longer and uses more than 10 times more memory than bison-3.8.2
; the generated parser source code is also around 15 times bigger:
ls -l *.c*
-rw-rw-r-- 1 mingo mingo 44748273 ene 9 15:11 postgresql16.cc
-rw-rw-r-- 1 mingo mingo 2702068 ene 9 15:11 postgresql16.tab.c
Here is the output with -v
option:
/usr/bin/time ../elkhound/elkhound.exe -v postgresql16.gr
%%% progress: 0ms: parsing grammar source: postgresql16.gr
%%% progress: 39ms: parsing grammar AST..
%%% progress: 421ms: beginning grammar analysis..
%%% progress: 424ms: init...
%%% progress: 426ms: derivability relation...
%%% progress: 449ms: first...
%%% progress: 457ms: follow...
%%% progress: 462ms: LR item sets...
%%% progress: 6586ms: done with LR sets: 6221 states
%%% progress: 6586ms: BFS tree on transition graph...
%%% progress: 7015ms: state renumbering...
%%% progress: 7047ms: parse tables...
4 unreachable terminals
%%% progress: 7203ms: emitting C++ code to postgresql16.cc and postgresql16.h ...
%%% progress: 8211ms: done
7.98user 0.17system 0:08.24elapsed 98%CPU (0avgtext+0avgdata 235436maxresident)k
0inputs+87696outputs (0major+79067minor)pagefaults 0swaps
How to see the 4 unreachable terminals
?
The Valgrind issues reported on ./cparse.exe ../c.in/c.in4
are fixed as of commit dc369d8d7d. They were indeed valid; I'd seen some similar-looking false positives in the past, but I was wrong in thinking these were also FPs.
You say there are other places lacking zero termination, but I didn't see any. Can you be more specific?
Regarding the comparison to Bison, the time taken for grammar analysis and the size of the resulting output were never the subject of serious optimization efforts, so it's not too surprising that Elkhound is worse in these respects. The main comparison criterion was simply the time taken by the generated parser, with the results summarized in section 6 of the Elkhound Technical Report.
Regarding the unreachable terminals, if you add the -tr lrtable
switch to the elkhound
command line, it will write a file (postgresql16.out
) with additional information. At the top of that file it says:
unreachable terminals:
UMINUS
__dummy_filler_token2
__dummy_filler_token5
__dummy_filler_token12
The UMINUS
symbol is indeed never used in any production, aside from precedence
declarations.
The other three are the terminals with codes 2, 5, and 12. Elkhound's algorithm needs all of the code values to be used within the range of codes, so it synthesizes these "filler" tokens, which of course then end up unused. It wouldn't be hard to filter those out of the "unused terminals" report but I never had a need to do so.
Thank you for the reply!
After fixing the token range I'm getting the expected/known 1 unreachable terminal
.
With a script that extracts the rows from the parser tables and compares them, I found:
For postgresql16
, in actionTable_static
around 20% of the rows are duplicates, and in gotoTable_static
around 68% of the rows are duplicates.
Output:
$ squilu try-compress-parser-tables.nut
44691980
12837520
actionTable_static 6221 4978
30853137
gotoTable_static 6221 1996
For elsa
, in actionTable_static
around 18% of the rows are duplicates, and in gotoTable_static
around 67% of the rows are duplicates.
Output:
$ squilu try-compress-parser-tables.nut
2977727
715404
actionTable_static 1382 1128
1902119
gotoTable_static 1382 457
try-compress-parser-tables.nut:
__max_print_stack_str_size <- 1000;
auto fname = "postgresql16.cc";
//auto fname = "../elsa/cc.cc";
auto txt = readfile(fname);
print(txt.len());
function checkDuplicates(name, txt_lines)
{
auto lines = {};
auto lines_count = 0;
txt_lines.gmatch(
"/%* *%d+%*/ ([^\n]+)",
function(line)
{
++lines_count;
//print(line);
//merge/group identical lines and count then
table_incnum(lines, line, 1, true);
return true;
}
);
print(name, lines_count, table_len(lines));
}
auto actionTable_static = txt.match("static ActionEntry const actionTable_static%[%d+%] = (%b{});");
print(actionTable_static.len());
checkDuplicates("actionTable_static", actionTable_static);
auto gotoTable_static = txt.match("static GotoEntry const gotoTable_static%[%d+%] = (%b{});");
print(gotoTable_static.len());
checkDuplicates("gotoTable_static", gotoTable_static);
The Valgrind issues reported on
./cparse.exe ../c.in/c.in4
are fixed as of commit dc369d8. They were indeed valid; I'd seen some similar-looking false positives in the past, but I was wrong in thinking these were also FPs. You say there are other places lacking zero termination, but I didn't see any. Can you be more specific?
Here https://github.com/smcpeak/elkhound/blob/dc369d8d7d3a969789779763bd61ea3a8c5b9883/c/lexer2.cc#L497
There is the same pattern here https://github.com/smcpeak/elkhound/blob/dc369d8d7d3a969789779763bd61ea3a8c5b9883/c/lexer2.cc#L574 but there it is not referenced as a string
By default, the tables aren't compressed at all. The elkhound/configure.pl
script accepts some options that enable various forms of compression, but I don't remember what any of them do specifically!
Unless I'm mistaken, the line:
sb.append(tempString.getArray(), tempString.length());
should be safe because stringBuilder
maintains a NUL-terminated array, and append
is explicitly passed the length.
The line:
L2->charValue = tmp[0];
should also be safe because we're only getting the one char
value, so the absence of a terminator does not matter.
Thank you again for reply !
Yes you're right !
After your reply I've looked at it in more detail (before, I only did a search for quotedUnescape
usages and found the same pattern of usage, but didn't do a thorough examination of them because the valgrind test was not giving the same error anymore).
Sorry about the false alarm on that !
By default, the tables aren't compressed at all. The
elkhound/configure.pl
script accepts some options that enable various forms of compression, but I don't remember what any of them do specifically!
Indeed after adding the missing
# for a boolean option:
# -foo -> true
# -foo=1 -> true
# -foo=0 -> false
sub getBoolArg {
if ($value eq "" || $value eq "1") {
return 1;
}
elsif ($value eq "0") {
return 0;
}
else {
die("option $option expects either no argument, or arg 0 or 1\n");
}
}
to configure.pl
and running ./configure compress=1
then running try-compress-parser-tables.nut
on elsa/cc.gr.gen.cc
we get this:
squilu "try-compress-parser-tables.nut"
543996
36605
actionTable_static 122 122
60593
gotoTable_static 306 306
But for postgresql16
then we get several assertions failing like:
/usr/bin/time ../elkhound/elkhound.exe -v postgresql16.gr
%%% progress: 0ms: parsing grammar source: postgresql16.gr
%%% progress: 50ms: parsing grammar AST..
%%% progress: 437ms: beginning grammar analysis..
%%% progress: 440ms: init...
%%% progress: 441ms: derivability relation...
%%% progress: 465ms: first...
%%% progress: 474ms: follow...
%%% progress: 480ms: LR item sets...
%%% progress: 7425ms: done with LR sets: 6221 states
%%% progress: 7425ms: BFS tree on transition graph...
%%% progress: 7867ms: state renumbering...
%%% progress: 7899ms: parse tables...
Assertion failed: index 163 truncated!, file parsetables.cc line 236
Command exited with non-zero status 4
7.88user 0.09system 0:07.97elapsed 99%CPU (0avgtext+0avgdata 145576maxresident)k
0inputs+0outputs (0major+35602minor)pagefaults 0swaps
Then configuring with ./configure eef=1 gcs=1 gcsc=1
we get this:
/usr/bin/time ../elkhound/elkhound.exe -v postgresql16.gr%%% progress: 0ms: parsing grammar source: postgresql16.gr
%%% progress: 40ms: parsing grammar AST..
%%% progress: 420ms: beginning grammar analysis..
%%% progress: 423ms: init...
%%% progress: 425ms: derivability relation...
%%% progress: 448ms: first...
%%% progress: 456ms: follow...
%%% progress: 461ms: LR item sets...
%%% progress: 6600ms: done with LR sets: 6221 states
%%% progress: 6600ms: BFS tree on transition graph...
%%% progress: 7027ms: state renumbering...
%%% progress: 7059ms: parse tables...
%%% progress: 7176ms: computing errorBits[]
%%% progress: 7212ms: merging action columns
Assertion failed: ti == c, file parsetables.cc line 587
Command exited with non-zero status 4
7.31user 0.09system 0:07.41elapsed 99%CPU (0avgtext+0avgdata 158892maxresident)k
0inputs+0outputs (0major+39068minor)pagefaults 0swaps
Then manually changing this line https://github.com/smcpeak/elkhound/blob/b603b43c480341ca9a5034d27df65aa73ecb76dc/parsetables.h#L78 to
typedef unsigned short TermIndex;
we can generate a parser for postgresql16
:
/usr/bin/time ../elkhound/elkhound.exe -v postgresql16.gr%%% progress: 0ms: parsing grammar source: postgresql16.gr
%%% progress: 41ms: parsing grammar AST..
%%% progress: 423ms: beginning grammar analysis..
%%% progress: 430ms: init...
%%% progress: 432ms: derivability relation...
%%% progress: 458ms: first...
%%% progress: 465ms: follow...
%%% progress: 471ms: LR item sets...
%%% progress: 6518ms: done with LR sets: 6221 states
%%% progress: 6518ms: BFS tree on transition graph...
%%% progress: 6942ms: state renumbering...
%%% progress: 6978ms: parse tables...
%%% progress: 7092ms: computing errorBits[]
%%% progress: 7128ms: merging action columns
%%% progress: 7256ms: merging action rows
%%% progress: 12811ms: merging goto columns
%%% progress: 17680ms: merging goto rows
1 unreachable terminals
%%% progress: 20863ms: emitting C++ code to postgresql16.cc and postgresql16.h ...
%%% progress: 21140ms: done
21.01user 0.12system 0:21.15elapsed 99%CPU (0avgtext+0avgdata 173660maxresident)k
0inputs+19704outputs (0major+48213minor)pagefaults 0swaps
and the output of squilu try-compress-parser-tables.nut
is now:
9936436
7032468
actionTable_static 3303 3303
1315121
gotoTable_static 1368 1368
the size of the generated parser is now:
-rw-rw-r-- 1 mingo mingo 9936436 ene 10 13:04 postgresql16.cc
-rw-rw-r-- 1 mingo mingo 2702068 ene 9 15:11 postgresql16.tab.c
So the memory usage and the size of the generated parser decreased considerably.
Probably if using unsigned short
instead of unsigned char
for the CRS_COMPRESSION
we could have it enabled too for postgresql16
or grammars of similar size/complexity.
Oops! The missing getBoolArg
function is re-added in commit b603b43c48. Thanks for tracking that down.
It looks like you're already ahead of me on diagnosing the limitations of the compression schemes. It looks like they are at least partially documented in parsetables.h
(after around line 200).
With the changes shown below I can get to parse postgresql16
with full compression:
-------------------------------- parsetables.h --------------------------------
index 04cf434..d4d67ba 100644
@@ -28,15 +28,15 @@ inline ostream& operator<< (ostream &os, StateId id)
#if ENABLE_CRS_COMPRESSION
// high bits encoding
enum ActionEntryKind {
- AE_MASK = 0xC0, // selection mask
- AE_SHIFT = 0x00, // 00 = shift
- AE_REDUCE = 0x40, // 01 = reduce
- AE_AMBIGUOUS = 0x80, // 10 = ambiguous
- AE_ERROR = 0xC0, // 11 = error (if EEF is off)
- AE_MAXINDEX = 63 // maximum value of lower bits
+ AE_MASK = 0xC000, // selection mask
+ AE_SHIFT = 0x0000, // 00 = shift
+ AE_REDUCE = 0x4000, // 01 = reduce
+ AE_AMBIGUOUS = 0x8000, // 10 = ambiguous
+ AE_ERROR = 0xC000, // 11 = error (if EEF is off)
+ AE_MAXINDEX = 0x3FFF // maximum value of lower bits
};
- // remaining 6 bits:
+ // remaining 14 bits:
//
// shift: desination state, encoded as an offset from the
// first state that that terminal can reach
@@ -47,7 +47,7 @@ inline ostream& operator<< (ostream &os, StateId id)
// ambiguous: for each state, have an array of ActionEntries.
// ambiguous entries index into this array. first indexed
// entry is the count of how many actions follow
- typedef unsigned char ActionEntry;
+ typedef unsigned short ActionEntry;
ActionEntry makeAE(ActionEntryKind k, int index);
#define errorActionEntry ((ActionEntry)AE_ERROR)
#else
@@ -75,7 +75,7 @@ inline ostream& operator<< (ostream &os, StateId id)
// name a terminal using an index
-typedef unsigned char TermIndex;
+typedef unsigned short TermIndex;
// name a nonterminal using an index
typedef unsigned short NtIndex;
@@ -201,7 +201,7 @@ protected: // data
// --------------------- table compression ----------------------
// table compression techniques taken from:
- // [DDH] Peter Dencker, Karl Dürre, and Johannes Heuft.
+ // [DDH] Peter Dencker, Karl D�rre, and Johannes Heuft.
// Optimization of Parser Tables for Portable Compilers.
// In ACM TOPLAS, 6, 4 (1984) 546-572.
// http://citeseer.nj.nec.com/context/27540/0 (not in database)
Output :
/usr/bin/time ../elkhound/elkhound.exe -v postgresql16.gr%%% progress: 0ms: parsing grammar source: postgresql16.gr
%%% progress: 42ms: parsing grammar AST..
%%% progress: 424ms: beginning grammar analysis..
%%% progress: 427ms: init...
%%% progress: 428ms: derivability relation...
%%% progress: 458ms: first...
%%% progress: 466ms: follow...
%%% progress: 472ms: LR item sets...
%%% progress: 6668ms: done with LR sets: 6221 states
%%% progress: 6668ms: BFS tree on transition graph...
%%% progress: 7095ms: state renumbering...
%%% progress: 7130ms: parse tables...
%%% progress: 7252ms: computing errorBits[]
%%% progress: 7289ms: merging action columns
%%% progress: 7757ms: merging action rows
%%% progress: 11571ms: merging goto columns
%%% progress: 15724ms: merging goto rows
1 unreachable terminals
%%% progress: 16941ms: emitting C++ code to postgresql16.cc and postgresql16.h ...
%%% progress: 17036ms: done
16.97user 0.08system 0:17.05elapsed 100%CPU (0avgtext+0avgdata 151316maxresident)k
0inputs+4784outputs (0major+39486minor)pagefaults 0swaps
And the output of squilu try-compress-parser-tables.nut
:
2297536
494621
actionTable_static 352 352
151376
gotoTable_static 1089 1089
And the size of the generated parser is now smaller than the one generated by bison
:
-rw-rw-r-- 1 mingo mingo 2297536 ene 10 14:15 postgresql16.cc
-rw-rw-r-- 1 mingo mingo 2702068 ene 9 15:11 postgresql16.tab.c
Now checking with valgrind
there are 2 memory leaks related to the use of compression:
valgrind --leak-check=full ../elkhound/elkhound.exe -v postgresql16.gr
==11437== Memcheck, a memory error detector
==11437== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==11437== Using Valgrind-3.21.0 and LibVEX; rerun with -h for copyright info
==11437== Command: ../elkhound/elkhound.exe -v postgresql16.gr
==11437==
%%% progress: 0ms: parsing grammar source: postgresql16.gr
%%% progress: 1206ms: parsing grammar AST..
%%% progress: 6416ms: beginning grammar analysis..
%%% progress: 6622ms: init...
%%% progress: 6648ms: derivability relation...
%%% progress: 7224ms: first...
%%% progress: 7419ms: follow...
%%% progress: 7550ms: LR item sets...
%%% progress: 88491ms: done with LR sets: 6221 states
%%% progress: 88492ms: BFS tree on transition graph...
%%% progress: 91889ms: state renumbering...
%%% progress: 92368ms: parse tables...
%%% progress: 96806ms: computing errorBits[]
%%% progress: 97587ms: merging action columns
%%% progress: 101936ms: merging action rows
%%% progress: 183454ms: merging goto columns
%%% progress: 226060ms: merging goto rows
1 unreachable terminals
%%% progress: 246741ms: emitting C++ code to postgresql16.cc and postgresql16.h ...
%%% progress: 249395ms: done
==11437==
==11437== HEAP SUMMARY:
==11437== in use at exit: 99,536 bytes in 2 blocks
==11437== total heap usage: 7,087,800 allocs, 7,087,798 frees, 429,091,140 bytes allocated
==11437==
==11437== 49,768 bytes in 1 blocks are definitely lost in loss record 1 of 2
==11437== at 0x4C34AE3: operator new[](unsigned long) (vg_replace_malloc.c:714)
==11437== by 0x134BA7: void allocZeroArray<unsigned short*>(unsigned short**&, int) (parsetables.cc:42)
==11437== by 0x133262: ParseTables::alloc(int, int, int, int, StateId, int) (parsetables.cc:94)
==11437== by 0x11B1A9: GrammarAnalysis::computeParseTables(bool) (gramanl.cc:3183)
==11437== by 0x11D3FC: GrammarAnalysis::runAnalyses(char const*) (gramanl.cc:4019)
==11437== by 0x11E409: inner_entry(int, char**) (gramanl.cc:4995)
==11437== by 0x11EBB8: entry(int, char**) (gramanl.cc:5068)
==11437== by 0x110C25: main (gramanl.cc:5074)
==11437==
==11437== 49,768 bytes in 1 blocks are definitely lost in loss record 2 of 2
==11437== at 0x4C34AE3: operator new[](unsigned long) (vg_replace_malloc.c:714)
==11437== by 0x134BA7: void allocZeroArray<unsigned short*>(unsigned short**&, int) (parsetables.cc:42)
==11437== by 0x133271: ParseTables::alloc(int, int, int, int, StateId, int) (parsetables.cc:101)
==11437== by 0x11B1A9: GrammarAnalysis::computeParseTables(bool) (gramanl.cc:3183)
==11437== by 0x11D3FC: GrammarAnalysis::runAnalyses(char const*) (gramanl.cc:4019)
==11437== by 0x11E409: inner_entry(int, char**) (gramanl.cc:4995)
==11437== by 0x11EBB8: entry(int, char**) (gramanl.cc:5068)
==11437== by 0x110C25: main (gramanl.cc:5074)
==11437==
==11437== LEAK SUMMARY:
==11437== definitely lost: 99,536 bytes in 2 blocks
==11437== indirectly lost: 0 bytes in 0 blocks
==11437== possibly lost: 0 bytes in 0 blocks
==11437== still reachable: 0 bytes in 0 blocks
==11437== suppressed: 0 bytes in 0 blocks
==11437==
==11437== For lists of detected and suppressed errors, rerun with: -s
==11437== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
And this seems to fix it:
-------------------------------- parsetables.cc --------------------------------
index e6d82ad..c476336 100644
@@ -416,10 +416,12 @@ void ParseTables::finishTables()
copyArray(bigProductionListSize, bigProductionList, temp->bigProductionList);
// transfer productionsForState, translating indices into pointers
+ if(productionsForState) delete[] productionsForState;
copyIndexPtrArray(numStates, productionsForState, bigProductionList,
temp->productionsForState);
// ambigStateTable
+ if(ambigStateTable) delete[] ambigStateTable;
copyIndexPtrArray(numStates, ambigStateTable, ambigTable,
temp->ambigStateTable);
}
After the fixes:
valgrind --leak-check=full ../elkhound/elkhound.exe -v postgresql16.gr
==11887== Memcheck, a memory error detector
==11887== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==11887== Using Valgrind-3.21.0 and LibVEX; rerun with -h for copyright info
==11887== Command: ../elkhound/elkhound.exe -v postgresql16.gr
==11887==
%%% progress: 0ms: parsing grammar source: postgresql16.gr
%%% progress: 1204ms: parsing grammar AST..
%%% progress: 6530ms: beginning grammar analysis..
%%% progress: 6731ms: init...
%%% progress: 6758ms: derivability relation...
%%% progress: 7371ms: first...
%%% progress: 7574ms: follow...
%%% progress: 7716ms: LR item sets...
%%% progress: 89238ms: done with LR sets: 6221 states
%%% progress: 89238ms: BFS tree on transition graph...
%%% progress: 92647ms: state renumbering...
%%% progress: 93129ms: parse tables...
%%% progress: 97572ms: computing errorBits[]
%%% progress: 98361ms: merging action columns
%%% progress: 102855ms: merging action rows
%%% progress: 184405ms: merging goto columns
%%% progress: 227106ms: merging goto rows
1 unreachable terminals
%%% progress: 247803ms: emitting C++ code to postgresql16.cc and postgresql16.h ...
%%% progress: 250429ms: done
==11887==
==11887== HEAP SUMMARY:
==11887== in use at exit: 0 bytes in 0 blocks
==11887== total heap usage: 7,087,800 allocs, 7,087,800 frees, 429,091,140 bytes allocated
==11887==
==11887== All heap blocks were freed -- no leaks are possible
==11887==
==11887== For lists of detected and suppressed errors, rerun with: -s
==11887== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
And with compression another leak appears when building the elsa
grammar:
==12581== Memcheck, a memory error detector
==12581== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==12581== Using Valgrind-3.21.0 and LibVEX; rerun with -h for copyright info
==12581== Command: ../elkhound/elkhound.exe -tr bison cc.gr kandr.gr gnu.gr
==12581==
123 shift/reduce conflicts (expected 76)
180 reduce/reduce conflicts (expected 94)
==12581==
==12581== HEAP SUMMARY:
==12581== in use at exit: 136 bytes in 8 blocks
==12581== total heap usage: 515,570 allocs, 515,562 frees, 26,913,248 bytes allocated
==12581==
==12581== 136 (64 direct, 72 indirect) bytes in 4 blocks are definitely lost in loss record 2 of 2
==12581== at 0x4C337DB: operator new(unsigned long) (vg_replace_malloc.c:472)
==12581== by 0x1215DF: addToTerminalSet(TerminalSet*&, Terminal*, int) [clone .isra.0] (grammar.cc:346)
==12581== by 0x12E76D: astParseNonterm(Environment&, TF_nonterm const*) (grampar.cc:725)
==12581== by 0x12FBE1: astParseGrammar(Grammar&, GrammarAST*) (grampar.cc:360)
==12581== by 0x12FD2E: parseGrammarAST(Grammar&, GrammarAST*) (grampar.cc:1294)
==12581== by 0x11E309: inner_entry(int, char**) (gramanl.cc:4985)
==12581== by 0x11EBB8: entry(int, char**) (gramanl.cc:5068)
==12581== by 0x110C25: main (gramanl.cc:5074)
==12581==
==12581== LEAK SUMMARY:
==12581== definitely lost: 64 bytes in 4 blocks
==12581== indirectly lost: 72 bytes in 4 blocks
==12581== possibly lost: 0 bytes in 0 blocks
==12581== still reachable: 0 bytes in 0 blocks
==12581== suppressed: 0 bytes in 0 blocks
==12581==
==12581== For lists of detected and suppressed errors, rerun with: -s
==12581== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
Related to the last memory leak there is this piece of code in grammar.cc
that casts Terminal *t
into TerminalSet *&set
through the call of addToTerminalSet
in addNTForbid
:
// it's somewhat unfortunate that I have to be told the
// total number of terminals, but oh well
static void addToTerminalSet(TerminalSet *&set, Terminal *t, int numTerminals)
{
if (!set) {
set = new TerminalSet(numTerminals);
}
set->add(t->termIndex);
}
void Nonterminal::addNTForbid(Terminal *t, int totalNumTerminals)
{
addToTerminalSet(ntForbid, t, totalNumTerminals);
}
Forget my last comment, I've been confused by the order of parameters, there is ntForbid
that is the correct type.
This seems to fix the memory leak :
---------------------------------- grammar.cc ----------------------------------
index d35dd21..f436c9c 100644
@@ -244,7 +244,9 @@ Nonterminal::Nonterminal(LocString const &name, bool isEmpty)
{}
Nonterminal::~Nonterminal()
-{}
+{
+ if(ntForbid) delete ntForbid;
+}
Nonterminal::Nonterminal(Flatten &flat)
==13441== Memcheck, a memory error detector
==13441== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==13441== Using Valgrind-3.21.0 and LibVEX; rerun with -h for copyright info
==13441== Command: ../elkhound/elkhound.exe -tr bison cc.gr kandr.gr gnu.gr
==13441==
123 shift/reduce conflicts (expected 76)
180 reduce/reduce conflicts (expected 94)
==13441==
==13441== HEAP SUMMARY:
==13441== in use at exit: 0 bytes in 0 blocks
==13441== total heap usage: 515,570 allocs, 515,570 frees, 26,913,248 bytes allocated
==13441==
==13441== All heap blocks were freed -- no leaks are possible
==13441==
==13441== For lists of detected and suppressed errors, rerun with: -s
==13441== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
When testing elsa
built with elkhound
without compression I'm getting this:
==18309== Memcheck, a memory error detector
==18309== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==18309== Using Valgrind-3.21.0 and LibVEX; rerun with -h for copyright info
==18309== Command: ../elsa/ccparse.exe tmp.c
==18309==
In state 916, I expected one of these tokens:
<name>, operator, template, (, ::, ~, &, *,
tmp.c:796:36: Parse error (state 916) at ;
error: parse error
==18309==
==18309== HEAP SUMMARY:
==18309== in use at exit: 583,728 bytes in 14,160 blocks
==18309== total heap usage: 17,735 allocs, 3,575 frees, 833,939 bytes allocated
==18309==
==18309== LEAK SUMMARY:
==18309== definitely lost: 51,488 bytes in 1,454 blocks
==18309== indirectly lost: 532,240 bytes in 12,706 blocks
==18309== possibly lost: 0 bytes in 0 blocks
==18309== still reachable: 0 bytes in 0 blocks
==18309== suppressed: 0 bytes in 0 blocks
==18309== Rerun with --leak-check=full to see details of leaked memory
==18309==
==18309== For lists of detected and suppressed errors, rerun with: -s
==18309== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
with elkhound
with compression:
==18477== Memcheck, a memory error detector
==18477== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==18477== Using Valgrind-3.21.0 and LibVEX; rerun with -h for copyright info
==18477== Command: ../elsa/ccparse.exe tmp.c
==18477==
terminate called after throwing an instance of 'x_assert'
==18477==
==18477== Process terminating with default action of signal 6 (SIGABRT)
==18477== at 0x5504E87: raise (raise.c:51)
==18477== by 0x55067F0: abort (abort.c:79)
==18477== by 0x4EE3645: ??? (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.32)
==18477== by 0x4EF4F05: ??? (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.32)
==18477== by 0x4EF3F18: ??? (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.32)
==18477== by 0x4EF4674: __gxx_personality_v0 (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.32)
==18477== by 0x52BC363: ??? (in /lib/x86_64-linux-gnu/libgcc_s.so.1)
==18477== by 0x52BCA10: _Unwind_RaiseException (in /lib/x86_64-linux-gnu/libgcc_s.so.1)
==18477== by 0x4EF51A6: __cxa_throw (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.32)
==18477== by 0x208E7E: x_assert_fail(char const*, char const*, int) (exc.cpp:111)
==18477== by 0x1FB4CA: decRefCt (glr.cc:455)
==18477== by 0x1FB4CA: dec (rcptr.h:30)
==18477== by 0x1FB4CA: ~RCPtr (rcptr.h:35)
==18477== by 0x1FB4CA: ~SiblingLink (glr.cc:291)
==18477== by 0x1FB4CA: StackNode::~StackNode() (glr.cc:311)
==18477== by 0x20015B: ObjectPool<StackNode>::~ObjectPool() (objpool.h:80)
==18477==
==18477== HEAP SUMMARY:
==18477== in use at exit: 795,773 bytes in 14,262 blocks
==18477== total heap usage: 17,731 allocs, 3,469 frees, 884,926 bytes allocated
==18477==
==18477== LEAK SUMMARY:
==18477== definitely lost: 50,888 bytes in 1,442 blocks
==18477== indirectly lost: 31,192 bytes in 858 blocks
==18477== possibly lost: 0 bytes in 0 blocks
==18477== still reachable: 713,693 bytes in 11,962 blocks
==18477== of which reachable via heuristic:
==18477== newarray : 5,784 bytes in 3 blocks
==18477== suppressed: 0 bytes in 0 blocks
==18477== Rerun with --leak-check=full to see details of leaked memory
==18477==
==18477== For lists of detected and suppressed errors, rerun with: -s
==18477== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Aborted (core dumped)
Here is the file being used to test: tmp.c.zip
As of commit b391ca0b65, I've fixed the leak of Nonterminal::ntForbid
and the leaks of productionsForState
and ambigStateTable
. I also expanded ActionEntry
and TermIndex
to 16 bits with CRS compression as you suggested.
I don't see the reported assertion failure in elsa
. Without table compression, I get the parse error you do, and with table compression, it just says "parse error" without any assertion failure. (The lack of a more informative diagnostic is a consequence of table compression, since the crude diagnostics Elkhound provides only work with uncompressed tables.) Could you file that as a separate issue? (File it against Elkhound since the stack trace points there.)
I just tested again and found that the terminate called after throwing an instance of 'x_assert'
only happens when I build elkhound
with optimization OPTIMIZATION_FLAGS = -O2
.
Tested again building everything with -O2
:
terminate called after throwing an instance of 'x_assert'
terminate called after throwing an instance of 'x_assert'
error: parse error
Funny problem !
Note that g++
warns about this:
g++ -c -o grammar.o -MMD -g -O2 -Wall -Wextra -Wno-unused-parameter -std=c++11 -I../smbase -I../ast -I. -Ic grammar.cc
grammar.cc: In member function ‘void Terminal::xfer(Flatten&)’:
grammar.cc:168:24: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing]
168 | flat.xferInt32((int&)precedence);
| ^~~~~~~~~~
grammar.cc: In member function ‘void Production::xfer(Flatten&)’:
grammar.cc:588:24: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing]
588 | flat.xferInt32((int&)precedence);
| ^~~~~~~~~~
I've changed grammar.cc
to eliminate the warnings but the terminate called after throwing an instance of 'x_assert'
is still present:
---------------------------------- grammar.cc ----------------------------------
index d35dd21..b573ab7 100644
@@ -165,7 +165,8 @@ void Terminal::xfer(Flatten &flat)
alias.xfer(flat);
- flat.xferInt32((int&)precedence);
+ int prec = precedence;
+ flat.xferInt32(prec);
xferEnum(flat, associativity);
flat.xferInt32(termIndex);
@@ -244,7 +245,9 @@ Nonterminal::Nonterminal(LocString const &name, bool isEmpty)
{}
Nonterminal::~Nonterminal()
-{}
+{
+ if(ntForbid) delete ntForbid;
+}
Nonterminal::Nonterminal(Flatten &flat)
@@ -581,7 +584,8 @@ void Production::xfer(Flatten &flat)
{
eh_xferObjList(flat, right);
action.xfer(flat);
- flat.xferInt32((int&)precedence);
+ int prec = precedence;
+ flat.xferInt32(prec);
xferNullableOwnerPtr(flat, forbid);
flat.xferInt32(rhsLen);
In elkhound/Makefile
there is this comment:
# TODO: I previously identified a problem with enabling optimization.
# I doubt that has been fixed. Investigate.
The problem you're seeing, "terminate called after throwing", could well be what that comment is referring to. There is a bug somewhere in Elkhound causing it, but I don't know where.
I'm comparing elsa/cc.gr.gen.cc
generated by elkhound
compiled with and without optimization and found these differences:
--- <unnamed>
+++ <unnamed>
@@ -1,4 +1,4 @@
-//elkhound -O0
+//elkhound -O2
CCParse_ParseTables::CCParse_ParseTables()
: ParseTables(false /*owning*/)
{
@@ -7,16 +7,16 @@
numStates = 1382;
numProds = 750;
actionCols = 48;
- actionRows = 120;
- gotoCols = 30;
+ actionRows = 122;
+ gotoCols = 31;
gotoRows = 306;
- ambigTableSize = 461;
+ ambigTableSize = 452;
startState = (StateId)0;
finalProductionIndex = 0;
bigProductionListSize = 1122;
errorBitsRowSize = 20;
uniqueErrorRows = 236;
- // storage size: 11520 bytes
- // rows: 120 cols: 48
- static ActionEntry const actionTable_static[5760] = {
+ // storage size: 11712 bytes
+ // rows: 122 cols: 48
+ static ActionEntry const actionTable_static[5856] = {
I didn't expect any difference regardless of the optimization level.
And here the difference between -O2
and -O2 -fno-dce
:
--- <unnamed>
+++ <unnamed>
@@ -1,4 +1,4 @@
-//elkhound -O2
+//elkhound -O2 -fno-dce
CCParse_ParseTables::CCParse_ParseTables()
: ParseTables(false /*owning*/)
{
@@ -10,7 +10,7 @@
actionRows = 122;
gotoCols = 31;
gotoRows = 306;
- ambigTableSize = 452;
+ ambigTableSize = 473;
startState = (StateId)0;
finalProductionIndex = 0;
bigProductionListSize = 1122;
Deactivating compression one at a time (see below) it seems that the code for eef=1
has the bug.
./configure eef=1 gcs=1 gcsc=1 crs=1
../elsa/ccparse.exe tmp.c
terminate called after throwing an instance of 'x_assert'
Aborted (core dumped)
...
./configure eef=1 gcs=1 gcsc=1
../elsa/ccparse.exe tmp.c
terminate called after throwing an instance of 'x_assert'
Aborted (core dumped)
...
./configure eef=1 gcs=1
../elsa/ccparse.exe tmp.c
terminate called after throwing an instance of 'x_assert'
Aborted (core dumped)
...
./configure eef=1
../elsa/ccparse.exe tmp.c
terminate called after throwing an instance of 'x_assert'
Aborted (core dumped)
...
./configure gcs=1
GCS requires EEF
...
./configure gcsc=1
GCSC requires GCS
...
./configure crs=1
../elsa/ccparse.exe tmp.c
In state 916, I expected one of these tokens:
<name>, operator, template, (, ::, ~, &, *,
tmp.c:796:36: Parse error (state 916) at ;
error: parse error
Testing with small grammars in examples
I found that elkhound
built with compression eef=1
produces slightly different parser tables with examples/gcom/gcom.gr
and examples/cdecl/cdecl.gr
, it seems that some rows are swapped, maybe this can give a clue on the problem:
--- /tmp/elkhound-smcpeak/elkhound/examples/gcom/gcom-O0-eef.cc
+++ /tmp/elkhound-smcpeak/elkhound/examples/gcom/gcom-O2-eef.cc
@@ -498,31 +498,31 @@
/* 3*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0,
/* 4*/ -4, 0, 0, -4, -4, -4, 0, -4, 0, 0, -4, -4, 0, -4, -4, 0, 0, 0, 0, -4, 0, -4, 0, -4, -4, -4,
/* 5*/ -19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -19, 0, -19, 0, -19, 0, -19,
- /* 6*/ 0, 3, 5, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 7*/ 0, 3, 5, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 8*/ 0, 3, 5, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* 9*/ 0, 3, 5, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /*10*/ 0, 3, 5, 0, 0, 0, 11, 0, 14, 15, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 6*/ 0, 3, 5, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 7*/ 0, 3, 5, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 8*/ 0, 3, 5, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 9*/ 0, 3, 5, 0, 0, 0, 10, 0, 14, 15, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /*10*/ 0, 3, 5, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*11*/ 0, 0, 0, 0, 0, 0, 0, -16, 0, 0, 0, 0, 0, -16, -16, 0, 0, 0, 0, 0, 0, 0, 0, 0, -16, 0,
/*12*/ -8, 0, 0, -8, -8, -8, 0, -8, 0, 0, -8, -8, 0, -8, -8, 0, 0, 0, 0, -8, 0, -8, 0, -8, -8, -8,
/*13*/ 0, 0, 0, 0, 0, 0, 0, -9, 0, 0, 0, 0, 0, -9, -9, 0, 0, 0, 0, 0, 0, 0, 0, 0, -9, 0,
/*14*/ 0, 0, 0, 0, 0, 0, 0, -10, 0, 0, 0, 0, 0, -10, -10, 0, 0, 0, 0, 0, 0, 0, 0, 0, -10, 0,
- /*15*/ 0, 3, 5, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /*16*/ 0, 3, 5, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /*17*/ 0, 3, 5, 0, 0, 0, 11, 0, 14, 15, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /*18*/ 0, 3, 5, 0, 0, 0, 11, 0, 14, 15, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /*19*/ 0, 3, 5, 0, 0, 0, 11, 0, 14, 15, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /*15*/ 0, 3, 5, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /*16*/ 0, 3, 5, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /*17*/ 0, 3, 5, 0, 0, 0, 10, 0, 14, 15, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /*18*/ 0, 3, 5, 0, 0, 0, 10, 0, 14, 15, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /*19*/ 0, 3, 5, 0, 0, 0, 10, 0, 14, 15, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*20*/ -17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17, 0, -17, 0, -17, 0, -17,
/*21*/ -18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -18, 0, -18, 0, -18, 0, -18,
/*22*/ 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /*23*/ 0, 3, 5, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /*23*/ 0, 3, 5, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*24*/ 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 22, 23, 0, 0, 26, 0, 28, 0, 0, 0,
- /*25*/ 0, 3, 5, 0, 0, 0, 11, 0, 14, 15, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /*25*/ 0, 3, 5, 0, 0, 0, 10, 0, 14, 15, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*26*/ -22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -22, 0, -22, 0, -22, 0, -22,
- /*27*/ 0, 3, 5, 0, 0, 0, 11, 0, 14, 15, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /*27*/ 0, 3, 5, 0, 0, 0, 10, 0, 14, 15, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*28*/ -23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -23, 0, -23, 0, -23, 0, -23,
/*29*/ 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 22, 23, 0, 0, 26, 0, 28, 0, 0, 0,
- /*30*/ 0, 3, 5, 0, 0, 0, 11, 0, 14, 15, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /*30*/ 0, 3, 5, 0, 0, 0, 10, 0, 14, 15, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*31*/ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*32*/ 0, 0, 0, 7, 8, 9, 0, 13, 0, 0, 16, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*33*/ 0, 0, 0, 7, 8, 9, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -559,8 +559,8 @@
/* 6*/ 65535, 65535, 65535, 37, 65535, 65535, 65535,
/* 7*/ 65535, 65535, 65535, 38, 65535, 65535, 65535,
/* 8*/ 65535, 65535, 65535, 39, 65535, 65535, 65535,
- /* 9*/ 65535, 65535, 65535, 33, 65535, 65535, 65535,
- /*10*/ 65535, 65535, 65535, 32, 41, 65535, 65535,
+ /* 9*/ 65535, 65535, 65535, 32, 41, 65535, 65535,
+ /*10*/ 65535, 65535, 65535, 33, 65535, 65535, 65535,
/*11*/ 65535, 65535, 65535, 65535, 65535, 65535, 65535,
/*12*/ 65535, 65535, 65535, 65535, 65535, 65535, 65535,
/*13*/ 65535, 65535, 65535, 65535, 65535, 65535, 65535,
@@ -660,7 +660,7 @@
errorBitsPointers = new ErrorBitsEntry* [52];
// storage size: 208 bytes
static int const errorBitsPointers_offsets[52] = {
- /*0*/ 0, 4, 8, 12, 8, 16, 20, 20, 20, 20, 24, 28, 8, 28, 28, 20,
+ /*0*/ 0, 4, 8, 12, 8, 16, 20, 20, 20, 24, 20, 28, 8, 28, 28, 20,
/*1*/ 20, 24, 24, 24, 16, 16, 32, 20, 0, 24, 16, 24, 16, 0, 24, 36,
/*2*/ 40, 44, 48, 52, 52, 8, 8, 8, 56, 60, 64, 28, 28, 28, 68, 72,
/*3*/ 16, 76, 80, 84,
And building elkhound
with -fsanitize=undefined
produces this diff output when generating the elsa
parser; notice mainly ../smbase/astlist.h:46:58: runtime error: member access within null pointer of type 'struct ASTList'
:
--- <unnamed>
+++ <unnamed>
@@ -45,12 +45,14 @@
gramlex.lex:189:3: runtime error: load of value 7489, which is not a valid value for type 'SourceLoc'
grampar.y:329:144: runtime error: load of value 7489, which is not a valid value for type 'SourceLoc'
../smbase/astlist.h:46:58: runtime error: member access within null pointer of type 'struct ASTList'
+../smbase/astlist.h:46:58: runtime error: member access within null pointer of type 'struct ASTList'
gramlex.lex:239:3: runtime error: load of value 7709, which is not a valid value for type 'SourceLoc'
gramlex.lex:239:3: runtime error: load of value 7709, which is not a valid value for type 'SourceLoc'
gramlex.lex:233:3: runtime error: load of value 7716, which is not a valid value for type 'SourceLoc'
gramlex.lex:233:3: runtime error: load of value 7716, which is not a valid value for type 'SourceLoc'
gramlex.lex:159:1: runtime error: load of value 7718, which is not a valid value for type 'SourceLoc'
gramlex.lex:159:1: runtime error: load of value 7718, which is not a valid value for type 'SourceLoc'
+../smbase/astlist.h:46:58: runtime error: member access within null pointer of type 'struct ASTList'
grampar.y:350:64: runtime error: load of value 8418, which is not a valid value for type 'SourceLoc'
grampar.y:352:66: runtime error: load of value 8686, which is not a valid value for type 'SourceLoc'
gramlex.lex:160:1: runtime error: load of value 9870, which is not a valid value for type 'SourceLoc'
@@ -61,30 +63,33 @@
gramlex.lex:119:3: runtime error: load of value 20729, which is not a valid value for type 'SourceLoc'
gramlex.lex:133:5: runtime error: load of value 20731, which is not a valid value for type 'SourceLoc'
gramlex.lex:127:5: runtime error: load of value 20750, which is not a valid value for type 'SourceLoc'
-%%% progress: 31ms: parsing grammar source: gnu.gr
+%%% progress: 32ms: parsing grammar source: gnu.gr
%%% progress: 38ms: merging module: gnu.gr
%%% progress: 39ms: parsing grammar source: kandr.gr
-%%% progress: 41ms: merging module: kandr.gr
+%%% progress: 40ms: merging module: kandr.gr
grampar.cc:188:25: runtime error: load of value 6185, which is not a valid value for type 'SourceLoc'
../ast/locstr.h:34:17: runtime error: load of value 6185, which is not a valid value for type 'SourceLoc'
../smbase/astlist.h:46:58: runtime error: member access within null pointer of type 'struct ASTList'
-%%% progress: 42ms: parsing grammar AST..
-%%% progress: 76ms: beginning grammar analysis..
-%%% progress: 78ms: init...
-%%% progress: 78ms: derivability relation...
-%%% progress: 93ms: first...
-%%% progress: 98ms: follow...
-%%% progress: 105ms: LR item sets...
-gramanl.cc:588:16: runtime error: load of value 491, which is not a valid value for type 'StateId'
-gramanl.cc:588:28: runtime error: load of value 105, which is not a valid value for type 'StateId'
-%%% progress: 667ms: done with LR sets: 1382 states
-%%% progress: 667ms: BFS tree on transition graph...
-%%% progress: 706ms: state renumbering...
-gramanl.cc:3101:22: runtime error: load of value 40, which is not a valid value for type 'StateId'
-gramanl.cc:3101:34: runtime error: load of value 40, which is not a valid value for type 'StateId'
-gramanl.cc:3117:22: runtime error: load of value 195, which is not a valid value for type 'StateId'
-gramanl.cc:3117:34: runtime error: load of value 195, which is not a valid value for type 'StateId'
-%%% progress: 725ms: parse tables...
+../smbase/astlist.h:46:58: runtime error: member access within null pointer of type 'struct ASTList'
+../smbase/astlist.h:46:58: runtime error: member access within null pointer of type 'struct ASTList'
+%%% progress: 41ms: parsing grammar AST..
+../ast/locstr.h:34:17: runtime error: load of value 97396, which is not a valid value for type 'SourceLoc'
+%%% progress: 71ms: beginning grammar analysis..
+%%% progress: 72ms: init...
+%%% progress: 73ms: derivability relation...
+%%% progress: 77ms: first...
+%%% progress: 79ms: follow...
+%%% progress: 83ms: LR item sets...
+gramanl.cc:588:16: runtime error: load of value 1083, which is not a valid value for type 'StateId'
+gramanl.cc:588:28: runtime error: load of value 761, which is not a valid value for type 'StateId'
+%%% progress: 279ms: done with LR sets: 1382 states
+%%% progress: 279ms: BFS tree on transition graph...
+%%% progress: 298ms: state renumbering...
+gramanl.cc:3101:22: runtime error: load of value 32, which is not a valid value for type 'StateId'
+gramanl.cc:3101:34: runtime error: load of value 32, which is not a valid value for type 'StateId'
+gramanl.cc:3117:22: runtime error: load of value 251, which is not a valid value for type 'StateId'
+gramanl.cc:3117:34: runtime error: load of value 251, which is not a valid value for type 'StateId'
+%%% progress: 306ms: parse tables...
gramanl.cc:3329:49: runtime error: load of value 672, which is not a valid value for type 'StateId'
gramanl.cc:3287:37: runtime error: load of value 1, which is not a valid value for type 'StateId'
gramanl.cc:3337:35: runtime error: load of value 1, which is not a valid value for type 'StateId'
@@ -98,14 +103,14 @@
gramanl.cc:3264:76: runtime error: load of value 6, which is not a valid value for type 'StateId'
gramanl.cc:3268:54: runtime error: load of value 6, which is not a valid value for type 'StateId'
gramanl.cc:3275:55: runtime error: load of value 9, which is not a valid value for type 'StateId'
-%%% progress: 800ms: computing errorBits[]
-%%% progress: 814ms: printing item sets to cc.gr.gen.out ...
+%%% progress: 344ms: computing errorBits[]
+%%% progress: 350ms: printing item sets to cc.gr.gen.out ...
gramanl.cc:823:30: runtime error: load of value 672, which is not a valid value for type 'StateId'
gramanl.cc:841:49: runtime error: load of value 672, which is not a valid value for type 'StateId'
gramanl.cc:2531:40: runtime error: load of value 1, which is not a valid value for type 'StateId'
gramanl.cc:799:23: runtime error: load of value 1, which is not a valid value for type 'StateId'
gramanl.cc:834:46: runtime error: load of value 3, which is not a valid value for type 'StateId'
-%%% progress: 1177ms: emitting C++ code to cc.gr.gen.cc and cc.gr.gen.h ...
+%%% progress: 473ms: emitting C++ code to cc.gr.gen.cc and cc.gr.gen.h ...
../ast/locstr.h:55:34: runtime error: load of value 3281, which is not a valid value for type 'SourceLoc'
gramanl.cc:4329:31: runtime error: load of value 3281, which is not a valid value for type 'SourceLoc'
-%%% progress: 1250ms: done
+%%% progress: 535ms: done
With the changes shown below, the error: load of value xxxx, which is not a valid value for type 'SourceLoc'
error disappears:
smbase/srcloc.h:
----------------------------------- srcloc.h -----------------------------------
index 1852d30..6891c51 100644
@@ -47,7 +47,7 @@ class HashLineMap; // hashline.h
// would not allow variables of this type to be created
// uninitialized.. that's the one drawback of calling this an 'enum'
// instead of a 'class': I don't get to write a constructor.
-enum SourceLoc {
+enum SourceLoc : int {
// entity is defined within the translator's initialization code
SL_INIT=-1,
elkhound/parsetables.h:
-------------------------------- parsetables.h --------------------------------
index 04cf434..513804a 100644
@@ -18,7 +18,7 @@ class Bit2d; // bit2d.h
// integer id for an item-set DFA state; I'm using an 'enum' to
// prevent any other integers from silently flowing into it
-enum StateId { STATE_INVALID=-1 };
+enum StateId : int { STATE_INVALID=-1 };
inline ostream& operator<< (ostream &os, StateId id)
{ return os << (int)id; }
Add assertion to smbase/astlist.h
:
---------------------------------- astlist.h ----------------------------------
index 859f998..6721685 100644
@@ -8,6 +8,7 @@
#include "vdtllist.h" // VoidTailList
#include <stddef.h> // size_t
+#include <assert.h> // size_t
template <class T> class ASTListIter;
@@ -43,7 +44,7 @@ public:
// stealing ctor; among other things, since &src->list is assumed to
// point at 'src', this class can't have virtual functions;
// these ctors delete 'src'
- ASTList(ASTList<T> *src) : list(&src->list) {}
+ ASTList(ASTList<T> *src) : list(&src->list) {assert(src); assert(&src->list);}
void steal(ASTList<T> *src) { deleteAll(); list.steal(&src->list); }
// selectors
Then when building ast
:
valgrind ./astgen.exe -oexample.ast.gen example.ast
==9613== Memcheck, a memory error detector
==9613== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==9613== Using Valgrind-3.21.0 and LibVEX; rerun with -h for copyright info
==9613== Command: ./astgen.exe -oexample.ast.gen example.ast
==9613==
astgen.exe: ../smbase/astlist.h:47: ASTList<T>::ASTList(ASTList<T>*) [with T = CtorArg]: Assertion `src' failed.
==9613==
==9613== Process terminating with default action of signal 6 (SIGABRT)
==9613== at 0x5504E87: raise (raise.c:51)
==9613== by 0x55067F0: abort (abort.c:79)
==9613== by 0x54F63F9: __assert_fail_base (assert.c:92)
==9613== by 0x54F6471: __assert_fail (assert.c:101)
==9613== by 0x113855: ASTList (astlist.h:47)
==9613== by 0x113855: ASTClass (ast.ast.h:208)
==9613== by 0x113855: agrampar_yyparse(ASTParseParams*) (agrampar.y:169)
==9613== by 0x11A9DF: readAbstractGrammar(char const*) (agrampar.cc:165)
==9613== by 0x123CCB: entry(int, char**) (astgen.cc:2332)
==9613== by 0x1113B5: main (astgen.cc:2425)
==9613==
==9613== HEAP SUMMARY:
==9613== in use at exit: 127,618 bytes in 76 blocks
==9613== total heap usage: 255 allocs, 179 frees, 132,256 bytes allocated
==9613==
==9613== LEAK SUMMARY:
==9613== definitely lost: 0 bytes in 0 blocks
==9613== indirectly lost: 0 bytes in 0 blocks
==9613== possibly lost: 0 bytes in 0 blocks
==9613== still reachable: 127,618 bytes in 76 blocks
==9613== suppressed: 0 bytes in 0 blocks
==9613== Rerun with --leak-check=full to see details of leaked memory
==9613==
==9613== For lists of detected and suppressed errors, rerun with: -s
==9613== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Aborted (core dumped)
With gdb:
gdb -args ./astgen.exe -oexample.ast.gen example.ast
GNU gdb (Ubuntu 10.2-0ubuntu1~18.04~2) 10.2
Copyright (C) 2021 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<https://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from ./astgen.exe...
(gdb) r
Starting program: /tmp/elkhound-smcpeak/ast/astgen.exe -oexample.ast.gen example.ast
astgen.exe: ../smbase/astlist.h:47: ASTList<T>::ASTList(ASTList<T>*) [with T = CtorArg]: Assertion `src' failed.
Program received signal SIGABRT, Aborted.
__GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
51 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1 0x00007ffff739f7f1 in __GI_abort () at abort.c:79
#2 0x00007ffff738f3fa in __assert_fail_base (fmt=0x7ffff75166c0 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n",
assertion=assertion@entry=0x5555555adab5 "src", file=file@entry=0x5555555adaa1 "../smbase/astlist.h",
line=line@entry=47, function=function@entry=0x5555555adb38 "ASTList<T>::ASTList(ASTList<T>*) [with T = CtorArg]")
at assert.c:92
#3 0x00007ffff738f472 in __GI___assert_fail (assertion=0x5555555adab5 "src",
file=0x5555555adaa1 "../smbase/astlist.h", line=47,
function=0x5555555adb38 "ASTList<T>::ASTList(ASTList<T>*) [with T = CtorArg]") at assert.c:101
#4 0x00005555555617f3 in ASTList<CtorArg>::ASTList (this=0x5555557f0488, src=0x0) at ../smbase/astlist.h:47
#5 0x00005555555613dd in ASTClass::ASTClass (this=0x5555557f0480, _name=..., _args=0x0, _lastArgs=0x0, _bases=0x0,
_decls=0x0) at ast.ast.h:208
#6 0x000055555555f9d1 in agrampar_yyparse (parseParam=0x7fffffffd2d0) at agrampar.y:176
#7 0x000055555556a381 in readAbstractGrammar (fname=0x7fffffffdda3 "example.ast") at agrampar.cc:165
#8 0x0000555555575bc4 in entry (argc=3, argv=0x7fffffffd990) at astgen.cc:2332
#9 0x000055555557662f in main (argc=3, argv=0x7fffffffd978) at astgen.cc:2425
The changes shown below fix ../smbase/astlist.h:46:58: runtime error: member access within null pointer of type 'struct ASTList'
:
---------------------------------- astlist.h ----------------------------------
index 859f998..0697dac 100644
@@ -43,7 +43,7 @@ public:
// stealing ctor; among other things, since &src->list is assumed to
// point at 'src', this class can't have virtual functions;
// these ctors delete 'src'
- ASTList(ASTList<T> *src) : list(&src->list) {}
+ ASTList(ASTList<T> *src) { if(src) list.steal(&src->list); }
void steal(ASTList<T> *src) { deleteAll(); list.steal(&src->list); }
// selectors
Now building elsa
with -fsanitize=undefined -static-libasan -O2
we have :
./ccparse.exe in/t0001.cc
baselexer.cc:48:25: runtime error: member call on address 0x558039ab0f00 which does not point to an object of type 'BaseLexer'
0x558039ab0f00: note: object has invalid vptr
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^~~~~~~~~~~~~~~~~~~~~~~
invalid vptr
baselexer.cc:38:9: runtime error: member access within address 0x558039ab0f00 which does not point to an object of type 'BaseLexer'
0x558039ab0f00: note: object has invalid vptr
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^~~~~~~~~~~~~~~~~~~~~~~
invalid vptr
baselexer.cc:44:10: runtime error: member access within address 0x558039ab0f00 which does not point to an object of type 'BaseLexer'
0x558039ab0f00: note: object has invalid vptr
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
^~~~~~~~~~~~~~~~~~~~~~~
invalid vptr
And valgrind shows:
valgrind ../elsa/ccparse.exe tmp.c
==14097== Memcheck, a memory error detector
==14097== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==14097== Using Valgrind-3.21.0 and LibVEX; rerun with -h for copyright info
==14097== Command: ../elsa/ccparse.exe tmp.c
==14097==
==14097== Syscall param write(buf) points to unaddressable byte(s)
==14097== at 0x565B45E: ??? (in /usr/lib/x86_64-linux-gnu/libubsan.so.1.0.0)
==14097== by 0x5660336: ??? (in /usr/lib/x86_64-linux-gnu/libubsan.so.1.0.0)
==14097== by 0x56503E1: ??? (in /usr/lib/x86_64-linux-gnu/libubsan.so.1.0.0)
==14097== by 0x564EF45: ??? (in /usr/lib/x86_64-linux-gnu/libubsan.so.1.0.0)
==14097== by 0x564F62A: __ubsan_handle_dynamic_type_cache_miss (in /usr/lib/x86_64-linux-gnu/libubsan.so.1.0.0)
==14097== by 0xCEC2E0: BaseLexer::BaseLexer(StringTable&, char const*) (baselexer.cc:48)
==14097== by 0xCF096E: Lexer::Lexer(StringTable&, CCLang&, char const*) (lexer.cc:81)
==14097== by 0xC17B0E: ParseTreeAndTokens::ParseTreeAndTokens(CCLang&, unsigned long&, StringTable&, char const*) (parssppt.cc:16)
==14097== by 0xBAD72B: ElsaParse::parse(char const*) (elsaparse.cc:236)
==14097== by 0x6F95B9: doit(int, char**) (main.cc:397)
==14097== by 0x6F2FEA: main (main.cc:428)
==14097== Address 0xfffffffffffffff0 is not stack'd, malloc'd or (recently) free'd
==14097==
==14097== Syscall param write(buf) points to unaddressable byte(s)
==14097== at 0x565B45E: ??? (in /usr/lib/x86_64-linux-gnu/libubsan.so.1.0.0)
==14097== by 0x5660336: ??? (in /usr/lib/x86_64-linux-gnu/libubsan.so.1.0.0)
==14097== by 0x56504B7: ??? (in /usr/lib/x86_64-linux-gnu/libubsan.so.1.0.0)
==14097== by 0x565015F: ??? (in /usr/lib/x86_64-linux-gnu/libubsan.so.1.0.0)
==14097== by 0x564EF84: ??? (in /usr/lib/x86_64-linux-gnu/libubsan.so.1.0.0)
==14097== by 0x564F62A: __ubsan_handle_dynamic_type_cache_miss (in /usr/lib/x86_64-linux-gnu/libubsan.so.1.0.0)
==14097== by 0xCEC2E0: BaseLexer::BaseLexer(StringTable&, char const*) (baselexer.cc:48)
==14097== by 0xCF096E: Lexer::Lexer(StringTable&, CCLang&, char const*) (lexer.cc:81)
==14097== by 0xC17B0E: ParseTreeAndTokens::ParseTreeAndTokens(CCLang&, unsigned long&, StringTable&, char const*) (parssppt.cc:16)
==14097== by 0xBAD72B: ElsaParse::parse(char const*) (elsaparse.cc:236)
==14097== by 0x6F95B9: doit(int, char**) (main.cc:397)
==14097== by 0x6F2FEA: main (main.cc:428)
==14097== Address 0xfffffffffffffff0 is not stack'd, malloc'd or (recently) free'd
==14097==
baselexer.cc:48:25: runtime error: member call on address 0x000006aa10b0 which does not point to an object of type 'BaseLexer'
0x000006aa10b0: note: object has invalid vptr
The changes below seem to fix baselexer.cc:48:25: runtime error: member call on address 0x000006aa10b0 which does not point to an object of type 'BaseLexer'
:
--------------------------------- baselexer.cc ---------------------------------
index b599cca..3de5aa1 100644
@@ -45,7 +45,7 @@ istream *BaseLexer::openFile(char const *fname)
}
BaseLexer::BaseLexer(StringTable &s, char const *fname)
- : yyFlexLexer(openFile(fname)),
+ : yyFlexLexer(), LexerInterface(),
// 'inputStream' is initialized by 'openFile'
srcFile(NULL), // changed below
@@ -57,6 +57,7 @@ BaseLexer::BaseLexer(StringTable &s, char const *fname)
errors(0),
warnings(0)
{
+ yym_lexer_state.yy_input_stream = openFile(fname);
srcFile = sourceLocManager->getInternalFile(fname);
loc = sourceLocManager->encodeBegin(fname);
@@ -72,7 +73,7 @@ istream *BaseLexer::openString(char const *buf, int len)
BaseLexer::BaseLexer(StringTable &s, SourceLoc initLoc,
char const *buf, int len)
- : yyFlexLexer(openString(buf, len)),
+ : yyFlexLexer(), LexerInterface(),
// 'inputStream' is initialized by 'openString'
srcFile(NULL), // changed below
@@ -84,6 +85,7 @@ BaseLexer::BaseLexer(StringTable &s, SourceLoc initLoc,
errors(0),
warnings(0)
{
+ yym_lexer_state.yy_input_stream = openString(buf, len);
// decode the given location
char const *fname;
int line, col;
After all of the above, if I now build elkhound
with compression=1
and elsa (both with
-fsanitize=undefined -static-libasan -O2), then it doesn't crash:
../elsa/ccparse.exe tmp.c
error: parse error
But if I remove the -fsanitize=undefined -static-libasan
leaving only -O2
:
valgrind ../elsa/ccparse.exe tmp.c
==17158== Memcheck, a memory error detector
==17158== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==17158== Using Valgrind-3.21.0 and LibVEX; rerun with -h for copyright info
==17158== Command: ../elsa/ccparse.exe tmp.c
==17158==
terminate called after throwing an instance of 'x_assert'
==17158==
==17158== Process terminating with default action of signal 6 (SIGABRT)
==17158== at 0x5504E87: raise (raise.c:51)
==17158== by 0x55067F0: abort (abort.c:79)
==17158== by 0x4EE3645: ??? (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.32)
==17158== by 0x4EF4F05: ??? (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.32)
==17158== by 0x4EF3F18: ??? (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.32)
==17158== by 0x4EF4674: __gxx_personality_v0 (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.32)
==17158== by 0x52BC363: ??? (in /lib/x86_64-linux-gnu/libgcc_s.so.1)
==17158== by 0x52BCA10: _Unwind_RaiseException (in /lib/x86_64-linux-gnu/libgcc_s.so.1)
==17158== by 0x4EF51A6: __cxa_throw (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.32)
==17158== by 0x20A08E: x_assert_fail(char const*, char const*, int) (exc.cpp:111)
==17158== by 0x1FBD8A: decRefCt (glr.cc:455)
==17158== by 0x1FBD8A: dec (rcptr.h:30)
==17158== by 0x1FBD8A: ~RCPtr (rcptr.h:35)
==17158== by 0x1FBD8A: ~SiblingLink (glr.cc:291)
==17158== by 0x1FBD8A: StackNode::~StackNode() (glr.cc:311)
==17158== by 0x200A1B: ObjectPool<StackNode>::~ObjectPool() (objpool.h:80)
==17158==
==17158== HEAP SUMMARY:
==17158== in use at exit: 795,773 bytes in 14,262 blocks
==17158== total heap usage: 17,731 allocs, 3,469 frees, 884,926 bytes allocated
==17158==
==17158== LEAK SUMMARY:
==17158== definitely lost: 50,888 bytes in 1,442 blocks
==17158== indirectly lost: 31,192 bytes in 858 blocks
==17158== possibly lost: 0 bytes in 0 blocks
==17158== still reachable: 713,693 bytes in 11,962 blocks
==17158== of which reachable via heuristic:
==17158== newarray : 5,784 bytes in 3 blocks
==17158== suppressed: 0 bytes in 0 blocks
==17158== Rerun with --leak-check=full to see details of leaked memory
==17158==
==17158== For lists of detected and suppressed errors, rerun with: -s
==17158== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Aborted (core dumped)
Back to square one !
Here is the full stack trace with gdb
:
gdb -args ../elsa/ccparse.exe tmp.c
GNU gdb (Ubuntu 10.2-0ubuntu1~18.04~2) 10.2
Copyright (C) 2021 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<https://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from ../elsa/ccparse.exe...
(gdb) r
Starting program: /tmp/elkhound-smcpeak/elsa/ccparse.exe tmp.c
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
terminate called after throwing an instance of 'x_assert'
Program received signal SIGABRT, Aborted.
__GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
51 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1 0x00007ffff67f57f1 in __GI_abort () at abort.c:79
#2 0x00007ffff7a13646 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#3 0x00007ffff7a24f06 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#4 0x00007ffff7a23f19 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#5 0x00007ffff7a24675 in __gxx_personality_v0 () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#6 0x00007ffff6bbf364 in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1
#7 0x00007ffff6bbfa11 in _Unwind_RaiseException () from /lib/x86_64-linux-gnu/libgcc_s.so.1
#8 0x00007ffff7a251a7 in __cxa_throw () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#9 0x0000000000b42f82 in x_assert_fail (cond=cond@entry=0xcf0b07 "referenceCount > 0",
file=file@entry=0xcf00b8 "glr.cc", line=line@entry=455) at exc.cpp:111
#10 0x0000000000b11e06 in StackNode::decRefCt (this=0x1522b58) at glr.cc:453
#11 StackNode::decRefCt (this=0x1522b58) at glr.cc:453
#12 0x0000000000b11cc9 in RCPtr<StackNode>::dec (this=0x151d2b8) at rcptr.h:30
#13 RCPtr<StackNode>::operator= (p=0x0, this=0x151d2b8) at rcptr.h:38
#14 StackNode::deinit (this=0x151d2a8) at glr.cc:356
#15 ObjectPool<StackNode>::dealloc (obj=0x151d2a8, this=0x7fffffffc200) at ../smbase/objpool.h:143
#16 StackNode::decRefCt (this=0x151d2a8) at glr.cc:460
#17 0x0000000000afc5dc in RCPtr<StackNode>::dec (this=0x151d078) at rcptr.h:30
#18 RCPtr<StackNode>::~RCPtr (this=0x151d078, __in_chrg=<optimized out>) at rcptr.h:35
#19 SiblingLink::~SiblingLink (this=0x151d078, __in_chrg=<optimized out>) at glr.cc:292
#20 StackNode::~StackNode (this=0x151d068, __in_chrg=<optimized out>) at glr.cc:314
#21 0x0000000000b1231b in ObjectPool<StackNode>::~ObjectPool (this=0x7fffffffc200, __in_chrg=<optimized out>)
at ../smbase/objpool.h:80
#22 0x0000000000b0cdb9 in GLR::innerGlrParse (glr=..., lexer=..., treeTop=@0x7fffffffc510: 0) at glr.cc:1327
#23 0x0000000000b1023a in GLR::glrParse (this=this@entry=0x7fffffffc3a0, lexer=..., treeTop=@0x7fffffffc510: 0)
--Type <RET> for more, q to quit, c to continue without paging--
at glr.cc:818
#24 0x000000000094899e in glrParseNamedFile (inputFname=0x7fffffffdda7 "tmp.c", treeTop=<optimized out>, lexer=...,
glr=...) at parssppt.cc:62
#25 toplevelParse (ptree=..., inputFname=inputFname@entry=0x7fffffffdda7 "tmp.c") at parssppt.cc:75
#26 0x00000000008debd9 in ElsaParse::parse (this=this@entry=0x7fffffffd760,
inputFname=inputFname@entry=0x7fffffffdda7 "tmp.c") at elsaparse.cc:264
#27 0x000000000042aa41 in doit (argc=<optimized out>, argv=<optimized out>) at main.cc:397
#28 0x0000000000428c7a in main (argc=<optimized out>, argv=<optimized out>) at main.cc:428
After reading the comments at the end of elkhound/glr.cc::innerGlrParse
and looking at the stack trace, I placed a call to glr.cleanupAfterParse(treeTop);
before the return, and now it doesn't assert anymore.
Is the change below OK?
------------------------------------ glr.cc ------------------------------------
index 2c5a561..a774750 100644
@@ -984,6 +984,7 @@ STATICDEF bool GLR
#if ENABLE_EEF_COMPRESSION
if (tables->actionEntryIsError(parser->state, lexer.type)) {
+ glr.cleanupAfterParse(treeTop);
return false; // parse error
}
#endif
../elsa/ccparse.exe tmp.c
error: parse error
To build this project we need to replace occurrences of
throw_XOpen
,readInt
,writeInt
,xferInt
,xferLong
,checkpoint
, ... by xsyserror("open", ...)
,readInt32
,writeInt32
,xferInt32
,xferLong64
,checkpoint32
, ...