Closed phillid closed 9 years ago
@phillid thank you for your interest in dodo!
I am not convinced your memmove length math is correct
strlen(i->argument.str) - ((i->argument.str - source) - *index) - 1);
In english this reads: "strlen from start of this string argument - number of characters between start of source and start of this string - offset into source - 1"
when really what we want is
"number of character remaining in source from index"
or
strlen(source + index)
.
Your memmove length math should be causing an illegal read off the end of the buffer.
Here is an example showing the bug:
chris@Ox1b dodo(master)-> cat parse_string_example.c
#include <stdio.h> /* fopen, fseek, fread, fwrite, FILE */
#include <stdlib.h> /* exit */
#include <string.h> /* strcmp, strncmp */
#include <ctype.h> /* isdigit */
int parse_string(char *source, size_t *index){
int len = 0;
char *str = 0;
int copy_len = 0;
if( ! source ){
puts("Parse_string: null source");
return 0;
}
if( ! index ){
puts("Parse_string: null index");
return 0;
}
if( '/' != source[*index] ){
printf("Parse_string: unexpected character '%c', expected beginning delimiter'/'\n", source[*index]);
return 0;
}
/* skip past starting delimiter */
++(*index);
/* save start of string */
str = &(source[*index]);
/* count length of string */
/* FIXME may want to have buffer length passed in
* 'just incase; buffer is not \0 terminated
*/
for( len=0; ; ++(*index) ){
switch( source[*index] ){
/* end of buffer */
case '\0':
/* error, expected terminating / */
puts("Parse_string: unexpected end of source buffer, expected terminating delimiter'/'");
return 0;
break;
case '\\':
//copy_len = strlen(source + *index);
copy_len = strlen(str) - ((str - source) - *index) - 1;
printf("you are asking me to copy '%d' bytes from an area with '%d' bytes\n",
copy_len,
strlen(source + *index)
);
/* A bit yucky: Rework the string to delete the escape character '\'
Note the use of memmove as the source and destination overlap */
memmove(source+*index,
source+*index+1,
copy_len
);
(*index)++;
len+=2;
break;
/* terminating delimiter */
case '/':
/* skip past / */
++(*index);
/* we are finished here */
goto EXIT;
break;
/* just another character in our string */
default:
++len;
break;
}
}
EXIT:
return 0;
}
int main(void){
size_t index = 0;
char *str = calloc(9, 1);
strcat(str, "/abc\\/d/");
parse_string(str, &index);
free(str);
}
chris@Ox1b dodo(master)-> gcc parse_string_example.c
chris@Ox1b dodo(master)-> ./a.out
you are asking me to copy '9' bytes from an area with '4' bytes
chris@Ox1b dodo(master)-> valgrind ./a.out
==14108== Memcheck, a memory error detector
==14108== Copyright (C) 2002-2011, and GNU GPL'd, by Julian Seward et al.
==14108== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info
==14108== Command: ./a.out
==14108==
you are asking me to copy '9' bytes from an area with '4' bytes
==14108== Invalid read of size 1
==14108== at 0x4C2B61F: memmove (mc_replace_strmem.c:981)
==14108== by 0x400825: parse_string (in /home/chris/devel/dodo/a.out)
==14108== by 0x4008EF: main (in /home/chris/devel/dodo/a.out)
==14108== Address 0x51ba049 is 0 bytes after a block of size 9 alloc'd
==14108== at 0x4C272B8: calloc (vg_replace_malloc.c:566)
==14108== by 0x400899: main (in /home/chris/devel/dodo/a.out)
==14108==
==14108== Invalid read of size 1
==14108== at 0x4C2B610: memmove (mc_replace_strmem.c:981)
==14108== by 0x400825: parse_string (in /home/chris/devel/dodo/a.out)
==14108== by 0x4008EF: main (in /home/chris/devel/dodo/a.out)
==14108== Address 0x51ba04a is 1 bytes after a block of size 9 alloc'd
==14108== at 0x4C272B8: calloc (vg_replace_malloc.c:566)
==14108== by 0x400899: main (in /home/chris/devel/dodo/a.out)
==14108==
==14108== Invalid write of size 1
==14108== at 0x4C2B614: memmove (mc_replace_strmem.c:981)
==14108== by 0x400825: parse_string (in /home/chris/devel/dodo/a.out)
==14108== by 0x4008EF: main (in /home/chris/devel/dodo/a.out)
==14108== Address 0x51ba049 is 0 bytes after a block of size 9 alloc'd
==14108== at 0x4C272B8: calloc (vg_replace_malloc.c:566)
==14108== by 0x400899: main (in /home/chris/devel/dodo/a.out)
==14108==
==14108==
==14108== HEAP SUMMARY:
==14108== in use at exit: 0 bytes in 0 blocks
==14108== total heap usage: 1 allocs, 1 frees, 9 bytes allocated
==14108==
==14108== All heap blocks were freed -- no leaks are possible
==14108==
==14108== For counts of detected and suppressed errors, rerun with: -v
==14108== ERROR SUMMARY: 9 errors from 3 contexts (suppressed: 4 from 4)
and the corrected code
chris@Ox1b dodo(master)-> cat parse_string_example.c
#include <stdio.h> /* fopen, fseek, fread, fwrite, FILE */
#include <stdlib.h> /* exit */
#include <string.h> /* strcmp, strncmp */
#include <ctype.h> /* isdigit */
int parse_string(char *source, size_t *index){
int len = 0;
char *str = 0;
int copy_len = 0;
if( ! source ){
puts("Parse_string: null source");
return 0;
}
if( ! index ){
puts("Parse_string: null index");
return 0;
}
if( '/' != source[*index] ){
printf("Parse_string: unexpected character '%c', expected beginning delimiter'/'\n", source[*index]);
return 0;
}
/* skip past starting delimiter */
++(*index);
/* save start of string */
str = &(source[*index]);
/* count length of string */
/* FIXME may want to have buffer length passed in
* 'just incase; buffer is not \0 terminated
*/
for( len=0; ; ++(*index) ){
switch( source[*index] ){
/* end of buffer */
case '\0':
/* error, expected terminating / */
puts("Parse_string: unexpected end of source buffer, expected terminating delimiter'/'");
return 0;
break;
case '\\':
copy_len = strlen(source + *index);
//copy_len = strlen(str) - ((str - source) - *index) - 1;
printf("you are asking me to copy '%d' bytes from an area with '%d' bytes\n",
copy_len,
strlen(source + *index)
);
/* A bit yucky: Rework the string to delete the escape character '\'
Note the use of memmove as the source and destination overlap */
memmove(source+*index,
source+*index+1,
copy_len
);
(*index)++;
len+=2;
break;
/* terminating delimiter */
case '/':
/* skip past / */
++(*index);
/* we are finished here */
goto EXIT;
break;
/* just another character in our string */
default:
++len;
break;
}
}
EXIT:
return 0;
}
int main(void){
size_t index = 0;
char *str = calloc(9, 1);
strcat(str, "/abc\\/d/");
parse_string(str, &index);
free(str);
}
chris@Ox1b dodo(master)-> gcc parse_string_example.c
chris@Ox1b dodo(master)-> ./a.out
you are asking me to copy '4' bytes from an area with '4' bytes
chris@Ox1b dodo(master)-> valgrind ./a.out
==14035== Memcheck, a memory error detector
==14035== Copyright (C) 2002-2011, and GNU GPL'd, by Julian Seward et al.
==14035== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info
==14035== Command: ./a.out
==14035==
you are asking me to copy '4' bytes from an area with '4' bytes
==14035==
==14035== HEAP SUMMARY:
==14035== in use at exit: 0 bytes in 0 blocks
==14035== total heap usage: 1 allocs, 1 frees, 9 bytes allocated
==14035==
==14035== All heap blocks were freed -- no leaks are possible
==14035==
==14035== For counts of detected and suppressed errors, rerun with: -v
==14035== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 4 from 4)
EDIT: whitespace, and correcting input string
note that I am taking the strlen(source + index)
this is safe as we have strlen characters plus the \0
and we want to skip one character (the \
) but include the \0
This is the same as strlen(source + index + 1) - 1
A slightly more obvious demonstration of the correct strlen(source +index)
chris@Ox1b dodo(master)-> cat simpler_example.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void parse_string(char *source, size_t *index){
int len = 0;
char *str = 0;
int copy_len = 0;
++(*index);
str = &(source[*index]);
for( len=0; ; ++(*index) ){
switch( source[*index] ){
case '\0':
puts("Parse_string: unexpected end of source buffer, expected terminating delimiter'/'");
return;
break;
case '\\':
copy_len = strlen(source + *index);
//copy_len = strlen(str) - ((str - source) - *index) - 1;
printf("you are asking me to copy '%d' bytes from an area with '%d' bytes\n",
copy_len,
strlen(source + *index)
);
memmove(source+*index,
source+*index+1,
copy_len
);
(*index)++;
len+=2;
break;
case '/':
++(*index);
return;
break;
default:
++len;
break;
}
}
}
int main(void){
size_t index = 0;
char *str = calloc(9, 1);
strcat(str, "/abc\\/d/");
printf("string before '%s", str);
printf("strlen before '%d'\n", strlen(str));
parse_string(str, &index);
printf("strlen after '%d'\n", strlen(str));
printf("string after '%s'\n", str);
free(str);
}
chris@Ox1b dodo(master)-> gcc simpler_example.c
chris@Ox1b dodo(master)-> ./a.out
string before '/abc\/d/strlen before '8'
you are asking me to copy '4' bytes from an area with '4' bytes
strlen after '7'
string after '/abc/d/'
chris@Ox1b dodo(master)-> valgrind ./a.out
==17083== Memcheck, a memory error detector
==17083== Copyright (C) 2002-2011, and GNU GPL'd, by Julian Seward et al.
==17083== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info
==17083== Command: ./a.out
==17083==
string before '/abc\/d/strlen before '8'
you are asking me to copy '4' bytes from an area with '4' bytes
strlen after '7'
string after '/abc/d/'
==17083==
==17083== HEAP SUMMARY:
==17083== in use at exit: 0 bytes in 0 blocks
==17083== total heap usage: 1 allocs, 1 frees, 9 bytes allocated
==17083==
==17083== All heap blocks were freed -- no leaks are possible
==17083==
==17083== For counts of detected and suppressed errors, rerun with: -v
==17083== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 4 from 4)
Ooh crikey, you're right, that's embarrassing. I was over-complicating things and getting them wrong in the process—I've fixed this in my master, so the pull request should now contain the right code.
@phillid thanks for fixing that, and thanks for the work :+1:
@phillid I just squashed your two commits into one [1] so you may want to make your clone match mine, if you need help with this please let me know.
Sorry for the hassle.
[1] in order for tools like git-bisect
to behave you ideally want every commit to be as free of known errors as possible
@mkfifo thanks for the hint, I was going to do it with the commits on my pull request but was unsure of what GitHub's behaviour would be
@phillid you can ammend (git commit --amend
) and then force push (git push --force origin master:master
) and github will update the PR
I've added the ability for dodo to encounter use a backslash as a sort-of escape character. It simply causes dodo to
memmove()
everything down and skip checking the character immediately after the backslash (now in the backslash's place) for null, '\' or '/'. The calls to memmove are potentially a source of slowness, but it was the quickest way to slot this functionality into dodo.