gridcf / gct-docs

Grid Community Toolkit documentation
http://gridcf.org/gct-docs/
1 stars 4 forks source link

Lack of IO error checks generate incorrect file checksums #33

Closed spbooth closed 1 year ago

spbooth commented 1 year ago

The routine globus_l_gass_copy_cksm_file in globus_gass_copy_glob.c has a file read loop while((n = read(fd,buf,count)) > 0) If an IO error occurs in the read call it will return -1 and the loop will terminate early generating an incorrect digest for the file but no error report until the subsequent file transfer fails its checksum test.

On a related note if you check for errors properly you discover that a recursive globus-url-copy attempts to calculate checksums on directory-urls using this routine (which always generates a error in the read call)

My POC fix is `static globus_result_t globus_l_gass_copy_cksm_file( globus_gass_copy_handle_t handle, char url, char cksm, globus_off_t offset, globus_off_t length, const char algorithm, globus_gass_copy_callback_t callback, void callback_arg) { char myname = "globus_l_gass_copy_cksm_file";

globus_url_t                        parsed_url;
globus_result_t                     result;
int                                 rc;

EVP_MD_CTX *                        mdctx;
const EVP_MD *                      md;
char *                              cksmptr;
unsigned char                       md_value[EVP_MAX_MD_SIZE];
char                                cksm_buff[CKSM_SIZE];
unsigned int                        md_len;

char                                buf[GASS_COPY_CKSM_BUFSIZE];

int                                 i;
int                                 fd;
int                                 n;
ssize_t                             readlen;
globus_off_t                        count;
globus_off_t                        read_left;
struct stat                         statbuf;
int                                 is_regular;

rc = globus_url_parse_loose(url, &parsed_url);
if(rc != 0)
{
    result = globus_error_put(
        globus_error_construct_string(
            GLOBUS_GASS_COPY_MODULE,
            GLOBUS_NULL,
            "[%s]: error parsing url: "
            "globus_url_parse returned %d",
            myname,
            url));
    goto error_url;
}

if(parsed_url.url_path == GLOBUS_NULL)
{
    result = globus_error_put(
        globus_error_construct_string(
            GLOBUS_GASS_COPY_MODULE,
            GLOBUS_NULL,
            "[%s]: error parsing url: "
            "url has no path",
            myname));
    goto error_fd;
}    

read_left = length;
if(length >= 0)
{
    count = (read_left > GASS_COPY_CKSM_BUFSIZE) ? 
        GASS_COPY_CKSM_BUFSIZE : read_left;
}
else
{
    count = GASS_COPY_CKSM_BUFSIZE;
}

fd = open(parsed_url.url_path, O_RDONLY);        
if(fd < 0)
{
    result = globus_error_put(
        globus_error_construct_string(
            GLOBUS_GASS_COPY_MODULE,
            GLOBUS_NULL,
            "[%s]: error opening checksum file %s",
            myname,
            parsed_url.url_path));
    goto error_fd;
}
fstat(fd,&statbuf);
is_regular = S_ISREG(statbuf.st_mode);    

if (is_regular && lseek(fd, offset, SEEK_SET) == -1)
{
    result = globus_error_put(
        globus_error_construct_string(
            GLOBUS_GASS_COPY_MODULE,
            GLOBUS_NULL,
            "[%s]: error (%d) seeking checksum file %s",
            myname,
            errno,
            parsed_url.url_path));
    goto error_seek;
}

OpenSSL_add_all_algorithms();
md = EVP_get_digestbyname(algorithm);
if (!md)
{
    /*
     * try again with uppercase algorithm name.
     */
    char *                          p;
    char *                          alg = strdup(algorithm);
    for(p = alg; *p != '\0'; p++)
    {
        *p = toupper(*p);
    }
    md = EVP_get_digestbyname(alg);
    free(alg);
}
if (!md)
{
    result = globus_error_put(globus_error_construct_string(
            GLOBUS_GASS_COPY_MODULE,
            GLOBUS_NULL, 
            "Unable to use checksum algorithm %s", 
            algorithm));
    goto error_seek;
}

mdctx = EVP_MD_CTX_create();
EVP_DigestInit_ex(mdctx, md, NULL);

if( is_regular ){
while((n = (readlen = read(fd, buf, count))) > 0)
{
    if(length >= 0)
    {
        read_left -= n;
        count = (read_left > GASS_COPY_CKSM_BUFSIZE) ? GASS_COPY_CKSM_BUFSIZE : read_left;
    }

    EVP_DigestUpdate(mdctx, buf, n);
}
if( readlen < 0 && count > 0 ){
    result = globus_error_put(globus_error_construct_string(
            GLOBUS_GASS_COPY_MODULE,
            GLOBUS_NULL,
            "Read error in checksum calculation url=%s read_left =%d %d %s",
            url,read_left,readlen, strerror(errno)));
    goto error_seek;
}
}

EVP_DigestFinal_ex(mdctx, md_value, &md_len);
EVP_MD_CTX_destroy(mdctx);

close(fd);

cksmptr = cksm_buff;
for (i = 0; i < md_len; i++)
{
   sprintf(cksmptr, "%02x", md_value[i]);
   cksmptr++;
   cksmptr++;
}
*cksmptr = '\0';

strncpy(cksm, cksm_buff, sizeof(cksm_buff));
//globus_libc_fprintf(stderr,"SPB file-checksum %s=%s\n",url,cksm); 
globus_url_destroy(&parsed_url);

if(callback)
{
    callback(callback_arg, handle, NULL);
}
return GLOBUS_SUCCESS;

error_seek: close(fd); error_fd: globus_url_destroy(&parsed_url);

error_url:

return result;

} ` But it probably needs a bit of tidying up.

spbooth commented 1 year ago

dang wrong project