bowbahdoe / mccue.dev-comments

0 stars 0 forks source link

pages/7-27-22-c-errors #9

Open utterances-bot opened 2 years ago

utterances-bot commented 2 years ago

The different ways to handle errors in C

https://mccue.dev/pages/7-27-22-c-errors

bowbahdoe commented 2 years ago

Variation derived from a comment by /u/levodelellis where a negated enum is used to assign error conditions constant values.

#include <limits.h> /* INT_MAX, used by the overflow check below */

/*
 * Error conditions reported by parse_natural_base_10_number.
 *
 * The function returns the bitwise negation of one of these constants, so
 * PARSE_NATURAL_EMPTY_STRING (0) is returned as -1, the next constant as -2,
 * and so on. Every error is therefore negative and every successful parse is
 * non-negative.
 */
enum ParseNaturalError {
    PARSE_NATURAL_EMPTY_STRING,
    PARSE_NATURAL_BAD_CHARACTER,
    /* Appended last so the two existing codes keep their values. */
    PARSE_NATURAL_OVERFLOW
};

/*
 * Parses a string made only of the characters '0'..'9' as a base 10 number.
 *
 * Returns the parsed value (>= 0) on success, or ~<enum ParseNaturalError>
 * (< 0) on failure. Recover the enum value with ~result.
 *
 * Values that do not fit in an int are rejected with PARSE_NATURAL_OVERFLOW.
 * Without that check the accumulation would overflow a signed int, which is
 * undefined behavior and could produce a negative "result" that the caller
 * mistakes for an error code.
 */
int parse_natural_base_10_number(const char* s) {
    if (s[0] == '\0') {
        return ~PARSE_NATURAL_EMPTY_STRING;
    }
    int parsed = 0;
    for (size_t i = 0; s[i] != '\0'; i++) {
        if (s[i] < '0' || s[i] > '9') {
            return ~PARSE_NATURAL_BAD_CHARACTER;
        }

        int digit = s[i] - '0';
        /* Equivalent to parsed * 10 + digit > INT_MAX, without overflowing. */
        if (parsed > (INT_MAX - digit) / 10) {
            return ~PARSE_NATURAL_OVERFLOW;
        }
        parsed = parsed * 10 + digit;
    }

    return parsed;
}

/*
 * Demo driver: parses each example string and prints either the parsed value
 * or a message for the error code recovered with ~parsed.
 */
int main() {
    const char* examples[] = { "10", "foo", "42" };
    /* Derive the count from the array so the two cannot drift apart. */
    const size_t example_count = sizeof examples / sizeof examples[0];
    for (size_t i = 0; i < example_count; i++) {
        const char* example = examples[i];
        int parsed = parse_natural_base_10_number(example);
        if (parsed < 0) {
            printf("%d", parsed);
            switch (~parsed) {
                case PARSE_NATURAL_EMPTY_STRING:
                    printf("Empty String\n");
                    break;
                case PARSE_NATURAL_BAD_CHARACTER:
                    printf("Bad character: %s\n", example);
                    break;
                default:
                    /* Any other negative code would otherwise print nothing. */
                    printf("Unknown error\n");
                    break;
            }
        }
        else {
            printf("worked: %d\n", parsed);
        }
    }

    return 0;
}
findusl commented 2 years ago

I don't code in C, so I might be wrong. The last approach reminds me a bit of Railway Oriented Programming, with the Result return type with Success and Failure. Is it similar? https://blog.logrocket.com/what-is-railway-oriented-programming/

bowbahdoe commented 2 years ago

@findusl I suppose so?

A more natural translation of "every function succeeds or fails" is systems with exceptions or more composable Result<T, E> type errors. What #11 solves for is making it so that there is a common E, but in C you essentially need to hand-code the Result each time, and the composition is manual.

There are a few other approaches that people on reddit brought up that probably deserve to be on this list

minimaxwell commented 2 years ago

Another scheme, which might look pretty crazy, is using setjmp/longjmp. You call setjmp before calling the function that can return an error. If setjmp returns non-zero, it means that we just jumped to that location, due to an error in the future.

This is very tricky to read since the error handling is done before actually calling the function !

This might look like a joke, but it's actually the error handling scheme you need to use with libpng : https://github.com/glennrp/libpng/blob/libpng16/example.c#L642

bowbahdoe commented 2 years ago

Implementing a basic version of the longjmp scheme. The 1 can be subbed with other constants to signal a specific error.

#include <stdio.h>
#include <setjmp.h>

/*
 * Parses s as a base 10 number made only of the characters '0'..'9'.
 *
 * Returns the parsed value; an empty string yields 0. On the first character
 * outside '0'..'9' the function does not return: it calls longjmp(env, 1),
 * so env must have been filled by a setjmp call that is still active.
 */
int parse_natural_base_10_number(const char* s, jmp_buf env) {
    int total = 0;
    const char* cursor = s;

    while (*cursor != '\0') {
        const char c = *cursor;
        if (c < '0' || c > '9') {
            longjmp(env, 1);
        }
        total = total * 10 + (c - '0');
        cursor++;
    }

    return total;
}

/*
 * Demo driver: for each example, arms a jump buffer with setjmp and then
 * parses. A positive setjmp result means the parser jumped back with longjmp,
 * which is reported as a failure.
 */
int main() {
    const char* examples[] = { "10", "42", "foo", "32", "1f"};
    /*
     * Derived from the array itself: the previous hard-coded bound of 6 read
     * one element past the end of this 5-element array.
     */
    const size_t example_count = sizeof examples / sizeof examples[0];

    for (size_t i = 0; i < example_count; i++) {
        const char* example = examples[i];

        jmp_buf env;
        /* The error branch is written before the call that can trigger it. */
        if (setjmp(env) > 0) {
            printf("failed: %s\n", example);
        }
        else {
            int parsed = parse_natural_base_10_number(example, env);
            printf("parsed: %d\n", parsed);
        }

    }

    return 0;
}
parsed: 10
parsed: 42
failed: foo
parsed: 32
failed: 1f
bowbahdoe commented 2 years ago

setjmp is evil y'all.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <setjmp.h>
#include <errno.h>

/*
 * Table of operations attached to an error value.
 */
struct ErrorOps {
    /* Builds a description from the error's payload; the caller in this file frees it. */
    char *(*describe)(const void *self);
    /* Releases the error's payload. May be NULL when there is nothing to release. */
    void (*free)(void *self);
};

/*
 * An error value: an operations table plus the payload it operates on.
 */
struct Error {
    /* Operations used to describe and release self. */
    struct ErrorOps ops;
    /* Payload passed to the operations; NULL for errors that carry no data. */
    void *self;
};

/*
 * Returns the description produced by the error's own describe operation,
 * called with the error's payload.
 */
char* error_describe(struct Error error) {
    char* (*describe)(const void*) = error.ops.describe;
    return describe(error.self);
}

/*
 * Releases the error's payload through its free operation.
 * Does nothing when the operations table has no free operation.
 */
void error_free(struct Error error) {
    void (*release)(void*) = error.ops.free;
    if (release == NULL) {
        return;
    }
    release(error.self);
}

_Thread_local static struct Error last_error;

/*
 * TRY(env, code, catch)
 *
 * Calls setjmp(env). When setjmp returns a positive value (control arrived
 * here through longjmp on env) the `catch` statements run; otherwise the
 * `code` statements run.
 *
 * The body is wrapped in ({ ... }), a compiler-extension statement
 * expression rather than ISO C.
 * NOTE(review): arguments are pasted textually, so a comma outside
 * parentheses inside `code` or `catch` would split the macro argument.
 */
#define TRY(env, code, catch) ({ \
    if (setjmp(env) > 0) {\
        catch;\
    }\
    else {\
        code;\
    }\
})

/*
 * THROW(env, error)
 *
 * Stores `error` in last_error and then calls longjmp(env, 1), so control
 * does not continue past the THROW.
 *
 * Like TRY, the body uses the ({ ... }) statement-expression extension.
 */
#define THROW(env, error) ({ \
    last_error = error;      \
    longjmp(env, 1);\
})

/*
 * Describe operation for the empty-string error.
 *
 * self is unused: this error carries no payload.
 *
 * Returns a newly allocated copy of the message that the caller must free,
 * or NULL if the allocation fails. The copy is made with malloc/memcpy
 * instead of asprintf, which is not part of ISO C and whose output pointer
 * is undefined when it fails.
 */
char* empty_string_describe(const void* self) {
    (void) self;

    static const char message[] = "Empty string is not good";
    char* result = malloc(sizeof message);
    if (result == NULL) {
        return NULL;
    }
    /* sizeof message includes the terminating '\0'. */
    memcpy(result, message, sizeof message);
    return result;
}

/* Operations for the empty-string error; it has no payload, so no free operation. */
const struct ErrorOps empty_string_error_ops = {
    .free = NULL,
    .describe = empty_string_describe
};

struct Error empty_string_error() {
    struct Error result = {
            .ops = empty_string_error_ops,
            .self = NULL
    };
    return result;
}

/*
 * Payload of a bad-character error.
 */
struct BadCharacterError {
    /* Copy of the text that was being parsed; released by bad_character_free. */
    char *source;
    /* Position of the offending character within source. */
    size_t index;
};

/*
 * Describe operation for bad-character errors.
 *
 * self points to a struct BadCharacterError.
 *
 * Returns a newly allocated message naming the source text, the index and the
 * offending character; the caller must free it. Returns NULL if formatting or
 * allocation fails. The message is sized with snprintf and allocated with
 * malloc rather than asprintf, which is not part of ISO C and whose output
 * pointer is undefined when it fails.
 */
char* bad_character_describe(const void* self) {
    const struct BadCharacterError* this = self;
    const char bad = this->source[this->index];

    /* First pass only measures: snprintf reports the length it would write. */
    int length = snprintf(NULL, 0, "Bad character in %s at index %zu: '%c'",
                          this->source, this->index, bad);
    if (length < 0) {
        return NULL;
    }

    size_t size = (size_t) length + 1;
    char* result = malloc(size);
    if (result == NULL) {
        return NULL;
    }
    snprintf(result, size, "Bad character in %s at index %zu: '%c'",
             this->source, this->index, bad);
    return result;
}

/*
 * Free operation for bad-character errors: releases the copied source text
 * and then the payload struct itself.
 */
void bad_character_free(void* self) {
    struct BadCharacterError* payload = self;
    char* owned_source = payload->source;

    free(owned_source);
    free(payload);
}

/* Operations for bad-character errors. */
const struct ErrorOps bad_character_error_ops = {
    .free = bad_character_free,
    .describe = bad_character_describe
};

struct Error bad_character_error(const char* source, size_t index) {
    struct BadCharacterError* error = malloc(sizeof (struct BadCharacterError));

    char* source_clone = calloc(strlen(source) + 1, sizeof (char));
    strcpy(source_clone, source);
    error->source = source_clone;

    error->index = index;

    struct Error result = {
            .ops = bad_character_error_ops,
            .self = error
    };
    return result;
}

/*
 * Parses s as a base 10 number made only of the characters '0'..'9' and
 * returns the value.
 *
 * Failures do not return: an empty string throws the empty-string error and
 * the first character outside '0'..'9' throws a bad-character error for that
 * position, both through THROW on env.
 */
int parse_natural_base_10_number(jmp_buf env, const char* s) {
    if (*s == '\0') {
        THROW(env, empty_string_error());
    }

    int value = 0;
    size_t position = 0;
    while (s[position] != '\0') {
        const char c = s[position];
        if (c < '0' || c > '9') {
            THROW(env, bad_character_error(s, position));
        }
        value = value * 10 + (c - '0');
        position++;
    }

    return value;
}

/*
 * Payload of a file-operation error.
 */
struct FileOperationError {
    /* errno value captured when the file operation failed. */
    int error_number;
};

/*
 * Describe operation for file-operation errors.
 *
 * self points to a struct FileOperationError.
 *
 * Returns a newly allocated copy of strerror(error_number) that the caller
 * must free, or NULL if the allocation fails. The copy is made with
 * malloc/memcpy instead of asprintf, which is not part of ISO C and whose
 * output pointer is undefined when it fails.
 */
char* file_operation_error_describe(const void* self) {
    const struct FileOperationError* this = self;

    /* Copy right away: a later strerror call may overwrite this string. */
    const char* message = strerror(this->error_number);
    size_t size = strlen(message) + 1;

    char* result = malloc(size);
    if (result == NULL) {
        return NULL;
    }
    memcpy(result, message, size);
    return result;
}

/*
 * Free operation for file-operation errors: the payload is a single
 * allocation, so releasing it is one free call.
 */
void file_operation_error_free(void* self) {
    void* payload = self;
    free(payload);
}

/* Operations for file-operation errors. */
const struct ErrorOps file_operation_error_ops = {
    .free = file_operation_error_free,
    .describe = file_operation_error_describe
};

/*
 * Builds a file-operation error carrying the given errno value.
 * The payload is released by file_operation_error_free.
 *
 * Allocation failure is fatal: the process prints a message and aborts
 * instead of writing through a NULL pointer as the unchecked version did.
 */
struct Error file_operation_error(int error_number) {
    /* Named `payload` so the local no longer shadows this function's name. */
    struct FileOperationError* payload = malloc(sizeof *payload);
    if (payload == NULL) {
        fputs("file_operation_error: out of memory\n", stderr);
        abort();
    }
    payload->error_number = error_number;

    struct Error result = {
            .ops = file_operation_error_ops,
            .self = payload
    };
    return result;
}

/*
 * Opens path, reads its first line and parses it as a base 10 number.
 *
 * Returns the parsed number. Failures do not return; they THROW on env:
 *   - a file-operation error when the file cannot be opened or read,
 *   - whatever parse_natural_base_10_number throws for the line's contents
 *     (an empty file or empty first line is reported as an empty string).
 *
 * The file is closed on every path before control leaves this function.
 */
int read_number_from_file(jmp_buf env, const char* path) {
    FILE* fp = fopen(path, "r");
    if (fp == NULL) {
        /* Nothing was opened, so there is nothing to close here. */
        struct Error error = file_operation_error(errno);
        errno = 0;
        THROW(env, error);
    }

    // Max positive int is only 10 characters big in base 10
    // (plus room for the newline and the terminating '\0').
    char first_line[12] = "";
    char* line = fgets(first_line, sizeof (first_line), fp);

    if (line == NULL && ferror(fp)) {
        struct Error error = file_operation_error(errno);
        errno = 0;
        fclose(fp);
        THROW(env, error);
    }

    fclose(fp);

    if (line == NULL) {
        /* End of file before any character: treat as an empty line. */
        first_line[0] = '\0';
    }

    /* fgets keeps the line terminator; drop it so the parser sees only the digits. */
    first_line[strcspn(first_line, "\r\n")] = '\0';

    return parse_natural_base_10_number(env, first_line);
}

/*
 * Demo driver: reads a number from each example file, printing the number on
 * success or the thrown error's description on failure. The description and
 * the error are released after printing.
 */
int main() {
    const char* examples[] = { "../ex1", "../ex2", "../ex3" };

    for (size_t current = 0; current < 3; current++) {
        const char* path = examples[current];
        jmp_buf env;

        TRY(env, {
            int value = read_number_from_file(env, path);
            printf("success: %d\n", value);
        }, {
            char* text = error_describe(last_error);
            printf("error: %s\n", text);
            free(text);
            error_free(last_error);
        });
    }

    return 0;
}
N-R-K commented 2 years ago
    int* result = malloc(sizeof (int));
    *result = parsed;
    return result;

This code isn't returning NULL on error because if malloc fails it's going to result in a NULL dereference (which is undefined behaviour, but on most OS it'll cause a segfault).

bowbahdoe commented 2 years ago

@N-R-K you are not wrong.

okovko commented 2 years ago

A nice trick is to index into an array of handlers using the enum status code you return, which is similar to your boxed errors but doesn't incur as much if-soup: https://godbolt.org/z/PzE7rG5Wf

bowbahdoe commented 2 years ago

@okovko I think that's a good technique for translating an enum to a displayable error.

The key capability of the boxed errors though is the ability to carry around more context like "you got this kind of error at line X while trying to do Y"

okovko commented 2 years ago

If you take another look you might notice that there is a vtable and a dynamically allocated context, like what you describe with boxed errors, that can indicate specifically what went right or wrong. If you like, I can write an example that does the same as your parsing code, or you can take a second look for yourself.

okovko commented 2 years ago

In a few more words, in the context of your example, there is a success case, file not found error, and bad character error. The status code indexes into the function pointer array to select the corresponding error handler. When the function itself is called, it puts whatever relevant error information into the union out param, which is then passed to the handler. So, every handler has whatever error context it needs, and you can dynamically allocate the contents of the out param if a handler needs a lot of non-trivial context, and only that specific handler will need to deal with memory clean up. If you really want, you can make the out param itself dynamically allocated, but that would impede simple cases when you just want an int.

bowbahdoe commented 2 years ago

You are right, I wasn't reading your example correctly.

Levev commented 2 years ago

In example 7, instead of doing this

for (size_t j = 0; j < bad_index; j++) {
    printf(" ");
}
printf("^☹️\n");

You could just simply write something like

printf("%*c☹️\n", bad_index, '^');

This should work in example 10 as well

Keroronsk commented 2 years ago

@bowbahdoe there is actually library with TRY() CATCH() in C, written by "Test Driven Development for Embedded C", it's kinda neat... https://github.com/ThrowTheSwitch/CException

OetkenPurveyorOfCode commented 7 months ago
#include <stdio.h>
#include <string.h>

/*
 * Parses s as a base 10 number made only of the characters '0'..'9' and
 * reports the outcome as a string:
 *   "Success"                     - *out holds the parsed value
 *   "Empty String"                - s was empty; *out is untouched
 *   "Bad Character <index> <c>"   - s[index] == c is not a digit; *out is
 *                                   untouched
 *
 * The "Bad Character" message lives in a static buffer that is overwritten
 * by the next failing call; the other results are string literals. None of
 * the returned strings may be freed or modified by the caller.
 */
char* parse_natural_base_10_number(const char* s, int* out) {
    if (s[0] == '\0') {
        return "Empty String";
    }
    int parsed = 0;
    for (size_t i = 0; s[i] != '\0'; i++) {
        if (s[i] < '0' || s[i] > '9') {
            static char string[sizeof("Bad Character -2147483648 X")];
            /*
             * %d requires an int, so the size_t index is converted explicitly
             * (passing a size_t to %d is undefined behavior). The buffer is
             * sized for an int, and snprintf keeps the write bounded.
             */
            snprintf(string, sizeof string, "Bad Character %d %c", (int) i, s[i]);
            return string;
        }
        else {
            parsed *= 10;
            parsed += s[i] - '0';
        }
    }
    *out = parsed;
    return "Success";
}

/*
 * Demo driver: parses each example and decodes the returned status string,
 * printing the value, an empty-string notice, or a marker under the bad
 * character.
 */
int main() {
    const char* examples[] = { "10", "foo", "42", "12a34" };
    /* Derive the count from the array so the two cannot drift apart. */
    const size_t example_count = sizeof examples / sizeof examples[0];
    for (size_t i = 0; i < example_count; i++) {
        const char* example = examples[i];
        int number = 0;
        int index = 0;
        /* sscanf's %c stores through a char*, so this must be a char, not an int. */
        char character = '\0';
        char* result = parse_natural_base_10_number(example, &number);
        if (strcmp(result, "Success") == 0) {
            printf("worked: %d\n", number);
        }
        else if (strcmp(result, "Empty String") == 0) {
            printf("got empty string\n");
        }
        else if (sscanf(result, "Bad Character %d %c", &index, &character) == 2) {
            printf("failed: %s\n        ", example);
            /* int counter: index is an int, avoiding a signed/unsigned comparison. */
            for (int j = 0; j < index; j++) {
                printf(" ");
            }
            printf("^☹️ '%c' is not good\n", character);
        }
        /* Any other status string is ignored, as before. */

    }

    return 0;
}

Stringly typed error handling. Inefficient and weirdly half-ergonomic. I have never used it or seen it used, but it was always a little intriguing.

OetkenPurveyorOfCode commented 7 months ago

I could actually forgo the out parameter by stringifying it in the success string and then parsing it back out. But if you continue along that route, every function will accept a string and return a string, with complicated parsing logic in everything and no compile-time safety.