llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
29.27k stars 12.1k forks source link

Clang Static Analyzer generates false positive NULL pointer dereference report when NULL pointer is overwritten by a structure copy via a C pointer dereference #60694

Open ryao opened 1 year ago

ryao commented 1 year ago

The last time I tried filing a bug report, I made a minimal test case that was so minimal that it lost a key nuance that was important for demonstrating the issue in Clang's static analyzer, so this time, I am providing this example which has all of the code that is relevant to the report and I did not try to reduce the example any further:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define ASSERT assert

typedef unsigned long           ulong_t;

struct avl_node {
    struct avl_node *avl_child[2];  /* left/right children nodes */
    uintptr_t avl_pcb;      /* parent, child_index, balance */
};

/*
 * macros to extract/set fields in avl_pcb
 *
 * pointer to the parent of the current node is the high order bits
 */
#define AVL_XPARENT(n)      ((struct avl_node *)((n)->avl_pcb & ~7))
#define AVL_SETPARENT(n, p)                     \
    ((n)->avl_pcb = (((n)->avl_pcb & 7) | (uintptr_t)(p)))

/*
 * index of this node in its parent's avl_child[]: bit #2
 */
#define AVL_XCHILD(n)       (((n)->avl_pcb >> 2) & 1)
#define AVL_SETCHILD(n, c)                      \
    ((n)->avl_pcb = (uintptr_t)(((n)->avl_pcb & ~4) | ((c) << 2)))

/*
 * balance indication for a node, lowest 2 bits. A valid balance is
 * -1, 0, or +1, and is encoded by adding 1 to the value to get the
 * unsigned values of 0, 1, 2.
 */
#define AVL_XBALANCE(n)     ((int)(((n)->avl_pcb & 3) - 1))
#define AVL_SETBALANCE(n, b)                        \
    ((n)->avl_pcb = (uintptr_t)((((n)->avl_pcb & ~3) | ((b) + 1))))

/*
 * switch between a node and data pointer for a given tree
 * the value of "o" is tree->avl_offset
 */
#define AVL_NODE2DATA(n, o) ((void *)((uintptr_t)(n) - (o)))
#define AVL_DATA2NODE(d, o) ((struct avl_node *)((uintptr_t)(d) + (o)))

/*
 * The tree structure. The fields avl_root, avl_compar, and avl_offset come
 * first since they are needed for avl_find().  We want them to fit into
 * a single 64 byte cache line to make avl_find() as fast as possible.
 */
struct avl_tree {
    struct avl_node *avl_root;  /* root node in tree */
    int (*avl_compar)(const void *, const void *);
    size_t avl_offset;      /* offsetof(type, avl_link_t field) */
    ulong_t avl_numnodes;       /* number of nodes in the tree */
#ifndef _KERNEL
    size_t avl_pad;         /* For backwards ABI compatibility. */
#endif
};

typedef struct avl_tree avl_tree_t;
typedef struct avl_node avl_node_t;

extern int
avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance);

void
avl_remove(avl_tree_t *tree, void *data)
{
    avl_node_t *delete;
    avl_node_t *parent;
    avl_node_t *node;
    avl_node_t tmp;
    int old_balance;
    int new_balance;
    int left;
    int right;
    int which_child;
    size_t off = tree->avl_offset;

    delete = AVL_DATA2NODE(data, off);

    /*
     * Deletion is easiest with a node that has at most 1 child.
     * We swap a node with 2 children with a sequentially valued
     * neighbor node. That node will have at most 1 child. Note this
     * has no effect on the ordering of the remaining nodes.
     *
     * As an optimization, we choose the greater neighbor if the tree
     * is right heavy, otherwise the left neighbor. This reduces the
     * number of rotations needed.
     */
    if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {

        /*
         * choose node to swap from whichever side is taller
         */
        old_balance = AVL_XBALANCE(delete);
        left = (old_balance > 0);
        right = 1 - left;

        /*
         * get to the previous value'd node
         * (down 1 left, as far as possible right)
         */
        for (node = delete->avl_child[left];
            node->avl_child[right] != NULL;
            node = node->avl_child[right])
            ;

        /*
         * create a temp placeholder for 'node'
         * move 'node' to delete's spot in the tree
         */
        tmp = *node;

        *node = *delete;
        if (node->avl_child[left] == node)
            node->avl_child[left] = &tmp;

        parent = AVL_XPARENT(node);
        if (parent != NULL)
            parent->avl_child[AVL_XCHILD(node)] = node;
        else
            tree->avl_root = node;
        AVL_SETPARENT(node->avl_child[left], node);
        AVL_SETPARENT(node->avl_child[right], node);

        /*
         * Put tmp where node used to be (just temporary).
         * It always has a parent and at most 1 child.
         */
        delete = &tmp;
        parent = AVL_XPARENT(delete);
        parent->avl_child[AVL_XCHILD(delete)] = delete;
        which_child = (delete->avl_child[1] != 0);
        if (delete->avl_child[which_child] != NULL)
            AVL_SETPARENT(delete->avl_child[which_child], delete);
    }

    /*
     * Here we know "delete" is at least partially a leaf node. It can
     * be easily removed from the tree.
     */
    ASSERT(tree->avl_numnodes > 0);
    --tree->avl_numnodes;
    parent = AVL_XPARENT(delete);
    which_child = AVL_XCHILD(delete);
    if (delete->avl_child[0] != NULL)
        node = delete->avl_child[0];
    else
        node = delete->avl_child[1];

    /*
     * Connect parent directly to node (leaving out delete).
     */
    if (node != NULL) {
        AVL_SETPARENT(node, parent);
        AVL_SETCHILD(node, which_child);
    }
    if (parent == NULL) {
        tree->avl_root = node;
        return;
    }
    parent->avl_child[which_child] = node;

    /*
     * Since the subtree is now shorter, begin adjusting parent balances
     * and performing any needed rotations.
     */
    do {

        /*
         * Move up the tree and adjust the balance
         *
         * Capture the parent and which_child values for the next
         * iteration before any rotations occur.
         */
        node = parent;
        old_balance = AVL_XBALANCE(node);
        new_balance = old_balance - (which_child ? 1 : -1);
        parent = AVL_XPARENT(node);
        which_child = AVL_XCHILD(node);

        /*
         * If a node was in perfect balance but isn't anymore then
         * we can stop, since the height didn't change above this point
         * due to a deletion.
         */
        if (old_balance == 0) {
            AVL_SETBALANCE(node, new_balance);
            break;
        }

        /*
         * If the new balance is zero, we don't need to rotate
         * else
         * need a rotation to fix the balance.
         * If the rotation doesn't change the height
         * of the sub-tree we have finished adjusting.
         */
        if (new_balance == 0)
            AVL_SETBALANCE(node, new_balance);
        else if (!avl_rotation(tree, node, new_balance))
            break;
    } while (parent != NULL);
}

Clang's static analyzer reports a false positive on this:

$ clang --analyze -S /tmp/bug.c
/tmp/bug.c:129:3: warning: Access to field 'avl_pcb' results in a dereference of a null pointer [core.NullDereference]
                AVL_SETPARENT(node->avl_child[right], node);
                ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/tmp/bug.c:21:20: note: expanded from macro 'AVL_SETPARENT'
        ((n)->avl_pcb = (((n)->avl_pcb & 7) | (uintptr_t)(p)))
                          ^~~~~~~~~~~~
1 warning generated.

If you replace *node = *delete; with memcpy(node, delete, sizeof (*node));, the false positive disappears. Based on this, I believe that Clang's static analyzer does not understand memory copies via C pointer dereferences. This should be a bug in the analyzer.

This occurred with LLVM/Clang 15.0.4.

llvmbot commented 1 year ago

@llvm/issue-subscribers-clang-static-analyzer