llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
29.14k stars 12.02k forks source link

libclang: attribute annotate on ClassDecl is not exposed in the AST if it is a result of a macro #87813

Open jszymonek opened 7 months ago

jszymonek commented 7 months ago

The source code I am parsing uses [[clang::annotate]] on a class declaration, however that annotation is not always visible in the AST. In the following source, only Test01 and Test02 ClassDecls have a attribute(annotate) child in the AST. The expected behavior would be for all of the class declarations have the attribute(annotate) child.

class [[clang::annotate("ann")]] Test01 {};
class __attribute__((annotate("ann"))) Test02 {};

#define ANNOTATE1 [[clang::annotate("ann")]]
#define ANNOTATE2 __attribute__((annotate("ann")))

class ANNOTATE1 Test03 {};
class ANNOTATE2 Test04 {};

#define ANNOTATE_STR1(x) [[clang::annotate((x))]]
#define ANNOTATE_STR2(x) __attribute__((annotate((x))))

class ANNOTATE_STR1("ann") Test05 {};
class ANNOTATE_STR2("ann") Test06 {};

Here's the code I am running:

#include <clang-c/Index.h>
#include <cstdio>

#include <vector>
#include <format>

CXErrorCode CreateSimpleTU(CXIndex index, CXTranslationUnit& tu)
{
    constexpr const char* header = "testfile.cpp";

    constexpr const char* header_args[] = {
        "-x",
        "c++",

        "-std=c++20",
        "--pedantic",
        "--pedantic-errors",

        "-Wall",
        "-Wextra",
        "-Weverything",
        "-Werror",

        "-Wno-c++98-compat",
    };

    const int tu_flags =  CXTranslationUnit_SkipFunctionBodies | CXTranslationUnit_VisitImplicitAttributes;

    CXErrorCode err = clang_parseTranslationUnit2
    (
        index,
        header,
        /*args*/ header_args, /*num_args*/ std::size(header_args),
        /*unsaved files*/ nullptr,  /*num_unsaved_files*/ 0,
        tu_flags,
        &tu
    );

    return err;
}

constexpr bool PRETTY_PRINTED = false;

struct VisitorData
{
    int depth = 1;
};

static CXChildVisitResult VisitTU(CXCursor current_cursor, CXCursor parent, CXClientData client_data)
{
    VisitorData* data = reinterpret_cast<VisitorData*>(client_data);

    if (clang_Location_isFromMainFile(clang_getCursorLocation(current_cursor)) == 0)
    {
        return CXChildVisit_Continue;
    }

    const CXCursorKind cursor_kind = clang_getCursorKind(current_cursor);
    const bool is_attr = clang_isAttribute(cursor_kind) != 0;
    const bool has_attr = clang_Cursor_hasAttrs(current_cursor);

    const CXString display_name = clang_getCursorDisplayName(current_cursor);
    const CXString spelling = clang_getCursorSpelling(current_cursor);
    const CXString kind_spelling = clang_getCursorKindSpelling(cursor_kind);
    const CXPrintingPolicy print_policy = clang_getCursorPrintingPolicy(current_cursor);
    const CXString pretty_printed = clang_getCursorPrettyPrinted(current_cursor, print_policy);
    const CXType cursor_type = clang_getCursorType(current_cursor);
    const CXString type_spelling = clang_getTypeSpelling(cursor_type);

    std::string indent = std::format("{:->{}}", "", data->depth * 2);
    std::string msg = std::format("{} {} '{}' <{}> {}{}"
        , indent
        , clang_getCString(kind_spelling)
        , clang_getCString(spelling)
        , clang_getCString(type_spelling)
        , is_attr ? "[ATTRIB] " : ""
        , has_attr ? "[HAS_ATTR] " : ""
    );
    printf("%s\n", msg.c_str());

    if constexpr (PRETTY_PRINTED)
    {
        printf("%s\n", clang_getCString(pretty_printed));
    }

    VisitorData child_data;
    child_data.depth = data->depth + 1;
    clang_visitChildren(current_cursor, VisitTU, &child_data);

    clang_disposeString(type_spelling);
    clang_disposeString(pretty_printed);
    clang_PrintingPolicy_dispose(print_policy);
    clang_disposeString(kind_spelling);
    clang_disposeString(spelling);
    clang_disposeString(display_name);

    return CXChildVisit_Continue;
};

int main(int argc, char* argv[])
{
    {
        CXString clang_ver = clang_getClangVersion();
        printf("clang ver: %s\n", clang_getCString(clang_ver));
        clang_disposeString(clang_ver);
    }

    CXIndex index = clang_createIndex(/*exclide decls from PCH*/1, /*print diagnostics*/0); 
    if (index == nullptr)
    {
        printf("error\n");
        return 1;
    }

    CXTranslationUnit tu;
    CXErrorCode err = CreateSimpleTU(index, tu);

    if (tu == nullptr || err != CXError_Success)
    {
        printf("tu creation error: %d\n", int(err));
        return 123;
    }

    const int num_diags = clang_getNumDiagnostics(tu);
    printf("diagnostics (%d):\n", num_diags);
    for (int i = 0; i < num_diags; ++i)
    {
        CXDiagnostic diag = clang_getDiagnostic(tu, i);
        CXString s = clang_formatDiagnostic(diag, clang_defaultDiagnosticDisplayOptions());

        printf("%s\n", clang_getCString(s));

        clang_disposeString(s);
        clang_disposeDiagnostic(diag);
    }

    CXCursor cursor = clang_getTranslationUnitCursor(tu);

    VisitorData data;
    clang_visitChildren(cursor, VisitTU, /*user_data*/&data);

    clang_disposeTranslationUnit(tu);
    tu = nullptr;

    clang_disposeIndex(index);
    index = nullptr;

    return 0;
}

Here's the output it produces:

clang ver: clang version 18.1.1
diagnostics (0):
-- ClassDecl 'Test01' <Test01> [HAS_ATTR]
---- attribute(annotate) 'ann' <> [ATTRIB] [HAS_ATTR]
-- ClassDecl 'Test02' <Test02> [HAS_ATTR]
---- attribute(annotate) 'ann' <> [ATTRIB] [HAS_ATTR]
-- ClassDecl 'Test03' <Test03> [HAS_ATTR]
-- ClassDecl 'Test04' <Test04> [HAS_ATTR]
-- ClassDecl 'Test05' <Test05> [HAS_ATTR]
-- ClassDecl 'Test06' <Test06> [HAS_ATTR]

If you set PRETTY_PRINTED to true, the output will contain the pretty printed source representation of each node. As you can observe, that representation contains the annotation, which suggests that it is not lost during the parsing, just not properly exposed in the AST.

clang ver: clang version 18.1.1
diagnostics (0):
-- ClassDecl 'Test01' <Test01> [HAS_ATTR]
class [[clang::annotate("ann")]] Test01 {
}
---- attribute(annotate) 'ann' <> [ATTRIB] [HAS_ATTR]

-- ClassDecl 'Test02' <Test02> [HAS_ATTR]
class __attribute__((annotate("ann"))) Test02 {
}
---- attribute(annotate) 'ann' <> [ATTRIB] [HAS_ATTR]

-- ClassDecl 'Test03' <Test03> [HAS_ATTR]
class [[clang::annotate("ann")]] Test03 {
}
-- ClassDecl 'Test04' <Test04> [HAS_ATTR]
class __attribute__((annotate("ann"))) Test04 {
}
-- ClassDecl 'Test05' <Test05> [HAS_ATTR]
class [[clang::annotate("ann")]] Test05 {
}
-- ClassDecl 'Test06' <Test06> [HAS_ATTR]
class __attribute__((annotate("ann"))) Test06 {
}

Further testing indicates that the same applies to annotations attached to class methods.

jszymonek commented 7 months ago

I should also point out that clang 18.1.1 on Compiler Explorer dumps the AST as I would expect - exposing the annotation no matter what method is used. Please refer to the following example.

sturcotte06 commented 6 months ago

I can confirm I ran into the same issue.

kelvich commented 1 week ago

I don't have a good explanation of why but removing clang_Location_isFromMainFile check makes you code work in all cases:

-- ClassDecl 'Test01' <Test01> [HAS_ATTR] 
---- attribute(annotate) 'ann' <> [IS_ATTR] [HAS_ATTR] 
-- ClassDecl 'Test02' <Test02> [HAS_ATTR] 
---- attribute(annotate) 'ann' <> [IS_ATTR] [HAS_ATTR] 
-- ClassDecl 'Test03' <Test03> [HAS_ATTR] 
---- attribute(annotate) 'ann' <> [IS_ATTR] [HAS_ATTR] 
-- ClassDecl 'Test04' <Test04> [HAS_ATTR] 
---- attribute(annotate) 'ann' <> [IS_ATTR] [HAS_ATTR] 
-- ClassDecl 'Test05' <Test05> [HAS_ATTR] 
---- attribute(annotate) 'ann' <> [IS_ATTR] [HAS_ATTR] 
-- ClassDecl 'Test06' <Test06> [HAS_ATTR] 
---- attribute(annotate) 'ann' <> [IS_ATTR] [HAS_ATTR]