llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
28.79k stars 11.9k forks source link

Clang 17 and later rejects __attribute__ after static specifier in C++23 lambda expressions #114026

Open johnplatts opened 3 days ago

johnplatts commented 3 days ago

Here is a snippet of C++23 code that compiles successfully with GCC 13 or later with -std=c++23 but fails to compile with Clang 17 or later (even with the std=c++23 option):

#include <stddef.h>
#include <stdint.h>
#include <immintrin.h>

__m128i __attribute__((__target__("sse4.2"))) SomeFunc(
    const int32_t* src_ptr, size_t num_of_vectors) {
    auto process_func = [](const int32_t* p) static __attribute__((__target__("sse4.2"))) {
        __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
        return _mm_mullo_epi32(v, _mm_set_epi32(15752961, 3969, 63, 1));
    };

    __m128i h = _mm_setzero_si128();
    for (size_t i = 0; i < num_of_vectors; i++) {
        h = _mm_add_epi32(_mm_sub_epi32(_mm_slli_epi32(h, 6), h),
            process_func(src_ptr + 4 * i));
    }
    return h;
}

The results of compiling the above snippet with GCC 13, Clang 17, and Clang trunk can be found at https://godbolt.org/z/WfoqnnPqc.

Should Clang be updated to allow __attribute__((__target__("sse4.2"))) after the static specifier in C++23 lambda expressions?

On the other hand, the below snippet successfully compiles with Clang 17 or later in C++23 mode but fails to compile with GCC 13 in C++23 mode:

#include <stddef.h>
#include <stdint.h>
#include <immintrin.h>

__m128i __attribute__((__target__("sse4.2"))) SomeFunc(
    const int32_t* src_ptr, size_t num_of_vectors) {
    auto process_func = [](const int32_t* p) __attribute__((__target__("sse4.2"))) static {
        __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
        return _mm_mullo_epi32(v, _mm_set_epi32(15752961, 3969, 63, 1));
    };

    __m128i h = _mm_setzero_si128();
    for (size_t i = 0; i < num_of_vectors; i++) {
        h = _mm_add_epi32(_mm_sub_epi32(_mm_slli_epi32(h, 6), h),
            process_func(src_ptr + 4 * i));
    }
    return h;
}

The results of compiling the above snippet with GCC 13, Clang 17, and Clang trunk can be found at https://godbolt.org/z/9ooT8bj1f.

cor3ntin commented 2 days ago

@AaronBallman @erichkeane

llvmbot commented 2 days ago

@llvm/issue-subscribers-clang-frontend

Author: John Platts (johnplatts)

Here is a snippet of C++23 code that compiles successfully with GCC 13 or later with `-std=c++23` but fails to compile with Clang 17 or later (even with the `std=c++23` option): ``` #include <stddef.h> #include <stdint.h> #include <immintrin.h> __m128i __attribute__((__target__("sse4.2"))) SomeFunc( const int32_t* src_ptr, size_t num_of_vectors) { auto process_func = [](const int32_t* p) static __attribute__((__target__("sse4.2"))) { __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); return _mm_mullo_epi32(v, _mm_set_epi32(15752961, 3969, 63, 1)); }; __m128i h = _mm_setzero_si128(); for (size_t i = 0; i < num_of_vectors; i++) { h = _mm_add_epi32(_mm_sub_epi32(_mm_slli_epi32(h, 6), h), process_func(src_ptr + 4 * i)); } return h; } ``` The results of compiling the above snippet with GCC 13, Clang 17, and Clang trunk can be found at https://godbolt.org/z/WfoqnnPqc. Should Clang be updated to allow `__attribute__((__target__("sse4.2")))` after the `static` specifier in C++23 lambda expressions? On the other hand, the below snippet successfully compiles with Clang 17 or later in C++23 mode but fails to compile with GCC 13 in C++23 mode: ``` #include <stddef.h> #include <stdint.h> #include <immintrin.h> __m128i __attribute__((__target__("sse4.2"))) SomeFunc( const int32_t* src_ptr, size_t num_of_vectors) { auto process_func = [](const int32_t* p) __attribute__((__target__("sse4.2"))) static { __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); return _mm_mullo_epi32(v, _mm_set_epi32(15752961, 3969, 63, 1)); }; __m128i h = _mm_setzero_si128(); for (size_t i = 0; i < num_of_vectors; i++) { h = _mm_add_epi32(_mm_sub_epi32(_mm_slli_epi32(h, 6), h), process_func(src_ptr + 4 * i)); } return h; } ``` The results of compiling the above snippet with GCC 13, Clang 17, and Clang trunk can be found at https://godbolt.org/z/9ooT8bj1f.
erichkeane commented 2 days ago

Huh, neat! It looks like we just disagree about the location for parsing the attributes. Annoyingly, we have parsed in that location for a while, so we probably have to parse in BOTH locations in order to be both clang backward, and gcc compatible. Likely just a missing "MaybeParseGCCAttributes" line somewhere. We might want to do an analysis as a part of this for what MSVC does in these areas.

johnplatts commented 2 days ago

Huh, neat! It looks like we just disagree about the location for parsing the attributes. Annoyingly, we have parsed in that location for a while, so we probably have to parse in BOTH locations in order to be both clang backward, and gcc compatible. Likely just a missing "MaybeParseGCCAttributes" line somewhere. We might want to do an analysis as a part of this for what MSVC does in these areas.

MSVC does not currently support the static lambda specifier (even with the -std:c++latest compiler option), but here is a snippet with the [[msvc::forceinline]] attribute declared after the noexcept lambda specifier that does compile with MSVC with the -std:c++latest option:

#include <stddef.h>
#include <stdint.h>
#include <immintrin.h>

__m128i SomeFunc(const int32_t* src_ptr, size_t num_of_vectors) {
    auto process_func = [](const int32_t* p) noexcept [[msvc::forceinline]] {
        __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
        return _mm_mullo_epi32(v, _mm_set_epi32(15752961, 3969, 63, 1));
    };

    __m128i h = _mm_setzero_si128();
    for (size_t i = 0; i < num_of_vectors; i++) {
        h = _mm_add_epi32(_mm_sub_epi32(_mm_slli_epi32(h, 6), h),
            process_func(src_ptr + 4 * i));
    }
    return h;
}