llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
27.83k stars 11.46k forks source link

Alignment deduction for mm_malloc/posix_memalign #49374

Open davidbolvansky opened 3 years ago

davidbolvansky commented 3 years ago
Bugzilla Link 50030
Version trunk
OS Linux
CC @jdoerfert

Extended Description

Motivation:

#include <cstdint>

#ifndef __MM_MALLOC_H
#define __MM_MALLOC_H

#include <stdlib.h>

#ifdef _WIN32
#include <malloc.h>
#else
#ifndef __cplusplus
extern int posix_memalign(void **memptr, size_t alignment, size_t size);
#else
// Some systems (e.g. those with GNU libc) declare posix_memalign with an
// exception specifier. Via an "egregious workaround" in
// Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid
// redeclaration of glibc's declaration.
extern "C" int posix_memalign(void **memptr, size_t alignment, size_t size);
#endif
#endif

#if !(defined(_WIN32) && defined(_mm_malloc))
/*
 * Allocate `size` bytes at an address aligned to `align`.
 *
 * Power-of-two alignments smaller than sizeof(void *) (the bit test also lets
 * 0 through) are raised to sizeof(void *) before the platform allocator is
 * called.  Returns a null pointer when posix_memalign reports a nonzero
 * status; on the Windows paths the aligned allocator's result is returned
 * as-is.
 *
 * The returned pointer must be released with _mm_free(), which selects the
 * deallocator that matches the allocator used here.
 */
static __inline__ void *__attribute__((__always_inline__, __nodebug__,
                                       __malloc__))
_mm_malloc(size_t size, size_t align)
{
#if !defined(_WIN32) && !defined(__MINGW32__)
  /*
   * Byte alignment needs no special allocator.  This shortcut is limited to
   * the platforms where _mm_free() calls free(); on Windows every block must
   * come from the aligned allocator so that the aligned free is valid for it.
   */
  if (align == 1) {
    return malloc(size);
  }
#endif

  if (!(align & (align - 1)) && align < sizeof(void *))
    align = sizeof(void *);

  void *mallocedMemory;
#if defined(__MINGW32__)
  mallocedMemory = __mingw_aligned_malloc(size, align);
#elif defined(_WIN32)
  mallocedMemory = _aligned_malloc(size, align);
#else
  /* posix_memalign stores the block through its first argument and returns
     a nonzero status when it fails. */
  if (posix_memalign(&mallocedMemory, align, size))
    return NULL;
#endif
  return mallocedMemory;
}

/*
 * Release a block obtained from _mm_malloc().
 *
 * The branches mirror the allocator selection in _mm_malloc(): memory from
 * __mingw_aligned_malloc / _aligned_malloc has to go back through the
 * corresponding aligned free rather than plain free().
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_free(void *p)
{
#if defined(__MINGW32__)
  __mingw_aligned_free(p);
#elif defined(_WIN32)
  _aligned_free(p);
#else
  free(p);
#endif
}
#endif

#endif /* __MM_MALLOC_H */

/*
 * Motivating example: request n bytes with a constant 32-byte alignment
 * through _mm_malloc() and hand the block straight back to _mm_free().
 * n is converted to size_t for the call.
 */
void mm_malloc(int n) {
    void *buffer = _mm_malloc((size_t)n, 32);
    // ....
    _mm_free(buffer);
}

Translates to IR:

; IR for mm_malloc(int): the only allocation call is posix_memalign with a
; constant alignment argument of 32.
define dso_local void @_Z9mm_malloci(i32 %0) local_unnamed_addr #0 {
  ; %2 is the stack slot whose address is passed as posix_memalign's memptr.
  %2 = alloca i8*, align 8
  %3 = sext i32 %0 to i64
  %4 = bitcast i8** %2 to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %4) #4
  %5 = call i32 @posix_memalign(i8** nonnull %2, i64 32, i64 %3) #4
  ; %6 is true when posix_memalign returned 0.
  %6 = icmp eq i32 %5, 0
  %7 = load i8*, i8** %2, align 8
  ; %8 is the loaded pointer when %6 holds, otherwise null.
  %8 = select i1 %6, i8* %7, i8* null
  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %4) #4
  call void @free(i8* %8) #4
  ret void
}

As we can see, the 32-byte alignment is not represented anywhere in the IR.

Which approach would be better here?

A) Traverse all loads of %2 and call CreateAlignmentAssumption(Load, 32).

B) Pattern-match the sequence below, starting from the select, and then call CreateAlignmentAssumption(%8).

err = posix_memalign(&ptr, 32, size)
newptr = (err != 0) ? nullptr : ptr;

// CreateAlignmentAssumption(newptr, 32)

C) ?

https://godbolt.org/z/h88zG9hh8

jdoerfert commented 3 years ago

The select is the thing we know has an alignment of 32, right? So we should look for that pattern, I think.