Open kalmarek opened 6 years ago
That does seem like a good one!
Here's my Rust attempt. This sure feels hard to optimize.
#include <stdlib.h>
#include <stdio.h>
void swap(size_t *p, size_t a, size_t b) {
size_t tmp = p[a];
p[a] = p[b];
p[b] = tmp;
}
size_t heaps(size_t n) {
size_t *a = malloc(n * sizeof *a);
if (!a) {
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < n; i++) {
a[i] = i; // i + 1 was strange
}
size_t *c = calloc(n, sizeof *c);
if (!c) {
exit(EXIT_FAILURE);
}
size_t count = a[0];
size_t i = 0;
while (i < n) {
if (c[i] < i) {
swap(a, i % 2 ? c[i] : 0, i);
count += a[0];
c[i]++;
i = 0;
} else {
c[i] = 0;
i++;
}
}
free(a);
free(c);
return count;
}
I think there are some issues with these codes
heaps(n), n=1:4 | code |
---|---|
1, 3, 12, 60 | kalmarek Julia |
0, 3, 12, 60 | karmarek C |
0, 1, 6, 36 | Stargateur C |
1, 2, 6, 24 | n! |
@johnfgibson Sorry, I should make it more clear that I change the initialisation in favor of the rust:
p[n] = n+1; // n + 1 could overflow n is as good as n + 1 and can't overflow
let mut perm: Vec<_> = (0..n).collect(); // rust use n
a[i] = i; // so I just used n too
@johnfgibson, @Stargateur: for comparison with n
-factorial: we don't simply count the permutations, but sum the first entry of every permutation (otherwise future compilers may skip the swap
part entirely);
p[n] = n+1
was because permutations are usually counted from one, not zero. This does matter for the returned value.
Could You also recheck n=1
? my C version returns squarely 1
for N=1
;-)
@kalmarek aha! thank you. (regarding returning the sum of the first entry).
I ran what's currently posted above and got heaps(1:5) == 1, 3, 12, 60, 360. There were some differences from an earlier version (count += p[0]
versus count += p[1]
) that probably account for the difference.
@Stargateur I don't follow from your comment what changes should be made.
I can add working codes to the microbenchmark suite but I don't think I can debug and fix codes with errors.
@kalmarek @johnfgibson I'm not a mathematician, if you conclude that function should start init with 1
, I'm ok with it. But I didn't think that the result was so important, just as you said it's for avoid compiler to optimize it. I'm just a C dev who read this thread.
This is better ?
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <assert.h>
static void swap(uintmax_t *p, size_t a, size_t b) {
uintmax_t tmp = p[a];
p[a] = p[b];
p[b] = tmp;
}
// UB for n == 0
static uintmax_t heaps(size_t n) {
uintmax_t *a = malloc(n * sizeof *a);
if (!a) {
exit(EXIT_FAILURE);
}
uintmax_t acu = 1;
for (size_t i = 0; i < n; i++) {
a[i] = acu++;
}
size_t *c = calloc(n, sizeof *c);
if (!c) {
exit(EXIT_FAILURE);
}
assert(n != 0); // remove when NDEBUG is defined
uintmax_t count = a[0];
size_t i = 0;
while (i < n) {
if (c[i] < i) {
swap(a, i % 2 ? c[i] : 0, i);
count += a[0];
c[i]++;
i = 0;
} else {
c[i] = 0;
i++;
}
}
free(a);
free(c);
return count;
}
int main(void) {
for (size_t i = 1; i < 5; i++) {
printf("heaps(%zu) = %" PRIuMAX "\n", i, heaps(i));
}
}
@Stargateur, sure the exact value does not matter, it was just to explain the differences in the results. If I am not mistaken, You could start from a[0] = 0
, but then You need to sum a[1]
. Anyway, the important thing is that we swap during each iteration.
BTW How do I call Your code from julia? if I use:
C_code = """
[Your C code here]
"""
const Clib = tempname()
open(`gcc -fPIC -O3 -xc -shared -o $(Clib * "." * Libdl.dlext) -`, "w") do f
print(f, C_code)
end
heaps_c(n::Int) = ccall(("heaps", Clib), Int, (Int,), n)
I get ccall: could not find function heaps in library /tmp/juliaACJ6nY
, but this works for my, primitive ;-) C code
Remove static
from static uintmax_t heaps(size_t n)
, it's make the symbol not global.
C_code = """
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <assert.h>
static void swap(uintmax_t *p, size_t a, size_t b) {
uintmax_t tmp = p[a];
p[a] = p[b];
p[b] = tmp;
}
// UB for n == 0
uintmax_t heaps(size_t n) {
uintmax_t *a = malloc(n * sizeof *a);
if (!a) {
exit(EXIT_FAILURE);
}
uintmax_t acu = 1;
for (size_t i = 0; i < n; i++) {
a[i] = acu++;
}
size_t *c = calloc(n, sizeof *c);
if (!c) {
exit(EXIT_FAILURE);
}
assert(n != 0); // remove when NDEBUG is defined
uintmax_t count = a[0];
size_t i = 0;
while (i < n) {
if (c[i] < i) {
swap(a, i % 2 ? c[i] : 0, i);
count += a[0];
c[i]++;
i = 0;
} else {
c[i] = 0;
i++;
}
}
free(a);
free(c);
return count;
}
"""
const Clib = tempname()
open(`gcc -fPIC -O3 -xc -shared -o $(Clib * "." * Libdl.dlext) -`, "w") do f
print(f, C_code)
end
ccall(("heaps", Clib), Cuintmax_t , (Csize_t,), n)
I updated the Rust code above as well, thanks to your feedback. It was initializing p
from 0 rather than 1.
a naive python version
def heaps(n):
elts = [i+1 for i in range(n)]
c = [0 for i in range(n)]
n = 0
countfirst = elts[0]
while n < len(elts):
if c[n] < n:
if n % 2:
k = c[n]
else:
k = 0
elts[k], elts[n] = elts[n], elts[k]
# print(elts)
countfirst += elts[0]
c[n] += 1
n = 0
else:
c[n] = 0
n += 1
return countfirst
To be honest, I don't know any other language microbenchmark compares julia against, so someone else has to come-up with the code.
defining heaps_c(n) = Int(ccall(("heaps", Clib), Cuintmax_t , (Csize_t,), n))
I get:
On julia-0.6.4:
julia> @btime allperms(12)
2.862 s (3 allocations: 384 bytes)
3113510400
julia> @btime heaps_c(12)
1.470 s (0 allocations: 0 bytes)
3113510400
On julia-0.7.0:
julia> @btime allperms(12)
1.397 s (2 allocations: 352 bytes)
3113510400
julia> @btime heaps_c(12)
1.466 s (0 allocations: 0 bytes)
3113510400
I'm impressed!
The Heap's algorithm which crawls over all permutations seem a good testcase, cause You can not be "smart" and use language specific optimisations, it's just pure operations on integer arrays (indexing, changing content of);
a similar implementation in
c
:EDIT:
@inbounds
macro makes a fairer comparison to C (and speeds julias version!)