tensor-compiler / taco

The Tensor Algebra Compiler (taco) computes sparse tensor expressions on CPUs and GPUs
http://tensor-compiler.org

Direct concrete index notation schedules break tensor serialization order #495

Open willow-ahrens opened 2 years ago

willow-ahrens commented 2 years ago

When specifying a schedule via direct concrete index notation, TACO assumes that the postorder traversal of tensors in the index expression will match the postorder traversal of tensors in the concrete index notation. This may hold when the concrete index notation is generated by TACO's scheduling commands, but it certainly does not hold when the concrete index notation is specified explicitly. The offending line in TACO is roughly https://github.com/tensor-compiler/taco/blob/0ede00290e97c8409dfe0156c77790b6924decbb/src/tensor.cpp#L812, the line in compute() that chooses the tensor ordering to pass to the compiled code; the compiled code expects the tensors in a different order.

A robust solution would be to sort the tensors by their name in both the compute() function and the compiled code.
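A minimal sketch of that idea (illustrative only, not TACO's actual argument-collection code; canonicalOrder is a hypothetical helper, and the only TACO API it assumes is TensorVar::getName() from taco/index_notation/index_notation.h):

#include <algorithm>
#include <string>
#include <vector>
#include "taco/index_notation/index_notation.h"

// Hypothetical helper: put tensor arguments into a canonical order by name.
static std::vector<taco::TensorVar> canonicalOrder(std::vector<taco::TensorVar> args) {
    std::sort(args.begin(), args.end(),
              [](const taco::TensorVar& a, const taco::TensorVar& b) {
                  return a.getName() < b.getName();
              });
    return args;
}

If compute() and the code generator both ordered the tensor arguments this way, the serialization order would no longer depend on how the concrete index notation was constructed.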

willow-ahrens commented 2 years ago

Here is an example to reproduce the problem.

File spmv2.cpp:

#include "taco/tensor.h"
#include <iostream>
#include <getopt.h>
#include <sys/stat.h>
#include <string>
#include <chrono>

template <typename Setup, typename Test>
double benchmark(double time_max, int trial_max, Setup setup, Test test){
    auto time_total = std::chrono::high_resolution_clock::duration(0);
    auto time_min = std::chrono::high_resolution_clock::duration(0);
    int trial = 0;
    while(trial < trial_max){
        setup();
        auto tic = std::chrono::high_resolution_clock::now();
        test();
        auto toc = std::chrono::high_resolution_clock::now();
        auto time = std::chrono::duration_cast<std::chrono::nanoseconds>(toc-tic);
        trial++;
        if(trial == 1 || time < time_min){
            time_min = time;
        }
        time_total += time;
        if(time_total.count() * 1e-9 > time_max){
            break;
        }
    }
    return time_min.count() * 1e-9;
}

using namespace taco;

static void usage()
{
    fprintf(stderr,
        "usage: foo [options]\n"
        "  --ntrials <arg>        Maximum number of trials to run\n"
        "  --ttrials <arg>        Maximum time to run trials\n"
        "  --help                 Display help message\n"

"--tensor_B <arg> file\n"

"--tensor_d <arg> file\n"

"--tensor_C <arg> file\n"

"--tensor_a <arg> file\n"

    );
}

int main(int argc, char **argv)
{
    int help = 0;

    int n_trials = 10000;
    double t_trials = 5.0;

    std::string file_B = "";
    std::string file_d = "";
    std::string file_C = "";
    std::string file_a = "";

    /* Beware. Option parsing below. */
    long longarg;
    double doublearg;
    struct stat statthing;
    while (1)
    {
        const char *options = "";
        const struct option long_options[] = {
            {"ntrials", required_argument, NULL, 1},
            {"ttrials", required_argument, NULL, 1},
            {"help", no_argument, &help, 1},

{"tensor_B", required_argument, NULL, 1},

{"tensor_d", required_argument, NULL, 1},

{"tensor_C", required_argument, NULL, 1},

{"tensor_a", required_argument, NULL, 1},

            {0, 0, 0, 0}};

        /* getopt_long stores the option index here. */
        int option_index = 0;

        int c = getopt_long(argc, argv, options, long_options, &option_index);

        if (c == 0){
            continue;
        }

        /* Detect the end of the options. */
        if (c == -1)
            break;

        switch (option_index) {
            case 0:
                errno = 0;
                longarg = strtol(optarg, 0, 10);
                if (errno != 0 || longarg < 1)
                {
                    printf("option --ntrials takes an integer maximum number of trials >= 1\n");
                    usage();
                    return 1;
                }
                n_trials = longarg;
                break;

            case 1:
                errno = 0;
                doublearg = strtod(optarg, 0);
                if (errno != 0 || doublearg < 0.0)
                {
                    printf("option --ttrials takes a maximum measurement time in seconds >= 0.0\n");
                    usage();
                    return 1;
                }
                t_trials = doublearg;
                break;

            case 2:
                help = 1;
                break;

            case 3:
                if (stat(optarg, &statthing) < 0 || !S_ISREG(statthing.st_mode))
                {
                    printf("argument to --tensor_B must be a file\n");
                    usage();
                    return 1;
                }
                file_B = optarg;
                break;

            case 4:
                if (stat(optarg, &statthing) < 0 || !S_ISREG(statthing.st_mode))
                {
                    printf("argument to --tensor_d must be a file\n");
                    usage();
                    return 1;
                }
                file_d = optarg;
                break;

            case 5:
                if (stat(optarg, &statthing) < 0 || !S_ISREG(statthing.st_mode))
                {
                    printf("argument to --tensor_C must be a file\n");
                    usage();
                    return 1;
                }
                file_C = optarg;
                break;

            case 6:
                if (stat(optarg, &statthing) < 0 || !S_ISREG(statthing.st_mode))
                {
                    printf("argument to --tensor_a must be a file\n");
                    usage();
                    return 1;
                }
                file_a = optarg;
                break;

            default:
                printf("unrecognized argument\n");
                usage();
                abort();
        }
    }

    if (help)
    {
        printf("Try a tensor kernel!\n");
        usage();
        return 0;
    }

    taco::setEvaluateAtAssign(false);

    // Create tensors

if(file_B == ""){
    std::cout << "oh no! There's no tensor file for B" << std::endl;
    return -1;
}
Tensor<double> tensor_B = read(file_B, Format({Dense, Sparse}), true);
TensorVar tensorvar_B = tensor_B.getTensorVar();

if(file_d == ""){
    std::cout << "oh no! There's no tensor file for d" << std::endl;
    return -1;
}
Tensor<double> tensor_d = read(file_d, Format({Dense}), true);
TensorVar tensorvar_d = tensor_d.getTensorVar();

if(file_C == ""){
    std::cout << "oh no! There's no tensor file for C" << std::endl;
    return -1;
}
Tensor<double> tensor_C = read(file_C, Format({Dense, Sparse}), true);
TensorVar tensorvar_C = tensor_C.getTensorVar();

Tensor<double> tensor_B_6432 = tensor_B.transpose({0, 1}, Format({Sparse, Sparse}));
TensorVar tensorvar_B_6432 = tensor_B_6432.getTensorVar();

Tensor<double> tensor_w_1({}, Format({}));
TensorVar tensorvar_w_1 = tensor_w_1.getTensorVar();

Tensor<double> tensor_a({tensor_B.getDimensions()[0]}, Format({Dense}));
TensorVar tensorvar_a = tensor_a.getTensorVar();

    // Form a tensor-vector multiplication expression

    IndexVar index_i_6434;
    IndexVar index_j_6433;
    IndexVar index_k;
    IndexVar index_j;
    IndexVar index_i;
    IndexVar index_foo_6435;

    tensor_a(index_foo_6435) += ((tensor_d(index_k) * tensor_C(index_j, index_k)) * tensor_B_6432(index_j, index_i));

    // Compile the expression
    tensor_a.compile(
        forall(index_j,
               where(forall(index_i,
                            tensorvar_a(index_i) += (tensorvar_w_1() * tensorvar_B_6432(index_j, index_i))),
                     forall(index_k,
                            tensorvar_w_1() += (tensorvar_d(index_k) * tensorvar_C(index_j, index_k))))));

    // Assemble output indices and numerically compute the result
    auto time = benchmark(
        t_trials, n_trials,
        [&tensor_a]() {
            tensor_a.assemble();
            //tensor_a.setNeedsCompute(true);
        },
        [&tensor_a]() {
            tensor_a.compute();
        });

    std::cout << time << std::endl;

if(file_a != ""){
    write(file_a, tensor_a);
}

    return 0;
}

In a file spmv2_breaks.cpp, change the above line

tensor_a(index_foo_6435) += ((tensor_d(index_k) * tensor_C(index_j, index_k)) * tensor_B_6432(index_j, index_i));

to

tensor_a(index_foo_6435) += (tensor_C(index_j, index_k) * (tensor_d(index_k) * tensor_B_6432(index_j, index_i)));
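The only difference is the order in which the tensor operands appear in the expression, and therefore their postorder traversal. A tiny standalone illustration of the mismatch (operand names only, not TACO internals; which order the compiled kernel actually expects is the assumption described at the top of this issue):

#include <cassert>
#include <string>
#include <vector>

int main() {
    // Operand postorder of the spmv2.cpp expression (d(k) * C(j,k)) * B_6432(j,i):
    std::vector<std::string> order_spmv2 = {"d", "C", "B_6432"};
    // Operand postorder of the spmv2_breaks.cpp expression C(j,k) * (d(k) * B_6432(j,i)):
    std::vector<std::string> order_spmv2_breaks = {"C", "d", "B_6432"};
    // Both programs compile the same concrete index notation, so the generated
    // kernel expects one fixed operand order; the two expressions cannot both match it.
    assert(order_spmv2 != order_spmv2_breaks);
    return 0;
}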

File tensor_B.tns:

1 1 1.0
1024 1024 2.0

File tensor_C.tns:

1 1 1.0
1024 1024 2.0

File tensor_d.tns:

1 1.0
1024 2.0

Compile with

gcc -c -o spmv2_breaks.o spmv2_breaks.cpp --std=c++11 -I/data/scratch/pahrens/taco/include -I/data/scratch/pahrens/taco/src -g -ggdb -O0
gcc -o spmv2_breaks spmv2_breaks.o --std=c++11 -L/data/scratch/pahrens/taco/build/lib -g -ggdb -O0 -lm -ltaco -lstdc++

and run with

 LD_LIBRARY_PATH=/data/scratch/pahrens/taco/build/lib ./spmv2_breaks --tensor_B=tensor_B.tns --tensor_C=tensor_C.tns --tensor_d=tensor_d.tns

to obtain a very exciting segfault. Compiling and running spmv2 works fine, as expected: its expression lists the tensors in the one ordering that the compiled code expects. Note that this is not the only ordering that avoids the segfault; some orderings run to completion but produce silently incorrect results.