Open troyliu0105 opened 3 years ago
// Bolt-based approximate matrix-multiply driver.
// NOTE(review): this snippet was pasted from a GitHub issue and the
// parameter list on the next line is truncated mid-signature; judging
// from the body it presumably takes X, Q, centroids, codes, offsets,
// scaleby, lut_out, and out (plus nqueries/ncols/nblocks, which are
// referenced below but not declared in the visible text) -- TODO
// confirm against the original repository source.
template<int M, bool NeedEncodeX>
void _run_bolt_matmul(const RowMatrix
// Optionally (re)encode the rows of X into Bolt codes first; skipped at
// compile time when the caller only wants to measure lut + scan.
if (NeedEncodeX) {
bolt_encode<M>(X.data(), X.rows(), (int)X.cols(), centroids.data(), codes.data());
}
// For each query row: build its lookup table, then scan all codes.
for (int i = 0; i < nqueries; i++) {
const float* q = Q.row(i).data();
bolt_lut<M>(q, (int)ncols, centroids.data(), offsets.data(),
scaleby, lut_out.data());
// NOTE(review): the second template argument (true) presumably selects
// a particular scan variant -- confirm against bolt_scan's declaration.
bolt_scan<M, true>(codes.data(), lut_out.data(), out.row(i).data(), nblocks);
}
}
///#########################################################################
//##########################################################################
// Profiling harness for the Bolt matmul above: builds random inputs and
// times _run_bolt_matmul with and without the encode step.
// NOTE(review): the function signature was lost in the paste (only the
// bare `template` keyword survives); nrows, ncols, nqueries, ncentroids,
// ncodebooks, nblocks, and M presumably come from the missing
// signature / enclosing template -- TODO confirm against the repo.
template
// create random data
RowMatrix<float> X(nrows, ncols);
X.setRandom();
// create random queries
RowMatrix<float> Q(nqueries, ncols);
Q.setRandom();
// create random centroids
ColMatrix<float> centroids(ncentroids, ncols);
centroids.setRandom();
// create random codes in [0, 15]; setRandom() yields bytes in [0, 255],
// so integer division by 16 maps them onto [0, 15]
ColMatrix<uint8_t> codes(nrows, ncodebooks);
codes.setRandom();
codes = codes.array() / 16;
// create random / arbitrary offsets and scale factor for luts
RowVector<float> offsets(ncols);
offsets.setRandom();
float scaleby = 3; // arbitrary number
// storage for luts, product
// NOTE(review): luts is (ncentroids x ncodebooks) -- presumably
// ncentroids == 16 for Bolt's 4-bit codes; verify against bolt_lut.
ColMatrix<uint8_t> luts(ncentroids, ncodebooks);
RowMatrix<uint16_t> out(nqueries, nrows);
// time it: first without re-encoding X (encode=0 in the label) ...
std::string msg = string_with_format("bolt<%d> encode=%d matmul %d",
M, false, nqueries);
REPEATED_PROFILE_DIST_COMPUTATION(kNreps, msg, kNtrials,
out.data(), nrows * nqueries,
(_run_bolt_matmul<M, false>(X, Q, centroids, codes, offsets, scaleby, luts, out)) );
// ... then including the encode step (encode=1 in the label)
std::string msg2 = string_with_format("bolt<%d> encode=%d matmul %d",
M, true, nqueries);
REPEATED_PROFILE_DIST_COMPUTATION(kNreps, msg2, kNtrials,
out.data(), nrows * nqueries,
(_run_bolt_matmul<M, true>(X, Q, centroids, codes, offsets, scaleby, luts, out)) );
} //################################################################
// Constructor for the mithral_amm_task benchmark fixture: sizes all the
// (normally learned) parameters and fills them with random values so the
// pipeline can be *timed*; the numerical output is meaningless.
// NOTE(review): the enclosing `template<...> struct mithral_amm_task {`
// header was lost in the paste; only the bare `template` keyword remains.
template
mithral_amm_task(int N, int D, int M, int ncodebooks,
float lut_work_const):
// Round N up to a multiple of scan_block_nrows so scans see whole blocks.
N_padded(N % scan_block_nrows == 0 ? N :
N + (scan_block_nrows - (N % scan_block_nrows))),
centroids(ncentroids * ncodebooks, D),
nsplits(ncodebooks * nsplits_per_codebook),
splitdims(nsplits),
splitvals(max_splitvals, nsplits),
encode_scales(nsplits),
encode_offsets(nsplits),
// lut_work_const <= 0 means "use all D dims"; otherwise a float->int
// truncating fraction of D per codebook.
nnz_per_centroid(lut_work_const > 0 ?
lut_work_const * D / ncodebooks : D),
idxs(ncodebooks, nnz_per_centroid),
// amm consumes N_padded and nnz_per_centroid; this is safe because
// members initialize in *declaration* order and both are declared
// before amm -- do not reorder the member declarations below.
amm(N_padded, D, M, ncodebooks, centroids.data(),
splitdims.data(), splitvals.data(),
encode_scales.data(), encode_offsets.data(),
idxs.data(), nnz_per_centroid),
X(N_padded, D),
Q(D, M)
{
centroids.setRandom();
splitdims.setRandom();
// clamp random split dims into [0, D)
for (int i = 0; i < splitdims.size(); i++) {
splitdims(i) = splitdims(i) % D;
}
splitvals.setRandom();
encode_scales.setRandom();
encode_offsets.setRandom();
// randomly initialize idxs, ensuring all are unique and < D
// (the setRandom() here is immediately overwritten by the loop below)
idxs.setRandom();
// NOTE(review): variable-length array -- not standard C++, relies on a
// gcc/clang extension; std::vector<int> all_idxs(D) would be portable.
int all_idxs[D];
for (int i = 0; i < D; i++) {
all_idxs[i] = i;
}
std::random_device rd;
std::mt19937 g(rd()); // why can't shuffle just create its own...
// per codebook: shuffle all D indices, keep the first nnz_per_centroid
// of them, and sort those -- i.e. a random *sorted* subset of [0, D)
for (int c = 0; c < ncodebooks; c++) { // random sorted subset of idxs
std::shuffle(all_idxs, all_idxs + D, g);
std::sort(all_idxs, all_idxs + nnz_per_centroid);
for (int j = 0; j < nnz_per_centroid; j++) {
idxs(c, j) = all_idxs[j];
}
}
X.setRandom();
Q.setRandom();
}
// Quantize the rows of X into codes held inside the amm object.
void encode() {
    amm.encode(X.data());
}
// Build the query lookup tables from Q inside the amm object.
void lut() {
    amm.lut(Q.data());
}
// Accumulate the approximate product by scanning the encoded codes.
void scan() {
    amm.scan();
}
// Run the full approximate-matmul pipeline: encode, (optionally) build
// the lookup tables, then scan. Pass create_lut=false to time
// encode+scan without rebuilding the LUTs.
void run_matmul(bool create_lut=true) {
    encode();
    if (create_lut) lut();
    scan();
}
// Read-only view of the product matrix produced by the last scan().
const ColMatrix<output_t>& output() const {
    return amm.out_mat;
}
// Parameters handed to the amm object (these would be learned during
// training; here they are filled with random values by the constructor).
// WARNING: the constructor's member-initializer list relies on this
// declaration order (N_padded and nnz_per_centroid must precede amm) --
// do not reorder.
int N_padded;                        // N rounded up to a scan-block multiple
ColMatrix<float> centroids;          // (ncentroids * ncodebooks) x D prototypes
int nsplits;                         // total split nodes across all codebooks
RowVector<uint32_t> splitdims;       // dimension index each split tests, in [0, D)
ColMatrix<int8_t> splitvals;         // threshold values per split
RowVector<scale_t> encode_scales;    // per-split input scaling
RowVector<offset_t> encode_offsets;  // per-split input offset
int nnz_per_centroid;                // dims per codebook used when building LUTs
RowMatrix<int> idxs;                 // per-codebook sorted subset of dims, each < D
// amm object that actually performs encode / lut / scan
mithral_amm<InputT> amm;
// random input data: X is the (padded) left operand, Q the right operand
ColMatrix<InputT> X;
ColMatrix<float> Q;
}; // end mithral_amm_task
@dumpinfo Thanks for your reply. I'm going to test it. 😆
Hello, how do I use this test case? I have compiled the source file with `bazel run :main`. What should I do afterwards? Could you tell me more about it?
To run any of the MADDNESS code, use cpp/test/main.cpp. I used catch tests for everything because it gives you a nice CLI and makes debugging easy.
You can run it with no arguments, but you probably want to pass some tags for which tests to run--the full suite takes a while. You probably want something like [amm] ~[old]
to just run the MADDNESS tests and exclude legacy / debugging tests.
Hi @dumpinfo, @troyliu0105, it seems like you have worked with mithral's C++ implementation. Could you point me to the files where the parameter optimization takes place? Running `run_matmul()`
from `mithral_amm_task()`
obviously does not produce meaningful results, since the parameters necessary for encoding and LUT creation are set randomly. Or does one have to supply the learning phase separately?
@fjrdev: there is no learning available in the C++ API, only the python implementation. So you'd have to generate the split thresholds, split values, and prototypes in python and pass them to C++. Which this repo doesn't support doing yet (and I likely won't add it in the foreseeable future because I wrote this code 2.5y ago and am super busy with work + family these days).
implemented in "python/clusterize.py"
@dumpinfo have you managed to port the python learning code to c++ or at least were able to extract the learnt parameters into the c++ codebase?
I couldn't find any c++ examples (including basic matrix multiply) here. Any related code snippet would be very helpful... 😢