Open caijieming-ng opened 7 years ago
测试发现hbase随机写的抖动幅度比较大:每行1M时,写带宽在100~400M之间抖动。调研其compact策略: https://www.ngdata.com/visualizing-hbase-flushes-and-compactions/ ,该策略与rocksdb的Universal compaction策略一致。ref: https://github.com/facebook/rocksdb/wiki/Universal-Compaction
compact过程仿真代码:
#include <stdlib.h>
#include <iostream>
#include <vector>
// Prints every store-file size followed by a single space, oldest file first.
static void print_storefiles(const std::vector<int>& storefiles) {
    for (std::vector<int>::size_type i = 0; i < storefiles.size(); i++) {
        std::cout << storefiles[i] << " ";
    }
}

// Performs at most one compaction on `storefiles` (ordered oldest first).
//
// Candidates are scanned from the oldest file. File j is selected when its
// size is at most 1.2 times the total size of all newer files. The selected
// file is then merged with up to `max_compact` of the files that directly
// follow it, and the merged size replaces file j.
//
// Only positions with at least `min_compact` newer files are considered.
// Both limits must be non-negative (main() validates this).
//
// Returns true when a compaction was performed, false otherwise.
static bool compact_once(std::vector<int>& storefiles, int min_compact, int max_compact) {
    const int file_count = static_cast<int>(storefiles.size());
    if (file_count <= min_compact) {
        return false;
    }

    // The loop bound guarantees that file j has >= min_compact newer files.
    for (int j = 0; j < file_count - min_compact; j++) {
        // Total size of every file newer than j (wide type avoids overflow).
        long long newer_total = 0;
        for (int k = j + 1; k < file_count; k++) {
            newer_total += storefiles[k];
        }
        if (storefiles[j] > 1.2 * newer_total) {
            continue;  // file j is too large relative to the newer files
        }

        const int followers = file_count - j - 1;
        const int nr_compact = (followers > max_compact) ? max_compact : followers;
        if (nr_compact <= 0) {
            continue;  // nothing to merge at this position, try the next one
        }

        // Merge file j with the nr_compact files that follow it.
        int merged_size = storefiles[j];
        for (int m = j + 1; m <= j + nr_compact; m++) {
            merged_size += storefiles[m];
        }
        storefiles[j] = merged_size;

        // Drop the merged files. erase() shifts any remaining newer files
        // down, so nothing is read past the end of the vector and no
        // trailing file is lost when max_compact caps the merge.
        storefiles.erase(storefiles.begin() + (j + 1),
                         storefiles.begin() + (j + 1 + nr_compact));
        return true;
    }
    return false;
}

// Simulates a flush/compaction cycle of a store.
//
// Arguments: flush_nr (number of flush rounds), flush_size (size added by
// each flush), min_compact and max_compact (lower/upper bound on the number
// of newer files merged into the selected file).
//
// Each round appends one flushed file, prints the file sizes, runs at most
// one compaction and prints the resulting file sizes.
int main(int ac, char* av[]) {
    if (ac != 5) {
        std::cout << "Usage: " << av[0] << " flush_nr(10) flush_size(80M) min_compact(3) max_compact(10)\n";
        std::cout << "Usage Sample: " << av[0] << " 10 80 3 10\n";
        return 0;
    }

    const int flush_nr = atoi(av[1]);
    const int flush_size = atoi(av[2]);
    const int min_compact = atoi(av[3]);
    const int max_compact = atoi(av[4]);

    // Negative values have no meaning here and would break the index
    // arithmetic in compact_once(), so reject them up front.
    if (flush_nr < 0 || flush_size < 0 || min_compact < 0 || max_compact < 0) {
        std::cerr << "Error: all arguments must be non-negative integers\n";
        return 1;
    }

    std::vector<int> storefiles;
    for (int i = 0; i < flush_nr; i++) {
        // flush stage
        storefiles.push_back(flush_size);
        std::cout << "Store round[" << i << "]: [flush] ";
        print_storefiles(storefiles);
        std::cout << " ===> [compact] ";

        // compaction stage: at most one compaction per round
        compact_once(storefiles, min_compact, max_compact);

        // report the store files after this round
        print_storefiles(storefiles);
        std::cout << std::endl;
    }
    return 0;
}
场景:每次flush 80MB的memtable,执行若干轮之后的结果:
[xxx ~]$ ./hbase
Usage: ./hbase flush_nr(10) flush_size(80M) min_compact(3) max_compact(10)
Usage Sample: ./hbase 10 80 3 10
[xxx ~]$ ./hbase 10 80 3 10
Store round[0]: [flush] 80 ===> [compact] 80
Store round[1]: [flush] 80 80 ===> [compact] 80 80
Store round[2]: [flush] 80 80 80 ===> [compact] 80 80 80
Store round[3]: [flush] 80 80 80 80 ===> [compact] 320
Store round[4]: [flush] 320 80 ===> [compact] 320 80
Store round[5]: [flush] 320 80 80 ===> [compact] 320 80 80
Store round[6]: [flush] 320 80 80 80 ===> [compact] 320 80 80 80
Store round[7]: [flush] 320 80 80 80 80 ===> [compact] 640
Store round[8]: [flush] 640 80 ===> [compact] 640 80
Store round[9]: [flush] 640 80 80 ===> [compact] 640 80 80
可以分析一下该策略的写放大 :-)
roadmap:
计划: