Open xuchuanyin opened 6 years ago
private byte[] convertRowCountFromShortToByteArray(List
// Reproduction test: loads 100000 generated rows into `test_rcd` with reduced page/blocklet
// sizes, then runs the same `city = 'city40'` count query before and after creating a
// bloomfilter datamap on `city`, printing both results for comparison.
// Property overrides and the table are cleaned up in `finally` so that a failing query
// does not leak settings into other tests.
test("test by modify rcd2") {
  // minimum per page is 2000 rows
  CarbonProperties.getInstance().addProperty(CarbonCommonConstants.BLOCKLET_SIZE, "2000")
  // minimum per blocklet is 16MB
  CarbonProperties.getInstance()
    .addProperty(CarbonV3DataFormatConstants.BLOCKLET_SIZE_IN_MB, "16")
  try {
    // these lines will result in 3 blocklets in one block and bloom will hit at least 2 of them
    val lines = 100000
    // drop the same table that is written below (the original dropped `testrcd`)
    sql("drop table if exists test_rcd").collect()
    val r = new Random()
    // brings `.toDF` on RDDs into scope
    import sqlContext.implicits._
    val df = sqlContext.sparkContext.parallelize(1 to lines)
      .map(x => ("No." + r.nextInt(10000), "country" + x % 10000, "city" + x % 10000,
        x % 10000,
        UUID.randomUUID().toString, UUID.randomUUID().toString, UUID.randomUUID().toString,
        UUID.randomUUID().toString, UUID.randomUUID().toString, UUID.randomUUID().toString,
        UUID.randomUUID().toString, UUID.randomUUID().toString, UUID.randomUUID().toString,
        UUID.randomUUID().toString, UUID.randomUUID().toString, UUID.randomUUID().toString))
      .toDF("ID", "country", "city", "population",
        "random1", "random2", "random3", "random4", "random5", "random6",
        "random7", "random8", "random9", "random10", "random11", "random12")
    df.write
      .format("carbondata")
      .option("tableName", "test_rcd")
      .option("SORT_COLUMNS", "id")
      .option("SORT_SCOPE", "LOCAL_SORT")
      .mode(SaveMode.Overwrite)
      .save()

    // result without the bloomfilter datamap
    sql("select count(*) from test_rcd where city = 'city40'").show(numRows = Integer.MAX_VALUE - 1)
    sql("CREATE DATAMAP dm_rcd ON TABLE test_rcd " +
        "USING 'bloomfilter' DMPROPERTIES " +
        "('INDEX_COLUMNS' = 'city', 'BLOOM_SIZE'='640000', 'BLOOM_FPP'='0.00001')")
    // same query again, now with the bloomfilter datamap available for pruning
    sql("select count(*) from test_rcd where city = 'city40'").show(numRows = Integer.MAX_VALUE - 1)
  } finally {
    sql("drop table if exists test_rcd").collect()
    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.BLOCKLET_SIZE,
      CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL)
    // restore the blocklet size override made at the top of this test
    CarbonProperties.getInstance().addProperty(CarbonV3DataFormatConstants.BLOCKLET_SIZE_IN_MB,
      CarbonV3DataFormatConstants.BLOCKLET_SIZE_IN_MB_DEFAULT_VALUE)
    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE, "true")
  }
}
/**
 * Combines the blocklets pruned by the default dataMap with those pruned by the other
 * dataMaps.
 *
 * <p>When {@code BlockletDataMapUtil.isCacheLevelBlock(carbonTable, CACHE_LEVEL_BLOCKLET)}
 * returns {@code true}, the result contains every entry of
 * {@code otherDataMapPrunedBlocklets} followed by each entry of
 * {@code defaultDataMapPrunedBlocklets} that {@code otherDataMapPrunedBlocklets} does not
 * already contain (per {@code List.contains}, i.e. {@code equals}). Otherwise the result is
 * the intersection of the two lists as computed by {@code CollectionUtils.intersection}.
 *
 * @param carbonTable table whose cache level selects the merge strategy
 * @param defaultDataMapPrunedBlocklets blocklets returned by the default dataMap
 * @param otherDataMapPrunedBlocklets blocklets returned by the other dataMaps
 * @return a new list holding the combined blocklets; the input lists are not modified
 */
private List<ExtendedBlocklet> intersectFilteredBlocklets(CarbonTable carbonTable,
    List<ExtendedBlocklet> defaultDataMapPrunedBlocklets,
    List<ExtendedBlocklet> otherDataMapPrunedBlocklets) {
  if (BlockletDataMapUtil
      .isCacheLevelBlock(carbonTable, BlockletDataMapFactory.CACHE_LEVEL_BLOCKLET)) {
    List<ExtendedBlocklet> prunedBlocklets = new ArrayList<>(otherDataMapPrunedBlocklets);
    // add blocklets from default dataMap that are not filtered by other dataMaps
    for (ExtendedBlocklet defaultBlocklet : defaultDataMapPrunedBlocklets) {
      if (!otherDataMapPrunedBlocklets.contains(defaultBlocklet)) {
        prunedBlocklets.add(defaultBlocklet);
      }
    }
    return prunedBlocklets;
  }
  // CollectionUtils.intersection is only declared to return a Collection; copy it into a
  // list instead of down-casting so we do not depend on its concrete implementation type.
  return new ArrayList<>(
      CollectionUtils.intersection(otherDataMapPrunedBlocklets, defaultDataMapPrunedBlocklets));
}