Open caoweiquan322 opened 8 months ago
Awesome!!! I'll be honest I never understood how Mark Adler's enough tool worked :'(
A very interesting graph. It seems node "33022" contributes 128 to the table_size. Follow the chain of parent's
33022->16510->8254->4126->2062->1030->514->256
where node 256 is where the root_bits prefix changes.
Exactly. And node "33024" contributes another 128 to the table size.
It took me days to understand enough.c. It requires mental gymnastics :-)
LiveOverflow just discovered a 542 sized table :O !!! https://twitter.com/LiveOverflow/status/1737238584917180853
There must be a bug in my code. I will figure it out.
Meanwhile, I realized that a big table size does not always lead to big overflow. Trying to figure out this too.
no worries mate. The 538 table is good because it can be visualized in tree view and the tree isnt too wide.
Actually the 542 table was found by brute forcing the values above the root_bits.
Glad to have the bug fixed by removing an unnecessary searching restriction. It was a logical error.
Now "NotEnough" tool could produce about 100 distinct solutions with table_size=542.
The repo was updated accordingly.
Thanks so much :) !!!!
Also, how did u prune the tree. I tried this but it doesnt look as nice.
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <assert.h>
#include <malloc.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#define MAX_ALLOWED_CODE_LENGTH 15
//copy and paste output of this program into Graphvis
//https://dreampuf.github.io/GraphvizOnline/
void print_tree() {
int num_nodes = 1; // number of Huffman tree nodes
int num_open = 1; // number of open branches in current tree level
uint32_t n = 1;
uint32_t count[] = {0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 5, 9, 17, 1, 1, 3};
uint32_t symbol_count = 0;
for(uint32_t i = 0; i <= MAX_ALLOWED_CODE_LENGTH; i++) {
symbol_count += count[i];
}
uint32_t *leaf_nodes = malloc(sizeof(uint32_t) * symbol_count);
uint32_t total_leaf_count = 0;
for (int len = 1; len <= MAX_ALLOWED_CODE_LENGTH; ++len) {
num_open <<= 1;
num_nodes += num_open;
uint32_t leaf_count = count[len];
uint32_t internal_count = num_open - leaf_count;
for(uint32_t i = 0; i < num_open; i++) {
uint32_t node = ((1 << len) - 1) + i;
if(i >= internal_count) {
leaf_nodes[total_leaf_count++] = node;
}
}
num_open -= leaf_count;
}
assert(total_leaf_count == symbol_count);
//let graphvis handle the duplicate edges
printf("strict digraph BST {\n");
for(uint32_t i = 0; i < total_leaf_count; i++) {
uint32_t node = leaf_nodes[i];
printf("\tn%d [shape=doublecircle]\n", node);
while(node != 0) {
uint32_t parent = (node-1)/2;
printf("\tn%d [label=\"%d\"]\n", node, node);
printf("\tn%d -> n%d [label=\"%d\"]\n", parent, node, ((node+1) % 2));
node = parent;
}
}
printf("}\n");
}
int main(int argc, char **argv) {
print_tree();
return 0;
}
hmmm i think i have to iterate in level order :S
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <assert.h>
//#include <malloc.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#define MAX_ALLOWED_CODE_LENGTH 15
//copy and paste output of this program into Graphvis
//https://dreampuf.github.io/GraphvizOnline/
void print_tree() {
int num_nodes = 1; // number of Huffman tree nodes
int num_open = 1; // number of open branches in current tree level
uint32_t n = 1;
#if 1
uint32_t count[] = {0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 5, 9, 17, 1, 1, 3}; // size=542 //alphabet = 40, table_size = 414
#else
uint32_t count[] = {0, 1, 1, 1, 1, 0, 1, 1, 0, 15, 5, 9, 1, 1, 1, 2}; //alphabet = 40, table_size = 410
#endif
uint32_t has_leaf_child[65537] = {0};
has_leaf_child[0] = 1;
for (int len = 1; len <= MAX_ALLOWED_CODE_LENGTH; ++len) {
num_open <<= 1;
num_nodes += num_open;
uint32_t leaf_count = count[len];
uint32_t internal_count = num_open - leaf_count;
for(uint32_t i = 0; i < num_open; i++) {
uint32_t node = ((1 << len) - 1) + i;
uint32_t parent = (node-1)/2;
if(i >= internal_count) {
while (node > 0) {
has_leaf_child[node] = 1;
node = (node-1)/2;
}
}
n++;
}
num_open -= leaf_count;
}
num_open = 1;
num_nodes = 1;
n = 1;
printf("digraph BST {\n");
for (int len = 1; len <= MAX_ALLOWED_CODE_LENGTH; ++len) {
num_open <<= 1;
num_nodes += num_open;
uint32_t leaf_count = count[len];
uint32_t internal_count = num_open - leaf_count;
for(uint32_t i = 0; i < num_open; i++) {
uint32_t node = ((1 << len) - 1) + i;
uint32_t parent = (node-1)/2;
if (has_leaf_child[node]) {
printf("\tn%d [label=\"%d\"]\n", node, node);
if(i >= internal_count) {
printf("\tn%d [shape=doublecircle]\n", node);
}
printf("\tn%d -> n%d [label=\"%d\"]\n", parent, node, ((i+1) % 2));
}
n++;
}
num_open -= leaf_count;
}
printf("}\n");
}
int main(int argc, char **argv) {
print_tree();
return 0;
}
Thank you :) !!!!
Hello,
Thanks for providing the powerful craft.c and print_tree.c. Here I developed a tool to discover a set of bad code_lengths by modifying zlib's enough.c. I call it NotEnough. For 410 limit of the last huffman table, this tool can break the memory usage to 538.
Please take a look. Here is the link:
NotEnough