d-i-s-c-o-i-n-f-e-r-n-o / Archivator_Project

0 stars 0 forks source link

Func list #1

Open APUkoloff opened 2 months ago

APUkoloff commented 2 months ago

Список функций

  1. defl_ilog2 - log2 x служебная функция - в консоли нет
  2. defl_uload32 - data block load - служебная функция см1
  3. defl_hash32(const void* p) - хэширует данные из 2 для кодирования по Хоффману
  4. defl_put(unsigned char* dst, struct defl s, int code, int bitcnt) - dst(ячейка), code , bitcnt - биты . функция заполняет поле с кодами символов в служебной структуре s
  5. defl_heap_sub(unsigned A[], unsigned len, unsigned sub) - magic
    defl_heap_sub(unsigned A[], unsigned len, unsigned sub) 
    {
    unsigned c, p = sub;
    unsigned v = A[sub];
    while ((c = p << 1) <= len) 
    {
        if (c < len && A[c + 1] > A[c]) c++;
        if (v >= A[c]) break;
        A[p] = A[c], p = c;
    }
    A[p] = v;
    }
  6. static void
    defl_heap_array(unsigned* A, unsigned len) - more *edited* magic
    defl_heap_array(unsigned* A, unsigned len) 
    {
    unsigned sub;
    for (sub = len >> 1; sub >= 1; sub--)
        sdefl_heap_sub(A, len, sub);
    }
  7. defl_heap_sort(unsigned* A, unsigned n) - опять указатель на кучу, вызывает обработку массива (6).
  8. defl_sort_sym(unsigned sym_cnt, unsigned freqs, unsigned char lens, unsigned sym_out) - похоже, кусок процесса дешифровки. 9, defl_build_tree(unsigned A, unsigned sym_cnt) - похоже строит дерево для генерации кодов для символов. defl_gen_len_cnt(unsigned A, unsigned root, unsigned len_cnt, unsigned max_code_len) - magic

    {
    int n;
    unsigned i;
    for (i = 0; i <= max_code_len; i++)
        len_cnt[i] = 0;
    len_cnt[1] = 2;
    
    A[root] &= DEFL_SYM_MSK;
    for (n = (int)root - 1; n >= 0; n--) 
    {
        unsigned p = A[n] >> DEFL_SYM_BITS;
        unsigned pdepth = A[p] >> DEFL_SYM_BITS;
        unsigned depth = pdepth + 1;
        unsigned len = depth;
    
        A[n] = (A[n] & DEFL_SYM_MSK) | (depth << DEFL_SYM_BITS);
        if (len >= max_code_len) 
        {
            len = max_code_len;
            do len--; while (!len_cnt[len]);
        }
        len_cnt[len]--;
        len_cnt[len + 1] += 2;
    }
    }
  9. defl_gen_codes(unsigned A, unsigned char lens, const unsigned* len_cnt, unsigned max_code_word_len, unsigned sym_cnt) - собственно коды символов (в самой программе этой и подобных функций быть не должно - только в h файле).
  10. defl_rev(unsigned c, unsigned char n) - MAGIC
    c = ((c & 0x5555) << 1) | ((c & 0xAAAA) >> 1);
    c = ((c & 0x3333) << 2) | ((c & 0xCCCC) >> 2);
    c = ((c & 0x0F0F) << 4) | ((c & 0xF0F0) >> 4);
    c = ((c & 0x00FF) << 8) | ((c & 0xFF00) >> 8);
    return c >> (16 - n);
  11. Huffman algs - внутр. ф-я
    defl_huff(unsigned char* lens, unsigned* codes, unsigned* freqs,
    unsigned num_syms, unsigned max_code_len) 
    {
    unsigned c, * A = codes;
    unsigned len_cnt[DEFL_MAX_CODE_LEN + 1];
    unsigned used_syms = defl_sort_sym(num_syms, freqs, lens, A);
    if (!used_syms) return;
    if (used_syms == 1) 
    {
        unsigned s = A[0] & DEFL_SYM_MSK;
        unsigned i = s ? s : 1;
        codes[0] = 0, lens[0] = 1;
        codes[i] = 1, lens[i] = 1;
        return;
    }
    defl_build_tree(A, used_syms);
    defl_gen_len_cnt(A, used_syms - 2, len_cnt, max_code_len);
    defl_gen_codes(A, lens, len_cnt, max_code_len, num_syms);
    for (c = 0; c < num_syms; c++) 
    {
        codes[c] = defl_rev(codes[c], lens[c]);
    }
    }

    Я все. Продолжение будет завтра.

APUkoloff commented 2 months ago
  1. 3 bits - стандартный размер маркеров для блоков

    defl_precode(struct defl_scnt* cnt, unsigned* freqs, unsigned* items,
    const unsigned char* litlen, const unsigned char* offlen) 
    {
    unsigned* at = items;
    unsigned start = 0;
    
    unsigned total = 0;
    unsigned char lens[DEFL_SYM_MAX + DEFL_OFF_MAX];
    for (cnt->lit = DEFL_SYM_MAX; cnt->lit > 257; cnt->lit--)
        if (litlen[cnt->lit - 1]) break;
    for (cnt->off = DEFL_OFF_MAX; cnt->off > 1; cnt->off--)
        if (offlen[cnt->off - 1]) break;
    
    total = (unsigned)(cnt->lit + cnt->off);
    memcpy(lens, litlen, sizeof(unsigned char) * (size_t)cnt->lit);
    memcpy(lens + cnt->lit, offlen, sizeof(unsigned char) * (size_t)cnt->off);
    do {
        unsigned len = lens[start];
        unsigned end = start;
        do end++; while (end != total && len == lens[end]);
        if (!len) 
        {
            while ((end - start) >= 11) 
            {
                unsigned n = (end - start) - 11;
                unsigned xbits = n < 0x7f ? n : 0x7f;
                freqs[18]++;
                *at++ = 18u | (xbits << 5u);
                start += 11 + xbits;
            }
            if ((end - start) >= 3) 
            {
                unsigned n = (end - start) - 3;
                unsigned xbits = n < 0x7 ? n : 0x7;
                freqs[17]++;
                *at++ = 17u | (xbits << 5u);
                start += 3 + xbits;
            }
        }
        else if ((end - start) >= 4) 
        {
            freqs[len]++;
            *at++ = len;
            start++;
            do 
            {
                unsigned xbits = (end - start) - 3;
                xbits = xbits < 0x03 ? xbits : 0x03;
                *at++ = 16 | (xbits << 5);
                start += 3 + xbits;
                freqs[16]++;
            } while ((end - start) >= 3);
        }
        while (start != end) 
        {
            freqs[len]++;
            *at++ = len;
            start++;
        }
    } while (start != total);
    cnt->items = (int)(at - items);
    }
    struct defl_match_codest 
    {
    int ls, lc;
    int dc, dx;
    };
  2. defl_match_codes(struct defl_match_codest* cod, int dist, int len) - дешифровка (часть)

    {
    static const short dxmax[] = { 0,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576 };
    static const unsigned char lslot[258 + 1] = {
      0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12,
      12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16,
      16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18,
      18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
      21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
      22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
      23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25,
      25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
      25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26,
      26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
      26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
      27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
      27, 27, 28
    };
    assert(len <= 258);
    assert(dist <= 32768);
    cod->ls = lslot[len];
    cod->lc = 257 + cod->ls;
    assert(cod->lc <= 285);
    
    cod->dx = sdefl_ilog2(sdefl_npow2(dist) >> 2);
    cod->dc = cod->dx ? ((cod->dx + 1) << 1) + (dist > dxmax[cod->dx]) : dist - 1;
    }
  3. defl_put16(unsigned char** dst, unsigned short x) - переводит ячейки памяти в кодовый символ (-50% file size)
    {
    unsigned char* val = *dst;
    val[0] = (unsigned char)(x & 0xff);
    val[1] = (unsigned char)(x >> 8);
    *dst = val + 2;
    }
  4. defl_match(unsigned char* dst, struct defl s, int dist, int len) еще дешифровка

    {
    static const char lxn[] = { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0 };
    static const short lmin[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,
        51,59,67,83,99,115,131,163,195,227,258 };
    static const short dmin[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,
        385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577 };
    
    struct defl_match_codest cod;
    defl_match_codes(&cod, dist, len);
    defl_put(dst, s, (int)s->cod.word.lit[cod.lc], s->cod.len.lit[cod.lc]);
    defl_put(dst, s, len - lmin[cod.ls], lxn[cod.ls]);
    defl_put(dst, s, (int)s->cod.word.off[cod.dc], s->cod.len.off[cod.dc]);
    defl_put(dst, s, dist - dmin[cod.dc], cod.dx);
    }
  5. static void defl_flush(unsigned char* dst, struct sdefl s, int is_last, const unsigned char* in, int blk_begin, int blk_end) - VERY important magic(flush cleans buffer and writes data in file while previously encoding it (note huffman codes section))

    
    {
    int blk_len = blk_end - blk_begin;
    int j, i = 0, item_cnt = 0;
    struct defl_scnt scnt = { 0 };
    unsigned codes[DEFL_PRE_MAX];
    unsigned char lens[DEFL_PRE_MAX];
    unsigned freqs[DEFL_PRE_MAX] = { 0 };
    unsigned items[DEFL_SYM_MAX + DEFL_OFF_MAX];
    static const unsigned char perm[DEFL_PRE_MAX] = { 16,17,18,0,8,7,9,6,10,5,11,
        4,12,3,13,2,14,1,15 };
    
    // calculate huffman codes
    s->freq.lit[DEFL_EOB]++;
    defl_huff(s->cod.len.lit, s->cod.word.lit, s->freq.lit, DEFL_SYM_MAX, DEFL_LIT_LEN_CODES);
    defl_huff(s->cod.len.off, s->cod.word.off, s->freq.off, DEFL_OFF_MAX, DEFL_OFF_CODES);
    defl_precode(&symcnt, freqs, items, s->cod.len.lit, s->cod.len.off);
    defl_huff(lens, codes, freqs, DEFL_PRE_MAX, DEFL_PRE_CODES);
    for (item_cnt = DEFL_PRE_MAX; item_cnt > 4; item_cnt--) 
    {
        if (lens[perm[item_cnt - 1]]) 
        {
            break;
        }
    }
    // write block
    switch (defl_blk_type(s, blk_len, item_cnt, freqs, lens)) 
    {
    case DEFL_BLK_UCOMPR: 
    {
        // uncompressed blocks
        int n = defl_div_round_up(blk_len, DEFL_RAW_BLK_SIZE);
        for (i = 0; i < n; ++i) 
        {
            int fin = is_last && (i + 1 == n);
            int amount = blk_len < DEFL_RAW_BLK_SIZE ? blk_len : DEFL_RAW_BLK_SIZE;
            defl_put(dst, s, !!fin, 1); // block
            defl_put(dst, s, 0x00, 2); // stored block
            if (s->bitcnt) 
            {
                defl_put(dst, s, 0x00, 8 - s->bitcnt);
            }
            assert(s->bitcnt == 0);
            defl_put16(dst, (unsigned short)amount);
            defl_put16(dst, ~(unsigned short)amount);
            memcpy(*dst, in + blk_begin + i * SDEFL_RAW_BLK_SIZE, amount);
            *dst = *dst + amount;
            blk_len -= amount;
        }
    } break;
    case DEFL_BLK_DYN: 
    {
        // dynamic huffman block
        defl_put(dst, s, !!is_last, 1); // block
        defl_put(dst, s, 0x02, 2); // dynamic huffman
        defl_put(dst, s, scnt.lit - 257, 5);
        defl_put(dst, s, scnt.off - 1, 5);
        defl_put(dst, s, item_cnt - 4, 4);
        for (i = 0; i < item_cnt; ++i) 
        {
            defl_put(dst, s, lens[perm[i]], 3);
        }
        for (i = 0; i < scnt.items; ++i) 
        {
            unsigned sym = items[i] & 0x1F;
            defl_put(dst, s, (int)codes[sym], lens[sym]);
            if (sym < 16) continue;
            if (sym == 16) defl_put(dst, s, items[i] >> 5, 2);
            else if (sym == 17) defl_put(dst, s, items[i] >> 5, 3);
            else defl_put(dst, s, items[i] >> 5, 7);
        }
        // block sequences
        for (i = 0; i < s->seq_cnt; ++i) 
        {
            if (s->seq[i].off >= 0) {
                for (j = 0; j < s->seq[i].len; ++j) 
                {
                    int c = in[s->seq[i].off + j];
                    defl_put(dst, s, (int)s->cod.word.lit[c], s->cod.len.lit[c]);
                }
            }
            else 
            {
                defl_match(dst, s, -s->seq[i].off, s->seq[i].len);
            }
        }
        defl_put(dst, s, (int)(s)->cod.word.lit[DEFL_EOB], (s)->cod.len.lit[DEFL_EOB]);
    } break;
    }
    memset(&s->freq, 0, sizeof(s->freq));
    s->seq_cnt = 0;
    }
18. static void
defl_seq(struct defl* s, int off, int len) - длина последовательности + диагностика (assert делает это)
19. defl_reg_match(struct defl* s, int off, int len) - определяет частоту появления символа в последовательности
20. defl_fnd(struct defl_match* m, const struct defl* s, int chain_len,
    int max_match, const unsigned char* in, int p, int e) - что-то ищет (magic)
21. defl_compr(struct defl* s, unsigned char* out, const unsigned char* in,
    int in_len, int lvl) - служебная процедура
22. extern int
deflate(struct defl* s, void* out, const void* in, int n, int lvl) - проводит процедуры сжатия, возвращает код ошибки.
23. defl_adler32(unsigned adler32, const unsigned char* in, int in_len) - что-то, связанное с контрольными суммами.
24. extern int zsdeflate(struct defl* s, void* out, const void* in, int n, int lvl) - разновидность 23
25. extern int defl_bound(int len) - возвращает число обработанных символов

{ int max_blocks = 1 + defl_div_round_up(len, DEFL_RAW_BLK_SIZE); int bound = 5 * max_blocks + len + 1 + 4 + 8; return bound; }

APUkoloff commented 2 weeks ago

об adler32 - алгоритм хэширования общий принцип: 2 контрольные суммы (16 бит), первая - кодируемая информация(1+1-й байт +... + n-й байт)mod 65521 (подойдут и другие простые числа, если они больше 2^16). вторая контр сумма - (длина последовательности байт1 + (длина последовательности-1) байт2 + ... + длина последовательности)mod 65521 Итоговая контрольная сумма - это кс1+ кс2*65536