rizinorg / rizin

UNIX-like reverse engineering framework and command-line toolset.
https://rizin.re
GNU Lesser General Public License v3.0
2.66k stars 357 forks source link

Refactor use of `core->block` to the transparent IO access #2695

Open XVilka opened 2 years ago

XVilka commented 2 years ago

This avoids having to manually keep `core->block` in mind whenever the API is used.

Code should use the transparent RzIO and RzCore APIs without needing to work with `block` or `blocksize` directly.

I propose a new API that will handle all reads and writes itself:

See https://github.com/rizinorg/rizin/pull/2694

$ rg "core->block[,\s\)]" | wc -l
     121

$ rg "core->block[,\s\)]"
test/unit/test_yank.c
20: mu_assert_memeq(core->block, (const ut8 *)"\x44\x33\x22\x11", 4, "original bytes should be right at address 0");
29: mu_assert_memeq(core->block, (const ut8 *)"\x44\x33\x22\x11", 4, "yanked bytes should be pasted at address 4");
46: mu_assert_streq((const char *)core->block, "Hello World", "yanked bytes should be pasted at address 4");
48: mu_assert_streq((const char *)core->block, "HellHello World", "yanked bytes should be pasted at address 4, original content there");

test/db/archos/linux-x64/dbg_bps
263:NAME=read core->block on short move

librz/core/rtr_http.c
438:    memcpy(newblk, core->block, core->blocksize);
440:    core->block = newblk;
452:        core->block = origblk;
468:        core->block = newblk;

librz/core/cprint.c
28:     value = rz_read_ble(core->block + pos, false, 8);
1093:   memcpy(buf, core->block, core->blocksize);

librz/core/cmp.c
187:            core->block + i, len - i);

librz/core/tui/visual.c
829:                q = core->block + i;
834:        q = rz_mem_mem(core->block + d, core->blocksize - d,
837:            q = rz_mem_mem(core->block, RZ_MIN(core->blocksize, d),
842:        core->print->cur = (int)(size_t)(q - core->block);
924:    p = rz_mem_mem(core->block + d, core->blocksize - d,
927:        core->print->cur = (int)(size_t)(p - core->block);
1005:       rz_asm_disassemble(core->rasm, &op, core->block, RZ_MIN(32, core->blocksize));
1603:               core->block + next_roff, 32);
1690:                   core->block, 32);
1753:               &op, core->block, 32);
2675:                           rz_asm_disassemble(core->rasm, &op, core->block, 32);
3642:       op, core->block, 32);

librz/core/cio.c
246:    if (core && core->block) {
247:        return rz_io_read_at(core->io, core->offset, core->block, core->blocksize);
379:    if (rz_analysis_op(core->analysis, &op, core->offset, core->block, core->blocksize, RZ_ANALYSIS_OP_MASK_BASIC) < 1) {

librz/core/hack.c
293:        if (rz_analysis_op(core->analysis, &aop, core->offset, core->block, core->blocksize, RZ_ANALYSIS_OP_MASK_BASIC) < 1) {

librz/core/seek.c
208:        if (rz_analysis_op(core->analysis, &aop, core->offset, core->block, core->blocksize, RZ_ANALYSIS_OP_MASK_BASIC) > 0) {

librz/core/core.c
591:        rz_analysis_op(core->analysis, &op, core->offset, core->block, core->blocksize, RZ_ANALYSIS_OP_MASK_BASIC);
2326:   core->block = (ut8 *)calloc(RZ_CORE_BLOCKSIZE + 1, 1);
2327:   if (!core->block) {
2796:   bump = realloc(core->block, bsize + 1);
2801:   core->block = bump;
2803:   memset(core->block, 0xff, core->blocksize);
2948:                   memcpy(ptr + 5, core->block, i); // core->blocksize);

librz/core/cgraph.c
784:    if (rz_analysis_op(core->analysis, &op, core->offset, core->block, core->blocksize, flags) > 0) {

librz/core/canalysis.c
1048:       ptr = core->block + delta;

librz/core/tui/esil.c
51: memcpy(buf, core->block, sizeof(ut64));

librz/core/tui/define.c
228:            core->block + off - core->offset, 32, RZ_ANALYSIS_OP_MASK_BASIC);
312:                if (rz_analysis_op(core->analysis, &op, off, core->block + delta,

librz/core/tui/panels.c
1544:               &op, core->block, 32);

librz/core/tui/biteditor.c
40: memcpy(buf, core->block + cur, sizeof(ut64));

librz/core/disasm.c
6109:   if (buf != core->block) {

librz/core/cmd/cmd_magic.c
96: str = rz_magic_buffer(ck, core->block + delta, core->blocksize - delta);

librz/core/cmd/cmd_search.c
2559:       int diff = memcmpdiff(core->block, block, core->blocksize);
2786:   memcpy(buf, core->block, bufsz);

librz/core/cmd/cmd_write.c
57:     rz_crypto_update(core->crypto, (const ut8 *)core->block, core->blocksize);

librz/core/cmd/cmd_print.c
1598:   if (data != core->block) {
1670:       handle_entropy(core, plugin->name, core->block, core->blocksize);
1672:       handle_ssdeep(core, plugin->name, core->block, core->blocksize);
1674:       handle_hash_cfg(core, plugin->name, core->block, core->blocksize);
2134:   const ut8 *buffer = core->block + offset;
2188:   if (*core->block & 0x1) { // "long" string
2189:       const ut8 *ptr = core->block + (bitness / 8) * 2;
2223:   if (rz_scan_strings_raw(core->block, found, &scan_opt, 0, core->blocksize, RZ_STRING_ENC_GUESS) < 0) {
2256:   if (rz_scan_strings_raw(core->block, found, &scan_opt, 0, core->blocksize, RZ_STRING_ENC_GUESS) < 0) {
2281:       string_len = rz_read_ble16(core->block, big_endian);
2284:       string_len = rz_read_ble32(core->block, big_endian);
2287:       string_len = rz_read_ble64(core->block, big_endian);
2303:       opt.buffer = core->block + offset;
2310:       print_json_string(core, core->block + offset, string_len, RZ_STRING_ENC_8BIT, true);
2545:   // TODO After core->block is removed, this should be changed to a block read.
2573:           if (core->blocksize < 4 || !memcmp(core->block, "\xff\xff\xff\xff", 4)) {
2576:               char *res = rz_print_json_indent((const char *)core->block, true, "  ", NULL);
2801:       print_json_string(core, core->block, core->blocksize, RZ_STRING_ENC_UTF16LE, true);
2815:       print_json_string(core, core->block, core->blocksize, RZ_STRING_ENC_UTF32LE, true);
2829:       print_json_string(core, core->block, core->blocksize, RZ_STRING_ENC_UTF16BE, true);
2843:       print_json_string(core, core->block, core->blocksize, RZ_STRING_ENC_UTF32BE, true);
2890:       rz_str_bits(buf, core->block + i, 8, NULL);
2900:           const ut8 *b = core->block + i - 3;
3015:           ut8 *p = (ut8 *)core->block + j;
3024:           ut8 *p = (ut8 *)core->block + j;
3038:   rz_print_hexii(core->print, core->offset, core->block,
3087:   rz_core_print_hexdump(core, core->offset, core->block, len, 16, 1, 1);
3105:   memcpy(block, core->block, len);
3115:       core->block, len, 8, 1, 1);
3138:   int len = (int)rz_str_nlen((const char *)core->block, core->blocksize);
3142:   rz_print_bytes(core->print, core->block, len, "%02x");
3204:       char *code = rz_lang_byte_array(core->block, size, type); \
3216:       char *code = rz_lang_byte_array(core->block, core->blocksize, big_endian ? type##_BE : type##_LE); \
3256:   char *code = rz_core_print_bytes_with_inst(core, core->block, core->offset, size);
3629:   rz_core_analysis_bytes_il(core, core->block, size, 0, false);
3694:   core->num->value = rz_core_print_disasm(core, core->offset, core->block, core->blocksize, RZ_ABS(n_instrs), state, &disasm_options);
3754:       ret = rz_asm_disassemble(core->rasm, &asm_op, core->block + i, core->blocksize - i);
3987:       int ret = rz_analysis_op(core->analysis, &aop, offset, core->block, core->blocksize, RZ_ANALYSIS_OP_MASK_BASIC);
4066:   char *buf = rz_base64_encode_dyn((const unsigned char *)core->block, core->blocksize);
4077:   ut8 *buf = rz_base64_decode_dyn((const char *)core->block, core->blocksize);
4100:   rz_str_bits(buf, core->block, len + skip, NULL);
4130:   RzASN1Object *asn1 = rz_asn1_object_parse(core->block, core->blocksize);
4146:   char *s = rz_protobuf_decode(core->block, core->blocksize, false);
4157:   char *s = rz_protobuf_decode(core->block, core->blocksize, true);
4169:   RzCMS *cms = rz_pkcs7_cms_parse(core->block, core->blocksize);
4193:   RzX509Certificate *x509 = rz_x509_certificate_parse2(core->block, core->blocksize);
4223:   char *s = rz_axml_decode(core->block, core->blocksize);
4306:   ut8 *block_end = core->block + blocksize;
4635:   char *s = rz_hash_cfg_randomart(core->block, len, core->offset);
4663:           rz_io_read_at(core->io, core->offset, core->block, len);
4664:           s = rz_hash_cfg_randomart(core->block, len, core->offset);
4671:   rz_io_read_at(core->io, offset0, core->block, len);
6609:   colordump(core, core->block, len);
6617:   out = rz_inflate(core->block, core->blocksize, &inConsumed, &outlen);

librz/core/cmd/cmd_cmp.c
190:        rz_core_print_hexdiff(core, core->offset, core->block, addr, b, core->blocksize, col);
206:        rz_core_print_hexdiff(core, core->offset, core->block, addr, b, core->blocksize, col);
326:    ret = rz_hex_bin2str(core->block, strlen(input) / 2, (char *)buf);

librz/core/cmd/cmd_seek.c
52:         rz_asm_disassemble(core->rasm, &op, core->block, 32);
73:     ret = rz_analysis_op(core->analysis, &op, core->offset, core->block,

librz/core/cmd/cmd_analysis.c
375:        ut32 n = rz_read_ble32(core->block + i, big_endian);
5535:       core_analysis_bytes_json(core, core->block, core->blocksize, 0, state->d.pj);
5538:       core_analysis_bytes_standard(core, core->block, core->blocksize, 0);
5549:   core_analysis_bytes_esil(core, core->block, core->blocksize, 0);
5554:   core_analysis_bytes_desc(core, core->block, core->blocksize, 0);
5559:   core_analysis_bytes_size(core, core->block, core->blocksize, 0);
5583:       core_analysis_bytes_json(core, core->block, core->blocksize, count, state->d.pj);
5586:       core_analysis_bytes_standard(core, core->block, core->blocksize, count);
5616:   core_analysis_bytes_size(core, core->block, core->blocksize, count);
5641:   core_analysis_bytes_esil(core, core->block, core->blocksize, count);
5758:   rz_core_analysis_bytes_il(core, core->block, core->blocksize, count, false);
5782:   rz_core_analysis_bytes_il(core, core->block, core->blocksize, count, true);
5796:       core_analysis_bytes_desc(core, core->block + cur, core->blocksize, 1);
6602:       core->offset, core->block, core->blocksize);
$ rg rz_core_block_read -l
librz/main/rizin.c
librz/include/rz_core.h
librz/core/cio.c
librz/core/seek.c
librz/core/core.c
librz/core/yank.c
librz/core/cfile.c
librz/core/cbin.c
librz/core/disasm.c
librz/core/tui/visual.c
librz/core/tui/panels.c
librz/core/cconfig.c
librz/core/cmd/cmd_write.c
librz/core/cmd/cmd.c
librz/core/cmd/cmd_print.c
librz/core/cmd/cmd_open.c
test/unit/test_yank.c
wargio commented 1 year ago

I agree, but do we also need an API for switching bins, etc.?

PeiweiHu commented 10 months ago

I am a little bit confused before getting started on this, and I would really appreciate further explanation. We are not going to remove the concept of core->block, right? We just want to make access to core->block easier. For example, for the proposed API RZ_API bool rz_core_io_read_at(RzCore *core, ut64 offset, ut64 *read), does offset mean the offset from the start of the current core->block, and is read a buffer that stores all content of core->block from that offset to the end of core->block?

Or is RZ_API bool rz_core_io_read_at(RzCore *core, ut64 offset, ut64 *read) used to directly read the binary under test? But how to decide the length of reading? @XVilka

XVilka commented 10 months ago

@PeiweiHu the idea is to not expose core->block or similar APIs to 99% of API users - both external and internal. Splitting into blocks, caching reads, etc., should be done opaquely under the hood. We could keep only a way to change the block size and/or cache size via separate APIs, but that's it. So, for example, if we have a 64 GB file, the user should be able to read from or write to any address via a single API call without understanding the current block, the local cache content, etc.

> But how to decide the length of reading?

Sorry, I forgot to add the `ut64 len` parameter to both functions. My bad.