mhw0 / libethc

Open-source Ethereum C library
https://mhw0.github.io/libethc/
MIT License
48 stars 9 forks source link

When using eth_rlp_uint(), the result of RLP serialization is inconsistent with the result of RLP encoding in the go-ethereum library. #45

Open qingfengzxr opened 3 months ago

qingfengzxr commented 3 months ago

Hello, brothers. I have run into something confusing. To put it simply, when I use eth_rlp_hex() to serialize my transaction data, I get the correct serialization result. But when I use functions such as eth_rlp_uint() or eth_rlp_uint64() to serialize the same data, the final result is different. This is the C code I used to test:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <ethc/rlp.h>
#include <gmp.h>

/*
 * ok(expr): evaluate a libethc call exactly once and abort if it reports
 * failure (a negative value).
 *
 * The check is an explicit `if` rather than an assert() so that the wrapped
 * call is still executed when NDEBUG is defined, and the argument is
 * parenthesized so that expressions containing low-precedence operators
 * (e.g. `cond ? a : b`) are compared as a whole.
 */
#define ok(ethcop)                                                        \
    do {                                                                  \
        if ((ethcop) < 0) {                                               \
            fprintf(stderr, "%s:%d: `%s` failed\n", __FILE__, __LINE__,   \
                    #ethcop);                                             \
            abort();                                                      \
        }                                                                 \
    } while (0)

/*
 * Reproducer for the issue: encodes the unsigned EIP-155 legacy transaction
 *   [nonce, gasprice, gaslimit, to, value, data, chainid, 0, 0]
 * using the integer helper eth_rlp_uint() for every numeric field, then
 * prints the resulting RLP bytes as lowercase hex followed by a newline.
 *
 * As reported in this issue, the gas price (20000000000, which needs only
 * 5 bytes) comes out as a zero-padded 8-byte string, so the output differs
 * from the go-ethereum / Python encoders.
 *
 * Every libethc call is checked through ok().
 */
void rlp_encode_test_1(void) {
    // Numeric fields; eth_rlp_uint() takes each one by address.
    uint64_t nonce = 1000;                        // 0x03e8
    uint64_t gasprice_int = 20000000000;          // 0x04a817c800 (5 significant bytes)
    uint64_t gaslimit = 20996;                    // 0x5204
    // The value fits in a uint64_t, so no arbitrary-precision (GMP)
    // conversion is needed for it here.
    uint64_t value_uint64 = 1000000000000000000;  // 0x0de0b6b3a7640000
    uint64_t chainid = 31337;                     // 0x7a69
    uint64_t zero = 0;

    // Call data: the single byte 0x01.
    uint8_t data[] = {0x01};
    uint8_t *data_ptr = data;
    size_t data_len = sizeof(data);

    char *toaddr = "0x3535353535353535353535353535353535353535";

    struct eth_rlp rlp0;
    uint8_t *rlp0bytes = NULL;
    size_t rlp0len = 0;

    ok(eth_rlp_init(&rlp0, ETH_RLP_ENCODE));

    ok(eth_rlp_array(&rlp0));                           // [
        ok(eth_rlp_uint(&rlp0, &nonce));                //   nonce,
        ok(eth_rlp_uint(&rlp0, &gasprice_int));         //   gas price (field that mismatches),
        ok(eth_rlp_uint(&rlp0, &gaslimit));             //   gas limit,
        ok(eth_rlp_address(&rlp0, &toaddr));            //   recipient address,
        ok(eth_rlp_uint(&rlp0, &value_uint64));         //   value,
        ok(eth_rlp_bytes(&rlp0, &data_ptr, &data_len)); //   data (0x01),
        ok(eth_rlp_uint(&rlp0, &chainid));              //   chain id (EIP-155 "v"),
        ok(eth_rlp_uint(&rlp0, &zero));                 //   0 (EIP-155 "r"),
        ok(eth_rlp_uint(&rlp0, &zero));                 //   0 (EIP-155 "s"),
    ok(eth_rlp_array_end(&rlp0));                       // ]

    ok(eth_rlp_to_bytes(&rlp0bytes, &rlp0len, &rlp0));
    ok(eth_rlp_free(&rlp0));

    // Print the RLP bytes in hexadecimal; size_t index matches rlp0len's type.
    for (size_t i = 0; i < rlp0len; i++) {
        printf("%02x", rlp0bytes[i]);
    }
    printf("\n");

    free(rlp0bytes);
}

/*
 * Reference encoding: builds the same kind of unsigned EIP-155 legacy
 * transaction as the integer-based test, but passes every numeric field to
 * eth_rlp_hex() as a hex string. Prints the RLP bytes as lowercase hex
 * followed by a newline.
 *
 * Every libethc call is checked through ok().
 */
void rlp_encode_test_2(void) {
    struct eth_rlp rlp0;
    uint8_t *rlp0bytes = NULL;
    size_t rlp0len = 0;

    // Numeric fields as hex strings; eth_rlp_hex() takes each by address.
    char *nonce = "0x3E8";                    // 1000
    char *gasprice_int = "0x4A817C800";       // 20000000000
    // NOTE(review): 0x5208 is 21000, whereas the uint64_t-based test and the
    // Go/Python snippets use 20996 (0x5204). This is why this function's
    // output contains ...825208... rather than ...825204....
    char *gaslimit = "0x5208";
    char *value_uint64 = "0xDE0B6B3A7640000"; // 1000000000000000000
    char *chainid = "0x7A69";                 // 31337
    char *zero = "0x0";
    char *toaddr = "0x3535353535353535353535353535353535353535";

    // Call data: the single byte 0x01.
    uint8_t data[] = {0x01};
    uint8_t *data_ptr = data;
    size_t data_len = sizeof(data);

    ok(eth_rlp_init(&rlp0, ETH_RLP_ENCODE));

    ok(eth_rlp_array(&rlp0));                           // [
        ok(eth_rlp_hex(&rlp0, &nonce, NULL));           //   nonce,
        ok(eth_rlp_hex(&rlp0, &gasprice_int, NULL));    //   gas price,
        ok(eth_rlp_hex(&rlp0, &gaslimit, NULL));        //   gas limit,
        ok(eth_rlp_address(&rlp0, &toaddr));            //   recipient address,
        ok(eth_rlp_hex(&rlp0, &value_uint64, NULL));    //   value,
        ok(eth_rlp_bytes(&rlp0, &data_ptr, &data_len)); //   data (0x01),
        ok(eth_rlp_hex(&rlp0, &chainid, NULL));         //   chain id (EIP-155 "v"),
        ok(eth_rlp_hex(&rlp0, &zero, NULL));            //   0 (EIP-155 "r"),
        ok(eth_rlp_hex(&rlp0, &zero, NULL));            //   0 (EIP-155 "s"),
    ok(eth_rlp_array_end(&rlp0));                       // ]

    ok(eth_rlp_to_bytes(&rlp0bytes, &rlp0len, &rlp0));
    ok(eth_rlp_free(&rlp0));

    // Print the RLP bytes in hexadecimal; size_t index matches rlp0len's type.
    for (size_t i = 0; i < rlp0len; i++) {
        printf("%02x", rlp0bytes[i]);
    }
    printf("\n");

    free(rlp0bytes);
}

/*
 * Entry point: runs the eth_rlp_uint()-based reproducer, which prints its
 * RLP encoding to stdout. Swap in rlp_encode_test_2() to print the
 * eth_rlp_hex()-based encoding instead.
 */
int main(void) {
    rlp_encode_test_1();
    return 0;
}

When I call rlp_encode_test_1(), the RLP encoding I get is: f38203e88800000004a817c800825204943535353535353535353535353535353535353535880de0b6b3a764000001827a698080

Calling rlp_encode_test_2() instead gives: f08203e88504a817c800825208943535353535353535353535353535353535353535880de0b6b3a764000001827a698080

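I can confirm that the result obtained by rlp_encode_test_2() is correct, because I serialized the same transaction using both Go and Python code and got the same result as rlp_encode_test_2(). (The only difference is the gas-limit bytes, 825208 versus 825204, because rlp_encode_test_2() passes "0x5208" while the other snippets use 20996 = 0x5204.)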

Go test code:

package main

import (
    "encoding/hex"
    "encoding/json"
    "fmt"
    "log"
    "math/big"

    "github.com/ethereum/go-ethereum/common"
    "github.com/ethereum/go-ethereum/core/types"
    "github.com/ethereum/go-ethereum/rlp"
)

// rlp_encode_test RLP-encodes the unsigned EIP-155 form of a legacy
// transaction and computes its signing hash.
//
// It returns the RLP bytes of
// [nonce, gasPrice, gas, to, value, data, 31337, 0, 0] together with the hash
// produced by the EIP-155 signer for chain ID 31337. When encoding fails the
// error is printed and (nil, zero hash) is returned.
func rlp_encode_test() ([]byte, common.Hash) {
    recipient := common.HexToAddress("0x3535353535353535353535353535353535353535")

    // Call data: the single byte 0x01.
    payload, decodeErr := hex.DecodeString("01")
    if decodeErr != nil {
        log.Fatal(decodeErr)
    }
    fmt.Println("[rlp_encode_test] data:", payload)

    tx := types.NewTransaction(
        1000,                            // nonce
        recipient,                       // to
        big.NewInt(1000000000000000000), // value
        uint64(20996),                   // gas limit
        big.NewInt(20000000000),         // gas price
        payload,                         // data
    )

    // Field list in signing order; the trailing chain ID and two zeros take
    // the place of v, r and s.
    fields := []interface{}{
        tx.Nonce(),
        tx.GasPrice(),
        tx.Gas(),
        tx.To(),
        tx.Value(),
        tx.Data(),
        big.NewInt(31337), uint(0), uint(0),
    }

    encoded, encodeErr := rlp.EncodeToBytes(fields)
    if encodeErr != nil {
        fmt.Println("rlp.EncodeToBytes error:", encodeErr)
        return nil, common.Hash{}
    }
    fmt.Printf("legacyTx rlp encodeBytes hex: %x\n", encoded)

    sigHash := types.NewEIP155Signer(big.NewInt(31337)).Hash(tx)
    fmt.Println("=================================")
    fmt.Println("legacyTx rlp sign hash:", sigHash)

    return encoded, sigHash
}

Python test code:

import rlp
from ethereum.transactions import Transaction
from web3 import Web3
import binascii

def rlp_encode_test():
    """RLP-encode a legacy transaction and hash the encoding.

    Builds a ``Transaction`` whose ``v`` is the chain id (31337) and whose
    ``r`` and ``s`` are 0, prints the hex of its RLP encoding and of the
    Keccak-256 digest of that encoding, and returns both hex strings as a
    ``(rlp_hex, keccak_hex)`` tuple.
    """
    tx = Transaction(
        nonce=1000,
        gasprice=20000000000,
        startgas=20996,
        to="0x3535353535353535353535353535353535353535",
        value=1000000000000000000,
        data=bytes.fromhex("01"),  # call data: the single byte 0x01
        v=31337,
        r=0,
        s=0,
    )

    encoded = rlp.encode(tx)
    encoded_hex = encoded.hex()
    print("RLP Encoded legacyTx:", encoded_hex)

    digest_hex = Web3.keccak(encoded).hex()
    print("Keccak256 Hash of RLP Encoded legacyTx:", digest_hex)
    return encoded_hex, digest_hex

# Run the check once at script level and keep both returned hex strings.
rlp_encoded, keccak_hash = rlp_encode_test()

Both produce the following RLP encoding: f08203e88504a817c800825204943535353535353535353535353535353535353535880de0b6b3a764000001827a698080

So I am confused about this phenomenon. Why do I get different results when serializing using the eth_rlp_uint() function? Do you understand the reason for this problem? @zzzzzzch @mhw0

qingfengzxr commented 3 months ago

I did some incremental replacement testing. It was found that the problem lies in the gasprice_uint field.

    // The same gas price in both forms: 0x4A817C800 == 20000000000,
    // which needs 5 bytes.
    char *gasprice = "0x4A817C800";
    uint64_t gasprice_uint = 20000000000;

When I use the gasprice variable with eth_rlp_hex(&rlp0, &gasprice, NULL), I still get the correct result: f08203e88504a817c800825204943535353535353535353535353535353535353535880de0b6b3a764000001827a698080

If I use the gasprice_uint variable with eth_rlp_uint(&rlp0, &gasprice_uint), I get: f38203e88800000004a817c800825204943535353535353535353535353535353535353535880de0b6b3a764000001827a698080

Other fields have no effect. The code example is as follows:

    // Incremental replacement test: the eth_rlp_hex() calls that were swapped
    // out are left commented above their eth_rlp_uint*() replacements.
    // Byte values in the trailing comments are taken from the output
    // reported for this variant.
    ok(eth_rlp_array(&rlp0));                  // [
        ok(eth_rlp_hex(&rlp0, &nonce, NULL));        //   nonce: 0x03e8,
        // ok(eth_rlp_hex(&rlp0, &gasprice, NULL)); //   0x04a817c800,
        ok(eth_rlp_uint(&rlp0, &gasprice_uint)); //   gas price: emitted as 8 bytes, 0x00000004a817c800 (the mismatch),
        // ok(eth_rlp_hex(&rlp0, &gaslimit, NULL)); //   0x5208,
        ok(eth_rlp_uint(&rlp0, &gaslimit_uint)); //   gas limit: 0x5204,
        ok(eth_rlp_address(&rlp0, &toaddr));     //   0x3535353535353535353535353535353535353535,
        // ok(eth_rlp_hex(&rlp0, &value, NULL));    //   0x0de0b6b3a7640000,
        ok(eth_rlp_uint(&rlp0, &value_uint64));         //   value: 0x0de0b6b3a7640000,
        ok(eth_rlp_bytes(&rlp0, &data_ptr, &data_len));         //   data: 0x01,
        // ok(eth_rlp_hex(&rlp0, &data, NULL));     //   0x7a69,
        // ok(eth_rlp_hex(&rlp0, &chainid, NULL));     //   0x7a69,
        ok(eth_rlp_uint(&rlp0, &chainid_uint));         //   chain id: 0x7a69,
        ok(eth_rlp_uint8(&rlp0, &zero_uint));         //   zero: empty string (encoded as 0x80),
        ok(eth_rlp_uint8(&rlp0, &zero_uint));         //   zero: empty string (encoded as 0x80),
    ok(eth_rlp_array_end(&rlp0));              // ]
mhw0 commented 3 months ago

Hi @qingfengzxr eth_rlp_uint works differently. You're passing gasprice_int which is 20000000000 and occupies 5 bytes and fits in uint64. So this check is true in eth_rlp_uint source code:

if (*d <= 0xff) {
  return eth_rlp_uint8(rlp, (uint8_t*)d);
} else if (*d <= 0xffff) {
  return eth_rlp_uint16(rlp, (uint16_t*)d);
} else if (*d <= 0xffffffff) {
  return eth_rlp_uint32(rlp, (uint32_t*)d);
} else if (*d <= 0xffffffffffffffff) { // satisfies this if check
  return eth_rlp_uint64(rlp, d); // and this function is being called
}

If we go further and look at the code of eth_rlp_uint64, it's clear that it expects and encodes exactly 8 bytes, even if you pass one-byte integer into it. Let's say we're encoding: 0xff and passing it into eth_rlp_uint64. The result would be: 0x00000000000000ff. As you can see there are leading zero bytes. That's why you're getting different results.

mhw0 commented 3 months ago

I think the way the function works right now is not correct. If we pass 5 byte integer into eth_rlp_uint it should produce 5 byte result, not 8.

Not closing the issue, and marking it as bug. Thank you so much!

qingfengzxr commented 3 months ago

Hi @qingfengzxr eth_rlp_uint works differently. You're passing gasprice_int which is 20000000000 and occupies 5 bytes and fits in uint64. So this check is true in eth_rlp_uint source code:

if (*d <= 0xff) {
  return eth_rlp_uint8(rlp, (uint8_t*)d);
} else if (*d <= 0xffff) {
  return eth_rlp_uint16(rlp, (uint16_t*)d);
} else if (*d <= 0xffffffff) {
  return eth_rlp_uint32(rlp, (uint32_t*)d);
} else if (*d <= 0xffffffffffffffff) { // satisfies this if check
  return eth_rlp_uint64(rlp, d); // and this function is being called
}

If we go further and look at the code of eth_rlp_uint64, it's clear that it expects and encodes exactly 8 bytes, even if you pass one-byte integer into it. Let's say we're encoding: 0xff and passing it into eth_rlp_uint64. The result would be: 0x00000000000000ff. As you can see there are leading zero bytes. That's why you're getting different results.

I understand, thank you for your answer, I will keep the issue open. Thank you very much! @mhw0

zzzzzzch commented 3 months ago

yeah I also encountered this problem before when I tried to encode ETH Sepolia ChainID 11155111 into RLP. After converting the uint to char*, the serialization problem was fixed.