ethereumjs / ethereumjs-monorepo

Monorepo for the Ethereum VM TypeScript Implementation
2.57k stars 746 forks source link

Inconsistent Memory Expansion Behavior while logging `ethereumjs/evm` #3561

Open Alleysira opened 1 month ago

Alleysira commented 1 month ago

What happened

Hello developers, I'm testing ethereumjs/evm's with other EVM implementations. I found an inconsistency that ethereumjs/evm will expand the memory before the expand memory related opcodes are executed, while Geth, Besu and Py-EVM don't.

Just as in this example, we can observe that the memory length is already determined before the MSTORE operation is executed. Similarly, before the CREATE instruction is invoked, Geth, Besu, and Py-EVM all produce the same output.

Ethereumjs/evm

{"pc":35,"op":82,"gas":"0xfffff9","gasCost":"0x3","memory":"0x0000000000000000000000000000000000000000000000000000000000000000","memSize":32,"stack":["0x60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff","0x0"],"depth":0,"opName":"MSTORE"}

{"pc":42,"op":240,"gas":"0xffffea","gasCost":"0x7d00","memory":"0x60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff0000000000000000000000000000000000000000000000000000000000000000","memSize":64,"stack":["0x32","0x0","0x16"],"depth":0,"opName":"CREATE"}

Geth

{"pc":35,"op":82,"gas":"0xfffff9","gasCost":"0x6","memSize":0,"stack":["0x60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff","0x0"],"depth":1,"refund":0,"opName":"MSTORE"}

{"pc":42,"op":240,"gas":"0xffffea","gasCost":"0x7d07","memory":"0x60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff","memSize":32,"stack":["0x32","0x0","0x16"],"depth":1,"refund":0,"opName":"CREATE"}

As I am conducting differential testing, maintaining consistent output is very important to me. I'm curious to know if there is an issue with my test script (used step to log run time information) or with the implementation of the ethereumjs/evm. Details will be given later.

Thinks for your time!

Environment

Script

This is the script I used to run ethereumjs/evm:

const { Account,Address,hexToBytes,bytesToHex } =require('@ethereumjs/util')
const { EVM } =require('@ethereumjs/evm')
const { Chain, Common, Hardfork,ConsensusType,ConsensusAlgorithm, AccountFields  } =require('@ethereumjs/common')
const {Blockchain} = require('@ethereumjs/blockchain')
const { DefaultStateManager } = require('@ethereumjs/statemanager')
const { MemoryLevel } = require( 'memory-level')
const opcodes =require("./opcodes.js")
const yargs = require('yargs')

yargs.option('code',{type:"string",demandOption:true})
     .option('sig',{type:"string",demandOption:false})

var argv = yargs.argv

var code = '0x00'
var sig = argv.sig
// note that code has 0x prefix but sig doesn't
if (argv.code!=true){
    code='0x'+argv.code
}

function uint8ArrayToHexString(uint8Array) {
    let hexString = Array.from(uint8Array)
      .map(byte => byte.toString(16).padStart(2, '0'))
      .join('');

    return hexString;
}
function getValueByOpname(opname) {
    for (const key in opcodes) {
        if (opcodes.hasOwnProperty(key)) {
            if (key === opname) {
                return opcodes[key];
            }
        }
    }
    return undefined; // means undefined opcode 
}

//using runCall instead of runCode, can deal with SLOAD
async function runEvmRunCall (){
    const common = Common.custom({
        chainId: 9599,
        networkId: 9599,
        defaultHardfork: Hardfork.Shanghai,
        consensus: {
          type: ConsensusType.ProofOfStake,
          algorithm: ConsensusAlgorithm.Casper,
        },
        genesis: {
          gasLimit: 0xffffff,
          difficulty: 0,
          nonce: '0x0000000000000000',
          extraData: '0x0',
        },
        comment: 'Custom empty chain for benchmarks',
        bootstrapNodes: [],
        dnsNetworks: [],
      })
    const db = new MemoryLevel() 

    const blockchain = await Blockchain.create(common,db)

    const stateManager = new DefaultStateManager()
    stateManager.DEBUG = true

    const evm = new EVM({ common,stateManager,blockchain })
    evm.DEBUG=true

    const contractCode = hexToBytes(code) 
    const contractAddress = Address.fromString('0x0000000000000000000000007265636569766572')
    const AccountBalance = BigInt('0xffffffffffffffffffffffffffffffffffffffff')
    await evm.stateManager.putContractCode(contractAddress, contractCode)
    const accountFields = { balance: AccountBalance}

    await evm.stateManager.modifyAccountFields(contractAddress,accountFields)
    const anotherContractAddress = Address.fromString("0x1c7cd2d37ffd63856a5bd56a9af1643f2bcf545f")
    await evm.stateManager.modifyAccountFields(anotherContractAddress,accountFields) 
    evm.events.on('step', function (data) {
        // data.stateManager.DEBUG=true
        let hexStack = []
        hexStack = data.stack.map(item => {
            return '0x' + item.toString(16)
        })
        const mem_len = data.memory.length
        const mem_size = Number(data.memoryWordCount)*32
        if (mem_len!=0){
            let hexMemory = '0x'
            for (let i=0;i<mem_len;i++){
                hexMemory += data.memory[i].toString(16).padStart(2,'0')
            }
            var opTrace = {
                'pc': data.pc,
                'op': getValueByOpname(data.opcode.name),
                'gas': '0x' + data.gasLeft.toString(16),
                'gasCost': '0x' + data.opcode.fee.toString(16),
                'memory': hexMemory,
                'memSize': mem_size,
                'stack': hexStack,
                'depth': data.depth,
                'opName': data.opcode.name
            }
        }
        else {
            var opTrace = {
                'pc': data.pc,
                'op': getValueByOpname(data.opcode.name),
                'gas': '0x' + data.gasLeft.toString(16),
                'gasCost': '0x' + data.opcode.fee.toString(16),
                'memSize': mem_size,
                'stack': hexStack,
                'depth': data.depth,
                'opName': data.opcode.name
            }
        }
        opTrace_json = JSON.stringify(opTrace)
        console.log(opTrace_json)
      })

      if(sig==undefined){
        try{
        const results = await evm.runCall({
            code: hexToBytes(code),
            gasLimit: BigInt('0x'+'ffffff'),
            to: contractAddress,
            caller: new Address(hexToBytes("0x1c7cd2d37ffd63856a5bd56a9af1643f2bcf545f"))
        }).catch(console.error);
        var ret = {
            'output':uint8ArrayToHexString(results.execResult.returnValue),
            'gasUsed':'0x'+results.execResult.executionGasUsed.toString(16),
            'error':results.execResult.exceptionError
        }
        ret_json = JSON.stringify(ret)
        console.log(ret_json)
        }
        catch(err){
            console.log(err)
        }
    }
    // sig in defined
        else {
            sig=sig.toString(16)
            if(sig.charAt(0)!= "0" && sig.charAt(1)!= "x"){
                sig='0x'+sig
            }

            const results = await evm.runCall({
                gasLimit: BigInt('0x'+'ffffff'),
                data: hexToBytes(sig),
                to: contractAddress,
                caller: new Address(hexToBytes("0x1c7cd2d37ffd63856a5bd56a9af1643f2bcf545f"))
            });
            var ret = {
                'output':uint8ArrayToHexString(results.execResult.returnValue),
                'gasUsed':'0x'+results.execResult.executionGasUsed.toString(16),
                'error':results.execResult.exceptionError
            }
            ret_json = JSON.stringify(ret)
            console.log(ret_json)
        }
}

runEvmRunCall()

The command line to run the script:

node test.js --code 7f60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff600052603260006016f060005260406000f3 > jsevm.json

The bytecodes in the command line consist of:

PUSH32 60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff //As initcode
PUSH1 00 // OFFSET
MSTORE
PUSH1 32 // SIZE
PUSH1 00 // OFFSET
PUSH1 16 // VALUE
CREATE 
PUSH1 00 // OFFSET
MSTORE 
PUSH1 40 // SIZE
PUSH1 00 // OFFSET
RETURN 

Result

As the jsons have too many rows, I have to upload json files, sorry for the trouble. gethout.json, besuout.json, pyout.json, jsout.json

Thanks for your patience!

jochem-brouwer commented 1 month ago

Hi there, thanks a lot for this report, you are right that the logging behavior of @ethereumjs/evm is not consistent with the other tested clients. The reason is because of how EthJS works internally, and also (partially) the way you log the output.

In EthJS, to run a single step in the EVM, there are some steps, which I will summarize with the relevant ones:

  1. Perform gas cost calculation
  2. Log the step event (this has to be done after gas calculation to correctly report the gas cost)
  3. Actually perform the operation

Due to an optimization / the internal layout of the EVM, for memory-related gas opcodes we perform the "memory expansion cost" gas calculation (which is done once the memory has to be expanded beyond the current size of memory). For instance, (in your setup), if you MSTORE to a region which the memory has not yet expanded to, we thus have to pay expansion gas. To do so, internally (so in step 1, the "gas cost calculation") we check if this expansion has to be done, and if this is the case, we also set memoryWordCount to the value (which we expand to after running the opcode).

In your logging, you read memoryWordCount which is thus updated before step 3 (performing the operation). However, if you would read the memory, you will see that it is consistent.

Nevertheless, I think we should fix this, it is indeed inconsistent that in general we report the state before running the opcode, and in this case we update a variable which thus shows the state after running the opcode.

jochem-brouwer commented 1 month ago

Hmm, I just realized that the memory field can also be wrong :sweat_smile: Have to think if there is an easy way to fix this or if we need a specific flag just for this logging (which will impact performance, even if there is no logger attached :sweat_smile: )

Alleysira commented 1 month ago

Thanks for your detailed explanation! @jochem-brouwer.

I'm afraid you are right. I added a new field data.memory in the end of json.

            var opTrace = {
                'pc': data.pc,
                'op': getValueByOpname(data.opcode.name),
                'gas': '0x' + data.gasLeft.toString(16),
                'gasCost': '0x' + data.opcode.fee.toString(16),
                'memory': hexMemory,
                'memSize': mem_size,
                'stack': hexStack,
                'depth': data.depth,
                'opName': data.opcode.name,
                'data.memory': data.memory  // here
            }

The output shows the data.memory is also updated before the execution.

{"pc":35,"op":82,"gas":"0xfffff9","gasCost":"0x3","memory":"0x0000000000000000000000000000000000000000000000000000000000000000","memSize":32,"stack":["0x60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff","0x0"],"depth":0,"opName":"MSTORE","data.memory":{"0":0,"1":0,"2":0,"3":0,"4":0,"5":0,"6":0,"7":0,"8":0,"9":0,"10":0,"11":0,"12":0,"13":0,"14":0,"15":0,"16":0,"17":0,"18":0,"19":0,"20":0,"21":0,"22":0,"23":0,"24":0,"25":0,"26":0,"27":0,"28":0,"29":0,"30":0,"31":0}}

{"pc":42,"op":240,"gas":"0xffffea","gasCost":"0x7d00","memory":"0x60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff60ff0000000000000000000000000000000000000000000000000000000000000000","memSize":64,"stack":["0x32","0x0","0x16"],"depth":0,"opName":"CREATE","data.memory":{"0":96,"1":255,"2":96,"3":255,"4":96,"5":255,"6":96,"7":255,"8":96,"9":255,"10":96,"11":255,"12":96,"13":255,"14":96,"15":255,"16":96,"17":255,"18":96,"19":255,"20":96,"21":255,"22":96,"23":255,"24":96,"25":255,"26":96,"27":255,"28":96,"29":255,"30":96,"31":255,"32":0,"33":0,"34":0,"35":0,"36":0,"37":0,"38":0,"39":0,"40":0,"41":0,"42":0,"43":0,"44":0,"45":0,"46":0,"47":0,"48":0,"49":0,"50":0,"51":0,"52":0,"53":0,"54":0,"55":0,"56":0,"57":0,"58":0,"59":0,"60":0,"61":0,"62":0,"63":0}}

I also checked the spec. All opcodes seem to follow the steps:

  1. stack check and stack operations
  2. gas check and gas charge(where memory expansion gas will be charged)
  3. actual execution
  4. pc increase

Since the specification does not define when logging should begin (as it is not part of the execution), I think logging should start even before the stack check(as stack operations will alter the elements in the stack).

Anyway, thanks again!

Alleysira commented 3 weeks ago

Just checked eip-3155 from geth (even though this eip is still a draft since 2020), it says that:

The stack memory and memSize are the values before execution of the op.

In geth implementation, the trace for memory is indeed before the expansion.