Open hunjixin opened 3 years ago
@hunjixin please add information about this error
@ZenGround0 There is no more information available: the state root simply does not match, and there is an error message {"errorMessage": "batch verify seals syscall implemented incorrectly", "exitCode": "17", "sender": "f03", "receiver": "f04", "methodNum": "5", "Value": "0", "gasLimit": 100000000000000}
The message comes from the code below. I have checked BatchVerifySeals: its argument and its return value have the same length. In the code, if miners contains an element, then verifies must contain it too.
// processBatchProofVerifies drains the queued seal-proof verification batch,
// verifies every queued proof through the BatchVerifySeals syscall, and tells
// each miner which of its sectors passed.
//
// Inside a single state transaction it loads the proof validation batch and
// the claims table, collects the queued SealVerifyInfo entries per miner
// (skipping, with a warning, any miner that has no claim), and then clears
// st.ProofValidationBatch. After the transaction it calls BatchVerifySeals and
// sends ConfirmSectorProofsValid to every miner that has at least one
// successfully verified sector.
//
// Aborts with ErrIllegalState if loading or iterating state fails or if the
// syscall returns an error, and with ErrNotFound if the syscall result has no
// entry for a miner that was submitted.
func (a Actor) processBatchProofVerifies(rt Runtime) {
	var (
		st State
		// miners records the order in which miners were encountered while
		// iterating the batch; the result loop below walks this slice rather
		// than ranging over a map.
		miners []addr.Address
	)
	verifies := make(map[addr.Address][]proof.SealVerifyInfo)

	rt.StateTransaction(&st, func() {
		store := adt.AsStore(rt)
		if st.ProofValidationBatch == nil {
			// Nothing queued: leave miners and verifies empty.
			return
		}

		batch, err := adt.AsMultimap(store, *st.ProofValidationBatch, builtin.DefaultHamtBitwidth, ProofValidationBatchAmtBitwidth)
		builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to load proofs validation batch")

		claims, err := adt.AsMap(adt.AsStore(rt), st.Claims, builtin.DefaultHamtBitwidth)
		builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to load claims")

		err = batch.ForAll(func(key string, queued *adt.Array) error {
			minerAddr, err := addr.NewFromBytes([]byte(key))
			builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to parse address key")

			// Proofs queued for a miner without a claim are not processed.
			hasClaim, err := claims.Has(abi.AddrKey(minerAddr))
			builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to look up claim")
			if !hasClaim {
				rt.Log(rtt.WARN, "skipping batch verifies for unknown miner %s", minerAddr)
				return nil
			}

			miners = append(miners, minerAddr)

			var (
				collected []proof.SealVerifyInfo
				// entry is the decode target handed to ForEach; each
				// callback appends a copy of its current value.
				entry proof.SealVerifyInfo
			)
			err = queued.ForEach(&entry, func(_ int64) error {
				collected = append(collected, entry)
				return nil
			})
			builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to iterate over proof verify array for miner %s", minerAddr)

			verifies[minerAddr] = collected
			return nil
		})
		builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to iterate proof batch")

		// The batch has been consumed; reset it for the next round.
		st.ProofValidationBatch = nil
	})

	results, err := rt.BatchVerifySeals(verifies)
	builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to batch verify")

	for _, miner := range miners {
		outcomes, found := results[miner]
		if !found {
			// Every submitted miner must be a key of the result map.
			rt.Abortf(exitcode.ErrNotFound, "batch verify seals syscall implemented incorrectly")
		}

		submitted := verifies[miner]
		alreadyConfirmed := make(map[abi.SectorNumber]struct{})
		var confirmed []abi.SectorNumber
		for idx, verified := range outcomes {
			if !verified {
				continue
			}
			sector := submitted[idx].SectorID.Number
			if _, dup := alreadyConfirmed[sector]; dup {
				// Report each sector number at most once per miner.
				continue
			}
			alreadyConfirmed[sector] = struct{}{}
			confirmed = append(confirmed, sector)
		}

		if len(confirmed) == 0 {
			continue
		}

		// The exit code is explicitly ignored.
		_ = rt.Send(
			miner,
			builtin.MethodsMiner.ConfirmSectorProofsValid,
			&builtin.ConfirmSectorProofsParams{Sectors: confirmed},
			abi.NewTokenAmount(0),
			&builtin.Discard{},
		)
	}
}
@ZenGround0 any idea?
@hunjixin it looks like your syscall implementation is not returning a map with all miners as keys. This is required even if the miner failed all sectors.
@ZenGround0 I have added logging in BatchVerifySeals, but it only compares the lengths of the argument and of the return value; they have the same length. The code is nearly the same as in lotus. There is very little chance of hitting this problem: there has been one such error in the last two months.
// BatchVerifySeals verifies every seal in vis and returns, for each miner
// address in vis, a slice of booleans parallel to that miner's input slice:
// element i is true when sys.VerifySeal returned no error for seal i.
//
// One goroutine is started per seal. Each goroutine takes a slot from a
// channel of capacity BatchSealVerifyParallelism before calling VerifySeal, so
// at most that many verifications run at once. A failed verification is
// logged and recorded as false. The returned error is always nil.
func (sys syscalls) BatchVerifySeals(vis map[address.Address][]proof5.SealVerifyInfo) (map[address.Address][]bool, error) {
	vmlog.Info("BatchVerifySeals miners:", len(vis))

	verdicts := make(map[address.Address][]bool)
	slots := make(chan struct{}, BatchSealVerifyParallelism)
	var pending sync.WaitGroup

	// verifyOne checks a single seal and stores the verdict at res[ix].
	// Each goroutine writes to a distinct index, and the map itself is only
	// written by the spawning loop below.
	verifyOne := func(ma address.Address, ix int, svi proof5.SealVerifyInfo, res []bool) {
		defer pending.Done()

		slots <- struct{}{}
		err := sys.VerifySeal(svi)
		if err != nil {
			vmlog.Warnw("seal verify in batch failed", "miner", ma, "index", ix, "err", err)
		}
		res[ix] = err == nil
		<-slots
	}

	for miner, seals := range vis {
		// Register the miner's slice up front so the miner is a key of the
		// result even when it has no seals or every seal fails.
		perMiner := make([]bool, len(seals))
		verdicts[miner] = perMiner

		for idx := range seals {
			pending.Add(1)
			go verifyOne(miner, idx, seals[idx], perMiner)
		}
	}

	pending.Wait()
	vmlog.Info("BatchVerifySeals Result miners:", len(verdicts))
	return verdicts, nil
}
@hunjixin To confirm: is it the venus node that sees the exitcode 17 error, which leads to the state root mismatch because the error is not seen on mainnet?
If this is the case, you should rerun this at the problem epochs and inspect the data making it into res that is causing the error.
error
Yes, the root does not match. But when I set the head back to the previous tipset and reprocess the same tipset, the result is always OK, even without restarting the process.
@ZenGround0
I added logging like this:
// Debug instrumentation placed around the BatchVerifySeals call: it logs the
// key set of the verifies map and the miners slice before the call, and the
// key set of the returned map after it, so the three can be compared.
// NOTE(review): id is not defined in this snippet; presumably a per-call
// identifier created earlier to correlate the log lines - confirm.

// Collect the keys of the map that is passed to the syscall.
keyInVerifies := []addr.Address{}
for key, _ := range verifies {
keyInVerifies = append(keyInVerifies, key)
}
rt.Log(rtt.INFO, "ID: %s, verifies keys before BatchVerifySeals %v", id, keyInVerifies)
rt.Log(rtt.INFO, "ID: %s, miners keys before BatchVerifySeals %v", id, miners)
res, err := rt.BatchVerifySeals(verifies)
builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to batch verify")
// Collect the keys of the map returned by the syscall.
keyInRes := []addr.Address{}
for key, _ := range res {
keyInRes = append(keyInRes, key)
}
// NOTE(review): the message below says "before" although it is emitted after
// the BatchVerifySeals call has returned.
rt.Log(rtt.INFO, "ID: %s, return before BatchVerifySeals %v", id, keyInRes)
and got logs like this:
2021-10-16T18:48:03.634+0800 INFO vm.actors vmcontext/runtime_adapter.go:197 ID: 21c48411-0860-4061-82ae-7358eff93ae7, verifies keys before BatchVerifySeals [f01170291 f01317157 f01181168 f01207023 f01169696 f0392813 f0411877 f0226418 f01149485 f01218989 f01227383 f01154295 f065877 f01236627 f01103850 f0156452 f01024569 f01138709 f0107999 f01116666 f0150748 f0160735 f083419 f01271225 f087888 f01288529 f01250983 f01138139 f01247078 f01189202 f0469055 f0127378 f01145144 f01101315 f0151498 f01277031 f054420 f01090983 f01177077 f01319368 f01270285 f0428177 f01312143 f01182223 f01250837 f01043193 f01348517 f01098119 f01125168 f01365744 f01251528 f0442377 f0454186 f0124554 f01123833 f0135066 f01261075 f01031867 f01071719 f0156417 f01353593 f062982 f01263957 f0881687 f01122841 f01272340 f01038625 f02419 f01191029 f01096056 f01193462]
2021-10-16T18:48:03.634+0800 INFO vm.actors vmcontext/runtime_adapter.go:197 ID: 21c48411-0860-4061-82ae-7358eff93ae7, miners keys before BatchVerifySeals [f054420 f01125168 f0442377 f01236627 f0454186 f01317157 f0881687 f01181168 f01365744 f0124554 f01090983 f01247078 f01191029 f087888 f01103850 f01071719 f01177077 f01154295 f01182223 f01288529 f0156452 f0411877 f0226418 f01170291 f01207023 f01122841 f01024569 f01250837 f0150748 f01169696 f01096056 f01043193 f01149485 f01218989 f01348517 f01319368 f01138709 f01123833 f01270285 f0135066 f01251528 f01272340 f01189202 f0160735 f0428177 f0156417 f01038625 f0469055 f01250983 f01353593 f01098119 f0107999 f083419 f062982 f01271225 f01193462 f02419 f0127378 f01145144 f01116666 f01312143 f01227383 f01101315 f0151498 f01277031 f01261075 f01263957 f0392813 f01031867 f065877 f01138139]
2021-10-16T18:48:03.634+0800 INFO vm.context vmcontext/syscalls.go:91 BatchVerifySeals miners:71
2021-10-16T18:48:04.029+0800 INFO vm.context vmcontext/syscalls.go:115 BatchVerifySeals Result miners:71
2021-10-16T18:48:04.029+0800 INFO vm.actors vmcontext/runtime_adapter.go:197 ID: 21c48411-0860-4061-82ae-7358eff93ae7, return before BatchVerifySeals [f0127378 f01031867 f01181168 f0392813 f0156452 f087888 f01247078 f01189202 f0124554 f01250983 f01090983 f01182223 f0881687 f0150748 f01122841 f01138709 f054420 f01193462 f01170291 f02419 f01024569 f01277031 f01125168 f01263957 f0428177 f01365744 f01123833 f01236627 f01101315 f01319368 f01270285 f01038625 f01096056 f01317157 f0411877 f0226418 f083419 f01145144 f01261075 f01169696 f01218989 f065877 f01103850 f01177077 f01098119 f0135066 f01312143 f01043193 f01272340 f01149485 f0107999 f01138139 f0469055 f0151498 f01116666 f01071719 f0156417 f01353593 f01154295 f01271225 f01348517 f01207023 f01250837 f01191029 f0160735 f01251528 f0454186 f01288529 f01227383 f0442377 f062982]
2021-10-16T18:48:04.029+0800 WARN vm.context vmcontext/invocation_context.go:197 Abort during actor execution. {"errorMessage": "batch verify seals syscall implemented incorrectly", "exitCode": "17", "sender": "f03", "receiver": "f04", "methodNum": "5", "Value": "0", "gasLimit": 100000000000000}
2021-10-16T18:48:04.092+0800 INFO vm.context vmcontext/vmcontext.go:333 process cron: 463
Before the loop, the miner keys are the same as the keys in res.
@ZenGround0 In the v6 version, I got an error like this:
2021-11-04T19:29:35.417+0800 ERROR vm.actors vmcontext/runtime_adapter.go:188 unexpected error processing batch proof verifies: batch verify seals syscall implemented incorrectly, result not found for miner: %!s(PANIC=String method: unknown address protocol). Skipping all verification for epoch 1257299
unknown address protocol ?
This error occurs only very occasionally. Does anyone have an idea? I have confirmed that the argument and the return value of BatchVerifySeals have the same length.