Closed yannickmoy closed 1 year ago
running with solver.axioms2files=true produces a ton of files (I had to fix a few bugs in how the files are generated). Some of the files are SAT (z3 returns unknown, but it is really SAT), which pinpoints the bug. The next step is to break or log more at the offending lemma (for me it is lemma number 10465 when using smt.random_seed=3).
Turns out the theory axioms all double checked. To minimize, the following script works
from z3 import *
s1 = Solver()
s1.from_file("C:/z3/build/6270.smt2")
asserts = s1.assertions()
while True:
progress = False
fmls = { f for f in asserts }
for a in asserts:
fmls -= { a }
found = False
print("try", len(fmls), a)
for i in range(10):
s = Solver()
s.set("random_seed", i)
s.set("timeout", 2000)
s.add(fmls)
if unsat == s.check():
print("unsat", i)
progress = True
found = True
break
if not found:
print("not unsat")
fmls |= { a }
asserts = list(fmls)
if not progress:
print(s.sexpr())
break
It produces
(declare-sort zero_8_t 0)
(declare-datatypes ((zero_8_t__init_wrapper 0)) (((zero_8_t__init_wrapperqtmk (rec__value3 zero_8_t) (us_attr__init3 Bool)))))
(declare-fun zero_8_tqtint (zero_8_t) Int)
(declare-fun of_rep11 (Int) zero_8_t)
(declare-fun convert1 ((Array Int zero_8_t)) (Array Int zero_8_t__init_wrapper))
(assert (forall ((x zero_8_t))
(! (= (of_rep11 (zero_8_tqtint x)) x) :pattern ((zero_8_tqtint x)))))
(assert (forall ((x zero_8_t))
(! (and (<= 0 (zero_8_tqtint x)) (<= (zero_8_tqtint x) 0))
:pattern ((zero_8_tqtint x)))))
(assert (forall ((a (Array Int zero_8_t)) (temp___171 Int))
(let ((a!1 (zero_8_tqtint (of_rep11 (zero_8_tqtint (select a temp___171)))))
(a!2 (zero_8_tqtint (rec__value3 (select (convert1 a) temp___171)))))
(and (us_attr__init3 (select (convert1 a) temp___171)) (= a!1 a!2)))))
The command-line doesn't repro well with the above API based minimization. This approach works better:
from z3 import *
def parse_commands(ins):
buffer = ""
lp = 0
rp = 0
for line in ins:
for ch in line:
if ch == '(':
lp += 1
elif ch == ')':
rp += 1
if lp > 0:
buffer += ch
if lp > 0 and lp == rp:
yield buffer
lp = 0
rp = 0
buffer = ""
def get_cmds():
with open("C:/z3/build/6270.smt2") as ins:
for cmd in parse_commands(ins):
yield cmd
def save_cmds(file, cmds):
with open(file, 'w') as ous:
for cmd in cmds:
ous.write(cmd)
ous.write("\n")
cmds = list(get_cmds())
print(list(cmds))
import subprocess
import re
has_unsat = re.compile("unsat")
while True:
progress = False
for i in range(len(cmds)):
cmd = cmds[i]
if cmd.startswith("(assert"):
found = False
cmds[i] = "(echo \"removed\")"
save_cmds("tmp.smt2", cmds)
for seed in range(10):
out = subprocess.Popen(f"C:\\z3\\release\\z3.exe tmp.smt2 /T:2 smt.random_seed={seed}", stdout=subprocess.PIPE).communicate()[0]
if out != None:
out = out.decode(sys.stdout.encoding)
print(out)
found = has_unsat.search(out)
if found:
progress = True
break
if not found:
cmds[i] = cmd
else:
save_cmds("repro.smt2", cmds)
if not progress:
break
The bug appears to be that MBQI introduces fresh distinct values for elements in the domain of zero_8_t. The domain of zero_8_t can only have a single element, though. So MBQI leaks assertions (that there are distinct elements) in a domain when there arent.
Not sure, but the following minimization/reduction might be of help:
[508] % z3release small.smt2
unsat
[509] % cat small.smt2
(declare-sort g 0)
(declare-datatypes ((p 0)) (((b (q g)))))
(declare-datatypes ((r 0)) (((s (ac Bool)))))
(declare-datatypes ((t 0)) (((u (v Bool)))))
(declare-datatypes ((w 0)) (((d (x Bool)))))
(declare-datatypes ((y 0)) (((z (ae g) (af Bool)))))
(declare-datatypes ((ak 0)) (((g (am Bool)))))
(declare-datatypes ((an 0)) (((ax (ao Bool)))))
(declare-datatypes ((ap 0)) (((aq (ar ak) (ay an)))))
(declare-datatypes ((az 0)) (((cv (ca p) (cb ap)))))
(declare-datatypes ((ce 0)) (((cf (cg t)(ch t) (ci w) (cj (Array Int y)) (ck az) (cl r)))))
(declare-fun h (g) Int)
(declare-fun l (Int) g)
(declare-fun ag ((Array Int y)) (Array Int g))
(declare-fun ai ((Array Int g)) (Array Int y))
(define-fun cz ((a az)) Bool true)
(declare-fun cm () Int)
(declare-fun cn () ce)
(assert (forall ((i g)) (= 0 (h i))))
(assert (forall ((k g)) (= (l (h k)) k)))
(assert (forall ((a (Array Int y)) (ah Int)) (= (h (l (h (ae (select a ah))))) (h (select (ag a) ah)))))
(assert (forall ((a (Array Int g)) (aj Int)) (= (af (z (l (h (select a aj))) true)) (af (select (ai a) aj)))))
(assert (forall ((a (Array Int g)) (aj Int)) (= (h (ae (z (l (h (select a aj))) true))) (h (ae (select (ai a) aj))))))
(assert (or (= cm 0) (ac (cl cn)) (and (v (cg cn)) (v (ch cn)) (x (ci cn)) (am (ar (cb (ck cn)))) (not (ao (ay (cb (ck cn))))))))
(check-sat)
great! thanks for the fix and the fantastic investigation, looks like it was a fun one...
Z3 4.10 immediately returns unsat on the VC below, which should be satisfiable (although it's hard to check due to the encoding). Other provers return unknown immediately, and any slight change to the VC also makes Z3 return unknown. For example, removing the first declaration for datatype int__ref (which is unused) causes Z3 to return unknown immediately. Or removing the assertion on line 36 giving the value of (unused) constant two_power_size_minus_one also causes Z3 to return unknown immediately.
I reduced the VC as much as I could, but every new removal seems to make the problem go away.
Can I do something on my side to help diagnose the issue further?