Z3Prover / z3

The Z3 Theorem Prover
Other
10.14k stars 1.46k forks source link

Invalid model with sequences #2445

Closed LeventErkok closed 4 years ago

LeventErkok commented 5 years ago

Z3 returns SAT for the following benchmark, suggesting the value of s0 should be 1596 in the model. I'm pretty confident that this benchmark is actually UNSAT. Indeed, if I put the extra assertion that s0 is 1596, z3 then returns UNSAT. (See Lines 7-8 below.)

Unfortunately, I wasn't able to make it any smaller, and model_validate=true isn't complaining.

(set-option :produce-models true)
(set-logic ALL)
(define-fun s6 () Int 0)
(define-fun s8 () Int 1)
(declare-fun s0 () Int)

; **** Uncomment the following assert and z3 returns UNSAT, even though this is the value in the model.
; (assert (= s0 1596))

(declare-fun s1 () Int)
(declare-fun s2 () (Seq Int))
(define-fun s3 () (Seq Int) (seq.unit s0))
(define-fun s4 () Bool (seq.contains s2 s3))
(define-fun s5 () Int (seq.len s2))
(define-fun s7 () Bool (= s5 s6))
(define-fun s9 () Int (- s5 s8))
(define-fun s10 () (Seq Int) (seq.extract s2 s8 s9))
(define-fun s11 () Int (seq.len s10))
(define-fun s12 () Bool (= s6 s11))
(define-fun s13 () Int (seq.nth s2 s6))
(define-fun s14 () Int (seq.nth s10 s6))
(define-fun s15 () Bool (< s13 s14))
(define-fun s16 () Int (- s11 s8))
(define-fun s17 () (Seq Int) (seq.extract s10 s8 s16))
(define-fun s18 () Int (seq.len s17))
(define-fun s19 () Bool (= s6 s18))
(define-fun s20 () Int (seq.nth s17 s6))
(define-fun s21 () Bool (< s14 s20))
(define-fun s22 () Int (- s18 s8))
(define-fun s23 () (Seq Int) (seq.extract s17 s8 s22))
(define-fun s24 () Int (seq.len s23))
(define-fun s25 () Bool (= s6 s24))
(define-fun s26 () Int (seq.nth s23 s6))
(define-fun s27 () Bool (< s20 s26))
(define-fun s28 () Int (- s24 s8))
(define-fun s29 () (Seq Int) (seq.extract s23 s8 s28))
(define-fun s30 () Int (seq.len s29))
(define-fun s31 () Bool (= s6 s30))
(define-fun s32 () Bool (and s27 s31))
(define-fun s33 () Bool (or s25 s32))
(define-fun s34 () Bool (and s21 s33))
(define-fun s35 () Bool (or s19 s34))
(define-fun s36 () Bool (and s15 s35))
(define-fun s37 () Bool (or s12 s36))
(define-fun s38 () Bool (or s7 s37))
(define-fun s39 () Bool (and s4 s38))
(define-fun s40 () (Seq Int) (seq.unit s1))
(define-fun s41 () Bool (< s1 s13))
(define-fun s42 () (Seq Int) (seq.++ s40 s2))
(define-fun s43 () Bool (> s1 s13))
(define-fun s44 () (Seq Int) (seq.unit s13))
(define-fun s45 () Bool (< s1 s14))
(define-fun s46 () (Seq Int) (seq.++ s40 s10))
(define-fun s47 () Bool (> s1 s14))
(define-fun s48 () (Seq Int) (seq.unit s14))
(define-fun s49 () Bool (< s1 s20))
(define-fun s50 () (Seq Int) (seq.++ s40 s17))
(define-fun s51 () Bool (> s1 s20))
(define-fun s52 () (Seq Int) (seq.unit s20))
(define-fun s53 () Bool (< s1 s26))
(define-fun s54 () (Seq Int) (seq.++ s40 s23))
(define-fun s55 () Bool (> s1 s26))
(define-fun s56 () (Seq Int) (seq.unit s26))
(define-fun s57 () Int (seq.nth s29 s6))
(define-fun s58 () Bool (< s1 s57))
(define-fun s59 () (Seq Int) (seq.++ s40 s29))
(define-fun s60 () Bool (> s1 s57))
(define-fun s61 () (Seq Int) (seq.unit s57))
(define-fun s62 () Int (- s30 s8))
(define-fun s63 () (Seq Int) (seq.extract s29 s8 s62))
(define-fun s64 () (Seq Int) (seq.++ s61 s63))
(define-fun s65 () (Seq Int) (ite s60 s64 s29))
(define-fun s66 () (Seq Int) (ite s58 s59 s65))
(define-fun s67 () (Seq Int) (ite s31 s40 s66))
(define-fun s68 () (Seq Int) (seq.++ s56 s67))
(define-fun s69 () (Seq Int) (ite s55 s68 s23))
(define-fun s70 () (Seq Int) (ite s53 s54 s69))
(define-fun s71 () (Seq Int) (ite s25 s40 s70))
(define-fun s72 () (Seq Int) (seq.++ s52 s71))
(define-fun s73 () (Seq Int) (ite s51 s72 s17))
(define-fun s74 () (Seq Int) (ite s49 s50 s73))
(define-fun s75 () (Seq Int) (ite s19 s40 s74))
(define-fun s76 () (Seq Int) (seq.++ s48 s75))
(define-fun s77 () (Seq Int) (ite s47 s76 s10))
(define-fun s78 () (Seq Int) (ite s45 s46 s77))
(define-fun s79 () (Seq Int) (ite s12 s40 s78))
(define-fun s80 () (Seq Int) (seq.++ s44 s79))
(define-fun s81 () (Seq Int) (ite s43 s80 s2))
(define-fun s82 () (Seq Int) (ite s41 s42 s81))
(define-fun s83 () (Seq Int) (ite s7 s40 s82))
(define-fun s84 () Int (seq.len s83))
(define-fun s85 () Bool (= s6 s84))
(define-fun s86 () Int (seq.nth s83 s6))
(define-fun s87 () Bool (> s0 s86))
(define-fun s88 () Int (- s84 s8))
(define-fun s89 () (Seq Int) (seq.extract s83 s8 s88))
(define-fun s90 () Int (seq.len s89))
(define-fun s91 () Bool (= s6 s90))
(define-fun s92 () Int (seq.nth s89 s6))
(define-fun s93 () Bool (> s0 s92))
(define-fun s94 () Int (- s90 s8))
(define-fun s95 () (Seq Int) (seq.extract s89 s8 s94))
(define-fun s96 () Int (seq.len s95))
(define-fun s97 () Bool (= s6 s96))
(define-fun s98 () Int (seq.nth s95 s6))
(define-fun s99 () Bool (> s0 s98))
(define-fun s100 () Int (- s96 s8))
(define-fun s101 () (Seq Int) (seq.extract s95 s8 s100))
(define-fun s102 () Int (seq.len s101))
(define-fun s103 () Bool (= s6 s102))
(define-fun s104 () Int (seq.nth s101 s6))
(define-fun s105 () Bool (> s0 s104))
(define-fun s106 () Int (- s102 s8))
(define-fun s107 () (Seq Int) (seq.extract s101 s8 s106))
(define-fun s108 () Int (seq.len s107))
(define-fun s109 () Bool (= s6 s108))
(define-fun s110 () Int (seq.nth s107 s6))
(define-fun s111 () Bool (> s0 s110))
(define-fun s112 () Bool (= s0 s110))
(define-fun s113 () Bool (not s111))
(define-fun s114 () Bool (and s112 s113))
(define-fun s115 () Bool (not s109))
(define-fun s116 () Bool (and s114 s115))
(define-fun s117 () Bool (= s0 s104))
(define-fun s118 () Bool (ite s105 s116 s117))
(define-fun s119 () Bool (not s103))
(define-fun s120 () Bool (and s118 s119))
(define-fun s121 () Bool (= s0 s98))
(define-fun s122 () Bool (ite s99 s120 s121))
(define-fun s123 () Bool (not s97))
(define-fun s124 () Bool (and s122 s123))
(define-fun s125 () Bool (= s0 s92))
(define-fun s126 () Bool (ite s93 s124 s125))
(define-fun s127 () Bool (not s91))
(define-fun s128 () Bool (and s126 s127))
(define-fun s129 () Bool (= s0 s86))
(define-fun s130 () Bool (ite s87 s128 s129))
(define-fun s131 () Bool (not s85))
(define-fun s132 () Bool (and s130 s131))
(assert s39)
(assert (not s132))
(check-sat)
(get-value (s0))
NikolajBjorner commented 5 years ago

now produces unsat

LeventErkok commented 5 years ago

@NikolajBjorner

Looks like this got broken again. This time it produces 2275 as the satisfying value for s0 instead of 1596; still reporting sat. Fresh build from github.

NikolajBjorner commented 5 years ago

slightly reduced

(set-option :produce-models true)
(set-logic ALL)
(define-fun s6 () Int 0)
(define-fun s8 () Int 1)
(declare-fun s0 () Int)
(declare-fun s1 () Int)
(declare-fun s2 () (Seq Int))
(define-fun s3 () (Seq Int) (seq.unit s0))
(define-fun s4 () Bool (seq.contains s2 s3))
(define-fun s5 () Int (seq.len s2))
(define-fun s7 () Bool false)
(define-fun s9 () Int (- s5 s8))
(define-fun s10 () (Seq Int) (seq.extract s2 s8 s9))
(define-fun s11 () Int (seq.len s10))
(define-fun s12 () Bool false)
(define-fun s13 () Int (seq.nth s2 s6))
(define-fun s14 () Int (seq.nth s10 s6))
(define-fun s15 () Bool true)
(define-fun s16 () Int (- s11 s8))
(define-fun s17 () (Seq Int) (seq.extract s10 s8 s16))
(define-fun s18 () Int (seq.len s17))
(define-fun s19 () Bool false)
(define-fun s20 () Int (seq.nth s17 s6))
(define-fun s21 () Bool true)
(define-fun s22 () Int (- s18 s8))
(define-fun s23 () (Seq Int) (seq.extract s17 s8 s22))
(define-fun s24 () Int (seq.len s23))
(define-fun s25 () Bool false)
(define-fun s26 () Int (seq.nth s23 s6))
(define-fun s27 () Bool true)
(define-fun s28 () Int (- s24 s8))
(define-fun s29 () (Seq Int) (seq.extract s23 s8 s28))
(define-fun s30 () Int (seq.len s29))
(define-fun s31 () Bool (= s6 s30))
(define-fun s32 () Bool (and s27 s31))
(define-fun s33 () Bool (or s25 s32))
(define-fun s34 () Bool (and s21 s33))
(define-fun s35 () Bool (or s19 s34))
(define-fun s36 () Bool (and s15 s35))
(define-fun s37 () Bool (or s12 s36))
(define-fun s38 () Bool (or s7 s37))
(define-fun s39 () Bool (and s4 s38))
(define-fun s40 () (Seq Int) (seq.unit s1))
(define-fun s41 () Bool false)
(define-fun s42 () (Seq Int) (seq.++ s40 s2))
(define-fun s43 () Bool true)
(define-fun s44 () (Seq Int) (seq.unit s13))
(define-fun s45 () Bool false)
(define-fun s46 () (Seq Int) (seq.++ s40 s10))
(define-fun s47 () Bool true)
(define-fun s48 () (Seq Int) (seq.unit s14))
(define-fun s49 () Bool false)
(define-fun s50 () (Seq Int) (seq.++ s40 s17))
(define-fun s51 () Bool true)
(define-fun s52 () (Seq Int) (seq.unit s20))
(define-fun s53 () Bool (< s1 s26))
(define-fun s54 () (Seq Int) (seq.++ s40 s23))
(define-fun s55 () Bool true)
(define-fun s56 () (Seq Int) (seq.unit s26))
(define-fun s57 () Int (seq.nth s29 s6))
(define-fun s58 () Bool true)
(define-fun s59 () (Seq Int) (seq.++ s40 s29))
(define-fun s60 () Bool true)
(define-fun s61 () (Seq Int) (seq.unit s57))
(define-fun s62 () Int (- s30 s8))
(define-fun s63 () (Seq Int) (seq.extract s29 s8 s62))
(define-fun s64 () (Seq Int) (seq.++ s61 s63))
(define-fun s65 () (Seq Int) (ite s60 s64 s29))
(define-fun s66 () (Seq Int) (ite s58 s59 s65))
(define-fun s67 () (Seq Int) (ite s31 s40 s66))
(define-fun s68 () (Seq Int) (seq.++ s56 s67))
(define-fun s69 () (Seq Int) (ite s55 s68 s23))
(define-fun s70 () (Seq Int) (ite s53 s54 s69))
(define-fun s71 () (Seq Int) (ite s25 s40 s70))
(define-fun s72 () (Seq Int) (seq.++ s52 s71))
(define-fun s73 () (Seq Int) (ite s51 s72 s17))
(define-fun s74 () (Seq Int) (ite s49 s50 s73))
(define-fun s75 () (Seq Int) (ite s19 s40 s74))
(define-fun s76 () (Seq Int) (seq.++ s48 s75))
(define-fun s77 () (Seq Int) (ite s47 s76 s10))
(define-fun s78 () (Seq Int) (ite s45 s46 s77))
(define-fun s79 () (Seq Int) (ite s12 s40 s78))
(define-fun s80 () (Seq Int) (seq.++ s44 s79))
(define-fun s81 () (Seq Int) (ite s43 s80 s2))
(define-fun s82 () (Seq Int) (ite s41 s42 s81))
(define-fun s83 () (Seq Int) (ite s7 s40 s82))
(define-fun s84 () Int (seq.len s83))
(define-fun s85 () Bool true)
(define-fun s86 () Int (seq.nth s83 s6))
(define-fun s87 () Bool (> s0 s86))
(define-fun s88 () Int (- s84 s8))
(define-fun s89 () (Seq Int) (seq.extract s83 s8 s88))
(define-fun s90 () Int (seq.len s89))
(define-fun s91 () Bool true)
(define-fun s92 () Int (seq.nth s89 s6))
(define-fun s93 () Bool (> s0 s92))
(define-fun s94 () Int (- s90 s8))
(define-fun s95 () (Seq Int) (seq.extract s89 s8 s94))
(define-fun s96 () Int (seq.len s95))
(define-fun s97 () Bool false)
(define-fun s98 () Int (seq.nth s95 s6))
(define-fun s99 () Bool (> s0 s98))
(define-fun s100 () Int (- s96 s8))
(define-fun s101 () (Seq Int) (seq.extract s95 s8 s100))
(define-fun s102 () Int (seq.len s101))
(define-fun s103 () Bool false)
(define-fun s104 () Int (seq.nth s101 s6))
(define-fun s105 () Bool (> s0 s104))
(define-fun s106 () Int (- s102 s8))
(define-fun s107 () (Seq Int) (seq.extract s101 s8 s106))
(define-fun s108 () Int (seq.len s107))
(define-fun s109 () Bool true)
(define-fun s110 () Int (seq.nth s107 s6))
(define-fun s111 () Bool false)
(define-fun s112 () Bool true)
(define-fun s113 () Bool false)
(define-fun s114 () Bool true)
(define-fun s115 () Bool true)
(define-fun s116 () Bool true)
(define-fun s117 () Bool (= s0 s104))
(define-fun s118 () Bool (ite s105 s116 s117))
(define-fun s119 () Bool true)
(define-fun s120 () Bool (and s118 s119))
(define-fun s121 () Bool true)
(define-fun s122 () Bool (ite s99 s120 s121))
(assert s39)
(assert s87)
(assert s93)
;(assert (not s99))
;(assert (not s121))
(assert (not s122))

;(assert (= s0 3231))
;(assert (= s1 3232))
(assert (= s0 1))

;(apply (and-then propagate-values simplify))
;(exit)

(check-sat)
(get-value (s0))
(get-value (s1))
(get-value (s2))

script (could be more elaborate about reducing other AST nodes based on subterms)


import re
from z3 import *

has_bool = re.compile("\(define-fun (.*) \(\) Bool (.*)\)")

def read_lines(file):
    with open(file) as ins:
        for line in ins:
            yield line

def replace_bool(line):
    m = has_bool.search(line)
    if m:
        id = m.group(1)
        fml = m.group(2)
        if fml == "true" or fml == "false":
            return None
        return "(define-fun %s () Bool true)\n" % id, "(define-fun %s () Bool false)\n" % id
    return None

def test(lines):
    lines = "".join(lines)
    has_sat = False
    has_unsat = False
    for i in range(100):
        s = Solver()
        s.set("random_seed",i)
        s.from_string(lines)
        r = s.check()
        print(r)
        if r == sat:
            has_sat = True
            if i == 0:
                return False
        if r == unsat:
            has_unsat = True
        if has_sat and has_unsat:
            return True
    return False

def minimize(lines):
    num_reduced = 0
    for i in range(len(lines)):
        line = lines[i]
        rb = replace_bool(lines[i])
        if rb:
            case1, case2 = rb
            lines[i] = case1
            if test(lines):
                num_reduced += 1
                print(case1)
                continue
            lines[i] = case2
            if test(lines):
                num_reduced += 1
                print(case2)
                continue
            print(line)
            lines[i] = line
    print("; reduced: ", num_reduced)
    if num_reduced > 0:
        with open("../2445.smt2", 'w') as ous:
            ous.write("".join(lines))
    return num_reduced > 0

while True:
    lines = [line for line in read_lines("../2445.smt2")]
    if not minimize(lines):
        break

'''
NikolajBjorner commented 5 years ago

note: this is still open and reproduces with suitable value of smt.random_seed.

LeventErkok commented 4 years ago

@NikolajBjorner

z3 now answers this correctly unsat. Thanks!

NikolajBjorner commented 4 years ago

This tricky to debug. Depends random seed. Spent some time last month on this. Have smaller reports as well.