kylebd99 opened this issue 4 months ago
Never mind, I think this was coming from the Int/Bool interaction. An improved version, which properly initializes A, doesn't exhibit the problem:
using Finch
using SparseArrays
using MatrixDepot

matrix = "GAP/GAP-road"
A = Tensor(Dense(SparseList(Element(false))), SparseMatrixCSC(matrixdepot(matrix)) .!= 0)
n = size(A)[1]
B = Tensor(SparseByteMap(Element(false)), SparseVector(fsprand(Bool, n, 100000)))
C1 = Tensor(SparseList(Element(false)), SparseVector(fsprand(Bool, n, 100000)))
C2 = Tensor(SparseList(Element(false)), SparseVector(fsprand(Bool, n, 100000)))
C3 = Tensor(Sparse(Element(false)), SparseVector(fsprand(Bool, n, 100000)))

D1 = Tensor(Sparse(Element(false)))
@finch begin
    D1 .= 0
    for i = _
        for j = _
            D1[j] <<choose(false)>>= A[walk(j), i] & B[i] & !(C1[follow(j)] | C2[follow(j)] | C3[follow(j)])
        end
    end
end
println("D1 Non-Default Values: ", sum(D1))
println("D1 Countstored: ", countstored(D1))

D2 = Tensor(Sparse(Element(false)))
@finch begin
    D2 .= 0
    for i = _
        for j = _
            D2[j] <<choose(false)>>= A[walk(j), i] & D1[i] & !(C1[follow(j)] | C2[follow(j)] | C3[follow(j)])
        end
    end
end
println("D2 Non-Default Values: ", sum(D2))
println("D2 Countstored: ", countstored(D2))

D3 = Tensor(Sparse(Element(false)))
@finch begin
    D3 .= 0
    for i = _
        for j = _
            D3[j] <<choose(false)>>= A[walk(j), i] & D2[i] & !(C1[follow(j)] | C2[follow(j)] | C3[follow(j)])
        end
    end
end
println("D3 Non-Default Values: ", sum(D3))
println("D3 Countstored: ", countstored(D3))

D4 = Tensor(Sparse(Element(false)))
@finch begin
    D4 .= 0
    for i = _
        for j = _
            D4[j] <<choose(false)>>= A[walk(j), i] & D3[i] & !(C1[follow(j)] | C2[follow(j)] | C3[follow(j)])
        end
    end
end
println("D4 Non-Default Values: ", sum(D4))
println("D4 Countstored: ", countstored(D4))
Okay, wait, I've found the issue. I think the pruning due to the ! clause isn't being reflected in storage as implicit zeros; the pruned entries end up stored explicitly. If this is intended behavior, that's totally fine and you can go ahead and re-close this issue.
Here's a new example which does reproduce it, though:
using Finch
using SparseArrays
using MatrixDepot

matrix = "GAP/GAP-road"
A = Tensor(Dense(SparseList(Element(false))), SparseMatrixCSC(matrixdepot(matrix)) .!= 0)
n = size(A)[1]
B = Tensor(Sparse(Element(false)), SparseVector(fsprand(Bool, n, 100)) .!= 0)
C1 = Tensor(SparseList(Element(false)), SparseVector(fsprand(Bool, n, 1000000)) .!= 0)
C2 = Tensor(SparseList(Element(false)), SparseVector(fsprand(Bool, n, 1000000)) .!= 0)
C3 = Tensor(Sparse(Element(false)), SparseVector(fsprand(Bool, n, 1000000)) .!= 0)

function test_nnz()
    old_D = B
    for iter in 1:100
        new_D = Tensor(Sparse(Element(false)))
        @finch begin
            new_D .= 0
            for i = _
                for j = _
                    new_D[j] <<choose(false)>>= A[walk(j), i] & old_D[i] & !(C1[follow(j)] | C2[follow(j)] | C3[follow(j)])
                end
            end
        end
        old_D = new_D
        println("new_D Non-Default Values: ", sum(new_D))
        println("new_D Countstored: ", countstored(new_D))
    end
end

function test_nnz_fix()
    old_D = B
    for iter in 1:100
        new_D = Tensor(Sparse(Element(false)))
        @finch begin
            new_D .= 0
            for i = _
                for j = _
                    new_D[j] <<choose(false)>>= A[walk(j), i] & old_D[i] & !(C1[follow(j)] | C2[follow(j)] | C3[follow(j)])
                end
            end
        end
        # Copying into a fresh tensor drops the explicitly stored fill values.
        old_D = Tensor(Sparse(Element(false)), new_D)
        println("new_D Non-Default Values: ", sum(new_D))
        println("new_D Countstored: ", countstored(new_D))
    end
end

function test_nnz_fix2()
    old_D = B
    for iter in 1:100
        new_D = Tensor(Sparse(Element(false)))
        @finch begin
            new_D .= 0
            for i = _
                for j = _
                    # Same kernel without the ! clause.
                    new_D[j] <<choose(false)>>= A[walk(j), i] & old_D[i]
                end
            end
        end
        old_D = new_D
        println("new_D Non-Default Values: ", sum(new_D))
        println("new_D Countstored: ", countstored(new_D))
    end
end

#test_nnz()
#test_nnz_fix()
test_nnz_fix2()
Is the issue that
julia> A = Tensor(Sparse(Element(false)), pattern!(fsprand(Bool, 4, 2)))
4-Tensor
└─ SparseDict (false) [1:4]
├─ [1]: true
└─ [4]: true
julia> B = Tensor(Sparse(Element(false)))
0-Tensor
└─ SparseDict (false) [1:0]
julia> @finch begin
           B .= 0
           for i = _
               B[i] <<choose(false)>>= !A[follow(i)]
           end
       end
(B = Tensor(SparseDict{Int64}(Element{false, Bool, Int64}(Bool[0, 1, 1, 0]), 4, [1, 5], [1, 2, 3, 4], [1, 2, 3, 4], Dict((1, 2) => 2, (1, 1) => 1, (1, 3) => 3, (1, 4) => 4), Int64[])),)
julia> B
4-Tensor
└─ SparseDict (false) [1:4]
├─ [1]: false
├─ [2]: true
├─ [3]: true
└─ [4]: false
stores explicit false values?
The problem is that Finch can't statically guarantee that the explicit values of Element(false) are true, and thus doesn't know that ! would produce a fill value of false.
Fortunately, PatternLevel does guarantee that all stored values are true. Consider the following:
julia> A = pattern!(A)
4-Tensor
└─ SparseDict (false) [1:4]
├─ [1]: true
└─ [4]: true
julia> @finch begin
           B .= 0
           for i = _
               B[i] <<choose(false)>>= !A[follow(i)]
           end
       end
(B = Tensor(SparseDict{Int64}(Element{false, Bool, Int64}(Bool[1, 1]), 4, [1, 3], [2, 3], [1, 2], Dict((1, 2) => 1, (1, 3) => 2), [3])),)
julia> B
4-Tensor
└─ SparseDict (false) [1:4]
├─ [2]: true
└─ [3]: true
Does that help? (Side note: ISO levels would be just like pattern levels, but for more values than just true or false: https://github.com/willow-ahrens/Finch.jl/issues/341)
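For instance, here is a hedged sketch reusing names from the examples above: converting the boolean masks with pattern! before running the kernel gives Finch operands whose stored values are statically known to be true.

# Sketch: after pattern!, every stored value is statically true, so Finch
# can reason about ! over these operands at compile time.
A_pat = pattern!(A)
C1_pat = pattern!(C1)
C2_pat = pattern!(C2)
C3_pat = pattern!(C3)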
We can also lift dynamic values into statically known ones using if statements:
@finch begin
    B .= 0
    for i = _
        if !A[follow(i)]
            B[i] <<choose(false)>>= true
        end
    end
end
The above program only executes the increment statement when !A[follow(i)] is true, regardless of whether that is knowable statically. Since the store only runs inside that branch, B never stores explicit false values.
Also, the dropfills! method may be helpful to you, as that is the method which Tensor(lvl, arr) calls to copy arr while dropping fill values.
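For example, a minimal sketch of manual compaction, assuming dropfills!(dst, src) copies src into dst while skipping fill values (as the Tensor constructor does):

# Sketch: compact a tensor that has accumulated explicit falses.
compact_D = Tensor(Sparse(Element(false)))
dropfills!(compact_D, new_D)  # only true entries are copied over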
In short, Finch doesn't look at values before storing them to check whether they are fill. Instead, the store operation itself is what introduces explicit values into a tensor, regardless of what the value was. Thus, the result of countstored reflects how many store operations actually executed, i.e., how many stores Finch was not able to compile away by the time it reached the assignment statement. (This is why the first B above has four stored entries even though only two of them are true.)
Ah, that makes sense and answers my question. Thanks for the thorough response! In Galley, I've fixed this by intermittently checking whether I would gain by compacting via dropfills!
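Something like this sketch, where the 2x threshold is a made-up tuning knob rather than Galley's actual policy (assumes dropfills(D) returns a compacted copy without explicit fill values):

# Sketch: compact only when enough stored entries are fill to pay for the copy.
function maybe_compact(D)
    if countstored(D) > 2 * sum(D)  # over half the stored Bool entries are false
        return dropfills(D)
    end
    return D
end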
In general, what is stopping Finch from including the if condition "expr != default" before every assignment automatically? Does that just introduce an unacceptable overhead generally?
We could benchmark how expensive that would be!
julia> A = Tensor(Dense(SparseList(Element(0.0))), fsprand(10_000, 10_000, 100_000))
10000×10000-Tensor
└─ Dense [:,1:10000]
├─ [:, 1]: SparseList (0.0) [1:10000]
│ ├─ [422]: 0.0418729
│ ├─ [868]: 0.804566
│ ├─ ⋮
│ ├─ [9533]: 0.0671509
│ └─ [9665]: 0.723424
├─ [:, 2]: SparseList (0.0) [1:10000]
│ ├─ [1673]: 0.528857
│ ├─ [3411]: 0.576459
│ ├─ ⋮
│ ├─ [9553]: 0.597155
│ └─ [9753]: 0.726254
├─ ⋮
├─ [:, 9999]: SparseList (0.0) [1:10000]
│ ├─ [226]: 0.870514
│ ├─ [1489]: 0.239856
│ ├─ ⋮
│ ├─ [7672]: 0.221962
│ └─ [9740]: 0.17364
└─ [:, 10000]: SparseList (0.0) [1:10000]
├─ [500]: 0.361668
├─ [2668]: 0.232563
├─ ⋮
├─ [9119]: 0.302905
└─ [9128]: 0.810383
julia> x = Tensor(Dense(Element(0.0)), rand(10_000))
10000-Tensor
└─ Dense [1:10000]
├─ [1]: 0.465509
├─ [2]: 0.702242
├─ ⋮
├─ [9999]: 0.321236
└─ [10000]: 0.71483
julia> y = Tensor(Dense(Element(0.0)))
0-Tensor
└─ Dense [1:0]
julia> @benchmark begin
           (y, A, x) = ($y, $A, $x)
           @finch begin
               y .= 0
               for j = _, i = _
                   y[j] += A[i, j] * x[i]
               end
           end
       end
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
Range (min … max): 100.000 μs … 144.334 μs ┊ GC (min … max): 0.00% … 0.00%
Time (median): 101.375 μs ┊ GC (median): 0.00%
Time (mean ± σ): 102.333 μs ± 2.792 μs ┊ GC (mean ± σ): 0.00% ± 0.00%
▅▇██▇█▆▅▄▃▄▄▄▃▂▃▃▃▃▃▃▁▁▁▁▂▁▁▁ ▁ ▃
█████████████████████████████████▆▇▇▇███▇▇▇▇▇▇▇▇▆▅▆▅▆▇▅▅▄▅▅▄▆ █
100 μs Histogram: log(frequency) by time 114 μs <
Memory estimate: 160 bytes, allocs estimate: 3.
julia> @benchmark begin
           (y, A, x) = ($y, $A, $x)
           @finch begin
               y .= 0
               for j = _, i = _
                   let a = A[i, j]
                       if a != 0.0
                           y[j] += a * x[i]
                       end
                   end
               end
           end
       end
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
Range (min … max): 133.625 μs … 264.333 μs ┊ GC (min … max): 0.00% … 0.00%
Time (median): 134.542 μs ┊ GC (median): 0.00%
Time (mean ± σ): 136.187 μs ± 3.610 μs ┊ GC (mean ± σ): 0.00% ± 0.00%
▅▇█▆▅▃▁ ▂▄▄▂ ▂▃▃▃▂▁ ▂
▆███████▇▆▆▅▆▅▂▇██████▇▆▅▆▇██████████▇▇▆▆▆████▇█▇▇▇▇▇▆▅▆▆▅▆▆▅ █
134 μs Histogram: log(frequency) by time 148 μs <
Memory estimate: 160 bytes, allocs estimate: 3.
That's super interesting. At roughly 33% slower (about 134 μs vs. 101 μs median), that looks like too much overhead to make unavoidable, but it's worth a benchmark for sure.
While optimizing a BFS, I've noticed that the sparse level tends to accumulate unnecessary nonzeros. I'm not sure exactly where this is coming from, but it results in a significant slowdown over time.