Open nemanjagrujic opened 5 months ago
@eyonland Observations on above issues This issue has dependencies on
@ruthreshx Please add Your observations here
With respect to precision issue, please find the example below to understand the problem
x = torch.Tensor(size=input_shape[0]).uniform_(-100, 100).to(torch.bfloat16)
y = torch.Tensor(size=input_shape[1]).uniform_(-100, 100).to(torch.bfloat16)
try:
# get ref result
x.fill_(-69.50000)
y.fill_(-81.00000)  # hard coded this for debugging purposes
ref_value = torch.logaddexp2(x, y)
# Replicated the logic used in TT
test_tt_logic = torch.add(torch.exp2(x), torch.exp2(y)) # here result is 0.00000000000000000000119775752698
test_tt_logic = torch.log2(test_tt_logic) #here output is -69.5000000
tt_result = ttnn_ops.logaddexp2(
x,
y,
device=device,
dtype=dtype,
layout=dlayout,
input_mem_config=in_mem_config,
output_mem_config=output_mem_config,
)
except Exception as e:
logger.warning(f"Operation execution crashed")
raise e
assert len(tt_result.shape) == len(ref_value.shape)
assert tt_result.shape == ref_value.shape
# ref value is -69.500
# tt_result is -inf
assert_with_pcc(ref_value, tt_result, 0.99)
In this part of code
# Replicated the logic used in TT
test_tt_logic = torch.add(torch.exp2(x), torch.exp2(y)) # here result is 0.00000000000000000000119775752698
test_tt_logic = torch.log2(test_tt_logic) #here output is -69.5000000
I have replicated the logic used in TT. Here you can see that the output of add(exp2()) is a small but non-zero value, whereas in TT it is zero, and log2(0) is always -inf in TT; hence the PCC drops. This is a similar case for other ops as well.
@eyonland Hence we would like to get your thoughts on this
@rtawfik01 As discussed the output of exp2
results are zero in TT
whereas in torch, getting values at precision for the same input hence the PCC gets dropped. Therefore I am adding the label LLk to this issue
Please find the code below CC @eyonland
# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
# SPDX-License-Identifier: Apache-2.0
from loguru import logger
import random
import pytest
import torch
import ttnn
from tests.ttnn.utils_for_testing import assert_with_pcc
from tests.ttnn.python_api_testing.sweep_tests import ttnn_ops
def run_logaddexp2_tests(input_shape, dtype, dlayout, in_mem_config, output_mem_config, data_seed, device):
    """Reproduce the exp2 underflow behind the ttnn.logaddexp2 PCC failure.

    torch.exp2(-69.5) in bfloat16 is tiny but non-zero, while the TT exp2
    result for the same input is flushed to zero; log2(0) is then -inf on
    device, which is why the logaddexp2 PCC comparison fails.  This helper
    prints the torch and TT exp2 outputs side by side for a fixed failing
    input so the precision gap can be inspected directly.

    Args:
        input_shape: pair of shapes for the two input tensors (x, y).
        dtype: ttnn dtype(s) for the op under test.
        dlayout: ttnn tensor layout(s).
        in_mem_config: input memory config(s) passed to the ttnn op.
        output_mem_config: output memory config passed to the ttnn op.
        data_seed: torch RNG seed (the random fill is overwritten below).
        device: TT device handle (pytest fixture).
    """
    torch.manual_seed(data_seed)

    x = torch.Tensor(size=input_shape[0]).uniform_(-100, 100).to(torch.bfloat16)
    y = torch.Tensor(size=input_shape[1]).uniform_(-100, 100).to(torch.bfloat16)

    try:
        # Hard-coded failing values (debugging aid): exp2(-69.5) + exp2(-81.0)
        # stays representable in torch but underflows to 0 on TT.
        x.fill_(-69.5)
        y.fill_(-81.0)

        print("Exp2 results of Torch....")
        # Full precision, no scientific notation, so the tiny values are visible.
        torch.set_printoptions(sci_mode=False, precision=32)
        print(torch.exp2(x))
        print(torch.exp2(y))

        tt_x = ttnn_ops.exp2(
            x,
            device=device,
            dtype=dtype,
            layout=dlayout,
            input_mem_config=in_mem_config,
            output_mem_config=output_mem_config,
        )
        tt_y = ttnn_ops.exp2(
            y,
            device=device,
            dtype=dtype,
            layout=dlayout,
            input_mem_config=in_mem_config,
            output_mem_config=output_mem_config,
        )
    except Exception:
        # Plain string: the message has no placeholders, so no f-string needed.
        logger.warning("Operation execution crashed")
        # Bare raise preserves the original traceback.
        raise

    # NOTE(review): the original logaddexp2 comparison (torch.logaddexp2 ref vs
    # ttnn_ops.logaddexp2 + assert_with_pcc) is disabled while the exp2
    # precision issue is investigated: torch gives ref -69.5 while the TT
    # result is -inf, so the PCC assertion would fail.
    print("Exp2 results of TT....")
    print(tt_x)
    print(tt_y)
# Single sweep configuration known to reproduce the failure.  Field order
# matches the parametrize string below:
# (input_shape, dtype, dlayout, in_mem_config, output_mem_config, data_seed)
test_sweep_args2 = [
    (
        [(19, 12), (19, 12)],
        [ttnn.bfloat16, ttnn.bfloat16],
        [ttnn.TILE_LAYOUT, ttnn.TILE_LAYOUT],
        [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG],
        (ttnn.DRAM_MEMORY_CONFIG),
        18261510,  # RNG seed (inputs are hard-coded in the helper anyway)
    ),
]
@pytest.mark.parametrize(
    "input_shape, dtype, dlayout, in_mem_config, output_mem_config, data_seed",
    (test_sweep_args2),
)
def test_eltwise_logaddexp2(input_shape, dtype, dlayout, in_mem_config, output_mem_config, data_seed, device):
    # Pytest entry point: forwards one sweep configuration (plus the TT
    # `device` fixture) to run_logaddexp2_tests.
    run_logaddexp2_tests(input_shape, dtype, dlayout, in_mem_config, output_mem_config, data_seed, device)
This issue is on hold and depends on precision issue that is discussed here : https://github.com/tenstorrent/tt-metal/issues/13002
When testing ttnn ops with TILE layout, bfloat8_b dtype random shapes like [4, 7, 21, 133] most of the operations work correctly.
But ttnn.log, ttnn.log2 and ttnn.log10 operations fail with low PCC.
ttnn.logaddexp2 also fails with low pcc but in very few test cases which are also covered with unit test in this ticket.
Problem is observed on both GS and WH cards.
To Reproduce Steps to reproduce the behavior:
ngrujic/op_bug_unit_tests
(soon to be merged into main
).test_eltwise_log_log2_log10.py
using this command. Expected behavior: there are a few test cases presented in the unit test which are failing with low PCC
Running sweeps To get additional information and results for different combinations of input shapes, types, layouts and memory configs for which this operation was tested you can also run locally sweeps and check the results. To do this you should:
pytest tests/ttnn/python_api_testing/sweep_tests/run_sweep_test.py --input-path tests/ttnn/python_api_testing/sweep_tests/test_configs/ci_sweep_tests_broken/wormhole/ttnn_eltwise_log10_test.yaml --input-method cli --cli-input results_ttnn_log10
There are more sweeps which you can try by changing the above command to target files: