tenstorrent / tt-metal

:metal: TT-NN operator library, and TT-Metalium low level kernel programming model.
https://docs.tenstorrent.com/ttnn/latest/index.html
Apache License 2.0

ttnn.relu low PCC in some cases when using tensor height and width as shard shape and column major shard orientation #15423

Open amalbasaTT opened 1 day ago

amalbasaTT commented 1 day ago

To Reproduce

Steps to reproduce the behavior:

  1. Check out branch `amalbasaTT/unary_sharded-sweeps-2` (soon to be merged to main)
  2. Copy the unit test below to `test_relu_sharded.py`:

```python
import torch
import random
import ttnn
import itertools
import pytest
import traceback
import math
from loguru import logger
from functools import partial

from tests.sweep_framework.sweep_utils.utils import gen_shapes, get_device_grid_size, get_sharded_config
from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt, _gen_reshape_args_from_volume
from tests.ttnn.utils_for_testing import check_with_pcc
from models.utility_functions import torch_random

Y, X = get_device_grid_size()
DEVICE_GRID_SIZE = ttnn.CoreGrid(y=Y, x=X)


def run_relu_sharded_tests(
    input_shape,
    dtype,
    dlayout,
    core_size,
    tensor_height_width_as_shard_shape,
    shard_orientation,
    data_seed,
    device,
):
    torch.manual_seed(data_seed)

    x = gen_func_with_cast_tt(
        partial(torch_random, low=-100, high=100, dtype=torch.bfloat16), dtype
    )(input_shape)

    try:
        ref_value = torch.nn.functional.relu(x)

        mem_cfg = ttnn.create_sharded_memory_config(
            shape=input_shape,
            core_grid=ttnn.CoreGrid(y=core_size[0], x=core_size[1]),
            strategy=ttnn.ShardStrategy.BLOCK,
            orientation=shard_orientation,
            use_height_and_width_as_shard_shape=tensor_height_width_as_shard_shape,
        )

        tt_x = ttnn.from_torch(
            x,
            dtype=dtype,
            layout=dlayout,
            device=device,
            memory_config=mem_cfg,
        )

        tt_result = ttnn.relu(tt_x, memory_config=mem_cfg)
        tt_result = ttnn.to_torch(tt_result)

    except Exception as e:
        logger.warning(f"Test execution crashed: {e}")
        print(traceback.format_exc())
        raise e

    passed, output_str = check_with_pcc(x, ttnn.to_torch(tt_x), 1.0)
    assert passed, f"Failed before ttnn.relu {output_str}, {data_seed}, {input_shape}, {dtype}, {mem_cfg.shard_spec}"
    passed, output_str = check_with_pcc(ref_value, tt_result, 0.999)
    assert passed, f"Failed at ttnn.relu, {output_str}, {data_seed}, {input_shape}, {dtype}, {mem_cfg.shard_spec}"


test_sweep_args = [
    ([3, 2, 256, 320], ttnn.bfloat16, ttnn.TILE_LAYOUT, (8, 1), True, ttnn.ShardOrientation.COL_MAJOR, 5863207),
    ([3, 2, 256, 320], ttnn.bfloat8_b, ttnn.TILE_LAYOUT, (8, 1), True, ttnn.ShardOrientation.COL_MAJOR, 8320078),
    ([288, 32], ttnn.bfloat16, ttnn.TILE_LAYOUT, (8, 1), True, ttnn.ShardOrientation.COL_MAJOR, 11924152),
    ([2, 3, 224, 64], ttnn.bfloat16, ttnn.TILE_LAYOUT, (8, 1), True, ttnn.ShardOrientation.COL_MAJOR, 14234094),
    ([2, 3, 224, 64], ttnn.bfloat8_b, ttnn.TILE_LAYOUT, (8, 1), True, ttnn.ShardOrientation.COL_MAJOR, 15818731),
    ([16, 256, 128], ttnn.bfloat16, ttnn.TILE_LAYOUT, (2, 8), True, ttnn.ShardOrientation.COL_MAJOR, 3965624),
    ([16, 256, 128], ttnn.bfloat8_b, ttnn.TILE_LAYOUT, (2, 8), True, ttnn.ShardOrientation.COL_MAJOR, 17790071),
]


@pytest.mark.parametrize(
    "input_shape, dtype, dlayout, core_size, tensor_height_width_as_shard_shape, shard_orientation, data_seed",
    test_sweep_args,
)
def test_relu_sharded(
    input_shape, dtype, dlayout, core_size, tensor_height_width_as_shard_shape, shard_orientation, data_seed, device
):
    run_relu_sharded_tests(
        input_shape, dtype, dlayout, core_size, tensor_height_width_as_shard_shape, shard_orientation, data_seed, device
    )
```

  3. Run it with the command:

```
pytest test_relu_sharded.py
```

Expected behavior

All test cases should fail with low PCC at the `ttnn.relu` check.
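For quicker triage without checking out the sweep branch, here is a minimal standalone sketch of the first failing configuration. It is an illustration rather than part of the original repro: it assumes a single device opened via `ttnn.open_device`, replaces `gen_func_with_cast_tt` with a plain uniform generator over [-100, 100], and computes the PCC with `torch.corrcoef`, so exact values may differ from the sweep harness.

```python
import torch
import ttnn

# Sketch of the first failing case above: bfloat16, 8x1 core grid, COL_MAJOR shard
# orientation, tensor height/width used as the shard shape.
device = ttnn.open_device(device_id=0)

torch.manual_seed(5863207)
input_shape = [3, 2, 256, 320]
x = (torch.rand(input_shape) * 200 - 100).to(torch.bfloat16)  # uniform in [-100, 100)

mem_cfg = ttnn.create_sharded_memory_config(
    shape=input_shape,
    core_grid=ttnn.CoreGrid(y=8, x=1),
    strategy=ttnn.ShardStrategy.BLOCK,
    orientation=ttnn.ShardOrientation.COL_MAJOR,
    use_height_and_width_as_shard_shape=True,
)

tt_x = ttnn.from_torch(
    x, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device, memory_config=mem_cfg
)
tt_y = ttnn.to_torch(ttnn.relu(tt_x, memory_config=mem_cfg))

ref = torch.nn.functional.relu(x)
# PCC between the torch reference and the device result; values well below 0.999
# reproduce the reported low-PCC behavior.
pcc = torch.corrcoef(torch.stack([ref.flatten().float(), tt_y.flatten().float()]))[0, 1]
print("PCC vs torch.relu:", pcc.item())

ttnn.close_device(device)
```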

umadevimcw commented 13 hours ago

@amalbasaTT isn't this issue similar to #15159?

amalbasaTT commented 10 hours ago

Not exactly. The issue you are mentioning has clear conditions that, when met, always reproduce it: it occurs with sharding strategies whenever the second-to-innermost dimension is not divisible by 32. This issue only happens in some cases when using column-major shard orientation with the tensor height and width as the shard shape. I also typed the wrong title, so I'll fix that.
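As a quick sanity check (added here for illustration, assuming #15159's trigger is a second-to-innermost dimension that is not divisible by 32), none of the failing shapes in this report meet that condition, so these failures cannot be explained by that issue alone:

```python
# Assumption: #15159 triggers when the second-to-innermost dimension is not divisible by 32.
failing_shapes = [[3, 2, 256, 320], [288, 32], [2, 3, 224, 64], [16, 256, 128]]
for shape in failing_shapes:
    # Prints 0 for every shape: 256, 288, 224, and 256 are all multiples of 32.
    print(shape, "second-to-innermost % 32 ==", shape[-2] % 32)
```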