tenstorrent / tt-smi

Tenstorrent console based hardware information program
Apache License 2.0
18 stars 3 forks source link

TT-SMI Snapshot dumps smbus_telem struct which should be hidden #20

Closed: hmohiuddinTT closed this issue 3 months ago

hmohiuddinTT commented 3 months ago

We should not expose the smbus_telem struct in the external release of tt-smi. The snapshot currently dumps it inside each device_info entry:

{
    "time": "2024-04-04T17:39:38.045841",
    "host_info": {
        "OS": "Linux",
        "Distro": "Ubuntu 20.04.3 LTS",
        "Kernel": "5.4.0-174-generic",
        "Hostname": "hmohiuddin-wh-test",
        "Platform": "x86_64",
        "Python": "3.8.10",
        "Memory": "47.14 GB",
        "Driver": "TTKMD 1.27.1"
    },
    "device_info": [
        {
            "smbus_telem": {
                "BOARD_ID": "0x10001851170c06c",
                "SMBUS_TX_ENUM_VERSION": "0xba5e0001",
                "SMBUS_TX_DEVICE_ID": "0x401e1e52",
                "SMBUS_TX_ASIC_RO": "0x2c578",
                "SMBUS_TX_ASIC_IDD": "0x81e",
                "SMBUS_TX_BOARD_ID_HIGH": "0x1000185",
                "SMBUS_TX_BOARD_ID_LOW": "0x1170c06c",
                "SMBUS_TX_ARC0_FW_VERSION": "0x2190000",
                "SMBUS_TX_ARC1_FW_VERSION": "0x2190000",
                "SMBUS_TX_ARC2_FW_VERSION": null,
                "SMBUS_TX_ARC3_FW_VERSION": "0x2190000",
                "SMBUS_TX_SPIBOOTROM_FW_VERSION": "0x30b0000",
                "SMBUS_TX_ETH_FW_VERSION": "0x69000",
                "SMBUS_TX_M3_BL_FW_VERSION": "0x81020000",
                "SMBUS_TX_M3_APP_FW_VERSION": "0x5090002",
                "SMBUS_TX_DDR_SPEED": null,
                "SMBUS_TX_DDR_STATUS": "0x2222222",
                "SMBUS_TX_ETH_STATUS0": "0x11111111",
                "SMBUS_TX_ETH_STATUS1": "0x11111111",
                "SMBUS_TX_PCIE_STATUS": "0x11040000",
                "SMBUS_TX_FAULTS": null,
                "SMBUS_TX_ARC0_HEALTH": "0xf43173f8",
                "SMBUS_TX_ARC1_HEALTH": "0x635e3ab6",
                "SMBUS_TX_ARC2_HEALTH": null,
                "SMBUS_TX_ARC3_HEALTH": "0xa8f22a",
                "SMBUS_TX_FAN_SPEED": "0xffffffff",
                "SMBUS_TX_AICLK": "0x3e801f4",
                "SMBUS_TX_AXICLK": "0x384",
                "SMBUS_TX_ARCCLK": "0x21c",
                "SMBUS_TX_THROTTLER": null,
                "SMBUS_TX_VCORE": "0x2d0",
                "SMBUS_TX_ASIC_TEMPERATURE": "0x2550248",
                "SMBUS_TX_VREG_TEMPERATURE": "0x220021",
                "SMBUS_TX_BOARD_TEMPERATURE": "0x212322",
                "SMBUS_TX_TDP": "0x64000d",
                "SMBUS_TX_TDC": "0xf00011",
                "SMBUS_TX_VDD_LIMITS": "0x3b602d0",
                "SMBUS_TX_THM_LIMITS": "0x53004b",
                "SMBUS_TX_WH_FW_DATE": "0x43050f37",
                "SMBUS_TX_ASIC_TMON0": "0x262724ff",
                "SMBUS_TX_ASIC_TMON1": "0x1f27",
                "SMBUS_TX_MVDDQ_POWER": "0x191f61",
                "SMBUS_TX_GDDR_TRAIN_TEMP0": null,
                "SMBUS_TX_GDDR_TRAIN_TEMP1": null,
                "SMBUS_TX_BOOT_DATE": "0x43161302",
                "SMBUS_TX_RT_SECONDS": "0x112a2b",
                "SMBUS_TX_AUX_STATUS": null,
                "SMBUS_TX_ETH_DEBUG_STATUS0": "0xccddddcc",
                "SMBUS_TX_ETH_DEBUG_STATUS1": "0xccdddddd",
                "SMBUS_TX_TT_FLASH_VERSION": "0x20006"
            },
            "board_info": {
                "bus_id": "0000:04:00.0",
                "board_type": "n150 L",
                "board_id": "010001851170c06c",
                "coords": "(0, 0, 0, 0)",
                "dram_status": true,
                "dram_speed": "12G",
                "pcie_speed": 4,
                "pcie_width": 16
            },
            "telemetry": {
                "voltage": "0.72",
                "current": " 17.0",
                "power": " 13.0",
                "aiclk": " 500",
                "asic_temperature": "36.5"
            },
            "firmwares": {
                "arc_fw": "2.25.0.0",
                "arc_fw_date": "2024-03-05",
                "eth_fw": "6.9.0",
                "m3_bl_fw": "129.2.0.0",
                "m3_app_fw": "5.9.0.2",
                "tt_flash_version": "0.2.0.6"
            },
            "limits": {
                "vdd_min": "0.72",
                "vdd_max": "0.95",
                "tdp_limit": "100",
                "tdc_limit": "240",
                "asic_fmax": "1000",
                "therm_trip_l1_limit": "83",
                "thm_limit": "75",
                "bus_peak_limit": null
            }
        }
    ]
}
sbansalTT commented 3 months ago

Hey Hammad! We added that on purpose :) It is useful for debugging.