Open shenki opened 3 years ago
If we use a FIT wrapped u-boot instead, there's even less output:
dd if=aspeed-g5-dev/u-boot.bin of=test.img conv=notrunc seek=64 bs=1024
qemu-system-arm -nographic -M rainier-bmc -drive file=test.img,if=sd,index=2,format=raw
U-Boot SPL 2019.04-00128-g504c7831b516 (Sep 22 2021 - 15:23:45 +0930)
Trying to boot from MMC1
QEMU: Terminated
$ git bisect log
# bad: [fbe79f6b8ef9aa1219ee586f43c3107cd3767540] defconfig
# good: [e5902174d9e0d9611619a8b468a5a9b73ab402b5] configs: aspeed-common: Add redundant env for mmc
git bisect start 'test1' 'origin/v2019.04-aspeed-openbmc'
# bad: [c3d7eb06384ed8f2346e39490ebefa4bf2e93074] ram: aspeed: specify the DDR4 speed configs for AST2600
git bisect bad c3d7eb06384ed8f2346e39490ebefa4bf2e93074
# bad: [4f1716a8c0e77b57cc0484d751573971bb05d019] evb-ast2600-cot: Enable DP
git bisect bad 4f1716a8c0e77b57cc0484d751573971bb05d019
# bad: [5620c876a6b111901756d49cb1e05bb9346f6380] crypto: Add Aspeed ARCY support
git bisect bad 5620c876a6b111901756d49cb1e05bb9346f6380
# good: [f541a2c415189cf66298e2ca22b44c0e9f8871e0] spl: ast2600: Enable HACE probing
git bisect good f541a2c415189cf66298e2ca22b44c0e9f8871e0
# good: [b8233483f822dc441692cc7df659e402e7edcd1b] dt-bindings: Rename AST2600 RSA/ECC clock
git bisect good b8233483f822dc441692cc7df659e402e7edcd1b
# good: [f6110ecd6dbc383d972dcf0f4dd49ca0665edfb8] clk: aspeed: Add RSA/ECC clock to AST2600
git bisect good f6110ecd6dbc383d972dcf0f4dd49ca0665edfb8
# first bad commit: [5620c876a6b111901756d49cb1e05bb9346f6380] crypto: Add Aspeed ARCY support
For some reason the presence of the ARCY driver causes the serial port to not be found:
(gdb) break serial_init
Breakpoint 4 at 0x714c: file ../drivers/serial/
Thread 1 hit Breakpoint 4, serial_init () at ../drivers/serial/serial-uclass.c:75
75 const void *blob = gd->fdt_blob;
(gdb) n
160 serial_find_console_or_panic();
(gdb) s
0x0000714e in serial_find_console_or_panic () at ../drivers/serial/serial-uclass.c:75
75 const void *blob = gd->fdt_blob;
(gdb) n
87 } else if (CONFIG_IS_ENABLED(OF_CONTROL) && blob) {
(gdb)
98 if (!serial_check_stdout(blob, &dev)) {
(gdb)
151 panic_str("No serial driver found");
Reverting the ARCY driver and setting #define DEBUG 1
at the top of common/spl/spl_mmc.c
and common/spl/spl.c
:
$ ~/dev/qemu/cedric/build/qemu-system-arm -nographic -M rainier-bmc -drive file=test.img,if=sd,index=2,format=raw
U-Boot SPL 2019.04-00156-ge3118aa45e30-dirty (Sep 29 2021 - 18:06:44 +0930)
SPL malloc() before relocation used 0x7e8 bytes (1 KB)
>>SPL: board_init_r()
spl_init
Trying to boot from MMC1
spl: mmc boot mode: raw
hdr read sector 80, count=1
mkimage signature not found - ih_magic = 0
read 780 sectors to 81000000
Jumping to U-Boot
SPL malloc() used 0xf4 bytes (0 KB)
loaded - jumping to U-Boot...
image entry point: 0x81000000
Some other mmc debugging:
spl: mmc boot mode: raw
mmc_bread: start 00000080 blkcnt 00000001 dst 83000000
mmc_switch_part 0
hdr read sector 80, count=1
mmc_bread: start 00000080 blkcnt 00000780 dst 81000000
mmc_switch_part 0
read 780 sectors to 81000000
(gdb) break jump_to_image_no_args
Breakpoint 1 at 0x7d4: file ../common/spl/spl.c, line 335.
(gdb) c
Continuing.
Thread 1 hit Breakpoint 1, jump_to_image_no_args (spl_image=spl_image@entry=0x902fff04) at ../common/spl/spl.c:335
335 image_entry_noargs_t image_entry =
(gdb) where
#0 jump_to_image_no_args (spl_image=spl_image@entry=0x902fff04) at ../common/spl/spl.c:335
#1 0x00000936 in board_init_r (dummy1=<optimized out>, dummy2=<optimized out>) at ../common/spl/spl.c:737
#2 0x0000085c in spl_init () at ../common/spl/spl.c:503
#3 0x00004500 in ast2600_clk_enable (clk=0xbde8 <_u_boot_list_2_spl_image_loader_2_spl_mmc_load_image0BOOT_DEVICE_MMC1>)
at ../drivers/clk/aspeed/clk_ast2600.c:1260
#4 0x00000000 in ?? ()
(gdb) si
0x000007d6 336 (image_entry_noargs_t)spl_image->entry_point;
(gdb) p spl_image->entry_point
$2 = 2164260864
(gdb) p/x spl_image->entry_point
$3 = 0x81000000
(gdb) x/16 spl_image->entry_point
0x81000000: 0x00000000 0x00000000 0x00000000 0x00000000
0x81000010: 0x00000000 0x00000000 0x00000000 0x00000000
0x81000020: 0x00000000 0x00000000 0x00000000 0x00000000
0x81000030: 0x00000000 0x00000000 0x00000000 0x00000000
So for some reason there's all zeroes where the image should be.
On a working u-boot, breaking at mmc_bread
shows that the single sector read to get the header works:
(gdb) break mmc_bread
Breakpoint 1 at 0x5748: file ../drivers/mmc/mmc.c, line 405.
(gdb) c
Continuing.
Thread 1 hit Breakpoint 1, mmc_bread (dev=0x10015c14, start=128, blkcnt=1, dst=0x83000000) at ../drivers/mmc/mmc.c:405
405 struct blk_desc *block_dev = dev_get_uclass_platdata(dev);
(gdb) n
407 int dev_num = block_dev->devnum;
(gdb) n
411 if (blkcnt == 0)
(gdb) n
414 struct mmc *mmc = find_mmc_device(dev_num);
(gdb) n
415 if (!mmc)
(gdb) n
419 err = mmc_switch_part(mmc, block_dev->hwpart);
(gdb) n
423 if (err < 0)
(gdb) n
426 if ((start + blkcnt) > block_dev->lba) {
(gdb) n
434 if (mmc_set_blocklen(mmc, mmc->read_bl_len)) {
(gdb) n
440 cur = (blocks_todo > mmc->cfg->b_max) ?
(gdb) n
442 if (mmc_read_blocks(mmc, dst, start, cur) != cur) {
(gdb) n
390 return 0;
(gdb) p dst
$1 = (void *) 0x83000000
(gdb) x/10x 0x83000000
0x83000000: 0xedfe0dd0 0xb1020000 0x38000000 0x34020000
0x83000010: 0x28000000 0x11000000 0x02000000 0x00000000
0x83000020: 0x7d000000 0xfc010000
So something about the mmc access is incorrect.
Adding some debugging to drivers/mmc/sdhci.c
:
@ -153,6 +161,8 @@ static int sdhci_send_command(struct mmc *mmc, struct mmc_cmd *cmd,
int mmc_dev = mmc_get_blk_desc(mmc)->devnum;
ulong start = get_timer(0);
+ debug("%s cmd %02x\n", __func__, cmd->cmdidx);
+
/* Timeout unit - ms */
static unsigned int cmd_timeout = SDHCI_CMD_DEFAULT_TIMEOUT;
@@ -40,6 +44,8 @@ static void sdhci_reset(struct sdhci_host *host, u8 mask)
static void sdhci_cmd_done(struct sdhci_host *host, struct mmc_cmd *cmd)
{
int i;
+ printf("%s\n", __func__);
+
if (cmd->resp_type & MMC_RSP_136) {
and running with some tracing on:
qemu-system-arm -nographic -M rainier-bmc -drive file=test.img,if=sd,index=2,format=raw -d trace:sdhci_access -s -S
shows that a short sequence is run in the 'good' case that is skipped in the failing case:
sdhci_send_command cmd 06
sdhci_access rd32: addr[0x0024] -> 0x01ff0000 (33488896)
sdhci_access wr32: addr[0x0030] <- 0xffffffff (4294967295)
sdhci_access wr8: addr[0x002e] <- 0x0000000e (14)
sdhci_access wr32: addr[0x0008] <- 0x03b34900 (62081280)
sdhci_access wr16: addr[0x000e] <- 0x0000061b (1563)
sdhci_access rd32: addr[0x0030] -> 0x00000003 (3)
sdhci_cmd_done
sdhci_access rd32: addr[0x0010] -> 0x00000900 (2304)
sdhci_access wr32: addr[0x0030] <- 0x00000001 (1)
sdhci_access rd32: addr[0x0030] -> 0x00000002 (2)
sdhci_access wr32: addr[0x0030] <- 0xffffffff (4294967295)
sdhci_send_command cmd 0d
sdhci_access rd32: addr[0x0024] -> 0x01ff0000 (33488896)
sdhci_access wr32: addr[0x0030] <- 0xffffffff (4294967295)
sdhci_access wr32: addr[0x0008] <- 0x00010000 (65536)
sdhci_access wr16: addr[0x000e] <- 0x00000d1a (3354)
sdhci_access rd32: addr[0x0030] -> 0x00000001 (1)
sdhci_cmd_done
sdhci_access rd32: addr[0x0010] -> 0x00000900 (2304)
sdhci_access wr32: addr[0x0030] <- 0x00000001 (1)
sdhci_access rd32: addr[0x0030] -> 0x00000000 (0)
sdhci_access wr32: addr[0x0030] <- 0xffffffff (4294967295)
spl: mmc boot mode: raw
Command 6 is SWITCH_FUNCTION, which is used to switch to the hardware boot partition. Command 0xd (13) is SEND_STATUS.
It's unknown why that extra cmd 06 is being sent.
Another difference is the next cmd 06 being sent, which happens after the spl: mmc boot mode: raw
:
spl: mmc boot mode: raw
sdhci_send_command cmd 06
sdhci_access rd32: addr[0x0024] -> 0x01ff0000 (33488896)
sdhci_access wr32: addr[0x0030] <- 0xffffffff (4294967295)
sdhci_access wr8: addr[0x002e] <- 0x0000000e (14)
sdhci_access wr32: addr[0x0008] <- 0x03b34800 (62081024)
The working case writes 0x03b34900. instead of 800.
cmd.cmdidx = MMC_CMD_SWITCH;
cmd.resp_type = MMC_RSP_R1b;
cmd.cmdarg = (MMC_SWITCH_MODE_WRITE_BYTE << 24) |
(index << 16) |
(value << 8);
$ u 0x03b34900 |grep bits
bits set: 25, 24, 23, 21, 20, 17, 16, 14, 11, 8 (10)
$ u 0x03b34800 |grep bits
bits set: 25, 24, 23, 21, 20, 17, 16, 14, 11 (9)
broken:
spl: mmc boot mode: raw
mmc_switch_part part_num 0
__mmc_switch: idx 06 cmdarg 3b34800 (index 179, value 72)
sdhci_send_command cmd 06
sdhci_access rd32: addr[0x0024] -> 0x01ff0000 (33488896)
sdhci_access wr32: addr[0x0030] <- 0xffffffff (4294967295)
sdhci_access wr8: addr[0x002e] <- 0x0000000e (14)
sdhci_access wr32: addr[0x0008] <- 0x03b34800 (62081024)
working:
mmc_switch_part part_num 1
__mmc_switch: idx 06 cmdarg 3b34900 (index 179, value 73)
sdhci_send_command cmd 06
sdhci_access rd32: addr[0x0024] -> 0x01ff0000 (33488896)
sdhci_access wr32: addr[0x0030] <- 0xffffffff (4294967295)
sdhci_access wr8: addr[0x002e] <- 0x0000000e (14)
sdhci_access wr32: addr[0x0008] <- 0x03b34900 (62081280)
So we are switching to the wrong partition! The working case is requesting hw part 1, which is boot0.
working:
#0 mmc_switch_part (mmc=0x10015b00, part_num=1) at ../drivers/mmc/mmc.c:949
#1 0x00001018 in spl_mmc_load (spl_image=0x902fff04, bootdev=0x902ffee8, filename=filename@entry=0x0 <print_cpuinfo>,
raw_part=raw_part@entry=0, raw_sect=raw_sect@entry=128) at ../common/spl/spl_mmc.c:350
#2 0x000010f6 in spl_mmc_load_image (spl_image=<optimized out>, bootdev=<optimized out>) at ../common/spl/spl_mmc.c:401
#3 0x000008f8 in spl_load_image (loader=0xc008 <_u_boot_list_2_spl_image_loader_2_spl_mmc_load_image0BOOT_DEVICE_MMC1>,
spl_image=0x902fff04) at ../common/spl/spl.c:546
#4 boot_from_devices (count=5, count=5, spl_boot_list=0x902ffef0, spl_image=0x902fff04) at ../common/spl/spl.c:584
#5 board_init_r (dummy1=<optimized out>, dummy2=<optimized out>) at ../common/spl/spl.c:673
#6 0x0000085c in spl_init () at ../common/spl/spl.c:503
#7 0x00004500 in ast2600_clk_enable (clk=0xc008 <_u_boot_list_2_spl_image_loader_2_spl_mmc_load_image0BOOT_DEVICE_MMC1>)
at ../drivers/clk/aspeed/clk_ast2600.c:1245
#8 0x00000000 in ?? ()
broken:
#0 mmc_switch_part (mmc=mmc@entry=0x10015b00, part_num=0) at ../drivers/mmc/mmc.c:949
#1 0x000057dc in mmc_bread (dev=<optimized out>, start=128, blkcnt=1, dst=0x83000000) at ../drivers/mmc/mmc.c:421
#2 0x00000fdc in mmc_load_image_raw_sector (sector=128, mmc=0x10015b00, spl_image=0x902fff04) at ../common/spl/spl_mmc.c:67
#3 spl_mmc_load (spl_image=0x902fff04, bootdev=<optimized out>, filename=filename@entry=0x0 <print_cpuinfo>,
raw_part=raw_part@entry=0, raw_sect=raw_sect@entry=128) at ../common/spl/spl_mmc.c:377
#4 0x000010f2 in spl_mmc_load_image (spl_image=<optimized out>, bootdev=<optimized out>) at ../common/spl/spl_mmc.c:402
#5 0x000008f8 in spl_load_image (loader=0xc0d4 <_u_boot_list_2_spl_image_loader_2_spl_mmc_load_image0BOOT_DEVICE_MMC1>,
spl_image=0x902fff04) at ../common/spl/spl.c:546
#6 boot_from_devices (count=5, count=5, spl_boot_list=0x902ffef0, spl_image=0x902fff04) at ../common/spl/spl.c:584
#7 board_init_r (dummy1=<optimized out>, dummy2=<optimized out>) at ../common/spl/spl.c:673
#8 0x0000085c in spl_init () at ../common/spl/spl.c:503
#9 0x00004500 in ast2600_clk_enable (clk=0xc0d4 <_u_boot_list_2_spl_image_loader_2_spl_mmc_load_image0BOOT_DEVICE_MMC1>)
at ../drivers/clk/aspeed/clk_ast2600.c:1260
#10 0x00000000 in ?? ()
--- a/common/spl/spl_mmc.c
+++ b/common/spl/spl_mmc.c
@@ -333,6 +333,7 @@ int spl_mmc_load(struct spl_image_info *spl_image,
}
boot_mode = spl_boot_mode(bootdev->boot_device);
+ printf("%s: boot_mode %02x boot_device %02x\n", __func__, boot_mode, bootdev->boot_device);
err = -EINVAL;
switch (boot_mode) {
case MMCSD_MODE_EMMCBOOT:
Working:
spl_mmc_load: boot_mode 03 boot_device 01
Broken:
spl_mmc_load: boot_mode 01 boot_device 01
both working and failing branches call: u32 __weak spl_boot_mode(const u32 boot_device)
. The working code returns emmcboot
, and failing raw
.
working:
$ git grep SUPPORT_EMMC_BOOT |grep ast
include/configs/evb_ast2600_spl.h:#define CONFIG_SUPPORT_EMMC_BOOT
include/configs/evb_ast2600a1_spl.h:#define CONFIG_SUPPORT_EMMC_BOOT
include/configs/intel_ast2600.h:#define CONFIG_SUPPORT_EMMC_BOOT
failing:
$ git grep SUPPORT_EMMC_BOOT |grep ast
include/configs/evb_ast2600_spl_emmc.h:#define CONFIG_SUPPORT_EMMC_BOOT
include/configs/evb_ast2600a1_spl.h:#define CONFIG_SUPPORT_EMMC_BOOT
working:
$ git grep evb_ast2600_spl
configs/ast2600_openbmc_spl_emmc_defconfig:CONFIG_SYS_CONFIG_NAME="evb_ast2600_spl"
configs/evb-ast2600-emmc-cot_defconfig:CONFIG_SYS_CONFIG_NAME="evb_ast2600_spl"
configs/evb-ast2600-emmc_defconfig:CONFIG_SYS_CONFIG_NAME="evb_ast2600_spl"
configs/evb-ast2600-obmc-emmc-cot_defconfig:CONFIG_SYS_CONFIG_NAME="evb_ast2600_spl"
configs/evb-ast2600-obmc-emmc_defconfig:CONFIG_SYS_CONFIG_NAME="evb_ast2600_spl"
configs/evb-ast2600-spl-ncsi_defconfig:CONFIG_SYS_CONFIG_NAME="evb_ast2600_spl"
configs/evb-ast2600-spl_defconfig:CONFIG_SYS_CONFIG_NAME="evb_ast2600_spl"
failing:
$ git grep evb_ast2600_spl
configs/ast2600_openbmc_spl_emmc_defconfig:CONFIG_SYS_CONFIG_NAME="evb_ast2600_spl"
configs/evb-ast2600-emmc_defconfig:CONFIG_SYS_CONFIG_NAME="evb_ast2600_spl_emmc"
configs/evb-ast2600-spl-ecc_defconfig:CONFIG_SYS_CONFIG_NAME="evb_ast2600_spl"
configs/evb-ast2600-spl-ncsi_defconfig:CONFIG_SYS_CONFIG_NAME="evb_ast2600_spl"
configs/evb-ast2600-spl_defconfig:CONFIG_SYS_CONFIG_NAME="evb_ast2600_spl"
This show the configuration we use in the working branch uses evb_ast2600_spl.h
.
In the failing configuration we use evb_ast2600_spl.h
, which has changed in bc1213a380086c31b3f0965e97ed125cf9cdfd5c not include the CONFIG_SUPPORT_EMMC_BOOT
define. It could have been using evb_ast2600a1_spl.h
, or evb_ast2600_spl_emmc.h
.
We should switch to using the evb_ast2600_spl_emmc.h
(or make our own for openbmc and/or p10bmc).
This option is hidden from the kbuild config system as it is a whitelisted option intended to be used as a #define.
Using @legoater's qemu tree, aspeed's v00.04.06 doesn't boot in qemu: