raspberrypi / linux

Kernel source tree for Raspberry Pi-provided kernel builds. Issues unrelated to the linux kernel should be posted on the community forum at https://forums.raspberrypi.com/
Other
11.02k stars 4.95k forks source link

arch/arm/mach-bcm2708/bcm2708.c supporting only Device Tree #1155

Closed notro closed 8 years ago

notro commented 8 years ago

I wanted to see what arch/arm/mach-bcm2708/bcm2708.c would look like if I removed ATAGS booting support. Mainly to see if I had missed something with regards to Device Tree. By enabling 'bcm2835-pm-wdt', I could also remove the reboot/poweroff code (handled by the watchdog driver).

The file did shrink from 1100+ lines to 100 lines.

/*
 *  linux/arch/arm/mach-bcm2708/bcm2708.c
 *
 *  Copyright (C) 2010 Broadcom
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <linux/init.h>
#include <linux/broadcom/vc_cma.h>
#include <linux/dma-mapping.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <asm/mach-types.h>
#include <asm/mach/arch.h>
#include <asm/mach/map.h>
#include <asm/system_info.h>
#include <mach/system.h>

/*
 * Enable the Mini UART (UART1) when the Device Tree contains an available
 * "brcm,bcm2835-aux-uart" node.
 */
static void __init bcm2708_init_uart1(void)
{
    struct device_node *np;

    np = of_find_compatible_node(NULL, NULL, "brcm,bcm2835-aux-uart");
    if (of_device_is_available(np)) {
        pr_info("bcm2708: Mini UART enabled\n");
        /*
         * NOTE(review): offset 0x4 from UART1_BASE is presumably the
         * AUX enable register - confirm against the peripheral manual.
         */
        writel(1, __io_address(UART1_BASE + 0x4));
    }

    /*
     * of_find_compatible_node() returns the node with its refcount
     * raised; drop it again (of_node_put() accepts NULL).
     */
    of_node_put(np);
}

/*
 * Machine init hook: populate platform devices from the Device Tree,
 * enable the Mini UART if requested and pick up the board revision and
 * serial number from the "/system" node.
 */
static void __init bcm2708_init(void)
{
    struct device_node *np = of_find_node_by_path("/system");
    u32 val;
    u64 val64;
    int ret;

#if defined(CONFIG_BCM_VC_CMA)
    vc_cma_early_init();
#endif
    ret = of_platform_populate(NULL, of_default_bus_match_table, NULL,
                   NULL);
    if (ret) {
        pr_err("of_platform_populate failed: %d\n", ret);
        BUG();
    }

    bcm2708_init_uart1();

    if (!of_property_read_u32(np, "linux,revision", &val))
        system_rev = val;
    if (!of_property_read_u64(np, "linux,serial", &val64)) {
        /*
         * The serial is a 64-bit value; store both halves instead of
         * silently truncating it to the low word.
         */
        system_serial_low = (u32)val64;
        system_serial_high = (u32)(val64 >> 32);
    }

    /* Release the reference taken by of_find_node_by_path(). */
    of_node_put(np);
}

/* Early init hook: sets the atomic coherent DMA pool size to SZ_4M. */
static void __init bcm2708_init_early(void)
{
    /*
     * Some devices allocate their coherent buffers from atomic
     * context. Increase the size of the atomic coherent pool to make
     * sure such allocations won't fail.
     */
    init_dma_coherent_pool_size(SZ_4M);
}

/*
 * Reserve hook: calls vc_cma_reserve() when CONFIG_BCM_VC_CMA is defined;
 * otherwise it does nothing.
 */
static void __init bcm2708_reserve(void)
{
#if defined(CONFIG_BCM_VC_CMA)
    vc_cma_reserve();
#endif
}

/* NULL-terminated list of compatible strings used as .dt_compat below. */
static const char * const bcm2708_compat[] = {
    "brcm,bcm2708",
    NULL
};

/* Machine descriptor for BCM2708, matched through bcm2708_compat. */
MACHINE_START(BCM2708, "BCM2708")
    .init_machine = bcm2708_init,
    /*
     * NOTE(review): the init_early hook is left disabled as an experiment;
     * the board reportedly boots without the enlarged atomic coherent
     * pool - confirm before removing bcm2708_init_early() for good.
     */
//  .init_early = bcm2708_init_early,
    .reserve = bcm2708_reserve,
    .dt_compat = bcm2708_compat,
MACHINE_END

I don't think this is needed anymore now that we have changed the GFP_ATOMIC allocations. At least there's no problem booting without it.

    init_dma_coherent_pool_size(SZ_4M);

If the firmware could fill this in, the kernel would report it in /proc/cpuinfo and we wouldn't need "linux,serial".

/ {
    serial-number = "00000000a1725dd1";
};

Does anyone know of drivers/areas where we are not using mainline (excluding the clock driver, we use DT fixed clocks)?

notro commented 8 years ago

Does anyone know of drivers/areas where we are not using mainline

I can answer my own question: bcm2835-i2s and i2c-bcm2835.

I have now removed all non-DT related code as well as some stale code and the net result is 6000+ lines removed. After adding code for months, it's really nice to be able to finally delete some :-) The patches are on top of the DMA patches: https://github.com/notro/linux/commits/atagsexit I'll make a PR when the DMA patches are in.

notro commented 8 years ago

I have converted ARCH_BCM2708 to multi platform and built a kernel with both ARCH_BCM2708 and ARCH_BCM2835 support. This kernel can boot both with bcm2708-rpi-b-plus.dtb and bcm2835-rpi-b-plus.dtb https://github.com/notro/linux/commits/multi (the Kconfig change on the armctrl patch is a bit convoluted, but the change is: select SPARSE_IRQ)

But instead of doing this, maybe we should just move directly from ARCH_BCM2708 to ARCH_BCM2835:

diff --git a/arch/arm/configs/bcmrpi_defconfig b/arch/arm/configs/bcmrpi_defconfig
index cb63df7..4f1e058 100644
--- a/arch/arm/configs/bcmrpi_defconfig
+++ b/arch/arm/configs/bcmrpi_defconfig
@@ -1,5 +1,3 @@
-# CONFIG_ARM_PATCH_PHYS_VIRT is not set
-CONFIG_PHYS_OFFSET=0
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -37,8 +35,9 @@ CONFIG_BLK_DEV_THROTTLING=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_MAC_PARTITION=y
 CONFIG_CFQ_GROUP_IOSCHED=y
-CONFIG_ARCH_BCM2708=y
-CONFIG_BCM2708_DT=y
+CONFIG_ARCH_MULTI_V6=y
+CONFIG_ARCH_BCM=y
+CONFIG_ARCH_BCM2835=y
 CONFIG_PREEMPT=y
 CONFIG_AEABI=y
 CONFIG_OABI_COMPAT=y
@@ -582,14 +581,12 @@ CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_TTY_PRINTK=y
 CONFIG_HW_RANDOM=y
 CONFIG_HW_RANDOM_BCM2835=m
-CONFIG_HW_RANDOM_BCM2708=m
 CONFIG_RAW_DRIVER=y
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=m
 CONFIG_I2C_BCM2708=m
 CONFIG_SPI=y
 CONFIG_SPI_BCM2835=m
-CONFIG_SPI_BCM2708=m
 CONFIG_SPI_SPIDEV=y
 CONFIG_PPS=m
 CONFIG_PPS_CLIENT_LDISC=m
@@ -624,7 +621,6 @@ CONFIG_SENSORS_SHTC1=m
 CONFIG_THERMAL=y
 CONFIG_THERMAL_BCM2835=y
 CONFIG_WATCHDOG=y
-CONFIG_BCM2708_WDT=m
 CONFIG_BCM2835_WDT=m
 CONFIG_UCB1400_CORE=m
 CONFIG_MFD_STMPE=y
@@ -1229,6 +1225,7 @@ CONFIG_FUNCTION_PROFILER=y
 CONFIG_KGDB=y
 CONFIG_KGDB_KDB=y
 CONFIG_KDB_KEYBOARD=y
+CONFIG_DEBUG_LL=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_CBC=y

What do you think?

pelwell commented 8 years ago

That's impressive. As I see it, the remaining questions are:

  1. As you say, do we accept this patch set or switch directly to ARCH_BCM2835? I like the idea of using ARCH_BCM2708 as an intermediate step - it makes it easier to see what we have changed.
  2. What do we do with BCM2709? We could make the equivalent changes to create an ARCH_BCM2709, or wait for ARCH_BCM2836.

If we choose to introduce ARCH_BCM2708 and ARCH_BCM2709, it avoids the perception that Pi 2 is lagging behind in some way, and it allows us to separate the "DT-only" change from the "upstream-only" change

BTW, is the CONFIG_DEBUG_LL still required?

notro commented 8 years ago

As you say, do we accept this patch set or switch directly to ARCH_BCM2835? I like the idea of using ARCH_BCM2708 as an intermediate step - it makes it easier to see what we have changed.

Actually it makes sense to do the convert to multi-platform patch like this instead: arch/arm/mach-bcm/Kconfig

+config ARCH_BCM2708
+        bool "Broadcom BCM2708 family" if ARCH_MULTI_V6
+        select ARCH_BCM2835
+        help
+          This enables support for Broadcom BCM2708 boards.
+

arch/arm/mach-bcm/board_bcm2835.c

+static const char * const bcm2708_compat[] = {
+        "brcm,bcm2708",
+        NULL
+};
+
+DT_MACHINE_START(BCM2708, "BCM2708")
+        .init_machine = bcm2835_init,
+        .init_early = bcm2835_init_early,
+        .reserve = bcm2835_reserve,
+        .dt_compat = bcm2708_compat,
+MACHINE_END
+

The same patch will also delete arch/arm/mach-bcm2708/. I didn't do that in my initial work to keep from cluttering up the patch making it difficult to read.

What do we do with BCM2709? We could make the equivalent changes to create an ARCH_BCM2709, or wait for ARCH_BCM2836.

By "create" I assume you mean "make multiplatform". One multiplatform requirement is MULTI_IRQ_HANDLER:

config ARCH_MULTIPLATFORM
        bool "Allow multiple platforms to be selected"
        depends on MMU
[...]
        select MULTI_IRQ_HANDLER
        select SPARSE_IRQ
        select USE_OF

So in order to turn ARCH_BCM2709 into multiplatform we have to use irq-bcm2836 which arrived with 4.3. It's much easier to make that happen when ARCH_BCM2836 is in place having something to look at, and at that time we face the same: Maybe we should just move straight to ARCH_BCM2836.

Hm, judging from this:

drivers/irqchip/Makefile

obj-$(CONFIG_ARCH_BCM2835)    += irq-bcm2836.o

I see that ARCH_BCM2835 will be used to enable both Pi1 and Pi2 support. Based on that it makes sense to delay switching to that option to avoid confusion. So at a later time when mainline has full Pi2 support we can switch to ARCH_BCM2835 and get both Pi1 and Pi2 support.

BTW, is the CONFIG_DEBUG_LL still required?

It's necessary for this to appear on the serial console: Uncompressing Linux... done, booting the kernel.

notro commented 8 years ago

I have tried to use irq-bcm2836 with ARCH_BCM2709 and with ARCH_BCM2835 on rpi-4.3.y. I kept getting strange errors and hangs. One recurring example:

[    9.550788] Unhandled fault: page domain fault (0x81b) at 0x7ee4d6a8
[    9.558258] pgd = b5c64000
[    9.561982] [7ee4d6a8] *pgd=35c44831, *pte=3621d55f, *ppte=3621dc7e
[    9.569328] Internal error: : 81b [#1] PREEMPT SMP ARM
[    9.575498] Modules linked in: rpcsec_gss_krb5 cfg80211 rfkill bcm2835_gpiomem uio_pdrv_genirq uio i2c_dev snd_bcm2835 snd_pcm snd_timer snd fuse
[    9.590927] CPU: 0 PID: 527 Comm: rsyslogd Not tainted 4.3.0-rc5-v7+ #29
[    9.598780] Hardware name: BCM2709
[    9.603322] task: b91eb000 ti: b813a000 task.ti: b813a000
[    9.609882] PC is at memcpy+0x50/0x330
[    9.614761] LR is at 0x0
...
[    9.668807] Process rsyslogd (pid: 527, stack limit = 0xb813a210)
...
[    9.834826] [<80310b70>] (memcpy) from [<8031dab4>] (__copy_to_user_memcpy+0xf8/0x188)
[    9.845846] [<8031dab4>] (__copy_to_user_memcpy) from [<8031dd68>] (arm_copy_to_user+0x20/0x2c)
[    9.857646] [<8031dd68>] (arm_copy_to_user) from [<80156fbc>] (cp_new_stat64+0x164/0x188)
[    9.868916] [<80156fbc>] (cp_new_stat64) from [<801573f4>] (SyS_fstat64+0x38/0x40)
[    9.879569] [<801573f4>] (SyS_fstat64) from [<8000fcc0>] (ret_fast_syscall+0x0/0x1c)
[    9.890386] Code: f5d1f05c f5d1f07c e8b151f8 e2522020 (e8a051f8)
[    9.898030] ---[ end trace e8a5f80b589141a3 ]---
[    9.904284] note: rsyslogd[527] exited with preempt_count 1

So I moved to rpi-4.2.y and added bcm2836 support to ARCH_BCM2835: https://github.com/notro/linux/commits/bcm2836-42 Fewer errors, but I still have some problems:

MMC

mmc-bcm2835 doesn't work:

[    1.564526] mmc-bcm2835 3f300000.mmc: mmc_debug:0 mmc_debug2:0
[    1.575467] mmc-bcm2835 3f300000.mmc: DMA channels allocated

[    1.786848] Waiting for root device /dev/mmcblk0p2...
[    1.796956] mmc0: host does not support reading read-only switch, assuming write-enable
[    1.813021] mmc0: new high speed SDHC card at address e624
[    1.824158] mmcblk0: mmc0:e624 SL08G 7.40 GiB

[   11.848816] mmc0: Timeout waiting for hardware interrupt.
[   11.860323] mmcblk0: error -110 sending stop command, original cmd response 0x900, card status 0xb00
[   11.929206] VFS: Cannot open root device "mmcblk0p2" or unknown-block(179,2): error -6

bcm2835-sdhost doesn't work either with DMA enabled:

[    1.599308] mmc0: sdhost-bcm2835 loaded - DMA enabled (>1)

[    1.725530] Waiting for root device /dev/mmcblk0p2...
[    1.732322] mmc0: host does not support reading read-only switch, assuming write-enable
[    1.752079] mmc0: new high speed SDHC card at address e624
[    1.759965] mmcblk0: mmc0:e624 SL08G 7.40 GiB
[    1.839437] VFS: Cannot open root device "mmcblk0p2" or unknown-block(179,2): error -6

bcm2835-sdhost works if I'm not using dma: dtoverlay=sdhost,force_pio

USB

FIQ doesn't work (no devices detected), so I have to disable it: dwc_otg.fiq_enable=0 dwc_otg.fiq_fsm_enable=0

I can use a keyboard, but networking doesn't work even though smsc95xx is detected. It gets a self-assigned address: inet addr:169.254.83.199

unexpected IRQ

I get some of these and I don't know what triggers it. It varies when and how often they appear.

[    5.871824] unexpected IRQ trap at vector 00
[   89.720102] unexpected IRQ trap at vector 00
$ cat /proc/interrupts
           CPU0       CPU1       CPU2       CPU3
 16:          0          0          0          0  bcm2836-timer   0 Edge      arch_timer
 17:       4877       3483       2509       1390  bcm2836-timer   1 Edge      arch_timer
 23:         28          0          0          0  ARMCTRL-level   1 Edge      3f00b880.mailbox
 24:          2          0          0          0  ARMCTRL-level   2 Edge      VCHIQ doorbell
 39:    2934802          0          0          0  ARMCTRL-level  41 Edge      dwc_otg, dwc_otg_pcd, dwc_otg_hcd:usb1
 46:          0          0          0          0  ARMCTRL-level  48 Edge      bcm2708_fb dma
 79:          0          0          0          0  ARMCTRL-level  81 Edge      3f200000.gpio:bank0
 80:          0          0          0          0  ARMCTRL-level  82 Edge      3f200000.gpio:bank1
 86:     120727          0          0          0  ARMCTRL-level  88 Edge      mmc0
 87:       1229          0          0          0  ARMCTRL-level  89 Edge      uart-pl011
IPI0:          0          0          0          0  CPU wakeup interrupts
IPI1:          0          0          0          0  Timer broadcast interrupts
IPI2:       1623       2413       1969       1602  Rescheduling interrupts
IPI3:          5          7          7          8  Function call interrupts
IPI4:          1          2          1          2  Single function call interrupts
IPI5:          0          0          0          0  CPU stop interrupts
IPI6:          1          0          0          0  IRQ work interrupts
IPI7:          0          0          0          0  completion interrupts
Err:          2

Occasionally I get this when booting:


[ ***  ] (2 of 2) A start job is running for dev-ttyAMA0.devi...31s / 1min 30s)
[ TIME ] Timed out waiting for device dev-mmcblk0p1.device.
[DEPEND] Dependency failed for /boot.
[DEPEND] Dependency failed for Local File Systems.
[DEPEND] Dependency failed for File System Check on /dev/mmcblk0p1.
[ TIME ] Timed out waiting for device dev-ttyAMA0.device.
...
Welcome to emergesulogin: root account is locked, starting shell
root@raspberrypi:~# reboot

Bootlog: https://gist.github.com/notro/b5278e5dd33fe7501bba

This work has been based on https://github.com/anholt/linux/commits/bcm2836-irqchip

@anholt What's the status on getting Pi2 support into mainline?

popcornmix commented 8 years ago

The 4.3 panics in memcpy are caused by CONFIG_CPU_SW_DOMAIN_PAN. Seems reliable without that. See: https://github.com/raspberrypi/linux/commit/aacdda4b1e8063d0ef60ed16eb3703748fbcf9fd

notro commented 8 years ago

Thanks, disabling CONFIG_CPU_SW_DOMAIN_PAN removed those random problems. So back to ARCH_BCM2709 with irq-bcm2836 on 4.3: https://github.com/notro/linux/commits/irq2836-43

Down to 2 issues:

@P33M Does the traceback tell you anything about what the problem might be?

[    1.453100] OTG VER PARAM: 0, OTG VER FLAG: 0
[    1.457542] Dedicated Tx FIFOs mode
[    1.461464] WARN::dwc_otg_hcd_init:1047: FIQ DMA bounce buffers: virt = 0xbac14000 dma = 0xfac14000 len=9024
[    1.471523] FIQ FSM acceleration enabled for :
[    1.471523] Non-periodic Split Transactions
[    1.471523] Periodic Split Transactions
[    1.471523] High-Speed Isochronous Endpoints
[    1.488645] WARN::hcd_init_fiq:412: FIQ on core 1 at 0x8041c620
[    1.494683] WARN::hcd_init_fiq:413: FIQ ASM at 0x8041c97c length 36
[    1.505660] WARN::hcd_init_fiq:438: MPHI regs_base at 0xbb8a8000

[    1.516364] armctrl_unmask_irq: hwirq_to_fiq(hwirq=137)=>9

[    1.526567] dwc_otg 3f980000.usb: DWC OTG Controller
[    1.536259] dwc_otg 3f980000.usb: new USB bus registered, assigned bus number 1
[    1.548322] dwc_otg 3f980000.usb: irq 71, io mem 0x00000000
[    1.558614] Init: Port Power? op_state=1
[    1.567146] Init: Power Port (0)
[    1.575252] usb usb1: New USB device found, idVendor=1d6b, idProduct=0002
[    1.586705] usb usb1: New USB device strings: Mfr=3, Product=2, SerialNumber=1
[    1.598595] usb usb1: Product: DWC OTG Controller
[    1.607924] usb usb1: Manufacturer: Linux 4.3.0-rc6-v7+ dwc_otg_hcd
[    1.618913] usb usb1: SerialNumber: 3f980000.usb
[    1.629216] hub 1-0:1.0: USB hub found
[    1.637681] hub 1-0:1.0: 1 port detected
[    1.646584] Bad mode in prefetch abort handler detected
[    1.656384] Internal error: Oops - bad mode: 0 [#1] PREEMPT SMP ARM
[    1.667212] Modules linked in:
[    1.674802] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.3.0-rc6-v7+ #1
[    1.685877] Hardware name: BCM2709
[    1.693804] task: b9908000 ti: b9902000 task.ti: b9902000
[    1.703768] PC is at 0x50c2fe10
[    1.711485] LR is at dwc_otg_enable_global_interrupts+0x28/0x2c
[    1.721987] pc : [<50c2fe10>]    lr : [<803fab20>]    psr: 600001d1
[    1.721987] sp : b9903d80  ip : b9903d80  fp : b9903ddc
[    1.742566] r10: 000000ed  r9 : 00000000  r8 : 00000000
[    1.752351] r7 : 808d22c0  r6 : 00000000  r5 : 808d22c0  r4 : 50c2fe10
[    1.763477] r3 : 00000000  r2 : 00000001  r1 : 9e6713fc  r0 : 99cdc5ec
[    1.774603] Flags: nZCv  IRQs off  FIQs off  Mode FIQ_32  ISA ARM  Segment kernel
[    1.786756] Control: 10c5387d  Table: 0000406a  DAC: 00000055
[    1.797167] Process swapper/0 (pid: 1, stack limit = 0xb9902210)
[    1.807837] Stack: (0xb9903d80 to 0xb9904000)
[    1.816876] 3d80: 99cdc5ec 9e6713fc 00000001 00000000 50c2fe10 808d22c0 00000000 808d22c0
[    1.829872] 3da0: 00000000 00000000 000000ed b9903ddc b9903d80 b9903d80 803fab20 50c2fe10
[    1.842944] 3dc0: 600001d1 ffffffff b99e2c10 8098214c b9903e04 b9903de0 8038e10c 8038fbe8
[    1.855994] 3de0: 00000007 b99e2c10 808d22c0 b99e2c44 00000000 8082626c b9903e24 b9903e08
[    1.869032] 3e00: 8038e268 8038df5c 00000000 00000000 808d22c0 8038e1c0 b9903e4c b9903e28
[    1.882125] 3e20: 8038c3b4 8038e1cc b9896b5c b997e034 b9896b70 808d22c0 b901f580 808cb040
[    1.895218] 3e40: b9903e5c b9903e50 8038dbe4 8038c34c b9903e84 b9903e60 8038d830 8038dbc8
[    1.908313] 3e60: 80601290 b9903e70 808d22c0 808d2188 808d2188 8083c830 b9903e9c b9903e88
[    1.921428] 3e80: 8038e970 8038d6a0 80888b98 808d2188 b9903eac b9903ea0 8038fbc8 8038e8f4
[    1.934557] 3ea0: b9903ed4 b9903eb0 808262cc 8038fb7c 00000000 80888b98 80888b98 b9075ec0
[    1.947709] 3ec0: 8083c830 8082626c b9903f54 b9903ed8 800097f4 80826278 60000113 60000113
[    1.960901] 3ee0: b9903f0c b9903ef0 b9903f00 b9903ef8 00000000 ba7ffbfe 805ebe10 000000ed
[    1.974109] 3f00: b9903f54 b9903f10 800415d4 807f45f4 00000000 00000006 00000006 ba7ffc06
[    1.987324] 3f20: 807724e4 00000000 00000000 00000006 808f1000 808f1000 8083c830 8083c83c
[    2.000561] 3f40: 8085e028 000000ed b9903f94 b9903f58 807f4f44 80009770 00000006 00000006
[    2.013824] 3f60: 00000000 807f45e8 00000000 00000000 805bc12c 00000000 00000000 00000000
[    2.027102] 3f80: 00000000 00000000 b9903fac b9903f98 805bc144 807f4d64 b9902000 00000000
[    2.040402] 3fa0: 00000000 b9903fb0 8000fb68 805bc138 00000000 00000000 00000000 00000000
[    2.053637] 3fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[    2.066787] 3fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000
[    2.079928] [<803fab20>] (dwc_otg_enable_global_interrupts) from [<8038e10c>] (really_probe+0x1bc/0x270)
[    2.094462] [<8038e10c>] (really_probe) from [<8038e268>] (__driver_attach+0xa8/0xac)
[    2.107392] [<8038e268>] (__driver_attach) from [<8038c3b4>] (bus_for_each_dev+0x74/0xa4)
[    2.120689] [<8038c3b4>] (bus_for_each_dev) from [<8038dbe4>] (driver_attach+0x28/0x30)
[    2.133859] [<8038dbe4>] (driver_attach) from [<8038d830>] (bus_add_driver+0x19c/0x220)
[    2.147062] [<8038d830>] (bus_add_driver) from [<8038e970>] (driver_register+0x88/0x108)
[    2.160372] [<8038e970>] (driver_register) from [<8038fbc8>] (__platform_driver_register+0x58/0x6c)
[    2.174695] [<8038fbc8>] (__platform_driver_register) from [<808262cc>] (dwc_otg_driver_init+0x60/0x118)
[    2.189500] [<808262cc>] (dwc_otg_driver_init) from [<800097f4>] (do_one_initcall+0x90/0x1ec)
[    2.203387] [<800097f4>] (do_one_initcall) from [<807f4f44>] (kernel_init_freeable+0x1ec/0x2b8)
[    2.217484] [<807f4f44>] (kernel_init_freeable) from [<805bc144>] (kernel_init+0x18/0xfc)
[    2.231117] [<805bc144>] (kernel_init) from [<8000fb68>] (ret_from_fork+0x14/0x2c)
[    2.244119] Code: bad PC value
[    2.252595] ---[ end trace 33baaf4b4ccb1640 ]---

Networking and usb seem to work if I disable fiq: dwc_otg.fiq_enable=0 dwc_otg.fiq_fsm_enable=0

notro commented 8 years ago

I have tracked down the unexpected irq issue to bcm2836_arm_irqchip_handle_irq():

/*
 * Top-level IRQ entry for the BCM2836 per-CPU local interrupt controller.
 *
 * Reads this CPU's pending register. If bit 4 (0x10) is set, the lowest
 * pending mailbox 0 bit is cleared and dispatched as an IPI; otherwise
 * the lowest pending source is dispatched as a normal interrupt.
 */
static void
__exception_irq_entry bcm2836_arm_irqchip_handle_irq(struct pt_regs *regs)
{
        int cpu = smp_processor_id();
        u32 stat;

        stat = readl_relaxed(intc.base + LOCAL_IRQ_PENDING0 + 4 * cpu);
        if (stat & 0x10) {
#ifdef CONFIG_SMP
                void __iomem *mailbox0 = (intc.base +
                                          LOCAL_MAILBOX0_CLR0 + 16 * cpu);
                u32 mbox_val = readl(mailbox0);
                u32 ipi = ffs(mbox_val) - 1;

                /* Clear only the IPI bit that is about to be handled. */
                writel(1 << ipi, mailbox0);
                handle_IPI(ipi, regs);
#endif
        } else if (stat) {
                /*
                 * Only dispatch when something is actually pending. With
                 * stat == 0, ffs() returns 0 and hwirq would become
                 * 0xffffffff, which maps to irq 0 and triggers the
                 * "unexpected IRQ trap" message.
                 */
                u32 hwirq = ffs(stat) - 1;

                handle_IRQ(irq_linear_revmap(intc.domain, hwirq), regs);
        }
}

This change:

diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c
index f687082..80918a7 100644
--- a/drivers/irqchip/irq-bcm2836.c
+++ b/drivers/irqchip/irq-bcm2836.c
@@ -174,6 +174,10 @@ __exception_irq_entry bcm2836_arm_irqchip_handle_irq(struct pt_regs *regs)
 #endif
        } else {
                u32 hwirq = ffs(stat) - 1;
+               unsigned int irq = irq_linear_revmap(intc.domain, hwirq);
+
+               if (!irq)
+                       printk("%s: stat=0x%08x, hwirq=0x%08x, irq=0\n", __func__, stat, hwirq);

                handle_IRQ(irq_linear_revmap(intc.domain, hwirq), regs);
        }

Gives me:

[    0.960883] bcm2836_arm_irqchip_handle_irq: stat=0x00000000, hwirq=0xffffffff, irq=0
[    0.968429] unexpected IRQ trap at vector 00

@anholt What do we do with stat == 0 ? Silently drop it?

(irq_linear_revmap() returns zero because hwirq >= domain->revmap_size)

notro commented 8 years ago

I forgot to print the cpu:

[    3.100413] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
[    3.390968] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
[    3.821507] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
[    3.985232] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
[    4.042509] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
[    4.156505] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
[    6.778375] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
[   15.513331] bcm2836_arm_irqchip_handle_irq: cpu=3, stat=0x00000000, hwirq=0xffffffff, irq=0
[   18.470878] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
[   24.526926] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
[   24.654766] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
[   24.750653] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
[   25.038869] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
[   25.142741] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
[   25.238775] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
[   25.366728] bcm2836_arm_irqchip_handle_irq: cpu=0, stat=0x00000000, hwirq=0xffffffff, irq=0
anholt commented 8 years ago

So, I assume we could just do } else if (stat != 0) { instead, but I'm concerned that we're screwing up interrupt handling somehow and this is just the canary.

One possibly relevant thing: I assume the l1 controller is just level-triggered (if we exit bcm2836_arm_irqchip_handle_irq and stat ends up != 0 again, we get re-called). If there's anything more complicated than that, I could imagine getting into trouble.

popcornmix commented 8 years ago

Yes interrupt controller is level triggered. If you exit interrupt handler without clearing the interrupt then interrupt handler will trigger again.

Being in the interrupt handler with no interrupt pending is a bit suspicious.

notro commented 8 years ago

FIQ is working now with this change:

diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c
index 9c68c33..5bb3fb2 100644
--- a/drivers/irqchip/irq-bcm2835.c
+++ b/drivers/irqchip/irq-bcm2835.c
@@ -50,6 +50,8 @@
 #include <linux/of_irq.h>
 #include <linux/irqchip.h>
 #include <linux/irqdomain.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>

 #include <asm/exception.h>
 #include <asm/mach/irq.h>
@@ -70,6 +72,9 @@
 #define BANK0_VALID_MASK   (BANK0_HWIRQ_MASK | BANK1_HWIRQ | BANK2_HWIRQ \
                    | SHORTCUT1_MASK | SHORTCUT2_MASK)

+#undef ARM_LOCAL_GPU_INT_ROUTING
+#define ARM_LOCAL_GPU_INT_ROUTING 0x0c
+
 #define REG_FIQ_CONTROL        0x0c
 #define REG_FIQ_ENABLE     0x80
 #define REG_FIQ_DISABLE        0
@@ -96,6 +101,7 @@ struct armctrl_ic {
    void __iomem *enable[NR_BANKS];
    void __iomem *disable[NR_BANKS];
    struct irq_domain *domain;
+   struct regmap *local_regmap;
 };

 static struct armctrl_ic intc __read_mostly;
@@ -132,6 +138,29 @@ static void armctrl_unmask_irq(struct irq_data *d)
    if (d->hwirq >= NUMBER_IRQS)
    {
        printk("%s: hwirq_to_fiq(hwirq=%lu)=>%u\n", __func__, d->hwirq, hwirq_to_fiq(d->hwirq));
+
+       if (num_online_cpus() > 1) {
+           unsigned int data;
+           int ret;
+
+           if (!intc.local_regmap) {
+               pr_err("FIQ is disabled due to missing regmap\n");
+               return;
+           }
+
+           ret = regmap_read(intc.local_regmap,
+                     ARM_LOCAL_GPU_INT_ROUTING, &data);
+           if (ret) {
+               pr_err("Failed to read int routing %d\n", ret);
+               return;
+           }
+
+           data &= ~0xc;
+           data |= (1 << 2);
+           regmap_write(intc.local_regmap,
+                    ARM_LOCAL_GPU_INT_ROUTING, data);
+       }
+
        writel_relaxed(REG_FIQ_ENABLE | hwirq_to_fiq(d->hwirq),
                   intc.base + REG_FIQ_CONTROL);
    }
@@ -215,6 +244,15 @@ static int __init armctrl_of_init(struct device_node *node,
        set_handle_irq(bcm2835_handle_irq);
    }

+   if (is_2836) {
+       intc.local_regmap =
+           syscon_regmap_lookup_by_compatible("brcm,bcm2836-arm-local");
+       if (IS_ERR(intc.local_regmap)) {
+           pr_err("Failed to get local register map. FIQ is disabled for cpus > 1\n");
+           intc.local_regmap = NULL;
+       }
+   }
+
    /* Make a duplicate irq range which is used to enable FIQ */
    for (b = 0; b < NR_BANKS; b++) {
        for (i = 0; i < bank_irqs[b]; i++) {
notro commented 8 years ago

ARM assembly is kind of greek to me, but doesn't the current interrupt code silently skip ARM_LOCAL_IRQ_PENDING0 == 0 (stat == 0) ?

arch/arm/mach-bcm2709/include/mach/entry-macro.S

        .macro  get_irqnr_and_base, irqnr, irqstat, base, tmp

        /* get core number */
        mrc     p15, 0, \base, c0, c0, 5
        ubfx    \base, \base, #0, #2

        /* get core's local interrupt controller */
        ldr \irqstat, = __io_address(ARM_LOCAL_IRQ_PENDING0)  @ local interrupt source
        add \irqstat, \irqstat, \base, lsl #2
        ldr \tmp, [\irqstat]

        /* test for mailbox0 (IPI) interrupt */
        tst \tmp, #0x10
        beq 1030f

<snip>

1030:
        /* check gpu interrupt */
        tst \tmp, #0x100
        beq 1040f

<snip>

1040:
        cmp \tmp, #0
        beq 1020f

<snip>

1020:   @ EQ will be set if no irqs pending
        .endm

/*
 * Interrupt handling.  Preserves r7, r8, r9
 */
        .macro  arch_irq_handler_default
1:      get_irqnr_and_base r0, r2, r6, lr
        .endm
popcornmix commented 8 years ago

Yes, it does. There are cases where this can happen when both gpu and arm handle the same interrupt. I believe I2C is the only instance of this. gpu "owns" I2C0 and arm "owns" I2C1 but they have a common interrupt.

This means if both are enabled, and an interrupt is pending then it will trigger on both arm and gpu. Each handler will check if there is something to do (i.e. an interrupt was expected) and just return if nothing to do. If gpu handles the interrupt and is quicker than arm, then arm may get the stat==0 case as interrupt has already been cleared.

This also causes an ugly behaviour where if the gpu is slow to handle the interrupt (which was intended for it), then the arm may spin multiple times through the interrupt handler (as it cannot clear the interrupt). Generally the gpu has lower interrupt latency than the arm, so this typically doesn't happen (although the gpu can spin waiting for arm to handle it).

As far as I know, I2C is the only case where this occurs. Gpu I2C0 is used by CSI camera and DSI display, and the HAT probing on boot (before arm is launched), so normally you won't see this (which is why I said it was suspicious - there may be a good reason, but without identifying it, it is surprising).

notro commented 8 years ago

Ok, I would like to check and see if stat==0 happens with the current interrupt code. How can I change it in a way that makes it call asm_do_IRQ with irq=0 when stat==0 ? This will trigger ack_bad_irq() if it happens.

popcornmix commented 8 years ago

Untested, but something like:

diff --git a/arch/arm/mach-bcm2709/include/mach/entry-macro.S b/arch/arm/mach-bcm2709/include/mach/entry-macro.S
index 08d184c..d0a74a7 100644
--- a/arch/arm/mach-bcm2709/include/mach/entry-macro.S
+++ b/arch/arm/mach-bcm2709/include/mach/entry-macro.S
@@ -92,7 +92,8 @@
        b       1050f
 1040:
        cmp     \tmp, #0
-       beq     1020f
+       moveq     \irqnr, #0
+       beq     1050f

        /* handle local (e.g. timer) interrupts */
        @ For non-zero x, LSB(x) = 31 - CLZ(x^(x-1))
notro commented 8 years ago

It didn't work. I'm flooded with unexpected IRQ messages:

[    0.000000] Architected cp15 timer(s) running at 19.20MHz (phys).
[    0.000000] clocksource: arch_sys_counter: mask: 0xffffffffffffff max_cycles: 0x46d987e47, max_idle_ns: 440795202767 ns
[    0.000010] sched_clock: 56 bits at 19MHz, resolution 52ns, wraps every 4398046511078ns
[    0.008261] Switching to timer-based delay loop, resolution 52ns
[    0.014675] unexpected IRQ trap at vector 00
<snip 221 lines>
[    0.996799] unexpected IRQ trap at vector 00
[    1.001251] unexpected IRQ trap at vector 00
<snip 224 lines>
[    1.996650] unexpected IRQ trap at vector 00
[    2.001106] unexpected IRQ trap at vector 00
<snip 211 lines>
[    2.996544] unexpected IRQ trap at vector 00

I removed printing of the actual message which would give me the error counting, but it didn't boot up:

Uncompressing Linux... done, booting the kernel.
[    0.000000] Booting Linux on physical CPU 0xf00
[    0.000000] Initializing cgroup subsys cpuset
[    0.000000] Initializing cgroup subsys cpu
[    0.000000] Initializing cgroup subsys cpuacct
[    0.000000] Linux version 4.3.0-rc6-v7+ (pi@raspi2) (gcc version 4.8.3 20140106 (prerelease) (crosstool-NG linaro-1.13.1-4.8-2014.01 - Linaro GCC 2013.11) ) #7 SMP PREEMPT Sun Oct 25 18:28:32 CET 2015
[    0.000000] CPU: ARMv7 Processor [410fc075] revision 5 (ARMv7), cr=10c5387d
[    0.000000] CPU: PIPT / VIPT nonaliasing data cache, VIPT aliasing instruction cache
[    0.000000] Machine model: Raspberry Pi 2 Model B Rev 1.1
[    0.000000] bootconsole [earlycon0] enabled
[    0.000000] cma: Reserved 8 MiB at 0x3a800000
[    0.000000] Memory policy: Data cache writealloc
[    0.000000] [bcm2709_smp_init_cpus] enter (9420->f3003010)
[    0.000000] [bcm2709_smp_init_cpus] ncores=4
[    0.000000] PERCPU: Embedded 13 pages/cpu @b9f64000 s22784 r8192 d22272 u53248
[    0.000000] Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 239540
[    0.000000] Kernel command line: dma.dmachans=0x7f35 bcm2708_fb.fbwidth=1824 bcm2708_fb.fbheight=984 bcm2709.boardrev=0xa01041 bcm2709.serial=0x316e5228 smsc95xx.macaddr=B8:27:EB:6E:52:28 bcm2708_fb.fbswap=1 bcm2709.disk_led_gpio=47 bcm2709.disk_led_active_low=0 sdhci-bcm2708.emmc_clock_freq=250000000 vc_mem.mem_base=0x3dc00000 vc_mem.mem_size=0x3f000000  dwc_otg.fiq_enable=1 dwc_otg.fiq_fsm_enable=1 earlyprintk dwc_otg.lpm_enable=0 console=tty1 console=ttyAMA0,115200 root=/dev/mmcblk0p2 rootfstype=ext4 elevator=deadline fsck.repair=yes rootwait
[    0.000000] PID hash table entries: 4096 (order: 2, 16384 bytes)
[    0.000000] Dentry cache hash table entries: 131072 (order: 7, 524288 bytes)
[    0.000000] Inode-cache hash table entries: 65536 (order: 6, 262144 bytes)
[    0.000000] Memory: 938940K/966656K available (6376K kernel code, 553K rwdata, 1728K rodata, 452K init, 773K bss, 19524K reserved, 8192K cma-reserved)
[    0.000000] Virtual kernel memory layout:
[    0.000000]     vector  : 0xffff0000 - 0xffff1000   (   4 kB)
[    0.000000]     fixmap  : 0xffc00000 - 0xfff00000   (3072 kB)
[    0.000000]     vmalloc : 0xbb800000 - 0xff000000   (1080 MB)
[    0.000000]     lowmem  : 0x80000000 - 0xbb000000   ( 944 MB)
[    0.000000]     modules : 0x7f000000 - 0x80000000   (  16 MB)
[    0.000000]       .text : 0x80008000 - 0x807f2414   (8106 kB)
[    0.000000]       .init : 0x807f3000 - 0x80864000   ( 452 kB)
[    0.000000]       .data : 0x80864000 - 0x808ee544   ( 554 kB)
[    0.000000]        .bss : 0x808f1000 - 0x809b249c   ( 774 kB)
[    0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=4, Nodes=1
[    0.000000] Preemptible hierarchical RCU implementation.
[    0.000000]  Build-time adjustment of leaf fanout to 32.
[    0.000000] NR_IRQS:608
[    0.000000] Architected cp15 timer(s) running at 19.20MHz (phys).
[    0.000000] clocksource: arch_sys_counter: mask: 0xffffffffffffff max_cycles: 0x46d987e47, max_idle_ns: 440795202767 ns
[    0.000011] sched_clock: 56 bits at 19MHz, resolution 52ns, wraps every 4398046511078ns
[    0.008265] Switching to timer-based delay loop, resolution 52ns

Code diff:

diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 87c5451..a84f258 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -1248,8 +1248,8 @@ choice
                  and will be soon removed.

        config DEBUG_BCM2708_UART0
-               bool "Broadcom BCM2708 UART0 (PL011)"
-               depends on MACH_BCM2708
+               bool "Broadcom BCM2708 and BCM2709 UART0 (PL011)"
+               depends on ARCH_BCM2708 || ARCH_BCM2709
                help
                  Say Y here if you want the debug print routines to direct
                  their output to UART 0. The port must have been initialised
diff --git a/arch/arm/include/asm/hw_irq.h b/arch/arm/include/asm/hw_irq.h
index 9beb929..8f8466e 100644
--- a/arch/arm/include/asm/hw_irq.h
+++ b/arch/arm/include/asm/hw_irq.h
@@ -8,7 +8,6 @@ static inline void ack_bad_irq(int irq)
 {
        extern unsigned long irq_err_count;
        irq_err_count++;
-       pr_crit("unexpected IRQ trap at vector %02x\n", irq);
 }

 #define ARCH_IRQ_INIT_FLAGS    (IRQ_NOREQUEST | IRQ_NOPROBE)
diff --git a/arch/arm/mach-bcm2709/include/mach/entry-macro.S b/arch/arm/mach-bcm2709/include/mach/entry-macro.S
index 2e9f458..dbbeaac 100644
--- a/arch/arm/mach-bcm2709/include/mach/entry-macro.S
+++ b/arch/arm/mach-bcm2709/include/mach/entry-macro.S
@@ -92,7 +92,8 @@
        b       1050f
 1040:
        cmp     \tmp, #0
-       beq     1020f
+       mov     \irqnr, #0
+       beq     1050f

        /* handle local (e.g. timer) interrupts */
        @ For non-zero x, LSB(x) = 31 - CLZ(x^(x-1))

kconfig diff enabling EARLY_PRINTK:

 DEBUG_LL n -> y
+DEBUG_BCM2708_UART0 y
+EARLY_PRINTK y
popcornmix commented 8 years ago

I did correct the mov to moveq which is necessary. However it didn't boot for me.

notro commented 8 years ago

I think I'll leave this to the professionals and finish the "Drop ATAGS" PR instead. Do you want it on 4.2 or 4.3?

popcornmix commented 8 years ago

Go for 4.3 first. If all seems fine there we can cherry-pick it back to 4.2.

notro commented 8 years ago

@pelwell I have a strange situation here. In my "Drop ATAGS" patches I'm using arch/arm/mach-bcm/board_bcm2835.c as the 2708 board file (https://github.com/notro/linux/commits/multi2708). This means that it can boot both 2708 and 2835 Device Trees (but doesn't build the 2835 dtb). mkknlimg detects this as 283x: y, but when I boot this kernel the bootloader chooses the 2708 dtb. I think this is an intelligent choice :-), but why does it happen?

$ ~/work/notro-raspberrypi-linux/notro-raspberrypi-linux/scripts/knlinfo /boot/kernel.img
Kernel trailer found at 4357616/0x427df0:
  KVer: "Linux version 4.3.0-rc7+ (pi@raspi2) (gcc version 4.8.3 20140106 (prerelease) (crosstool-NG linaro-1.13.1-4.8-2014.01 - Linaro GCC 2013.11) ) #3 PREEMPT Wed Oct 28 17:57:32 CET 2015"
  DTOK: true
  283x: true

$ ls -l /boot/*.dtb
-rwxr-xr-x 1 root root 10214 Oct 28 16:43 /boot/bcm2708-rpi-b.dtb
-rwxr-xr-x 1 root root 10493 Oct 28 16:43 /boot/bcm2708-rpi-b-plus.dtb
-rwxr-xr-x 1 root root 10218 Oct 28 16:43 /boot/bcm2708-rpi-cm.dtb
-rwxr-xr-x 1 root root 11113 Oct  4 20:17 /boot/bcm2709-rpi-2-b.dtb
-rwxr-xr-x 1 root root 10573 Oct 14 21:33 /boot/bcm2835-rpi-b.dtb
-rwxr-xr-x 1 root root 10856 Oct 14 21:33 /boot/bcm2835-rpi-b-plus.dtb
-rwxr-xr-x 1 root root 10557 Oct 14 21:33 /boot/bcm2835-rpi-cm.dtb

$ sudo vcdbg log msg
<snip>
001124.003: *** Restart logging
001125.080: Read command line from file 'cmdline.txt'
earlyprintk dwc_otg.lpm_enable=0 console=tty1 console=ttyAMA0,115200 root=/dev/mmcblk0p2 rootfstype=ext4 elevator=deadline fsck.repair=yes rootwait
001129.710: Loading 'kernel.img' from SD card
001383.699: Kernel trailer DTOK property says yes
001383.785: Loading 'bcm2708-rpi-b-plus.dtb' from SD card
001416.596: dtparam: pwr_led_gpio=35
001420.673: dtparam: cache_line_size=32
001464.784: Loaded overlay 'pitft28-resistive'
001512.819: gpioman: gpioman_get_pin_num: pin SDCARD_CONTROL_POWER not defined
003275.292: vchiq_core: vchiq_init_state: slot_zero = 0x5bc80000, is_master = 1
<snip>

$ dmesg | grep firmware
[    0.494347] raspberrypi-firmware soc:firmware: Attached to firmware from 2015-10-23 16:24
pelwell commented 8 years ago

I've no idea - that's pretty simple code. Can you upload your image somewhere so I can try it?

notro commented 8 years ago

My mistake, I had some leftover from previous testing: device_tree=bcm2708-rpi-b-plus.dtb

pelwell commented 8 years ago

Phew - I've had enough unexplained problems today.

notro commented 8 years ago

arch/arm/mach-bcm2709/delay.S doesn't seem to be used anywhere. Can I delete it?

popcornmix commented 8 years ago

Feel free to delete. Pretty sure it's not been used in years.

notro commented 8 years ago

"arm_loader: Don't overwrite the /soc/ranges DT property" - why was this done?

popcornmix commented 8 years ago

Is it causing a problem? ping @pelwell

notro commented 8 years ago

No, I'm just curious about changes that affect the Device Tree. Actually I have lost track of which changes the bootloader does to the DT (and why). Hopefully the bootloader will be open source one day and I can see for myself :-)

pelwell commented 8 years ago

Dream on. :-)

I did it so I could add another range for the 2709 ARM-local peripherals, which sit at 0x40000000 in ARM physical space.

notro commented 8 years ago

Yes I can see that I'm missing that range. I'm back running ARCH_BCM2835 on Pi2, so I updated the firmware in case something magic would happen and fix my problems, even though that range is a 1:1 mapping. I now have the 2 ranges, but it didn't help with my problems:

On Pi1 I get lockups without any error in less than 24 hours. Disabling usb gave me >48 hour run before I got a memcpy 'page domain fault'.

Pi2 fault:

[    8.100979] Unhandled fault: page domain fault (0x81b) at 0x76fb1000
[    8.108683] pgd = b82e8000
[    8.112605] [76fb1000] *pgd=3812a831, *pte=36ec075f, *ppte=36ec0c7f
[    8.120169] Internal error: : 81b [#1] PREEMPT SMP ARM
[    8.120302] systemd-journald[131]: Received request to flush runtime journal from PID 1
[    8.137524] Modules linked in: uio_pdrv_genirq uio i2c_dev snd_bcm2835 snd_pcm snd_timer snd fuse
[    8.137548] CPU: 2 PID: 307 Comm: sudo Not tainted 4.3.0+ #1
[    8.137556] Hardware name: BCM2836
[    8.137572] task: b819d7c0 ti: b8328000 task.ti: b8328000
[    8.137597] PC is at memcpy+0x50/0x330
[    8.137609] LR is at 0x72097665
[    8.137631] pc : [<8030af90>]    lr : [<72097665>]    psr: 20000013
[    8.137631] sp : b8329e04  ip : 646f6e0a  fp : b8329e4c
[    8.137638] r10: b8329f78  r9 : 76fb1000  r8 : 7366746f
[    8.137644] r7 : 6f720976  r6 : 65646f6e  r5 : 0a736673  r4 : 79730976
[    8.137651] r3 : 65646f6e  r2 : 000000a9  r1 : b9afc020  r0 : 76fb1000
[    8.137661] Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
[    8.137669] Control: 10c5387d  Table: 382e806a  DAC: 00000051
[    8.137680] Process sudo (pid: 307, stack limit = 0xb8328210)
[    8.137689] Stack: (0xb8329e04 to 0xb832a000)
[    8.137703] 9e00:          00000000 76fb1000 00000149 b9afc000 76fb1000 00000149 803170a8
[    8.137718] 9e20: b812a6c4 ba78ba04 b8329e40 00000000 00000051 b8142f80 00000000 00000149
[    8.137730] 9e40: b8329e5c b8329e50 80317338 80316f8c b8329eac b8329e60 80170260 8031731c
[    8.137747] 9e60: 00100073 b8183500 b8142fb0 b9279180 00000000 00000000 00000001 00000000
[    8.137758] 9e80: b8329ef4 b9aaed80 00000000 00000001 00000000 00000000 76fb1000 00000000
[    8.137775] 9ea0: b8329ed4 b8329eb0 801acd48 8016fed0 b8329f78 76fb1000 76fb1000 b9279180
[    8.137788] 9ec0: b8329f78 00000400 b8329f44 b8329ed8 8014e568 801accd0 00000022 00000003
[    8.137799] 9ee0: 00000003 b8183500 b8329f34 b8329ef8 80128d9c 80128530 00076fb1 00000000
[    8.137815] 9f00: 00000001 8014ecec 564e3be7 00000000 b8183538 00000003 00001000 b9279180
[    8.137851] 9f20: 76fb1000 b9279180 76fb1000 b9279180 b8329f78 00000400 b8329f74 b8329f48
[    8.137864] 9f40: 8014ed08 8014e538 80169aec 80169a5c 00000000 00000000 b9279180 b9279180
[    8.137875] 9f60: 00000400 76fb1000 b8329fa4 b8329f78 8014f62c 8014ec88 00000000 00000000
[    8.137886] 9f80: 55ed7008 55ed7008 55ed7008 00000003 80010128 b8328000 00000000 b8329fa8
[    8.137901] 9fa0: 8000ff60 8014f5ec 55ed7008 55ed7008 00000003 76fb1000 00000400 00000000
[    8.137914] 9fc0: 55ed7008 55ed7008 55ed7008 00000003 7eb96dc4 7eb96dc0 7eb96dc4 00000000
[    8.137928] 9fe0: 00000000 7eb96d5c 76e09164 76e5c3bc 60000010 00000003 00000000 00000000
[    8.137985] [<8030af90>] (memcpy) from [<803170a8>] (__copy_to_user_memcpy+0x128/0x190)
[    8.138008] [<803170a8>] (__copy_to_user_memcpy) from [<80317338>] (arm_copy_to_user+0x28/0x2c)
[    8.138026] [<80317338>] (arm_copy_to_user) from [<80170260>] (seq_read+0x39c/0x454)
[    8.138044] [<80170260>] (seq_read) from [<801acd48>] (proc_reg_read+0x84/0x98)
[    8.138061] [<801acd48>] (proc_reg_read) from [<8014e568>] (__vfs_read+0x3c/0xe0)
[    8.138077] [<8014e568>] (__vfs_read) from [<8014ed08>] (vfs_read+0x8c/0x15c)
[    8.138101] [<8014ed08>] (vfs_read) from [<8014f62c>] (SyS_read+0x4c/0x8c)
[    8.138121] [<8014f62c>] (SyS_read) from [<8000ff60>] (ret_fast_syscall+0x0/0x1c)
[    8.138136] Code: f5d1f05c f5d1f07c e8b151f8 e2522020 (e8a051f8)
[    8.138150] ---[ end trace 60206ecb266e6146 ]---

Pi1 fault:

[260153.765615] Unhandled fault: page domain fault (0x81b) at 0x7e8fe000
[260153.782757] pgd = 9a648000
[260153.796398] [7e8fe000] *pgd=1a498831, *pte=195a214f, *ppte=195a283e
[260153.813826] Internal error: : 81b [#1] PREEMPT SMP ARM
[260153.830105] Modules linked in: cfg80211 rfkill uio_pdrv_genirq uio i2c_dev snd_bcm2835 snd_pcm snd_timer snd fuse
[260153.852159] CPU: 0 PID: 117 Comm: systemd-journal Not tainted 4.3.0+ #1
[260153.870616] Hardware name: BCM2835
[260153.885913] task: 9a702880 ti: 9a70a000 task.ti: 9a70a000
[260153.903276] PC is at memcpy+0xb0/0x330
[260153.919034] LR is at 0x0
[260153.933304] pc : [<8030aff0>]    lr : [<00000000>]    psr: 00000013
[260153.933304] sp : 9a70be64  ip : 00000018  fp : 9a70beac
[260153.968211] r10: 00000000  r9 : 9a70a000  r8 : 000014d7
[260153.985239] r7 : 00000008  r6 : 7e8fe000  r5 : 00000000  r4 : 00000008
[260154.003563] r3 : 60000013  r2 : ffffffe8  r1 : 9a70bf28  r0 : 7e8fe000
[260154.021901] Flags: nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
[260154.040994] Control: 00c5387d  Table: 1a648008  DAC: 00000051
[260154.058810] Process systemd-journal (pid: 117, stack limit = 0x9a70a210)
[260154.077748] Stack: (0x9a70be64 to 0x9a70c000)
[260154.094251] be60:          00000000 7e8fe000 00000008 9a70bf20 7e8fe000 00000008 803170a8
[260154.114655] be80: 9a4983f8 9b7b757c 9a70bf5c 00000051 7e8fdfa0 5676f4c8 000000c5 80010128
[260154.135181] bea0: 9a70bebc 9a70beb0 80317338 80316f8c 9a70bf3c 9a70bec0 8015339c 8031731c
[260154.155817] bec0: 00000004 00000000 00000000 000014d7 00008124 00000001 00000000 00000000
[260154.176656] bee0: 00000000 00000000 00000000 00000000 00000000 00000000 00000400 00000000
[260154.197454] bf00: 00000000 00000000 564a2db9 1d34ce80 564a2db9 1d34ce80 564a2db9 1d34ce80
[260154.218328] bf20: 000014d7 00000000 7e8fdfa0 76f6a000 9a70bfa4 9a70bf40 80153708 80153244
[260154.239353] bf40: 000014d7 00000000 00000004 80158124 00000001 00000000 00000000 00000000
[260154.260365] bf60: 00000000 00000000 564a2db9 1d34ce80 564a2db9 1d34ce80 564a2db9 1d34ce80
[260154.281502] bf80: 00000400 000000c5 00000000 00000000 80010040 5676f4c8 00000000 9a70bfa8
[260154.302785] bfa0: 8000ff60 801536dc 5676f4c8 76f6a000 00000012 7e8fdfa0 7e8fdfa0 00000012
[260154.324211] bfc0: 5676f4c8 76f6a000 5676f4c8 000000c5 7e8fe0a4 5676fe90 5676f3d0 7e8fe0d4
[260154.345727] bfe0: 54b6dd7c 7e8fdf90 76e12a84 76e7286c 20000010 00000012 00005519 75161000
[260154.367287] [<8030aff0>] (memcpy) from [<803170a8>] (__copy_to_user_memcpy+0x128/0x190)
[260154.388725] [<803170a8>] (__copy_to_user_memcpy) from [<80317338>] (arm_copy_to_user+0x28/0x2c)
[260154.410911] [<80317338>] (arm_copy_to_user) from [<8015339c>] (cp_new_stat64+0x164/0x18c)
[260154.432602] [<8015339c>] (cp_new_stat64) from [<80153708>] (SyS_fstat64+0x38/0x40)
[260154.453734] [<80153708>] (SyS_fstat64) from [<8000ff60>] (ret_fast_syscall+0x0/0x1c)
[260154.475114] Code: e4804004 e4805004 e4806004 e4807004 (e4808004)
[260155.011216] ---[ end trace a0adb08b4b4d5e97 ]---
notro commented 8 years ago

And I have to disable CONFIG_BCM_VC_SM. At least on Pi1 it hangs in vc_sm_connected_init() -> vchi_connect(). These are the patches I use: https://github.com/notro/linux/commits/bcm2836

notro commented 8 years ago

I want to have uart debug output and use the same kernel binary on both Pi1 and Pi2. This means I need a way to dynamically set the physical address of the uart.

I tried this hack:

diff --git a/arch/arm/include/debug/pl01x.S b/arch/arm/include/debug/pl01x.S
index f7d8323..ff9e395 100644
--- a/arch/arm/include/debug/pl01x.S
+++ b/arch/arm/include/debug/pl01x.S
@@ -19,10 +19,16 @@
 #define UART01x_FR     0x14
 #endif

+#define DEBUG_UART_P1_PHYS     0x20201000
+#define DEBUG_UART_P2_PHYS     0x3f201000
+
 #ifdef CONFIG_DEBUG_UART_PHYS
                .macro  addruart, rp, rv, tmp
-               ldr     \rp, =CONFIG_DEBUG_UART_PHYS
                ldr     \rv, =CONFIG_DEBUG_UART_VIRT
+               mrc     p15, 0, \tmp, c0, c0, 0 @ processor id
+               tst     \tmp, #0x410fb767       @ ARM1176JZF-S r0p7
+               ldreq   \rp, =DEBUG_UART_P1_PHYS
+               ldrne   \rp, =DEBUG_UART_P2_PHYS
                .endm
 #endif

Which gives me this error:

arch/arm/kernel/debug.S:81: Error: invalid constant (410fb767) after fixup

If I understand it right this has something to do with the number being too big for immediate register load.

How can I solve this?

pelwell commented 8 years ago

TST is a non-destructive AND that sets the condition flags. If you want to confirm that \tmp == 0x410fb767 then use CMP instead which is a non-destructive SUB, but that still leaves you with the problem of the size of the literal. The clearest way to solve this is to load the constant into another register, something like this:

               .macro  addruart, rp, rv, tmp
               ldr     \rp, =0x410fb767 @ ARM1176JZF-S r0p7
               ldr     \rv, =CONFIG_DEBUG_UART_VIRT
               mrc     p15, 0, \tmp, c0, c0, 0 @ processor id
               cmp     \tmp, \rp @ Is it a Pi 1?
               ldreq   \rp, =DEBUG_UART_P1_PHYS
               ldrne   \rp, =DEBUG_UART_P2_PHYS
               .endm

You could also pick a small number of bits (perhaps even 1) and use TST to avoid the extra register load, but I think this way is OK.

notro commented 8 years ago

Thanks Phil, that made the compiler happy. It's working on Pi2, I get the Uncompressing Linux message, but not on Pi1. Any thoughts?

diff --git a/arch/arm/include/debug/pl01x.S b/arch/arm/include/debug/pl01x.S
index f7d8323..8c8a796 100644
--- a/arch/arm/include/debug/pl01x.S
+++ b/arch/arm/include/debug/pl01x.S
@@ -19,9 +19,16 @@
 #define UART01x_FR     0x14
 #endif

+#define DEBUG_UART_P1_PHYS     0x20201000
+#define DEBUG_UART_P2_PHYS     0x3f201000
+
 #ifdef CONFIG_DEBUG_UART_PHYS
                .macro  addruart, rp, rv, tmp
-               ldr     \rp, =CONFIG_DEBUG_UART_PHYS
+               ldr     \rp, =0x410fb767        @ ARM1176JZF-S r0p7
+               mrc     p15, 0, \tmp, c0, c0, 0 @ processor id
+               cmp     \tmp, \rv               @ Is it a Pi 1?
+               ldreq   \rp, =DEBUG_UART_P1_PHYS
+               ldrne   \rp, =DEBUG_UART_P2_PHYS
                ldr     \rv, =CONFIG_DEBUG_UART_VIRT
                .endm
 #endif
notro commented 8 years ago

I see the problem after posting. I need to cmp with \rp, not \rv.

pelwell commented 8 years ago

Yep - I was just writing that!

notro commented 8 years ago

Perfect - it's working now.