trombik / esp_wireguard

WireGuard Implementation for ESP-IDF.
Other
197 stars 36 forks source link

Guru Meditation Error #55

Open indexds opened 14 hours ago

indexds commented 14 hours ago

Hello,

I've been trying to use this component with Rust and I've been met with a crash that has me scratching my head.

The component was imported through the ESP component registry like so:

[[package.metadata.esp-idf-sys.extra_components]]
remote_component = { name = "trombik/esp_wireguard", version = "0.9" }
bindings_header = "src/wireguard/bindings.h"
bindings_module = "wg"

And I can now use this to add the functions exposed in esp_wireguard.h to scope:

use esp_idf_svc::sys::wg::{
    esp_wireguard_connect,
    esp_wireguard_disconnect,
    esp_wireguard_init,
    esp_wireguard_set_default,
    esp_wireguardif_peer_is_up,
    wireguard_config_t,
    wireguard_ctx_t,
};

Now that this is done I try to initialize wireguard and connect to a remote peer like in the example,

/// Brings up the WireGuard tunnel using the settings stored in NVS.
///
/// Steps visible in this excerpt: lock the NVS handle and load the WireGuard
/// settings through `NvsWireguard::new`, convert them to raw C strings, wait
/// for SNTP to report `SyncStatus::Completed`, fill in `wireguard_config_t`
/// and `wireguard_ctx_t`, call `esp_wireguard_init`, and finally run
/// `wg_connect_wrapper` through `esp_netif_tcpip_exec`.
///
/// # Errors
/// Fails when the NVS mutex cannot be locked, when loading or converting a
/// setting fails (including the port not parsing as `i32`), when SNTP does
/// not complete within the polling loop, or when either ESP-IDF call returns
/// a code other than `ESP_OK`.
///
/// NOTE(review): the tail of this function is elided in the excerpt, so what
/// is ultimately returned as `*mut wireguard_ctx_t` cannot be seen here.
pub fn start_wg_tunnel(
    nvs: Arc<Mutex<EspNvs<NvsDefault>>>,
) -> anyhow::Result<*mut wireguard_ctx_t> {
    // `try_lock` does not wait: if the lock cannot be acquired right away the
    // function bails out with an error instead of blocking.
    let nvs = nvs
        .try_lock()
        .map_err(|_| anyhow::anyhow!("Failed to lock NVS Mutex!"))?;

    let nvs_wg = NvsWireguard::new(&nvs)?;

    // `CString::into_raw` gives up ownership of each buffer so the pointer can
    // be handed to the C API. None of the early-return paths shown below
    // reclaims them with `CString::from_raw`, so on those paths they leak.
    let endpoint = CString::new(nvs_wg.wg_addr.clean_string().as_str())?.into_raw();
    let port: i32 = nvs_wg.wg_port.clean_string().as_str().parse()?;

    let private_key = CString::new(nvs_wg.wg_client_priv_key.clean_string().as_str())?.into_raw();
    let public_key = CString::new(nvs_wg.wg_server_pub_key.clean_string().as_str())?.into_raw();

    // Hard-coded allowed-IP range passed to the WireGuard config.
    let allowed_ip = CString::new("192.168.0.0")?.into_raw();
    let allowed_ip_mask = CString::new("255.255.0.0")?.into_raw();

    let sntp = EspSntp::new_default()?;

    // Poll the SNTP status at most 11 times (0..=10), parking for up to one
    // second between polls; the final iteration gives up with an error.
    for retries in 0..=10 {
        if sntp.get_sync_status() == SyncStatus::Completed {
            log::info!("Time synchronized successfully.");
            break;
        }
        log::info!("Waiting for time synchronization...");
        std::thread::park_timeout(std::time::Duration::from_secs(1));

        if retries == 10 {
            log::error!("Failed to synchronize time after multiple attempts!");
            return Err(anyhow::anyhow!("Failed to synchronize time!"));
        }
    }

    // NOTE(review): `wg_config_t` and `wg_ctx_t` below are locals of this
    // block, so `config_ptr` and `ctx_ptr` point into this stack frame and are
    // only valid while it is alive. The return type is `*mut wireguard_ctx_t`;
    // if the elided remainder returns `ctx_ptr` (or the C side keeps either
    // pointer after this function returns) it would dangle - confirm.
    unsafe {
        let mut wg_config_t = wireguard_config_t {
            private_key,
            listen_port: 51820,
            fw_mark: 0,
            public_key,
            // No preshared key is supplied.
            preshared_key: core::ptr::null_mut(),
            allowed_ip,
            allowed_ip_mask,
            endpoint,
            port,
            persistent_keepalive: 20,
        };

        let config_ptr = &mut wg_config_t as *mut _;

        // The context starts with a null `netif`; `config` points at the
        // stack-allocated config above.
        let mut wg_ctx_t = wireguard_ctx_t {
            config: config_ptr,
            netif: core::ptr::null_mut(),
            netif_default: esp_idf_svc::sys::wg::netif_default,
        };

        let ctx_ptr = &mut wg_ctx_t as *mut _;

        let res = esp_wireguard_init(config_ptr, ctx_ptr);
        if res != ESP_OK {
            log::error!("Failed to initialize WireGuard! - CODE: {}", res);
            // `res != ESP_OK` on this path; presumably `EspError::from`
            // yields `Some` for every non-OK code so the unwrap cannot
            // panic - confirm.
            return Err(EspError::from(res).unwrap().into());
        } else {
            log::info!("WireGuard initialized successfully.");
        }        

        // The connect call is routed through `esp_netif_tcpip_exec` rather
        // than invoked directly - presumably so the lwIP calls it makes run
        // in the TCP/IP task context; confirm against the ESP-IDF docs.
        let res = esp_netif_tcpip_exec(Some(wg_connect_wrapper), ctx_ptr as *mut core::ffi::c_void);
        if res != ESP_OK {
            log::error!("Failed to connect to wireguard peer! - CODE: {}", res);
            return Err(EspError::from(res).unwrap().into());
        }
...
/// C-ABI trampoline that forwards an untyped context pointer to
/// `esp_wireguard_connect`.
///
/// Returns `ESP_FAIL` (after logging) when `ctx` is null; otherwise returns
/// whatever `esp_wireguard_connect` returns.
///
/// # Safety
/// `ctx` must either be null or be a pointer that `esp_wireguard_connect`
/// can accept as a `*mut wireguard_ctx_t`.
pub unsafe extern "C" fn wg_connect_wrapper(ctx: *mut core::ffi::c_void) -> i32 {
    if !ctx.is_null() {
        // Reinterpret the opaque pointer as the WireGuard context and connect.
        return esp_wireguard_connect(ctx.cast::<wireguard_ctx_t>());
    }

    log::error!("WireGuard context is null in the callback!");
    ESP_FAIL
}

Yet when I run the program, I get a guru meditation error, supposedly implying a null pointer was dereferenced somewhere.

I (3651) esp_idf_svc::sntp: Initializing
I (3651) esp_idf_svc::sntp: Initialization complete
I (3661) charizhard::wireguard: Waiting for time synchronization...
I (3731) wifi:dp: 2, bi: 102400, li: 4, scale listen interval from 307200 us to 409600 us
I (3731) wifi:AP's beacon interval = 102400 us, DTIM period = 2
I (4651) esp_netif_handlers: sta ip: 192.168.229.73, mask: 255.255.255.0, gw: 192.168.229.242
I (4671) charizhard::wireguard: Waiting for time synchronization...
I (5681) charizhard::wireguard: Waiting for time synchronization...
I (5691) wifi:<ba-add>idx:0 (ifx:0, 4e:c7:3e:ef:32:4d), tid:0, ssn:3, winSize:64
I (6091) wifi:<ba-add>idx:1 (ifx:0, 4e:c7:3e:ef:32:4d), tid:3, ssn:0, winSize:64
I (6691) charizhard::wireguard: Time synchronized successfully.
I (6691) charizhard::wireguard: WireGuard initialized successfully.
I (6691) esp_wireguard: allowed_ip: 192.168.0.0
Guru Meditation Error: Core  0 panic'ed (LoadProhibited). Exception was unhandled.

Core  0 register dump:
PC      : 0x401a078c  PS      : 0x00060230  A0      : 0x8011ef12  A1      : 0x3ffbac70
A2      : 0x3ffb6258  A3      : 0x000000d0  A4      : 0x3ffbad58  A5      : 0x3ffbae38
A6      : 0x00000000  A7      : 0x00000000  A8      : 0x00000000  A9      : 0x0000a8c0
A10     : 0x3f427ed0  A11     : 0x00000000  A12     : 0x00060220  A13     : 0x00000000
A14     : 0x3ffbac00  A15     : 0xff000000  SAR     : 0x00000004  EXCCAUSE: 0x0000001c
EXCVADDR: 0x00000000  LBEG    : 0x4000c46c  LEND    : 0x4000c477  LCOUNT  : 0xffffffff

Backtrace: 0x401a0789:0x3ffbac70 0x4011ef0f:0x3ffbacf0 0x4011effb:0x3ffbad10 0x4011f11d:0x3ffbad90 0x4013cb00:0x3ffbadc0 0x400d83fb:0x3ffbae80 0x401a0fa3:0x3ffbaed0 0x401a0f93:0x3ffbaef0 0x40093c08:0x3ffbaf10 0x4019fea9:0x3ffbaf30 0x401a099f:0x3ffbaf50 0x400d7ede:0x3ffbaf90 0x400db4d5:0x3ffbb1f0 0x4019869b:0x3ffbb300 0x400dbdac:0x3ffbb320 0x400f557e:0x3ffbb340 0x400db53f:0x3ffbb370 0x400e7b6b:0x3ffbb3a0 0x401a175f:0x3ffbb3c0

ELF file SHA256: 000000000

Rebooting...

I assume it has something to do with the lwIP code, but diving into the C implementation reveals little apart from a failure inside the netif_add function that I fail to understand. Is there something wrong with my code?

indexds commented 9 hours ago

As said before, I've narrowed down the error to esp_wireguard_netif_create,

/*
 * Create the WireGuard lwIP network interface described by `config` and mark
 * it administratively up.
 *
 * Returns:
 *   ESP_ERR_INVALID_ARG  `config` is NULL, or `allowed_ip` / `allowed_ip_mask`
 *                        is rejected by ipaddr_aton()
 *   ESP_FAIL             netif_add() returned NULL
 *   ESP_OK               the interface was added and set up
 */
static esp_err_t esp_wireguard_netif_create(const wireguard_config_t *config)
{
    ip_addr_t gateway = IPADDR4_INIT_BYTES(0, 0, 0, 0);
    ip_addr_t ip_addr;
    ip_addr_t netmask;
    struct wireguardif_init_data wg = {0};

    if (config == NULL) {
        return ESP_ERR_INVALID_ARG;
    }

    /* Populate the init data that netif_add() receives as its state argument */
    wg.bind_netif = NULL;
    wg.listen_port = config->listen_port;
    wg.private_key = config->private_key;

    ESP_LOGI(TAG, "allowed_ip: %s", config->allowed_ip);

    /* Both textual addresses must parse before the interface is registered */
    if (ipaddr_aton(config->allowed_ip, &ip_addr) != 1) {
        ESP_LOGE(TAG, "ipaddr_aton: invalid allowed_ip: `%s`", config->allowed_ip);
        return ESP_ERR_INVALID_ARG;
    }
    if (ipaddr_aton(config->allowed_ip_mask, &netmask) != 1) {
        ESP_LOGE(TAG, "ipaddr_aton: invalid allowed_ip_mask: `%s`", config->allowed_ip_mask);
        return ESP_ERR_INVALID_ARG;
    }

    /* Hand the new interface to lwIP; wireguardif_init is the init callback */
    wg_netif = netif_add(&wg_netif_struct,
                         ip_2_ip4(&ip_addr),
                         ip_2_ip4(&netmask),
                         ip_2_ip4(&gateway),
                         &wg,
                         &wireguardif_init,
                         &ip_input);
    if (wg_netif == NULL) {
        ESP_LOGE(TAG, "netif_add: failed");
        return ESP_FAIL;
    }

    /* Administratively up only; the link-up flag is set automatically once
     * the peer connects */
    netif_set_up(wg_netif);
    return ESP_OK;
}

Specifically, the crash occurs when the interface is being created here:

    wg_netif = netif_add(
            &wg_netif_struct,
            ip_2_ip4(&ip_addr),
            ip_2_ip4(&netmask),
            ip_2_ip4(&gateway),
            &wg, &wireguardif_init,
            &ip_input);
    if (wg_netif == NULL) {
        ESP_LOGE(TAG, "netif_add: failed");
        err = ESP_FAIL;
        goto fail;
    }

We never get to see the "netif_add: failed" error, so clearly the crash occurs inside netif_add.