luigirizzo / netmap

Automatically exported from code.google.com/p/netmap
BSD 2-Clause "Simplified" License
1.86k stars 537 forks source link

krings may not be properly aligned #921

Open jcaplan opened 1 year ago

jcaplan commented 1 year ago

https://github.com/luigirizzo/netmap/blob/b58a473becf02c03db4504bbd66987ef5b2fc3d8/sys/dev/netmap/netmap.c#L877

Depending on the values of [n] (specifically if n[0] + n[1] is odd), then kring may not be properly aligned.

giuseppelettieri commented 1 year ago

You are right. tailroom is only used in VALE with a large enough alignment, but better to be explicit. I have pushed a new new commit for this.

jcaplan commented 1 year ago

I actually encountered this when tailroom passed into the function is 0. We maintain a port of FreeBSD network stack to QNX. when we upgraded to GCC12 we started getting segfaults running ctrl-api-test due to an optimization using FP registers and simd instruction on unaligned kring address.

For us the solution was to check that kring % __alignof__(*kring) == 0.

Your solution would not have fixed the problem for us.

giuseppelettieri commented 1 year ago

mmm, OK, that's an entirely different story. Those instructions want 16-byte alignment (old wisdom was that FP registers were off-limits in the kernel, but things change). I'll see what I can do.

jcaplan commented 1 year ago

QNX is a micro kernel so we’re running in user space

giuseppelettieri commented 1 year ago

OK, second attempt. Incidentally, the misalignment may also have created false-sharing in multithreaded apps.

diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index 59533db53..744ad2887 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -837,6 +837,10 @@ netmap_default_bufcfg(struct netmap_kring *kring, uint64_t target)
  *                    |          |  } tailroom bytes
  *                    |          | /
  *                    +----------+
+ * netmap_kring       |          | (aligned to NM_KRING_ALIGNMENT)
+ * structs            ~          ~
+ *                    |          |
+ *                    +----------+
  *
  * Note: for compatibility, host krings are created even when not needed.
  * The tailroom space is currently used by vale ports for allocating leases.
@@ -861,9 +865,9 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
    n[NR_TX] = netmap_all_rings(na, NR_TX);
    n[NR_RX] = netmap_all_rings(na, NR_RX);

-   len = (n[NR_TX] + n[NR_RX]) *
-       (sizeof(struct netmap_kring) + sizeof(struct netmap_kring *))
-       + tailroom;
+   len = nm_tailroom_align((n[NR_TX] + n[NR_RX]) *
+       sizeof(struct netmap_kring *) + tailroom) +
+       (n[NR_TX] + n[NR_RX]) * sizeof(struct netmap_kring);

    na->tx_rings = nm_os_malloc((size_t)len);
    if (na->tx_rings == NULL) {
@@ -874,7 +878,8 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
    na->tailroom = na->rx_rings + n[NR_RX];

    /* link the krings in the krings array */
-   kring = (struct netmap_kring *)((char *)na->tailroom + tailroom);
+   kring = (struct netmap_kring *)
+       nm_tailroom_align((uint64_t)((char *)na->tailroom + tailroom));
    for (i = 0; i < n[NR_TX] + n[NR_RX]; i++) {
        na->tx_rings[i] = kring;
        kring++;
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index 066bb3f3b..669048f17 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -407,6 +407,7 @@ struct netmap_zmon_list {
  * RX rings attached to the VALE switch are accessed by both senders
  * and receiver. They are protected through the q_lock on the RX ring.
  */
+#define NM_KRING_ALIGNMENT 64
 struct netmap_kring {
    struct netmap_ring  *ring;

@@ -584,9 +585,9 @@ struct netmap_kring {
 #endif
 }
 #ifdef _WIN32
-__declspec(align(64));
+__declspec(align(NM_KRING_ALIGNMENT));
 #else
-__attribute__((__aligned__(64)));
+__attribute__((__aligned__(NM_KRING_ALIGNMENT)));
 #endif

 /* return 1 iff the kring needs to be turned on */
@@ -1505,9 +1506,9 @@ int netmap_krings_create(struct netmap_adapter *na, u_int tailroom);
 /*
  * tailroom must be properly aligned with nm_tailroom_align().
  */
-static inline u_int
-nm_tailroom_align(u_int tr) {
-   return (tr + 15) & ~15U;
+static inline uint64_t
+nm_tailroom_align(uint64_t tr) {
+   return (tr + (NM_KRING_ALIGNMENT - 1)) & ~((uint64_t)NM_KRING_ALIGNMENT - 1);
 }

 /* deletes the kring array of the adapter. The array must have
diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c
index fd4682751..f5e65f1c0 100644
--- a/sys/dev/netmap/netmap_vale.c
+++ b/sys/dev/netmap/netmap_vale.c
@@ -436,7 +436,7 @@ netmap_vale_vp_krings_create(struct netmap_adapter *na)
    /*
     * Leases are attached to RX rings on vale ports
     */
-   tailroom = nm_tailroom_align(sizeof(uint32_t) * na->num_rx_desc * nrx);
+   tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;

    error = netmap_krings_create(na, tailroom);
    if (error)