Changes in IPv6 nexthops cause nl_cache_mngr routes to have duplicate nexthops.

tlsalmin commented 3 years ago

Given the test program that uses nl cache manager to listen for route updates:

#include <assert.h>
#include <netlink/netlink.h>
#include <netlink/route/link.h>
#include <netlink/route/route.h>
#include <signal.h>
#include <sys/epoll.h>
#include <sys/signalfd.h>
#include <unistd.h>

static void mngr_update(__attribute__((unused)) struct nl_cache *cache,
                        struct nl_object *obj, int type,
                        __attribute__((unused)) void *context)
{
  char *data;
  size_t data_len;
  struct nl_dump_params dp =
    {
      .dp_type = NL_DUMP_DETAILS,
      .dp_fd = open_memstream(&data, &data_len)
    };

  assert(!strcmp("route/route", nl_object_get_type(obj)));

  nl_object_dump(obj, &dp);

  fclose(dp.dp_fd);

  fprintf(stdout, "Received %d of route [%s]\n", type, data);

  free(data);
}

int main()
{
  int efd = epoll_create(EPOLL_CLOEXEC);

  if (efd != -1)
    {
      sigset_t mask;
      int sigfd;

      sigemptyset(&mask);
      sigaddset(&mask, SIGINT);
      sigaddset(&mask, SIGTERM);
      sigfd = signalfd(-1, &mask, SFD_NONBLOCK | SFD_CLOEXEC);
      if (sigfd != -1)
        {
          struct epoll_event ev = {.events = EPOLLIN,
                                   .data = {
                                     .fd = sigfd,
                                   }};

          if (!epoll_ctl(efd, EPOLL_CTL_ADD, sigfd, &ev))
            {
              struct nl_cache_mngr *mngr = NULL;

              if (!nl_cache_mngr_alloc(NULL, NETLINK_ROUTE, 0, &mngr))
                {
                  int mngr_fd = nl_cache_mngr_get_fd(mngr);
                  ev.data.fd = mngr_fd;
                  ev.events = EPOLLIN;

                  if (!epoll_ctl(efd, EPOLL_CTL_ADD, mngr_fd, &ev))
                    {
                      struct nl_cache *cache = NULL;
                      if (!nl_cache_mngr_add(mngr, "route/route", mngr_update,
                                             NULL, &cache))
                        {
                          while (epoll_wait(efd, &ev, 1, -1) == 1)
                            {
                              if (ev.data.fd == sigfd)
                                {
                                  struct signalfd_siginfo sinfo = {};

                                  if (read(sigfd, &sinfo, sizeof(sinfo)) ==
                                      sizeof(sinfo))
                                    {
                                      if (sinfo.ssi_signo == SIGTERM ||
                                          sinfo.ssi_signo == SIGINT)
                                        {
                                          break;
                                        }
                                    }
                                  else
                                    {
                                      break;
                                    }
                                }
                              else
                                {
                                  assert(ev.data.fd == mngr_fd);
                                  nl_cache_mngr_data_ready(mngr);
                                }
                            }
                        }
                    }
                  nl_cache_mngr_free(mngr);
                }
            }
          close(sigfd);
        }
      close(efd);
    }
}

Any IPv6 route changes will cause the cache handled by nl_cache_mngr to go out of sync:

~/src/random_tests ~clang -Wall -Wextra -I/usr/include/libnl3 nl_mngr_bug.c -o nl_mngr_bug -lnl-genl-3 -lnl-route-3 -lnl-3
~/src/random_tests ~./nl_mngr_bug &
[1] 17247
~/src/random_tests ~ip -6 r
::1 dev lo proto kernel metric 256 pref medium
anycast fe80:: dev enp34s0 proto kernel metric 0 pref medium
anycast fe80:: dev tun0 proto kernel metric 0 pref medium
fe80::/64 dev enp34s0 proto kernel metric 256 pref medium
fe80::/64 dev tun0 proto kernel metric 256 pref medium
multicast ff00::/8 dev enp34s0 proto kernel metric 256 pref medium
multicast ff00::/8 dev tun0 proto kernel metric 256 pref medium
~/src/random_tests ~sudo ip -6 r a fd99::100/128 via fe80::58e5:27ff:fea6:f594 dev enp34s0
Received 1 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev 3 <>
]
~/src/random_tests ~sudo ip -6 r chg fd99::100/128 via fe80::885b:7aff:fe5f:653c dev enp34s0
Received 5 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
]
~/src/random_tests ~sudo ip -6 r chg fd99::100/128 via fe80::885b:7aff:fe5f:653c dev enp34s0
Received 5 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
]
~/src/random_tests ~sudo ip -6 r chg fd99::100/128 via fe80::885b:7aff:fe5f:653c dev enp34s0
Received 5 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
]
~/src/random_tests ~sudo ip -6 r d fd99::100/128 
Received 5 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev 3 <>
]
~/src/random_tests ~ip -6 r                                                          
::1 dev lo proto kernel metric 256 pref medium
anycast fe80:: dev enp34s0 proto kernel metric 0 pref medium
anycast fe80:: dev tun0 proto kernel metric 0 pref medium
fe80::/64 dev enp34s0 proto kernel metric 256 pref medium
fe80::/64 dev tun0 proto kernel metric 256 pref medium
multicast ff00::/8 dev enp34s0 proto kernel metric 256 pref medium
multicast ff00::/8 dev tun0 proto kernel metric 256 pref medium

I'll have a look later to patch this. But if someone happens to have more time, go ahead.

Submitted on behalf of Forcepoint Finland.

libnl version was libnl-3.5.0 on gentoo.

tlsalmin commented 3 years ago

For a workaround, iterate any CHANGE objects. If they have more than one nexthop, loop over the nexthops with iter1, then loop over iter1 + 1 (iter2). If

rtnl_route_nh_compare(iter1, iter2, 0xffffffff, true)

matches, remove iter1 and iterate again until no more matches.

t0mmmy90 commented 3 years ago

Hi, I'm not sure if this is the same issue, but I've encountered a problem with duplicated nexthops while I was working on IPv6 ECMP routes. I've resolved it by adding a small patch to libnl which now I've added as a PR #290. I hope it will help you!

tlsalmin commented 3 years ago

Sorry doesn't help with the issue. Applied the patch to 3.5.0 and get the same results

~/src/random_tests ~./nl_mngr_bug &
[1] 73033
~/src/random_tests ~ip -6 r            
::1 dev lo proto kernel metric 256 pref medium
anycast fe80:: dev enp34s0 proto kernel metric 0 pref medium
anycast fe80:: dev tun0 proto kernel metric 0 pref medium
fe80::/64 dev enp34s0 proto kernel metric 256 pref medium
fe80::/64 dev tun0 proto kernel metric 256 pref medium
multicast ff00::/8 dev enp34s0 proto kernel metric 256 pref medium
multicast ff00::/8 dev tun0 proto kernel metric 256 pref medium
~/src/random_tests ~sudo ip -6 r a fd99::100/128 via fe80::58e5:27ff:fea6:f594 dev enp34s0
Received 1 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev enp34s0 <>
]
~/src/random_tests ~sudo ip -6 r chg fd99::100/128 via fe80::4609:b8ff:fe4e:1a1b dev enp34s0
Received 5 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev enp34s0 <>
    nexthop via fe80::4609:b8ff:fe4e:1a1b dev enp34s0 <>
]
~/src/random_tests ~sudo ip -6 r chg fd99::100/128 via fe80::885b:7aff:fe5f:653c dev enp34s0
Received 5 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev enp34s0 <>
    nexthop via fe80::4609:b8ff:fe4e:1a1b dev enp34s0 <>
    nexthop via fe80::885b:7aff:fe5f:653c dev enp34s0 <>
]
~/src/random_tests ~ip -6 r
::1 dev lo proto kernel metric 256 pref medium
fd99::100 via fe80::885b:7aff:fe5f:653c dev enp34s0 metric 1024 pref medium
anycast fe80:: dev enp34s0 proto kernel metric 0 pref medium
anycast fe80:: dev tun0 proto kernel metric 0 pref medium
fe80::/64 dev enp34s0 proto kernel metric 256 pref medium
fe80::/64 dev tun0 proto kernel metric 256 pref medium
multicast ff00::/8 dev enp34s0 proto kernel metric 256 pref medium
multicast ff00::/8 dev tun0 proto kernel metric 256 pref medium
~/src/random_tests ~ip -6 r d fd99::100
RTNETLINK answers: Operation not permitted
~/src/random_tests ~sudo ip -6 r d fd99::100
Received 5 of route [inet6 fd99::100 table main type unicast 
    scope global priority 0x400 protocol boot 
    nexthop via fe80::58e5:27ff:fea6:f594 dev enp34s0 <>
    nexthop via fe80::4609:b8ff:fe4e:1a1b dev enp34s0 <>
]
~/src/random_tests ~

Also my workaround doesn't work with change. I think its the same as the patch. This only works to remove duplicates of the same, but doesn't fix behaviour when NLM_F_REPLACE is in the netlink message.

t0mmmy90 commented 3 years ago

You're right, I was facing an issue with having a nexthop X for example 3 times in the same route and this is why the patch helped.

tlsalmin commented 3 years ago

I don't see lib/route/route_obj.c taking NLM_F_REPLACE into consideration at all. At least the kernel side in fib6_add_rt2node removes all siblings (nexthops) when NLM_F_REPLACE is present if they are purely RTF_GATEWAY routes.

tlsalmin commented 3 years ago

Added pull request for fix https://github.com/thom311/libnl/pull/293

thom311 / libnl

Changes in IPv6 nexthops cause nl_cache_mngr routes to have duplicate nexthops. #288