Closed italovalcy closed 1 year ago
The following patch seems to improve the situation a bit:
root@1534638afadd:/src/kytos-of-core# git diff main.py
diff --git a/main.py b/main.py
index b8b8dff..07c2e13 100644
--- a/main.py
+++ b/main.py
@@ -2,6 +2,7 @@
import asyncio
import time
+import threading
from collections import defaultdict
from napps.kytos.of_core import settings
@@ -45,6 +46,7 @@ class Main(KytosNApp):
self.of_core_version_utils = {0x04: of_core_v0x04_utils}
self.execute_as_loop(settings.STATS_INTERVAL)
self._connection_lock = defaultdict(asyncio.Lock)
+ self._port_status_lock = defaultdict(threading.Lock)
# Per switch delay to request flow/port stats, to avoid all request
# being sent together and increase the overhead on the controller
@@ -604,55 +606,58 @@ class Main(KytosNApp):
port_no = port.port_no.value
event_name = 'kytos/of_core.switch.interface.'
- if reason == 'OFPPR_ADD':
- status = 'created'
- interface = Interface(name=port.name.value,
- address=port.hw_addr.value,
- port_number=port_no,
- switch=source.switch,
- state=port.state.value,
- features=port.curr)
- source.switch.update_interface(interface)
- try_to_activate_interface(interface, port)
-
- elif reason == 'OFPPR_MODIFY':
- status = 'modified'
- interface = source.switch.get_interface_by_port_no(port_no)
- current_status = None
- if interface:
- current_status = interface.state
- interface.state = port.state.value
- interface.name = port.name.value
- interface.address = port.hw_addr.value
- interface.features = port.curr
- else:
+ intfid = f"{source.switch.id}:{port_no}"
+
+ with self._port_status_lock[intfid]:
+ if reason == 'OFPPR_ADD':
+ status = 'created'
interface = Interface(name=port.name.value,
address=port.hw_addr.value,
port_number=port_no,
switch=source.switch,
state=port.state.value,
features=port.curr)
- source.switch.update_interface(interface)
- try_to_activate_interface(interface, port)
- self._send_specific_port_mod(port, interface, current_status)
-
- elif reason == 'OFPPR_DELETE':
- status = 'deleted'
- interface = source.switch.get_interface_by_port_no(port_no)
- interface.deactivate()
-
- event_name += status
- content = {'interface': interface}
-
- event = KytosEvent(name=event_name, content=content)
- self.controller.buffers.app.put(event)
+ source.switch.update_interface(interface)
+ try_to_activate_interface(interface, port)
+
+ elif reason == 'OFPPR_MODIFY':
+ status = 'modified'
+ interface = source.switch.get_interface_by_port_no(port_no)
+ current_status = None
+ if interface:
+ current_status = interface.state
+ interface.state = port.state.value
+ interface.name = port.name.value
+ interface.address = port.hw_addr.value
+ interface.features = port.curr
+ else:
+ interface = Interface(name=port.name.value,
+ address=port.hw_addr.value,
+ port_number=port_no,
+ switch=source.switch,
+ state=port.state.value,
+ features=port.curr)
+ source.switch.update_interface(interface)
+ try_to_activate_interface(interface, port)
+ self._send_specific_port_mod(port, interface, current_status)
+
+ elif reason == 'OFPPR_DELETE':
+ status = 'deleted'
+ interface = source.switch.get_interface_by_port_no(port_no)
+ interface.deactivate()
+
+ event_name += status
+ content = {'interface': interface}
+
+ event = KytosEvent(name=event_name, content=content)
+ self.controller.buffers.app.put(event)
- # pylint: disable=protected-access
- state_desc = {v: k for k, v in PortState._enum.items()}
- # pylint: enable=protected-access
- state = state_desc.get(port.state.value, port.state.value)
- msg = 'PortStatus %s interface %s:%s state %s'
- log.info(msg, status, source.switch.id, port_no, state)
+ # pylint: disable=protected-access
+ state_desc = {v: k for k, v in PortState._enum.items()}
+ # pylint: enable=protected-access
+ state = state_desc.get(port.state.value, port.state.value)
+ msg = 'PortStatus %s interface %s:%s state %s'
+ log.info(msg, status, source.switch.id, port_no, state)
def _get_version_from_bitmask(message_versions):
However, we still have some corner cases where the state is inconsistent on Kytos (no link_up event and the stored status is different from the switch status):
stored:
root@1534638afadd:/src/kytos-of-core# curl -s http://127.0.0.1:8181/api/kytos/topology/v3/interfaces | jq -r '.interfaces[] | .id + " " + (.active|tostring)' |grep false
00:00:00:00:00:00:00:12:1 false
switch status:
kytos $> controller.switches['00:00:00:00:00:00:00:12'].interfaces[12].status
Out[4]: <EntityStatus.UP: 'UP'>
Hi,
There is a possible race condition in the PortStatus handler, which can lead to an interface/link staying DOWN forever after a number of flaps on some interfaces. Basically, if the controller receives multiple PortStatus messages reporting that a port is UP and DOWN within a very short period of time (e.g., more than 6 events within a second), the controller may fail to set the interface status correctly.
How to reproduce: using the Mininet topology for AmLight (https://github.com/kytos-ng/kytos-end-to-end-tests/blob/master/tests/helpers.py#LL14C2-L14C2), try to force multiple events of port UP and down within a second:
In a normal situation, the links should go DOWN and then come back UP:
However, if you keep executing the command above (`ip link set...`), you may eventually end up in a situation where the interface is never activated again and the link_up event is never triggered.