mengdemao / lazybsd

User-space network library implementation
https://mengdemao.github.io/lazybsd/
BSD 3-Clause "New" or "Revised" License

eth device analysis #54

Open mengdemao opened 2 weeks ago

mengdemao commented 2 weeks ago

Network device

struct rte_eth_dev {
    eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
    eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
    eth_tx_prep_t tx_pkt_prepare; /**< Pointer to PMD transmit prepare function. */
    /**
     * Next two fields are per-device data but *data is shared between
     * primary and secondary processes and *process_private is per-process
     * private. The second one is managed by PMDs if necessary.
     */
    struct rte_eth_dev_data *data;  /**< Pointer to device data. */
    void *process_private; /**< Pointer to per-process device data. */
    const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
    struct rte_device *device; /**< Backing device */
    struct rte_intr_handle *intr_handle; /**< Device interrupt handle */
    /** User application callbacks for NIC interrupts */
    struct rte_eth_dev_cb_list link_intr_cbs;
    /**
     * User-supplied functions called from rx_burst to post-process
     * received packets before passing them to the user
     */
    struct rte_eth_rxtx_callback *post_rx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];
    /**
     * User-supplied functions called from tx_burst to pre-process
     * received packets before passing them to the driver for transmission.
     */
    struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];
    enum rte_eth_dev_state state; /**< Flag indicating the port state */
    void *security_ctx; /**< Context for security ops */

    uint64_t reserved_64s[4]; /**< Reserved for future fields */
    void *reserved_ptrs[4];   /**< Reserved for future fields */
} __rte_cache_aligned;
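
The fast-path entry points (rx_pkt_burst / tx_pkt_burst) and the slow-path ops table are plain function pointers, so a PMD installs them from its init callback. A minimal sketch (not lazybsd or DPDK source; my_rx_burst, my_tx_burst and my_dev_ops are hypothetical names):

#include <rte_ethdev_driver.h> /* internal driver API; the header name varies across DPDK versions */

/* Hypothetical PMD fast-path handlers and ops table (declarations only). */
uint16_t my_rx_burst(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t my_tx_burst(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
extern const struct eth_dev_ops my_dev_ops;

static int
my_dev_init(struct rte_eth_dev *eth_dev)
{
    eth_dev->rx_pkt_burst = my_rx_burst; /* called from rte_eth_rx_burst() */
    eth_dev->tx_pkt_burst = my_tx_burst; /* called from rte_eth_tx_burst() */
    eth_dev->dev_ops = &my_dev_ops;      /* control-path callbacks */
    return 0;
}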

Function analysis

  1. Transmit function
typedef uint16_t (*eth_tx_burst_t)(void *txq,
                   struct rte_mbuf **tx_pkts,
                   uint16_t nb_pkts);
/**< @internal Send output packets on a transmit queue of an Ethernet device. */

static inline uint16_t
rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id,
         struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
    struct rte_eth_dev *dev = &rte_eth_devices[port_id];

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
    RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
    RTE_FUNC_PTR_OR_ERR_RET(*dev->tx_pkt_burst, 0);

    if (queue_id >= dev->data->nb_tx_queues) {
        RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
        return 0;
    }
#endif

#ifdef RTE_ETHDEV_RXTX_CALLBACKS
    struct rte_eth_rxtx_callback *cb;

    /* __ATOMIC_RELEASE memory order was used when the
     * call back was inserted into the list.
     * Since there is a clear dependency between loading
     * cb and cb->fn/cb->next, __ATOMIC_ACQUIRE memory order is
     * not required.
     */
    cb = __atomic_load_n(&dev->pre_tx_burst_cbs[queue_id],
                __ATOMIC_RELAXED);

    if (unlikely(cb != NULL)) {
        do {
            nb_pkts = cb->fn.tx(port_id, queue_id, tx_pkts, nb_pkts,
                    cb->param);
            cb = cb->next;
        } while (cb != NULL);
    }
#endif

    return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
}
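
From the application side, the usual pattern is to hand a burst to the queue and handle whatever the driver did not accept; unsent mbufs still belong to the caller. A minimal usage sketch (port/queue ids and the helper name are assumptions, the APIs are standard DPDK):

#include <rte_branch_prediction.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Send a burst and free whatever the driver did not take. */
static void
send_burst(uint16_t port_id, uint16_t queue_id,
           struct rte_mbuf **bufs, uint16_t nb_pkts)
{
    uint16_t nb_tx = rte_eth_tx_burst(port_id, queue_id, bufs, nb_pkts);

    /* On a full TX ring nb_tx < nb_pkts; the leftovers must be freed
     * (or re-queued) by the caller, the driver never takes ownership. */
    while (unlikely(nb_tx < nb_pkts))
        rte_pktmbuf_free(bufs[nb_tx++]);
}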
  2. Receive function
typedef uint16_t (*eth_rx_burst_t)(void *rxq,
                   struct rte_mbuf **rx_pkts,
                   uint16_t nb_pkts);
/**< @internal Retrieve input packets from a receive queue of an Ethernet device. */

static inline uint16_t
rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id,
         struct rte_mbuf **rx_pkts, const uint16_t nb_pkts)
{
    struct rte_eth_dev *dev = &rte_eth_devices[port_id];
    uint16_t nb_rx;

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
    RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
    RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0);

    if (queue_id >= dev->data->nb_rx_queues) {
        RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id);
        return 0;
    }
#endif
    nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
                     rx_pkts, nb_pkts);

#ifdef RTE_ETHDEV_RXTX_CALLBACKS
    struct rte_eth_rxtx_callback *cb;

    /* __ATOMIC_RELEASE memory order was used when the
     * call back was inserted into the list.
     * Since there is a clear dependency between loading
     * cb and cb->fn/cb->next, __ATOMIC_ACQUIRE memory order is
     * not required.
     */
    cb = __atomic_load_n(&dev->post_rx_burst_cbs[queue_id],
                __ATOMIC_RELAXED);

    if (unlikely(cb != NULL)) {
        do {
            nb_rx = cb->fn.rx(port_id, queue_id, rx_pkts, nb_rx,
                        nb_pkts, cb->param);
            cb = cb->next;
        } while (cb != NULL);
    }
#endif

    return nb_rx;
}
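
The receive side is normally driven by a busy poll loop over rte_eth_rx_burst(); the return value tells the caller how many mbufs are valid. A minimal sketch (BURST_SIZE and the handling are assumptions):

#include <rte_ethdev.h>
#include <rte_mbuf.h>

#define BURST_SIZE 32 /* assumption: typical burst size */

/* Poll one RX queue forever and drop every packet after "processing". */
static void
rx_loop(uint16_t port_id, uint16_t queue_id)
{
    struct rte_mbuf *bufs[BURST_SIZE];

    for (;;) {
        uint16_t nb_rx = rte_eth_rx_burst(port_id, queue_id,
                                          bufs, BURST_SIZE);

        for (uint16_t i = 0; i < nb_rx; i++) {
            /* real code would parse/forward bufs[i] here */
            rte_pktmbuf_free(bufs[i]);
        }
    }
}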
  3. Prepare function
typedef uint16_t (*eth_tx_prep_t)(void *txq,
                   struct rte_mbuf **tx_pkts,
                   uint16_t nb_pkts);
/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */
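
tx_pkt_prepare backs the public rte_eth_tx_prepare() helper, which an application may call right before rte_eth_tx_burst() so the PMD can check and patch offload metadata (checksums, TSO fields). A usage sketch (the wrapper name and arguments are assumptions):

#include <rte_errno.h>
#include <rte_ethdev.h>

/* Validate offload metadata first, then transmit only what passed. */
static uint16_t
prepare_and_send(uint16_t port_id, uint16_t queue_id,
                 struct rte_mbuf **bufs, uint16_t nb_pkts)
{
    uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, bufs, nb_pkts);

    /* nb_prep < nb_pkts means bufs[nb_prep] was rejected; rte_errno
     * carries the reason (e.g. an unsupported offload request). */
    return rte_eth_tx_burst(port_id, queue_id, bufs, nb_prep);
}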

Private data

struct rte_eth_dev_data {
    char name[RTE_ETH_NAME_MAX_LEN]; /**< Unique identifier name */

    void **rx_queues; /**< Array of pointers to RX queues. */
    void **tx_queues; /**< Array of pointers to TX queues. */
    uint16_t nb_rx_queues; /**< Number of RX queues. */
    uint16_t nb_tx_queues; /**< Number of TX queues. */

    struct rte_eth_dev_sriov sriov;    /**< SRIOV data */

    void *dev_private;
            /**< PMD-specific private data.
             *   @see rte_eth_dev_release_port()
             */

    struct rte_eth_link dev_link;   /**< Link-level information & status. */
    struct rte_eth_conf dev_conf;   /**< Configuration applied to device. */
    uint16_t mtu;                   /**< Maximum Transmission Unit. */
    uint32_t min_rx_buf_size;
            /**< Common RX buffer size handled by all queues. */

    uint64_t rx_mbuf_alloc_failed; /**< RX ring mbuf allocation failures. */
    struct rte_ether_addr *mac_addrs;
            /**< Device Ethernet link address.
             *   @see rte_eth_dev_release_port()
             */
    uint64_t mac_pool_sel[ETH_NUM_RECEIVE_MAC_ADDR];
            /**< Bitmap associating MAC addresses to pools. */
    struct rte_ether_addr *hash_mac_addrs;
            /**< Device Ethernet MAC addresses of hash filtering.
             *   @see rte_eth_dev_release_port()
             */
    uint16_t port_id;           /**< Device [external] port identifier. */

    __extension__
    uint8_t promiscuous   : 1, /**< RX promiscuous mode ON(1) / OFF(0). */
        scattered_rx : 1,  /**< RX of scattered packets is ON(1) / OFF(0) */
        all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */
        dev_started : 1,   /**< Device state: STARTED(1) / STOPPED(0). */
        lro         : 1;   /**< RX LRO is ON(1) / OFF(0) */
    uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT];
        /**< Queues state: HAIRPIN(2) / STARTED(1) / STOPPED(0). */
    uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT];
        /**< Queues state: HAIRPIN(2) / STARTED(1) / STOPPED(0). */
    uint32_t dev_flags;             /**< Capabilities. */
    enum rte_kernel_driver kdrv;    /**< Kernel driver passthrough. */
    int numa_node;                  /**< NUMA node connection. */
    struct rte_vlan_filter_conf vlan_filter_conf;
            /**< VLAN filter configuration. */
    struct rte_eth_dev_owner owner; /**< The port owner. */
    uint16_t representor_id;
            /**< Switch-specific identifier.
             *   Valid if RTE_ETH_DEV_REPRESENTOR in dev_flags.
             */

    uint64_t reserved_64s[4]; /**< Reserved for future fields */
    void *reserved_ptrs[4];   /**< Reserved for future fields */
} __rte_cache_aligned;
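
The important property of this structure is the split already noted in rte_eth_dev: *data (including dev_private) lives in shared memory visible to both primary and secondary processes, while process_private does not. Driver state is normally reached through dev_private; a minimal sketch with a hypothetical adapter type:

#include <rte_ethdev_driver.h> /* internal driver API; header name varies by DPDK version */

/* Hypothetical PMD-private state; real drivers (e.g. struct e1000_adapter)
 * define their own and usually hide this cast behind a macro. */
struct my_adapter {
    uint32_t flags;
    /* ... queues, register mappings, stats ... */
};

static inline struct my_adapter *
my_adapter_of(struct rte_eth_dev *dev)
{
    /* dev->data and dev_private sit in shared memory, so the same state is
     * reachable from primary and secondary processes. */
    return (struct my_adapter *)dev->data->dev_private;
}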
mengdemao commented 1 week ago

PCI PMD definition

static int eth_em_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
    struct rte_pci_device *pci_dev)
{
    return rte_eth_dev_pci_generic_probe(pci_dev,
        sizeof(struct e1000_adapter), eth_em_dev_init);
}

static int eth_em_pci_remove(struct rte_pci_device *pci_dev)
{
    return rte_eth_dev_pci_generic_remove(pci_dev, eth_em_dev_uninit);
}

static const struct rte_pci_id pci_id_em_map[] = {
    { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82573L) },
    { .vendor_id = 0, /* sentinel: terminates the match table */ },
};

static struct rte_pci_driver rte_em_pmd = {
    .id_table = pci_id_em_map,
    .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
    .probe = eth_em_pci_probe,
    .remove = eth_em_pci_remove,
};

RTE_PMD_REGISTER_PCI(net_e1000_em, rte_em_pmd);
RTE_PMD_REGISTER_PCI_TABLE(net_e1000_em, pci_id_em_map);
RTE_PMD_REGISTER_KMOD_DEP(net_e1000_em, "* igb_uio | uio_pci_generic | vfio-pci");
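
rte_eth_dev_pci_generic_probe() is a convenience wrapper: roughly, it allocates an rte_eth_dev for the PCI device together with sizeof(struct e1000_adapter) bytes of shared dev_private and then hands the new port to eth_em_dev_init. A simplified sketch of that flow (not the actual DPDK source, which has more error handling):

#include <errno.h>
#include <rte_ethdev_pci.h> /* header name varies by DPDK version */

/* Simplified sketch of the generic PCI probe flow. */
static int
generic_probe_sketch(struct rte_pci_device *pci_dev,
                     size_t private_data_size,
                     eth_dev_pci_callback_t dev_init)
{
    struct rte_eth_dev *eth_dev;
    int ret;

    /* allocate the ethdev plus its shared dev_private area */
    eth_dev = rte_eth_dev_pci_allocate(pci_dev, private_data_size);
    if (eth_dev == NULL)
        return -ENOMEM;

    ret = dev_init(eth_dev);          /* eth_em_dev_init() for this PMD */
    if (ret != 0)
        rte_eth_dev_release_port(eth_dev);

    return ret;
}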
mengdemao commented 1 week ago

rte bus registration

struct rte_pci_bus rte_pci_bus = {
    .bus = {
        .scan = rte_pci_scan,           // Scan the PCI bus and create devices
        .probe = pci_probe,             // Probe the scanned devices
        .cleanup = pci_cleanup,
        .find_device = pci_find_device,
        .plug = pci_plug,
        .unplug = pci_unplug,
        .parse = pci_parse,
        .devargs_parse = rte_pci_devargs_parse,
        .dma_map = pci_dma_map,
        .dma_unmap = pci_dma_unmap,
        .get_iommu_class = rte_pci_get_iommu_class,
        .dev_iterate = rte_pci_dev_iterate,
        .hot_unplug_handler = pci_hot_unplug_handler,
        .sigbus_handler = pci_sigbus_handler,
    },
    .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
    .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
};

RTE_REGISTER_BUS(pci, rte_pci_bus.bus);
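
RTE_REGISTER_BUS expands to a constructor that runs before main(), names the bus and appends it to EAL's global bus list; that is why the PCI bus is already registered by the time rte_eal_init() starts scanning. A rough sketch of the expansion in the context of the file above (the real macro also assigns a constructor priority so buses register before drivers):

/* Approximate expansion of RTE_REGISTER_BUS(pci, rte_pci_bus.bus). */
RTE_INIT(businitfn_pci)
{
    rte_pci_bus.bus.name = "pci";
    rte_bus_register(&rte_pci_bus.bus);
}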

Function definitions

int rte_bus_scan(void);   // ==> scan
int rte_bus_probe(void);  // ==> probe

scan: scans the PCI bus for devices and creates an rte_pci_device for each one found
probe: matches the scanned devices against the registered drivers and calls the matching driver's probe callback
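
Both hooks are driven from EAL startup: device discovery happens first, and driver matching happens later, once memory and lcores are set up. A simplified ordering sketch (error handling omitted; the real rte_eal_init() does much more):

#include <rte_bus.h>

/* Simplified view of the bus-related steps inside rte_eal_init(). */
static int
eal_init_bus_steps(void)
{
    /* 1. discovery: for the PCI bus, rte_pci_scan() walks the bus and
     *    fills rte_pci_bus.device_list with rte_pci_device entries */
    if (rte_bus_scan() != 0)
        return -1;

    /* ... memory subsystem, lcore/thread and interrupt setup ... */

    /* 2. matching: for the PCI bus, pci_probe() pairs each scanned device
     *    with a registered driver (e.g. rte_em_pmd) and calls its ->probe */
    if (rte_bus_probe() != 0)
        return -1;

    return 0;
}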