vislee / leevis.com

Blog
87 stars 13 forks source link

linux-0.98 socket通知 #184

Open vislee opened 2 years ago

vislee commented 2 years ago

概述

代码

main.c 文件中的 start_kernel 函数会调用 sock_init 函数初始化网络文件系统,sock_init 又会调用已注册的协议族函数表中对应的初始化函数。AF_INET 对应的函数表是 inet_proto_ops,其中的初始化函数是 ip_proto_init。该函数除了注册传输层支持的协议之外,还初始化了链路层设备,并注册了定时器回调函数。

/* Hardware should be inited here. */
/*
 * Network protocol initialisation.
 *
 * Seeds seq_offset from the current time, empties the socket arrays of
 * the tcp/udp/raw protocol tables, registers every entry on the
 * ip_protocol_base list, runs the init hook of every device on the
 * dev_base list and finally installs net_timer as the NET_TIMER
 * callback.  Always returns 0.
 */
static int ip_proto_init(void)
{
  struct ip_protocol *proto;
  struct ip_protocol *following;
  struct device *ifp;
  int slot;

  seq_offset = CURRENT_TIME*250;

  /* start with every socket slot of each protocol table empty. */
  slot = 0;
  while (slot < SOCK_ARRAY_SIZE)
    {
       tcp_prot.sock_array[slot] = NULL; /* tcp */
       udp_prot.sock_array[slot] = NULL; /* udp */
       raw_prot.sock_array[slot] = NULL; /* raw packets */
       slot++;
    }

  /* add all the protocols that ip demultiplexes to (per the original
     note: ICMP, UDP, TCP).  The successor is read before the call --
     presumably because add_ip_protocol relinks the entry; confirm. */
  proto = ip_protocol_base;
  while (proto != NULL)
    {
       following = proto->next;
       add_ip_protocol (proto);
       proto = following;
    }

  /* add the devices: call the init hook of each one that has one. */
  for (ifp = dev_base; ifp != NULL; ifp = ifp->next)
    {
       if (!ifp->init)
         continue;
       ifp->init(ifp);
    }

  /* register the network timer callback. */
  timer_table[NET_TIMER].fn = net_timer;
  return (0);
}

dev_base在Space.c文件中定义,如下:


/* Init hook of the wd8003 driver; referenced by the table entry below. */
extern void wd8003_init(struct device *);

/*
 * Statically initialised device entry for the "eth0" interface.
 * The initializer is positional, so the order of the values must match
 * the member order of struct device (declared elsewhere).  Only the
 * name, memory window, i/o base, irq, flags, next pointer and init hook
 * carry real values here; everything after the init hook is zero/NULL
 * and, per the inline note, is left for wd8003_init to set up.
 */
static struct device wd8003_dev =
{
  "eth0",
  0xd2000,   /* recv memory end. */
  0xd0600,   /* recv memory start. */
  0xd2000,  /* memory end. */
  0xd0000,  /* memory start. */
  0x280,    /* base i/o address. */
  5,        /* irq */        // interrupt line used by the network card
  0,0,0,0,0, /* flags */
  NULL, /* next device */
  wd8003_init,
  /* wd8003_init should set up the rest. */
  0,  /* trans start. */
  {NULL}, /* buffs */
  NULL, /* backlog */
  NULL, /* open */
  NULL, /* stop */
  NULL, /* hard_start_xmit */
  NULL, /* hard_header */
  NULL, /* add arp */
  NULL, /* queue xmit */
  NULL, /* rebuild header */
  NULL, /* type_trans */
  NULL, /* send_packet */
  NULL, /* private */
  0,    /* type. */
  0,    /* hard_header_len */
  0,    /* mtu */
  {0,}, /* broadcast address */
  {0,}, /* device address */
  0     /* addr len */
};

/* Init hook of the loopback driver; referenced by the table entry below. */
extern void loopback_init(struct device *dev);

/*
 * Statically initialised device entry for the "loopback" interface.
 * Positional initializer: the value order must match the member order
 * of struct device (declared elsewhere).  Its next pointer links to
 * wd8003_dev, and everything after the init hook is zero/NULL and, per
 * the inline note, is left for loopback_init to set up.
 */
static struct device loopback_dev =
{
  "loopback",
  -1,       /* recv memory end. */
  0x0,      /* recv memory start. */
  -1,       /* memory end. */
  0,        /* memory start. */
  0,        /* base i/o address. */
  0,        /* irq */
  0,0,1,0,0, /* flags */
  &wd8003_dev, /* next device */
  loopback_init,
  /* loopback_init should set up the rest. */
  0,  /* trans start. */
  {NULL}, /* buffs */
  NULL, /* backlog */
  NULL, /* open */
  NULL, /* stop */
  NULL, /* hard_start_xmit */
  NULL, /* hard_header */
  NULL, /* add arp */
  NULL, /* queue xmit */
  NULL, /* rebuild header */
  NULL, /* type_trans */
  NULL, /* send_packet */
  NULL, /* private */
  0,    /* type. */
  0,    /* hard_header_len */
  0,    /* mtu */
  {0,}, /* broadcast address */
  {0,}, /* device address */
  0     /* addr len */
};

/* Head of the device list: loopback_dev first, then (through its next
   pointer) wd8003_dev. */
struct device *dev_base = &loopback_dev;

网络驱动初始化函数定义在we.c文件中,如下:


/*
 * Interrupt action handed to irqaction() by wd8003_init.  The first
 * member is the handler, wd8003_interrupt; the remaining members are
 * zero/NULL (their meaning depends on struct sigaction, which is
 * declared elsewhere).
 */
static struct sigaction wd8003_sigaction = 
{
   wd8003_interrupt,
   0,
   0,
   NULL
};

/*
 * Probe and initialise the wd8003/wd8013 board described by dev.
 *
 * Steps, in order:
 *   1. sum 8 bytes read from WD_ROM and compare with WD_CHECK; on a
 *      mismatch print a warning, set status to OPEN and return.
 *   2. fill in the mtu, header/address lengths, type and the function
 *      pointers of dev, and clear dev->buffs.
 *   3. unless FORCE_8BIT is defined, probe for a 16 bit board.
 *   4. map in the interface memory, then zero and read back every byte
 *      from dev->mem_start up to dev->mem_end; the index where the loop
 *      stops becomes the new dev->mem_end / dev->rmem_end.
 *   5. read the ethernet address from WD_ROM, set the broadcast address
 *      to all 0xff, clear the statistics, set status to 0.
 *   6. register wd8003_sigaction for dev->irq through irqaction.
 *
 * status, dconfig, max_pages and stats are not declared in this
 * function -- presumably file-scope driver state; confirm.
 */
void
wd8003_init(struct device *dev)
{
  unsigned char csum;
  int i;
  csum = 0;
  /* checksum the first 8 bytes read from WD_ROM. */
  for (i = 0; i < 8; i++)
    {
      csum += inb_p(WD_ROM+i);
    }
  if (csum != WD_CHECK)
    {
      printk ("Warning WD8013 board not found at i/o = %X.\n",dev->base_addr);

      /* make sure no one can attempt to open the device. */
      status = OPEN;
      return;
    }
  printk("wd8013");
  /* initialize the rest of the device structure. */
  dev->mtu = 1500; /* eth_mtu */
  dev->hard_start_xmit = wd8003_start_xmit;
  dev->open = wd8003_open;
  dev->hard_header = eth_hard_header;
  dev->add_arp = eth_add_arp;
  dev->type_trans = eth_type_trans;
  dev->hard_header_len = sizeof (struct enet_header);
  dev->addr_len = ETHER_ADDR_LEN;
  dev->type = ETHER_TYPE;
  dev->queue_xmit = dev_queue_xmit;
  dev->rebuild_header = eth_rebuild_header;
  for (i = 0; i < DEV_NUMBUFFS; i++)
    dev->buffs[i] = NULL;

#ifndef FORCE_8BIT
  /* check for 16 bit board - it doesn't have register 0/8 aliasing */
  for (i = 0; i < 8; i++) {
    if( inb_p( EN_SAPROM+i ) != inb_p( EN_CMD+i) ){
        csum = inb_p( EN_REG1 ); /* fiddle with 16bit bit */
        outb( csum ^ BUS16, EN_REG1 ); /* attempt to clear 16bit bit */
        /* the BUS16 bit did not change when written: treat as 16 bit. */
        if( (csum & BUS16) == (inb_p( EN_REG1 ) & BUS16) ) {
            printk(", using 16 bit I/F ");
            dconfig |= 1; /* use word mode of operation */
            outb_p( LAN16ENABLE|MEMMASK, EN_REG5);
            outb( csum , EN_REG1 );
            break; /* We have a 16bit board here! */
        }
        /* restore the original EN_REG1 value before the next try. */
        outb( csum , EN_REG1 );
    }
    }
#endif /* FORCE_8BIT */

  /* mapin the interface memory. */
  outb_p(WD_IMEM,WD_CTL);

  /* clear the interface memory */
  for (i = dev->mem_start; i < dev->mem_end; i++)
    {
      *((unsigned char *)i) = 0;
      /* read the byte back; a non-zero value means the write failed. */
      if (*((unsigned char *)i) != 0) 
    {
      printk ("WD Memory error.\n");
      /* past the first 4096 bytes: stop here and use what works.
         NOTE(review): otherwise status is set to OPEN but the loop
         keeps going, and status is overwritten with 0 further down --
         confirm that this is intended. */
      if( (i - dev->mem_start) > 4096 )
        break;
      else
        status = OPEN;
    }
    }
  /* Calculate how many pages of memory on board */
  max_pages = ( i - dev->mem_start )/256;

  /* need to set up the dev->mem_end and dev->rmem_end */
  dev->rmem_end = i;
  dev->mem_end = i;

  /* print the initialization message, and the
     ethernet address. */
  printk (", %d pages memory, ethernet Address: ", max_pages );
  for (i = 0; i <ETHER_ADDR_LEN; i++)
    {
      dev->dev_addr[i]=inb_p(WD_ROM+i);
      dev->broadcast[i]=0xff;
      printk ("%2.2X ",dev->dev_addr[i]);
    }

  /* Clear the statistics */
  for( i = 0; i < sizeof( struct enet_statistics ); i++ )
    ((char *)&stats)[i] = 0;

  printk ("\n");
  status = 0;
  // register the interrupt callback for the network card
  if (irqaction (dev->irq, &wd8003_sigaction))
    {
       printk ("Unable to get IRQ%d for wd8013 board\n", dev->irq);
    }
}

当网卡产生中断时,系统会调用注册的回调函数 wd8003_interrupt。该回调函数根据从网卡寄存器读到的数据进行判断:如果是发送完一个包,则调用 wd_trs -> dev_tint;如果是接收到一个包,则调用 wd_rcv -> dev_rint。dev_rint 函数定义在 dev.c 文件中,如下:


/* this routine now just gets the data out of the card and returns.

   Called with a packet (buff != NULL, len > 0) it copies the packet
   into a freshly allocated sk_buff -- or, when IN_SKBUFF is set in
   flags, uses buff itself as the sk_buff -- appends it to dev->backlog
   and returns 0.

   Otherwise it takes one sk_buff off dev->backlog, strips the hardware
   header and hands the packet to every entry on ptype_base whose type
   matches the value returned by dev->type_trans.

   buff  - packet data (or an sk_buff when IN_SKBUFF is set), may be NULL.
   len   - length of the packet in bytes.
   flags - IN_SKBUFF: buff already is an sk_buff.
   dev   - device the packet arrived on.

   it's return values now mean.

   1 <- exit even if you have more packets.
   0 <- call me again no matter what.
  -1 <- last packet not processed, try again. */

int
dev_rint(unsigned char *buff, unsigned long len, int flags,
         struct device * dev)
{
   struct sk_buff *skb=NULL;
   struct packet_type *ptype;
   unsigned short type;
   unsigned char flag =0;
   unsigned char *to;
   unsigned long left;
   int amount;

   /* try to grab some memory. */
   if (len > 0 && buff != NULL)
     {
        skb = malloc (sizeof (*skb) + len);
        /* the allocation can fail; only touch the header when it did
           not.  A NULL skb falls through to the backlog processing
           below, which reports the dropped packet. */
        if (skb != NULL)
          {
             skb->mem_len = sizeof (*skb) + len;
             skb->mem_addr = skb;
          }
     }

   /* first we copy the packet into a buffer, and save it for later. */
   if (buff != NULL && skb != NULL)
     {
        if ( !(flags & IN_SKBUFF))
          {
             /* the data goes right behind the sk_buff header.  Count
                down a copy of len so that the real packet length is
                still available for skb->len afterwards. */
             to = (unsigned char *)(skb+1);
             left = len;
             while (left > 0)
               {
                  /* never read past the end of the receive area... */
                  amount = min (left, (unsigned long) dev->rmem_end -
                                (unsigned long) buff);
                  memcpy (to, buff, amount);
                  left -= amount;
                  buff += amount;
                  to += amount;
                  /* ...wrap around to its start instead. */
                  if ((unsigned long)buff == dev->rmem_end)
                    buff = (unsigned char *)dev->rmem_start;
               }
          }
        else
          {
             /* buff already is an sk_buff: drop the one just allocated
                and use buff directly. */
             free_s (skb->mem_addr, skb->mem_len);
             skb = (struct sk_buff *)buff;
          }

        skb->len = len;
        skb->dev = dev;
        skb->sk = NULL;

        /* now add it to the dev backlog. */
        cli();
        if (dev-> backlog == NULL)
          {
             /* first entry: a circular list of one. */
             skb->prev = skb;
             skb->next = skb;
             dev->backlog = skb;
          }
        else
          {
             /* link in just before the head, i.e. at the tail. */
             skb ->prev = dev->backlog->prev;
             skb->next = dev->backlog;
             skb->next->prev = skb;
             skb->prev->next = skb;
          }
        sti();
        return (0);
     }

   /* skb is only ever allocated when buff != NULL, and that case has
      returned above, so this is a safeguard only. */
   if (skb != NULL) 
     free_s (skb->mem_addr, skb->mem_len);

   /* anything left to process? */

   /* NOTE(review): no cli() precedes the sti() calls on this path inside
      this function -- presumably it is entered with interrupts
      disabled; confirm against the callers. */
   if (dev->backlog == NULL)
     {
        if (buff == NULL)
          {
             sti();
             return (1);
          }

        if (skb != NULL)
          {
             sti();
             return (-1);
          }

        /* a packet was offered but no memory could be had for it. */
        sti();
        printk ("dev_rint:Dropping packets due to lack of memory\n");
        return (1);
     }

   /* unlink the head of the backlog. */
   skb= dev->backlog;
   if (skb->next == skb)
     {
        dev->backlog = NULL;
     }
   else
     {
        dev->backlog = skb->next;
        skb->next->prev = skb->prev;
        skb->prev->next = skb->next;
     }
   sti();

   /* bump the pointer to the next structure. */
   skb->h.raw = (unsigned char *)(skb+1) + dev->hard_header_len;
   skb->len -= dev->hard_header_len;

   /* convert the type to an ethernet type.  For the wd8003 driver
      wd8003_init sets type_trans to eth_type_trans, which (per the
      original note) reads the type field of the link-level header. */
   type = dev->type_trans (skb, dev);

   /* if there get to be a lot of types we should changes this to
      a bunch of linked lists like we do for ip protocols. */
   for (ptype = ptype_base; ptype != NULL; ptype=ptype->next)
     {
        if (ptype->type == type)
          {
             struct sk_buff *skb2;
             /* copy the packet if we need to. */
             if (ptype->copy)
               {
                  skb2 = malloc (skb->mem_len);
                  if (skb2 == NULL) continue;
                  memcpy (skb2, skb, skb->mem_len);
                  skb2->mem_addr = skb2;
               }
             else
               {
                  /* the handler gets the original; remember that so it
                     is not freed below. */
                  skb2 = skb;
                  flag = 1;
               }
             /* e.g. ip_rcv for ip packets. */
             ptype->func (skb2, dev, ptype);
          }
     }

   /* nobody took the original packet: throw it away. */
   if (!flag)
     {
        PRINTK ("discarding packet type = %X\n", type);
        free_skb (skb, FREE_READ);
     }

     if (buff == NULL)
       return (0);
     else
       return (-1);
}

根据eth_type_trans函数解析的以太网报文头type字段,如果是ETHERTYPE_IP则调用ip_rcv接收并处理包。 该函数定义在ip.c文件中,如下:


int
ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
  struct ip_header *iph;
  unsigned char hash;
  unsigned char flag=0;
  static struct options opt; /* since we don't use these yet, and they
                        take up stack space. */
  struct ip_protocol *ipprot;

  iph=skb->h.iph;

  PRINTK("<<\n");
  print_iph(iph);

  if (ip_csum (iph) || do_options (iph,&opt) || iph->version != 4)
    {
       PRINTK ("ip packet thrown out. \n");
       skb->sk = NULL;
       free_skb(skb, 0);
       return (0);
    }

  /* for now we will only deal with packets meant for us. */
  if (!my_ip_addr(iph->daddr))
    {
       PRINTK ("packet meant for someone else.\n");
       skb->sk = NULL;
       free_skb(skb, 0);
       return (0);
    }

  /* deal with fragments.  or don't for now.*/
  if ((iph->frag_off & 64) || (net16(iph->frag_off)&0x1fff))
    {
       printk ("packet fragmented. \n");
       skb->sk = NULL;
       free_skb(skb, 0);
       return(0);
    }

  skb->h.raw += iph->ihl*4;

  /* add it to the arp table if it's talking to us.  That way we
     will be able to talk to them also. */
  // IP首部的8位协议,根据协议进行分用。
  hash = iph->protocol & (MAX_IP_PROTOS -1);
  // ip_proto_init 函数中 调用add_ip_protocol 注册的
  for (ipprot = ip_protos[hash]; ipprot != NULL; ipprot=ipprot->next)
    {
       struct sk_buff *skb2;
       PRINTK ("Using protocol = %X:\n", ipprot);
       print_ipprot (ipprot);
       /* pass it off to everyone who wants it. */
       /* we should check the return values here. */
       /* see if we need to make a copy of it.  This will
        only be set if more than one protpocol wants it. 
        and then not for the last one. */

       if (ipprot->copy)
       {
          skb2 = malloc (skb->mem_len);
          if (skb2 == NULL) continue;
          memcpy (skb2, skb, skb->mem_len);
          skb2->mem_addr = skb2;
       }
       else
       {
          skb2 = skb;
       }
       flag = 1;
      // 如果是tcp则tcp_rcv
       ipprot->handler (skb2, dev, &opt, iph->daddr,
                  net16(iph->tot_len) - iph->ihl*4,
                  iph->saddr, 0, ipprot);

    }
  if (!flag)
    {
       icmp_reply (skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
       skb->sk = NULL;
       free_skb (skb, 0);
    }

  return (0);
}

/*
 * Receive one tcp segment.
 *
 * Looks up the socket for the segment, and -- unless redo is set --
 * verifies the checksum, resets connections to unknown sockets, fills
 * in the bookkeeping fields of skb and either queues skb on
 * sk->back_log (socket in use) or marks the socket in use.  The
 * segment's memory is then charged to the socket and the segment is
 * processed according to sk->state.
 *
 * skb      - the segment, skb->h.th pointing at the tcp header.
 * dev      - device it arrived on.
 * opt      - ip options of the packet.
 * daddr    - ip destination address of the packet.
 * len      - length of the tcp header plus data (ip_rcv passes the ip
 *            total length minus the ip header length).
 * saddr    - ip source address of the packet.
 * redo     - non-zero skips the checksum, socket checks and backlog
 *            queueing -- presumably used when replaying a backlogged
 *            segment; confirm against the callers.
 * protocol - the ip_protocol entry that dispatched here (unused).
 *
 * Every visible path returns 0.
 *
 * NOTE(review): when redo is non-zero sk is dereferenced without a
 * NULL check -- confirm that such callers guarantee a matching socket.
 */
int
tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
      unsigned long daddr, unsigned short len,
      unsigned long saddr, int redo, struct ip_protocol * protocol)
{
  struct tcp_header *th;
  volatile struct sock *sk;

  th = skb->h.th;
  /* find the socket. */
  sk=get_sock(&tcp_prot, net16(th->dest), saddr, th->source, daddr);
  PRINTK("<<\n");
  PRINTK("len = %d, redo = %d, skb=%X\n", len, redo, skb);

  if (sk)
    {
      PRINTK ("sk = %X:\n",sk);
      print_sk (sk);
    }

  if (!redo)
    {
       /* a zero checksum field is not verified. */
       if (th->check && tcp_check (th, len, saddr, daddr ))
       {
          skb->sk = NULL;
          free_skb (skb, 0);
          /* we don't release the socket because it was never
             marked in use. */
          return (0);
       }

       /*See if we know about the socket. */
       if (sk == NULL)
      {
        /* never answer a reset with a reset. */
        if (!th->rst)
          tcp_reset (daddr, saddr, th, &tcp_prot, opt,dev);
        skb->sk = NULL;
        free_skb (skb, 0);
        return (0);
      }

       skb->len = len;
       skb->sk = sk;
       skb->acked = 0;
       skb->used = 0;
       skb->free = 0;
       skb->urg_used = 0;
       /* note: the packet's destination is stored as saddr and its
          source as daddr -- presumably the socket's point of view;
          confirm. */
       skb->saddr = daddr;
       skb->daddr = saddr;

       /* keep the sequence number in host byte order from here on. */
       th->seq = net32(th->seq);

       cli();

       /* we may need to add it to the backlog here. */
       if (sk->inuse)
       {
          /* sk->back_log is a circular doubly linked list. */
          if (sk->back_log == NULL)
            {
             sk->back_log = skb;
             skb->next = skb;
             skb->prev = skb;
            }
          else
            {
             /* link in just before the head, i.e. at the tail. */
             skb->next = sk->back_log;
             skb->prev = sk->back_log->prev;
             skb->prev->next = skb;
             skb->next->prev = skb;
            }
          sti();
          return (0);
       }
       /* the socket is ours until release_sock is called. */
       sk->inuse = 1;
       sti();
    }

  /* charge the memory to the socket. */
  if (sk->rmem_alloc + skb->mem_len >= SK_RMEM_MAX)
    {
       /* over the receive limit: drop the segment. */
       skb->sk = NULL;
       free_skb (skb, 0);
       release_sock (sk);
       return (0);
    }

  sk->rmem_alloc += skb->mem_len;

  PRINTK ("About to do switch. \n");

  /* now deal with it. */

  switch (sk->state)
    {
       /* this should close the system down if it's waiting for an
        ack that is never going to be sent. */
    case TCP_LAST_ACK:
      if (th->rst)
      {
        sk->err = ECONNRESET;
        sk->state = TCP_CLOSE;
        if (!sk->dead)
          {
            wake_up (sk->sleep);
          }
        free_skb (skb, FREE_READ);
        release_sock(sk);
        return (0);
      }
      /* no reset: fall through and treat it like an open connection. */

    case TCP_ESTABLISHED:
    case TCP_FIN_WAIT1:
    case TCP_FIN_WAIT2:
    case TCP_TIME_WAIT:

      /* drop the segment when tcp_sequence rejects it. */
      if (!tcp_sequence (sk, th, len, opt, saddr))
      {
         free_skb (skb, FREE_READ);
         release_sock(sk);
         return (0);
      }
      /* the peer reset the connection. */
      if (th->rst)
      {
        sk->err = ECONNRESET;
        sk->state = TCP_CLOSE;
        if (!sk->dead)
          {
            wake_up (sk->sleep);
          }
        free_skb (skb, FREE_READ);
        release_sock(sk);
        return (0);
      }

      /* non-zero security/compartment options or a syn on an open
         connection: reset it and close. */
      if (opt->security != 0 || opt->compartment != 0 || th->syn)
      {
         sk->err = ECONNRESET;
         sk->state = TCP_CLOSE;
         tcp_reset (daddr, saddr,  th, sk->prot, opt,dev);
         if (!sk->dead)
           {
            wake_up (sk->sleep);
           }
         free_skb (skb, FREE_READ);
         release_sock(sk);
         return (0);
      }

      if (th->ack)
      {
         /* a zero return from tcp_ack means: drop the segment. */
         if(!tcp_ack (sk, th, saddr))
          {
             free_skb (skb, FREE_READ);
             release_sock(sk);
             return (0);
         }
      }
      if (th->urg)
      {
        /* a non-zero return from tcp_urg means: drop the segment. */
        if (tcp_urg (sk, th, saddr))
          {
             free_skb (skb, FREE_READ);
             release_sock(sk);
             return (0);
          }
      }

      /* a non-zero return from tcp_data means the skb was not kept and
         has to be freed here. */
      if ( tcp_data (skb, sk, saddr, len))
      {
         free_skb (skb, FREE_READ);
         release_sock(sk);
         return (0);
      }

      if (!th->fin)
      {
        release_sock(sk);
        return (0);
      }

      tcp_fin (sk, th, saddr, dev);
      release_sock(sk);
      return (0);

    case TCP_CLOSE:

      if (sk->dead || sk->daddr)
      {
         PRINTK ("packet received for closed,dead socket\n");
         free_skb (skb, FREE_READ);
         release_sock (sk);
         return (0);
      }

      /* answer anything but a reset with a reset. */
      if (!th->rst)
      {
        if (!th->ack)
          th->ack_seq=0;
        tcp_reset (daddr, saddr, th, sk->prot, opt,dev);
      }
      free_skb (skb, FREE_READ);
      release_sock(sk);
      return (0);

    case TCP_LISTEN:
      /* resets are silently dropped on a listening socket. */
      if (th->rst)
      {
         free_skb (skb, FREE_READ);
         release_sock(sk);
         return (0);
      }
      /* an ack on a listening socket is answered with a reset. */
      if (th->ack)
      {
        tcp_reset (daddr, saddr, th, sk->prot, opt,dev );
        free_skb (skb, FREE_READ);
        release_sock(sk);
        return (0);
      }

      if (th->syn)
      {
/*        if (opt->security != 0 || opt->compartment != 0)
          {
            tcp_reset (daddr, saddr, th, prot, opt,dev);
            release_sock(sk);
            return (0);
          } */

        /* now we just put the whole thing including the header
           and saddr, and protocol pointer into the buffer.
           We can't respond until the user tells us to accept
           the connection. */

        /* the skb is not freed on this path; it is handed to
           tcp_conn_request. */
        tcp_conn_request (sk, skb, daddr, saddr, opt, dev);

        release_sock(sk);
        return (0);
      }

      free_skb (skb, FREE_READ);
      release_sock(sk);
      return (0);

    default:
      /* any state not listed by name: drop the segment when
         tcp_sequence rejects it, else fall through into the
         TCP_SYN_SENT handling below. */
      if (!tcp_sequence (sk, th, len, opt, saddr)) 
      {
         free_skb (skb, FREE_READ);
         release_sock(sk);
         return (0);
      }

    case TCP_SYN_SENT:
      /* a reset here means the connection attempt was refused. */
      if (th->rst)
      {
        sk->err = ECONNREFUSED;
        sk->state = TCP_CLOSE;
        if (!sk->dead)
          {
            wake_up (sk->sleep);
          }
        free_skb (skb, FREE_READ);
        release_sock(sk);
        return (0);
      }
/*      if (opt->security != 0 || opt->compartment != 0 )
      {
        sk->err = ECONNRESET;
        sk->state = TCP_CLOSE;
        tcp_reset (daddr, saddr,  th, sk->prot, opt, dev);
        if (!sk->dead)
        {
        wake_up (sk->sleep);
        }
        free_skb (skb, FREE_READ);
        release_sock(sk);
        return (0);
      } */

      /* no ack: a bare syn moves the socket to TCP_SYN_RECV; either
         way the segment is dropped. */
      if (!th->ack) 
      {
        if (th->syn)
          {
            sk->state = TCP_SYN_RECV;
          }

        free_skb (skb, FREE_READ);
        release_sock(sk);
        return (0);
      }

      /* the segment carries an ack.  States other than the two handled
         here skip this inner switch and continue after it. */
      switch (sk->state)
      {
      case TCP_SYN_SENT:
        if (!tcp_ack(sk, th, saddr))
          {
            tcp_reset(daddr, saddr, th, sk->prot, opt,dev);
            free_skb (skb, FREE_READ);
            release_sock(sk);
            return (0);
          }

        /* if the syn bit is also set, switch to tcp_syn_recv,
           and then to established. */

        if (!th->syn) 
          {
            free_skb (skb, FREE_READ);
            release_sock (sk);
            return (0);
          }

        /* ack the syn and fall through. */
        sk->acked_seq = th->seq+1;
        sk->fin_seq = th->seq;
        tcp_send_ack (sk->send_seq, th->seq+1, sk, 
                       th, sk->daddr);

      case TCP_SYN_RECV:
        if (!tcp_ack(sk, th, saddr))
          {
            tcp_reset(daddr, saddr, th, sk->prot, opt, dev);
            free_skb (skb, FREE_READ);
            release_sock(sk);
            return (0);
          }

        sk->state = TCP_ESTABLISHED;
        /* now we need to finish filling out some of the tcp
           header. */

        /* we need to check for mtu info. */
        tcp_options(sk, th);
        sk->dummy_th.dest = th->source;
        sk->copied_seq = sk->acked_seq-1;
        /* wake up whoever sleeps on the socket. */
        if (!sk->dead)
          {
            wake_up (sk->sleep);
          }

        /* now process the rest like we were already in the established
           state. */
        if (th->urg)
          if (tcp_urg (sk, th, saddr))
            {
             free_skb (skb, FREE_READ);
             release_sock(sk);
             return (0);
            }
        /* unlike in the established case, processing goes on to the
           fin check even when the skb had to be freed here. */
        if (tcp_data (skb, sk, saddr, len))
          free_skb (skb, FREE_READ);

        if (th->fin)
          tcp_fin(sk, th, saddr, dev);

        release_sock(sk);
        return (0);
      }

      /* remaining states: urgent data, data and fin handling as in the
         established case. */
      if (th->urg)
      {
        if (tcp_urg (sk, th, saddr))
          {
             free_skb (skb, FREE_READ);
             release_sock (sk);
             return (0);
          }
      }

      if (tcp_data (skb, sk, saddr, len))
      {
         free_skb (skb, FREE_READ);
         release_sock (sk);
         return (0);
      }

      if (!th->fin)
      {
        release_sock(sk);
        return (0);
      }
      tcp_fin (sk, th, saddr, dev);
      release_sock(sk);
      return (0);
    }
}

总结

系统启动后,调用 start_kernel(main.c 文件中),该函数又调用 sock_init(socket.c 文件中)初始化网络文件系统,sock_init 再调用 inet_proto_ops 中的 ip_proto_init(sock.c 文件中)初始化 inet 协议族。ip_proto_init 一方面对 ip_protocol_base(protocols.c 文件中)链表中的每一项调用 add_ip_protocol(ip.c 文件中),把传输层支持的协议注册到 ip_protos 哈希表中;另一方面遍历 dev_base(Space.c 文件中)并调用各设备的 init 函数,初始化链路层设备。其中 loopback_init 对应回环设备,而 wd8003_init(we.c 文件中)则是网卡驱动的初始化函数,它调用 irqaction (dev->irq, &wd8003_sigaction) 注册网卡中断回调函数 wd8003_interrupt。当有数据包到达时,网卡给 CPU 发中断信号,该回调函数被调用执行,经 wd_rcv -> dev_rint,最终调用到了 ip_rcv。