本来是做zero-copy的,顺便把分析记录写下来,供大家参考,如果有错误请大家多包涵。只挑重要的来说,一些细节的地方我也不大懂,要看芯片手册才行,我们做软件的就别看那么细了,最重要是把主要流程弄清楚。
系统结构定义
以下定义的结构,要保证长度是32bit的整数倍,也就是4bytes对齐,在自己添加成员的时候尤其要小心。
struct cb 字面理解为control block;
struct nic 网卡的基本信息,该结构是针对单个网卡的,而不是针对网卡驱动整个系统;
子例程分析
/*
 * e100_enable_irq - enable the adapter's interrupts.
 * @nic: per-adapter state
 *
 * Writes irq_mask_none to the high byte of the SCB command register
 * while holding nic->cmd_lock (local IRQ state saved and restored), then
 * calls e100_write_flush() so the register write is forced out to the
 * device before returning.
 */
static inline void e100_enable_irq(struct nic *nic)
{
	unsigned long flags;

	spin_lock_irqsave(&nic->cmd_lock, flags);
	writeb(irq_mask_none, &nic->csr->scb.cmd_hi);
	spin_unlock_irqrestore(&nic->cmd_lock, flags);
	e100_write_flush(nic);
}
/*
 * e100_disable_irq - disable the adapter's interrupts.
 * @nic: per-adapter state
 *
 * Writes irq_mask_all to the high byte of the SCB command register
 * while holding nic->cmd_lock (local IRQ state saved and restored), then
 * calls e100_write_flush() so the register write is forced out to the
 * device before returning.
 */
static inline void e100_disable_irq(struct nic *nic)
{
	unsigned long flags;

	spin_lock_irqsave(&nic->cmd_lock, flags);
	writeb(irq_mask_all, &nic->csr->scb.cmd_hi);
	spin_unlock_irqrestore(&nic->cmd_lock, flags);
	e100_write_flush(nic);
}
这两个函数看意思就是把nic指向的网卡的irq打开与关闭,在写寄存器的时候要用spin_lock_irqsave加锁;
e100_write_flush是把内容立即刷新,这里的做法比较简单,就是把pci的总线读一下,这样write的过程就被迫完成了。
总体分析
初始化过程:
/*
 * e100_hw_init - reset the adapter and run its initialisation sequence.
 * @nic: per-adapter state
 *
 * Performs, in order: chip reset, self-test (skipped in interrupt
 * context), PHY init, the CU/RU load-base commands, microcode load,
 * configure, individual-address setup and the statistics dump setup.
 * The sequence stops at the first step that reports an error.
 * Interrupts are left disabled when the function succeeds.
 *
 * Returns 0 on success, or the error code of the first failing step.
 */
static int e100_hw_init(struct nic *nic)
{
	int err;

	e100_hw_reset(nic);	/* reset the chip */

	DPRINTK(HW, ERR, "e100_hw_init\n");

	/* The self-test only runs when we are NOT in interrupt context */
	if (!in_interrupt() && (err = e100_self_test(nic)))
		return err;

	/* PHY initialisation, followed by the individual setup commands */
	if ((err = e100_phy_init(nic)))
		return err;
	if ((err = e100_exec_cmd(nic, cuc_load_base, 0)))
		return err;
	if ((err = e100_exec_cmd(nic, ruc_load_base, 0)))
		return err;
	if ((err = e100_exec_cb(nic, NULL, e100_load_ucode)))
		return err;
	if ((err = e100_exec_cb(nic, NULL, e100_configure)))
		return err;
	if ((err = e100_exec_cb(nic, NULL, e100_setup_iaaddr)))
		return err;

	/* Pass the DMA address of the stats area of struct mem to the chip */
	if ((err = e100_exec_cmd(nic, cuc_dump_addr,
		nic->dma_addr + offsetof(struct mem, stats))))
		return err;
	if ((err = e100_exec_cmd(nic, cuc_dump_reset, 0)))
		return err;

	e100_disable_irq(nic);	/* leave interrupts disabled */

	/* Success: without this the int-returning function falls off its end */
	return 0;
}
staticvoid e100_watchdog(unsignedlong data)
{
…
// 根据MII的监测工具进行监测,如果发现有网卡动作,则调整统计信息,把网卡设置成up/down状态
mii_ethtool_gset(&nic->mii, &cmd);
if(mii_link_ok(&nic->mii) && !netif_carrier_ok(nic->netdev)) {
DPRINTK(LINK, INFO, "link up, %sMbps, %s-duplex\n",
cmd.speed == SPEED_100 ? "100" : "10",
cmd.duplex == DUPLEX_FULL ? "full" : "half");
} elseif(!mii_l ink_ok(&nic->mii) && netif_carrier_ok(nic->netdev)) {
DPRINTK(LINK, INFO, "link down\n");
}
mii_check_link(&nic->mii);
…
// 最后,watch_dog不是做一次,所以做完了这次,要用mod_timer启动下一次检查
mod_timer(&nic->watchdog, jiffies + E100_WATCHDOG_PERIOD);
}
发包过程:
/*
 * e100_tx_clean - reclaim transmit command blocks the device has finished.
 * @nic: per-adapter state
 *
 * Walks the CB ring from nic->cb_to_clean while the cb_complete bit is
 * set in the CB status.  For each such CB that carries an skb, the TX
 * counters are updated, the buffer is DMA-unmapped and the skb is freed.
 * Every visited CB has its status cleared and is counted as available
 * again.  If anything was freed and the queue was stopped, the queue is
 * woken.
 *
 * Returns 1 if at least one skb was freed, 0 otherwise.
 */
static inline int e100_tx_clean(struct nic *nic)
{
	struct cb *cb;
	int tx_cleaned = 0;

	/*
	 * Author's remark: taking this lock costs some speed; tolerable for
	 * a 100M NIC, but it would not be acceptable for a 1000M one.
	 */
	spin_lock(&nic->cb_lock);

	DPRINTK(TX_DONE, DEBUG, "cb->status = 0x%04X\n",
		nic->cb_to_clean->status);

	/* Clean CBs marked complete */
	for (cb = nic->cb_to_clean;
		/* status is little-endian, so convert the CPU-order constant */
		cb->status & cpu_to_le16(cb_complete);
		cb = nic->cb_to_clean = cb->next) {
		if (likely(cb->skb != NULL)) {
			nic->net_stats.tx_packets++;
			nic->net_stats.tx_bytes += cb->skb->len;

			/* Tear down the DMA mapping before the skb is freed */
			pci_unmap_single(nic->pdev,
				le32_to_cpu(cb->u.tcb.tbd.buf_addr),
				le16_to_cpu(cb->u.tcb.tbd.size),
				PCI_DMA_TODEVICE);
			dev_kfree_skb_any(cb->skb);
			/* NULL marks the CB as empty for the check above */
			cb->skb = NULL;
			tx_cleaned = 1;
		}
		cb->status = 0;
		nic->cbs_avail++;
	}

	spin_unlock(&nic->cb_lock);

	/* Recover from running out of Tx resources in xmit_frame */
	if (unlikely(tx_cleaned && netif_queue_stopped(nic->netdev)))
		netif_wake_queue(nic->netdev);	/* restart the stopped TX queue */

	return tx_cleaned;
}
控制队列的操作,原理和上面一样:
static void e100_clean_cbs(struct nic *nic)
static int e100_alloc_cbs(struct nic *nic)
启动接收过程
static inline void e100_start_receiver(struct nic *nic, struct rx *rx)
给收包过程分配skb,这个是非常重要的过程,主要完成skb的分配工作,如果rx队列没有skb,则new一个,否则把状态同步一下,然后直接使用旧的skb,用于提高效率。分配好的skb要作pci_map动作,就是把内存挂在网卡的DMA通道,等有中断发生,内存就是网络数据包了,校验的动作在后面会作。
/*
 * e100_rx_alloc_skb - allocate and DMA-map a receive buffer for one slot.
 * @nic: per-adapter state
 * @rx:  receive slot to populate
 *
 * Allocates an skb, copies the template RFD (nic->blank_rfd) to the
 * start of its data area, and maps RFD_BUF_LEN bytes for DMA.  If the
 * previous slot still holds an skb, that slot's RFD is linked to the new
 * buffer and its EL bit is cleared.
 *
 * Returns 0 on success, -ENOMEM if the skb allocation or the DMA
 * mapping fails (rx->skb and rx->dma_addr are then cleared).
 */
static inline int e100_rx_alloc_skb(struct nic *nic, struct rx *rx)
{
	/* Allocate the skb */
	if (!(rx->skb = dev_alloc_skb(RFD_BUF_LEN + NET_IP_ALIGN)))
		return -ENOMEM;

	/* Align, init, and map the RFD. */
	rx->skb->dev = nic->netdev;
	/*
	 * Reserve NET_IP_ALIGN bytes of headroom (typically 2) so that the
	 * IP header following the 14-byte Ethernet header ends up aligned.
	 */
	skb_reserve(rx->skb, NET_IP_ALIGN);
	/*
	 * The start of skb->data holds the RFD (receive frame descriptor)
	 * through which the device reports status; the article's author
	 * measured it as 16 bytes.
	 */
	memcpy(rx->skb->data, &nic->blank_rfd, sizeof(struct rfd));
	/* Map the buffer so the device can DMA received data into skb->data */
	rx->dma_addr = pci_map_single(nic->pdev, rx->skb->data,
		RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL);

	if (pci_dma_mapping_error(rx->dma_addr)) {
		dev_kfree_skb_any(rx->skb);
		rx->skb = NULL;
		rx->dma_addr = 0;
		return -ENOMEM;
	}

	/* Link the RFD to end of RFA by linking previous RFD to
	 * this one, and clearing EL bit of previous. */
	if (rx->prev->skb) {
		struct rfd *prev_rfd = (struct rfd *)rx->prev->skb->data;

		put_unaligned(cpu_to_le32(rx->dma_addr),
			(u32 *)&prev_rfd->link);
		/* Make the link visible before the EL bit is cleared */
		wmb();
		prev_rfd->command &= ~cpu_to_le16(cb_el);
		/* Push the modified previous RFD back out to the device */
		pci_dma_sync_single_for_device(nic->pdev, rx->prev->dma_addr,
			sizeof(struct rfd), PCI_DMA_TODEVICE);
	}

	return 0;
}
// 主要的收包过程,有中断发生后,这个函数把接收的包首先解除PCI_DMA映射,然后纠错,最后要把包送到协议栈
staticinlineint e100_rx_indicate(struct nic *nic, struct rx *rx,
unsignedint *work_done, unsignedint work_to_do)
{
struct sk_buff *skb = rx->skb;
struct rfd *rfd = (struct rfd *)skb->data;
u16 rfd_status, actual_size;
if(unlikely(work_done && *work_done >= work_to_do))
return -EAGAIN;
/* Need to sync before taking a peek at cb_complete bit */
// 同步一下状态,也就是skb前16字节的内存,后面根据rdf_status判断包是否收全了
pci_dma_sync_single_for_cpu(nic->pdev, rx->dma_addr,
sizeof(struct rfd), PCI_DMA_FROMDEVICE);
rfd_status = le16_to_cpu(rfd->status);
DPRINTK(RX_STATUS, DEBUG, "status=0x%04X\n", rfd_status);
/* If data isn't ready, nothing to indicate */
if(unlikely(!(rfd_status & cb_complete)))
return -ENODATA;
/* Get actual data size */
actual_size = le16_to_cpu(rfd->actual_size) & 0x3FFF;
// 判断包是否收全
if(unlikely(actual_size > RFD_BUF_LEN - sizeof(struct rfd)))
actual_size = RFD_BUF_LEN - sizeof(struct rfd);
/* Get data */
// 解除DMA映射,这样skb->data就可以自由了
pci_unmap_single(nic->pdev, rx->dma_addr,
RFD_BUF_LEN, PCI_DMA_FROMDEVICE);
/* this allows for a fast restart without re-enabling interrupts */
if(le16_to_cpu(rfd->command) & cb_el)
nic->ru_running = RU_SUSPENDED;
/* Pull off the RFD and put the actual data (minus eth hdr) */
skb_reserve(skb, sizeof(struct rfd)); // 如果是VLAN,把指针调整一下
skb_put(skb, actual_size);
skb->protocol = eth_type_trans(skb, nic->netdev);
// 作错包乱包检查
if(unlikely(!(rfd_status & cb_ok))) {
/* Don't indicate if hardware indicates errors */
nic->net_stats.rx_dropped++;
dev_kfree_skb_any(skb);
} elseif(actual_size > ETH_DATA_LEN + VLAN_ETH_HLEN) {
/* Don't indicate oversized frames */
nic->rx_over_length_errors++;
nic->net_stats.rx_dropped++;
dev_kfree_skb_any(skb);
} else {
// 终于正确收到了,统计数据都要作下增加
nic->net_stats.rx_packets++;
nic->net_stats.rx_bytes += actual_size;
nic->netdev->last_rx = jiffies;
// 送到协议栈
#ifdef CONFIG_E100_NAPI
netif_receive_skb(skb); // NAPI的poll方式,使用软中断
#else
netif_rx(skb); // 普通的中断方式,使用硬中断
#endif
if(work_done)
(*work_done)++;
}
rx->skb = NULL;
return 0;
}
// 收包skb的清理
static inline void e100_rx_clean(struct nic *nic, unsigned int *work_done,
unsigned int work_to_do)
// 下面这两个函数针对收包队列的管理,也就是调用e100_rx_clean, e100_rx_alloc_skb,用户状态的链表,实际上比较简单,如果哪个给送走了,就检查,再分配一个;
// 因为e100是百兆网卡,所以只有一个用户态的skb管理队列,e1000系列的则硬件中维护另外一个队列,一次可以map 1024个skb
static void e100_rx_clean_list(struct nic *nic)
static int e100_rx_alloc_list(struct nic *nic)
// 中断处理函数(在e100_up中通过request_irq注册)
static irqreturn_t e100_intr(int irq, void *dev_id, struct pt_regs *regs)
设置POLL的函数:
static int e100_poll(struct net_device *netdev, int *budget)
static void e100_netpoll(struct net_device *netdev)
网卡启动:
对应ifconfig eth0 up这样的命令
/*
 * e100_up - bring the interface up.
 * @nic: per-adapter state
 *
 * Allocates the receive list and the command blocks, initialises the
 * hardware, starts the receiver and the watchdog timer, requests the
 * (shared) IRQ, wakes the TX queue and finally enables interrupts.
 * On failure, everything acquired so far is released in reverse order.
 *
 * Returns 0 on success, or the error code of the step that failed.
 */
static int e100_up(struct nic *nic)
{
	int err;

	if ((err = e100_rx_alloc_list(nic)))	/* allocate the receive list */
		return err;
	if ((err = e100_alloc_cbs(nic)))	/* allocate the command blocks */
		goto err_rx_clean_list;
	if ((err = e100_hw_init(nic)))		/* initialise the hardware */
		goto err_clean_cbs;
	e100_set_multicast_list(nic->netdev);	/* apply the multicast list */
	e100_start_receiver(nic, NULL);		/* start the receiver */
	/* Fire the watchdog right away; it checks the NIC state */
	mod_timer(&nic->watchdog, jiffies);
	if ((err = request_irq(nic->pdev->irq, e100_intr, SA_SHIRQ,
		nic->netdev->name, nic->netdev)))	/* request the IRQ line */
		goto err_no_irq;
	/* Tell the stack the TX queue may be used */
	netif_wake_queue(nic->netdev);
#ifdef CONFIG_E100_NAPI
	netif_poll_enable(nic->netdev);		/* NAPI build: enable polling */
	/* enable ints _after_ enabling poll, preventing a race between
	 * disable ints+schedule */
#endif
	e100_enable_irq(nic);	/* needed in both NAPI and non-NAPI builds */
	return 0;

err_no_irq:
	del_timer_sync(&nic->watchdog);
err_clean_cbs:
	e100_clean_cbs(nic);
err_rx_clean_list:
	e100_rx_clean_list(nic);
	return err;
}
网卡关闭:
对应ifconfig eth0 down这样的命令
/*
 * e100_down - take the interface down; the counterpart of e100_up().
 * @nic: per-adapter state
 *
 * Disables polling (NAPI builds), stops the TX queue, resets the
 * hardware, releases the IRQ, stops the watchdog timer, marks the
 * carrier off and frees the command blocks and the receive list.
 */
static void e100_down(struct nic *nic)
{
#ifdef CONFIG_E100_NAPI
	/* wait here for poll to complete */
	netif_poll_disable(nic->netdev);
#endif
	netif_stop_queue(nic->netdev);
	e100_hw_reset(nic);
	free_irq(nic->pdev->irq, nic->netdev);
	del_timer_sync(&nic->watchdog);
	netif_carrier_off(nic->netdev);
	e100_clean_cbs(nic);
	e100_rx_clean_list(nic);
}
ethtool对应的函数,这里都列出来了
staticstruct ethtool_ops e100_ethtool_ops = {
.get_settings = e100_get_settings,
.set_settings = e100_set_settings,
.get_drvinfo = e100_get_drvinfo,
.get_regs_len = e100_get_regs_len,
.get_regs = e100_get_regs,
.get_wol = e100_get_wol,
.set_wol = e100_set_wol,
.get_msglevel = e100_get_msglevel,
.set_msglevel = e100_set_msglevel,
.nway_reset = e100_nway_reset,
.get_link = e100_get_link,
.get_eeprom_len = e100_get_eeprom_len,
.get_eeprom = e100_get_eeprom,
.set_eeprom = e100_set_eeprom,
.get_ringparam = e100_get_ringparam,
.set_ringparam = e100_set_ringparam,
.self_test_count = e100_diag_test_count,
.self_test = e100_diag_test,
.get_strings = e100_get_strings,
.phys_id = e100_phys_id,
.get_stats_count = e100_get_stats_count,
.get_ethtool_stats = e100_get_ethtool_stats,
};
// 对应标准网卡驱动程序的一些封装函数
static int e100_open(struct net_device *netdev)
static int e100_close(struct net_device *netdev)
static int __devinit e100_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
static void __devexit e100_remove(struct pci_dev *pdev)
static int e100_suspend(struct pci_dev *pdev, u32 state)
static int e100_resume(struct pci_dev *pdev)
static void e100_shutdown(struct device *dev)
// 这个是网卡驱动的函数表,每个网卡都有的
staticstruct pci_driver e100_driver = {
.name = DRV_NAME,
.id_table = e100_id_table,
.probe = e100_probe,
.remove = __devexit_p(e100_remove),
#ifdef CONFIG_PM
.suspend = e100_suspend,
.resume = e100_resume,
#endif
#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
.driver = {
.shutdown = e100_shutdown,
}
#endif
};
/*
 * e100_init_module - module load entry point.
 *
 * Prints the driver description, version and copyright when the message
 * mask derived from `debug` includes NETIF_MSG_DRV, then registers the
 * PCI driver.
 *
 * Returns the result of pci_module_init().
 */
static int __init e100_init_module(void)
{
	if (((1 << debug) - 1) & NETIF_MSG_DRV) {
		printk(KERN_INFO PFX "%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
		printk(KERN_INFO PFX "%s\n", DRV_COPYRIGHT);
	}
	return pci_module_init(&e100_driver);
}
/*
 * e100_cleanup_module - module unload entry point.
 *
 * Unregisters the PCI driver.
 */
static void __exit e100_cleanup_module(void)
{
	pci_unregister_driver(&e100_driver);
}
// Standard module hooks: register the load and unload entry points
module_init(e100_init_module);
module_exit(e100_cleanup_module);
posted on 2006-12-03 21:22
Flutist 阅读(1269)
评论(1) 编辑 收藏 引用 所属分类:
学习文章