用戶空間與內核空間ipc總結(sockopt,ioctl,mmap,netlink,proc,seq,file,copy_user) 原作者:ubuntuer
多數的 Linux 內核態程序都需要和用戶空間的進程交換數據,但 Linux 內核態無法對傳統的 Linux 進程間同步和通信的方法提供足夠的支持!本文就總結下常見的ipc:
getsockopt/setsockopt、mmap、netlink/socket、proc/seq、copy_from_user/copy_to_user、文件。采用先講解后測試代碼的方式,netlink和proc由于江哥和段兄都寫得比較好了,我就貼了鏈接…… 好了,不廢話了,開始。
一.getsockopt/setsockopt
最近看ebtables源碼,發現它與內核的ipc采用的是getsockopt/setsockopt,具體實現是在內核中用nf_register_sockopt函數注冊一個nf_sockopt_ops結構體,比如說:
- /*
- * Template nf_sockopt_ops initializer. `常數` and `N` are placeholders, not
- * real identifiers: pick a base command number that does not collide with
- * any existing socket option. Netfilter treats each range as half-open,
- * i.e. a command matches when optmin <= cmd < optmax.
- */
- static struct nf_sockopt_ops nso = {
-     .pf = PF_INET,            /* protocol family */
-     .set_optmin = 常數,       /* smallest "set" command number */
-     .set_optmax = 常數+N,     /* one past the largest "set" command number */
-     .set = do_nso_set,        /* handler called for setsockopt() */
-     .get_optmin = 常數,       /* smallest "get" command number */
-     .get_optmax = 常數+N,     /* one past the largest "get" command number */
-     .get = do_nso_get,        /* handler called for getsockopt() */
- };
復制代碼
其中命令字不能與系統已有的命令字重復。用戶空間調用setsockopt/getsockopt時,內核會根據命令字找到并調用這里注冊的set/get處理函數。
從這個圖里面可以看出來,這種方法的本質就是調用copy_from_user()/copy_to_user()完成內核和用戶空間的通信,這樣其實效率不高,多用在傳遞控制選項信息,不適合用做大量數據的傳輸。copy_from_user()/copy_to_user()我將在后面介紹…… 當然,對于linux來說一切皆文件,那么我想應該也是可以定義自己的ioctl的,這個在后面的
copy_xx_user的塊設備中講解。
setsockopt/getsockopt kernel部分代碼:
- /*
- * recv_msg - "set" handler: receives a message passed in via setsockopt().
- * @sk:   socket the option was set on (unused here)
- * @cmd:  option number supplied by the caller
- * @user: user-space buffer holding the message
- * @len:  number of bytes the caller says @user holds
- *
- * Only SOCKET_OPS_SET is acted on; any other command is ignored and 0 is
- * returned. At most sizeof(umsg) - 1 bytes are copied so the local buffer
- * is always NUL-terminated before it is printed with "%s".
- *
- * Returns 0 on success, -EFAULT if the user buffer could not be read.
- */
- static int recv_msg(struct sock *sk, int cmd, void *user, unsigned int len)
- {
-     char umsg[64];
-     unsigned int n = len;
-     int ret;
-     printk(KERN_INFO "sockopt: recv_msg()\n");
-     if (cmd != SOCKET_OPS_SET)
-         return 0;
-     /* Never trust the caller's length: clamp it to the local buffer. */
-     if (n > sizeof(umsg) - 1)
-         n = sizeof(umsg) - 1;
-     memset(umsg, 0, sizeof(umsg));
-     /* copy_from_user() returns the number of bytes it could NOT copy. */
-     ret = copy_from_user(umsg, user, n);
-     if (ret)
-         return -EFAULT;
-     printk("recv_msg: umsg = %s. ret = %d\n", umsg, ret);
-     return 0;
- }
- /*
- * send_msg - "get" handler: returns KMSG to the caller of getsockopt().
- * @sk:   socket the option was read from (unused here)
- * @cmd:  option number supplied by the caller
- * @user: user-space buffer to fill
- * @len:  in: size of @user in bytes; out: number of bytes written.
- *        NOTE(review): in the netfilter sockopt interface this is expected
- *        to point at a kernel copy of optlen - confirm for the target kernel.
- *
- * Only SOCKET_OPS_GET is acted on; any other command is ignored and 0 is
- * returned. The copy is truncated to the caller's buffer size so the
- * user buffer is never overrun.
- *
- * Returns 0 on success, -EINVAL for a negative length, -EFAULT if the
- * user buffer could not be written.
- */
- static int send_msg(struct sock *sk, int cmd, void *user, int *len)
- {
-     int n = (int)KMSG_LEN;
-     int ret;
-     printk(KERN_INFO "sockopt: send_msg()\n");
-     if (cmd != SOCKET_OPS_GET)
-         return 0;
-     if (*len < 0)
-         return -EINVAL;
-     if (n > *len)
-         n = *len;
-     /* copy_to_user() returns the number of bytes it could NOT copy. */
-     ret = copy_to_user(user, KMSG, n);
-     if (ret)
-         return -EFAULT;
-     *len = n;
-     printk("send_msg: umsg = %s. ret = %d. success\n", KMSG, ret);
-     return 0;
- }
- /*
- * Socket-option table for this test module: setsockopt() commands starting
- * at SOCKET_OPS_SET go to recv_msg(), getsockopt() commands starting at
- * SOCKET_OPS_GET go to send_msg(). Presumably registered elsewhere with
- * nf_register_sockopt() - the registration is not part of this excerpt.
- */
- static struct nf_sockopt_ops test_sockops = {
-     .pf         = PF_INET,
-     /* setsockopt() side */
-     .set        = recv_msg,
-     .set_optmin = SOCKET_OPS_SET,
-     .set_optmax = SOCKET_OPS_MAX,
-     /* getsockopt() side */
-     .get        = send_msg,
-     .get_optmin = SOCKET_OPS_GET,
-     .get_optmax = SOCKET_OPS_MAX,
- };
復制代碼
setsockopt/getsockopt user部分代碼:
- /* Send UMSG to the kernel; this ends up in the module's recv_msg(). */
- ret = setsockopt(sockfd, IPPROTO_IP, SOCKET_OPS_SET, UMSG, UMSG_LEN);
- if (ret != 0)
- {
-     printf("setsockopt error: errno = %d, errstr = %s\n", errno, strerror(errno));
- }
- else
- {
-     printf("setsockopt: ret = %d. msg = %s\n", ret, UMSG);
- }
- /* Ask the kernel for its message; this ends up in the module's send_msg(). */
- len = sizeof(char)*64;
- ret = getsockopt(sockfd, IPPROTO_IP, SOCKET_OPS_GET, kmsg, &len);
- /* Only print kmsg when the call succeeded; on failure it was never filled. */
- if (ret != 0)
- {
-     printf("getsockopt error: errno = %d, errstr = %s\n", errno, strerror(errno));
- }
- else
- {
-     printf("getsockopt: ret = %d. msg = %s\n", ret, kmsg);
- }
復制代碼
二. mmap共享內存
采用共享內存通信的一個顯而易見的好處是效率高,因為進程可以直接讀寫內存,而不需要任何數據的拷貝。對于像管道和消息隊列等通信方式,則需要在內核和用戶空間進行四次的數據拷貝,而共享內存則只拷貝兩次數據[1]:一次從輸入文件到共享內存區,另一次從共享內存區到輸出文件。實際上,進程之間在共享內存時,并不總是讀寫少量數據后就解除映射、有新的通信時再重新建立共享內存區域,而是保持共享區域,直到通信完畢為止。這樣,數據內容一直保存在共享內存中,并沒有寫回文件。共享內存中的內容往往是在解除映射時才寫回文件的。因此,采用共享內存的通信方式效率是非常高的。
kernel:
- #include <linux/config.h>
- #include <linux/module.h>
- #include <linux/moduleparam.h>
- #include <linux/init.h>
- #include <linux/kernel.h> /* printk() */
- #include <linux/slab.h> /* kmalloc() */
- #include <linux/fs.h> /* everything... */
- #include <linux/errno.h> /* error codes */
- #include <linux/types.h> /* size_t */
- #include <linux/mm.h>
- #include <linux/kdev_t.h>
- #include <asm/page.h>
- #include <linux/cdev.h>
- #include <linux/device.h>
- #include <linux/gfp.h>
- /* Module metadata. */
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Kenthy@163.com.");
- MODULE_DESCRIPTION("Kernel study and test.");
- /* Kernel address of the memory handed out through mmap; NULL until allocated. */
- static unsigned char *myaddr = NULL;
- /* Major device number; 0 means "let the kernel pick one" (see simple_init). */
- static int simple_major = 0;
- module_param(simple_major, int, 0);
- /*
- * Common VMA ops.
- */
- /*
- * simple_vma_open - VMA open callback; only logs the mapping.
- * @vma: the area being opened
- *
- * Prints the VMA start address and the byte offset derived from vm_pgoff.
- */
- void simple_vma_open(struct vm_area_struct *vma)
- {
-     printk(KERN_NOTICE "Simple VMA open, virt %lx, phys %lx\n",
-            vma->vm_start, vma->vm_pgoff << PAGE_SHIFT);
- }
- /*
- * simple_vma_close - VMA close callback; only logs that it ran.
- * @vma: the area being closed (unused)
- */
- void simple_vma_close(struct vm_area_struct *vma)
- {
-     printk(KERN_NOTICE "Simple VMA close.\n");
- }
- /*
- * simple_vma_nopage - fault handler: supplies the page backing @address.
- * @vma:     the faulting area
- * @address: faulting user virtual address
- * @type:    optional out-parameter for the fault type (may be NULL)
- *
- * The mapping is backed by the two pages starting at myaddr, so the only
- * valid offsets into the VMA are [0, 2 * PAGE_SIZE). Faults outside that
- * range, or before the backing pages exist, return NOPAGE_SIGBUS.
- * On success the page's reference count is raised before it is returned.
- */
- struct page *simple_vma_nopage(struct vm_area_struct *vma,
-                                unsigned long address, int *type)
- {
-     struct page *pageptr;
-     unsigned long offset = (address - vma->vm_start);
-     /* ">=": an offset of exactly 2 pages is already past the buffer. */
-     if (!myaddr || offset >= PAGE_SIZE*2)
-     {
-         printk("out of size\n");
-         return NOPAGE_SIGBUS;
-     }
-     printk("in vma_nopage: offset=%lu\n", offset);
-     if (offset < PAGE_SIZE) /* the first page */
-         pageptr = virt_to_page(myaddr);
-     else /* the second page */
-         pageptr = virt_to_page(myaddr+PAGE_SIZE);
-     get_page(pageptr);
-     if (type)
-         *type = VM_FAULT_MINOR;
-     return pageptr;
- }
- /* VMA callbacks installed on every mapping by simple_nopage_mmap(). */
- static struct vm_operations_struct simple_nopage_vm_ops = {
-     .nopage = simple_vma_nopage,   /* page-fault handler */
-     .open   = simple_vma_open,
-     .close  = simple_vma_close,
- };
- /* open() handler: there is nothing to set up, so it always succeeds. */
- static int simple_open(struct inode *inode, struct file *filp)
- {
-     return 0;
- }
- /* release() handler: there is nothing to tear down, so it always succeeds. */
- static int simple_release(struct inode *inode, struct file *filp)
- {
-     return 0;
- }
- /*
- * simple_nopage_mmap - mmap() handler: sets up a fault-driven mapping.
- * @filp: file being mapped
- * @vma:  area describing the requested mapping
- *
- * No pages are mapped here; the VMA is flagged and given
- * simple_nopage_vm_ops so pages are supplied on fault. The open callback
- * is invoked by hand for this initial mapping. Always returns 0.
- */
- static int simple_nopage_mmap(struct file *filp, struct vm_area_struct *vma)
- {
-     unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-     /* Both values are unsigned long, hence %lu. */
-     printk("enter simple_nopage_mmap: offset=%lu, vma->vm_pgoff=%lu\n", offset, vma->vm_pgoff);
-     if (offset >= __pa(high_memory) || (filp->f_flags & O_SYNC))
-         vma->vm_flags |= VM_IO;
-     vma->vm_flags |= VM_RESERVED;
-     vma->vm_ops = &simple_nopage_vm_ops;
-     simple_vma_open(vma);
-     return 0;
- }
- /*
- * Set up the cdev structure for a device.
- *
- * @dev:   cdev to initialise and register
- * @minor: minor number; the major comes from simple_major
- * @fops:  file operations for the device
- *
- * A cdev_add() failure is only logged; nothing is returned to the caller.
- */
- static void simple_setup_cdev(struct cdev *dev, int minor,
-                               struct file_operations *fops)
- {
-     int err, devno = MKDEV(simple_major, minor);
-     /* cdev_init() already stores fops in dev->ops. */
-     cdev_init(dev, fops);
-     dev->owner = THIS_MODULE;
-     err = cdev_add (dev, devno, 1);
-     /* Fail gracefully if need be */
-     if (err)
-         printk (KERN_NOTICE "Error %d adding simple%d\n", err, minor);
- }
- /* File operations of the simple_nopage device: open, release and mmap only. */
- static struct file_operations simple_nopage_ops = {
-     .owner   = THIS_MODULE,
-     .mmap    = simple_nopage_mmap,
-     .open    = simple_open,
-     .release = simple_release,
- };
- /*
- * The single character device exported by this module (set up as minor 0).
- * No special housekeeping info is kept, so a raw cdev is used directly.
- */
- static struct cdev SimpleDevs;
- /*
- * Module housekeeping.
- */
- /*
- * simple_init - module entry point.
- *
- * Reserves one char-device number (the given simple_major, or a dynamic
- * one when simple_major is 0), allocates the two pages that back the
- * mapping, fills them with test strings and only then registers the cdev,
- * so the device never becomes reachable before its memory exists.
- *
- * Returns 0 on success, a negative errno if the device number cannot be
- * obtained, or -ENOMEM (after releasing the device number) if the pages
- * cannot be allocated.
- *
- * NOTE(review): the fault handler takes a reference on each page of this
- * order-1 allocation separately; confirm whether the target kernel needs
- * __GFP_COMP (or two order-0 allocations) for that to be safe.
- */
- static int simple_init(void)
- {
-     int result;
-     dev_t dev = MKDEV(simple_major, 0);
-     /* Figure out our device number. */
-     if (simple_major)
-         result = register_chrdev_region(dev, 1, "simple_nopage");
-     else
-         result = alloc_chrdev_region(&dev, 0, 1, "simple_nopage");
-     if (result < 0) {
-         printk(KERN_WARNING "simple_nopage: unable to get major %d\n", simple_major);
-         return result;
-     }
-     /* Only read dev after success; it holds the dynamic major if one was allocated. */
-     simple_major = MAJOR(dev);
-     /* Two contiguous pages (order 1); __get_free_pages() returns an unsigned long. */
-     myaddr = (unsigned char *)__get_free_pages(GFP_KERNEL, 1);
-     if (!myaddr) {
-         unregister_chrdev_region(dev, 1);
-         return -ENOMEM;
-     }
-     /* Test patterns: one string at the start of each page. */
-     strcpy((char *)myaddr, "1234567890");
-     strcpy((char *)(myaddr+PAGE_SIZE), "abcdefghij");
-     /* Register the cdev last, once its backing memory is ready. */
-     simple_setup_cdev(&SimpleDevs, 0, &simple_nopage_ops);
-     return 0;
- }
- /*
- * simple_cleanup - module exit point.
- *
- * Removes the cdev, releases the device number and frees the two pages
- * (order 1) that back the mapping, if they were allocated.
- */
- static void simple_cleanup(void)
- {
-     cdev_del(&SimpleDevs);
-     unregister_chrdev_region(MKDEV(simple_major, 0), 1);
-     if (myaddr) {
-         free_pages((unsigned long)myaddr, 1);
-         myaddr = NULL;
-     }
- }
- /* Register the module's unload and load entry points. */
- module_exit(simple_cleanup);
- module_init(simple_init);
復制代碼
user:
- #include </work/apue/ourhdr.h>
- #include <fcntl.h>
- #include <sys/mman.h>
- /*
- * Test client: maps two 4096-byte pages of /dev/simple_nopage read-only and
- * prints the mapping address and the first 10 bytes of each page.
- * The 4096 stride is hard-coded and assumes the kernel page size is 4 KiB.
- * err_sys() comes from the included ourhdr.h; it is assumed not to return.
- */
- int main(int argc, char *argv[])
- {
-     int fdin;
-     void *map;
-     const unsigned char *src;
-     unsigned char sz[1024]={0};
-     (void)argc;
-     (void)argv;
-     if ((fdin = open("/dev/simple_nopage", O_RDONLY)) < 0)
-         err_sys("can't open /dev/simple_nopage for reading");
-     if ((map = mmap(NULL, 4096*2, PROT_READ, MAP_SHARED,
-                     fdin, 0)) == MAP_FAILED)
-         err_sys("mmap error for /dev/simple_nopage");
-     /* Use a byte pointer: arithmetic on void * is not standard C. */
-     src = map;
-     /* First page. */
-     memcpy(sz, src, 10);
-     sz[10]='\0';
-     printf("%p\n", (const void *)src);
-     printf("%s\n\n", sz);
-     /* Second page. */
-     memcpy(sz, src+4096, 10);
-     sz[10]='\0';
-     printf("%p\n", (const void *)(src+4096));
-     printf("%s\n", sz);
-     munmap(map, 4096*2);
-     exit(0);
- }
復制代碼
加載上面的mmap模塊(insmod)后,注意還要用mknod創建設備節點 /dev/simple_nopage(動態分配的主設備號可以在 /proc/devices 中查到),用戶態程序才能打開它。
|