+ -
当前位置:首页 → 问答吧 → 帮忙看个oops,很纠结~~~

帮忙看个oops,很纠结~~~

时间:2010-11-23

来源:互联网

9036 Nov 22 19:06:12 zhengzp kernel: [  596.304374] BUG: unable to handle kernel paging request at 33323104
9037 Nov 22 19:06:12 zhengzp kernel: [  596.304378] IP: [<c042ee28>] __skb_recv_datagram+0xb8/0x210
9038 Nov 22 19:06:12 zhengzp kernel: [  596.304389] Oops: 0002 [#1] SMP
9039 Nov 22 19:06:12 zhengzp kernel: [  596.304390] last sysfs file: /sys/devices/virtual/vc/vcsa1/dev
9040 Nov 22 19:06:12 zhengzp kernel: [  596.304430] Process test (pid: 3817, ti=decae000 task=df61cb60 task.ti=decae000)
9041 Nov 22 19:06:12 zhengzp kernel: [  596.304431] Stack:
9042 Nov 22 19:06:12 zhengzp kernel: [  596.304435]  c143363c decafe50 00000040 df705d40 decafe58 decafe6c c0478e2b decafe58
9043 Nov 22 19:06:12 zhengzp kernel: [  596.304438]  00001a37 decafe5c decafe58 decaff8c 00000000 00000000 c1433600 decafe74
9044 Nov 22 19:06:12 zhengzp kernel: [  596.304441] Call Trace:
9045 Nov 22 19:06:12 zhengzp kernel: [  596.304443]  [<c0502f01>] ? _spin_unlock_bh+0x11/0x20
9046 Nov 22 19:06:12 zhengzp kernel: [  596.304446]  [<c0478e2b>] ? udp_recvmsg+0x6b/0x2e0
9047 Nov 22 19:06:13 zhengzp kernel: [  596.304449]  [<c012ad96>] ? dequeue_entity+0x16/0x2a0
9048 Nov 22 19:06:13 zhengzp kernel: [  596.304452]  [<c0427c73>] ? sock_common_recvmsg+0x43/0x60
9049 Nov 22 19:06:13 zhengzp kernel: [  596.304455]  [<c04272b4>] ? sock_recvmsg+0xf4/0x120
9050 Nov 22 19:06:13 zhengzp kernel: [  596.304457]  [<c012f9cb>] ? finish_task_switch+0x2b/0xe0
9051 Nov 22 19:06:13 zhengzp kernel: [  596.304460]  [<c014ecb0>] ? autoremove_wake_function+0x0/0x50
9052 Nov 22 19:06:13 zhengzp kernel: [  596.304463]  [<c014424a>] ? try_to_del_timer_sync+0x4a/0x60
9053 Nov 22 19:06:13 zhengzp kernel: [  596.304465]  [<c0144271>] ? del_timer_sync+0x11/0x20
9054 Nov 22 19:06:13 zhengzp kernel: [  596.304467]  [<c05016e6>] ? schedule_timeout+0x86/0xe0
9055 Nov 22 19:06:13 zhengzp kernel: [  596.304471]  [<e19504f0>] ? thread_fun+0x110/0x160 [test]
9056 Nov 22 19:06:13 zhengzp kernel: [  596.304475]  [<c014e71d>] ? sys_timer_settime+0x11d/0x120
9057 Nov 22 19:06:13 zhengzp kernel: [  596.304477]  [<c014e8d0>] ? kthread+0x0/0x70
9058 Nov 22 19:06:13 zhengzp kernel: [  596.304478] Code: 89 ca e8 4c 3f 0d 00 89 d8 83 c4 28 5b 5e 5f 5d c3 66 90 83 6e 74 01 8b 13 8b 43 04 c7 03 00 00 00 00 c7 43 04 00 00 00      00 89 10 <89> 42 04 eb cd 8d 76 00 8b 86 f8 00 00 00 89 45 d8 e9 6e ff ff
9059 Nov 22 19:06:13 zhengzp kernel: [  596.304495] EIP: [<c042ee28>] __skb_recv_datagram+0xb8/0x210 SS:ESP 0068:decafdf4
  1. #include "kthread.h"
  2. #include "public.h"
  3. #include <linux/kthread.h>
  4. #include <linux/errno.h>
  5. #include <linux/kernel.h>
  6. #include <linux/sched.h>
  7. #include <linux/err.h>
  8. #include <linux/inet.h>
  9. #include <linux/udp.h>
  10. #include <linux/err.h>
  11. #include <asm-generic/errno.h>
  12. #include <linux/syscalls.h>   //系统调用函数的头文件
  13. #include <linux/socket.h>
  14. #include <net/sock.h>
  15. #include <linux/inet.h>
  16. #include <net/checksum.h>
  17. #include <net/udp.h>

  18. #define PORT 6520
  19. char g_buf[BUF_LEN];

  20. static void* thread_fun(char* buf)
  21. {
  22.         int err;
  23.         struct msghdr msg;
  24.         struct iovec iov;
  25.         struct socket* sock_server = NULL;
  26.         struct sockaddr_in server_addr;


  27.         iov.iov_base = buf;
  28.         iov.iov_len = BUF_LEN;

  29.         memset(&msg,0,sizeof(struct msghdr));
  30.         msg.msg_name = NULL;
  31.         msg.msg_namelen = 0;
  32.         msg.msg_iov = &iov;
  33.         msg.msg_iovlen = 1;                                // 记录有多少个buf
  34.         msg.msg_control = NULL;
  35.         msg.msg_controllen = 0;
  36.         msg.msg_flags = MSG_DONTWAIT;
  37.        
  38.         memset(&sock_server,0,sizeof(struct sockaddr_in));
  39.         server_addr.sin_family = AF_INET;
  40.         server_addr.sin_addr.s_addr = INADDR_ANY;
  41.         server_addr.sin_port = htons(PORT);

  42.         // 创建一个socket
  43.         err = sock_create(AF_INET,SOCK_DGRAM,IPPROTO_UDP,&sock_server);
  44.         if (err < 0)
  45.         {
  46.                 printk(KERN_ALERT "sock create error\n");
  47.                 goto out;
  48.         }
  49.         printk(KERN_ALERT " END  CREATE SOCK\n");

  50.         // 绑定地址
  51.         err = kernel_bind(sock_server,(struct sockaddr*)&server_addr,sizeof(struct sockaddr));
  52.         if (err < 0)
  53.         {
  54.                 printk(KERN_ALERT "bind error\n");
  55.                 sock_release(sock_server);
  56.                 goto out;
  57.         }

  58.         while(!kthread_should_stop())
  59.         {
  60.                 // 接收数据
  61.                 err = sock_recvmsg(sock_server,&msg,BUF_LEN,MSG_DONTWAIT);
  62.                 if (err < 0)
  63.                 {
  64.                         continue;
  65.                 }
  66.                 else if (err > 0)
  67.                 {
  68.                         printk(KERN_ALERT "GOT : %s and strlen(buf) is %d\n",buf,strlen(buf));
  69.                 }
  70.                 msleep(1);
  71.         }
  72.         return 0;

  73. out:
  74.         while(!kthread_should_stop())                // 等待kthead_stop();
  75.         {
  76.                 msleep(1);
  77.         }

  78.         return NULL;
  79. }

  80. struct task_struct* test_thread_create(void)
  81. {
  82.         printk(KERN_ALERT "THREAD BEGIN\n");
  83.         struct task_struct* temp_kthread = NULL;
  84.         temp_kthread = kthread_run(thread_fun,(void*)g_buf,MODULE_NAME);
  85.         if (IS_ERR(temp_kthread))
  86.         {
  87.                 printk(KERN_ALERT "cant run thread\n");
  88.                 return NULL;
  89.         }
  90.         return temp_kthread;
  91. }

  92. int test_thread_destroy(struct task_struct* temp_kthread)
  93. {
  94.         printk(KERN_ALERT "DESTROY THREAD BEGIN\n");
  95.         if (temp_kthread != NULL)
  96.         {
  97.                 kthread_stop(temp_kthread);
  98.         }

  99.         return 0;
  100. }
复制代码
上面的是代码,每次insmod模块都出现假死,向它发送数据一段时间后,就出现oops,出现了就不假死了。出现oops之前是可以接收数据的。第一次搞这个很多不懂啊~

作者: xiaopeng14   发布时间: 2010-11-23

memset(&sock_server,0,sizeof(struct sockaddr_in));
这句有问题,
应该是
memset(&server_addr,0,sizeof(struct sockaddr_in));

你变量名字太像了 混淆了吧

作者: chobit_s   发布时间: 2010-11-23

哦 这个确实,用vc写的代码,没写完直接选择了一个vc提示的,没注意到~

作者: xiaopeng14   发布时间: 2010-11-23

现在的问题依旧,还是会死机

作者: xiaopeng14   发布时间: 2010-11-23

没有这样在传输层做的经验,所以以下判断估计不能帮lz解决问题:

因为socket层设置sock和sk是一个较为复杂的流程。从oops信息初步判断:


QUOTE:
9045 Nov 22 19:06:12 zhengzp kernel: [  596.304443]  [<c0502f01>] ? _spin_unlock_bh+0x11/0x20
9046 Nov 22 19:06:12 zhengzp kernel: [  596.304446]  [<c0478e2b>] ? udp_recvmsg+0x6b/0x2e0



感觉像是udp_recvmsg中不正确地使用了锁造成的。这个原因我大胆地假设一下,会不会是sk未被正确完全地初始化从而造成的呢??其实lz要解决这个问题,可以在udp_recvmsg中加一些printk,先判断问题语句。如果汇编OK,也可以根据oops信息,结合编译后的汇编代码来定位问题代码行……然后再进一步把问题拿上来讨论吧。

作者: 独孤九贱   发布时间: 2010-11-23

一般来说内存管理出错还死机 都是内存越界写造成的.破坏了系统记录的数据

作者: smalloc   发布时间: 2010-11-23



QUOTE:
没有这样在传输层做的经验,所以以下判断估计不能帮lz解决问题:

因为socket层设置sock和sk是一个较为复 ...
独孤九贱 发表于 2010-11-23 11:38




    我也估计是socket的初始化问题,关键我就不知道怎么来初始化,现在我改成阻塞模式了,一旦接收数据就OOPS


[  390.556742] BUG: unable to handle kernel NULL pointer dereference at 00000048
[  390.556747] IP: [<c04272b1>] sock_recvmsg+0xf1/0x120
[  390.556774] *pde = 00000000
[  390.556776] Oops: 0000 [#1] SMP
[  390.556778] last sysfs file: /sys/devices/virtual/vc/vcsa1/dev
[  390.556792] Dumping ftrace buffer:
[  390.556794]    (ftrace buffer empty)
[  390.556795] Modules linked in: test1 binfmt_misc vboxsf bridge stp bnep vboxnetflt vboxdrv input_polldev video output lp ppdev psmouse serio_raw pcspkr i2c_piix4 joydev vboxguest parport_pc parport usbhid e1000 fbcon tileblit font bitblit softcursor
[  390.556806]
[  390.556808] Pid: 3594, comm: test Not tainted (2.6.28-11-generic #42-Ubuntu) VirtualBox
[  390.556809] EIP: 0060:[<c04272b1>] EFLAGS: 00010246 CPU: 0
[  390.556811] EIP is at sock_recvmsg+0xf1/0x120
[  390.556812] EAX: ded59ea8 EBX: 00000000 ECX: ded59f8c EDX: cc82f004
[  390.556814] ESI: 00000064 EDI: cc82f004 EBP: ded59f78 ESP: ded59e9c
[  390.556815]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
[  390.556817] Process test (pid: 3594, ti=ded58000 task=dfe9e480 task.ti=ded58000)
[  390.556818] Stack:
[  390.556818]  00000064 00000000 00000000 ded59ec8 c012f9cb 00000000 00000001 ffffffff
[  390.556822]  00000000 00000000 00000000 00000000 00000000 dfe9e480 00000000 00000000
[  390.556825]  c0802500 dfe9e480 c014ecb0 ded59ee8 ded59ee8 ded59f2c 00000000 ded59f2c
[  390.556828] Call Trace:
[  390.556829]  [<c012f9cb>] ? finish_task_switch+0x2b/0xe0
[  390.556833]  [<c014ecb0>] ? autoremove_wake_function+0x0/0x50
[  390.556836]  [<c014424a>] ? try_to_del_timer_sync+0x4a/0x60
[  390.556839]  [<c0144271>] ? del_timer_sync+0x11/0x20
[  390.556841]  [<c05016e6>] ? schedule_timeout+0x86/0xe0
[  390.556845]  [<e1949501>] ? thread_fun+0x111/0x170 [test1]
[  390.556849]  [<e19493f0>] ? thread_fun+0x0/0x170 [test1]
[  390.556851]  [<c014e90c>] ? kthread+0x3c/0x70
[  390.556853]  [<c014e8d0>] ? kthread+0x0/0x70
[  390.556854]  [<c0105477>] ? kernel_thread_helper+0x7/0x10
[  390.556857] Code: 56 09 e6 ff 85 c0 75 26 8b 47 0c 89 d9 89 fa 89 85 2c ff ff ff 8b 45 08 89 34 24 89 44 24 04 8b 9d 2c ff ff ff 8d 85 30 ff ff ff <ff> 53 48 3d ef fd ff ff 74 0d 81 c4 d0 00 00 00 5b 5e 5f 5d c3
[  390.556873] EIP: [<c04272b1>] sock_recvmsg+0xf1/0x120 SS:ESP 0068:ded59e9c
[  390.556877] ---[ end trace b2ea34c242159276 ]---
[  462.741239] DESTROY THREAD BEGIN
[  462.741274] BUG: unable to handle kernel NULL pointer dereference at 00000390

作者: xiaopeng14   发布时间: 2010-11-23



QUOTE:
[  390.556742] BUG: unable to handle kernel NULL pointer dereference at 00000048


这是 oops 最常见的错误,解引用空指针

作者: Godbach   发布时间: 2010-11-23