dmwm / PHEDEX

CMS data-placement suite
8 stars 18 forks source link

Reassigned to another tracker [was: PhEDEx backend died?] #678

Open ericvaandering opened 11 years ago

ericvaandering commented 11 years ago

Original Savannah ticket 68993 reported by None on Sat Jun 19 07:53:38 2010.

THIS ITEM WAS REASSIGNED TO SR #115264.

vocms107 triggered OOM kill, as per mail to cms-service-webtools. Unless I hear anything in a few moments I'll try to identify what exactly got killed and will attempt to restart. From /var/log/oom_kill.log Jun 19 14:06:15 2010: Memory usage over threshold - usage: 96.2 > threshold: 95.000000 Jun 19 14:06:15 2010: Candidate: 1, pid:30137 memory: 113070080 command: sshd gradient: 0 Jun 19 14:06:15 2010: Candidate: 2, pid:31247 memory: 138739712 command: crond gradient: 0 Jun 19 14:06:15 2010: Candidate: 3, pid:13036 memory: 252637184 command: nscd gradient: 24 Jun 19 14:06:15 2010: Candidate: 4, pid:2900 memory: 303480832 command: named gradient: 0 Jun 19 14:06:15 2010: Candidate: 5, pid:1447 memory: 366751744 command: python gradient: 0 Jun 19 14:06:15 2010: Candidate: 6, pid:25423 memory: 383979520 command: python gradient: 0 Jun 19 14:06:15 2010: Candidate: 7, pid:1169 memory: 643989504 command: python gradient: 0 Jun 19 14:06:15 2010: Candidate: 8, pid:19505 memory: 28813746176 command: httpd gradient: 4677027 Jun 19 14:06:28 2010: Tree: init(1)─┬─migration/0(2) ├─ksoftirqd/0(3) ├─watchdog/0(4) ├─migration/1(5) ├─ksoftirqd/1(6) ├─watchdog/1(7) ├─migration/2(8) ├─ksoftirqd/2(9) ├─watchdog/2(10) ├─migration/3(11) ├─ksoftirqd/3(12) ├─watchdog/3(13) ├─migration/4(14) ├─ksoftirqd/4(15) ├─watchdog/4(16) ├─migration/5(17) ├─ksoftirqd/5(18) ├─watchdog/5(19) ├─migration/6(20) ├─ksoftirqd/6(21) ├─watchdog/6(22) ├─migration/7(23) ├─ksoftirqd/7(24) ├─watchdog/7(25) ├─events/0(26) ├─events/1(27) ├─events/2(28) ├─events/3(29) ├─events/4(30) ├─events/5(31) ├─events/6(32) ├─events/7(33) ├─khelper(34) ├─kthread(235)─┬─kblockd/0(247) │ ├─kblockd/1(248) │ ├─kblockd/2(249) │ ├─kblockd/3(250) │ ├─kblockd/4(251) │ ├─kblockd/5(252) │ ├─kblockd/6(253) │ ├─kblockd/7(254) │ ├─kacpid(255) │ ├─cqueue/0(373) │ ├─cqueue/1(374) │ ├─cqueue/2(375) │ ├─cqueue/3(376) │ ├─cqueue/4(377) │ ├─cqueue/5(378) │ ├─cqueue/6(379) │ ├─cqueue/7(380) │ ├─khubd(383) │ ├─kseriod(385) │ ├─khungtaskd(510) │ 
├─pdflush(511) │ ├─pdflush(512) │ ├─kswapd0(513) │ ├─kswapd1(514) │ ├─aio/0(515) │ ├─aio/1(516) │ ├─aio/2(517) │ ├─aio/3(518) │ ├─aio/4(519) │ ├─aio/5(520) │ ├─aio/6(521) │ ├─aio/7(522) │ ├─pdflush(594) │ ├─pdflush(595) │ ├─kpsmoused(672) │ ├─mpt_poll_0(776) │ ├─mpt/0(777) │ ├─scsi_eh_0(778) │ ├─kstriped(806) │ ├─kjournald(843) │ ├─kauditd(868) │ ├─kmpathd/0(2237) │ ├─kmpathd/1(2238) │ ├─kmpathd/2(2239) │ ├─kmpathd/3(2240) │ ├─kmpathd/4(2241) │ ├─kmpathd/5(2242) │ ├─kmpathd/6(2243) │ ├─kmpathd/7(2244) │ ├─kmpath_handlerd(2245) │ ├─kjournald(2282) │ ├─kjournald(2284) │ ├─kjournald(2286) │ ├─kjournald(2288) │ ├─kjournald(2290) │ └─kipmi0(3706) ├─udevd(896) ├─python(1169,cmsweb)─┬─{python}(1194) │ ├─{python}(1195) │ ├─{python}(1196) │ ├─{python}(1197) │ ├─{python}(1198) │ ├─{python}(1199) │ ├─{python}(1200) │ ├─{python}(1201) │ ├─{python}(1202) │ ├─{python}(1203) │ ├─{python}(1204) │ ├─{python}(1205) │ ├─{python}(1206) │ ├─{python}(1207) │ ├─{python}(1208) │ ├─{python}(1209) │ ├─{python}(1210) │ ├─{python}(1211) │ ├─{python}(1212) │ ├─{python}(1213) │ ├─{python}(1214) │ ├─{python}(1215) │ ├─{python}(1216) │ ├─{python}(1217) │ ├─{python}(1218) │ ├─{python}(1219) │ ├─{python}(1220) │ ├─{python}(1221) │ ├─{python}(1222) │ ├─{python}(1223) │ ├─{python}(1224) │ ├─{python}(1225) │ └─{python}(1226) ├─python(1447,cmsweb)─┬─{python}(1472) │ ├─{python}(1473) │ ├─{python}(1474) │ ├─{python}(1475) │ ├─{python}(1476) │ ├─{python}(1477) │ ├─{python}(1478) │ ├─{python}(1479) │ ├─{python}(1480) │ ├─{python}(1481) │ ├─{python}(1482) │ ├─{python}(1483) │ └─{python}(1484) ├─agetty(1804) ├─mcstransd(2559) ├─auditd(2725)─┬─{auditd}(2726) │ └─audispd(2727)───{audispd}(2728) ├─restorecond(2743) ├─syslogd(2751) ├─klogd(2755) ├─irqbalance(2875) ├─named(2900,named)─┬─{named}(2901) │ ├─{named}(2902) │ ├─{named}(2903) │ ├─{named}(2904) │ ├─{named}(2905) │ ├─{named}(2906) │ ├─{named}(2907) │ ├─{named}(2908) │ ├─{named}(2909) │ └─{named}(2910) ├─dbus-daemon(2978,dbus)───{dbus-daemon}(2979) 
├─acpid(2988) ├─hald(2996,haldaemon)───hald-runner(2997,root)─┬─hald-addon-acpi(3004,haldaemon) │ ├─hald-addon-keyb(3006,haldaemon) │ ├─hald-addon-keyb(3010,haldaemon) │ └─hald-addon-keyb(3017,haldaemon) ├─cdp-listend(3045) ├─ncm-cdispd(3055) ├─beat(3065) ├─sshd(3076)───sshd(30135)───sshd(30137,cmsweb)───bashs(30138) ├─cupsd(3084) ├─xinetd(3095) ├─ntpd(3110,ntp) ├─afs_callback(3363) ├─afs_rxlistener(3365) ├─afs_rxevent(3366) ├─afsd(3374) ├─afs_checkserver(3375) ├─afs_background(3377) ├─afs_background(3380) ├─afs_background(3382) ├─afs_background(3383) ├─afs_background(3385) ├─afs_background(3388) ├─afs_background(3391) ├─afs_background(3392) ├─afs_background(3394) ├─afs_background(3395) ├─afs_cachetrim(3401) ├─notd(3447) ├─atd(3924) ├─smartd(3944) ├─mingetty(3951) ├─mingetty(3952) ├─mingetty(3953) ├─mingetty(3954) ├─mingetty(3955) ├─mingetty(3956) ├─nscd(13036,nscd)─┬─{nscd}(13037) │ ├─{nscd}(13039) │ ├─{nscd}(13040) │ ├─{nscd}(13041) │ ├─{nscd}(13042) │ ├─{nscd}(13043) │ └─{nscd}(13044) ├─snmpd(15591)───{snmpd}(15593) ├─sendmail(15655) ├─sendmail(15664,smmsp) ├─crond(15692)─┬─crond(31246)───sh(31248)───sh(31249)───perl(31250) │ └─crond(31247,cmsweb)───sh(31251)───trim-cache(31252)───htcacheclean(31304) ├─lemon-agent(15788)─┬─{lemon-agent}(15790) │ ├─perl(15864) │ ├─perl(15865) │ ├─lemon-sensor-li(15866) │ ├─perl(15867) │ ├─perl(15868) │ ├─perl(15869) │ ├─lemon-sensor-ex(15870) │ ├─perl(15871) │ └─lemon-sensor-fi(15872) ├─httpd(19505,cmsweb)─┬─httpd(587) │ ├─httpd(588) │ ├─httpd(589) │ ├─httpd(591) │ ├─rotatelogs(19508) │ ├─rotatelogs(19509) │ ├─rotatelogs(19510) │ ├─rotatelogs(19511) │ ├─httpd(29228) │ ├─httpd(29339) │ ├─httpd(31206) │ ├─httpd(31208) │ ├─httpd(31224) │ └─httpd(31305) ├─python(25423,cmsweb)─┬─{python}(25449) │ ├─{python}(25450) │ ├─{python}(25451) │ ├─{python}(25452) │ ├─{python}(25453) │ ├─{python}(25454) │ ├─{python}(25455) │ ├─{python}(25456) │ ├─{python}(25457) │ ├─{python}(25458) │ ├─{python}(25459) │ ├─{python}(25460) │ └─{python}(25461) 
└─python(28881)───sh(593)───pstree(596) Jun 19 14:06:28 2010: Swap usage above critical, killing biggest application Jun 19 14:06:29 2010: Killing process: httpd (pid=19505) of user cmsweb with children: 31305,591,31206,31208,19510,29339,587,19511,588,589,29228,19508,19509,31224 has memory consumption: 26.83GB. Jun 19 14:06:32 2010: E-mail sent to: cms-service-webtools@cern.ch Jun 19 14:06:32 2010: Killed.
ericvaandering commented 11 years ago

Comment by lat on Sat Jun 19 07:37:45 2010

vocms106 is also heavily swapping, as top shows. Note: httpd is at 25 gigabytes.

+verbatim+ top - 14:35:49 up 44 days, 21:07, 2 users, load average: 3.95, 2.08, 0.91 Tasks: 179 total, 1 running, 177 sleeping, 0 stopped, 1 zombie Cpu(s): 0.6%us, 5.2%sy, 0.0%ni, 64.9%id, 29.2%wa, 0.0%hi, 0.0%si, 0.0%st Mem: 24675544k total, 24601792k used, 73752k free, 1036k buffers Swap: 4192924k total, 2597680k used, 1595244k free, 29444k cached

PID USER PR NI VIRT RES SHR S P %CPU %MEM TIME+ COMMAND
514 root 10 -5 0 0 0 D 5 22.9 0.0 0:20.63 [kswapd1]
513 root 10 -5 0 0 0 S 4 13.3 0.0 0:14.43 [kswapd0]
17709 cmsweb 18 0 25.0g 23g 10m D 2 5.0 98.5 5:37.26 /data/projects/phedex-datasvc/sw/slc5_amd64_gcc434/cms/PHEDEX-datasvc/DATASVC_1_5_2a-cmp -f /data/projects/phedex-datasvc/sw/apache2/conf/
17842 cmsweb 16 0 139m 26m 10m D 4 2.0 0.1 0:08.55 /data/projects/phedex-datasvc/sw/slc5_amd64_gcc434/cms/PHEDEX-datasvc/DATASVC_1_5_2a-cmp -f /data/projects/phedex-datasvc/sw/apache2/conf/
-verbatim-

ericvaandering commented 11 years ago

Comment by lat on Sat Jun 19 07:42:18 2010

And load is shooting to the heavens, at about ten now.

The stack trace I managed to get for httpd doesn't say much to me, but then neither kswapd nor httpd is active any more. Maybe the processing finished before I got to the trace, or oom_kill got there first.

+verbatim+ $ sudo pstack 17709

0 0x0000003d20c85999 in ?? ()

1 0x0000003d20c86a05 in Perl_hv_exists_ent ()

2 0x0000003d20caad2d in Perl_pp_exists ()

3 0x0000003d20c89e4e in Perl_runops_standard ()

4 0x0000003d20c344b0 in Perl_call_sv ()

5 0x00002b926f318852 in modperl_callback ()

6 0x00002b926f318f41 in modperl_callback_run_handlers ()

7 0x00002b926f3194df in modperl_callback_per_dir ()

8 0x00002b926f3131d0 in modperl_response_handler_run ()

9 0x00002b926f313381 in modperl_response_handler_cgi ()

10 0x00000000004366e3 in ap_run_handler ()

11 0x0000000000439b6f in ap_invoke_handler ()

12 0x00000000004449c8 in ap_process_request ()

13 0x0000000000441aa8 in ap_process_http_connection ()

14 0x000000000043db03 in ap_run_process_connection ()

15 0x0000000000448f8b in child_main ()

16 0x0000000000449254 in make_child ()

17 0x0000000000449e6e in ap_mpm_run ()

18 0x0000000000423405 in main ()

-verbatim-

+verbatim+

Jun 19 14:40:29 2010: Memory usage over threshold - usage: 95.2 > threshold: 95.000000 Jun 19 14:40:29 2010: Candidate: 1, pid:9832 memory: 113205248 command: sshd gradient: 0 Jun 19 14:40:29 2010: Candidate: 2, pid:31858 memory: 252428288 command: nscd gradient: 24 Jun 19 14:40:29 2010: Candidate: 3, pid:2896 memory: 303480832 command: named gradient: 0 Jun 19 14:40:29 2010: Candidate: 4, pid:19553 memory: 370335744 command: python gradient: 0 Jun 19 14:40:29 2010: Candidate: 5, pid:11015 memory: 376897536 command: python gradient: 0 Jun 19 14:40:29 2010: Candidate: 6, pid:9366 memory: 422141952 command: python gradient: 0 Jun 19 14:40:29 2010: Candidate: 7, pid:19272 memory: 628723712 command: python gradient: 0 Jun 19 14:40:29 2010: Candidate: 8, pid:5783 memory: 28532256768 command: httpd gradient: 4647541 Jun 19 14:40:31 2010: Tree: init(1)─┬─migration/0(2) ├─ksoftirqd/0(3) ├─watchdog/0(4) ├─migration/1(5) ├─ksoftirqd/1(6) ├─watchdog/1(7) ├─migration/2(8) ├─ksoftirqd/2(9) ├─watchdog/2(10) ├─migration/3(11) ├─ksoftirqd/3(12) ├─watchdog/3(13) ├─migration/4(14) ├─ksoftirqd/4(15) ├─watchdog/4(16) ├─migration/5(17) ├─ksoftirqd/5(18) ├─watchdog/5(19) ├─migration/6(20) ├─ksoftirqd/6(21) ├─watchdog/6(22) ├─migration/7(23) ├─ksoftirqd/7(24) ├─watchdog/7(25) ├─events/0(26) ├─events/1(27) ├─events/2(28) ├─events/3(29) ├─events/4(30) ├─events/5(31) ├─events/6(32) ├─events/7(33) ├─khelper(34) ├─kthread(235)─┬─kblockd/0(247) │ ├─kblockd/1(248) │ ├─kblockd/2(249) │ ├─kblockd/3(250) │ ├─kblockd/4(251) │ ├─kblockd/5(252) │ ├─kblockd/6(253) │ ├─kblockd/7(254) │ ├─kacpid(255) │ ├─cqueue/0(373) │ ├─cqueue/1(374) │ ├─cqueue/2(375) │ ├─cqueue/3(376) │ ├─cqueue/4(377) │ ├─cqueue/5(378) │ ├─cqueue/6(379) │ ├─cqueue/7(380) │ ├─khubd(383) │ ├─kseriod(385) │ ├─khungtaskd(510) │ ├─kswapd0(513) │ ├─kswapd1(514) │ ├─aio/0(515) │ ├─aio/1(516) │ ├─aio/2(517) │ ├─aio/3(518) │ ├─aio/4(519) │ ├─aio/5(520) │ ├─aio/6(521) │ ├─aio/7(522) │ ├─kpsmoused(671) │ ├─mpt_poll_0(776) │ ├─mpt/0(777) │
├─scsi_eh_0(778) │ ├─kstriped(806) │ ├─kjournald(843) │ ├─kauditd(868) │ ├─kmpathd/0(2234) │ ├─kmpathd/1(2235) │ ├─kmpathd/2(2236) │ ├─kmpathd/3(2237) │ ├─kmpathd/4(2238) │ ├─kmpathd/5(2239) │ ├─kmpathd/6(2240) │ ├─kmpathd/7(2241) │ ├─kmpath_handlerd(2242) │ ├─kjournald(2278) │ ├─kjournald(2280) │ ├─kjournald(2282) │ ├─kjournald(2284) │ ├─kjournald(2286) │ ├─kipmi0(3703) │ ├─pdflush(19759) │ └─pdflush(19764) ├─udevd(896) ├─mcstransd(2555) ├─auditd(2721)─┬─{auditd}(2722) │ └─audispd(2723)───{audispd}(2724) ├─restorecond(2739) ├─syslogd(2747) ├─klogd(2751) ├─irqbalance(2871) ├─named(2896,named)─┬─{named}(2897) │ ├─{named}(2898) │ ├─{named}(2899) │ ├─{named}(2900) │ ├─{named}(2901) │ ├─{named}(2902) │ ├─{named}(2903) │ ├─{named}(2904) │ ├─{named}(2905) │ └─{named}(2906) ├─dbus-daemon(2974,dbus)───{dbus-daemon}(2975) ├─acpid(2984) ├─hald(2992,haldaemon)───hald-runner(2993,root)─┬─hald-addon-acpi(3000,haldaemon) │ ├─hald-addon-keyb(3002,haldaemon) │ ├─hald-addon-keyb(3006,haldaemon) │ └─hald-addon-keyb(3013,haldaemon) ├─cdp-listend(3041) ├─ncm-cdispd(3051) ├─beat(3061) ├─sshd(3072)─┬─sshd(9829)───sshd(9832,cmsweb)───bashs(9833) │ └─sshd(19776)───sshd(19783,lat)───zsh(19784)───top(19809) ├─cupsd(3080) ├─xinetd(3091) ├─ntpd(3107,ntp) ├─afs_rxlistener(3361) ├─afs_rxevent(3362) ├─afs_callback(3363) ├─afsd(3370) ├─afs_checkserver(3371) ├─afs_background(3373) ├─afs_background(3375) ├─afs_background(3377) ├─afs_background(3379) ├─afs_background(3381) ├─afs_background(3384) ├─afs_background(3387) ├─afs_background(3388) ├─afs_background(3390) ├─afs_background(3391) ├─afs_cachetrim(3397) ├─notd(3443) ├─atd(3920) ├─smartd(3940) ├─mingetty(3947) ├─mingetty(3948) ├─mingetty(3949) ├─mingetty(3950) ├─mingetty(3951) ├─mingetty(3952) ├─httpd(5783,cmsweb)─┬─rotatelogs(5786) │ ├─rotatelogs(5787) │ ├─rotatelogs(5788) │ ├─rotatelogs(5789) │ ├─httpd(17673) │ ├─httpd(17709) │ ├─httpd(17842) │ ├─httpd(19684) │ ├─httpd(19690) │ ├─httpd(19727) │ ├─httpd(19765) │ ├─httpd(19810) │ ├─httpd(19820) │
└─httpd(19836) ├─python(9366,cmsweb)─┬─{python}(9393) │ ├─{python}(9394) │ ├─{python}(9395) │ ├─{python}(9396) │ ├─{python}(9397) │ ├─{python}(9398) │ ├─{python}(9399) │ ├─{python}(9400) │ ├─{python}(9401) │ ├─{python}(9402) │ ├─{python}(9403) │ ├─{python}(9404) │ ├─{python}(9405) │ └─makeStatic(11649) ├─python(11015,cmsweb)─┬─{python}(11041) │ ├─{python}(11042) │ ├─{python}(11043) │ ├─{python}(11044) │ ├─{python}(11045) │ ├─{python}(11046) │ ├─{python}(11047) │ ├─{python}(11048) │ ├─{python}(11049) │ ├─{python}(11050) │ ├─{python}(11051) │ ├─{python}(11052) │ └─{python}(11053) ├─python(12359)───sh(19837)───pstree(19838) ├─agetty(16158) ├─python(19272,cmsweb)─┬─{python}(19297) │ ├─{python}(19298) │ ├─{python}(19299) │ ├─{python}(19300) │ ├─{python}(19301) │ ├─{python}(19302) │ ├─{python}(19303) │ ├─{python}(19304) │ ├─{python}(19305) │ ├─{python}(19306) │ ├─{python}(19307) │ ├─{python}(19308) │ ├─{python}(19309) │ ├─{python}(19310) │ ├─{python}(19311) │ ├─{python}(19312) │ ├─{python}(19313) │ ├─{python}(19314) │ ├─{python}(19315) │ ├─{python}(19316) │ ├─{python}(19317) │ ├─{python}(19318) │ ├─{python}(19319) │ ├─{python}(19320) │ ├─{python}(19321) │ ├─{python}(19322) │ ├─{python}(19323) │ ├─{python}(19324) │ ├─{python}(19325) │ ├─{python}(19326) │ ├─{python}(19327) │ ├─{python}(19328) │ └─{python}(19329) ├─python(19553,cmsweb)─┬─{python}(19578) │ ├─{python}(19579) │ ├─{python}(19580) │ ├─{python}(19581) │ ├─{python}(19582) │ ├─{python}(19583) │ ├─{python}(19584) │ ├─{python}(19585) │ ├─{python}(19586) │ ├─{python}(19587) │ ├─{python}(19588) │ ├─{python}(19589) │ └─{python}(19590) ├─snmpd(22100)───{snmpd}(22102) ├─sendmail(22164) ├─sendmail(22173,smmsp) ├─crond(22201) ├─lemon-agent(22297)─┬─{lemon-agent}(22299) │ ├─lemon-sensor-fi(22373) │ ├─perl(22374) │ ├─lemon-sensor-li(22375) │ ├─lemon-sensor-ex(22376) │ ├─perl(22377) │ ├─perl(22378) │ ├─perl(22379) │ ├─perl(22380) │
└─perl(22381) └─nscd(31858,nscd)─┬─{nscd}(31859) ├─{nscd}(31861) ├─{nscd}(31862) ├─{nscd}(31863) ├─{nscd}(31864) ├─{nscd}(31865) └─{nscd}(31866) Jun 19 14:40:31 2010: Swap usage too high, killing fastest or biggest of the candidates. Jun 19 14:40:31 2010: Killing process: httpd (pid=5783) of user cmsweb with children: 19810,17673,17842,17709,19765,5787,19684,19690,5786,5788,19820,19836,19727,5789 has memory consumption: 26.57GB. Jun 19 14:40:35 2010: E-mail sent to: cms-service-webtools@cern.ch Jun 19 14:40:35 2010: Killed. -verbatim-

ericvaandering commented 11 years ago

Comment by lat on Sat Jun 19 07:46:25 2010

PhEDEx data service restarted on vocms{106,107}:

+verbatim+ [cmsweb@vocms106 ~]$ sw=/data/projects/phedex-datasvc/sw [cmsweb@vocms106 ~]$ $sw/apache2/etc/init.d/httpd status httpd dead but pid file exists [cmsweb@vocms106 ~]$ $sw/apache2/etc/init.d/httpd start Starting httpd: [ OK ] [cmsweb@vocms106 ~]$ $sw/apache2/etc/init.d/httpd status httpd (pid 20148) is running... -verbatim-

+verbatim+ [cmsweb@vocms107 ~]$ sw=/data/projects/phedex-datasvc/sw [cmsweb@vocms107 ~]$ $sw/apache2/etc/init.d/httpd status httpd dead but pid file exists [cmsweb@vocms107 ~]$ $sw/apache2/etc/init.d/httpd start Starting httpd: [ OK ] [cmsweb@vocms107 ~]$ $sw/apache2/etc/init.d/httpd status httpd (pid 944) is running... -verbatim-

ericvaandering commented 11 years ago

Comment by lat on Sat Jun 19 07:52:05 2010

As far as webtools is concerned, situation handled. Assigning ticket to PhEDEx for any future tracking; feel free to close if that's appropriate.

ericvaandering commented 11 years ago

Comment by lat on Sat Jun 19 07:52:06 2010

THIS ITEM WAS REASSIGNED TO SR #115264.

Please, do not post any new comments to this item.

ericvaandering commented 11 years ago

Comment by lat on Sat Jun 19 07:53:38 2010

This item has been reassigned from the project CMS Computing Infrastructure Support support tracker to your tracker.

The original report is still available at support #115263

Following is the information included in the original report:

Unless I hear anything in a few moments I'll try to identify what exactly got killed and will attempt to restart.

From /var/log/oom_kill.log

Jun 19 14:06:15 2010: Memory usage over threshold - usage: 96.2 > threshold: 95.000000 Jun 19 14:06:15 2010: Candidate: 1, pid:30137 memory: 113070080 command: sshd gradient: 0 Jun 19 14:06:15 2010: Candidate: 2, pid:31247 memory: 138739712 command: crond gradient: 0 Jun 19 14:06:15 2010: Candidate: 3, pid:13036 memory: 252637184 command: nscd gradient: 24 Jun 19 14:06:15 2010: Candidate: 4, pid:2900 memory: 303480832 command: named gradient: 0 Jun 19 14:06:15 2010: Candidate: 5, pid:1447 memory: 366751744 command: python gradient: 0 Jun 19 14:06:15 2010: Candidate: 6, pid:25423 memory: 383979520 command: python gradient: 0 Jun 19 14:06:15 2010: Candidate: 7, pid:1169 memory: 643989504 command: python gradient: 0 Jun 19 14:06:15 2010: Candidate: 8, pid:19505 memory: 28813746176 command: httpd gradient: 4677027 Jun 19 14:06:28 2010: Tree: init(1)─┬─migration/0(2) ├─ksoftirqd/0(3) ├─watchdog/0(4) ├─migration/1(5) ├─ksoftirqd/1(6) ├─watchdog/1(7) ├─migration/2(8) ├─ksoftirqd/2(9) ├─watchdog/2(10) ├─migration/3(11) ├─ksoftirqd/3(12) ├─watchdog/3(13) ├─migration/4(14) ├─ksoftirqd/4(15) ├─watchdog/4(16) ├─migration/5(17) ├─ksoftirqd/5(18) ├─watchdog/5(19) ├─migration/6(20) ├─ksoftirqd/6(21) ├─watchdog/6(22) ├─migration/7(23) ├─ksoftirqd/7(24) ├─watchdog/7(25) ├─events/0(26) ├─events/1(27) ├─events/2(28) ├─events/3(29) ├─events/4(30) ├─events/5(31) ├─events/6(32) ├─events/7(33) ├─khelper(34) ├─kthread(235)─┬─kblockd/0(247) │ ├─kblockd/1(248) │ ├─kblockd/2(249) │ ├─kblockd/3(250) │ ├─kblockd/4(251) │ ├─kblockd/5(252) │ ├─kblockd/6(253) │ ├─kblockd/7(254) │ ├─kacpid(255) │ ├─cqueue/0(373) │ ├─cqueue/1(374) │ ├─cqueue/2(375) │ ├─cqueue/3(376) │ ├─cqueue/4(377) │ ├─cqueue/5(378) │ ├─cqueue/6(379) │ ├─cqueue/7(380) │ ├─khubd(383) │ ├─kseriod(385) │ ├─khungtaskd(510) │ ├─pdflush(511) │ ├─pdflush(512) │ ├─kswapd0(513) │ ├─kswapd1(514) │ ├─aio/0(515) │ ├─aio/1(516) │ ├─aio/2(517) │ ├─aio/3(518) │ ├─aio/4(519) │ ├─aio/5(520) │ ├─aio/6(521) │ ├─aio/7(522) │ ├─pdflush(594) │
├─pdflush(595) │ ├─kpsmoused(672) │ ├─mpt_poll_0(776) │ ├─mpt/0(777) │ ├─scsi_eh_0(778) │ ├─kstriped(806) │ ├─kjournald(843) │ ├─kauditd(868) │ ├─kmpathd/0(2237) │ ├─kmpathd/1(2238) │ ├─kmpathd/2(2239) │ ├─kmpathd/3(2240) │ ├─kmpathd/4(2241) │ ├─kmpathd/5(2242) │ ├─kmpathd/6(2243) │ ├─kmpathd/7(2244) │ ├─kmpath_handlerd(2245) │ ├─kjournald(2282) │ ├─kjournald(2284) │ ├─kjournald(2286) │ ├─kjournald(2288) │ ├─kjournald(2290) │ └─kipmi0(3706) ├─udevd(896) ├─python(1169,cmsweb)─┬─{python}(1194) │ ├─{python}(1195) │ ├─{python}(1196) │ ├─{python}(1197) │ ├─{python}(1198) │ ├─{python}(1199) │ ├─{python}(1200) │ ├─{python}(1201) │ ├─{python}(1202) │ ├─{python}(1203) │ ├─{python}(1204) │ ├─{python}(1205) │ ├─{python}(1206) │ ├─{python}(1207) │ ├─{python}(1208) │ ├─{python}(1209) │ ├─{python}(1210) │ ├─{python}(1211) │ ├─{python}(1212) │ ├─{python}(1213) │ ├─{python}(1214) │ ├─{python}(1215) │ ├─{python}(1216) │ ├─{python}(1217) │ ├─{python}(1218) │ ├─{python}(1219) │ ├─{python}(1220) │ ├─{python}(1221) │ ├─{python}(1222) │ ├─{python}(1223) │ ├─{python}(1224) │ ├─{python}(1225) │ └─{python}(1226) ├─python(1447,cmsweb)─┬─{python}(1472) │ ├─{python}(1473) │ ├─{python}(1474) │ ├─{python}(1475) │ ├─{python}(1476) │ ├─{python}(1477) │ ├─{python}(1478) │ ├─{python}(1479) │ ├─{python}(1480) │ ├─{python}(1481) │ ├─{python}(1482) │ ├─{python}(1483) │ └─{python}(1484) ├─agetty(1804) ├─mcstransd(2559) ├─auditd(2725)─┬─{auditd}(2726) │ └─audispd(2727)───{audispd}(2728) ├─restorecond(2743) ├─syslogd(2751) ├─klogd(2755) ├─irqbalance(2875) ├─named(2900,named)─┬─{named}(2901) │ ├─{named}(2902) │ ├─{named}(2903) │ ├─{named}(2904) │ ├─{named}(2905) │ ├─{named}(2906) │ ├─{named}(2907) │ ├─{named}(2908) │ ├─{named}(2909) │ └─{named}(2910) ├─dbus-daemon(2978,dbus)───{dbus-daemon}(2979) ├─acpid(2988) ├─hald(2996,haldaemon)───hald-runner(2997,root)─┬─hald-addon-acpi(3004,haldaemon) │ ├─hald-addon-keyb(3006,haldaemon) │ ├─hald-addon-keyb(3010,haldaemon) │
└─hald-addon-keyb(3017,haldaemon) ├─cdp-listend(3045) ├─ncm-cdispd(3055) ├─beat(3065) ├─sshd(3076)───sshd(30135)───sshd(30137,cmsweb)───bashs(30138) ├─cupsd(3084) ├─xinetd(3095) ├─ntpd(3110,ntp) ├─afs_callback(3363) ├─afs_rxlistener(3365) ├─afs_rxevent(3366) ├─afsd(3374) ├─afs_checkserver(3375) ├─afs_background(3377) ├─afs_background(3380) ├─afs_background(3382) ├─afs_background(3383) ├─afs_background(3385) ├─afs_background(3388) ├─afs_background(3391) ├─afs_background(3392) ├─afs_background(3394) ├─afs_background(3395) ├─afs_cachetrim(3401) ├─notd(3447) ├─atd(3924) ├─smartd(3944) ├─mingetty(3951) ├─mingetty(3952) ├─mingetty(3953) ├─mingetty(3954) ├─mingetty(3955) ├─mingetty(3956) ├─nscd(13036,nscd)─┬─{nscd}(13037) │ ├─{nscd}(13039) │ ├─{nscd}(13040) │ ├─{nscd}(13041) │ ├─{nscd}(13042) │ ├─{nscd}(13043) │ └─{nscd}(13044) ├─snmpd(15591)───{snmpd}(15593) ├─sendmail(15655) ├─sendmail(15664,smmsp) ├─crond(15692)─┬─crond(31246)───sh(31248)───sh(31249)───perl(31250) │ └─crond(31247,cmsweb)───sh(31251)───trim-cache(31252)───htcacheclean(31304) ├─lemon-agent(15788)─┬─{lemon-agent}(15790) │ ├─perl(15864) │ ├─perl(15865) │ ├─lemon-sensor-li(15866) │ ├─perl(15867) │ ├─perl(15868) │ ├─perl(15869) │ ├─lemon-sensor-ex(15870) │ ├─perl(15871) │ └─lemon-sensor-fi(15872) ├─httpd(19505,cmsweb)─┬─httpd(587) │ ├─httpd(588) │ ├─httpd(589) │ ├─httpd(591) │ ├─rotatelogs(19508) │ ├─rotatelogs(19509) │ ├─rotatelogs(19510) │ ├─rotatelogs(19511) │ ├─httpd(29228) │ ├─httpd(29339) │ ├─httpd(31206) │ ├─httpd(31208) │ ├─httpd(31224) │ └─httpd(31305) ├─python(25423,cmsweb)─┬─{python}(25449) │ ├─{python}(25450) │ ├─{python}(25451) │ ├─{python}(25452) │ ├─{python}(25453) │ ├─{python}(25454) │ ├─{python}(25455) │ ├─{python}(25456) │ ├─{python}(25457) │ ├─{python}(25458) │ ├─{python}(25459) │ ├─{python}(25460) │
└─{python}(25461) └─python(28881)───sh(593)───pstree(596) Jun 19 14:06:28 2010: Swap usage above critical, killing biggest application Jun 19 14:06:29 2010: Killing process: httpd (pid=19505) of user cmsweb with children: 31305,591,31206,31208,19510,29339,587,19511,588,589,29228,19508,19509,31224 has memory consumption: 26.83GB. Jun 19 14:06:32 2010: E-mail sent to: cms-service-webtools@cern.ch Jun 19 14:06:32 2010: Killed.