dns: fix deadlock race [rsc] --rw-rw-r-- M 779778 glenda sys 28763 Apr 13 14:23 sys/src/cmd/ndb/dn.c /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dn.c:448,460 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dn.c:448,464 * garbage collect. block while garbage collecting. */ int - getactivity(Request *req) + getactivity(Request *req, int recursive) { int rv; - if(traceactivity) syslog(0, "dns", "get %d by %d", dnvars.active, getpid()); + if(traceactivity) syslog(0, "dns", "get %d by %d from %p", dnvars.active, getpid(), getcallerpc(&req)); lock(&dnvars); - while(dnvars.mutex){ + /* + * can't block here if we're already holding one + * of the dnvars.active (recursive). will deadlock. + */ + while(!recursive && dnvars.mutex){ unlock(&dnvars); sleep(200); lock(&dnvars); /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dn.c:467,473 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dn.c:471,477 return rv; } void - putactivity(void) + putactivity(int recursive) { static ulong lastclean; /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dn.c:478,485 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dn.c:482,491 /* * clean out old entries and check for new db periodicly + * can't block here if being called to let go a "recursive" lock + * or we'll deadlock waiting for ourselves to give up the dnvars.active. */ - if(dnvars.mutex || (needrefresh == 0 && dnvars.active > 0)){ + if(recursive || dnvars.mutex || (needrefresh == 0 && dnvars.active > 0)){ unlock(&dnvars); return; } /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dn.c:1234,1254 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dn.c:1240,1272 slave(Request *req) { static int slaveid; + int ppid; if(req->isslave) return; /* we're already a slave process */ + /* + * These calls to putactivity cannot block. + * After getactivity(), the current process is counted + * twice in dnvars.active (one will pass to the child). + * If putactivity tries to wait for dnvars.active == 0, + * it will never happen. + */ + /* limit parallelism */ - if(getactivity(req) > Maxactive){ - putactivity(); + if(getactivity(req, 1) > Maxactive){ + if(traceactivity) syslog(0, "dns", "[%d] too much activity", getpid()); + putactivity(1); return; } + ppid = getpid(); switch(rfork(RFPROC|RFNOTEG|RFMEM|RFNOWAIT)){ case -1: - putactivity(); + putactivity(1); break; case 0: + if(traceactivity) syslog(0, "dns", "[%d] take activity from %d", ppid, getpid()); req->isslave = 1; break; default: [rsc] --rw-rw-r-- M 779778 rsc sys 3066 Apr 13 14:23 sys/src/cmd/ndb/dnnotify.c /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnnotify.c:150,158 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnnotify.c:150,158 req.isslave = 1; /* son't fork off subprocesses */ for(;;){ - getactivity(&req); + getactivity(&req, 0); notify_areas(owned, &req); - putactivity(); + putactivity(0); sleep(60*1000); } } [rsc] --rw-rw-r-- M 779778 glenda sys 15346 Apr 13 14:23 sys/src/cmd/ndb/dnresolve.c /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnresolve.c:94,100 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnresolve.c:94,100 char *cp; if(debug) - syslog(0, LOG, "dnresolve1 %s %d %d", name, type, class); + syslog(0, LOG, "[%d] dnresolve1 %s %d %d", getpid(), name, type, class); /* only class Cin implemented so far */ if(class != Cin) [rsc] --rw-rw-r-- M 779778 glenda sys 15308 Apr 13 14:23 sys/src/cmd/ndb/dns.c /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dns.c:376,382 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dns.c:376,382 * through 'mret'. */ if(setjmp(req.mret)) - putactivity(); + putactivity(0); req.isslave = 0; for(;;){ n = read9pmsg(mfd[0], mdata, sizeof mdata); /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dns.c:393,399 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dns.c:393,399 if(debug) syslog(0, logfile, "%F", &job->request); - getactivity(&req); + getactivity(&req, 0); req.aborttime = now + 60; /* don't spend more than 60 seconds */ switch(job->request.type){ /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dns.c:447,457 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dns.c:447,457 * slave processes die after replying */ if(req.isslave){ - putactivity(); + putactivity(0); _exits(0); } - putactivity(); + putactivity(0); } } /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dns.c:871,878 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dns.c:871,878 { char buf[12]; - syslog(0, LOG, "%d.%d: sending to %I/%s %s %s", - id, subid, addr, sname, rname, rrname(type, buf, sizeof buf)); + syslog(0, LOG, "[%d] %d.%d: sending to %I/%s %s %s", + getpid(), id, subid, addr, sname, rname, rrname(type, buf, sizeof buf)); } RR* [rsc] --rw-rw-r-- M 779778 glenda sys 8492 Apr 13 14:23 sys/src/cmd/ndb/dnsdebug.c /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnsdebug.c:414,420 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnsdebug.c:414,420 return; } - getactivity(&req); + getactivity(&req, 0); req.isslave = 1; req.aborttime = now + 60; /* don't spend more than 60 seconds */ rr = dnresolve(buf, Cin, type, &req, 0, 0, Recurse, rooted, 0); /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnsdebug.c:426,432 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnsdebug.c:426,432 } rrfreelist(rr); - putactivity(); + putactivity(0); } void [rsc] --rw-rw-r-- M 779778 glenda sys 7349 Apr 13 14:23 sys/src/cmd/ndb/dnstcp.c [diffs elided - too long] [diff -c /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnstcp.c /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnstcp.c] [rsc] --rw-rw-r-- M 779778 glenda sys 5337 Apr 13 18:58 sys/src/cmd/ndb/dnudpserver.c /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnudpserver.c:97,107 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnudpserver.c:97,107 while((fd = udpannounce(mntpt)) < 0) sleep(5000); if(setjmp(req.mret)) - putactivity(); + putactivity(0); req.isslave = 0; /* loop on requests */ - for(;; putactivity()){ + for(;; putactivity(0)){ memset(&repmsg, 0, sizeof(repmsg)); memset(&reqmsg, 0, sizeof(reqmsg)); alarm(60*1000); /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnudpserver.c:111,117 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnudpserver.c:111,117 goto restart; uh = (OUdphdr*)buf; len -= OUdphdrsize; - getactivity(&req); + getactivity(&req, 0); req.aborttime = now + 30; /* don't spend more than 30 seconds */ err = convM2DNS(&buf[OUdphdrsize], len, &reqmsg); if(err){ /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnudpserver.c:174,180 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnudpserver.c:174,180 rrfreelist(reqmsg.ar); if(req.isslave){ - putactivity(); + putactivity(0); _exits(0); } /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnudpserver.c:190,195 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnudpserver.c:190,196 static int udpannounce(char *mntpt) { + static int whined; int data, ctl; char dir[64]; char datafile[64+6]; /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnudpserver.c:198,204 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnudpserver.c:199,206 sprint(datafile, "%s/udp!*!dns", mntpt); ctl = announce(datafile, dir); if(ctl < 0){ - warning("can't announce on dns udp port"); + if(!whined++) + warning("can't announce on dns udp port"); return -1; } snprint(datafile, sizeof(datafile), "%s/data", dir); /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dnudpserver.c:210,216 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dnudpserver.c:212,219 data = open(datafile, ORDWR); if(data < 0){ close(ctl); - warning("can't announce on dns udp port"); + if(!whined++) + warning("can't announce on dns udp port"); return -1; } [rsc] --rw-rw-r-- M 779778 glenda sys 10171 Apr 13 18:58 sys/src/cmd/ndb/dns.h /n/sourcesdump/2006/0413/plan9/sys/src/cmd/ndb/dns.h:370,377 - /n/sourcesdump/2006/0414/plan9/sys/src/cmd/ndb/dns.h:370,377 extern char* rrname(int, char*, int); extern int tsame(int, int); extern void dndump(char*); - extern int getactivity(Request*); - extern void putactivity(void); + extern int getactivity(Request*, int); + extern void putactivity(int); extern void abort(); /* char*, ... */; extern void warning(char*, ...); extern void slave(Request*); [rsc] --rw-rw-r-- M 779778 glenda sys 5337 Apr 13 18:58 sys/src/cmd/ndb/dnudpserver.c