pc/mmu.c 644 0 0 64272 11215566045 13576ustar00cinap_lenrekcinap_lenrek/* * Memory mappings. Life was easier when 2G of memory was enough. * * The kernel memory starts at KZERO, with the text loaded at KZERO+1M * (9load sits under 1M during the load). The memory from KZERO to the * top of memory is mapped 1-1 with physical memory, starting at physical * address 0. All kernel memory and data structures (i.e., the entries stored * into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000, * then the kernel can only have 256MB of memory for itself. * * The 256M below KZERO comprises three parts. The lowest 4M is the * virtual page table, a virtual address representation of the current * page table tree. The second 4M is used for temporary per-process * mappings managed by kmap and kunmap. The remaining 248M is used * for global (shared by all procs and all processors) device memory * mappings and managed by vmap and vunmap. The total amount (256M) * could probably be reduced somewhat if desired. The largest device * mapping is that of the video card, and even though modern video cards * have embarrassing amounts of memory, the video drivers only use one * frame buffer worth (at most 16M). Each is described in more detail below. * * The VPT is a 4M frame constructed by inserting the pdb into itself. * This short-circuits one level of the page tables, with the result that * the contents of second-level page tables can be accessed at VPT. * We use the VPT to edit the page tables (see mmu) after inserting them * into the page directory. It is a convenient mechanism for mapping what * might be otherwise-inaccessible pages. The idea was borrowed from * the Exokernel. * * The VPT doesn't solve all our problems, because we still need to * prepare page directories before we can install them. For that, we * use tmpmap/tmpunmap, which map a single page at TMPADDR. */ #include "u.h" #include "../port/lib.h" #include "mem.h" #include "dat.h" #include "fns.h" #include "../port/error.h" #include "ureg.h" #include "io.h" /* * Simple segment descriptors with no translation. 
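 * Roughly: each of the descriptor macros below builds a flat segment with
 * base 0 and limit 0xFFFFF in 4K-granularity units, i.e. the whole 4G, so
 * segmentation does no real translation here and paging provides all the
 * protection; the variants differ only in type (data vs. code), privilege
 * level, and default operand size (EXEC16SEGM omits SEGD for 16-bit code).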
*/ #define DATASEGM(p) { 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW } #define EXECSEGM(p) { 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR } #define EXEC16SEGM(p) { 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR } #define TSSSEGM(b,p) { ((b)<<16)|sizeof(Tss),\ ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP } Segdesc gdt[NGDT] = { [NULLSEG] { 0, 0}, /* null descriptor */ [KDSEG] DATASEGM(0), /* kernel data/stack */ [KESEG] EXECSEGM(0), /* kernel code */ [UDSEG] DATASEGM(3), /* user data/stack */ [UESEG] EXECSEGM(3), /* user code */ [TSSSEG] TSSSEGM(0,0), /* tss segment */ [KESEG16] EXEC16SEGM(0), /* kernel code 16-bit */ }; static int didmmuinit; static void taskswitch(ulong, ulong); static void memglobal(void); static long _gdtread(Proc*, void*, long, ulong); static long _gdtwrite(Proc*, void*, long, ulong); #define vpt ((ulong*)VPT) #define VPTX(va) (((ulong)(va))>>12) #define vpd (vpt+VPTX(VPT)) extern long (*gdtread)(Proc*, void*, long, ulong); extern long (*gdtwrite)(Proc*, void*, long, ulong); void mmuinit0(void) { memmove(m->gdt, gdt, sizeof gdt); /* for devproc */ gdtread = _gdtread; gdtwrite = _gdtwrite; } void mmuinit(void) { ulong x, *p; ushort ptr[3]; didmmuinit = 1; if(0) print("vpt=%#.8ux vpd=%#p kmap=%#.8ux\n", VPT, vpd, KMAP); memglobal(); m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID; m->tss = malloc(sizeof(Tss)); memset(m->tss, 0, sizeof(Tss)); m->tss->iomap = 0xDFFF<<16; /* * We used to keep the GDT in the Mach structure, but it * turns out that that slows down access to the rest of the * page. Since the Mach structure is accessed quite often, * it pays off anywhere from a factor of 1.25 to 2 on real * hardware to separate them (the AMDs are more sensitive * than Intels in this regard). Under VMware it pays off * a factor of about 10 to 100. */ memmove(m->gdt, gdt, sizeof gdt); x = (ulong)m->tss; m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss); m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP; ptr[0] = sizeof(gdt)-1; x = (ulong)m->gdt; ptr[1] = x & 0xFFFF; ptr[2] = (x>>16) & 0xFFFF; lgdt(ptr); ptr[0] = sizeof(Segdesc)*256-1; x = IDTADDR; ptr[1] = x & 0xFFFF; ptr[2] = (x>>16) & 0xFFFF; lidt(ptr); /* make kernel text unwritable */ for(x = KTZERO; x < (ulong)etext; x += BY2PG){ p = mmuwalk(m->pdb, x, 2, 0); if(p == nil) panic("mmuinit"); *p &= ~PTEWRITE; } taskswitch(PADDR(m->pdb), (ulong)m + BY2PG); ltr(TSSSEL); } /* * On processors that support it, we set the PTEGLOBAL bit in * page table and page directory entries that map kernel memory. * Doing this tells the processor not to bother flushing them * from the TLB when doing the TLB flush associated with a * context switch (write to CR3). Since kernel memory mappings * are never removed, this is safe. (If we ever remove kernel memory * mappings, we can do a full flush by turning off the PGE bit in CR4, * writing to CR3, and then turning the PGE bit back on.) * * See also mmukmap below. * * Processor support for the PTEGLOBAL bit is enabled in devarch.c. */ static void memglobal(void) { int i, j; ulong *pde, *pte; /* only need to do this once, on bootstrap processor */ if(m->machno != 0) return; if(!m->havepge) return; pde = m->pdb; for(i=PDX(KZERO); i<1024; i++){ if(pde[i] & PTEVALID){ pde[i] |= PTEGLOBAL; if(!(pde[i] & PTESIZE)){ pte = KADDR(pde[i]&~(BY2PG-1)); for(j=0; j<1024; j++) if(pte[j] & PTEVALID) pte[j] |= PTEGLOBAL; } } } } /* * Flush all the user-space and device-mapping mmu info * for this process, because something has been deleted. 
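 * (Here "deleted" means a user segment was unmapped or shrunk: flushmmu
 * just marks up->newtlb and calls mmuswitch, which hands the process's
 * page-table pages back to its free list and reloads cr3, so no stale
 * translation survives.)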
* It will be paged back in on demand. */ void flushmmu(void) { int s; s = splhi(); up->newtlb = 1; mmuswitch(up); splx(s); } /* * Flush a single page mapping from the tlb. */ void flushpg(ulong va) { if(X86FAMILY(m->cpuidax) >= 4) invlpg(va); else putcr3(getcr3()); } /* * Allocate a new page for a page directory. * We keep a small cache of pre-initialized * page directories in each mach. */ static Page* mmupdballoc(void) { int s; Page *page; ulong *pdb; s = splhi(); m->pdballoc++; if(m->pdbpool == 0){ spllo(); page = newpage(0, 0, 0); page->va = (ulong)vpd; splhi(); pdb = tmpmap(page); memmove(pdb, m->pdb, BY2PG); pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID; /* set up VPT */ tmpunmap(pdb); }else{ page = m->pdbpool; m->pdbpool = page->next; m->pdbcnt--; } splx(s); return page; } static void mmupdbfree(Proc *proc, Page *p) { if(islo()) panic("mmupdbfree: islo"); m->pdbfree++; if(m->pdbcnt >= 10){ p->next = proc->mmufree; proc->mmufree = p; }else{ p->next = m->pdbpool; m->pdbpool = p; m->pdbcnt++; } } /* * A user-space memory segment has been deleted, or the * process is exiting. Clear all the pde entries for user-space * memory mappings and device mappings. Any entries that * are needed will be paged back in as necessary. */ static void mmuptefree(Proc* proc) { int s; ulong *pdb; Page **last, *page; if(proc->mmupdb == nil || proc->mmuused == nil) return; s = splhi(); pdb = tmpmap(proc->mmupdb); last = &proc->mmuused; for(page = *last; page; page = page->next){ pdb[page->daddr] = 0; last = &page->next; } tmpunmap(pdb); splx(s); *last = proc->mmufree; proc->mmufree = proc->mmuused; proc->mmuused = 0; } static void taskswitch(ulong pdb, ulong stack) { Tss *tss; tss = m->tss; tss->ss0 = KDSEL; tss->esp0 = stack; tss->ss1 = KDSEL; tss->esp1 = stack; tss->ss2 = KDSEL; tss->esp2 = stack; putcr3(pdb); } void mmuswitch(Proc* proc) { ulong *pdb; if(proc->newtlb){ mmuptefree(proc); proc->newtlb = 0; } if(proc->mmupdb){ pdb = tmpmap(proc->mmupdb); pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)]; tmpunmap(pdb); taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK)); }else taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK)); } /* * Release any pages allocated for a page directory base or page-tables * for this process: * switch to the prototype pdb for this processor (m->pdb); * call mmuptefree() to place all pages used for page-tables (proc->mmuused) * onto the process' free list (proc->mmufree). This has the side-effect of * cleaning any user entries in the pdb (proc->mmupdb); * if there's a pdb put it in the cache of pre-initialised pdb's * for this processor (m->pdbpool) or on the process' free list; * finally, place any pages freed back into the free pool (palloc). * This routine is only called from schedinit() with palloc locked. */ void mmurelease(Proc* proc) { Page *page, *next; ulong *pdb; if(islo()) panic("mmurelease: islo"); taskswitch(PADDR(m->pdb), (ulong)m + BY2PG); if(proc->kmaptable){ if(proc->mmupdb == nil) panic("mmurelease: no mmupdb"); if(--proc->kmaptable->ref) panic("mmurelease: kmap ref %d", proc->kmaptable->ref); if(proc->nkmap) panic("mmurelease: nkmap %d", proc->nkmap); /* * remove kmaptable from pdb before putting pdb up for reuse. */ pdb = tmpmap(proc->mmupdb); if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa) panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux", pdb[PDX(KMAP)], proc->kmaptable->pa); pdb[PDX(KMAP)] = 0; tmpunmap(pdb); /* * move kmaptable to free list. 
*/ pagechainhead(proc->kmaptable); proc->kmaptable = 0; } if(proc->mmupdb){ mmuptefree(proc); mmupdbfree(proc, proc->mmupdb); proc->mmupdb = 0; } for(page = proc->mmufree; page; page = next){ next = page->next; if(--page->ref) panic("mmurelease: page->ref %d", page->ref); pagechainhead(page); } if(proc->mmufree && palloc.r.p) wakeup(&palloc.r); proc->mmufree = 0; } /* * Allocate and install pdb for the current process. */ static void upallocpdb(void) { int s; ulong *pdb; Page *page; if(up->mmupdb != nil) return; page = mmupdballoc(); s = splhi(); if(up->mmupdb != nil){ /* * Perhaps we got an interrupt while * mmupdballoc was sleeping and that * interrupt allocated an mmupdb? * Seems unlikely. */ mmupdbfree(up, page); splx(s); return; } pdb = tmpmap(page); pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)]; tmpunmap(pdb); up->mmupdb = page; putcr3(up->mmupdb->pa); splx(s); } /* * Update the mmu in response to a user fault. pa may have PTEWRITE set. */ void putmmu(ulong va, ulong pa, Page*) { int old, s; Page *page; if(up->mmupdb == nil) upallocpdb(); /* * We should be able to get through this with interrupts * turned on (if we get interrupted we'll just pick up * where we left off) but we get many faults accessing * vpt[] near the end of this function, and they always happen * after the process has been switched out and then * switched back, usually many times in a row (perhaps * it cannot switch back successfully for some reason). * * In any event, I'm tired of searching for this bug. * Turn off interrupts during putmmu even though * we shouldn't need to. - rsc */ s = splhi(); if(!(vpd[PDX(va)]&PTEVALID)){ if(up->mmufree == 0){ spllo(); page = newpage(0, 0, 0); splhi(); } else{ page = up->mmufree; up->mmufree = page->next; } vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID; /* page is now mapped into the VPT - clear it */ memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG); page->daddr = PDX(va); page->next = up->mmuused; up->mmuused = page; } old = vpt[VPTX(va)]; vpt[VPTX(va)] = pa|PTEUSER|PTEVALID; if(old&PTEVALID) flushpg(va); if(getcr3() != up->mmupdb->pa) print("bad cr3 %#.8lux %#.8lux\n", getcr3(), up->mmupdb->pa); splx(s); } /* * Double-check the user MMU. * Error checking only. */ void checkmmu(ulong va, ulong pa) { if(up->mmupdb == 0) return; if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID)) return; if(PPN(vpt[VPTX(va)]) != pa) print("%ld %s: va=%#08lux pa=%#08lux pte=%#08lux\n", up->pid, up->text, va, pa, vpt[VPTX(va)]); } /* * Walk the page-table pointed to by pdb and return a pointer * to the entry for virtual address va at the requested level. * If the entry is invalid and create isn't requested then bail * out early. Otherwise, for the 2nd level walk, allocate a new * page-table page and register it in the 1st level. This is used * only to edit kernel mappings, which use pages from kernel memory, * so it's okay to use KADDR to look at the tables. */ ulong* mmuwalk(ulong* pdb, ulong va, int level, int create) { ulong *table; void *map; table = &pdb[PDX(va)]; if(!(*table & PTEVALID) && create == 0) return 0; switch(level){ default: return 0; case 1: return table; case 2: if(*table & PTESIZE) panic("mmuwalk2: va %luX entry %luX", va, *table); if(!(*table & PTEVALID)){ /* * Have to call low-level allocator from * memory.c if we haven't set up the xalloc * tables yet. 
*/ if(didmmuinit) map = xspanalloc(BY2PG, BY2PG, 0); else map = rampage(); if(map == nil) panic("mmuwalk xspanalloc failed"); *table = PADDR(map)|PTEWRITE|PTEVALID; } table = KADDR(PPN(*table)); return &table[PTX(va)]; } } /* * Device mappings are shared by all procs and processors and * live in the virtual range VMAP to VMAP+VMAPSIZE. The master * copy of the mappings is stored in mach0->pdb, and they are * paged in from there as necessary by vmapsync during faults. */ static Lock vmaplock; static int findhole(ulong *a, int n, int count); static ulong vmapalloc(ulong size); static void pdbunmap(ulong*, ulong, int); /* * Add a device mapping to the vmap range. */ void* vmap(ulong pa, int size) { int osize; ulong o, va; /* * might be asking for less than a page. */ osize = size; o = pa & (BY2PG-1); pa -= o; size += o; size = ROUND(size, BY2PG); if(pa == 0){ print("vmap pa=0 pc=%#p\n", getcallerpc(&pa)); return nil; } ilock(&vmaplock); if((va = vmapalloc(size)) == 0 || pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){ iunlock(&vmaplock); return 0; } iunlock(&vmaplock); /* avoid trap on local processor for(i=0; i %#.8lux\n", pa+o, osize, va+o); return (void*)(va + o); } static int findhole(ulong *a, int n, int count) { int have, i; have = 0; for(i=0; i= count) return i+1 - have; } return -1; } /* * Look for free space in the vmap. */ static ulong vmapalloc(ulong size) { int i, n, o; ulong *vpdb; int vpdbsize; vpdb = &MACHP(0)->pdb[PDX(VMAP)]; vpdbsize = VMAPSIZE/(4*MB); if(size >= 4*MB){ n = (size+4*MB-1) / (4*MB); if((o = findhole(vpdb, vpdbsize, n)) != -1) return VMAP + o*4*MB; return 0; } n = (size+BY2PG-1) / BY2PG; for(i=0; i VMAP+VMAPSIZE) panic("vunmap va=%#.8lux size=%#x pc=%#.8lux", va, size, getcallerpc(&va)); pdbunmap(MACHP(0)->pdb, va, size); /* * Flush mapping from all the tlbs and copied pdbs. * This can be (and is) slow, since it is called only rarely. * It is possible for vunmap to be called with up == nil, * e.g. from the reset/init driver routines during system * boot. In that case it suffices to flush the MACH(0) TLB * and return. */ if(!active.thunderbirdsarego){ putcr3(PADDR(MACHP(0)->pdb)); return; } for(i=0; istate == Dead) continue; if(p != up) p->newtlb = 1; } for(i=0; iflushmmu = 1; } flushmmu(); for(i=0; imachno)) && nm->flushmmu) ; } } /* * Add kernel mappings for pa -> va for a section of size bytes. */ int pdbmap(ulong *pdb, ulong pa, ulong va, int size) { int pse; ulong pgsz, *pte, *table; ulong flag, off; flag = pa&0xFFF; pa &= ~0xFFF; if((MACHP(0)->cpuiddx & 0x08) && (getcr4() & 0x10)) pse = 1; else pse = 0; for(off=0; off= 4MB and processor can do it. */ if(pse && (pa+off)%(4*MB) == 0 && (va+off)%(4*MB) == 0 && (size-off) >= 4*MB){ *table = (pa+off)|flag|PTESIZE|PTEVALID; pgsz = 4*MB; }else{ pte = mmuwalk(pdb, va+off, 2, 1); if(*pte&PTEVALID) panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux", va+off, pa+off, *pte); *pte = (pa+off)|flag|PTEVALID; pgsz = BY2PG; } } return 0; } /* * Remove mappings. Must already exist, for sanity. * Only used for kernel mappings, so okay to use KADDR. 
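 * A PTESIZE (4MB) directory entry is cleared whole and va advances to the
 * next 4MB boundary; otherwise the individual 4K page-table entries are
 * cleared one page at a time.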
*/ static void pdbunmap(ulong *pdb, ulong va, int size) { ulong vae; ulong *table; vae = va+size; while(va < vae){ table = &pdb[PDX(va)]; if(!(*table & PTEVALID)){ panic("vunmap: not mapped"); /* va = (va+4*MB-1) & ~(4*MB-1); continue; */ } if(*table & PTESIZE){ *table = 0; va = (va+4*MB-1) & ~(4*MB-1); continue; } table = KADDR(PPN(*table)); if(!(table[PTX(va)] & PTEVALID)) panic("vunmap: not mapped"); table[PTX(va)] = 0; va += BY2PG; } } /* * Handle a fault by bringing vmap up to date. * Only copy pdb entries and they never go away, * so no locking needed. */ int vmapsync(ulong va) { ulong entry, *table; if(va < VMAP || va >= VMAP+VMAPSIZE) return 0; entry = MACHP(0)->pdb[PDX(va)]; if(!(entry&PTEVALID)) return 0; if(!(entry&PTESIZE)){ /* make sure entry will help the fault */ table = KADDR(PPN(entry)); if(!(table[PTX(va)]&PTEVALID)) return 0; } vpd[PDX(va)] = entry; /* * TLB doesn't cache negative results, so no flush needed. */ return 1; } /* * KMap is used to map individual pages into virtual memory. * It is rare to have more than a few KMaps at a time (in the * absence of interrupts, only two at a time are ever used, * but interrupts can stack). The mappings are local to a process, * so we can use the same range of virtual address space for * all processes without any coordination. */ #define kpt (vpt+VPTX(KMAP)) #define NKPT (KMAPSIZE/BY2PG) KMap* kmap(Page *page) { int i, o, s; if(up == nil) panic("kmap: up=0 pc=%#.8lux", getcallerpc(&page)); if(up->mmupdb == nil) upallocpdb(); if(up->nkmap < 0) panic("kmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap); /* * Splhi shouldn't be necessary here, but paranoia reigns. * See comment in putmmu above. */ s = splhi(); up->nkmap++; if(!(vpd[PDX(KMAP)]&PTEVALID)){ /* allocate page directory */ if(KMAPSIZE > BY2XPG) panic("bad kmapsize"); if(up->kmaptable != nil) panic("kmaptable"); spllo(); up->kmaptable = newpage(0, 0, 0); splhi(); vpd[PDX(KMAP)] = up->kmaptable->pa|PTEWRITE|PTEVALID; flushpg((ulong)kpt); memset(kpt, 0, BY2PG); kpt[0] = page->pa|PTEWRITE|PTEVALID; up->lastkmap = 0; splx(s); return (KMap*)KMAP; } if(up->kmaptable == nil) panic("no kmaptable"); o = up->lastkmap+1; for(i=0; ipa|PTEWRITE|PTEVALID; up->lastkmap = o; splx(s); return (KMap*)(KMAP+o*BY2PG); } } panic("out of kmap"); return nil; } void kunmap(KMap *k) { ulong va; va = (ulong)k; if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID)) panic("kunmap: no kmaps"); if(va < KMAP || va >= KMAP+KMAPSIZE) panic("kunmap: bad address %#.8lux pc=%#p", va, getcallerpc(&k)); if(!(vpt[VPTX(va)]&PTEVALID)) panic("kunmap: not mapped %#.8lux pc=%#p", va, getcallerpc(&k)); up->nkmap--; if(up->nkmap < 0) panic("kunmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap); vpt[VPTX(va)] = 0; flushpg(va); } /* * Temporary one-page mapping used to edit page directories. * * The fasttmp #define controls whether the code optimizes * the case where the page is already mapped in the physical * memory window. */ #define fasttmp 1 void* tmpmap(Page *p) { ulong i; ulong *entry; if(islo()) panic("tmpaddr: islo"); if(fasttmp && p->pa < -KZERO) return KADDR(p->pa); /* * PDX(TMPADDR) == PDX(MACHADDR), so this * entry is private to the processor and shared * between up->mmupdb (if any) and m->pdb. 
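 * (With the addresses in mem.h, TMPADDR is KZERO+0x2000 and MACHADDR is
 * KZERO+0x15000, both in the first 4MB above KZERO, hence the shared
 * page directory entry.)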
*/ entry = &vpt[VPTX(TMPADDR)]; if(!(*entry&PTEVALID)){ for(i=KZERO; i<=CPU0MACH; i+=BY2PG) print("%#p: *%#p=%#p (vpt=%#p index=%#p)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i)); panic("tmpmap: no entry"); } if(PPN(*entry) != PPN(TMPADDR-KZERO)) panic("tmpmap: already mapped entry=%#.8lux", *entry); *entry = p->pa|PTEWRITE|PTEVALID; flushpg(TMPADDR); return (void*)TMPADDR; } void tmpunmap(void *v) { ulong *entry; if(islo()) panic("tmpaddr: islo"); if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR) return; if(v != (void*)TMPADDR) panic("tmpunmap: bad address"); entry = &vpt[VPTX(TMPADDR)]; if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR))) panic("tmpmap: not mapped entry=%#.8lux", *entry); *entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID; flushpg(TMPADDR); } /* * These could go back to being macros once the kernel is debugged, * but the extra checking is nice to have. */ void* kaddr(ulong pa) { if(pa > (ulong)-KZERO) panic("kaddr: pa=%#.8lux", pa); return (void*)(pa+KZERO); } ulong paddr(void *v) { ulong va; va = (ulong)v; if(va < KZERO) panic("paddr: va=%#.8lux pc=%#p", va, getcallerpc(&v)); return va-KZERO; } /* * More debugging. */ void countpagerefs(ulong *ref, int print) { int i, n; Mach *mm; Page *pg; Proc *p; n = 0; for(i=0; immupdb){ if(print){ if(ref[pagenumber(p->mmupdb)]) iprint("page %#.8lux is proc %d (pid %lud) pdb\n", p->mmupdb->pa, i, p->pid); continue; } if(ref[pagenumber(p->mmupdb)]++ == 0) n++; else iprint("page %#.8lux is proc %d (pid %lud) pdb but has other refs!\n", p->mmupdb->pa, i, p->pid); } if(p->kmaptable){ if(print){ if(ref[pagenumber(p->kmaptable)]) iprint("page %#.8lux is proc %d (pid %lud) kmaptable\n", p->kmaptable->pa, i, p->pid); continue; } if(ref[pagenumber(p->kmaptable)]++ == 0) n++; else iprint("page %#.8lux is proc %d (pid %lud) kmaptable but has other refs!\n", p->kmaptable->pa, i, p->pid); } for(pg=p->mmuused; pg; pg=pg->next){ if(print){ if(ref[pagenumber(pg)]) iprint("page %#.8lux is on proc %d (pid %lud) mmuused\n", pg->pa, i, p->pid); continue; } if(ref[pagenumber(pg)]++ == 0) n++; else iprint("page %#.8lux is on proc %d (pid %lud) mmuused but has other refs!\n", pg->pa, i, p->pid); } for(pg=p->mmufree; pg; pg=pg->next){ if(print){ if(ref[pagenumber(pg)]) iprint("page %#.8lux is on proc %d (pid %lud) mmufree\n", pg->pa, i, p->pid); continue; } if(ref[pagenumber(pg)]++ == 0) n++; else iprint("page %#.8lux is on proc %d (pid %lud) mmufree but has other refs!\n", pg->pa, i, p->pid); } } if(!print) iprint("%d pages in proc mmu\n", n); n = 0; for(i=0; ipdbpool; pg; pg=pg->next){ if(print){ if(ref[pagenumber(pg)]) iprint("page %#.8lux is in cpu%d pdbpool\n", pg->pa, i); continue; } if(ref[pagenumber(pg)]++ == 0) n++; else iprint("page %#.8lux is in cpu%d pdbpool but has other refs!\n", pg->pa, i); } } if(!print){ iprint("%d pages in mach pdbpools\n", n); for(i=0; ipdballoc, MACHP(i)->pdbfree); } } void checkfault(ulong, ulong) { } /* * Return the number of bytes that can be accessed via KADDR(pa). * If pa is not a valid argument to KADDR, return 0. 
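 * For example, with KZERO at 0xF0000000, -KZERO is 0x10000000 (256MB),
 * so cankaddr(0x01000000) is 0x0F000000 and cankaddr(0x20000000) is 0.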
*/ ulong cankaddr(ulong pa) { if(pa >= -KZERO) return 0; return -KZERO - pa; } static int validdesc(Segdesc *d, char *reg) { if((d->d1 & SEGP) != SEGP) return 0; if((d->d1 & 0x6000) != SEGPL(3)) return 0; switch(*reg){ case 'c': if((d->d1 & (SEGEXEC|SEGR|SEGD)) != (SEGEXEC|SEGR|SEGD)) return 0; break; case 's': if((d->d1 & SEGW) != SEGW) return 0; case 'd': case 'e': case 'f': case 'g': if((d->d1 & SEGDATA) != SEGDATA) return 0; } if(((d->d1 & 0x00FF0000) | (d->d0 & 0xFFFF)) == 0) return 0; return 1; } static int validselu(Segdesc *ugdt, ulong sel, char *reg) { int idx; sel &= 0xFFFF; switch(sel){ case NULLSEL: return *reg != 's' && *reg != 'c'; case UESEL: return *reg != 's'; case UDSEL: return *reg != 'c'; } if(ugdt == 0) return 0; if(sel & SELLDT) return 0; if((sel & 3) != 3) return 0; idx = (sel >> 3) - UGDTSEG0; if(idx < 0 || idx >= NUGDT) return 0; return validdesc(ugdt + idx, reg); } int validsel(ulong sel, char *reg) { return validselu(up->ugdt, sel, reg); } static long _gdtread(Proc *p, void *v, long n, ulong o) { Segdesc buf[NGDT]; memmove(buf, m->gdt, sizeof(buf)); memmove(buf+UGDTSEG0, p->ugdt, sizeof(p->ugdt)); if(o >= sizeof(buf)){ n = 0; } else if(o + n > sizeof(buf)) { n = sizeof(buf) - o; } memmove(v, (uchar*)buf + o, n); return n; } static long _gdtwrite(Proc *p, void *v, long n, ulong o) { Segdesc buf[NUGDT]; Ureg *u; int i; if((u = p->dbgreg) == 0) error(Enoreg); memmove(buf, p->ugdt, sizeof(buf)); i = o; i -= UGDTSEG0*sizeof(Segdesc); if((i < 0) || (i + n > sizeof(buf))) error(Ebadarg); memmove((uchar*)buf + i, v, n); if( validselu(buf, u->cs, "cs")==0 || validselu(buf, u->ss, "ss")==0 || validselu(buf, u->ds, "ds")==0 || validselu(buf, u->es, "es")==0 || validselu(buf, u->fs, "fs")==0 || validselu(buf, u->gs, "gs")==0){ error("invalid segment selector"); } memmove(p->ugdt, buf, sizeof(buf)); if(p == up) memmove(m->gdt+UGDTSEG0, p->ugdt, sizeof(buf)); return n; } flushmmu) ; } } /* * Add kernel mappings for pa -> va for a section of size bytes. */ int pdbmap(ulong *pdb, ulong pa, ulong va, int size) { int pse; ulong pgsz, *pte, *table; ulong flag, off; flag = pa&0xFFF; pa &= ~0xFFF; if((MACHP(0)->cpuiddx & 0x08) && (getcr4() & 0x10)) pse = 1; else pse = 0; forpc/mem.h 644 0 0 12453 11215161231 13541ustar00cinap_lenrekcinap_lenrek/* * Memory and machine-specific definitions. Used in C and assembler. 
*/ /* * Sizes */ #define BI2BY 8 /* bits per byte */ #define BI2WD 32 /* bits per word */ #define BY2WD 4 /* bytes per word */ #define BY2V 8 /* bytes per double word */ #define BY2PG 4096 /* bytes per page */ #define WD2PG (BY2PG/BY2WD) /* words per page */ #define BY2XPG (4096*1024) /* bytes per big page */ #define PGSHIFT 12 /* log(BY2PG) */ #define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1)) #define PGROUND(s) ROUND(s, BY2PG) #define BLOCKALIGN 8 #define MAXMACH 8 /* max # cpus system can run */ #define KSTACK 4096 /* Size of kernel stack */ /* * Time */ #define HZ (100) /* clock frequency */ #define MS2HZ (1000/HZ) /* millisec per clock tick */ #define TK2SEC(t) ((t)/HZ) /* ticks to seconds */ /* * Address spaces */ #define KZERO 0xF0000000 /* base of kernel address space */ #define KTZERO (KZERO+0x100000) /* first address in kernel text - 9load sits below */ #define VPT (KZERO-VPTSIZE) #define VPTSIZE BY2XPG #define NVPT (VPTSIZE/BY2WD) #define KMAP (VPT-KMAPSIZE) #define KMAPSIZE BY2XPG #define VMAP (KMAP-VMAPSIZE) #define VMAPSIZE (0x10000000-VPTSIZE-KMAPSIZE) #define UZERO 0 /* base of user address space */ #define UTZERO (UZERO+BY2PG) /* first address in user text */ #define USTKTOP (VMAP-BY2PG) /* byte just beyond user stack */ #define USTKSIZE (16*1024*1024) /* size of user stack */ #define TSTKTOP (USTKTOP-USTKSIZE) /* end of new stack in sysexec */ #define TSTKSIZ 100 /* pages in new stack; limits exec args */ /* * Fundamental addresses - bottom 64kB saved for return to real mode */ #define CONFADDR (KZERO+0x1200) /* info passed from boot loader */ #define TMPADDR (KZERO+0x2000) /* used for temporary mappings */ #define APBOOTSTRAP (KZERO+0x3000) /* AP bootstrap code */ #define RMUADDR (KZERO+0x7C00) /* real mode Ureg */ #define RMCODE (KZERO+0x8000) /* copy of first page of KTEXT */ #define RMBUF (KZERO+0x9000) /* buffer for user space - known to vga */ #define IDTADDR (KZERO+0x10800) /* idt */ #define REBOOTADDR (0x11000) /* reboot code - physical address */ #define CPU0PDB (KZERO+0x12000) /* bootstrap processor PDB */ #define CPU0PTE (KZERO+0x13000) /* bootstrap processor PTE's for 0-4MB */ #define CPU0GDT (KZERO+0x14000) /* bootstrap processor GDT */ #define MACHADDR (KZERO+0x15000) /* as seen by current processor */ #define CPU0MACH (KZERO+0x16000) /* Mach for bootstrap processor */ #define MACHSIZE BY2PG #define CPU0PTE1 (KZERO+0x17000) /* bootstrap processor PTE's for 4MB-8MB */ #define CPU0END (CPU0PTE1+BY2PG) /* * N.B. ramscan knows that CPU0END is the end of reserved data * N.B. _startPADDR knows that CPU0PDB is the first reserved page * and that there are 6 of them. 
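 * (Those six pages are CPU0PDB, CPU0PTE, CPU0GDT, MACHADDR, CPU0MACH and
 * CPU0PTE1, consecutive from KZERO+0x12000 up to CPU0END at KZERO+0x18000.)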
*/

/*
 *  known x86 segments (in GDT) and their selectors
 */
#define	NULLSEG	0	/* null segment */
#define	KDSEG	1	/* kernel data/stack */
#define	KESEG	2	/* kernel executable */
#define	UDSEG	3	/* user data/stack */
#define	UESEG	4	/* user executable */
#define	TSSSEG	5	/* task segment */
#define	APMCSEG	6	/* APM code segment */
#define	APMCSEG16	7	/* APM 16-bit code segment */
#define	APMDSEG	8	/* APM data segment */
#define	KESEG16	9	/* kernel executable 16-bit */
#define	UGDTSEG0	10	/* user changable segments */
#define	NUGDT	4
#define	NGDT	14	/* number of GDT entries required */
/* #define APM40SEG	8	/* APM segment 0x40 */

#define	SELGDT	(0<<2)	/* selector is in gdt */
#define	SELLDT	(1<<2)	/* selector is in ldt */
#define	SELECTOR(i, t, p)	(((i)<<3) | (t) | (p))

#define	NULLSEL	SELECTOR(NULLSEG, SELGDT, 0)
#define	KDSEL	SELECTOR(KDSEG, SELGDT, 0)
#define	KESEL	SELECTOR(KESEG, SELGDT, 0)
#define	UESEL	SELECTOR(UESEG, SELGDT, 3)
#define	UDSEL	SELECTOR(UDSEG, SELGDT, 3)
#define	TSSSEL	SELECTOR(TSSSEG, SELGDT, 0)
#define	APMCSEL	SELECTOR(APMCSEG, SELGDT, 0)
#define	APMCSEL16	SELECTOR(APMCSEG16, SELGDT, 0)
#define	APMDSEL	SELECTOR(APMDSEG, SELGDT, 0)
/* #define APM40SEL	SELECTOR(APM40SEG, SELGDT, 0) */

/*
 *  fields in segment descriptors
 */
#define	SEGDATA	(0x10<<8)	/* data/stack segment */
#define	SEGEXEC	(0x18<<8)	/* executable segment */
#define	SEGTSS	(0x9<<8)	/* TSS segment */
#define	SEGCG	(0x0C<<8)	/* call gate */
#define	SEGIG	(0x0E<<8)	/* interrupt gate */
#define	SEGTG	(0x0F<<8)	/* trap gate */
#define	SEGTYPE	(0x1F<<8)

#define	SEGP	(1<<15)	/* segment present */
#define	SEGPL(x)	((x)<<13)	/* priority level */
#define	SEGB	(1<<22)	/* granularity 1==4k (for expand-down) */
#define	SEGG	(1<<23)	/* granularity 1==4k (for other) */
#define	SEGE	(1<<10)	/* expand down */
#define	SEGW	(1<<9)	/* writable (for data/stack) */
#define	SEGR	(1<<9)	/* readable (for code) */
#define	SEGD	(1<<22)	/* default 1==32bit (for code) */

/*
 *  virtual MMU
 */
#define	PTEMAPMEM	(1024*1024)
#define	PTEPERTAB	(PTEMAPMEM/BY2PG)
#define	SEGMAPSIZE	1984
#define	SSEGMAPSIZE	16
#define	PPN(x)	((x)&~(BY2PG-1))

/*
 *  physical MMU
 */
#define	PTEVALID	(1<<0)
#define	PTEWT	(1<<3)
#define	PTEUNCACHED	(1<<4)
#define	PTEWRITE	(1<<1)
#define	PTERONLY	(0<<1)
#define	PTEKERNEL	(0<<2)
#define	PTEUSER	(1<<2)
#define	PTESIZE	(1<<7)
#define	PTEGLOBAL	(1<<8)

/*
 *  Macros for calculating offsets within the page directory base
 *  and page tables.
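 *  For example, va 0xF0012345 gives PDX 0x3C0 (entry 960 in the page
 *  directory) and PTX 0x012 (entry 18 in that page table).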
*/ #define PDX(va) ((((ulong)(va))>>22) & 0x03FF) #define PTX(va) ((((ulong)(va))>>12) & 0x03FF) #define getpgcolor(a) 0 t("page %#.8lux is proc %d (pid %lud) pdb but has other refs!\n", p->mmupdb->pa, i, p->pid); } if(p->kmaptable){ if(print){ if(ref[pagenumber(p->kmaptable)]) iprint("page %#.8lux is proc %d (ppc/dat.h 644 0 0 16667 11215261021 13544ustar00cinap_lenrekcinap_lenrektypedef struct BIOS32si BIOS32si; typedef struct BIOS32ci BIOS32ci; typedef struct Conf Conf; typedef struct Confmem Confmem; typedef struct FPsave FPsave; typedef struct ISAConf ISAConf; typedef struct Label Label; typedef struct Lock Lock; typedef struct MMU MMU; typedef struct Mach Mach; typedef struct Notsave Notsave; typedef struct PCArch PCArch; typedef struct Pcidev Pcidev; typedef struct PCMmap PCMmap; typedef struct PCMslot PCMslot; typedef struct Page Page; typedef struct PMMU PMMU; typedef struct Proc Proc; typedef struct Segdesc Segdesc; typedef vlong Tval; typedef struct Ureg Ureg; typedef struct Vctl Vctl; #pragma incomplete BIOS32si #pragma incomplete Pcidev #pragma incomplete Ureg #define MAXSYSARG 5 /* for mount(fd, afd, mpt, flag, arg) */ /* * parameters for sysproc.c */ #define AOUT_MAGIC (I_MAGIC) struct Lock { ulong key; ulong sr; ulong pc; Proc *p; Mach *m; ushort isilock; long lockcycles; }; struct Label { ulong sp; ulong pc; }; /* * FPsave.status */ enum { /* this is a state */ FPinit= 0, FPactive= 1, FPinactive= 2, /* the following is a bit that can be or'd into the state */ FPillegal= 0x100, }; struct FPsave { ushort control; ushort r1; ushort status; ushort r2; ushort tag; ushort r3; ulong pc; ushort selector; ushort r4; ulong operand; ushort oselector; ushort r5; uchar regs[80]; /* floating point registers */ }; struct Confmem { ulong base; ulong npage; ulong kbase; ulong klimit; }; struct Conf { ulong nmach; /* processors */ ulong nproc; /* processes */ ulong monitor; /* has monitor? 
*/ Confmem mem[4]; /* physical memory */ ulong npage; /* total physical pages of memory */ ulong upages; /* user page pool */ ulong nimage; /* number of page cache image headers */ ulong nswap; /* number of swap pages */ int nswppo; /* max # of pageouts per segment pass */ ulong base0; /* base of bank 0 */ ulong base1; /* base of bank 1 */ ulong copymode; /* 0 is copy on write, 1 is copy on reference */ ulong ialloc; /* max interrupt time allocation in bytes */ ulong pipeqsize; /* size in bytes of pipe queues */ int nuart; /* number of uart devices */ }; struct Segdesc { ulong d0; ulong d1; }; /* * MMU stuff in proc */ #define NCOLOR 1 struct PMMU { Page* mmupdb; /* page directory base */ Page* mmufree; /* unused page table pages */ Page* mmuused; /* used page table pages */ Page* kmaptable; /* page table used by kmap */ uint lastkmap; /* last entry used by kmap */ int nkmap; /* number of current kmaps */ Segdesc ugdt[NUGDT]; /* saved user changable segments */ }; /* * things saved in the Proc structure during a notify */ struct Notsave { ulong svflags; ulong svcs; ulong svss; }; #include "../port/portdat.h" typedef struct { ulong link; /* link (old TSS selector) */ ulong esp0; /* privilege level 0 stack pointer */ ulong ss0; /* privilege level 0 stack selector */ ulong esp1; /* privilege level 1 stack pointer */ ulong ss1; /* privilege level 1 stack selector */ ulong esp2; /* privilege level 2 stack pointer */ ulong ss2; /* privilege level 2 stack selector */ ulong xcr3; /* page directory base register - not used because we don't use trap gates */ ulong eip; /* instruction pointer */ ulong eflags; /* flags register */ ulong eax; /* general registers */ ulong ecx; ulong edx; ulong ebx; ulong esp; ulong ebp; ulong esi; ulong edi; ulong es; /* segment selectors */ ulong cs; ulong ss; ulong ds; ulong fs; ulong gs; ulong ldt; /* selector for task's LDT */ ulong iomap; /* I/O map base address + T-bit */ } Tss; struct Mach { int machno; /* physical id of processor (KNOWN TO ASSEMBLY) */ ulong splpc; /* pc of last caller to splhi */ ulong* pdb; /* page directory base for this processor (va) */ Tss* tss; /* tss for this processor */ Segdesc *gdt; /* gdt for this processor */ Proc* proc; /* current process on this processor */ Proc* externup; /* extern register Proc *up */ Page* pdbpool; int pdbcnt; ulong ticks; /* of the clock since boot time */ Label sched; /* scheduler wakeup */ Lock alarmlock; /* access to alarm list */ void* alarm; /* alarms bound to this clock */ int inclockintr; Proc* readied; /* for runproc */ ulong schedticks; /* next forced context switch */ int tlbfault; int tlbpurge; int pfault; int cs; int syscall; int load; int intr; int flushmmu; /* make current proc flush it's mmu state */ int ilockdepth; Perf perf; /* performance counters */ ulong spuriousintr; int lastintr; int loopconst; Lock apictimerlock; int cpumhz; uvlong cyclefreq; /* Frequency of user readable cycle counter */ uvlong cpuhz; int cpuidax; int cpuiddx; char cpuidid[16]; char* cpuidtype; int havetsc; int havepge; uvlong tscticks; int pdballoc; int pdbfree; vlong mtrrcap; vlong mtrrdef; vlong mtrrfix[11]; vlong mtrrvar[32]; /* 256 max. */ int stack[1]; }; /* * KMap the structure doesn't exist, but the functions do. 
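 * kmap() returns the mapped virtual address itself, cast to KMap*, so
 * VA(k) is just a cast back to void*; the pointer value is the mapping.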
*/ typedef struct KMap KMap; #define VA(k) ((void*)(k)) KMap* kmap(Page*); void kunmap(KMap*); struct { Lock; int machs; /* bitmap of active CPUs */ int exiting; /* shutdown */ int ispanic; /* shutdown in response to a panic */ int thunderbirdsarego; /* lets the added processors continue to schedinit */ }active; /* * routines for things outside the PC model, like power management */ struct PCArch { char* id; int (*ident)(void); /* this should be in the model */ void (*reset)(void); /* this should be in the model */ int (*serialpower)(int); /* 1 == on, 0 == off */ int (*modempower)(int); /* 1 == on, 0 == off */ void (*intrinit)(void); int (*intrenable)(Vctl*); int (*intrvecno)(int); int (*intrdisable)(int); void (*introff)(void); void (*intron)(void); void (*clockenable)(void); uvlong (*fastclock)(uvlong*); void (*timerset)(uvlong); }; /* cpuid instruction result register bits */ enum { /* dx */ Fpuonchip = 1<<0, // Pse = 1<<3, /* page size extensions */ Tsc = 1<<4, /* time-stamp counter */ Cpumsr = 1<<5, /* model-specific registers, rdmsr/wrmsr */ Pae = 1<<6, /* physical-addr extensions */ Mce = 1<<7, /* machine-check exception */ Cmpxchg8b = 1<<8, Cpuapic = 1<<9, Mtrr = 1<<12, /* memory-type range regs. */ Pge = 1<<13, /* page global extension */ // Pse2 = 1<<17, /* more page size extensions */ Clflush = 1<<19, Mmx = 1<<23, Sse = 1<<25, /* thus sfence instr. */ Sse2 = 1<<26, /* thus mfence & lfence instr.s */ }; /* * a parsed plan9.ini line */ #define NISAOPT 8 struct ISAConf { char *type; ulong port; int irq; ulong dma; ulong mem; ulong size; ulong freq; int nopt; char *opt[NISAOPT]; }; extern PCArch *arch; /* PC architecture */ /* * Each processor sees its own Mach structure at address MACHADDR. * However, the Mach structures must also be available via the per-processor * MMU information array machp, mainly for disambiguation and access to * the clock which is only maintained by the bootstrap processor (0). 
*/ Mach* machp[MAXMACH]; #define MACHP(n) (machp[n]) extern Mach *m; #define up (((Mach*)MACHADDR)->externup) /* * hardware info about a device */ typedef struct { ulong port; int size; } Devport; struct DevConf { ulong intnum; /* interrupt number */ char *type; /* card type, malloced */ int nports; /* Number of ports */ Devport *ports; /* The ports themselves */ }; typedef struct BIOS32ci { /* BIOS32 Calling Interface */ u32int eax; u32int ebx; u32int ecx; u32int edx; u32int esi; u32int edi; } BIOS32ci; (1<<2) /* selector is in ldt */ #define SELECTOR(i, t, p) (((i)<<3) | (tpc/fns.h 644 0 0 11431 11215467655 13567ustar00cinap_lenrekcinap_lenrek#include "../port/portfns.h" void aamloop(int); Dirtab* addarchfile(char*, int, long(*)(Chan*,void*,long,vlong), long(*)(Chan*,void*,long,vlong)); void archinit(void); int bios32call(BIOS32ci*, u16int[3]); int bios32ci(BIOS32si*, BIOS32ci*); void bios32close(BIOS32si*); BIOS32si* bios32open(char*); void bootargs(void*); ulong cankaddr(ulong); void clockintr(Ureg*, void*); int (*cmpswap)(long*, long, long); int cmpswap486(long*, long, long); void (*coherence)(void); void cpuid(int, ulong regs[]); int cpuidentify(void); void cpuidprint(void); void (*cycles)(uvlong*); void delay(int); int dmacount(int); int dmadone(int); void dmaend(int); int dmainit(int, int); long dmasetup(int, void*, long, int); #define evenaddr(x) /* x86 doesn't care */ void fpclear(void); void fpenv(FPsave*); void fpinit(void); void fpoff(void); void fprestore(FPsave*); void fpsave(FPsave*); ulong fpstatus(void); ulong getcr0(void); ulong getcr2(void); ulong getcr3(void); ulong getcr4(void); char* getconf(char*); void guesscpuhz(int); void halt(void); int i8042auxcmd(int); int i8042auxcmds(uchar*, int); void i8042auxenable(void (*)(int, int)); void i8042reset(void); void i8250console(void); void* i8250alloc(int, int, int); void i8250mouse(char*, int (*)(Queue*, int), int); void i8250setmouseputc(char*, int (*)(Queue*, int)); void i8253enable(void); void i8253init(void); void i8253link(void); uvlong i8253read(uvlong*); void i8253timerset(uvlong); int i8259disable(int); int i8259enable(Vctl*); void i8259init(void); int i8259isr(int); void i8259on(void); void i8259off(void); int i8259vecno(int); void idle(void); void idlehands(void); int inb(int); void insb(int, void*, int); ushort ins(int); void inss(int, void*, int); ulong inl(int); void insl(int, void*, int); int intrdisable(int, void (*)(Ureg *, void *), void*, int, char*); void intrenable(int, void (*)(Ureg*, void*), void*, int, char*); void introff(void); void intron(void); void invlpg(ulong); void iofree(int); void ioinit(void); int iounused(int, int); int ioalloc(int, int, int, char*); int ioreserve(int, int, int, char*); int iprint(char*, ...); int isaconfig(char*, int, ISAConf*); void* kaddr(ulong); void kbdenable(void); void kbdinit(void); #define kmapinval() void lgdt(ushort[3]); void lidt(ushort[3]); void links(void); void ltr(ulong); void mach0init(void); void mathinit(void); void mb386(void); void mb586(void); void meminit(void); void memorysummary(void); void mfence(void); #define mmuflushtlb(pdb) putcr3(pdb) void mmuinit(void); ulong* mmuwalk(ulong*, ulong, int, int); int mtrr(uintptr, long, char *); void mtrrclock(void); int mtrrprint(char *, long); uchar nvramread(int); void nvramwrite(int, uchar); void outb(int, int); void outsb(int, void*, int); void outs(int, ushort); void outss(int, void*, int); void outl(int, ulong); void outsl(int, void*, int); ulong paddr(void*); ulong pcibarsize(Pcidev*, int); void 
pcibussize(Pcidev*, ulong*, ulong*); int pcicfgr8(Pcidev*, int); int pcicfgr16(Pcidev*, int); int pcicfgr32(Pcidev*, int); void pcicfgw8(Pcidev*, int, int); void pcicfgw16(Pcidev*, int, int); void pcicfgw32(Pcidev*, int, int); void pciclrbme(Pcidev*); void pciclrioe(Pcidev*); void pciclrmwi(Pcidev*); int pcigetpms(Pcidev*); void pcihinv(Pcidev*); uchar pciipin(Pcidev*, uchar); Pcidev* pcimatch(Pcidev*, int, int); Pcidev* pcimatchtbdf(int); void pcireset(void); int pciscan(int, Pcidev**); void pcisetbme(Pcidev*); void pcisetioe(Pcidev*); void pcisetmwi(Pcidev*); int pcisetpms(Pcidev*, int); void pcmcisread(PCMslot*); int pcmcistuple(int, int, int, void*, int); PCMmap* pcmmap(int, ulong, int, int); int pcmspecial(char*, ISAConf*); int (*_pcmspecial)(char *, ISAConf *); void pcmspecialclose(int); void (*_pcmspecialclose)(int); void pcmunmap(int, PCMmap*); int pdbmap(ulong*, ulong, ulong, int); void procrestore(Proc*); void procsave(Proc*); void procsetup(Proc*); void putcr0(ulong); void putcr3(ulong); void putcr4(ulong); void* rampage(void); void rdmsr(int, vlong*); void realmode(Ureg*); void screeninit(void); void (*screenputs)(char*, int); void syncclock(void); void* tmpmap(Page*); void tmpunmap(void*); void touser(void*); void trapenable(int, void (*)(Ureg*, void*), void*, char*); void trapinit(void); void trapinit0(void); int tas(void*); uvlong tscticks(uvlong*); ulong umbmalloc(ulong, int, int); void umbfree(ulong, int); ulong umbrwmalloc(ulong, int, int); void umbrwfree(ulong, int); ulong upaalloc(int, int); void upafree(ulong, int); void upareserve(ulong, int); #define userureg(ur) (((ur)->cs & 0xFFFF) == UESEL) void vectortable(void); void* vmap(ulong, int); int vmapsync(ulong); void vunmap(void*, int); void wbinvd(void); int validsel(ulong, char*); void wrmsr(int, vlong); int xchgw(ushort*, int); #define waserror() (up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1])) #define KADDR(a) kaddr(a) #define PADDR(a) paddr((void*)(a)) #define dcflush(a, b) esp0; /* privilege level 0 stack pointer */ ulong ss0; /* privilege level 0 stack selector */ ulong esp1; /* privilege level 1 stack pointer */ ulong ss1; /* privilege level 1 stack selector */ ulong esp2; /* privilegpc/main.c 644 0 0 36171 11215172364 13716ustar00cinap_lenrekcinap_lenrek#include "u.h" #include "../port/lib.h" #include "mem.h" #include "dat.h" #include "fns.h" #include "io.h" #include "ureg.h" #include "init.h" #include "pool.h" #include "reboot.h" Mach *m; /* * Where configuration info is left for the loaded programme. * This will turn into a structure as more is done by the boot loader * (e.g. why parse the .ini file twice?). * There are 3584 bytes available at CONFADDR. */ #define BOOTLINE ((char*)CONFADDR) #define BOOTLINELEN 64 #define BOOTARGS ((char*)(CONFADDR+BOOTLINELEN)) #define BOOTARGSLEN (4096-0x200-BOOTLINELEN) #define MAXCONF 64 char bootdisk[KNAMELEN]; Conf conf; char *confname[MAXCONF]; char *confval[MAXCONF]; int nconf; uchar *sp; /* user stack of init proc */ int delaylink; static void options(void) { long i, n; char *cp, *line[MAXCONF], *p, *q; /* * parse configuration args from dos file plan9.ini */ cp = BOOTARGS; /* where b.com leaves its config */ cp[BOOTARGSLEN-1] = 0; /* * Strip out '\r', change '\t' -> ' '. 
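 * For a hypothetical plan9.ini line, "console=0\tb9600\r" becomes
 * "console=0 b9600" here, before each line is split on '=' into a
 * name/value pair below.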
*/ p = cp; for(q = cp; *q; q++){ if(*q == '\r') continue; if(*q == '\t') *q = ' '; *p++ = *q; } *p = 0; n = getfields(cp, line, MAXCONF, 1, "\n"); for(i = 0; i < n; i++){ if(*line[i] == '#') continue; cp = strchr(line[i], '='); if(cp == nil) continue; *cp++ = '\0'; confname[nconf] = line[i]; confval[nconf] = cp; nconf++; } } extern void mmuinit0(void); extern void (*i8237alloc)(void); void main(void) { mach0init(); options(); ioinit(); i8250console(); quotefmtinstall(); screeninit(); print("\nPlan 9\n"); trapinit0(); mmuinit0(); kbdinit(); i8253init(); cpuidentify(); meminit(); confinit(); archinit(); xinit(); if(i8237alloc != nil) i8237alloc(); trapinit(); printinit(); cpuidprint(); mmuinit(); if(arch->intrinit) /* launches other processors on an mp */ arch->intrinit(); timersinit(); mathinit(); kbdenable(); if(arch->clockenable) arch->clockenable(); procinit0(); initseg(); if(delaylink){ bootlinks(); pcimatch(0, 0, 0); }else links(); conf.monitor = 1; chandevreset(); pageinit(); i8253link(); swapinit(); userinit(); active.thunderbirdsarego = 1; schedinit(); } void mach0init(void) { conf.nmach = 1; MACHP(0) = (Mach*)CPU0MACH; m->pdb = (ulong*)CPU0PDB; m->gdt = (Segdesc*)CPU0GDT; machinit(); active.machs = 1; active.exiting = 0; } void machinit(void) { int machno; ulong *pdb; Segdesc *gdt; machno = m->machno; pdb = m->pdb; gdt = m->gdt; memset(m, 0, sizeof(Mach)); m->machno = machno; m->pdb = pdb; m->gdt = gdt; m->perf.period = 1; /* * For polled uart output at boot, need * a default delay constant. 100000 should * be enough for a while. Cpuidentify will * calculate the real value later. */ m->loopconst = 100000; } void init0(void) { int i; char buf[2*KNAMELEN]; up->nerrlab = 0; spllo(); /* * These are o.k. because rootinit is null. * Then early kproc's will have a root and dot. */ up->slash = namec("#/", Atodir, 0, 0); pathclose(up->slash->path); up->slash->path = newpath("/"); up->dot = cclone(up->slash); chandevinit(); if(!waserror()){ snprint(buf, sizeof(buf), "%s %s", arch->id, conffile); ksetenv("terminal", buf, 0); ksetenv("cputype", "386", 0); if(cpuserver) ksetenv("service", "cpu", 0); else ksetenv("service", "terminal", 0); for(i = 0; i < nconf; i++){ if(confname[i][0] != '*') ksetenv(confname[i], confval[i], 0); ksetenv(confname[i], confval[i], 1); } poperror(); } kproc("alarm", alarmkproc, 0); touser(sp); } void userinit(void) { void *v; Proc *p; Segment *s; Page *pg; p = newproc(); p->pgrp = newpgrp(); p->egrp = smalloc(sizeof(Egrp)); p->egrp->ref = 1; p->fgrp = dupfgrp(nil); p->rgrp = newrgrp(); p->procmode = 0640; kstrdup(&eve, ""); kstrdup(&p->text, "*init*"); kstrdup(&p->user, eve); p->fpstate = FPinit; fpoff(); /* * Kernel Stack * * N.B. make sure there's enough space for syscall to check * for valid args and * 4 bytes for gotolabel's return PC */ p->sched.pc = (ulong)init0; p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Sargs)+BY2WD); /* * User Stack * * N.B. cannot call newpage() with clear=1, because pc kmap * requires up != nil. use tmpmap instead. 
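 * (newpage's clearing path zeroes the fresh page through kmap, and kmap
 * panics with "kmap: up=0" when no process is running yet, so the page
 * is mapped and zeroed by hand with tmpmap instead.)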
*/ s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BY2PG); p->seg[SSEG] = s; pg = newpage(0, 0, USTKTOP-BY2PG); v = tmpmap(pg); memset(v, 0, BY2PG); segpage(s, pg); bootargs(v); tmpunmap(v); /* * Text */ s = newseg(SG_TEXT, UTZERO, 1); s->flushme++; p->seg[TSEG] = s; pg = newpage(0, 0, UTZERO); memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl)); segpage(s, pg); v = tmpmap(pg); memset(v, 0, BY2PG); memmove(v, initcode, sizeof initcode); tmpunmap(v); ready(p); } uchar * pusharg(char *p) { int n; n = strlen(p)+1; sp -= n; memmove(sp, p, n); return sp; } void bootargs(void *base) { int i, ac; uchar *av[32]; uchar **lsp; char *cp = BOOTLINE; char buf[64]; sp = (uchar*)base + BY2PG - MAXSYSARG*BY2WD; ac = 0; av[ac++] = pusharg("/386/9dos"); /* when boot is changed to only use rc, this code can go away */ cp[BOOTLINELEN-1] = 0; buf[0] = 0; if(strncmp(cp, "fd", 2) == 0){ sprint(buf, "local!#f/fd%lddisk", strtol(cp+2, 0, 0)); av[ac++] = pusharg(buf); } else if(strncmp(cp, "sd", 2) == 0){ sprint(buf, "local!#S/sd%c%c/fs", *(cp+2), *(cp+3)); av[ac++] = pusharg(buf); } else if(strncmp(cp, "ether", 5) == 0) av[ac++] = pusharg("-n"); /* 4 byte word align stack */ sp = (uchar*)((ulong)sp & ~3); /* build argc, argv on stack */ sp -= (ac+1)*sizeof(sp); lsp = (uchar**)sp; for(i = 0; i < ac; i++) *lsp++ = av[i] + ((USTKTOP - BY2PG) - (ulong)base); *lsp = 0; sp += (USTKTOP - BY2PG) - (ulong)base - sizeof(ulong); } char* getconf(char *name) { int i; for(i = 0; i < nconf; i++) if(cistrcmp(confname[i], name) == 0) return confval[i]; return 0; } static void writeconf(void) { char *p, *q; int n; p = getconfenv(); if(waserror()) { free(p); nexterror(); } /* convert to name=value\n format */ for(q=p; *q; q++) { q += strlen(q); *q = '='; q += strlen(q); *q = '\n'; } n = q - p + 1; if(n >= BOOTARGSLEN) error("kernel configuration too large"); memset(BOOTLINE, 0, BOOTLINELEN); memmove(BOOTARGS, p, n); poperror(); free(p); } void confinit(void) { char *p; int i, userpcnt; ulong kpages; if(p = getconf("*kernelpercent")) userpcnt = 100 - strtol(p, 0, 0); else userpcnt = 0; conf.npage = 0; for(i=0; i 2000) conf.nproc = 2000; conf.nimage = 200; conf.nswap = conf.nproc*80; conf.nswppo = 4096; if(cpuserver) { if(userpcnt < 10) userpcnt = 70; kpages = conf.npage - (conf.npage*userpcnt)/100; /* * Hack for the big boys. Only good while physmem < 4GB. * Give the kernel fixed max + enough to allocate the * page pool. * This is an overestimate as conf.upages < conf.npages. * The patch of nimage is a band-aid, scanning the whole * page list in imagereclaim just takes too long. */ if(kpages > (64*MB + conf.npage*sizeof(Page))/BY2PG){ kpages = (64*MB + conf.npage*sizeof(Page))/BY2PG; conf.nimage = 2000; kpages += (conf.nproc*KSTACK)/BY2PG; } } else { if(userpcnt < 10) { if(conf.npage*BY2PG < 16*MB) userpcnt = 40; else userpcnt = 60; } kpages = conf.npage - (conf.npage*userpcnt)/100; /* * Make sure terminals with low memory get at least * 4MB on the first Image chunk allocation. */ if(conf.npage*BY2PG < 16*MB) imagmem->minarena = 4*1024*1024; } /* * can't go past the end of virtual memory * (ulong)-KZERO is 2^32 - KZERO */ if(kpages > ((ulong)-KZERO)/BY2PG) kpages = ((ulong)-KZERO)/BY2PG; conf.upages = conf.npage - kpages; conf.ialloc = (kpages/2)*BY2PG; /* * Guess how much is taken by the large permanent * datastructures. Mntcache and Mntrpc are not accounted for * (probably ~300KB). 
*/ kpages *= BY2PG; kpages -= conf.upages*sizeof(Page) + conf.nproc*sizeof(Proc) + conf.nimage*sizeof(Image) + conf.nswap + conf.nswppo*sizeof(Page); mainmem->maxsize = kpages; if(!cpuserver){ /* * give terminals lots of image memory, too; the dynamic * allocation will balance the load properly, hopefully. * be careful with 32-bit overflow. */ imagmem->maxsize = kpages; } } static char* mathmsg[] = { nil, /* handled below */ "denormalized operand", "division by zero", "numeric overflow", "numeric underflow", "precision loss", }; static void mathnote(void) { int i; ulong status; char *msg, note[ERRMAX]; status = up->fpsave.status; /* * Some attention should probably be paid here to the * exception masks and error summary. */ msg = "unknown exception"; for(i = 1; i <= 5; i++){ if(!((1<fpsave.pc, status); postnote(up, 1, note, NDebug); } /* * math coprocessor error */ static void matherror(Ureg *ur, void*) { /* * a write cycle to port 0xF0 clears the interrupt latch attached * to the error# line from the 387 */ if(!(m->cpuiddx & 0x01)) outb(0xF0, 0xFF); /* * save floating point state to check out error */ fpenv(&up->fpsave); mathnote(); if((ur->pc & 0xf0000000) == KZERO) panic("fp: status %ux fppc=0x%lux pc=0x%lux", up->fpsave.status, up->fpsave.pc, ur->pc); } /* * math coprocessor emulation fault */ static void mathemu(Ureg *ureg, void*) { if(up->fpstate & FPillegal){ /* someone did floating point in a note handler */ postnote(up, 1, "sys: floating point in note handler", NDebug); return; } switch(up->fpstate){ case FPinit: fpinit(); up->fpstate = FPactive; break; case FPinactive: /* * Before restoring the state, check for any pending * exceptions, there's no way to restore the state without * generating an unmasked exception. * More attention should probably be paid here to the * exception masks and error summary. */ if((up->fpsave.status & ~up->fpsave.control) & 0x07F){ mathnote(); break; } fprestore(&up->fpsave); up->fpstate = FPactive; break; case FPactive: panic("math emu pid %ld %s pc 0x%lux", up->pid, up->text, ureg->pc); break; } } /* * math coprocessor segment overrun */ static void mathover(Ureg*, void*) { pexit("math overrun", 0); } void mathinit(void) { trapenable(VectorCERR, matherror, 0, "matherror"); if(X86FAMILY(m->cpuidax) == 3) intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror"); trapenable(VectorCNA, mathemu, 0, "mathemu"); trapenable(VectorCSO, mathover, 0, "mathover"); } /* * set up floating point for a new process */ void procsetup(Proc*p) { int i; for(i=0; iugdt[i] = m->gdt[UDSEG]; p->fpstate = FPinit; fpoff(); } void procrestore(Proc *p) { uvlong t; int i; if(p->kp) return; for(i=0; igdt[UGDTSEG0+i] = p->ugdt[i]; cycles(&t); p->pcycles -= t; } /* * Save the mach dependent part of the process state. */ void procsave(Proc *p) { uvlong t; cycles(&t); p->pcycles += t; if(p->fpstate == FPactive){ if(p->state == Moribund) fpclear(); else{ /* * Fpsave() stores without handling pending * unmasked exeptions. Postnote() can't be called * here as sleep() already has up->rlock, so * the handling of pending exceptions is delayed * until the process runs again and generates an * emulation fault to activate the FPU. */ fpsave(&p->fpsave); } p->fpstate = FPinactive; } /* * While this processor is in the scheduler, the process could run * on another processor and exit, returning the page tables to * the free list where they could be reallocated and overwritten. * When this processor eventually has to get an entry from the * trashed page tables it will crash. 
* * If there's only one processor, this can't happen. * You might think it would be a win not to do this in that case, * especially on VMware, but it turns out not to matter. */ mmuflushtlb(PADDR(m->pdb)); } static void shutdown(int ispanic) { int ms, once; lock(&active); if(ispanic) active.ispanic = ispanic; else if(m->machno == 0 && (active.machs & (1<machno)) == 0) active.ispanic = 0; once = active.machs & (1<machno); active.machs &= ~(1<machno); active.exiting = 1; unlock(&active); if(once) iprint("cpu%d: exiting\n", m->machno); spllo(); for(ms = 5*1000; ms > 0; ms -= TK2MS(2)){ delay(TK2MS(2)); if(active.machs == 0 && consactive() == 0) break; } if(getconf("*debug")) delay(5*60*1000); if(active.ispanic){ if(!cpuserver) for(;;) halt(); delay(10000); }else delay(1000); } void reboot(void *entry, void *code, ulong size) { void (*f)(ulong, ulong, ulong); ulong *pdb; writeconf(); shutdown(0); /* * should be the only processor running now */ print("shutting down...\n"); delay(200); splhi(); /* turn off buffered serial console */ serialoq = nil; /* shutdown devices */ chandevshutdown(); /* * Modify the machine page table to directly map the low 4MB of memory * This allows the reboot code to turn off the page mapping */ pdb = m->pdb; pdb[PDX(0)] = pdb[PDX(KZERO)]; mmuflushtlb(PADDR(pdb)); /* setup reboot trampoline function */ f = (void*)REBOOTADDR; memmove(f, rebootcode, sizeof(rebootcode)); print("rebooting...\n"); /* off we go - never to return */ coherence(); (*f)(PADDR(entry), PADDR(code), size); } void exit(int ispanic) { shutdown(ispanic); arch->reset(); } int isaconfig(char *class, int ctlrno, ISAConf *isa) { char cc[32], *p; int i; snprint(cc, sizeof cc, "%s%d", class, ctlrno); p = getconf(cc); if(p == nil) return 0; isa->type = ""; isa->nopt = tokenize(p, isa->opt, NISAOPT); for(i = 0; i < isa->nopt; i++){ p = isa->opt[i]; if(cistrncmp(p, "type=", 5) == 0) isa->type = p + 5; else if(cistrncmp(p, "port=", 5) == 0) isa->port = strtoul(p+5, &p, 0); else if(cistrncmp(p, "irq=", 4) == 0) isa->irq = strtoul(p+4, &p, 0); else if(cistrncmp(p, "dma=", 4) == 0) isa->dma = strtoul(p+4, &p, 0); else if(cistrncmp(p, "mem=", 4) == 0) isa->mem = strtoul(p+4, &p, 0); else if(cistrncmp(p, "size=", 5) == 0) isa->size = strtoul(p+5, &p, 0); else if(cistrncmp(p, "freq=", 5) == 0) isa->freq = strtoul(p+5, &p, 0); } return 1; } int cistrcmp(char *a, char *b) { int ac, bc; for(;;){ ac = *a++; bc = *b++; if(ac >= 'A' && ac <= 'Z') ac = 'a' + (ac - 'A'); if(bc >= 'A' && bc <= 'Z') bc = 'a' + (bc - 'A'); ac -= bc; if(ac) return ac; if(bc == 0) break; } return 0; } int cistrncmp(char *a, char *b, int n) { unsigned ac, bc; while(n > 0){ ac = *a++; bc = *b++; n--; if(ac >= 'A' && ac <= 'Z') ac = 'a' + (ac - 'A'); if(bc >= 'A' && bc <= 'Z') bc = 'a' + (bc - 'A'); ac -= bc; if(ac) return ac; if(bc == 0) break; } return 0; } /* * put the processor in the halt state if we've no processes to run. * an interrupt will get us going again. 
*/ void idlehands(void) { if(conf.nmach == 1) halt(); } tol(cp+2, 0, 0)); av[ac++] = pusharg(buf); } else if(strncmp(cp, "sd", 2) == 0){ sprint(buf, "local!#S/sd%c%c/fs", *(cp+2), *(cp+3)); av[ac++] = pusharg(buf); } else if(strncmp(cp, "ether", 5) == 0) av[ac++] = pusharg("-n"); /* 4 byte word align stack */ sp = (uchar*)((ulong)sp & ~3); /* build argc, argv on stack */ sp -= (ac+1)*sizeof(sp); lsp = (uchar**)sp; for(i = 0; pc/trap.c 644 0 0 52565 11215466233 13746ustar00cinap_lenrekcinap_lenrek#include "u.h" #include "tos.h" #include "../port/lib.h" #include "mem.h" #include "dat.h" #include "fns.h" #include "io.h" #include "ureg.h" #include "../port/error.h" #include static int trapinited; void noted(Ureg*, ulong); static void debugbpt(Ureg*, void*); static void fault386(Ureg*, void*); static void doublefault(Ureg*, void*); static void unexpected(Ureg*, void*); static void _dumpstack(Ureg*); static Lock vctllock; static Vctl *vctl[256]; enum { Ntimevec = 20 /* number of time buckets for each intr */ }; ulong intrtimes[256][Ntimevec]; void intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name) { int vno; Vctl *v; if(f == nil){ print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n", irq, tbdf, name); return; } v = xalloc(sizeof(Vctl)); v->isintr = 1; v->irq = irq; v->tbdf = tbdf; v->f = f; v->a = a; strncpy(v->name, name, KNAMELEN-1); v->name[KNAMELEN-1] = 0; ilock(&vctllock); vno = arch->intrenable(v); if(vno == -1){ iunlock(&vctllock); print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n", irq, tbdf, v->name); xfree(v); return; } if(vctl[vno]){ if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi) panic("intrenable: handler: %s %s %#p %#p %#p %#p", vctl[vno]->name, v->name, vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi); v->next = vctl[vno]; } vctl[vno] = v; iunlock(&vctllock); } int intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name) { Vctl **pv, *v; int vno; /* * For now, none of this will work with the APIC code, * there is no mapping between irq and vector as the IRQ * is pretty meaningless. 
*/ if(arch->intrvecno == nil) return -1; vno = arch->intrvecno(irq); ilock(&vctllock); pv = &vctl[vno]; while (*pv && ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a || strcmp((*pv)->name, name))) pv = &((*pv)->next); assert(*pv); v = *pv; *pv = (*pv)->next; /* Link out the entry */ if(vctl[vno] == nil && arch->intrdisable != nil) arch->intrdisable(irq); iunlock(&vctllock); xfree(v); return 0; }
static long irqallocread(Chan*, void *vbuf, long n, vlong offset) { char *buf, *p, str[2*(11+1)+KNAMELEN+1+1]; int m, vno; long oldn; Vctl *v; if(n < 0 || offset < 0) error(Ebadarg); oldn = n; buf = vbuf; for(vno=0; vno<nelem(vctl); vno++){ for(v=vctl[vno]; v; v=v->next){ m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name); if(m <= offset) /* if do not want this, skip entry */ offset -= m; else{ /* skip offset bytes */ m -= offset; p = str+offset; offset = 0; /* write at most max(n,m) bytes */ if(m > n) m = n; memmove(buf, p, m); n -= m; buf += m; if(n == 0) return oldn; } } } return oldn - n; }
void trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name) { Vctl *v; if(vno < 0 || vno >= VectorPIC) panic("trapenable: vno %d", vno); v = xalloc(sizeof(Vctl)); v->tbdf = BUSUNKNOWN; v->f = f; v->a = a; strncpy(v->name, name, KNAMELEN); v->name[KNAMELEN-1] = 0; ilock(&vctllock); if(vctl[vno]) v->next = vctl[vno]->next; vctl[vno] = v; iunlock(&vctllock); }
static void nmienable(void) { int x; /* * Hack: should be locked with NVRAM access. */ outb(0x70, 0x80); /* NMI latch clear */ outb(0x70, 0); x = inb(0x61) & 0x07; /* Enable NMI */ outb(0x61, 0x08|x); outb(0x61, x); }
/* * Minimal trap setup. Just enough so that we can panic * on traps (bugs) during kernel initialization. * Called very early - malloc is not yet available. */ void trapinit0(void) { int d1, v; ulong vaddr; Segdesc *idt; idt = (Segdesc*)IDTADDR; vaddr = (ulong)vectortable; for(v = 0; v < 256; v++){ d1 = (vaddr & 0xFFFF0000)|SEGP; switch(v){ case VectorBPT: d1 |= SEGPL(3)|SEGIG; break; case VectorSYSCALL: d1 |= SEGPL(3)|SEGIG; break; default: d1 |= SEGPL(0)|SEGIG; break; } idt[v].d0 = (vaddr & 0xFFFF)|(KESEL<<16); idt[v].d1 = d1; vaddr += 6; } }
void trapinit(void) { /* * Special traps. * Syscall() is called directly without going through trap().
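 *
 * Usage sketch, for illustration (the keyboard names here are
 * hypothetical, not taken from this file): fixed traps and device
 * interrupts are hooked in with calls like
 *
 *	trapenable(VectorPF, fault386, 0, "fault386");
 *	intrenable(IrqKBD, kbdintr, nil, BUSUNKNOWN, "kbd");
 *
 * trapenable() wants a fixed vector below VectorPIC; intrenable()
 * takes an irq and lets arch->intrenable choose the vector.  The
 * system call gate is the one exception: its vector stub enters
 * syscall() directly, so no Vctl entry is registered for it.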
*/ trapenable(VectorBPT, debugbpt, 0, "debugpt"); trapenable(VectorPF, fault386, 0, "fault386"); trapenable(Vector2F, doublefault, 0, "doublefault"); trapenable(Vector15, unexpected, 0, "unexpected"); nmienable(); addarchfile("irqalloc", 0444, irqallocread, nil); trapinited = 1; } static char* excname[32] = { "divide error", "debug exception", "nonmaskable interrupt", "breakpoint", "overflow", "bounds check", "invalid opcode", "coprocessor not available", "double fault", "coprocessor segment overrun", "invalid TSS", "segment not present", "stack exception", "general protection violation", "page fault", "15 (reserved)", "coprocessor error", "alignment check", "machine check", "19 (reserved)", "20 (reserved)", "21 (reserved)", "22 (reserved)", "23 (reserved)", "24 (reserved)", "25 (reserved)", "26 (reserved)", "27 (reserved)", "28 (reserved)", "29 (reserved)", "30 (reserved)", "31 (reserved)", }; /* * keep histogram of interrupt service times */ void intrtime(Mach*, int vno) { ulong diff; ulong x; x = perfticks(); diff = x - m->perf.intrts; m->perf.intrts = x; m->perf.inintr += diff; if(up == nil && m->perf.inidle > diff) m->perf.inidle -= diff; diff /= m->cpumhz*100; /* quantum = 100µsec */ if(diff >= Ntimevec) diff = Ntimevec-1; intrtimes[vno][diff]++; } /* go to user space */ void kexit(Ureg*) { uvlong t; Tos *tos; /* precise time accounting, kernel exit */ tos = (Tos*)(USTKTOP-sizeof(Tos)); cycles(&t); tos->kcycles += t - up->kentry; tos->pcycles = up->pcycles; tos->pid = up->pid; } /* * All traps come here. It is slower to have all traps call trap() * rather than directly vectoring the handler. However, this avoids a * lot of code duplication and possible bugs. The only exception is * VectorSYSCALL. * Trap is called with interrupts disabled via interrupt-gates. */ void trap(Ureg* ureg) { int clockintr, i, vno, user; char buf[ERRMAX]; Vctl *ctl, *v; Mach *mach; if(!trapinited){ /* fault386 can give a better error message */ if(ureg->trap == VectorPF) fault386(ureg, nil); panic("trap %lud: not ready", ureg->trap); } m->perf.intrts = perfticks(); user = (ureg->cs & 0xFFFF) == UESEL; if(user){ up->dbgreg = ureg; cycles(&up->kentry); } clockintr = 0; vno = ureg->trap; if(ctl = vctl[vno]){ if(ctl->isintr){ m->intr++; if(vno >= VectorPIC && vno != VectorSYSCALL) m->lastintr = ctl->irq; } if(ctl->isr) ctl->isr(vno); for(v = ctl; v != nil; v = v->next){ if(v->f) v->f(ureg, v->a); } if(ctl->eoi) ctl->eoi(vno); if(ctl->isintr){ intrtime(m, vno); if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER) clockintr = 1; if(up && !clockintr) preempted(); } } else if(vno < nelem(excname) && user){ spllo(); sprint(buf, "sys: trap: %s", excname[vno]); postnote(up, 1, buf, NDebug); } else if(vno >= VectorPIC && vno != VectorSYSCALL){ /* * An unknown interrupt. * Check for a default IRQ7. This can happen when * the IRQ input goes away before the acknowledge. * In this case, a 'default IRQ7' is generated, but * the corresponding bit in the ISR isn't set. * In fact, just ignore all such interrupts. */ /* call all interrupt routines, just in case */ for(i = VectorPIC; i <= MaxIrqLAPIC; i++){ ctl = vctl[i]; if(ctl == nil) continue; if(!ctl->isintr) continue; for(v = ctl; v != nil; v = v->next){ if(v->f) v->f(ureg, v->a); } /* should we do this? 
*/ if(ctl->eoi) ctl->eoi(i); } /* clear the interrupt */ i8259isr(vno); if(0)print("cpu%d: spurious interrupt %d, last %d\n", m->machno, vno, m->lastintr); if(0)if(conf.nmach > 1){ for(i = 0; i < 32; i++){ if(!(active.machs & (1<<i))) continue; mach = MACHP(i); if(m->machno == mach->machno) continue; print(" cpu%d: last %d", mach->machno, mach->lastintr); } print("\n"); } m->spuriousintr++; if(user) kexit(ureg); return; } else{ if(vno == VectorNMI){ /* * Don't re-enable, it confuses the crash dumps. nmienable(); */ iprint("cpu%d: PC %#8.8lux\n", m->machno, ureg->pc); while(m->machno != 0) ; } dumpregs(ureg); if(!user){ ureg->sp = (ulong)&ureg->sp; _dumpstack(ureg); } if(vno < nelem(excname)) panic("%s", excname[vno]); panic("unknown trap/intr: %d", vno); } splhi(); /* delaysched set because we held a lock or because our quantum ended */ if(up && up->delaysched && clockintr){ sched(); splhi(); } if(user){ if(up->procctl || up->nnote) notify(ureg); kexit(ureg); } }
/* * dump registers */ void dumpregs2(Ureg* ureg) { if(up) iprint("cpu%d: registers for %s %lud\n", m->machno, up->text, up->pid); else iprint("cpu%d: registers for kernel\n", m->machno); iprint("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX", ureg->flags, ureg->trap, ureg->ecode, ureg->pc); iprint(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp); iprint(" AX %8.8luX BX %8.8luX CX %8.8luX DX %8.8luX\n", ureg->ax, ureg->bx, ureg->cx, ureg->dx); iprint(" SI %8.8luX DI %8.8luX BP %8.8luX\n", ureg->si, ureg->di, ureg->bp); iprint(" CS %4.4luX DS %4.4luX ES %4.4luX FS %4.4luX GS %4.4luX\n", ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF, ureg->fs & 0xFFFF, ureg->gs & 0xFFFF); }
void dumpregs(Ureg* ureg) { vlong mca, mct; dumpregs2(ureg); /* * Processor control registers. * If machine check exception, time stamp counter, page size extensions * or enhanced virtual 8086 mode extensions are supported, there is a * CR4. If there is a CR4 and machine check extensions, read the machine * check address and machine check type registers if RDMSR supported. */ iprint(" CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux", getcr0(), getcr2(), getcr3()); if(m->cpuiddx & 0x9A){ iprint(" CR4 %8.8lux", getcr4()); if((m->cpuiddx & 0xA0) == 0xA0){ rdmsr(0x00, &mca); rdmsr(0x01, &mct); iprint("\n MCA %8.8llux MCT %8.8llux", mca, mct); } } iprint("\n ur %#p up %#p\n", ureg, up); }
/* * Fill in enough of Ureg to get a stack trace, and call a function. * Used by debugging interface rdb. */ void callwithureg(void (*fn)(Ureg*)) { Ureg ureg; ureg.pc = getcallerpc(&fn); ureg.sp = (ulong)&fn; fn(&ureg); }
static void _dumpstack(Ureg *ureg) { uintptr l, v, i, estack; extern ulong etext; int x; char *s; if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){ iprint("dumpstack disabled\n"); return; } iprint("dumpstack\n"); x = 0; x += iprint("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp); i = 0; if(up && (uintptr)&l >= (uintptr)up->kstack && (uintptr)&l <= (uintptr)up->kstack+KSTACK) estack = (uintptr)up->kstack+KSTACK; else if((uintptr)&l >= (uintptr)m->stack && (uintptr)&l <= (uintptr)m+MACHSIZE) estack = (uintptr)m+MACHSIZE; else return; x += iprint("estackx %p\n", estack); for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){ v = *(uintptr*)l; if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){ /* * Could Pick off general CALL (((uchar*)v)[-5] == 0xE8) * and CALL indirect through AX * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-1] == 0xD0), * but this is too clever and misses faulting address.
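 *
 * For reference (x86 instruction encodings, not kernel code): CALL
 * rel32 is the five-byte sequence E8 xx xx xx xx, so a return address
 * v would have 0xE8 at ((uchar*)v)[-5]; CALL indirect through AX is
 * the two-byte sequence FF D0, so the bytes would be 0xFF at
 * ((uchar*)v)[-2] and 0xD0 at ((uchar*)v)[-1].  A filter such as
 *
 *	if(((uchar*)v)[-5] == 0xE8
 *	|| ((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-1] == 0xD0)
 *		treat v as a probable return address
 *
 * would quiet the output, but it would also hide the faulting address
 * itself, so every plausible text address is printed instead.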
*/ x += iprint("%.8p=%.8p ", l, v); i++; } if(i == 4){ i = 0; x += iprint("\n"); } } if(i) iprint("\n"); iprint("EOF\n"); if(ureg->trap != VectorNMI) return; i = 0; for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){ iprint("%.8p ", *(uintptr*)l); if(++i == 8){ i = 0; iprint("\n"); } } if(i) iprint("\n"); } void dumpstack(void) { callwithureg(_dumpstack); } static void debugbpt(Ureg* ureg, void*) { char buf[ERRMAX]; if(up == 0) panic("kernel bpt"); /* restore pc to instruction that caused the trap */ ureg->pc--; sprint(buf, "sys: breakpoint"); postnote(up, 1, buf, NDebug); } static void doublefault(Ureg*, void*) { panic("double fault"); } static void unexpected(Ureg* ureg, void*) { print("unexpected trap %lud; ignoring\n", ureg->trap); } extern void checkpages(void); extern void checkfault(ulong, ulong); static void fault386(Ureg* ureg, void*) { ulong addr; int read, user, n, insyscall; char buf[ERRMAX]; addr = getcr2(); read = !(ureg->ecode & 2); user = (ureg->cs & 0xFFFF) == UESEL; if(!user){ if(vmapsync(addr)) return; if(addr >= USTKTOP) panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr); if(up == nil) panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr); } if(up == nil) panic("user fault: up=0 pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr); insyscall = up->insyscall; up->insyscall = 1; n = fault(addr, read); if(n < 0){ if(!user){ dumpregs(ureg); panic("fault: 0x%lux", addr); } checkpages(); checkfault(addr, ureg->pc); sprint(buf, "sys: trap: fault %s addr=0x%lux", read ? "read" : "write", addr); postnote(up, 1, buf, NDebug); } up->insyscall = insyscall; } /* * system calls */ #include "../port/systab.h" /* * Syscall is called directly from assembler without going through trap(). */ void syscall(Ureg* ureg) { char *e; ulong sp; long ret; int i, s; ulong scallnr; if((ureg->cs & 0xFFFF) != UESEL) panic("syscall: cs 0x%4.4luX", ureg->cs); cycles(&up->kentry); m->syscall++; up->insyscall = 1; up->pc = ureg->pc; up->dbgreg = ureg; if(up->procctl == Proc_tracesyscall){ up->procctl = Proc_stopme; procctl(up); } scallnr = ureg->ax; up->scallnr = scallnr; if(scallnr == RFORK && up->fpstate == FPactive){ fpsave(&up->fpsave); up->fpstate = FPinactive; } spllo(); sp = ureg->usp; up->nerrlab = 0; ret = -1; if(!waserror()){ if(scallnr >= nsyscall || systab[scallnr] == 0){ pprint("bad sys call number %d pc %lux\n", scallnr, ureg->pc); postnote(up, 1, "sys: bad sys call", NDebug); error(Ebadarg); } if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD)) validaddr(sp, sizeof(Sargs)+BY2WD, 0); up->s = *((Sargs*)(sp+BY2WD)); up->psstate = sysctab[scallnr]; ret = systab[scallnr](up->s.args); poperror(); }else{ /* failure: save the error buffer for errstr */ e = up->syserrstr; up->syserrstr = up->errstr; up->errstr = e; if(0 && up->pid == 1) print("syscall %lud error %s\n", scallnr, up->syserrstr); } if(up->nerrlab){ print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab); for(i = 0; i < NERR; i++) print("sp=%lux pc=%lux\n", up->errlab[i].sp, up->errlab[i].pc); panic("error stack"); } /* * Put return value in frame. On the x86 the syscall is * just another trap and the return value from syscall is * ignored. On other machines the return value is put into * the results register by caller of syscall. 
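 *
 * User-side sketch, for illustration only (the real stubs are
 * generated into libc, not written here): on the x86 a system call
 * looks roughly like
 *
 *	MOVL	$SYSCALLNUMBER, AX
 *	INT	$VectorSYSCALL
 *	return value (or -1 on error) is now in AX
 *
 * with the arguments already on the user stack, which is why the
 * kernel copies Sargs from sp+BY2WD above.  Storing ret into ureg->ax
 * below is what the user stub sees when the INT returns.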
*/ ureg->ax = ret; if(up->procctl == Proc_tracesyscall){ up->procctl = Proc_stopme; s = splhi(); procctl(up); splx(s); } up->insyscall = 0; up->psstate = 0; if(scallnr == NOTED) noted(ureg, *(ulong*)(sp+BY2WD)); if(scallnr!=RFORK && (up->procctl || up->nnote)){ splhi(); notify(ureg); } /* if we delayed sched because we held a lock, sched now */ if(up->delaysched) sched(); kexit(ureg); } /* * Call user, if necessary, with note. * Pass user the Ureg struct and the note on his stack. */ int notify(Ureg* ureg) { int l; ulong s, sp; Note *n; if(up->procctl) procctl(up); if(up->nnote == 0) return 0; if(up->fpstate == FPactive){ fpsave(&up->fpsave); up->fpstate = FPinactive; } up->fpstate |= FPillegal; s = spllo(); qlock(&up->debug); up->notepending = 0; n = &up->note[0]; if(strncmp(n->msg, "sys:", 4) == 0){ l = strlen(n->msg); if(l > ERRMAX-15) /* " pc=0x12345678\0" */ l = ERRMAX-15; sprint(n->msg+l, " pc=0x%.8lux", ureg->pc); } if(n->flag!=NUser && (up->notified || up->notify==0)){ if(n->flag == NDebug) pprint("suicide: %s\n", n->msg); qunlock(&up->debug); pexit(n->msg, n->flag!=NDebug); } if(up->notified){ qunlock(&up->debug); splhi(); return 0; } if(!up->notify){ qunlock(&up->debug); pexit(n->msg, n->flag!=NDebug); } sp = ureg->usp; sp -= 256; /* debugging: preserve context causing problem */ sp -= sizeof(Ureg); if(0) print("%s %lud: notify %.8lux %.8lux %.8lux %s\n", up->text, up->pid, ureg->pc, ureg->usp, sp, n->msg); if(!okaddr((ulong)up->notify, 1, 0) || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){ pprint("suicide: bad address in notify\n"); qunlock(&up->debug); pexit("Suicide", 0); } memmove((Ureg*)sp, ureg, sizeof(Ureg)); *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */ up->ureg = (void*)sp; sp -= BY2WD+ERRMAX; memmove((char*)sp, up->note[0].msg, ERRMAX); sp -= 3*BY2WD; *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD; /* arg 2 is string */ *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg; /* arg 1 is ureg* */ *(ulong*)(sp+0*BY2WD) = 0; /* arg 0 is pc */ ureg->usp = sp; ureg->pc = (ulong)up->notify; up->notified = 1; up->nnote--; memmove(&up->lastnote, &up->note[0], sizeof(Note)); memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note)); qunlock(&up->debug); splx(s); return 1; } /* * Return user to state before notify() */ void noted(Ureg* ureg, ulong arg0) { Ureg *nureg; ulong oureg, sp; qlock(&up->debug); if(arg0!=NRSTR && !up->notified) { qunlock(&up->debug); pprint("call to noted() when not notified\n"); pexit("Suicide", 0); } up->notified = 0; nureg = up->ureg; /* pointer to user returned Ureg struct */ up->fpstate &= ~FPillegal; /* sanity clause */ oureg = (ulong)nureg; if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){ pprint("bad ureg in noted or call to noted when not notified\n"); qunlock(&up->debug); pexit("Suicide", 0); } /* * Check the segment selectors are all valid, otherwise * a fault will be taken on attempting to return to the * user process. 
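 *
 * A minimal sketch of the kind of test validsel() is assumed to make
 * (its real body is elsewhere in the kernel): a selector handed back
 * by the user must still name a user-mode descriptor, e.g.
 *
 *	cs must be UESEL, the other segment registers UDSEL,
 *	otherwise the selector is rejected
 *
 * If a bad selector slipped through, the IRET back to user mode would
 * fault inside the kernel; checking here lets noted() kill the
 * process cleanly instead.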
*/ if( validsel(nureg->cs, "cs")==0 || validsel(nureg->ss, "ss")==0 || validsel(nureg->ds, "ds")==0 || validsel(nureg->es, "es")==0 || validsel(nureg->fs, "fs")==0 || validsel(nureg->gs, "gs")==0){ pprint("bad segment selector in noted\n"); qunlock(&up->debug); pexit("Suicide", 0); } /* don't let user change system flags */ nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5); memmove(ureg, nureg, sizeof(Ureg)); switch(arg0){ case NCONT: case NRSTR: if(0) print("%s %lud: noted %.8lux %.8lux\n", up->text, up->pid, nureg->pc, nureg->usp); if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){ qunlock(&up->debug); pprint("suicide: trap in noted\n"); pexit("Suicide", 0); } up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD)); qunlock(&up->debug); break; case NSAVE: if(!okaddr(nureg->pc, BY2WD, 0) || !okaddr(nureg->usp, BY2WD, 0)){ qunlock(&up->debug); pprint("suicide: trap in noted\n"); pexit("Suicide", 0); } qunlock(&up->debug); sp = oureg-4*BY2WD-ERRMAX; splhi(); ureg->sp = sp; ((ulong*)sp)[1] = oureg; /* arg 1 0(FP) is ureg* */ ((ulong*)sp)[0] = 0; /* arg 0 is pc */ break; default: pprint("unknown noted arg 0x%lux\n", arg0); up->lastnote.flag = NDebug; /* fall through */ case NDFLT: if(up->lastnote.flag == NDebug){ qunlock(&up->debug); pprint("suicide: %s\n", up->lastnote.msg); } else qunlock(&up->debug); pexit(up->lastnote.msg, up->lastnote.flag!=NDebug); } } long execregs(ulong entry, ulong ssize, ulong nargs) { ulong *sp; Ureg *ureg; up->fpstate = FPinit; fpoff(); sp = (ulong*)(USTKTOP - ssize); *--sp = nargs; ureg = up->dbgreg; ureg->usp = (ulong)sp; ureg->pc = entry; return USTKTOP-sizeof(Tos); /* address of kernel/user shared data */ } /* * return the userpc the last exception happened at */ ulong userpc(void) { Ureg *ureg; ureg = (Ureg*)up->dbgreg; return ureg->pc; } /* This routine must save the values of registers the user is not permitted * to write from devproc and then restore the saved values before returning. */ void setregisters(Ureg* ureg, char* pureg, char* uva, int n) { ulong cs, ds, es, flags, fs, gs, ss; flags = ureg->flags; ss = ureg->ss; cs = ureg->cs; ds = ureg->ds; es = ureg->es; fs = ureg->fs; gs = ureg->gs; memmove(pureg, uva, n); ureg->gs = gs; ureg->fs = fs; ureg->es = es; ureg->ds = ds; ureg->cs = cs; ureg->ss = ss; ureg->flags = (ureg->flags & 0x00FF) | (flags & 0xFF00); } static void linkproc(void) { spllo(); up->kpfun(up->kparg); pexit("kproc dying", 0); } void kprocchild(Proc* p, void (*func)(void*), void* arg) { /* * gotolabel() needs a word on the stack in * which to place the return PC used to jump * to linkproc(). 
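 *
 * Layout sketch, for illustration: the kernel stack of a new kproc is
 * arranged as
 *
 *	p->kstack+KSTACK        top of the kernel stack
 *	p->kstack+KSTACK-BY2WD  p->sched.sp, one free word
 *
 * Assuming the usual gotolabel() implementation, which loads sched.sp,
 * stores sched.pc into that word and returns through it, the kproc
 * "returns" into linkproc(), which runs up->kpfun(up->kparg) and then
 * pexits; the reserved word is never popped again.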
*/ p->sched.pc = (ulong)linkproc; p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD; p->kpfun = func; p->kparg = arg; }
void forkchild(Proc *p, Ureg *ureg) { Ureg *cureg; /* * Add 2*BY2WD to the stack to account for * - the return PC * - trap's argument (ur) */ p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD); p->sched.pc = (ulong)forkret; cureg = (Ureg*)(p->sched.sp+2*BY2WD); memmove(cureg, ureg, sizeof(Ureg)); /* return value of syscall in child */ cureg->ax = 0; /* Things from bottom of syscall which were never executed */ p->psstate = 0; p->insyscall = 0; }
/* Give enough context in the ureg to produce a kernel stack for * a sleeping process */ void setkernur(Ureg* ureg, Proc* p) { ureg->pc = p->sched.pc; ureg->sp = p->sched.sp+4; }
ulong dbgpc(Proc *p) { Ureg *ureg; ureg = p->dbgreg; if(ureg == 0) return 0; return ureg->pc; }
port/devproc.c 644 0 0 70167 11215470364 15022ustar00cinap_lenrekcinap_lenrek#include "u.h" #include <trace.h> #include "tos.h" #include "../port/lib.h" #include "mem.h" #include "dat.h" #include "fns.h" #include "../port/error.h" #include "ureg.h" #include "edf.h"
enum { Qdir, Qtrace, Qargs, Qctl, Qfd, Qfpregs, Qkregs, Qgdt, Qmem, Qnote, Qnoteid, Qnotepg, Qns, Qproc, Qregs, Qsegment, Qstatus, Qtext, Qwait, Qprofile, };
enum { CMclose, CMclosefiles, CMfixedpri, CMhang, CMkill, CMnohang, CMnoswap, CMpri, CMprivate, CMprofile, CMstart, CMstartstop, CMstartsyscall, CMstop, CMwaitstop, CMwired, CMtrace, /* real time */ CMperiod, CMdeadline, CMcost, CMsporadic, CMdeadlinenotes, CMadmit, CMextra, CMexpel, CMevent, };
enum{ Nevents = 0x4000, Emask = Nevents - 1, };
#define STATSIZE (2*KNAMELEN+12+9*12)
/* * Status, fd, and ns are left fully readable (0444) because of their use in debugging, * particularly on shared servers. * Arguably, ns and fd shouldn't be readable; if you'd prefer, change them to 0000 */ Dirtab procdir[] = { "args", {Qargs}, 0, 0660, "ctl", {Qctl}, 0, 0000, "fd", {Qfd}, 0, 0444, "fpregs", {Qfpregs}, sizeof(FPsave), 0000, "kregs", {Qkregs}, sizeof(Ureg), 0400, "gdt", {Qgdt}, 0, 0000, "mem", {Qmem}, 0, 0000, "note", {Qnote}, 0, 0000, "noteid", {Qnoteid}, 0, 0664, "notepg", {Qnotepg}, 0, 0000, "ns", {Qns}, 0, 0444, "proc", {Qproc}, 0, 0400, "regs", {Qregs}, sizeof(Ureg), 0000, "segment", {Qsegment}, 0, 0444, "status", {Qstatus}, STATSIZE, 0444, "text", {Qtext}, 0, 0000, "wait", {Qwait}, 0, 0400, "profile", {Qprofile}, 0, 0400, };
static Cmdtab proccmd[] = { CMclose, "close", 2, CMclosefiles, "closefiles", 1, CMfixedpri, "fixedpri", 2, CMhang, "hang", 1, CMnohang, "nohang", 1, CMnoswap, "noswap", 1, CMkill, "kill", 1, CMpri, "pri", 2, CMprivate, "private", 1, CMprofile, "profile", 1, CMstart, "start", 1, CMstartstop, "startstop", 1, CMstartsyscall, "startsyscall", 1, CMstop, "stop", 1, CMwaitstop, "waitstop", 1, CMwired, "wired", 2, CMtrace, "trace", 0, CMperiod, "period", 2, CMdeadline, "deadline", 2, CMcost, "cost", 2, CMsporadic, "sporadic", 1, CMdeadlinenotes, "deadlinenotes", 1, CMadmit, "admit", 1, CMextra, "extra", 1, CMexpel, "expel", 1, CMevent, "event", 1, };
/* Segment type from portdat.h */ static char *sname[]={ "Text", "Data", "Bss", "Stack", "Shared", "Phys", };
/* * Qids are, in path: * 5 bits of file type (qids above) * 23 bits of process slot number + 1 * in vers, * 32 bits of pid, for consistency checking * If notepg, c->pgrpid.path is pgrp slot, .vers is noteid.
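 *
 * Worked example, for illustration only: with QSHIFT 5 (defined just
 * below), the status file of the process in slot s gets
 *
 *	qid.path = ((s+1)<<QSHIFT) | Qstatus
 *	qid.vers = p->pid
 *
 * so QID(qid) recovers Qstatus, SLOT(qid) recovers s, and comparing
 * PID(qid) against p->pid lets a later access detect that the slot
 * has been reused by a different process.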
*/ #define QSHIFT 5 /* location in qid of proc slot # */ #define QID(q) ((((ulong)(q).path)&0x0000001F)>>0) #define SLOT(q) (((((ulong)(q).path)&0x07FFFFFE0)>>QSHIFT)-1) #define PID(q) ((q).vers) #define NOTEID(q) ((q).vers)
void procctlreq(Proc*, char*, int); int procctlmemio(Proc*, ulong, int, void*, int); Chan* proctext(Chan*, Proc*); Segment* txt2data(Proc*, Segment*); int procstopped(void*); void mntscan(Mntwalk*, Proc*);
static Traceevent *tevents; static Lock tlock; static int topens; static int tproduced, tconsumed; void (*proctrace)(Proc*, int, vlong); long (*gdtread)(Proc*, void*, long, ulong); long (*gdtwrite)(Proc*, void*, long, ulong); extern int unfair;
static void profclock(Ureg *ur, Timer *) { Tos *tos; if(up == 0 || up->state != Running) return; /* user profiling clock */ if(userureg(ur)){ tos = (Tos*)(USTKTOP-sizeof(Tos)); tos->clock += TK2MS(1); segclock(ur->pc); } }
static int procgen(Chan *c, char *name, Dirtab *tab, int, int s, Dir *dp) { Qid qid; Proc *p; char *ename; Segment *q; ulong pid, path, perm, len; if(s == DEVDOTDOT){ mkqid(&qid, Qdir, 0, QTDIR); devdir(c, qid, "#p", 0, eve, 0555, dp); return 1; } if(c->qid.path == Qdir){ if(s == 0){ strcpy(up->genbuf, "trace"); mkqid(&qid, Qtrace, -1, QTFILE); devdir(c, qid, up->genbuf, 0, eve, 0444, dp); return 1; } if(name != nil){ /* ignore s and use name to find pid */ pid = strtol(name, &ename, 10); if(pid==0 || ename[0]!='\0') return -1; s = procindex(pid); if(s < 0) return -1; } else if(--s >= conf.nproc) return -1; p = proctab(s); pid = p->pid; if(pid == 0) return 0; sprint(up->genbuf, "%lud", pid); /* * String comparison is done in devwalk so name must match its formatted pid */ if(name != nil && strcmp(name, up->genbuf) != 0) return -1; mkqid(&qid, (s+1)<<QSHIFT, pid, QTDIR); devdir(c, qid, up->genbuf, 0, p->user, DMDIR|0555, dp); return 1; } if(c->qid.path == Qtrace){ strcpy(up->genbuf, "trace"); mkqid(&qid, Qtrace, -1, QTFILE); devdir(c, qid, up->genbuf, 0, eve, 0444, dp); return 1; } if(s >= nelem(procdir)) return -1; if(tab) panic("procgen"); tab = &procdir[s]; path = c->qid.path&~(((1<<QSHIFT)-1)); /* slot component */ p = proctab(SLOT(c->qid)); perm = tab->perm; if(perm == 0) perm = p->procmode; else /* just copy read bits */ perm |= p->procmode & 0444; len = tab->length; switch(QID(c->qid)) { case Qwait: len = p->nwait; /* incorrect size, but >0 means there's something to read */ break; case Qprofile: q = p->seg[TSEG]; if(q && q->profile) { len = (q->top-q->base)>>LRESPROF; len *= sizeof(*q->profile); } break; } mkqid(&qid, path|tab->qid.path, c->qid.vers, QTFILE); devdir(c, qid, tab->name, len, p->user, perm, dp); return 1; }
static void _proctrace(Proc* p, Tevent etype, vlong ts) { Traceevent *te; if (p->trace == 0 || topens == 0 || tproduced - tconsumed >= Nevents) return; te = &tevents[tproduced&Emask]; te->pid = p->pid; te->etype = etype; if (ts == 0) te->time = todget(nil); else te->time = ts; tproduced++; }
static void procinit(void) { if(conf.nproc >= (1<<(16-QSHIFT))-1) print("warning: too many procs for devproc\n"); addclock0link((void (*)(void))profclock, 113); /* Relative prime to HZ */ }
static Chan* procattach(char *spec) { return devattach('p', spec); }
static Walkqid* procwalk(Chan *c, Chan *nc, char **name, int nname) { return devwalk(c, nc, name, nname, 0, 0, procgen); }
static int procstat(Chan *c, uchar *db, int n) { return devstat(c, db, n, 0, 0, procgen); }
/* * none can't read or write state on other * processes. This is to contain access of * servers running as none should they be * subverted by, for example, a stack attack.
*/ static void nonone(Proc *p) { if(p == up) return; if(strcmp(up->user, "none") != 0) return; if(iseve()) return; error(Eperm); } static Chan* procopen(Chan *c, int omode) { Proc *p; Pgrp *pg; Chan *tc; int pid; if(c->qid.type & QTDIR) return devopen(c, omode, 0, 0, procgen); if(QID(c->qid) == Qtrace){ if (omode != OREAD) error(Eperm); lock(&tlock); if (waserror()){ unlock(&tlock); nexterror(); } if (topens > 0) error("already open"); topens++; if (tevents == nil){ tevents = (Traceevent*)malloc(sizeof(Traceevent) * Nevents); if(tevents == nil) error(Enomem); tproduced = tconsumed = 0; } proctrace = _proctrace; unlock(&tlock); poperror(); c->mode = openmode(omode); c->flag |= COPEN; c->offset = 0; return c; } p = proctab(SLOT(c->qid)); qlock(&p->debug); if(waserror()){ qunlock(&p->debug); nexterror(); } pid = PID(c->qid); if(p->pid != pid) error(Eprocdied); omode = openmode(omode); switch(QID(c->qid)){ case Qtext: if(omode != OREAD) error(Eperm); tc = proctext(c, p); tc->offset = 0; qunlock(&p->debug); poperror(); return tc; case Qproc: case Qkregs: case Qsegment: case Qprofile: case Qfd: if(omode != OREAD) error(Eperm); break; case Qnote: if(p->privatemem) error(Eperm); break; case Qmem: case Qctl: if(p->privatemem) error(Eperm); nonone(p); break; case Qargs: case Qnoteid: case Qstatus: case Qwait: case Qregs: case Qfpregs: case Qgdt: nonone(p); break; case Qns: if(omode != OREAD) error(Eperm); c->aux = malloc(sizeof(Mntwalk)); break; case Qnotepg: nonone(p); pg = p->pgrp; if(pg == nil) error(Eprocdied); if(omode!=OWRITE || pg->pgrpid == 1) error(Eperm); c->pgrpid.path = pg->pgrpid+1; c->pgrpid.vers = p->noteid; break; default: pprint("procopen %lux\n", c->qid); error(Egreg); } /* Affix pid to qid */ if(p->state != Dead) c->qid.vers = p->pid; /* make sure the process slot didn't get reallocated while we were playing */ coherence(); if(p->pid != pid) error(Eprocdied); tc = devopen(c, omode, 0, 0, procgen); qunlock(&p->debug); poperror(); return tc; } static int procwstat(Chan *c, uchar *db, int n) { Proc *p; Dir *d; if(c->qid.type&QTDIR) error(Eperm); if(QID(c->qid) == Qtrace) return devwstat(c, db, n); p = proctab(SLOT(c->qid)); nonone(p); d = nil; if(waserror()){ free(d); qunlock(&p->debug); nexterror(); } qlock(&p->debug); if(p->pid != PID(c->qid)) error(Eprocdied); if(strcmp(up->user, p->user) != 0 && strcmp(up->user, eve) != 0) error(Eperm); d = smalloc(sizeof(Dir)+n); n = convM2D(db, n, &d[0], (char*)&d[1]); if(n == 0) error(Eshortstat); if(!emptystr(d->uid) && strcmp(d->uid, p->user) != 0){ if(strcmp(up->user, eve) != 0) error(Eperm); else kstrdup(&p->user, d->uid); } if(d->mode != ~0UL) p->procmode = d->mode&0777; poperror(); free(d); qunlock(&p->debug); return n; } static long procoffset(long offset, char *va, int *np) { if(offset > 0) { offset -= *np; if(offset < 0) { memmove(va, va+*np+offset, -offset); *np = -offset; } else *np = 0; } return offset; } static int procqidwidth(Chan *c) { char buf[32]; return sprint(buf, "%lud", c->qid.vers); } int procfdprint(Chan *c, int fd, int w, char *s, int ns) { int n; if(w == 0) w = procqidwidth(c); n = snprint(s, ns, "%3d %.2s %C %4ld (%.16llux %*lud %.2ux) %5ld %8lld %s\n", fd, &"r w rw"[(c->mode&3)<<1], devtab[c->type]->dc, c->dev, c->qid.path, w, c->qid.vers, c->qid.type, c->iounit, c->offset, c->path->s); return n; } static int procfds(Proc *p, char *va, int count, long offset) { Fgrp *f; Chan *c; char buf[256]; int n, i, w, ww; char *a; /* print to buf to avoid holding fgrp lock while writing to user space */ if(count > sizeof buf) 
count = sizeof buf; a = buf; qlock(&p->debug); f = p->fgrp; if(f == nil){ qunlock(&p->debug); return 0; } lock(f); if(waserror()){ unlock(f); qunlock(&p->debug); nexterror(); } n = readstr(0, a, count, p->dot->path->s); n += snprint(a+n, count-n, "\n"); offset = procoffset(offset, a, &n); /* compute width of qid.path */ w = 0; for(i = 0; i <= f->maxfd; i++) { c = f->fd[i]; if(c == nil) continue; ww = procqidwidth(c); if(ww > w) w = ww; } for(i = 0; i <= f->maxfd; i++) { c = f->fd[i]; if(c == nil) continue; n += procfdprint(c, i, w, a+n, count-n); offset = procoffset(offset, a, &n); } unlock(f); qunlock(&p->debug); poperror(); /* copy result to user space, now that locks are released */ memmove(va, buf, n); return n; } static void procclose(Chan * c) { if(QID(c->qid) == Qtrace){ lock(&tlock); if(topens > 0) topens--; if(topens == 0) proctrace = nil; unlock(&tlock); } if(QID(c->qid) == Qns && c->aux != 0) free(c->aux); } static void int2flag(int flag, char *s) { if(flag == 0){ *s = '\0'; return; } *s++ = '-'; if(flag & MAFTER) *s++ = 'a'; if(flag & MBEFORE) *s++ = 'b'; if(flag & MCREATE) *s++ = 'c'; if(flag & MCACHE) *s++ = 'C'; *s = '\0'; } static int procargs(Proc *p, char *buf, int nbuf) { int j, k, m; char *a; int n; a = p->args; if(p->setargs){ snprint(buf, nbuf, "%s [%s]", p->text, p->args); return strlen(buf); } n = p->nargs; for(j = 0; j < nbuf - 1; j += m){ if(n <= 0) break; if(j != 0) buf[j++] = ' '; m = snprint(buf+j, nbuf-j, "%q", a); k = strlen(a) + 1; a += k; n -= k; } return j; } static int eventsavailable(void *) { return tproduced > tconsumed; } static long procread(Chan *c, void *va, long n, vlong off) { /* NSEG*32 was too small for worst cases */ char *a, flag[10], *sps, *srv, statbuf[NSEG*64]; int i, j, m, navail, ne, pid, rsize; long l; uchar *rptr; ulong offset; Confmem *cm; Mntwalk *mw; Proc *p; Segment *sg, *s; Ureg kur; Waitq *wq; a = va; offset = off; if(c->qid.type & QTDIR) return devdirread(c, a, n, 0, 0, procgen); if(QID(c->qid) == Qtrace){ if(!eventsavailable(nil)) return 0; rptr = (uchar*)va; navail = tproduced - tconsumed; if(navail > n / sizeof(Traceevent)) navail = n / sizeof(Traceevent); while(navail > 0) { ne = ((tconsumed & Emask) + navail > Nevents)? 
Nevents - (tconsumed & Emask): navail; memmove(rptr, &tevents[tconsumed & Emask], ne * sizeof(Traceevent)); tconsumed += ne; rptr += ne * sizeof(Traceevent); navail -= ne; } return rptr - (uchar*)va; } p = proctab(SLOT(c->qid)); if(p->pid != PID(c->qid)) error(Eprocdied); switch(QID(c->qid)){
case Qargs: qlock(&p->debug); j = procargs(p, up->genbuf, sizeof up->genbuf); qunlock(&p->debug); if(offset >= j) return 0; if(offset+n > j) n = j-offset; memmove(a, &up->genbuf[offset], n); return n;
case Qmem: if(offset < KZERO) return procctlmemio(p, offset, n, va, 1); if(!iseve()) error(Eperm); /* validate kernel addresses */ if(offset < (ulong)end) { if(offset+n > (ulong)end) n = (ulong)end - offset; memmove(a, (char*)offset, n); return n; } for(i=0; i<nelem(conf.mem); i++){ cm = &conf.mem[i]; if(cm->kbase <= offset && offset <= cm->klimit-1){ if(offset+n >= cm->klimit-1) n = cm->klimit - offset; memmove(a, (char*)offset, n); return n; } } error(Ebadarg);
case Qprofile: s = p->seg[TSEG]; if(s == 0 || s->profile == 0) error("profile is off"); i = (s->top-s->base)>>LRESPROF; i *= sizeof(*s->profile); if(offset >= i) return 0; if(offset+n > i) n = i - offset; memmove(a, ((char*)s->profile)+offset, n); return n;
case Qnote: qlock(&p->debug); if(waserror()){ qunlock(&p->debug); nexterror(); } if(p->pid != PID(c->qid)) error(Eprocdied); if(n < 1) /* must accept at least the '\0' */ error(Etoosmall); if(p->nnote == 0) n = 0; else { m = strlen(p->note[0].msg) + 1; if(m > n) m = n; memmove(va, p->note[0].msg, m); ((char*)va)[m-1] = '\0'; p->nnote--; memmove(p->note, p->note+1, p->nnote*sizeof(Note)); n = m; } if(p->nnote == 0) p->notepending = 0; poperror(); qunlock(&p->debug); return n;
case Qproc: if(offset >= sizeof(Proc)) return 0; if(offset+n > sizeof(Proc)) n = sizeof(Proc) - offset; memmove(a, ((char*)p)+offset, n); return n;
case Qgdt: if(gdtread == 0) error(Enoreg); return gdtread(p, a, n, offset);
case Qregs: rptr = (uchar*)p->dbgreg; rsize = sizeof(Ureg); goto regread;
case Qkregs: memset(&kur, 0, sizeof(Ureg)); setkernur(&kur, p); rptr = (uchar*)&kur; rsize = sizeof(Ureg); goto regread;
case Qfpregs: rptr = (uchar*)&p->fpsave; rsize = sizeof(FPsave); regread: if(rptr == 0) error(Enoreg); if(offset >= rsize) return 0; if(offset+n > rsize) n = rsize - offset; memmove(a, rptr+offset, n); return n;
case Qstatus: if(offset >= STATSIZE) return 0; if(offset+n > STATSIZE) n = STATSIZE - offset; sps = p->psstate; if(sps == 0) sps = statename[p->state]; memset(statbuf, ' ', sizeof statbuf); memmove(statbuf+0*KNAMELEN, p->text, strlen(p->text)); memmove(statbuf+1*KNAMELEN, p->user, strlen(p->user)); memmove(statbuf+2*KNAMELEN, sps, strlen(sps)); j = 2*KNAMELEN + 12; for(i = 0; i < 6; i++) { l = p->time[i]; if(i == TReal) l = MACHP(0)->ticks - l; l = TK2MS(l); readnum(0, statbuf+j+NUMSIZE*i, NUMSIZE, l, NUMSIZE); } /* ignore stack, which is mostly non-existent */ l = 0; for(i=1; i<NSEG; i++){ s = p->seg[i]; if(s) l += s->top - s->base; } readnum(0, statbuf+j+NUMSIZE*6, NUMSIZE, l>>10, NUMSIZE); readnum(0, statbuf+j+NUMSIZE*7, NUMSIZE, p->basepri, NUMSIZE); readnum(0, statbuf+j+NUMSIZE*8, NUMSIZE, p->priority, NUMSIZE); memmove(a, statbuf+offset, n); return n;
case Qsegment: j = 0; for(i = 0; i < NSEG; i++) { sg = p->seg[i]; if(sg == 0) continue; j += sprint(statbuf+j, "%-6s %c%c %.8lux %.8lux %4ld\n", sname[sg->type&SG_TYPE], sg->type&SG_RONLY ? 'R' : ' ', sg->profile ?
'P' : ' ', sg->base, sg->top, sg->ref); } if(offset >= j) return 0; if(offset+n > j) n = j-offset; if(n == 0 && offset == 0) exhausted("segments"); memmove(a, &statbuf[offset], n); return n; case Qwait: if(!canqlock(&p->qwaitr)) error(Einuse); if(waserror()) { qunlock(&p->qwaitr); nexterror(); } lock(&p->exl); if(up == p && p->nchild == 0 && p->waitq == 0) { unlock(&p->exl); error(Enochild); } pid = p->pid; while(p->waitq == 0) { unlock(&p->exl); sleep(&p->waitr, haswaitq, p); if(p->pid != pid) error(Eprocdied); lock(&p->exl); } wq = p->waitq; p->waitq = wq->next; p->nwait--; unlock(&p->exl); qunlock(&p->qwaitr); poperror(); n = snprint(a, n, "%d %lud %lud %lud %q", wq->w.pid, wq->w.time[TUser], wq->w.time[TSys], wq->w.time[TReal], wq->w.msg); free(wq); return n; case Qns: qlock(&p->debug); if(waserror()){ qunlock(&p->debug); nexterror(); } if(p->pgrp == nil || p->pid != PID(c->qid)) error(Eprocdied); mw = c->aux; if(mw->cddone){ qunlock(&p->debug); poperror(); return 0; } mntscan(mw, p); if(mw->mh == 0){ mw->cddone = 1; i = snprint(a, n, "cd %s\n", p->dot->path->s); qunlock(&p->debug); poperror(); return i; } int2flag(mw->cm->mflag, flag); if(strcmp(mw->cm->to->path->s, "#M") == 0){ srv = srvname(mw->cm->to->mchan); i = snprint(a, n, "mount %s %s %s %s\n", flag, srv==nil? mw->cm->to->mchan->path->s : srv, mw->mh->from->path->s, mw->cm->spec? mw->cm->spec : ""); free(srv); }else i = snprint(a, n, "bind %s %s %s\n", flag, mw->cm->to->path->s, mw->mh->from->path->s); qunlock(&p->debug); poperror(); return i; case Qnoteid: return readnum(offset, va, n, p->noteid, NUMSIZE); case Qfd: return procfds(p, va, n, offset); } error(Egreg); return 0; /* not reached */ } void mntscan(Mntwalk *mw, Proc *p) { Pgrp *pg; Mount *t; Mhead *f; int nxt, i; ulong last, bestmid; pg = p->pgrp; rlock(&pg->ns); nxt = 0; bestmid = ~0; last = 0; if(mw->mh) last = mw->cm->mountid; for(i = 0; i < MNTHASH; i++) { for(f = pg->mnthash[i]; f; f = f->hash) { for(t = f->mount; t; t = t->next) { if(mw->mh == 0 || (t->mountid > last && t->mountid < bestmid)) { mw->cm = t; mw->mh = f; bestmid = mw->cm->mountid; nxt = 1; } } } } if(nxt == 0) mw->mh = 0; runlock(&pg->ns); } static long procwrite(Chan *c, void *va, long n, vlong off) { int id, m; Proc *p, *t, *et; char *a, *arg, buf[ERRMAX]; ulong offset = off; a = va; if(c->qid.type & QTDIR) error(Eisdir); p = proctab(SLOT(c->qid)); /* Use the remembered noteid in the channel rather * than the process pgrpid */ if(QID(c->qid) == Qnotepg) { pgrpnote(NOTEID(c->pgrpid), va, n, NUser); return n; } qlock(&p->debug); if(waserror()){ qunlock(&p->debug); nexterror(); } if(p->pid != PID(c->qid)) error(Eprocdied); switch(QID(c->qid)){ case Qargs: if(n == 0) error(Eshort); if(n >= ERRMAX) error(Etoobig); arg = malloc(n+1); if(arg == nil) error(Enomem); memmove(arg, va, n); m = n; if(arg[m-1] != 0) arg[m++] = 0; free(p->args); p->nargs = m; p->args = arg; p->setargs = 1; break; case Qmem: if(p->state != Stopped) error(Ebadctl); n = procctlmemio(p, offset, n, va, 0); break; case Qregs: if(offset >= sizeof(Ureg)) n = 0; else if(offset+n > sizeof(Ureg)) n = sizeof(Ureg) - offset; if(p->dbgreg == 0) error(Enoreg); setregisters(p->dbgreg, (char*)(p->dbgreg)+offset, va, n); break; case Qfpregs: if(offset >= sizeof(FPsave)) n = 0; else if(offset+n > sizeof(FPsave)) n = sizeof(FPsave) - offset; memmove((uchar*)&p->fpsave+offset, va, n); break; case Qgdt: if(gdtwrite == 0) error(Enoreg); n = gdtwrite(p, a, n, offset); break; case Qctl: procctlreq(p, va, n); break; case Qnote: if(p->kp) error(Eperm); 
if(n >= ERRMAX-1) error(Etoobig); memmove(buf, va, n); buf[n] = 0; if(!postnote(p, 0, buf, NUser)) error("note not posted"); break; case Qnoteid: id = atoi(a); if(id == p->pid) { p->noteid = id; break; } t = proctab(0); for(et = t+conf.nproc; t < et; t++) { if(t->state == Dead) continue; if(id == t->noteid) { if(strcmp(p->user, t->user) != 0) error(Eperm); p->noteid = id; break; } } if(p->noteid != id) error(Ebadarg); break; default: pprint("unknown qid in procwrite\n"); error(Egreg); } poperror(); qunlock(&p->debug); return n; } Dev procdevtab = { 'p', "proc", devreset, procinit, devshutdown, procattach, procwalk, procstat, procopen, devcreate, procclose, procread, devbread, procwrite, devbwrite, devremove, procwstat, }; Chan* proctext(Chan *c, Proc *p) { Chan *tc; Image *i; Segment *s; s = p->seg[TSEG]; if(s == 0) error(Enonexist); if(p->state==Dead) error(Eprocdied); lock(s); i = s->image; if(i == 0) { unlock(s); error(Eprocdied); } unlock(s); lock(i); if(waserror()) { unlock(i); nexterror(); } tc = i->c; if(tc == 0) error(Eprocdied); if(incref(tc) == 1 || (tc->flag&COPEN) == 0 || tc->mode!=OREAD) { cclose(tc); error(Eprocdied); } if(p->pid != PID(c->qid)) error(Eprocdied); unlock(i); poperror(); return tc; } void procstopwait(Proc *p, int ctl) { int pid; if(p->pdbg) error(Einuse); if(procstopped(p) || p->state == Broken) return; if(ctl != 0) p->procctl = ctl; p->pdbg = up; pid = p->pid; qunlock(&p->debug); up->psstate = "Stopwait"; if(waserror()) { p->pdbg = 0; qlock(&p->debug); nexterror(); } sleep(&up->sleep, procstopped, p); poperror(); qlock(&p->debug); if(p->pid != pid) error(Eprocdied); } static void procctlcloseone(Proc *p, Fgrp *f, int fd) { Chan *c; c = f->fd[fd]; if(c == nil) return; f->fd[fd] = nil; unlock(f); qunlock(&p->debug); cclose(c); qlock(&p->debug); lock(f); } void procctlclosefiles(Proc *p, int all, int fd) { int i; Fgrp *f; f = p->fgrp; if(f == nil) error(Eprocdied); lock(f); f->ref++; if(all) for(i = 0; i < f->maxfd; i++) procctlcloseone(p, f, i); else procctlcloseone(p, f, fd); unlock(f); closefgrp(f); } static char * parsetime(vlong *rt, char *s) { uvlong ticks; ulong l; char *e, *p; static int p10[] = {100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1}; if (s == nil) return("missing value"); ticks=strtoul(s, &e, 10); if (*e == '.'){ p = e+1; l = strtoul(p, &e, 10); if(e-p > nelem(p10)) return "too many digits after decimal point"; if(e-p == 0) return "ill-formed number"; l *= p10[e-p-1]; }else l = 0; if (*e == '\0' || strcmp(e, "s") == 0){ ticks = 1000000000 * ticks + l; }else if (strcmp(e, "ms") == 0){ ticks = 1000000 * ticks + l/1000; }else if (strcmp(e, "µs") == 0 || strcmp(e, "us") == 0){ ticks = 1000 * ticks + l/1000000; }else if (strcmp(e, "ns") != 0) return "unrecognized unit"; *rt = ticks; return nil; } void procctlreq(Proc *p, char *va, int n) { Segment *s; int npc, pri; Cmdbuf *cb; Cmdtab *ct; vlong time; char *e; void (*pt)(Proc*, int, vlong); if(p->kp) /* no ctl requests to kprocs */ error(Eperm); cb = parsecmd(va, n); if(waserror()){ free(cb); nexterror(); } ct = lookupcmd(cb, proccmd, nelem(proccmd)); switch(ct->index){ case CMclose: procctlclosefiles(p, 0, atoi(cb->f[1])); break; case CMclosefiles: procctlclosefiles(p, 1, 0); break; case CMhang: p->hang = 1; break; case CMkill: switch(p->state) { case Broken: unbreak(p); break; case Stopped: p->procctl = Proc_exitme; postnote(p, 0, "sys: killed", NExit); ready(p); break; default: p->procctl = Proc_exitme; postnote(p, 0, "sys: killed", NExit); } break; case CMnohang: p->hang = 0; 
break; case CMnoswap: p->noswap = 1; break; case CMpri: pri = atoi(cb->f[1]); if(pri > PriNormal && !iseve()) error(Eperm); procpriority(p, pri, 0); break; case CMfixedpri: pri = atoi(cb->f[1]); if(pri > PriNormal && !iseve()) error(Eperm); procpriority(p, pri, 1); break; case CMprivate: p->privatemem = 1; break; case CMprofile: s = p->seg[TSEG]; if(s == 0 || (s->type&SG_TYPE) != SG_TEXT) error(Ebadctl); if(s->profile != 0) free(s->profile); npc = (s->top-s->base)>>LRESPROF; s->profile = malloc(npc*sizeof(*s->profile)); if(s->profile == 0) error(Enomem); break; case CMstart: if(p->state != Stopped) error(Ebadctl); ready(p); break; case CMstartstop: if(p->state != Stopped) error(Ebadctl); p->procctl = Proc_traceme; ready(p); procstopwait(p, Proc_traceme); break; case CMstartsyscall: if(p->state != Stopped) error(Ebadctl); p->procctl = Proc_tracesyscall; ready(p); procstopwait(p, Proc_tracesyscall); break; case CMstop: procstopwait(p, Proc_stopme); break; case CMwaitstop: procstopwait(p, 0); break; case CMwired: procwired(p, atoi(cb->f[1])); break; case CMtrace: switch(cb->nf){ case 1: p->trace ^= 1; break; case 2: p->trace = (atoi(cb->f[1]) != 0); break; default: error("args"); } break; /* real time */ case CMperiod: if(p->edf == nil) edfinit(p); if(e=parsetime(&time, cb->f[1])) /* time in ns */ error(e); edfstop(p); p->edf->T = time/1000; /* Edf times are in µs */ break; case CMdeadline: if(p->edf == nil) edfinit(p); if(e=parsetime(&time, cb->f[1])) error(e); edfstop(p); p->edf->D = time/1000; break; case CMcost: if(p->edf == nil) edfinit(p); if(e=parsetime(&time, cb->f[1])) error(e); edfstop(p); p->edf->C = time/1000; break; case CMsporadic: if(p->edf == nil) edfinit(p); p->edf->flags |= Sporadic; break; case CMdeadlinenotes: if(p->edf == nil) edfinit(p); p->edf->flags |= Sendnotes; break; case CMadmit: if(p->edf == 0) error("edf params"); if(e = edfadmit(p)) error(e); break; case CMextra: if(p->edf == nil) edfinit(p); p->edf->flags |= Extratime; break; case CMexpel: if(p->edf) edfstop(p); break; case CMevent: pt = proctrace; if(up->trace && pt) pt(up, SUser, 0); break; } poperror(); free(cb); } int procstopped(void *a) { Proc *p = a; return p->state == Stopped; } int procctlmemio(Proc *p, ulong offset, int n, void *va, int read) { KMap *k; Pte *pte; Page *pg; Segment *s; ulong soff, l; char *a = va, *b; for(;;) { s = seg(p, offset, 1); if(s == 0) error(Ebadarg); if(offset+n >= s->top) n = s->top-offset; if(!read && (s->type&SG_TYPE) == SG_TEXT) s = txt2data(p, s); s->steal++; soff = offset-s->base; if(waserror()) { s->steal--; nexterror(); } if(fixfault(s, offset, read, 0) == 0) break; poperror(); s->steal--; } poperror(); pte = s->map[soff/PTEMAPMEM]; if(pte == 0) panic("procctlmemio"); pg = pte->pages[(soff&(PTEMAPMEM-1))/BY2PG]; if(pagedout(pg)) panic("procctlmemio1"); l = BY2PG - (offset&(BY2PG-1)); if(n > l) n = l; k = kmap(pg); if(waserror()) { s->steal--; kunmap(k); nexterror(); } b = (char*)VA(k); b += offset&(BY2PG-1); if(read == 1) memmove(a, b, n); /* This can fault */ else memmove(b, a, n); kunmap(k); poperror(); /* Ensure the process sees text page changes */ if(s->flushme) memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl)); s->steal--; if(read == 0) p->newtlb = 1; return n; } Segment* txt2data(Proc *p, Segment *s) { int i; Segment *ps; ps = newseg(SG_DATA, s->base, s->size); ps->image = s->image; incref(ps->image); ps->fstart = s->fstart; ps->flen = s->flen; ps->flushme = 1; qlock(&p->seglock); for(i = 0; i < NSEG; i++) if(p->seg[i] == s) break; if(i == NSEG) 
panic("segment gone"); qunlock(&s->lk); putseg(s); qlock(&ps->lk); p->seg[i] = ps; qunlock(&p->seglock); return ps; } Segment* data2txt(Segment *s) { Segment *ps; ps = newseg(SG_TEXT, s->base, s->size); ps->image = s->image; incref(ps->image); ps->fstart = s->fstart; ps->flen = s->flen; ps->flushme = 1; return ps; } void mntscan(Mntwalk *mw, Proc *p) { Pgrp *pg; Mount *t; Mhead *f; int nxt, i; ulong last, bestmid; pg = p->pgrp; rlock(&pg->ns); nxt = 0; bestmid = ~0; last = 0; if(mw->mh) last = mw->cm->mountid; for(i = 0; i < MNTHASH; i++) { for(f = pg->mnthash[i]; f; f = f->hash) { for(t = f->mount; t; t = t->next) { if(mw->mh == 0 || (t->mountid > last && t->mountid