## diffname carrera/memset.s 1994/0207 ## diff -e /dev/null /n/fornaxdump/1994/0207/sys/src/brazil/carrera/memset.s 0a /* * R4000 double-word version. Only works in kernel mode. * Must have interrupts disabled because we save and restore R5 in a 32-bit hole. * Here for reference... */ /* * R4000 instructions */ #define LD(base, rt) WORD $((067<<26)|((base)<<21)|((rt)<<16)) #define STD(rt, offset, base) WORD $((077<<26)|((base)<<21)|((rt)<<16)|((offset)&0xFFFF)) #define DSLL(sa, rt, rd) WORD $(((rt)<<16)|((rd)<<11)|((sa)<<6)|070) #define DSLL32(sa, rt, rd) WORD $(((rt)<<16)|((rd)<<11)|((sa)<<6)|070) #define DSRA(sa, rt, rd) WORD $(((rt)<<16)|((rd)<<11)|((sa)<<6)|074) #define LL(base, rt) WORD $((060<<26)|((base)<<21)|((rt)<<16)) #define SC(base, rt) WORD $((070<<26)|((base)<<21)|((rt)<<16)) TEXT memset(SB),$16 /* $16 for hole to build temporary */ MOVW R1, 0(FP) /* * performance: * about 1us/call and 28mb/sec (on what machine, ken? huh? -rob) */ MOVW n+8(FP), R3 /* R3 is count */ MOVW p+0(FP), R4 /* R4 is pointer */ MOVW c+4(FP), R5 /* R5 is char */ ADDU R3,R4, R6 /* R6 is end pointer */ /* * if not at least 8 chars, * don't even mess around. * 7 chars to guarantee any * rounding up to a doubleword * boundary and 8 characters * to get at least maybe one * full doubleword store. */ SGT $8,R3, R1 BNE R1, out /* * turn R5 into a doubleword of characters. * build doubleword on stack; can't use OR on 64-bit registers */ AND $0xff, R5 SLL $8,R5, R1 OR R1, R5 SLL $16,R5, R1 OR R1, R5 MOVW R29, R1 ADD $8, R1 AND $~7, R1 MOVW R5, 0(R1) MOVW R5, 4(R1) LD (1, 5) /* * store one byte at a time until pointer * is aligned on a doubleword boundary */ l1: AND $7,R4, R1 BEQ R1, l2 MOVB R5, 0(R4) ADDU $1, R4 JMP l1 /* * turn R3 into end pointer-31 * store 32 at a time while there's room */ l2: ADDU $-31,R6, R3 l3: SGTU R3,R4, R1 BEQ R1, l4 STD (5, 0, 4) STD (5, 8, 4) ADDU $32, R4 STD (5, -16, 4) STD (5, -8, 4) JMP l3 /* * turn R3 into end pointer-3 * store 4 at a time while there's room */ l4: ADDU $-3,R6, R3 l5: SGTU R3,R4, R1 BEQ R1, out MOVW R5, 0(R4) ADDU $4, R4 JMP l5 /* * last loop, store byte at a time */ out: SGTU R6,R4 ,R1 BEQ R1, ret MOVB R5, 0(R4) ADDU $1, R4 JMP out ret: MOVW s1+0(FP), R1 RET END . ## diffname carrera/memset.s 1994/0209 ## diff -e /n/fornaxdump/1994/0207/sys/src/brazil/carrera/memset.s /n/fornaxdump/1994/0209/sys/src/brazil/carrera/memset.s 20,24d 12,16d 3,4c * Requires low-core support to save R5 in a 64-bit hole. . ## diffname carrera/memset.s 1997/1101 ## diff -e /n/fornaxdump/1994/0209/sys/src/brazil/carrera/memset.s /n/emeliedump/1997/1101/sys/src/brazil/carrera/memset.s 77c * store 4 at a time while theres room . 71,72c MOVV R5, -16(R4) MOVV R5, -8(R4) . 68,69c MOVV R5, 0(R4) MOVV R5, 8(R4) . 61c * store 32 at a time while theres room . 50c * is alligned on a doubleword boundary . 41,47c SLLV $32,R5, R1 OR R1, R5 . 39c SLLV $16,R5, R1 . 37c SLLV $8,R5, R1 . 33,34c * turn R5 into a doubleword of characters . 27c * full word store. . 24c * rounding up to a word . 22c * dont even mess around. . 6,14d 4a TEXT memset(SB),$16 MOVW R1, 0(FP) . 2,3c * R4000-compatible memset uses V instructions. * The kernel uses the 3000 compilers, but we can * use this procedure for speed. . ## diffname carrera/memset.s 2001/0527 # deleted ## diff -e /n/emeliedump/1997/1101/sys/src/brazil/carrera/memset.s /n/emeliedump/2001/0527/sys/src/9/carrera/memset.s 1,90d