diff -r 740b5184fe2b sys/src/libc/spim/memccpy.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/src/libc/spim/memccpy.s	Thu Aug 20 00:25:22 2015 -0700
@@ -0,0 +1,20 @@
+	TEXT	memccpy(SB), $0
+MOVW R1, 0(FP)
+	MOVW	n+12(FP), R1
+	BEQ	R1, ret
+	MOVW	s1+0(FP), R3
+	MOVW	s2+4(FP), R2
+	MOVBU	c+8(FP), R4
+	ADDU	R1, R2, R5
+
+l1:	MOVBU	(R2), R6
+	ADDU	$1, R2
+	MOVBU	R6, (R3)
+	ADDU	$1, R3
+	BEQ	R4, R6, eq
+	BNE	R2, R5, l1
+	MOVW	$0, R1
+	RET
+
+eq:	MOVW	R3, R1
+ret:	RET
diff -r 740b5184fe2b sys/src/libc/spim/memchr.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/src/libc/spim/memchr.s	Thu Aug 20 00:25:22 2015 -0700
@@ -0,0 +1,39 @@
+	TEXT	memchr(SB), $0
+MOVW R1, 0(FP)
+
+	MOVW	n+8(FP), R1
+	MOVW	s1+0(FP), R2
+	MOVBU	c+4(FP), R3
+	ADDU	R1, R2, R6
+
+	AND	$(~1), R1, R5
+	ADDU	R2, R5
+	BEQ	R2, R5, lt2
+
+l1:
+	MOVBU	0(R2), R4
+	MOVBU	1(R2), R7
+	BEQ	R3, R4, eq0
+	ADDU	$2, R2
+	BEQ	R3, R7, eq
+	BNE	R2, R5, l1
+
+lt2:
+	BEQ	R2, R6, zret
+
+l2:
+	MOVBU	(R2), R4
+	ADDU	$1, R2
+	BEQ	R3, R4, eq
+	BNE	R2, R6, l2
+zret:
+	MOVW	R0, R1
+	RET
+
+eq0:
+	MOVW	R2, R1
+	RET
+
+eq:
+	SUBU	$1,R2, R1
+	RET
diff -r 740b5184fe2b sys/src/libc/spim/memcmp.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/src/libc/spim/memcmp.s	Thu Aug 20 00:25:22 2015 -0700
@@ -0,0 +1,116 @@
+	TEXT	memcmp(SB), $0
+MOVW R1, 0(FP)
+
+/*
+ * performance:
+ *	aligned about 1.0us/call and 17.4mb/sec
+ *	unaligned is about 3.1mb/sec
+ */
+
+	MOVW	n+8(FP), R3		/* R3 is count */
+	MOVW	s1+0(FP), R4		/* R4 is pointer1 */
+	MOVW	s2+4(FP), R5		/* R5 is pointer2 */
+	ADDU	R3,R4, R6		/* R6 is end pointer1 */
+
+	JMP	out		/* spim is little-endian; the word code below assumes big-endian */
+
+/*
+ * if not at least 4 chars,
+ * don't even mess around.
+ * 3 chars to guarantee any
+ * rounding up to a word
+ * boundary and 4 characters
+ * to get at least maybe one
+ * full word cmp.
+ */
+	SGT	$4,R3, R1
+	BNE	R1, out
+
+/*
+ * test if both pointers
+ * are similarly word-aligned
+ */
+	XOR	R4,R5, R1
+	AND	$3, R1
+	BNE	R1, out
+
+/*
+ * byte at a time to word-align
+ */
+l1:
+	AND	$3,R4, R1
+	BEQ	R1, l2
+	MOVBU	0(R4), R8
+	MOVBU	0(R5), R9
+	ADDU	$1, R4
+	BNE	R8,R9, ne
+	ADDU	$1, R5
+	JMP	l1
+
+/*
+ * turn R3 into end pointer1-15
+ * cmp 16 at a time while there's room
+ */
+l2:
+	ADDU	$-15,R6, R3
+l3:
+	SGTU	R3,R4, R1
+	BEQ	R1, l4
+	MOVW	0(R4), R8
+	MOVW	0(R5), R9
+	MOVW	4(R4), R10
+	BNE	R8,R9, ne
+	MOVW	4(R5), R11
+	MOVW	8(R4), R8
+	BNE	R10,R11, ne1
+	MOVW	8(R5), R9
+	MOVW	12(R4), R10
+	BNE	R8,R9, ne
+	MOVW	12(R5), R11
+	ADDU	$16, R4
+	BNE	R10,R11, ne1
+	BNE	R8,R9, ne
+	ADDU	$16, R5
+	JMP	l3
+
+/*
+ * turn R3 into end pointer1-3
+ * cmp 4 at a time while there's room
+ */
+l4:
+	ADDU	$-3,R6, R3
+l5:
+	SGTU	R3,R4, R1
+	BEQ	R1, out
+	MOVW	0(R4), R8
+	MOVW	0(R5), R9
+	ADDU	$4, R4
+	BNE	R8,R9, ne	/* only works because big endian */
+	ADDU	$4, R5
+	JMP	l5
+
+/*
+ * last loop, cmp byte at a time
+ */
+out:
+	SGTU	R6,R4, R1
+	BEQ	R1, ret
+	MOVBU	0(R4), R8
+	MOVBU	0(R5), R9
+	ADDU	$1, R4
+	BNE	R8,R9, ne
+	ADDU	$1, R5
+	JMP	out
+
+ne1:
+	SGTU	R10,R11, R1
+	BNE	R1, ret
+	MOVW	$-1,R1
+	RET
+ne:
+	SGTU	R8,R9, R1
+	BNE	R1, ret
+	MOVW	$-1,R1
+ret:
+	RET
+	END
diff -r 740b5184fe2b sys/src/libc/spim/mkfile
--- a/sys/src/libc/spim/mkfile	Tue Aug 18 01:24:22 2015 -0700
+++ b/sys/src/libc/spim/mkfile	Thu Aug 20 00:25:22 2015 -0700
@@ -9,13 +9,9 @@
 	getfcr.s\
 	main9.s\
 	main9p.s\
-	memccpy.s\
-	memchr.s\
-	memcmp.s\
 	memmove.s\
 	memset.s\
 	setjmp.s\
-	strchr.s\
 	strcmp.s\
 	strcpy.s\
 	tas.s\
@@ -26,6 +22,10 @@
 	notejmp.c\
 
 SFILES=\
+	memccpy.s\
+	memchr.s\
+	memcmp.s\
+	strchr.s\
 	vlop.s\
 
 CFILES=\
diff -r 740b5184fe2b sys/src/libc/spim/strchr.s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/src/libc/spim/strchr.s	Thu Aug 20 00:25:22 2015 -0700
@@ -0,0 +1,63 @@
+	TEXT	strchr(SB), $0
+MOVW R1, 0(FP)
+	MOVB	c+4(FP), R4
+	MOVW	s+0(FP), R3
+
+	BEQ	R4, l2
+
+/*
+ * char is not null
+ */
+l1:
+	MOVB	(R3), R1
+	ADDU	$1, R3
+	BEQ	R1, ret
+	BNE	R1,R4, l1
+	JMP	rm1
+
+/*
+ * char is null
+ * align to word
+ */
+l2:
+	AND	$3,R3, R1
+	BEQ	R1, l3
+	MOVB	(R3), R1
+	ADDU	$1, R3
+	BNE	R1, l2
+	JMP	rm1
+
+l3:
+	MOVW	$0xff000000, R6
+	MOVW	$0x00ff0000, R7
+
+l4:
+	MOVW	(R3), R5
+	ADDU	$4, R3
+	AND	$0xff,R5, R1	/* little-endian: low byte is the lowest address */
+	AND	$0xff00,R5, R2
+	BEQ	R1, b0
+	AND	R7,R5, R1
+	BEQ	R2, b1
+	AND	R6,R5, R2
+	BEQ	R1, b2
+	BNE	R2, l4
+
+rm1:
+	ADDU	$-1,R3, R1
+	JMP	ret
+
+b2:
+	ADDU	$-2,R3, R1
+	JMP	ret
+
+b1:
+	ADDU	$-3,R3, R1
+	JMP	ret
+
+b0:
+	ADDU	$-4,R3, R1
+	JMP	ret
+
+ret:
+	RET
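
Note (not part of the patch): the JMP out in memcmp.s keeps only the byte loop live, because ordering two words with an unsigned compare (the SGTU at ne and ne1) agrees with byte-wise memcmp order only on a big-endian machine, as the "only works because big endian" comment says. A minimal, self-contained C demonstration of the mismatch, assuming a little-endian host:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/*
	 * Byte-wise order says s1 < s2 (1 < 2 at offset 0), but read as
	 * little-endian words a = 0x02000001 > b = 0x01000002, so the word
	 * shortcut would answer "greater".  On a big-endian machine the
	 * two orders agree and the shortcut is safe.
	 */
	int
	main(void)
	{
		unsigned char s1[4] = { 1, 0, 0, 2 };
		unsigned char s2[4] = { 2, 0, 0, 1 };
		uint32_t a, b;

		memcpy(&a, s1, sizeof a);
		memcpy(&b, s2, sizeof b);
		printf("bytewise: %d\n", memcmp(s1, s2, 4) < 0 ? -1 : 1);
		printf("wordwise: %d\n", a < b ? -1 : 1);
		return 0;
	}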
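
Note (not part of the patch): the l4 loop in strchr.s scans a word at a time for the terminating NUL. On spim the byte at the lowest address lands in bits 0-7 of the loaded word, so the masks must be tested from the low-order byte up for the first NUL in the word to win. A C sketch of that loop under the same assumptions; the name nullscan is made up for illustration, and s must already be word-aligned:

	#include <stdint.h>

	/* return a pointer to the first NUL at or after word-aligned s */
	char*
	nullscan(char *s)
	{
		uint32_t w;

		for(;;){
			w = *(uint32_t*)s;
			s += 4;
			if((w & 0x000000ffu) == 0)
				return s - 4;	/* byte 0: lowest address */
			if((w & 0x0000ff00u) == 0)
				return s - 3;	/* byte 1 */
			if((w & 0x00ff0000u) == 0)
				return s - 2;	/* byte 2 */
			if((w & 0xff000000u) == 0)
				return s - 1;	/* byte 3: highest address */
		}
	}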