sys/include/regexp.h 664 0 0 2642 11305066156 12457ustar00syssys#pragma src "/sys/src/libregexp" #pragma lib "libregexp.a" enum { NSPANS = 128, /* max rune ranges per character class */ NCLASS = 16, /* max character classes per program */ }; typedef struct Resub Resub; typedef struct Reclass Reclass; typedef struct Reinst Reinst; typedef struct Reprog Reprog; /* * Sub expression matches */ struct Resub{ union { char *sp; Rune *rsp; }; union { char *ep; Rune *rep; }; }; /* * character class, each pair of rune's defines a range */ struct Reclass{ Rune *end; Rune spans[NSPANS*2]; }; /* * Machine instructions */ struct Reinst{ int type; union { Reclass *cp; /* class pointer */ Rune r; /* character */ int subid; /* sub-expression id for RBRA and LBRA */ Reinst *right; /* right child of OR */ }; union { /* regexp relies on these two being in the same union */ Reinst *left; /* left child of OR */ Reinst *next; /* next instruction for CAT & LBRA */ }; }; /* * Reprogram definition */ struct Reprog{ Reinst *startinst; /* start pc */ Reclass class[NCLASS]; /* .data */ Reinst firstinst[5]; /* .text */ }; extern Reprog *regcomp(char*); extern Reprog *regcomplit(char*); extern Reprog *regcompnl(char*); extern void regerror(char*); extern int regexec(Reprog*, char*, Resub*, int); extern void regsub(char*, char*, int, Resub*, int); extern int rregexec(Reprog*, Rune*, Resub*, int); extern void rregsub(Rune*, Rune*, int, Resub*, int); sys/src/libregexp/ 775 0 0 0 11634277114 121405ustar00syssyssys/src/libregexp/mkfile 664 0 0 601 7555545524 13301ustar00syssys #include #include "regexp.h" #include "regcomp.h" /* * save a new match in mp */ extern void _renewmatch(Resub *mp, int ms, Resublist *sp) { int i; if(mp==0 || ms<=0) return; if(mp[0].sp==0 || sp->m[0].spm[0].sp==mp[0].sp && sp->m[0].ep>mp[0].ep)){ for(i=0; im[i]; for(; iinst; p++){ if(p->inst == ip){ if(sep->m[0].sp < p->se.m[0].sp){ if(ms > 1) p->se = *sep; else p->se.m[0] = sep->m[0]; } return 0; } } p->inst = ip; if(ms > 1) p->se = *sep; else p->se.m[0] = sep->m[0]; (++p)->inst = 0; return p; } /* * same as renewthread, but called with * initial empty start pointer. */ extern Relist* _renewemptythread(Relist *lp, /* _relist to add to */ Reinst *ip, /* instruction to add */ int ms, char *sp) /* pointers to subexpressions */ { Relist *p; for(p=lp; p->inst; p++){ if(p->inst == ip){ if(sp < p->se.m[0].sp) { if(ms > 1) memset(&p->se, 0, sizeof(p->se)); p->se.m[0].sp = sp; } return 0; } } p->inst = ip; if(ms > 1) memset(&p->se, 0, sizeof(p->se)); p->se.m[0].sp = sp; (++p)->inst = 0; return p; } extern Relist* _rrenewemptythread(Relist *lp, /* _relist to add to */ Reinst *ip, /* instruction to add */ int ms, Rune *rsp) /* pointers to subexpressions */ { Relist *p; for(p=lp; p->inst; p++){ if(p->inst == ip){ if(rsp < p->se.m[0].rsp) { if(ms > 1) memset(&p->se, 0, sizeof(p->se)); p->se.m[0].rsp = rsp; } return 0; } } p->inst = ip; if(ms > 1) memset(&p->se, 0, sizeof(p->se)); p->se.m[0].rsp = rsp; (++p)->inst = 0; return p; } sys/src/libregexp/regcomp.c 664 0 0 23377 12062365664 14000ustar00syssys#include #include #include "regexp.h" #include "regcomp.h" #define TRUE 1 #define FALSE 0 /* * Parser Information */ typedef struct Node { Reinst* first; Reinst* last; }Node; #define NSTACK 20 static Node andstack[NSTACK]; static Node *andp; static int atorstack[NSTACK]; static int* atorp; static int cursubid; /* id of current subexpression */ static int subidstack[NSTACK]; /* parallel to atorstack */ static int* subidp; static int lastwasand; /* Last token was operand */ static int nbra; static char* exprp; /* pointer to next character in source expression */ static int lexdone; static int nclass; static Reclass*classp; static Reinst* freep; static int errors; static Rune yyrune; /* last lex'd rune */ static Reclass*yyclassp; /* last lex'd class */ /* predeclared crap */ static void operator(int); static void pushand(Reinst*, Reinst*); static void pushator(int); static void evaluntil(int); static int bldcclass(void); static jmp_buf regkaboom; static void rcerror(char *s) { errors++; regerror(s); longjmp(regkaboom, 1); } static Reinst* newinst(int t) { freep->type = t; freep->left = 0; freep->right = 0; return freep++; } static void operand(int t) { Reinst *i; if(lastwasand) operator(CAT); /* catenate is implicit */ i = newinst(t); if(t == CCLASS || t == NCCLASS) i->cp = yyclassp; if(t == RUNE) i->r = yyrune; pushand(i, i); lastwasand = TRUE; } static void operator(int t) { if(t==RBRA && --nbra<0) rcerror("unmatched right paren"); if(t==LBRA){ if(++cursubid >= NSUBEXP) rcerror ("too many subexpressions"); nbra++; if(lastwasand) operator(CAT); } else evaluntil(t); if(t != RBRA) pushator(t); lastwasand = FALSE; if(t==STAR || t==QUEST || t==PLUS || t==RBRA) lastwasand = TRUE; /* these look like operands */ } static void regerr2(char *s, int c) { char buf[100]; char *cp = buf; while(*s) *cp++ = *s++; *cp++ = c; *cp = '\0'; rcerror(buf); } static void cant(char *s) { char buf[100]; strcpy(buf, "can't happen: "); strcat(buf, s); rcerror(buf); } static void pushand(Reinst *f, Reinst *l) { if(andp >= &andstack[NSTACK]) cant("operand stack overflow"); andp->first = f; andp->last = l; andp++; } static void pushator(int t) { if(atorp >= &atorstack[NSTACK]) cant("operator stack overflow"); *atorp++ = t; *subidp++ = cursubid; } static Node* popand(int op) { Reinst *inst; if(andp <= &andstack[0]){ regerr2("missing operand for ", op); inst = newinst(NOP); pushand(inst,inst); } return --andp; } static int popator(void) { if(atorp <= &atorstack[0]) cant("operator stack underflow"); --subidp; return *--atorp; } static void evaluntil(int pri) { Node *op1, *op2; Reinst *inst1, *inst2; while(pri==RBRA || atorp[-1]>=pri){ switch(popator()){ default: rcerror("unknown operator in evaluntil"); break; case LBRA: /* must have been RBRA */ op1 = popand('('); inst2 = newinst(RBRA); inst2->subid = *subidp; op1->last->next = inst2; inst1 = newinst(LBRA); inst1->subid = *subidp; inst1->next = op1->first; pushand(inst1, inst2); return; case OR: op2 = popand('|'); op1 = popand('|'); inst2 = newinst(NOP); op2->last->next = inst2; op1->last->next = inst2; inst1 = newinst(OR); inst1->right = op1->first; inst1->left = op2->first; pushand(inst1, inst2); break; case CAT: op2 = popand(0); op1 = popand(0); op1->last->next = op2->first; pushand(op1->first, op2->last); break; case STAR: op2 = popand('*'); inst1 = newinst(OR); op2->last->next = inst1; inst1->right = op2->first; pushand(inst1, inst1); break; case PLUS: op2 = popand('+'); inst1 = newinst(OR); op2->last->next = inst1; inst1->right = op2->first; pushand(op2->first, inst1); break; case QUEST: op2 = popand('?'); inst1 = newinst(OR); inst2 = newinst(NOP); inst1->left = inst2; inst1->right = op2->first; op2->last->next = inst2; pushand(inst1, inst2); break; } } } static Reprog* optimize(Reprog *pp) { Reinst *inst, *target; int size; Reprog *npp; Reclass *cl; int diff; /* * get rid of NOOP chains */ for(inst=pp->firstinst; inst->type!=END; inst++){ target = inst->next; while(target->type == NOP) target = target->next; inst->next = target; } /* * The original allocation is for an area larger than * necessary. Reallocate to the actual space used * and then relocate the code. */ size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst); npp = realloc(pp, size); if(npp==0 || npp==pp) return pp; diff = (char *)npp - (char *)pp; freep = (Reinst *)((char *)freep + diff); for(inst=npp->firstinst; insttype){ case OR: case STAR: case PLUS: case QUEST: *(char **)&inst->right += diff; break; case CCLASS: case NCCLASS: *(char **)&inst->right += diff; cl = inst->cp; *(char **)&cl->end += diff; break; } *(char **)&inst->left += diff; } *(char **)&npp->startinst += diff; return npp; } #ifdef DEBUG static void dumpstack(void){ Node *stk; int *ip; print("operators\n"); for(ip=atorstack; ipfirst->type, stk->last->type); } static void dump(Reprog *pp) { Reinst *l; Rune *p; l = pp->firstinst; do{ print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type, l->left-pp->firstinst, l->right-pp->firstinst); if(l->type == RUNE) print("\t%C\n", l->r); else if(l->type == CCLASS || l->type == NCCLASS){ print("\t["); if(l->type == NCCLASS) print("^"); for(p = l->cp->spans; p < l->cp->end; p += 2) if(p[0] == p[1]) print("%C", p[0]); else print("%C-%C", p[0], p[1]); print("]\n"); } else print("\n"); }while(l++->type); } #endif static Reclass* newclass(void) { if(nclass >= NCLASS) rcerror("too many character classes; increase NCLASS"); return &(classp[nclass++]); } static int nextc(Rune *rp) { if(lexdone){ *rp = 0; return 1; } exprp += chartorune(rp, exprp); if(*rp == L'\\'){ exprp += chartorune(rp, exprp); return 1; } if(*rp == 0) lexdone = 1; return 0; } static int lex(int literal, int dot_type) { int quoted; quoted = nextc(&yyrune); if(literal || quoted){ if(yyrune == 0) return END; return RUNE; } switch(yyrune){ case 0: return END; case L'*': return STAR; case L'?': return QUEST; case L'+': return PLUS; case L'|': return OR; case L'.': return dot_type; case L'(': return LBRA; case L')': return RBRA; case L'^': return BOL; case L'$': return EOL; case L'[': return bldcclass(); } return RUNE; } static void debugspan(void) { #ifdef DEBUG int i, nspan; Rune r; nspan = yyclassp->end - yyclassp->spans >>1; fprint(2, "nspan = %d\n", nspan); p = yyclassp->spans; for(i = 0; i < nspan; i++) print("%C %C %.4ux %.4ux\n", p[2*i], p[2*i+1], p[2*i], p[2*i+1]); #endif } static int bldcclass(void) { int type; Rune r[NSPANS*2]; Rune *p, *ep, *np; Rune rune; int quoted; /* we have already seen the '[' */ type = CCLASS; yyclassp = newclass(); /* look ahead for negation */ /* SPECIAL CASE!!! negated classes don't match \n */ ep = r; quoted = nextc(&rune); if(!quoted && rune == L'^'){ type = NCCLASS; quoted = nextc(&rune); *ep++ = L'\n'; *ep++ = L'\n'; } /* parse class into a set of spans */ for(;;){ if(ep == r + nelem(r)){ rcerror("class too large"); return 0; } if(rune == 0){ rcerror("malformed '[]'"); return 0; } if(!quoted && rune == L']') break; if(!quoted && rune == L'-'){ if(ep == r){ rcerror("malformed '[]'"); return 0; } quoted = nextc(&rune); if((!quoted && rune == L']') || rune == 0){ rcerror("malformed '[]'"); return 0; } *(ep-1) = rune; } else { *ep++ = rune; *ep++ = rune; } quoted = nextc(&rune); } /* sort on span start */ for(p = r; p < ep; p += 2){ for(np = p; np < ep; np += 2) if(*np < *p){ rune = np[0]; np[0] = p[0]; p[0] = rune; rune = np[1]; np[1] = p[1]; p[1] = rune; } } /* merge spans */ np = yyclassp->spans; p = r; if(r == ep) yyclassp->end = np; else { np[0] = *p++; np[1] = *p++; for(; p < ep; p += 2) /* overlapping or adjacent ranges? */ if(p[0] <= np[1] + 1){ if(p[1] >= np[1]) np[1] = p[1]; /* coalesce */ } else { np += 2; np[0] = p[0]; np[1] = p[1]; } yyclassp->end = np+2; debugspan(); } return type; } static Reprog* regcomp1(char *s, int literal, int dot_type) { int token; Reprog *pp; /* get memory for the program */ pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s)); if(pp == 0){ regerror("out of memory"); return 0; } freep = pp->firstinst; classp = pp->class; errors = 0; if(setjmp(regkaboom)) goto out; /* go compile the sucker */ lexdone = 0; exprp = s; nclass = 0; nbra = 0; atorp = atorstack; andp = andstack; subidp = subidstack; lastwasand = FALSE; cursubid = 0; /* Start with a low priority operator to prime parser */ pushator(START-1); while((token = lex(literal, dot_type)) != END){ if((token&0300) == OPERATOR) operator(token); else operand(token); } /* Close with a low priority operator */ evaluntil(START); /* Force END */ operand(END); evaluntil(START); #ifdef DEBUG dumpstack(); #endif if(nbra) rcerror("unmatched left paren"); --andp; /* points to first and only operand */ pp->startinst = andp->first; #ifdef DEBUG dump(pp); #endif pp = optimize(pp); #ifdef DEBUG print("start: %d\n", andp->first-pp->firstinst); dump(pp); #endif out: if(errors){ free(pp); pp = 0; } return pp; } extern Reprog* regcomp(char *s) { return regcomp1(s, 0, ANY); } extern Reprog* regcomplit(char *s) { return regcomp1(s, 1, ANY); } extern Reprog* regcompnl(char *s) { return regcomp1(s, 0, ANYNL); } ustar00syssyssys/src/libregexp/regcomp.h 664 0 0 3372 11305066415 13745ustar00syssys/* * substitution list */ #define NSUBEXP 32 typedef struct Resublist Resublist; struct Resublist { Resub m[NSUBEXP]; }; /* * Actions and Tokens (Reinst types) * * 02xx are operators, value == precedence * 03xx are tokens, i.e. operands for operators */ #define RUNE 0177 #define OPERATOR 0200 /* Bitmask of all operators */ #define START 0200 /* Start, used for marker on stack */ #define RBRA 0201 /* Right bracket, ) */ #define LBRA 0202 /* Left bracket, ( */ #define OR 0203 /* Alternation, | */ #define CAT 0204 /* Concatentation, implicit operator */ #define STAR 0205 /* Closure, * */ #define PLUS 0206 /* a+ == aa* */ #define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */ #define ANY 0300 /* Any character except newline, . */ #define ANYNL 0301 /* Any character including newline, . */ #define NOP 0302 /* No operation, internal use only */ #define BOL 0303 /* Beginning of line, ^ */ #define EOL 0304 /* End of line, $ */ #define CCLASS 0305 /* Character class, [] */ #define NCCLASS 0306 /* Negated character class, [] */ #define END 0377 /* Terminate: match found */ /* * regexec execution lists */ #define LISTSIZE 10 #define BIGLISTSIZE (25*LISTSIZE) typedef struct Relist Relist; struct Relist { Reinst* inst; /* Reinstruction of the thread */ Resublist se; /* matched subexpressions in this thread */ }; typedef struct Reljunk Reljunk; struct Reljunk { Relist* relist[2]; Relist* reliste[2]; int starttype; Rune startchar; char* starts; char* eol; Rune* rstarts; Rune* reol; }; extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*); extern void _renewmatch(Resub*, int, Resublist*); extern Relist* _renewemptythread(Relist*, Reinst*, int, char*); extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*); erator stack overflow"); *atorp++ = t; *subidp++ = cursubid; } static Node* popand(int op) { Reinst *inst; if(andp <= &andstack[0]){ regerr2("missing operand for ", op); inst = newinst(NOP); pushand(inst,inst); } return --andp; } static int popatosys/src/libregexp/regerror.c 664 0 0 322 7024574327 14074ustar00syssys#include #include #include "regexp.h" void regerror(char *s) { char buf[132]; strcpy(buf, "regerror: "); strcat(buf, s); strcat(buf, "\n"); write(2, buf, strlen(buf)); exits("regerr"); } sys/src/libregexp/regexec.c 664 0 0 11561 10742223570 13746ustar00syssys#include #include #include "regexp.h" #include "regcomp.h" /* * return 0 if no match * >0 if a match * <0 if we ran out of _relist space */ static int regexec1(Reprog *progp, /* program to run */ char *bol, /* string to run machine on */ Resub *mp, /* subexpression elements */ int ms, /* number of elements at mp */ Reljunk *j ) { int flag=0; Reinst *inst; Relist *tlp; char *s; int i, checkstart; Rune r, *rp, *ep; int n; Relist* tl; /* This list, next list */ Relist* nl; Relist* tle; /* ends of this and next list */ Relist* nle; int match; char *p; match = 0; checkstart = j->starttype; if(mp) for(i=0; irelist[0][0].inst = 0; j->relist[1][0].inst = 0; /* Execute machine once for each character, including terminal NUL */ s = j->starts; do{ /* fast check for first char */ if(checkstart) { switch(j->starttype) { case RUNE: p = utfrune(s, j->startchar); if(p == 0 || s == j->eol) return match; s = p; break; case BOL: if(s == bol) break; p = utfrune(s, '\n'); if(p == 0 || s == j->eol) return match; s = p+1; break; } } r = *(uchar*)s; if(r < Runeself) n = 1; else n = chartorune(&r, s); /* switch run lists */ tl = j->relist[flag]; tle = j->reliste[flag]; nl = j->relist[flag^=1]; nle = j->reliste[flag]; nl->inst = 0; /* Add first instruction to current list */ if(match == 0) _renewemptythread(tl, progp->startinst, ms, s); /* Execute machine until current list is empty */ for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ for(inst = tlp->inst; ; inst = inst->next){ switch(inst->type){ case RUNE: /* regular character */ if(inst->r == r){ if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) return -1; } break; case LBRA: tlp->se.m[inst->subid].sp = s; continue; case RBRA: tlp->se.m[inst->subid].ep = s; continue; case ANY: if(r != '\n') if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) return -1; break; case ANYNL: if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) return -1; break; case BOL: if(s == bol || *(s-1) == '\n') continue; break; case EOL: if(s == j->eol || r == 0 || r == '\n') continue; break; case CCLASS: ep = inst->cp->end; for(rp = inst->cp->spans; rp < ep; rp += 2) if(r >= rp[0] && r <= rp[1]){ if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) return -1; break; } break; case NCCLASS: ep = inst->cp->end; for(rp = inst->cp->spans; rp < ep; rp += 2) if(r >= rp[0] && r <= rp[1]) break; if(rp == ep) if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) return -1; break; case OR: /* evaluate right choice later */ if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle) return -1; /* efficiency: advance and re-evaluate */ continue; case END: /* Match! */ match = 1; tlp->se.m[0].ep = s; if(mp != 0) _renewmatch(mp, ms, &tlp->se); break; } break; } } if(s == j->eol) break; checkstart = j->starttype && nl->inst==0; s += n; }while(r); return match; } static int regexec2(Reprog *progp, /* program to run */ char *bol, /* string to run machine on */ Resub *mp, /* subexpression elements */ int ms, /* number of elements at mp */ Reljunk *j ) { int rv; Relist *relist0, *relist1; /* mark space */ relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); if(relist0 == nil) return -1; relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); if(relist1 == nil){ free(relist1); return -1; } j->relist[0] = relist0; j->relist[1] = relist1; j->reliste[0] = relist0 + BIGLISTSIZE - 2; j->reliste[1] = relist1 + BIGLISTSIZE - 2; rv = regexec1(progp, bol, mp, ms, j); free(relist0); free(relist1); return rv; } extern int regexec(Reprog *progp, /* program to run */ char *bol, /* string to run machine on */ Resub *mp, /* subexpression elements */ int ms) /* number of elements at mp */ { Reljunk j; Relist relist0[LISTSIZE], relist1[LISTSIZE]; int rv; /* * use user-specified starting/ending location if specified */ j.starts = bol; j.eol = 0; if(mp && ms>0){ if(mp->sp) j.starts = mp->sp; if(mp->ep) j.eol = mp->ep; } j.starttype = 0; j.startchar = 0; if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) { j.starttype = RUNE; j.startchar = progp->startinst->r; } if(progp->startinst->type == BOL) j.starttype = BOL; /* mark space */ j.relist[0] = relist0; j.relist[1] = relist1; j.reliste[0] = relist0 + nelem(relist0) - 2; j.reliste[1] = relist1 + nelem(relist1) - 2; rv = regexec1(progp, bol, mp, ms, &j); if(rv >= 0) return rv; rv = regexec2(progp, bol, mp, ms, &j); if(rv >= 0) return rv; return -1; } lastwasand = FALSE; cursubid = 0; /* Start with a low priority operator to prime parser */ pushator(START-1); while((token = lex(literalsys/src/libregexp/regsub.c 664 0 0 2124 11422403227 13561ustar00syssys#include #include #include "regexp.h" /* substitute into one string using the matches from the last regexec() */ extern void regsub(char *sp, /* source string */ char *dp, /* destination string */ int dlen, Resub *mp, /* subexpression elements */ int ms) /* number of elements pointed to by mp */ { char *ssp, *ep; int i; ep = dp+dlen-1; while(*sp != '\0'){ if(*sp == '\\'){ switch(*++sp){ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': i = *sp-'0'; if(mp!=0 && mp[i].sp != 0 && ms>i) for(ssp = mp[i].sp; ssp < mp[i].ep; ssp++) if(dp < ep) *dp++ = *ssp; break; case '\\': if(dp < ep) *dp++ = '\\'; break; case '\0': sp--; break; default: if(dp < ep) *dp++ = *sp; break; } }else if(*sp == '&'){ if(mp!=0 && mp[0].sp != 0 && ms>0) for(ssp = mp[0].sp; ssp < mp[0].ep; ssp++) if(dp < ep) *dp++ = *ssp; }else{ if(dp < ep) *dp++ = *sp; } sp++; } *dp = '\0'; } struct Resublist { Resub m[NSUBEXP]; }; /* * Actions and Tokens (Reinst types) * * 02xx are operators, value == precedence * 03xx are tokens, i.e. operands for operators */ #define RUNE 0177 #define OPERATOR 0200 /* Bitmask of all operators */ #define START 0200 /* Start, used for marker on stack */ #define RBRA 0201 /* Right bracket, ) */ #define LBRA 0202 /* Left bracket, ( */ #define OR 0203 /* Alternation, | sys/src/libregexp/rregexec.c 664 0 0 11104 10742223552 14121ustar00syssys#include #include #include "regexp.h" #include "regcomp.h" /* * return 0 if no match * >0 if a match * <0 if we ran out of _relist space */ static int rregexec1(Reprog *progp, /* program to run */ Rune *bol, /* string to run machine on */ Resub *mp, /* subexpression elements */ int ms, /* number of elements at mp */ Reljunk *j) { int flag=0; Reinst *inst; Relist *tlp; Rune *s; int i, checkstart; Rune r, *rp, *ep; Relist* tl; /* This list, next list */ Relist* nl; Relist* tle; /* ends of this and next list */ Relist* nle; int match; Rune *p; match = 0; checkstart = j->startchar; if(mp) for(i=0; irelist[0][0].inst = 0; j->relist[1][0].inst = 0; /* Execute machine once for each character, including terminal NUL */ s = j->rstarts; do{ /* fast check for first char */ if(checkstart) { switch(j->starttype) { case RUNE: p = runestrchr(s, j->startchar); if(p == 0 || s == j->reol) return match; s = p; break; case BOL: if(s == bol) break; p = runestrchr(s, '\n'); if(p == 0 || s == j->reol) return match; s = p+1; break; } } r = *s; /* switch run lists */ tl = j->relist[flag]; tle = j->reliste[flag]; nl = j->relist[flag^=1]; nle = j->reliste[flag]; nl->inst = 0; /* Add first instruction to current list */ _rrenewemptythread(tl, progp->startinst, ms, s); /* Execute machine until current list is empty */ for(tlp=tl; tlp->inst; tlp++){ for(inst=tlp->inst; ; inst = inst->next){ switch(inst->type){ case RUNE: /* regular character */ if(inst->r == r) if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) return -1; break; case LBRA: tlp->se.m[inst->subid].rsp = s; continue; case RBRA: tlp->se.m[inst->subid].rep = s; continue; case ANY: if(r != '\n') if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) return -1; break; case ANYNL: if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) return -1; break; case BOL: if(s == bol || *(s-1) == '\n') continue; break; case EOL: if(s == j->reol || r == 0 || r == '\n') continue; break; case CCLASS: ep = inst->cp->end; for(rp = inst->cp->spans; rp < ep; rp += 2) if(r >= rp[0] && r <= rp[1]){ if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) return -1; break; } break; case NCCLASS: ep = inst->cp->end; for(rp = inst->cp->spans; rp < ep; rp += 2) if(r >= rp[0] && r <= rp[1]) break; if(rp == ep) if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) return -1; break; case OR: /* evaluate right choice later */ if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle) return -1; /* efficiency: advance and re-evaluate */ continue; case END: /* Match! */ match = 1; tlp->se.m[0].rep = s; if(mp != 0) _renewmatch(mp, ms, &tlp->se); break; } break; } } if(s == j->reol) break; checkstart = j->startchar && nl->inst==0; s++; }while(r); return match; } static int rregexec2(Reprog *progp, /* program to run */ Rune *bol, /* string to run machine on */ Resub *mp, /* subexpression elements */ int ms, /* number of elements at mp */ Reljunk *j ) { Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE]; /* mark space */ j->relist[0] = relist0; j->relist[1] = relist1; j->reliste[0] = relist0 + nelem(relist0) - 2; j->reliste[1] = relist1 + nelem(relist1) - 2; return rregexec1(progp, bol, mp, ms, j); } extern int rregexec(Reprog *progp, /* program to run */ Rune *bol, /* string to run machine on */ Resub *mp, /* subexpression elements */ int ms) /* number of elements at mp */ { Reljunk j; Relist relist0[LISTSIZE], relist1[LISTSIZE]; int rv; /* * use user-specified starting/ending location if specified */ j.rstarts = bol; j.reol = 0; if(mp && ms>0){ if(mp->sp) j.rstarts = mp->rsp; if(mp->ep) j.reol = mp->rep; } j.starttype = 0; j.startchar = 0; if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) { j.starttype = RUNE; j.startchar = progp->startinst->r; } if(progp->startinst->type == BOL) j.starttype = BOL; /* mark space */ j.relist[0] = relist0; j.relist[1] = relist1; j.reliste[0] = relist0 + nelem(relist0) - 2; j.reliste[1] = relist1 + nelem(relist1) - 2; rv = rregexec1(progp, bol, mp, ms, &j); if(rv >= 0) return rv; rv = rregexec2(progp, bol, mp, ms, &j); if(rv >= 0) return rv; return -1; } ; break; case ANYNL: if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) return -1; break; case BOL: if(s == bol || *(s-1) == '\n') continue; break; case EOL: if(s == j->eol || r == 0 || r == '\n') continue; break; case CCLASS: ep = inst->cp->end; for(rp = inst->cp->spans; rp < ep; rp += 2) if(r >= rp[0] && r <= rp[1]){ if(_renewthread(nl, inst->nexsys/src/libregexp/rregsub.c 664 0 0 2204 7271617415 13737ustar00syssys#include #include #include "regexp.h" /* substitute into one string using the matches from the last regexec() */ extern void rregsub(Rune *sp, /* source string */ Rune *dp, /* destination string */ int dlen, Resub *mp, /* subexpression elements */ int ms) /* number of elements pointed to by mp */ { Rune *ssp, *ep; int i; ep = dp+(dlen/sizeof(Rune))-1; while(*sp != '\0'){ if(*sp == '\\'){ switch(*++sp){ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': i = *sp-'0'; if(mp[i].rsp != 0 && mp!=0 && ms>i) for(ssp = mp[i].rsp; ssp < mp[i].rep; ssp++) if(dp < ep) *dp++ = *ssp; break; case '\\': if(dp < ep) *dp++ = '\\'; break; case '\0': sp--; break; default: if(dp < ep) *dp++ = *sp; break; } }else if(*sp == '&'){ if(mp[0].rsp != 0 && mp!=0 && ms>0) if(mp[0].rsp != 0) for(ssp = mp[0].rsp; ssp < mp[0].rep; ssp++) if(dp < ep) *dp++ = *ssp; }else{ if(dp < ep) *dp++ = *sp; } sp++; } *dp = '\0'; } relist1[LISTSIZE]; int rv; /* * use user-specified starting/ending location if specified */ j.starts = bol; j.eol = 0; if(mp && ms>0){ if(mp->sp) j.starts = mp->sp; if(mp->ep) j.eol = mp->ep; } j.starttype = 0; j.startchar = 0; if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) { j.starttype = RUNE; j.startchar = progp->startinst-