#include "i.h"

typedef struct HTTP_Header HTTP_Header;
typedef struct Nameval Nameval;

// Default IP port numbers
enum {
	HTTPD = 80,
	HTTPSD = 443
};

// tstate bits (or'ed into Netconn.tstate by the functions below)
enum {
	THTTP_1_0 = 1,
	TPersist = 2,
	TProxy = 4,
	TSSL = 8
};

// Header fields (in order: general, request, response, entity).
// These values index both hdrnames[] and HTTP_Header.vals[].
enum {
	HCacheControl,
	HConnection,
	HDate,
	HPragma,
	HTransferEncoding,
	HUpgrade,
	HVia,
	HKeepAlive,		// extension
	HAccept,
	HAcceptCharset,
	HAcceptEncoding,
	HAcceptLanguage,
	HAuthorization,
	HExpect,
	HFrom,
	HHost,
	HIfModifiedSince,
	HIfMatch,
	HIfNoneMatch,
	HIfRange,
	HIfUnmodifiedSince,
	HMaxForwards,
	HProxyAuthorization,
	HRange,
	HReferer,
	HUserAgent,
	HCookie,		// extension
	HAcceptRanges,
	HAge,
	HLocation,
	HProxyAuthenticate,
	HPublic,
	HRetryAfter,
	HServer,
	HSetProxy,
	HVary,
	HWarning,
	HWWWAuthenticate,
	HContentDisposition,
	HSetCookie,
	HRefresh,		// extensions
	HWindowTarget,
	HPICSLabel,		// more extensions
	HAllow,
	HContentBase,
	HContentEncoding,
	HContentLanguage,
	HContentLength,
	HContentLocation,
	HContentMD5,
	HContentRange,
	HContentType,
	HETag,
	HExpires,
	HLastModified,
	HXReqTime,
	HXRespTime,
	HXUrl,			// our extensions, for cached entities
	NumHfields
};

struct HTTP_Header {
	Rune* startline;
	// following four fields only filled in if this is a response header
	int protomajor;
	int protominor;
	int code;
	Rune* reason;
	Rune** vals;		// NumHfields slots indexed by the H* enum; nil means no value
};

struct Nameval {
	Rune* key;
	Rune* val;
	Nameval* next;		// in list of namevals
};

// Canonical header names as written on the wire.
// (track above enumeration: entry k names field k)
Rune* hdrnames[]= {
	L"Cache-Control",
	L"Connection",
	L"Date",
	L"Pragma",
	L"Transfer-Encoding",
	L"Upgrade",
	L"Via",
	L"Keep-Alive",
	L"Accept",
	L"Accept-Charset",
	L"Accept-Encoding",
	L"Accept-Language",
	L"Authorization",
	L"Expect",
	L"From",
	L"Host",
	L"If-Modified-Since",
	L"If-Match",
	L"If-None-Match",
	L"If-Range",
	L"If-Unmodified-Since",
	L"Max-Forwards",
	L"Proxy-Authorization",
	L"Range",
	L"Referer",
	L"User-Agent",
	L"Cookie",
	L"Accept-Ranges",
	L"Age",
	L"Location",
	L"Proxy-Authenticate",
	L"Public",
	L"Retry-After",
	L"Server",
	L"Set-Proxy",
	L"Vary",
	L"Warning",
	L"WWW-Authenticate",
	L"Content-Disposition",
	L"Set-Cookie",
	L"Refresh",
	L"Window-Target",
	L"PICS-Label",
	L"Allow",
	L"Content-Base",
	L"Content-Encoding",
	L"Content-Language",
	L"Content-Length",
	L"Content-Location",
	L"Content-MD5",
	L"Content-Range",
	L"Content-Type",
	L"ETag",
	L"Expires",
	L"Last-Modified",
	L"X-Req-Time",
	L"X-Resp-Time",
	L"X-Url"
};

// For fast lookup; track above, and keep sorted and lowercase
StringInt hdrtable[]= {
	{L"accept", HAccept},
	{L"accept-charset", HAcceptCharset},
	{L"accept-encoding", HAcceptEncoding},
	{L"accept-language", HAcceptLanguage},
	{L"accept-ranges", HAcceptRanges},
	{L"age", HAge},
	{L"allow", HAllow},
	{L"authorization", HAuthorization},
	{L"cache-control", HCacheControl},
	{L"connection", HConnection},
	{L"content-base", HContentBase},
	{L"content-disposition", HContentDisposition},
	{L"content-encoding", HContentEncoding},
	{L"content-language", HContentLanguage},
	{L"content-length", HContentLength},
	{L"content-location", HContentLocation},
	{L"content-md5", HContentMD5},
	{L"content-range", HContentRange},
	{L"content-type", HContentType},
	{L"cookie", HCookie},
	{L"date", HDate},
	{L"etag", HETag},
	{L"expect", HExpect},
	{L"expires", HExpires},
	{L"from", HFrom},
	{L"host", HHost},
	{L"if-match", HIfMatch},
	{L"if-modified-since", HIfModifiedSince},
	{L"if-none-match", HIfNoneMatch},
	{L"if-range", HIfRange},
	{L"if-unmodified-since", HIfUnmodifiedSince},
	{L"keep-alive", HKeepAlive},
	{L"last-modified", HLastModified},
	{L"location", HLocation},
	{L"max-forwards", HMaxForwards},
	{L"pics-label", HPICSLabel},
	{L"pragma", HPragma},
	{L"proxy-authenticate", HProxyAuthenticate},
	{L"proxy-authorization", HProxyAuthorization},
	{L"public", HPublic},
	{L"range", HRange},
	{L"referer", HReferer},
	{L"refresh", HRefresh},
	{L"retry-after", HRetryAfter},
	{L"server", HServer},
	{L"set-cookie", HSetCookie},
	{L"set-proxy", HSetProxy},
	{L"transfer-encoding", HTransferEncoding},
	{L"upgrade", HUpgrade},
	{L"user-agent", HUserAgent},
	{L"vary", HVary},
	{L"via", HVia},
	{L"warning", HWarning},
	{L"window-target", HWindowTarget},
	{L"www-authenticate", HWWWAuthenticate},
	{L"x-req-time", HXReqTime},
	{L"x-resp-time", HXRespTime},
	{L"x-url", HXUrl}
};
#define NHDRTABLE (sizeof(hdrtable)/sizeof(StringInt))

StringInt* mediatable;
Rune* agent;
int dbghttp;

static HTTP_Header* newhttpheader(void);
static int readhttpheader(HTTP_Header* h, int fd, uchar* buf, int buflen, int* preststart, int* prestend);
static int writehttpheader(HTTP_Header* h, int fd);
static void addhttpheaderval(HTTP_Header* h, int key, Rune* val);
static Nameval* namevals(Rune* s, int ns, int sep);
static int namevalfind(Nameval* l, Rune* key, Rune** pans);
static Header* hdrconv(HTTP_Header* hh, ParsedUrl* u, uchar* initcontent, int initlen);
static void setmtype(Header* hdr, Rune* s);
static Rune* trim(Rune* s);
static int copyaslatin1(uchar* a, Rune* s, int ns, int i, int addcrlf);
static Rune* gettok(Rune* s, int* pi, int n);
static Rune* getqstring(Rune* s, int* pi, int n);
static void closeconn(Netconn* nc);

// Set up the module globals from config: the media type lookup table,
// the User-Agent string, and the http debug level (config.dbg['n']).
void
httpinit(void)
{
	mediatable = makestrinttab(mnames, NMEDIATYPES);
	agent = config.agentname;
	dbghttp = config.dbg['n'];
}

// Dial the server for nc (or the configured proxy, if any).
// On success mark nc connected and set its state to NCgethdr;
// on failure record the error in bs->err and close the connection.
// HTTPS connections are dialed but then rejected with ERRunsupscheme.
void
httpconnect(Netconn* nc, ByteSource* bs)
{
	int port;
	Rune* dialhost;
	int err;
	char dir[SMALLBUFSIZE];
	char addr[BIGBUFSIZE];

	if(nc->scheme == HTTPS)
		nc->tstate |= TSSL;
	if(config.httpminor == 0)
		nc->tstate |= THTTP_1_0;
	port = nc->port;
	if(config.httpproxy != nil) {
		// talk to the proxy instead of the origin server
		nc->tstate |= TProxy;
		dialhost = Strndup(config.httpproxy->host, config.httpproxy->nhost);
		if(config.httpproxy->nport != 0)
			port = Strtol(config.httpproxy->port, nil, 10);
	}
	else
		dialhost = nc->host;
	snprint(addr, sizeof(addr), "tcp!%S!%d", dialhost, port);
	err = 0;
	if(dbghttp)
		trace("http %d, bs %d: dialing %s\n", nc->id, bs->id, addr);
	nc->dfd = dial(addr, nil, dir, &nc->cfd);
	if(nc->dfd < 0)
		err = ERRconnecterr;
	else {
		if(dbghttp)
			trace("http %d, bs %d: connected\n", nc->id, bs->id);
		if(nc->tstate&TSSL)
			err = ERRunsupscheme;
	}
	if(!err) {
		nc->connected = 1;
		nc->state = NCgethdr;
	}
	else {
		if(dbghttp)
			trace("http %d, bs %d: connection failed: %S\n", nc->id, bs->id, errphrase(err));
		bs->err = err;
		closeconn(nc);
	}
}

// Build the request for bs->req and write it (and the body, for HPost) to nc->dfd.
// When going through a proxy the full URL is sent as the request target;
// otherwise only pstart + path [+ '?' + query].
// On a write failure set bs->err to ERRwriteerr and close the connection.
void
httpwritereq(Netconn* nc, ByteSource* bs)
{
	ReqInfo* req;
	ParsedUrl* u;
	Rune* requ;
	Rune* httpvers;
	HTTP_Header* reqhdr;
	int err;
	Rune* p;
	int mlen, rulen;
	int rv;
	int n;

	// Prepare the request
	req = bs->req;
	u = req->url;
	if(nc->tstate&TProxy)
		requ = u->url;
	else {
		n = u->npstart + u->npath + (u->nquery? u->nquery+1 : 0);
		requ = newstr(n);
		p = Stradd(requ, u->pstart, u->npstart);
		p = Stradd(p, u->path, u->npath);
		if(u->nquery != 0) {
			*p++ = '?';
			p = Stradd(p, u->query, u->nquery);
		}
		*p = 0;
	}
	if(nc->tstate&THTTP_1_0)
		httpvers = L"1.0";
	else
		httpvers = L"1.1";
	reqhdr = newhttpheader();
	mlen = Strlen(hmeth[req->method]);
	rulen = Strlen(requ);
	// method + ' ' + target + " HTTP/" (6) + version (3); see Stradds below
	reqhdr->startline = p = newstr(mlen + 1 + rulen + 6 + 3);
	p = Stradd(p, hmeth[req->method], mlen);
	*p++ = ' ';
	p = Stradd(p, requ, rulen);
	p = Stradd(p, L" HTTP/", 6);
	p = Stradd(p, httpvers, 3);
	*p = 0;
	addhttpheaderval(reqhdr, HHost, Strndup(u->host,u->nhost));
	addhttpheaderval(reqhdr, HUserAgent, agent);
	if(req->auth != nil)
		addhttpheaderval(reqhdr, HAuthorization, Strdup2(L"Basic ", req->auth));
	if(req->method == HPost) {
		addhttpheaderval(reqhdr, HContentLength, ltoStr(req->bodylen));
		addhttpheaderval(reqhdr, HContentType, L"application/x-www-form-urlencoded");
	}

	// Issue the request
	err = 0;
	if(dbghttp > 1) {
		trace("http %d, bs %d: writing request:\n", nc->id, bs->id);
		writehttpheader(reqhdr, 1);	// echo the header on fd 1
	}
	rv = writehttpheader(reqhdr, nc->dfd);
	if(rv >= 0 && req->method == HPost) {
		if(dbghttp > 1)
			trace("http %d, bs %d: writing body:\n%S\n", nc->id, bs->id, toStr(req->body, req->bodylen, UTF_8));
		rv = write(nc->dfd, req->body, req->bodylen);
	}
	if(rv < 0)
		err = ERRwriteerr;
	if(err) {
		if(dbghttp)
			trace("http %d, bs %d: error: %S\n", nc->id, bs->id, errphrase(err));
		bs->err = err;
		closeconn(nc);
	}
}

#define HREADLEN 8000

// Read and parse the response header from nc->dfd and convert it into bs->hdr.
// Bytes read beyond the header are left in nc->tbuf[nc->tn1:nc->tn2]
// for httpgetdata to pick up.
// On a read/parse error set bs->err and close the connection.
void
httpgethdr(Netconn* nc, ByteSource* bs)
{
	HTTP_Header* resph;
	uchar* hbuf;
	uchar* initcontent;
	int err;
	int i;
	int j;

	resph = newhttpheader();
	hbuf = (uchar*)emalloc(HREADLEN);
	err = readhttpheader(resph, nc->dfd, hbuf, HREADLEN, &i, &j);
	if(err) {
		if(!(nc->tstate&THTTP_1_0)) {
			if(dbghttp)
				trace("http %d, bs %d: switching to HTTP/1.0\n", nc->id, bs->id);
			nc->tstate |= THTTP_1_0;
		}
	}
	else {
		if(dbghttp) {
			trace("http %d, bs %d: got response header:\n", nc->id, bs->id);
			writehttpheader(resph, 1);
			trace("http %d: %d bytes remaining from read\n", nc->id, j - i);
		}
		if(resph->protomajor == 1) {
			if(!(nc->tstate&THTTP_1_0) && resph->protominor == 0) {
				nc->tstate |= THTTP_1_0;
				if(dbghttp)
					trace("http %d: switching to HTTP/1.0\n", nc->id);
			}
		}
		else if(warn)
			trace("warning: unimplemented major protocol %d.%d\n", resph->protomajor, resph->protominor);
		if(j > i) {
			// keep the over-read bytes for httpgetdata
			nc->tbuf = hbuf;
			nc->tbuflen = HREADLEN;
			nc->tn1 = i;
			nc->tn2 = j;
			initcontent = hbuf + i;
		}
		else {
			nc->tbuf = nil;
			nc->tbuflen = 0;
			nc->tn1 = 0;
			nc->tn2 = 0;
			// no initial content: pass nil rather than offsetting a nil pointer
			initcontent = nil;
		}
		bs->hdr = hdrconv(resph, bs->req->url, initcontent, j-i);
		if(bs->hdr->length == 0 && (nc->tstate&THTTP_1_0))
			closeconn(nc);
	}
	if(err) {
		if(dbghttp)
			trace("http %d, bs %d: error %S\n", nc->id, bs->id, errphrase(err));
		bs->err = err;
		closeconn(nc);
	}
}

// Get more body data into bs->data, advancing bs->edata.
// Any bytes over-read by httpgethdr are consumed first; otherwise one read()
// is issued on nc->dfd.  A read of 0 bytes closes the connection; a negative
// read sets bs->err to ERRreaderr (and then closes the connection).
void
httpgetdata(Netconn* nc, ByteSource* bs)
{
	uchar* buf;
	int n;
	int buflen;

	buf = bs->data;
	buflen = bs->dalloclen;
	n = 0;
	if(nc->tbuf != nil) {
		// initial data from overread of header
		n = nc->tn2 - nc->tn1;
		// Append after whatever is already in the buffer (bs->edata bytes),
		// growing it if the leftover bytes do not fit.
		if(buflen - bs->edata < n) {
			if(warn)
				trace("more initial data than specified length\n");
			buflen = bs->edata + n;
			bs->data = buf = (uchar*)erealloc(buf, buflen);
			bs->dalloclen = buflen;
		}
		memmove(buf+bs->edata, nc->tbuf+nc->tn1, n);
		nc->tbuf = nil;
		nc->tbuflen = 0;
		nc->tn1 = 0;
		nc->tn2 = 0;
	}
	if(n == 0)
		n = read(nc->dfd, buf+bs->edata, buflen - bs->edata);
	if(dbghttp > 1)
		trace("http %d, bs %d: read %d bytes\n", nc->id, bs->id, n);
	if(n <= 0) {
		if(n < 0)
			bs->err = ERRreaderr;
		else
			closeconn(nc);
	}
	else {
		bs->edata += n;
		// NOTE(review): 100000000 looks like a sentinel for "length unknown"
		// set elsewhere -- confirm against the code that assigns hdr->length.
		if(bs->edata == buflen && bs->hdr->length != 100000000) {
			if(nc->tstate&THTTP_1_0)
				closeconn(nc);
		}
	}
	if(bs->err) {
		if(dbghttp)
			trace("http %d, bs %d: error %S\n", nc->id, bs->id, errphrase(bs->err));
		closeconn(nc);
	}
}

// Return the default port for scheme: HTTPSD for HTTPS, HTTPD for anything else.
int
httpdefaultport(int scheme)
{
	if(scheme == HTTPS)
		return HTTPSD;
	else
		return HTTPD;
}

// Allocate an HTTP_Header with zeroed scalar fields and a vals array
// of NumHfields entries obtained from emallocz.
static HTTP_Header*
newhttpheader(void)
{
	HTTP_Header* h;

	h = (HTTP_Header*)emalloc(sizeof(HTTP_Header));
	h->startline = nil;
	h->protomajor = 0;
	h->protominor = 0;
	h->code = 0;
	h->reason = nil;
	h->vals = emallocz(NumHfields * sizeof(Rune*));
	return h;
}

// Read into supplied buf.
// Returns 0 if read was ok, else an error code
// (ERReof if no status line could be read, ERRhttperr otherwise).
// Sets *preststart to start of non-header bytes
// Sets *prestend to end of non-header bytes
// If bytes > 127 appear, assume Latin-1
//
// Header values added will always be trimmed (via trimwhite).
static int
readhttpheader(HTTP_Header* h, int fd, uchar* buf, int buflen, int* preststart, int* prestend)
{
	int i;
	int j;
	uchar* aline;
	int alinelen;
	int eof;
	Rune* vers;
	int verslen;
	Rune* srest;
	int srestlen;
	Rune* scode;
	int scodelen;
	Rune* reason;
	int reasonlen;
	int ok;
	int prevkey;
	Rune* smaj;
	int smajlen;
	Rune* vrest;
	int vrestlen;
	int key;
	Rune* p;
	int k;
	Rune* q;
	Rune* nam;
	int namlen;
	Rune* val;
	int vallen;
	Rune* line;
	int linelen;

	i = 0;
	j = 0;
	*preststart = 0;
	*prestend = 0;
	aline = nil;
	eof = getline(fd, buf, buflen, &i, &j, &aline, &alinelen);
	if(eof)
		return ERReof;
	h->startline = toStr(aline, alinelen, ISO_8859_1);
	linelen = alinelen;
	if(dbghttp > 1)
		trace("header read, startline=%S\n", h->startline);

	// Status line: "HTTP/<major>.<minor> <3-digit code> <reason>"
	splitl(h->startline, linelen, L" ", &vers, &verslen, &srest, &srestlen);
	if(srestlen > 0) {
		// step over the separating space
		srest++;
		srestlen--;
	}
	splitl(srest, srestlen, L" ", &scode, &scodelen, &reason, &reasonlen);
	ok = 1;
	if(verslen >= 8 && !Strncmpci(vers, 5, L"http/")) {
		splitl(vers+5, verslen-5, L".", &smaj, &smajlen, &vrest, &vrestlen);
		if(smajlen == 0 || vrestlen <= 1)
			ok = 0;
		else {
			h->protomajor = Strtol(smaj, nil, 10);
			if(h->protomajor < 1)
				ok = 0;
			else
				h->protominor = Strtol(vrest+1, nil, 10);
		}
		if(scodelen != 3)
			ok = 0;
		else {
			h->code = Strtol(scode, nil, 10);
			if(h->code < 100)
				ok = 0;
		}
		if(reasonlen > 1)
			h->reason = Strndup(reason+1, reasonlen-1);
		else
			h->reason = nil;
	}
	else
		ok = 0;
	if(!ok)
		return ERRhttperr;

	// Header lines, up to the first empty line
	prevkey = -1;
	while(alinelen > 0) {
		eof = getline(fd, buf, buflen, &i, &j, &aline, &alinelen);
		if(eof)
			return ERRhttperr;
		if(alinelen == 0)
			break;
		line = toStr(aline, alinelen, ISO_8859_1);
		linelen = alinelen;
		if(dbghttp > 1)
			trace("%S\n", line);
		if(isspace(line[0])) {
			// continuation of the previous header's value
			if(prevkey < 0) {
				if(warn)
					trace("warning: header continuation line at beginning: %S\n", line);
			}
			else {
				trimwhite(line, linelen, &val, &vallen);
				if(vallen > 0) {
					if(h->vals[prevkey] == nil) {
						// previous line had an empty value, so there is nothing to append to
						h->vals[prevkey] = Strndup(val, vallen);
					}
					else {
						k = Strlen(h->vals[prevkey]);
						p = newstr(k + 1 + vallen);
						q = Stradd(p, h->vals[prevkey], k);
						*q++ = ' ';
						q = Stradd(q, val, vallen);
						*q = 0;
						h->vals[prevkey] = p;
					}
				}
			}
		}
		else {
			splitl(line, linelen, L":", &nam, &namlen, &val, &vallen);
			if(vallen == 0) {
				if(warn)
					trace("warning: header line has no colon: %S\n", line);
			}
			else {
				if(!lookup(hdrtable, NHDRTABLE, nam, namlen, &key)) {
					if(warn)
						trace("warning: unknown header field: %S\n", line);
				}
				else {
					// val starts at the ':'; skip it before trimming
					trimwhite(val+1, vallen-1, &val, &vallen);
					if(vallen > 0)
						addhttpheaderval(h, key, Strndup(val, vallen));
					prevkey = key;
				}
			}
		}
	}
	*preststart = i;
	*prestend = j;
	return 0;
}

// Write in big hunks. Convert to Latin1.
// Return last write() return value.
static int writehttpheader(HTTP_Header* h, int fd) { int i; int buflen; int need; int key; int n; int k; Rune* val; uchar* buf; uchar xbuf[BIGBUFSIZE]; i = 0; buf = xbuf; buflen = sizeof(xbuf); n = Strlen(h->startline); need = n + 2 + 2; if(need > buflen) { buf = (uchar*)emalloc(need); buflen = need; } i = copyaslatin1(buf, h->startline, n, i, 1); for(key = 0; key < NumHfields; key++) { val = h->vals[key]; if(val != nil) { k = Strlen(val); n = Strlen(hdrnames[key]); need = k + n + 4 + 2; if(i + need > buflen) { do buflen += BIGBUFSIZE; while(i+need > buflen); if(buf == xbuf) { buf = (uchar*)emalloc(buflen); memmove(buf, xbuf, i); } else buf = (uchar*)erealloc(buf, buflen); } i = copyaslatin1(buf, hdrnames[key], n, i, 0); buf[i++] = ':'; buf[i++] = ' '; i = copyaslatin1(buf, val, k, i, 1); } } buf[i++] = '\r'; buf[i++] = '\n'; n = 0; k = 0; while(k < i) { n = write(fd, buf+k, i - k); if(n <= 0) break; k += n; } return n; } // Add val for given key. static void addhttpheaderval(HTTP_Header* h, int key, Rune* val) { Rune* oldv; oldv = h->vals[key]; if(oldv != nil) { // check that hdr type allows list of things switch(key) { case HAccept: case HAcceptCharset: case HAcceptEncoding: case HAcceptLanguage: case HAcceptRanges: case HCacheControl: case HConnection: case HContentEncoding: case HContentLanguage: case HIfMatch: case HIfNoneMatch: case HPragma: case HPublic: case HUpgrade: case HVia: case HWarning: case HWWWAuthenticate: case HExpect: case HSetCookie: val = Strdup3(oldv, L", ", val); break; default: if(warn) trace("warning: multiple %S headers not allowed\n", hdrnames[key]); break; } } h->vals[key] = val; } // Split s[0:n] (guaranteed trimmed) into sep-separated list of one of // token // token = token // token = "quoted string" // and make a list of Namevals from these. 
static Nameval* namevals(Rune* s, int n, int sep) { Nameval* ans; Nameval* nv; int i; Rune* tok; Rune* val; ans = nil; i = 0; while(i < n) { tok = gettok(s, &i, n); if(tok == nil) break; val = nil; while(i < n && isspace(s[i])) i++; if(i == n || s[i] == sep) i++; else if(s[i] == '=') { while(i < n && isspace(s[i])) i++; if(s[i] == '"') val = getqstring(s, &i, n); else val = gettok(s, &i, n); } else break; nv = (Nameval*)emalloc(sizeof(Nameval)); nv->key = tok; nv->val = val; nv->next = ans; ans = nv; } if(warn && i < n) trace("warning: failed to parse namevals: '%S'\n", s); return ans; } // Look for something matching key (should be lowercase) // in list l, and if found, return 1 and set *pans to corresponding val. // Otherwise return 0. static int namevalfind(Nameval* l, Rune* key, Rune** pans) { for(; l != nil; l = l->next) if(!Strncmpci(l->key, Strlen(l->key), key)) { *pans = l->val; return 1; } *pans = nil; return 0; } static Header* hdrconv(HTTP_Header* hh, ParsedUrl* u, uchar* initcontent, int initlen) { Header* hdr; Rune* s; hdr = newheader(); hdr->code = hh->code; hdr->actual = u; s = hh->vals[HContentBase]; if(s != nil) hdr->base = makeurl(s, 0); else hdr->base = hdr->actual; s = hh->vals[HLocation]; if(s != nil) hdr->location = makeurl(s, 0); s = hh->vals[HContentLength]; if(s != nil) hdr->length = Strtol(s, nil, 10); else hdr->length = -1; s = hh->vals[HContentType]; if(s != nil) setmtype(hdr, s); if(hdr->mtype == UnknownType) setmediatype(hdr, u->path, initcontent, initlen); hdr->msg = hh->reason; hh->reason = nil; hdr->refresh = hh->vals[HRefresh]; hh->vals[HRefresh] = nil; hdr->chal = hh->vals[HWWWAuthenticate]; hh->vals[HWWWAuthenticate] = nil; s = hh->vals[HContentEncoding]; if(s != nil) { if(warn) trace("warning: unhandled content encoding: %S\n", s); hdr->mtype = UnknownType; } hdr->warn = hh->vals[HWarning]; hh->vals[HWarning] = nil; return hdr; } // Set hdr's media type and chset (if a text type). 
// If can't set media type, leave it alone (caller will guess). static void setmtype(Header* hdr, Rune* s) { int n; Rune* ty; int tylen; Rune* parms; int parmslen; int val; int cty; Nameval* nvs; n = Strlen(s); splitl(s, n, L";", &ty, &tylen, &parms, &parmslen); trimwhite(ty, tylen, &ty, &tylen); if(ty == nil) return; if(lookup(mediatable, NMEDIATYPES, ty, tylen, &val)) { hdr->mtype = val; hdr->chset = ISO_8859_1; if(parmslen > 0 && val >= TextPlain && val <= TextSgml) { nvs = namevals(parms+1, parmslen-1, ';'); if(namevalfind(nvs, L"chset", &s)) { cty = Strlookup(chsetnames, NCHARSETS, s, Strlen(s)); if(cty >= 0) hdr->chset = cty; else if(warn) trace("warning: unknown character set in %S\n", s); } } } else { if(warn) trace("warning: unknown media type in %S\n", s); } } // Copy s[0:ns] into a[i:], converting to Latin1. // Add cr/lf if addcrlf is true. // Assume caller has checked that a has enough room. static int copyaslatin1(uchar* a, Rune* s, int ns, int i, int addcrlf) { int k; int c; for(k = 0; k < ns; k++) { c = s[k]; if(c < 256) a[i++] = c; else { if(warn) trace("warning: non-latin1 char in header ignored: '%C'\n", c); } } if(addcrlf) { a[i++] ='\r'; a[i++] = '\n'; } return i; } // Look for token starting at s[*pi] and not going further // than s[n-1]. // Return emalloced string containing the token and update // i to be next place to look. static Rune* gettok(Rune* s, int* pi, int n) { int i; int is; int c; i = *pi; while(i < n && isspace(s[i])) i++; if(i == n) { *pi = i; return nil; } for(is = i; i < n; i++) { c = s[i]; if(isspace(c) || iscntrl(c) || inclass(c, L"()<>@,;:\\\"/[]?={}")) break; } *pi = i; return Strndup(s+is, i-is); } // Like gettok, but look for quoted string, and return value without quotes. 
static Rune* getqstring(Rune* s, int* pi, int n) { int i; int is; int c; i = *pi; while(i < n && isspace(s[i])) i++; if(i == n || s[i] != '"') { *pi = i; return nil; } for(is = ++i; i < n; i++) { c = s[i]; if(c == '\\') i++; else if(c == '"') { *pi = i+1; return Strndup(s+is, i-is); } } if(warn) trace("warning: quoted string not closed: %S\n", s+*pi); *pi = i; return Strndup(s+is, i-is); } // Close nc's network connection file descriptors // and mark nc as unconnected. static void closeconn(Netconn* nc) { if(nc->dfd >= 0) { close(nc->dfd); close(nc->cfd); nc->dfd = -1; nc->cfd = -1; } nc->connected = 0; }