Branch: Tag:

1999-07-15

1999-07-15 16:59:28 by David Hedbor <david@hedbor.org>

Performance tweaking: added supports caching, rewrote request parsing, fixed HTTP/0.9 support, and more.

Rev: server/base_server/configuration.pike:1.204
Rev: server/base_server/roxen.pike:1.306
Rev: server/base_server/roxenlib.pike:1.108
Rev: server/etc/include/config.h:1.15
Rev: server/modules/filesystems/filesystem.pike:1.56
Rev: server/modules/misc/contenttypes.pike:1.13
Rev: server/protocols/http.pike:1.142

6: Inside #if defined(MAGIC_ERROR)
  #ifdef MAGIC_ERROR   inherit "highlight_pike";   #endif - constant cvs_version = "$Id: http.pike,v 1.141 1999/07/05 17:58:00 grubba Exp $"; + constant cvs_version = "$Id: http.pike,v 1.142 1999/07/15 16:59:28 neotron Exp $";   // HTTP protocol module.   #include <config.h>   private inherit "roxenlib";
24: Inside #if defined(PROFILE)
  #ifdef PROFILE   int req_time = HRTIME();   #endif -  +    #ifdef REQUEST_DEBUG - #define DPERROR(X) roxen_perror((X)+"\n") + int footime, bartime; + #define DPERROR(X) bartime = gethrtime()-footime; werror((X)+" (%d)\n", bartime);footime=gethrtime()   #else   #define DPERROR(X)   #endif
280:    "Content-Type: text/html\r\n"    "Content-Length: 0\r\n\r\n");    } -  return -2; +  return 2;   }      private static mixed f, line;   private static int hstart;    - private int parse_got(string s) + private int parse_got(string raw)   {    multiset (string) sup;    array mod_config; -  string a, b, linename, contents; +  string a, b, s, linename, contents;    int config_in_url;       DPERROR(sprintf("HTTP: parse_got(%O)", s)); -  -  raw = s; -  +     if (!line) { -  // Used to search for \r\n, but Netscape 4.5 sends just a \n -  // when doing a proxy-request. +  // We check for \n only if \r\n fails, since Netscape 4.5 sends " +  // just a \n when doing a proxy-request.    // example line:    // "CONNECT mikabran:443 HTTP/1.0\n"    // "User-Agent: Mozilla/4.5 [en] (X11; U; Linux 2.0.35 i586)" -  hstart = search(s, "\n"); +  // Die Netscape, die! *grumble* +  // Luckily the solution below shouldn't ever cause any slowdowns    -  if ((< -1, 0 >)[hstart]) { -  // Not enough data, or malformed request. -  DPERROR(sprintf("HTTP: parse_got(%O): " -  "Not enough data, or malformed request.", +  if (sscanf(raw, "%s\r\n%s", line, s) != 2 && +  sscanf(raw, "%s\n%s", line, s) != 2) { +  // Not enough data. Unless the client writes one byte at a time, +  // this should never happen, really. +  +  DPERROR(sprintf("HTTP: parse_got(%O): Not enough data.",    s)); -  return ([ -1:0, 0:2 ])[hstart]; +  return 0;    } -  +  if(strlen(line) < 4) +  { +  // Incorrect request actually - min possible (HTTP/0.9) is "GET /" +  // but need to support PING of course!    -  if (s[hstart-1] == '\r') { -  line = s[..hstart-2]; -  } else { -  // Kludge for Netscape 4.5 sending bad requests. 
-  line = s[..hstart-1]; +  DPERROR(sprintf("HTTP: parse_got(%O): Malformed request.", +  s)); +  return 1;    }    -  // Parse the command -  int start = search(line, " "); -  if (start != -1) { -  method = upper_case(line[..start-1]); -  -  string l = reverse(line[start+1..]); -  -  int end; -  -  if (!(end = search(l, " "))) { -  // Seems the line has extra spaces at the end. -  // Get rid of them. -  sscanf(l, "%*[ ]%s", l); -  line = line[..sizeof(l)+start]; -  end = search(l, " "); +  // David H new request parse. +  // Less forgiving, more to the spec and hopefully somewhat (although +  // it's marginal) faster. Still forgives incorrect protocols however, +  // since that is how Apache handles "GET /incorrect uri HTTP/1.0". +  string p1,p2,p3; +  switch(sscanf(line+" ", "%s %s %s", p1, p2, p3)) +  { +  case 1: +  // PING... +  if(p1 == "PING") { +  my_fd->write("PONG\r\n"); +  return 2;    } -  if (end != -1) { -  f = line[start+1..sizeof(line)-(end+2)]; -  prot = clientprot = line[sizeof(line)-end..]; +  // only PING is valid here. +  return 1;    -  if (!(< "HTTP/0.9", "HTTP/1.0", "HTTP/1.1" >)[upper_case(prot)]) { -  if (upper_case(prot)[..3] == "HTTP") { -  // Latest implemented version of HTTP implemented by this -  // module is HTTP/1.1. -  prot = "HTTP/1.1"; -  } else { -  // Unknown protocol -  werror(sprintf("HTTP: Bad request: %O\n", line)); -  my_fd->write(sprintf("400 Unknown Protocol HTTP/1.1\r\n\r\n" -  "Protocol %O is not HTTP.\r\n", prot)); -  return -2; +  case 2: +  case 3: +  if(!p3 || !strlen(p3)) { +  // HTTP/0.9 +  clientprot = prot = "HTTP/0.9"; +  f = p2; +  method = "GET"; // 0.9 only supports get. +  s = ""; // no headers... 
+  break;    } -  } -  -  // Check that the request is complete +  // >= HTTP/1.0 +  clientprot = prot = p3; +  f = p2; +  method = upper_case(p1); +  if(!(< "HTTP/1.0", "HTTP/1.1" >)[prot]) +  // Where nice here and assumes HTTP even if the protocol +  // is something very weird (like if a browser sends file names +  // with unencoded spaces. +  prot = "HTTP/1.1";    int end; -  if ((end = search(s, "\r\n\r\n")) == -1) { +  if (!sscanf(s, "%s\r\n\r\n%s", s, data)) {    // No, we need more data.    DPERROR("HTTP: parse_got(): Request is not complete.");    return 0;    } -  data = s[end+4..]; -  s = s[hstart+1..end-1]; -  } else { -  f = line[start+1..]; -  prot = clientprot = "HTTP/0.9"; -  data = s[sizeof(line)+2..]; -  s = ""; // No headers. +  break; +  default: +  // Too many or too few entries -> Hum. +  return 1;    }    } else { -  method = upper_case(line); -  f = "/"; -  prot = clientprot = "HTTP/0.9"; -  } -  } else { +     // HTTP/1.0 or later    // Check that the request is complete    int end; -  if ((end = search(s, "\r\n\r\n")) == -1) { -  // No, we still need more data. +  if (!sscanf(raw, "%s\r\n\r\n%s", s, data)) { +  // No, we need more data.    
DPERROR("HTTP: parse_got(): Request is not complete.");    return 0;    } -  data = s[end+4..]; -  s = s[hstart+1..end-1]; +     }    -  -  if(method == "PING") -  { -  my_fd->write("PONG\r\n"); -  return -2; -  } -  -  if(!(<"CONNECT", "GET", "HEAD", "POST", "PUT", "MOVE", "DELETE">)[method] ) { -  send_result(http_low_answer(501, "<title>Method Not Implemented</title>" -  "\n<h1>Method not implemented.</h1>\n")); -  return -2; -  } -  +     raw_url = f;    time = _time(1); -  +  if(!data) data = "";    DPERROR(sprintf("RAW_URL:%O", raw_url));       if(!remoteaddr)    { -  if(my_fd) catch(remoteaddr = ((my_fd->query_address()||"")/" ")[0]); +  if(my_fd) { +  remoteaddr = my_fd->query_address(); +  if(remoteaddr) +  sscanf(remoteaddr, "%s %*s", remoteaddr); +  }    if(!remoteaddr) {    DPERROR("HTTP: parse_request(): No remote address.");    end(); -  return -2; +  return 2;    }    }    -  +  DPERROR(sprintf("After Remote Addr:%O", f)); +     f = scan_for_query( f );       DPERROR(sprintf("After query scan:%O", f));       f = http_decode_string( f ); -  -  if (sscanf(f, "/<%s>/%s", a, f)==2) +  string prf = f[1..1]; +  if (prf == "<" && sscanf(f, "/<%s>/%s", a, f)==2)    {    config_in_url = 1;    mod_config = (a/",");
430:       DPERROR(sprintf("After cookie scan:%O", f));    -  if ((sscanf(f, "/(%s)/%s", a, f)==2) && strlen(a)) +  if (prf == "(" && (sscanf(f, "/(%s)/%s", a, f)==2) && strlen(a))    {    prestate = aggregate_multiset(@(a/","-({""})));    f = "/"+f;
448:   // sscanf(s, "%s\r\n\r\n%s", s, data);   // s = replace(s, "\n\t", ", ") - "\r";   // Handle rfc822 continuation lines and strip \r -  foreach(s/"\r\n" - ({ "" }), line) +  foreach(s/"\r\n" - ({""}), line)    { -  linename=contents=0; -  sscanf(line, "%s:%s", linename, contents); -  if(linename && contents) +  // DPERROR(sprintf("Header :%s", line)); +  // linename=contents=0; +  +  if(sscanf(line, "%s:%*[ \t]%s", linename, contents) == 3)    { -  +  DPERROR(sprintf("Header-sscanf :%s", linename));    linename=lower_case(linename); -  sscanf(contents, "%*[\t ]%s", contents); +  DPERROR(sprintf("lower-case :%s", linename));       request_headers[linename] = contents; -  +     if(strlen(contents))    {    switch (linename) {    case "content-length": -  misc->len = (int)(contents-" "); +  misc->len = (int)contents;    if(!misc->len) continue;    if(method == "POST")    {
585:    break;       case "connection": -  contents = lower_case(contents); -  +     case "content-type":    misc[linename] = lower_case(contents);    break;
670:    }    }    } -  if(!client) client = ({ "unknown" }); +  +  DPERROR("HTTP: parse_got(): after header scan"); + #ifndef DISABLE_SUPPORTS +  if(!client) { +  client = ({ "unknown" }); +  supports = find_supports("", supports); // This makes it somewhat faster. +  } else    supports = find_supports(lower_case(client*" "), supports); -  + #else +  supports = (< "images", "gifinline", "forms", "mailto">); + #endif +  DPERROR("HTTP: parse_got(): supports");    if(!referer) referer = ({ });    if(misc->proxyauth) {    // The Proxy-authorization header should be removed... So there.
683:    }    raw = tmp2 * "\n";    } -  +     if(config_in_url) {    DPERROR("HTTP: parse_got(): config_in_url");    return really_set_config( mod_config );
703:    if (QUERY(set_cookie_only_once))    cache_set("hosts_for_cookie",remoteaddr,1);    } -  return 1; // Done. +  return 3; // Done.   }      void disconnect()
740: Inside #if defined(KEEP_ALIVE)
  #ifdef KEEP_ALIVE    if(keepit &&    (!(file->raw || file->len <= 0)) -  && (misc->connection || (prot == "HTTP/1.1")) +  && (misc->connection == "keep-alive" || +  (prot == "HTTP/1.1" && misc->connection != "close"))    && my_fd)    {    // Now.. Transfer control to a new http-object. Reset all variables etc..
1281:    else if(!file->type) file->type="text/plain";    }    -  if(!file->raw && prot != "HTTP/0.9") +  if(!file->raw)    { -  string h; -  heads= -  (["MIME-Version":(file["mime-version"] || "1.0"), -  "Content-type":file["type"], -  "Accept-Ranges": "bytes", -  "Server":replace(version(), " ", "·"), -  "Date":http_date(time) ]); -  -  if(file->encoding) -  heads["Content-Encoding"] = file->encoding; -  -  if(!file->error) -  file->error=200; -  -  if(file->expires) -  heads->Expires = http_date(file->expires); -  +  heads = ([]);    if(!file->len)    {    if(objectp(file->file))
1311:    if(file->file && !file->len)    file->len = fstat[1];    -  +  if(prot != "HTTP/0.9") {    heads["Last-Modified"] = http_date(fstat[3]);       if(since)
1325:    }    }    } +  }    if(stringp(file->data))    file->len += strlen(file->data);    } -  +  if(prot != "HTTP/0.9") { +  string h; +  heads += ([ +  "MIME-Version" : (file["mime-version"] || "1.0"), +  "Content-Type" : file["type"], +  "Accept-Ranges" : "bytes", +  "Server" : replace(version(), " ", "·"), + #ifdef KEEP_ALIVE +  "Connection": (misc->connection == "close" ? "close": "Keep-Alive"), + #else +  "Connection" : "close", + #endif +  "Date" : http_date(time) +  ]);    -  +  +  if(file->encoding) +  heads["Content-Encoding"] = file->encoding; +  +  if(!file->error) +  file->error=200; +  +  if(file->expires) +  heads->Expires = http_date(file->expires); +     if(mappingp(file->extra_heads)) {    heads |= file->extra_heads;    }
1385:    }    }    -  array myheads = ({prot+" "+(file->rettext||errors[file->error])}); +  head_string = prot+" "+(file->rettext||errors[file->error])+"\r\n"; +  array tmp_head = ({});    foreach(indices(heads), h)    if(arrayp(heads[h]))    foreach(heads[h], tmp) -  myheads += ({ `+(h,": ", tmp)}); +  tmp_head += ({ `+(h, ": ", tmp) });    else -  myheads += ({ `+(h, ": ", heads[h])}); +  tmp_head += ({ `+(h, ": ", heads[h]) }); +  head_string += tmp_head * "\r\n";    -  +     if(file->len > -1) -  myheads += ({"Content-Length: " + file->len }); - #ifdef KEEP_ALIVE -  myheads += ({ "Connection: Keep-Alive" }); - #endif -  head_string = (myheads+({"",""}))*"\r\n"; +  head_string += "\r\nContent-Length: "+ file->len +"\r\n"; +  head_string += "\r\n";    -  if(conf) conf->hsent+=strlen(head_string||""); +  if(conf) conf->hsent += strlen(head_string);    } -  +  }   #ifdef REQUEST_DEBUG    roxen_perror(sprintf("Sending result for prot:%O, method:%O file:%O\n",    prot, method, file));   #endif /* REQUEST_DEBUG */    -  if(method == "HEAD") -  { -  file->file = 0; -  file->data=""; -  } +     MARK_FD("HTTP handled");    -  +    #ifdef KEEP_ALIVE    if(!leftovers) leftovers = data||"";   #endif
1515:   void got_data(mixed fooid, string s)   {    int tmp; +     MARK_FD("HTTP got data");    remove_call_out(do_timeout);    call_out(do_timeout, 30); // Close down if we don't get more data
1533:    }    }    +     if(cache)    {    s = cache*""+s;    cache = 0;    } -  sscanf(s, "%*[\n\r]%s", s); -  if(strlen(s)) tmp = parse_got(s); +     -  switch(-tmp) +  // If the request starts with newlines, it's a broken request. Really! +  // sscanf(s, "%*[\n\r]%s", s); +  if(strlen(s)) tmp = parse_got(s); +  switch(tmp)    {    case 0:    if(this_object())
1551:       case 1:    DPERROR("HTTP: Stupid Client Error"); -  end(prot+" 500 Stupid Client Error\r\nContent-Length: 0\r\n\r\n"); +  end((prot||"HTTP/1.0")+" 500 Stupid Client Error\r\nContent-Length: 0\r\n\r\n");    return; // Stupid request.       case 2:
1562:       if(conf)    { + #ifndef DISABLE_VIRTUAL_HOSTING    // IP-Less support. -  conf = roxen->find_site_for(this_object()); -  +  roxen->find_site_for(this_object()); + #endif    conf->received += strlen(s);    conf->requests++;    }
1637:   {    if(f)    { -  f->set_nonblocking(); +  MARK_FD("HTTP connection"); +  f->set_nonblocking(got_data, 0, end);    my_fd = f;    conf = c; -  MARK_FD("HTTP connection"); -  my_fd->set_close_callback(end); -  my_fd->set_read_callback(got_data); +  // my_fd->set_close_callback(end); +  // my_fd->set_read_callback(got_data);    // No need to wait more than 30 seconds to get more data.    call_out(do_timeout, 30);    time = _time(1);