f41b982009-05-07Martin Stjernholm // This is a roxen module. Copyright © 2000 - 2009, Roxen IS.
42cd712001-09-03Martin Nilsson 
ed76492000-07-23Stefan Wallström #include <module.h>
// Version string; the "$Id$" keyword is left for the version control
// system to expand.
constant cvs_version = "$Id$";

inherit "module";

// The module takes part both in the first-try pass and in the
// last-resort pass of request handling.
constant module_type = MODULE_FIRST | MODULE_LAST;

constant module_name = "Proxies: HTTP Relay module";

constant module_doc =
  "Smart HTTP relay module. Can relay according to "
  "regular expressions.";

// The relay rules currently in effect; rebuilt from the "patterns"
// variable every time the module is started.
array(Relayer) relays = ({});
// One relayed request. Creating a Relay object opens a connection to
// the remote server described by the url, sends the request, and
// (unless the "stream" option is used) collects the complete response
// and hands it to the originating RequestID via send_result().
class Relay
{
  RequestID id;         // The request being relayed.
  string url;           // Target URL (after pattern substitution).
  multiset options;     // Lower-cased rule flags (rxml, raw, stream, ...).

  string request_data;  // The raw request to send to the remote server.
  string host;          // Remote host, taken from url.
  int port;             // Remote port, taken from url (default 80).
  string file;          // Remote path + query, without the leading "/".

  Stdio.File fd;        // Connection to the remote server.

  // Builds the header mapping sent to the remote server.
  //
  // from: the request whose headers are relayed.
  // trim: if nonzero, none of the client's request headers are copied;
  //       only the proxy related headers below are generated.
  //
  // Returns a mapping from header name to a string or an array of
  // strings (one element per header line).
  mapping make_headers( RequestID from, int trim )
  {
    string remoteaddr = from->remoteaddr;
    if (has_value(remoteaddr, ":")) {
      // IPv6.
      remoteaddr = "[" + remoteaddr + "]";
    }

    string myip = from->port_obj->ip;
    if (has_value(myip, ":")) {
      // IPv6.
      myip = "[" + myip + "]";
    }
    // The port object is reached through port_obj, as everywhere else
    // in this function.
    if (from->port_obj->port != from->port_obj->default_port) {
      myip += ":" + from->port_obj->port;
    }

    // RFC 7239: add a hop describing this server to the ones
    // already recorded for the request.
    array(string|int) hop = ({ "by", '=', myip, ';',
                               "for", '=', remoteaddr, ';' });
    if (from->request_headers->host) {
      hop += ({ "host", '=', from->request_headers->host, ';' });
    }
    hop += ({ "proto", '=', from->port_obj->prot_name });
    array(array(string|int)) forwarded =
      (from->misc->forwarded || ({})) + ({ hop });

    mapping res = ([]);
    if( !trim ) {
      foreach( from->request_headers; string i; string|array(string) v )
      {
        switch( i )
        {
          case "accept-encoding":
            // We need to support the stuff we pass on in the proxy
            // otherwise we might end up with things like double
            // gzipped data.
            break;

          case "connection":
            /* We do not support keep-alive yet. */
            res->Connection = "close";
            break;

          default:
            res[Roxen.canonicalize_http_header (i) ||
                String.capitalize (i)] = v;
            break;
        }
      }
    }

    res += ([
      "Proxy-Software": roxen->version(),
      // RFC 7239
      "Forwarded": map(forwarded, MIME.quote),
      "Host": host + ":" + port,
    ]);

    // Also try to model X-Forwarded-Server after Apaches mod_proxy.
    // The following is ripped from RequestID.url_base.
    string server_host;
    if (Protocol port_obj = from->port_obj) {
      server_host = port_obj->conf_data[from->conf]->hostname;
      if (server_host == "*")
        server_host = from->conf->get_host();
    }
    else
      server_host = my_configuration()->get_host();

    // These are set by Apaches mod_proxy and are more or less
    // defacto standard.
    foreach(([ "X-Forwarded-For": remoteaddr,
               "X-Forwarded-Host": from->request_headers->host,
               "X-Forwarded-Proto": from->port_obj->prot_name,
               "X-Forwarded-Server": server_host,
               // RFC 7230 5.7.1
               "Via": from->clientprot + " " + server_host,
            ]); string field; string|array(string) value) {
      if (!value) continue;

      // Append to any value the client already sent. The headers copied
      // above are stored under their capitalized names, so the existing
      // entry has to be located case insensitively.
      string lc_field = lower_case(field);
      foreach (indices(res), string existing) {
        if (lower_case(existing) != lc_field) continue;
        string|array(string) old_val = m_delete(res, existing);
        if (arrayp(old_val)) {
          value = old_val + ({ value });
        } else if (stringp(old_val)) {
          value = ({ old_val, value });
        }
        break;
      }
      res[field] = value;
    }

    return res;
  }

  // Serializes a header mapping to "Name: value\r\n" lines, sorted by
  // header name. Array values produce one line per element. A
  // content-length header, if present, is always emitted last.
  string encode_headers( mapping q )
  {
    string res = "";
    string content_length = "";
    foreach( sort( indices( q ) ), string h )
    {
      if( lower_case(h) == "content-length" )
        content_length = (string)h + ": " + (string)q[h] + "\r\n";
      else if( arrayp( q[h] ) )
        foreach( q[h], string w )
          res += (string)h + ": " + (string)w + "\r\n";
      else
        res += (string)h + ": " + (string)q[h] + "\r\n";
    }
    return res + content_length;
  }

  // Response data received from the remote server so far.
  string buffer;

  // Read callback: accumulates response data.
  void got_some_more_data( mixed q, string d )
  {
    buffer += d;
  }

  // Close callback: the remote server has sent its complete response.
  // Parses the response headers, optionally rewrites links and/or
  // RXML-parses the body, sends the result to the client and destructs
  // this object.
  void done_with_data( )
  {
    destruct(fd);
    string headers, data;
    string type, charset;
    int code;
    mapping h = ([]);

    if (additional_headers) {
      h = copy_value(additional_headers);
    }

    if( !id ) {
      // The client request is gone; nobody to answer.
      destruct();
      return;
    }

    // Maps a URL found in the relayed document to the corresponding
    // URL on this server.
    string rewrite( string what )
    {
      // in what: URL.
      if( !strlen(what) )
        return what; // local, is OK.

      if( what[0] == '/' ) // absolute, is not OK.
      {
        string base = id->not_query;
        string f2 = (file/"?")[0];
        if( strlen(f2) && search(id->not_query, f2) != -1 )
          base = base[..search(id->not_query, f2)-2];
        return combine_path( base, what[1..] );
      }
      else if( search( what, url ) == 0 )
      {
        return replace( what, url, id->not_query );
      }
      return what;
    };

    // Rewrites the href of <a>, the src of <img> and the action of
    // <form> tags in the document, and returns the new document.
    string do_rewrite( string what )
    {
      Parser.HTML p = Parser.HTML();
      p->xml_tag_syntax(1);
      p->_set_tag_callback(
        lambda(object p, string s) {
          string tag_name = p->tag_name();
          string rewrite_arg = 0;
          switch(tag_name) {
            case "a":    rewrite_arg = "href";   break;
            case "img":  rewrite_arg = "src";    break;
            case "form": rewrite_arg = "action"; break;
          }
          if(rewrite_arg) {
            mapping args = p->tag_args();
            if(string val = args[rewrite_arg]) {
              string new_val = rewrite(val);
              if( val != new_val ) {
                int is_closed = has_suffix(s, "/>");
                args[rewrite_arg] = new_val;
                return ({ Roxen.make_tag(tag_name, args, is_closed) });
              }
            }
          }
        });
      return p->finish( what )->read();
    };

    if( !options->cache )
      NO_PROTO_CACHE();

    // Split the response in headers and body; accept bare LF too.
    if( sscanf( buffer, "%s\r\n\r\n%s", headers, data ) != 2 )
      sscanf( buffer, "%s\n\n%s", headers, data );

    if( headers )
    {
      sscanf( headers, "HTTP/%*[^ ] %d", code );
      foreach( ((headers-"\r")/"\n")[1..], string header )
      {
        // Lines without a colon are ignored.
        if( sscanf( header, "%s:%s", string a, string b ) != 2 )
          continue;

        a = String.trim_all_whites( a );
        b = String.trim_all_whites( b );

        switch( lower_case( a ) )
        {
          case "connection":
          case "content-length":
          case "content-location":
            // Not passed on to the client.
            break;

          case "content-type":
            h["Content-Type"] = b;
            type = b;
            break;

          default:
            // Repeated headers are collected in an array.
            if (h[a]) {
              if (arrayp(h[a]))
                h[a] += ({ b });
              else
                h[a] = ({ h[a], b });
            } else
              h[a] = b;
        }
      }
      if( !type )
        type = "text/html";
      else if( sscanf( type-" ", "text/%*s;charset=%s", charset ) == 2 )
        type = String.trim_all_whites( (type/";")[0] );
    }
#ifdef RELAY_DEBUG
    // headers and data may both be zero here (malformed response).
    werror("RELAY: url: %O, type: %O, data: %O bytes, headers: \n%s\n\n",
           id->not_query, type, data ? sizeof(data) : 0, headers || "");
#endif

    if( !headers || !data )
    {
      // Could not make sense of the response; pass it on untouched.
      mapping q = Roxen.http_string_answer( buffer, "" );
      q->raw = 1;
      id->send_result( q );
      destruct( );
      return;
    }

    int is_text = (lower_case(type) == "text/html" ||
                   lower_case(type) == "text/plain");

    if( options->rxml && code >= 200 && code < 300 && is_text )
    {
      if( charset ) {
        id->set_output_charset( charset );
        // Best effort: if decoding fails the data is used as is.
        catch {
          data = Charset.decoder(charset)->feed(data)->drain();
        };
      }
      // Use the rewritten document; the return value must not be
      // thrown away.
      if( options->rewrite )
        data = do_rewrite( data );
      id->misc->defines = ([]);
      id->misc->defines[" _extra_heads"] = h;
      id->misc->defines[" _error"] = code;
#ifdef RELAY_DEBUG
      werror("RELAY: parsing rxml\n");
#endif
      id->send_result( Roxen.http_rxml_answer( query("pre-rxml") + data +
                                               query("post-rxml"), id ) );
    }
    else if( options->rewrite && is_text )
    {
      id->send_result(([ "data":do_rewrite(data),
                         "type":type,
                         "extra_heads":h,
                         "error":code ]) );
    }
    else
    {
      id->send_result(([ "data":data,
                         "type":type,
                         "extra_heads":h,
                         "error":code ]) );
    }
    destruct();
    return;
  }

  // Request data not yet written to the remote server.
  string obuffer;

  // Write callback: sends as much of the remaining request as the
  // connection accepts.
  void write_more( )
  {
    if( strlen( obuffer ) )
      obuffer = obuffer[ fd->write( obuffer ) .. ];
#ifdef RELAY_DEBUG
    else
      werror("RELAY: Request sent OK\n");
#endif
  }

  // Connect callback. how is nonzero if the connection to the remote
  // server was established.
  void connected( int how )
  {
    if( !how )
    {
#ifdef RELAY_DEBUG
      werror("RELAY: Connection failed: %s (%d)\n",
             strerror (fd->errno()), fd->errno());
#endif
      // Same guard as in done_with_data(): the request may be gone.
      if( id )
      {
        NOCACHE();
        id->send_result(
          Roxen.http_low_answer(Protocols.HTTP.HTTP_GW_TIMEOUT,
                                "504 Gateway Timeout: "
                                "Connection to remote HTTP host failed."));
      }
      destruct();
      return;
    }
#ifdef RELAY_DEBUG
    werror("RELAY: Connection OK\n");
#endif

    // Send headers to remote server. (non-blocking)
    if( options->stream )
    {
      // First send the request, then pipe whatever the remote server
      // answers straight to the client connection.
      Stdio.sendfile( ({ request_data }), 0, 0, 0, 0, fd,
                      lambda(int q) {
#ifdef RELAY_DEBUG
                        werror("RELAY: Request sent OK\n");
#endif
                        Stdio.sendfile( 0, fd, 0, 0, 0, id->my_fd );
                      } );
      destruct();
    }
    else
    {
      obuffer = request_data;
      request_data = 0;
      buffer = "";
      fd->set_nonblocking( got_some_more_data, write_more, done_with_data );
    }
  }

  // Starts relaying the request _id to _url.
  //
  // _options: the lower-cased flags of the matching rule.
  void create( RequestID _id, string _url, multiset _options )
  {
    id = _id;
    url = _url;
    options = _options;

    // Support IPv6 addresses
    Standards.URI uri = Standards.URI(url);
    host = uri->host;
    port = uri->port || 80;
    file = uri->get_path_query();
    if (has_prefix(file, "/"))
      file = file[1..];

    if( options->raw )
      request_data = _id->raw;
    else
    {
      mapping headers = make_headers( id, options->trimheaders );

      request_data = (id->method+" /"+file+" HTTP/1.0\r\n"+
                      encode_headers( headers ) +
                      "\r\n" + id->data );
    }

    if( options->utf8 )
      request_data = string_to_utf8( request_data );
    fd = Stdio.File( );

#ifdef RELAY_DEBUG
    werror("RELAY: Connecting to "+host+":"+port+"\n");
#endif

    // Kludge for bug 3127.
    if (linux) {
      // Blocking connect on Linux.
      if( fd->connect( host, port ) )
        connected( 1 );
      else
        connected( 0 );
      return;
    }

    fd->async_connect( host, port, connected );
  }
}
// Number of relayed requests per pattern; shown by status().
mapping stats = ([ ]);

// One relay rule: a regexp to match the request against, the URL
// template to relay to, and the rule flags.
class Relayer
{
  object r;          // Compiled regexp for pattern.
  string pattern;    // The regexp source.
  string url;        // Target URL; may contain \1, \2, ... references.
  multiset options;  // Lower-cased rule flags.
  int last;          // Nonzero if the rule is only tried as last resort.

  // Returns url with \1, \2, ... replaced by the corresponding
  // submatches in to.
  string do_replace( array(string) to )
  {
    if( !sizeof( to ) )
      return url;

    array(string) from =
      map( indices( to ), lambda( int q ) { return "\\"+(q+1); } );
    // Entries that are not strings are substituted with the empty
    // string; a plain cast would turn a zero into the text "0".
    array(string) subst =
      map( to, lambda( mixed m ) { return stringp( m ) ? m : ""; } );
    return predef::replace( url, from, subst );
  }

  // Tries the rule on the request id. If the pattern matches, a Relay
  // is started for the request and 1 is returned, otherwise 0.
  int(0..1) relay( object id )
  {
    string file = id->not_query;
    if( id->query )
      file = file+"?"+id->query;

    // Workaround widestring deficiency in the regexp module.
    int use_utf8 = String.width (file) > 8;
    if (use_utf8) file = string_to_utf8 (file);

    array(string) split = r->split( file );
    if (!split)
      return 0;

    if (use_utf8)
      for (int i = sizeof (split); i--;)
        if (stringp (split[i]))
          // Catch errors in case the split broke apart a utf8 sequence.
          catch (split[i] = utf8_to_string (split[i]));

    stats[ pattern ]++;
    Relay( id, do_replace( split ), options );
    return 1;
  }

  // p: regexp, u: target URL template, _last: last-resort flag,
  // o: rule flags. Regexp( p ) is called directly, so any error it
  // throws propagates to the caller.
  void create( string p, string u, int _last, multiset o )
  {
    last = _last;
    pattern = p;
    options = o;
    url = u;
    r = Regexp( pattern );
  }
}

/****** module callbacks **/

// Defines the module variables. Nothing is defined when the module is
// instantiated without a configuration.
void create( Configuration c )
{
  if( !c )
    return;

  defvar( "patterns", "", "Relay patterns", TYPE_TEXT,
          "<p>Syntax:\n"
          "<pre>\n"
          "[LAST ]EXTENSION extension CALL url-prefix [rxml] [trimheaders] [raw] [utf8] [cache] [stream] [rewrite]\n"
          "[LAST ]LOCATION location CALL url-prefix [rxml] [trimheaders] [raw] [utf8] [cache] [stream] [rewrite]\n"
          "[LAST ]MATCH regexp CALL url [rxml] [trimheaders] [raw] [utf8] [cache] [stream] [rewrite]\n"
          "</pre> \\1 to \\9 will be replaced with submatches from the "
          "regexp.</p><p>"

          "Rxml, trimheaders etc. are flags. If <b>rxml</b> is specified, "
          "the result of the relay will be RXML-parsed. Trimheaders and raw "
          "are mutually exclusive. If <b>trimheaders</b> is present, only "
          "the most essential headers are sent to the remote server "
          "(actually, no headers at all right now), if <b>raw</b> is "
          "specified, the request is sent to the remote server exactly as it "
          "arrived to Roxen, not even the Host: header is changed. If "
          "<b>utf8</b> is specified the request is utf-8 encoded before it "
          "is sent to the remote server.</p><p>"

          "Cache and stream alter the sending of data to the client. If "
          "<b>cache</b> is specified, the data can end up in the roxen "
          "data cache, if <b>stream</b> is specified, the data is streamed "
          "directly from the server to the client. This disables logging, "
          "headers will be exactly those sent by the remote server, and this "
          "only works for http clients. Less memory is used, however.</p><p>"

          "For <b>EXTENSION</b> and <b>LOCATION</b>, the URL path+query "
          "components (<b>location</b> part trimmed off) is appended to the "
          "<b>url-prefix</b> specified; no replacing is done.</p><p>"

          "Note that /login.xml is a submatch in itself, it will match all paths containing /login.xml, "
          "since /login.xml will be translated to /login.xml(.*) and therefore "
          "will match /login.xml, /foo/login.xml, /bar/foo/login.xml and /login.xml?x=3. "
          "Changing the pattern to ^/login.xml will make it only match /login.xml and anything "
          "after /login.xml - e.g. /login.xml?x=2&amp;y=34.</p><p>"

          "If <b>LAST</b> is specified, the match is only tried if Roxen "
          "fails to find a file (a 404 error). If <b>rewrite</b> is "
          "specified, redirects and file contents are rewritten if possible, "
          "so that links and images point to the correct place.</p><p>"

          "Example:\n"
          "<pre>\n"
          "LOCATION /&lt;path&gt;/ CALL http://&lt;domain&gt;/&lt;path&gt;/\n"
          "</pre></p>");

  defvar("pre-rxml", "",
         "Header-RXML", TYPE_TEXT,
         "Included before the page contents for redirectpatterns with "
         "the 'rxml' attribute set if the content-type is text/*" );

  defvar("post-rxml", "",
         "Footer-RXML", TYPE_TEXT,
         "Included after the page contents for redirectpatterns with "
         "the 'rxml' attribute set if the content-type is text/*" );

  defvar("additional-headers", "",
         "Additional response headers", TYPE_TEXT,
         "Additional headers to add to the response. Write in the format:<br/>"
         "<tt>Header-Name: header-value</tt><br/>One header per line.");
}

// Returns an HTML table with the number of relayed requests for each
// pattern that has matched so far.
string status()
{
  string res = "Relays per regexp:<p>\n"
               "<table cellpadding=0 border=0>";
  foreach( sort(indices(stats)), string s )
    // Patterns may contain characters that are special in HTML.
    res += sprintf("<tr><td>%s</td><td align=right>%d</td></tr>\n",
                   Roxen.html_encode_string( s ), stats[s]);
  return res + "</table>\n";
}
// Set when running on Linux; Relay then connects with a blocking
// connect() instead of async_connect().
int linux;

// Headers from the "additional-headers" variable, added to relayed
// responses. Maps header name to value.
mapping additional_headers = ([]);

// Module start callback: parses the "patterns" and
// "additional-headers" variables into relays and additional_headers.
void start( int i, Configuration c )
{
  if (uname()->sysname == "Linux")
    linux = 1;

  if( !c )
    return;

  relays = ({});
  foreach( (query( "patterns" )-"\r") / "\n" - ({ "" }), string line )
  {
    if( strlen(line) && line[0] == '#' )
      continue;
    // Strip trailing comments.
    sscanf( line, "%s#", line );

    array tokens =
      replace( String.trim_whites( line ), "\t", " ")/" " - ({ "" });
    int last = 0;

    // Lines with too few words are silently ignored.
    if( sizeof( tokens ) <= 2 )
      continue;

    tokens -= ({ "CALL", "call", "OPTIONS", "options" });
    if( sizeof( tokens ) && lower_case( tokens[0] ) == "last" )
    {
      last = 1;
      tokens = tokens[1..];
    }

    // Removing the keywords above may leave nothing to work with.
    if( !sizeof( tokens ) )
    {
      report_warning( "Incomplete relay rule: %s\n", line );
      continue;
    }

    string rule = lower_case( tokens[0] );
    if( (< "match", "location", "extension" >)[ rule ] )
      tokens = tokens[1..];
    else
      // The line is still tried below, with its first word as regexp.
      report_warning( "Unknown rule: %s\n", tokens[0] );

    // Both a pattern and a target URL are needed from here on.
    if( sizeof( tokens ) < 2 )
    {
      report_warning( "Incomplete relay rule: %s\n", line );
      continue;
    }

    switch( rule )
    {
      case "location":
        tokens[0] = replace(tokens[0],
                            ({"*", ".", "?" }),
                            ({ "\\*", "\\.", "\\?" }) )+"(.*)";
        tokens[1] += "\\1";
        break;

      case "extension":
        tokens[1] += "\\1."+tokens[0]+"\\2";
        tokens[0] = "^([^?]*)\\."+
          replace(tokens[0],
                  ({"*", ".", "?" }),
                  ({ "\\*", "\\.", "\\?" }) )
          +"(.*)";
        break;
    }

    if(mixed e = catch ( relays += ({ Relayer( tokens[0], tokens[1], last,
                                               (multiset)map(tokens[2..],
                                                             lower_case))
                                    }) ) )
      // The pattern is passed as an argument, not as part of the
      // format string, since it may contain "%".
      report_warning( "Syntax error in regular expression: %s: %s\n",
                      tokens[0], ((array)e)[0] );
  }

  additional_headers = ([]);
  foreach ((query("additional-headers")-"\r")/"\n", string line) {
    line = String.trim_all_whites(line);

    if (!sizeof(line) || line[0] == '#') {
      continue;
    }

    if (sscanf(line, "%s:%s", string name, string val) == 2) {
      // Tolerate whitespace before the colon.
      name = String.trim_all_whites(name);
      if (!sizeof(name)) {
        continue;
      }
      string n = Roxen.canonicalize_http_header(name) ||
        String.capitalize(name);
      additional_headers[n] = String.trim_all_whites(val);
    }
  }

#ifdef RELAY_DEBUG
  werror("Additional headers: %O\n", additional_headers);
#endif
}

// First-try callback: relays the request if a rule without LAST
// matches. Returns zero if no such rule matched.
mapping first_try( RequestID id )
{
  foreach( relays, Relayer q )
    if( !q->last && q->relay( id ) )
      return Roxen.http_pipe_in_progress( );
  return 0;
}

// Last-resort callback: relays the request if a LAST rule matches.
// Returns zero if no such rule matched.
mapping last_resort( RequestID id )
{
  foreach( relays, Relayer q )
    if( q->last && q->relay( id ) )
      return Roxen.http_pipe_in_progress( );
  return 0;
}