Roxen.git / server / modules / tags / insert_cached_href.pike

version» Context lines:

Roxen.git/server/modules/tags/insert_cached_href.pike:1:   // This is a roxen module. Copyright © 2000 - 2009, Roxen IS.   //      #include <module.h>   inherit "module";      //<locale-token project="mod_insert_cached_href">LOCALE</locale-token>   #define LOCALE(X,Y) _DEF_LOCALE("mod_insert_cached_href",X,Y)    - constant cvs_version = "$Id: insert_cached_href.pike,v 1.29 2010/03/08 16:09:20 grubba Exp $"; + constant cvs_version = "$Id: insert_cached_href.pike,v 1.30 2010/12/01 22:07:27 mast Exp $";      constant thread_safe = 1;   constant module_type = MODULE_TAG;   LocaleString module_name = LOCALE(1, "Tags: Insert cached href");   LocaleString module_doc = LOCALE(2, "This module contains the RXML tag \"insert "    "cached-href\". Useful when implementing e.g."    " RSS syndication.");      #if DEBUG_INSERT_CACHED_HREF   #define DWRITE(x) report_debug("INSERT_CACHED_HREF: " + x + "\n")
Roxen.git/server/modules/tags/insert_cached_href.pike:179:       return 0;   }      /*    Takes action based on HTTP status codes in reply.    Synchronous:   */   public string get_result_sync(HTTPClient client, mapping args, mapping header) {    if (!is_redirect(client->status) || !MAX_REDIRECTS) -  return decode_data(client->data(), client->con->headers); +  return decode_data(client->data(), client->con->headers, client->url);       int counter;    string location = client->con->headers->location;       if (!location || !sizeof(location)) -  return decode_data(client->data(), client->con->headers); +  return decode_data(client->data(), client->con->headers, client->url);       DWRITE("Following redirect from " + (string)client->url +    " to " + location);       // Normalize; Some sites (dn.se) use relative locations.    location = (string)Standards.URI(location, client->url);       args["cached-href"] = location;    HTTPClient new_client = HTTPClient(args, header);       new_client->orig_url = (string)client->url;    new_client->run();    counter++;       while (is_redirect(new_client->status) && counter < MAX_REDIRECTS) {    location = new_client->con->headers->location;       if (!location || !sizeof(location)) -  return decode_data(new_client->data(), new_client->con->headers); +  return decode_data(new_client->data(), new_client->con->headers, +  new_client->url);       DWRITE("Following redirect from " + (string)new_client->url +    " to " + location);       location = (string)Standards.URI(location, new_client->url);       args["cached-href"] = location;    new_client = HTTPClient(args, header);    new_client->orig_url = (string)client->url;    new_client->run();    counter++;    }    -  return decode_data(new_client->data(), new_client->con->headers); +  return decode_data(new_client->data(), new_client->con->headers, +  new_client->url);   }      /*    Takes action based on HTTP status codes in reply.    Asynchronous:   */   public void get_result_async(HTTPClient client, mapping args, mapping header) {    if (!is_redirect(client->status))    return;   
Roxen.git/server/modules/tags/insert_cached_href.pike:283: Inside #if defined(THREADS)
   client->run();       if (to_fetch["sync"])    return get_result_sync(client, args, header);   #else    client = Protocols.HTTP.get_url(to_fetch["url"], 0);       // In practice a server never runs unthreaded. Keep it    // simple and only return when status code < 300:    if(client && client->status > 0 && client->status < 300) { -  string data = decode_data(client->data(), client->headers); +  string data = decode_data(client->data(), client->headers, client->url);    href_database->update_data(to_fetch["url"], data);    return data;    } else    return "";   #endif   }         /* This class represents the database in which the data of the URL:s are stored */   class HrefDatabase {
Roxen.git/server/modules/tags/insert_cached_href.pike:834:    queue->write("@");    }       void data_ok() {    DWRITE("Received data from " + (string)url + " OK");    status = con->status;    finish_up();       if (href_database)    if (orig_url) -  href_database->update_data(orig_url, decode_data(con->data(), con->headers)); +  href_database->update_data(orig_url, +  decode_data(con->data(), con->headers, +  orig_url));    else -  href_database->update_data((string)url, decode_data(con->data(), con->headers)); +  href_database->update_data((string)url, +  decode_data(con->data(), con->headers, url));       if (sync)    queue->write("@");    }       void data_fail() {    DWRITE("Receiving data from " + (string)url + " FAILED");    status = 0;    finish_up();   
Roxen.git/server/modules/tags/insert_cached_href.pike:880:    }    }   }   #endif      /*    Decodes data based on 1) HTTP headers or 2) fallbacks on    data content, meta http-equiv for html and BOM + encoding=''    for xml   */ - string decode_data(string data, mapping headers) { + string decode_data(string data, mapping headers, string|Standards.URI url) {    if (data == "" || !headers)    return data; -  -  function get_ct_cs = -  lambda(string ct) { -  string cs; -  foreach((ct/";")[1..], string s) { -  string s2 = String.trim_all_whites(s); -  string _cs; -  if(sscanf(s2, "charset=%s", _cs) == 1) -  cs = String.trim_all_whites(_cs); +  return Roxen.low_parse_http_response ( +  headers, data, 0, +  "retrieved from " + (string) url + " by <insert cached-href>");   } -  return cs; -  }; +     -  function get_cs_from_html = -  lambda(string data) { -  string cs; -  Parser.HTML parser = Parser.HTML(); -  parser->case_insensitive_tag(1); -  parser->lazy_entity_end(1); -  parser->ignore_unknown(1); -  parser->match_tag(0); -  parser->add_tags( ([ "meta": lambda( Parser.HTML p, mapping m) -  { -  if(m["content"] && m["http-equiv"] && -  lower_case(m["http-equiv"]) == "content-type") -  cs = get_ct_cs(m["content"]); -  } ]) ); -  parser->finish(data); -  return cs; -  }; -  -  function get_cs_from_xml_enc = -  lambda(string data) { -  string cs,tmp; -  sscanf(data, "%*s<?xml%s?>%*s", tmp); -  sscanf(lower_case(tmp), "%*sencoding=\"%s\"%*s", cs); -  if (!cs) -  cs = "utf-8"; // UTF-8 is default XML encoding when omitted -  return cs; -  }; -  -  string ct, cs; -  -  if(!(ct = headers["content-type"])) { -  // Don't even try to decode, might be binary for all we know -  return data; -  } -  -  ct = String.trim_all_whites(lower_case(ct)); -  -  // If text, look for charset: -  if(has_prefix(ct,"text/") || has_prefix(ct, "application/xml")) { -  cs = get_ct_cs(ct); -  -  if (!cs) { -  // No charset in content-type header, look in data for encoding hints -  -  if(has_prefix(ct, "text/html")) { -  cs = get_cs_from_html(data); -  } else if(has_prefix(ct, "text/xml") || has_prefix(ct, "application/xml")) { -  string data2; -  mixed result = catch { -  data2 = Parser.XML.Simple()->autoconvert(data); -  }; -  -  if (!result) -  return remove_bom(data2); -  -  cs = get_cs_from_xml_enc(data); -  } -  } -  } -  -  if(has_prefix(ct, "text/xml") || has_prefix(ct, "application/xml")) -  data = remove_bom(data); -  -  if(cs) { -  catch { -  data = Locale.Charset.decoder(cs)->feed(data)->drain(); -  return data; -  }; -  } -  -  return data; - } -  +    string remove_bom(string data) {    return data[search(data, "<")..];   }      TAGDOCUMENTATION;   #ifdef manual   constant tagdoc=([       "insert#cached-href":#"<desc type='plugin'>   <p>