Roxen.git / server / modules / tags / insert_cached_href.pike

version: Context lines:

Roxen.git/server/modules/tags/insert_cached_href.pike:1:   // This is a roxen module. Copyright © 2000 - 2004, Roxen IS.   //      #include <module.h>   inherit "module";      //<locale-token project="mod_insert_cached_href">LOCALE</locale-token>   #define LOCALE(X,Y) _DEF_LOCALE("mod_insert_cached_href",X,Y)    - constant cvs_version = "$Id: insert_cached_href.pike,v 1.18 2007/03/15 11:49:48 liin Exp $"; + constant cvs_version = "$Id: insert_cached_href.pike,v 1.19 2007/03/19 08:33:16 liin Exp $";      constant thread_safe = 1;   constant module_type = MODULE_TAG;   LocaleString module_name = LOCALE(0, "Tags: Insert cached href");   LocaleString module_doc = LOCALE(0, "This module contains the RXML tag \"insert "    "cached-href\". Useful when implementing e.g."    " RSS syndication.");      #if DEBUG_INSERT_CACHED_HREF   #define DWRITE(x) report_debug("INSERT_CACHED_HREF: " + x + "\n")
Roxen.git/server/modules/tags/insert_cached_href.pike:22:   #define DWRITE(x)   #endif      #ifdef THREADS   private roxen.BackgroundProcess bg_process;   private array(HTTPClient) initiated; /* Contains initiated but unfinished data fetches */   private Thread.Mutex mutex;   private Thread.MutexKey mutex_key;   #endif    + constant MAX_REDIRECTS = 5; +    private HrefDatabase href_database;      void create() {    defvar("fetch-interval", "5 minutes", LOCALE(0, "Fetch interval"),    TYPE_STRING|VAR_MORE,    LOCALE(0, "States how often the data of an URL should be updated. "    "In seconds, minutes, hours or days."));       defvar("fresh-time", "0", LOCALE(0, "Fresh time"),    TYPE_STRING|VAR_MORE,
Roxen.git/server/modules/tags/insert_cached_href.pike:153:    return number;    }       return number;   }      static int(0..1) is_number(int char) {    return (char >= 48 && char <= 57) ? 1 : 0;   }    + #ifdef THREADS   public int(0..1) already_initiated(string url) {    foreach(initiated, HTTPClient client) {    if (url == (string)client->url)    return 1;    }       return 0;   } -  + #endif    -  + public int(0..1) is_redirect(int status) { +  /* +  A 304 will never happen since the +  GET is never conditional. +  */ +  if (status >= 300 && status < 400 && status != 304) +  return 1; +  +  return 0; + } +  + /* +  Takes action based on HTTP status codes in reply. +  Synchronous: + */ + public string get_result_sync(HTTPClient client, mapping args, mapping header) { +  if (!is_redirect(client->status) || !MAX_REDIRECTS) +  return client->data(); +  +  int counter; +  string location = client->con->headers->location; +  +  if (!location || !sizeof(location)) +  return client->data(); +  +  DWRITE("Following redirect from " + (string)client->url + +  " to " + location); +  +  args["cached-href"] = location; +  HTTPClient new_client = HTTPClient("GET", args, header); +  +  new_client->orig_url = (string)client->url; +  new_client->run(); +  counter++; +  +  while (is_redirect(new_client->status) && counter < MAX_REDIRECTS) { +  location = new_client->con->headers->location; +  +  if (!location || !sizeof(location)) +  return new_client->data(); +  +  DWRITE("Following redirect from " + (string)new_client->url + +  " to " + location); +  +  args["cached-href"] = location; +  new_client = HTTPClient("GET", args, header); +  new_client->orig_url = (string)client->url; +  new_client->run(); +  counter++; +  } +  +  return new_client->data(); + } +  + /* +  Takes action based on HTTP status codes in reply. 
+  Asynchronous: + */ + public void get_result_async(HTTPClient client, mapping args, mapping header) { +  if (!is_redirect(client->status)) +  return; +  +  int redirects = client->redirects + 1; +  string location = client->con->headers->location; +  +  if (redirects > MAX_REDIRECTS || +  !location || +  !sizeof(location)) +  return; +  +  DWRITE("Following redirect from " + (string)client->url + +  " to " + location); +  +  args["cached-href"] = location; +  HTTPClient new_client = HTTPClient("GET", args, header); +  +  new_client->orig_url = client->orig_url; +  new_client->redirects = redirects; +  new_client->run(); + } +    public void|string fetch_url(mapping(string:mixed) to_fetch, void|mapping header) {    DWRITE(sprintf("fetch_url(): To fetch: %s, with timeout: %d", to_fetch["url"],    to_fetch["timeout"]));       mapping(string:mixed) args = (["timeout":to_fetch["timeout"],    "cached-href":to_fetch["url"],    "sync":to_fetch["sync"]]);       object client;   
Roxen.git/server/modules/tags/insert_cached_href.pike:183: Inside #if defined(THREADS)
   mutex_key = mutex->lock();       if (!to_fetch["sync"] && already_initiated(to_fetch["url"])) {    mutex_key = 0;    return;    }       client = HTTPClient("GET", args, header);    initiated += ({client});    mutex_key = 0; +  client->orig_url = (string)client->url;    client->run();    -  if (to_fetch["sync"]) { -  if(client->status > 0 && client->status < 400) { -  return client->data(); -  } else -  return ""; -  } +  if (to_fetch["sync"]) +  return get_result_sync(client, args, header);   #else    client = Protocols.HTTP.get_url(to_fetch["url"], 0);    -  if(client && client->status > 0 && client->status < 400) { +  // In practice a server never runs unthreaded. Keep it +  // simple and only return when status code < 300: +  if(client && client->status > 0 && client->status < 300) {    href_database->update_data(to_fetch["url"], client->data());    return client->data();    } else    return "";   #endif   }         /* This class represents the database in which the data of the URL:s are stored */   class HrefDatabase {
Roxen.git/server/modules/tags/insert_cached_href.pike:231:    private string request_table;    private string data_table;       public void create() {    // Failure to create tables will lead to zero return values    request_table = get_my_table("request", ({request_table_def}));    data_table = get_my_table("data", ({data_table_def}));    }       public void empty_db() { -  /* Might as well clean up the database in a mutex section, +  /* +  Might as well clean up the database in a mutex section,    just to be sure. No performance issue since this function is only    supposed to be used when the "Clear database" button in the admin interface    is pressed.    */   #ifdef THREADS    mutex_key = mutex->lock();   #endif    sql_query("DELETE FROM " + request_table);    sql_query("DELETE FROM " + data_table);    DWRITE("Database has been emptied.");
Roxen.git/server/modules/tags/insert_cached_href.pike:257:    public int ready_to_run()    {    // Only ok to run if both tables are accessible    return request_table && data_table && 1;    }       public void update_db() {    DWRITE(sprintf("########### update_db(): Called every %d seconds ##########"    , get_time_in_seconds(query("update-interval"))));    + #ifdef THREADS    foreach(initiated, HTTPClient client) {    DWRITE("STILL initiated (should be empty!!!!!): " + (string)client->url);    } -  + #endif      #ifdef OFFLINE    // Don't alter entries when running server without network connections.    return;   #endif       remove_old_entrys();       if (!nr_of_requests()) {    DWRITE("There are no requests, returning from update_db()");    return;    }       array(mapping(string:mixed)) to_fetch = urls_to_fetch();       foreach(to_fetch, mapping next) {    fetch_url(next, (["x-roxen-recursion-depth":1]));    }    -  + #ifdef THREADS    foreach(initiated, HTTPClient client) {    DWRITE("initiated: " + (string)client->url);    } -  + #endif       DWRITE("----------------- Leaving update_db() ------------------------");    }       public string get_data(mapping args, mapping header) {    int next_fetch = 0;    array(mapping(string:mixed)) result;       /* if the tag argument time-of-day is provided, the database column next_fetch    needs to be calculated: */
Roxen.git/server/modules/tags/insert_cached_href.pike:568:    string res = href_database->get_data(Attributes(args)->get_db_args(),    (["x-roxen-recursion-depth":recursion_depth]));       if(args["decode-xml"]) {    // Parse xml header and recode content to internal representation.    mixed result = catch {    res = Parser.XML.Simple()->autoconvert(res);    };       if (result) { -  werror("insert#cached-href: An error occurred trying to decode the data.\n"); +  werror("INSERT_CACHED_HREF: An error occurred trying to decode the data from " + +  args["cached-href"] + ".\n");    }       // Remove any bytes potentially still preceeding the first '<' in the xml file    return res[search(res, "<")..];    }       return res;    }   }      #ifdef THREADS      /* This class represents the retrieval of data from an URL */   class HTTPClient { -  int status, timeout, start_time; +  int status, timeout, start_time, redirects;    object con;    Standards.URI url; -  string path, query, req_data,method; +  string path, query, req_data,method, orig_url;    mapping request_headers;    Thread.Queue queue = Thread.Queue();    int(0..1) sync;       void do_method(string _method,    string|Standards.URI _url,    void|mapping query_variables,    void|mapping _request_headers,    void|Protocols.HTTP.Query _con, void|string _data)    {
Roxen.git/server/modules/tags/insert_cached_href.pike:659:    else    query=Protocols.HTTP.http_encode_query(query_variables);    }       path=url->path;    if(path=="") path="/";    }       string data() {    if(!con->ok) -  return 0; +  return "";    -  +  if(status > 0 && status < 300)    return con->data(); -  +  +  return "";    }       void req_ok() {    DWRITE("Received headers from " + (string)url + " OK");    status = con->status;    -  +  /* +  Error, abort: +  */    if (status >= 400) { -  DWRITE("HTTP status code " + (string)status + " for " + (string)url + ", aborting fetch"); +  DWRITE("HTTP status code " + (string)status + " for " + (string)url + ", aborting.");    finish_up();       if (sync)    queue->write("@");       return;    }    -  +  /* +  Redirection: +  */ +  if (is_redirect(status)) { +  finish_up(); +  +  if (sync) { +  queue->write("@"); +  return; +  } +  +  mapping args = (["cached-href" : (string)url, +  "timeout" : timeout, +  "sync" : 0]); +  +  get_result_async(this_object(), args, request_headers); +  +  return; +  } +  +  /* +  HTTP status code OK, continuing +  with data fetch: +  */    int data_timeout = timeout - (time() - start_time);    con->data_timeout = data_timeout >= 0 ? data_timeout : 0;    con->timed_async_fetch(data_ok, data_fail);    }       void req_fail() {    DWRITE("Receiving headers from " + (string)url + " FAILED");    status = 0;    finish_up();       if (sync)    queue->write("@");    }       void data_ok() {    DWRITE("Received data from " + (string)url + " OK");    status = con->status;    finish_up();       if (href_database) -  +  if (orig_url) +  href_database->update_data(orig_url, con->data()); +  else    href_database->update_data((string)url, con->data());       if (sync)    queue->write("@");    }       void data_fail() {    DWRITE("Receiving data from " + (string)url + " FAILED");    status = 0;    finish_up();