Roxen.git / server / modules / tags / insert_cached_href.pike

Context lines:

Roxen.git/server/modules/tags/insert_cached_href.pike:1:
  // This is a roxen module. Copyright © 2000 - 2004, Roxen IS.
  //

  #include <module.h>
  inherit "module";

  //<locale-token project="mod_insert_cached_href">LOCALE</locale-token>
  #define LOCALE(X,Y) _DEF_LOCALE("mod_insert_cached_href",X,Y)

- constant cvs_version = "$Id: insert_cached_href.pike,v 1.14 2006/07/05 07:38:08 jonasw Exp $";
+ constant cvs_version = "$Id: insert_cached_href.pike,v 1.15 2006/09/24 09:06:55 liin Exp $";

  constant thread_safe = 1;
  constant module_type = MODULE_TAG;
  LocaleString module_name = LOCALE(0, "Tags: Insert cached href");
  LocaleString module_doc = LOCALE(0, "This module contains the RXML tag \"insert "
                                      "cached-href\". Useful when implementing e.g."
                                      " RSS syndication.");

  #if DEBUG_INSERT_CACHED_HREF
  #define DWRITE(x) report_debug("INSERT_CACHED_HREF: " + x + "\n")
Roxen.git/server/modules/tags/insert_cached_href.pike:23:
  #endif

  #ifdef THREADS
  private roxen.BackgroundProcess bg_process;
  private array(HTTPClient) initiated; /* Contains initiated but unfinished data fetches */
  private Thread.Mutex mutex;
  private Thread.MutexKey mutex_key;
  #endif

  private HrefDatabase href_database;
- private constant unavailable = "The requested page is unavailable at the moment. "
-                                "Please try again later";
+

  void create() {
    defvar("fetch-interval", "5 minutes", LOCALE(0, "Fetch interval"),
           TYPE_STRING|VAR_MORE,
           LOCALE(0, "States how often the data of an URL should be updated. "
                     "In seconds, minutes, hours or days."));

    defvar("fresh-time", "0", LOCALE(0, "Fresh time"),
           TYPE_STRING|VAR_MORE,
           LOCALE(0, "States how long data in the database can be considered fresh enough"
Roxen.git/server/modules/tags/insert_cached_href.pike:159:
  public int(0..1) already_initiated(string url) {
    foreach(initiated, HTTPClient client) {
      if (url == (string)client->url)
        return 1;
    }

    return 0;
  }

  public void|string fetch_url(mapping(string:mixed) to_fetch, void|mapping header) {
-   DWRITE(sprintf("in fetch_url(): To fetch: %s, with timeout: %d", to_fetch["url"],
+   DWRITE(sprintf("fetch_url(): To fetch: %s, with timeout: %d", to_fetch["url"],
                   to_fetch["timeout"]));

    mapping(string:mixed) args = (["timeout":to_fetch["timeout"],
                                   "cached-href":to_fetch["url"],
                                   "sync":to_fetch["sync"]]);

-   string method = "GET";
+
    object client;

  #ifdef THREADS
    mutex_key = mutex->lock();

    if (!to_fetch["sync"] && already_initiated(to_fetch["url"])) {
      mutex_key = 0;
      return;
    }

-   client = HTTPClient(method, args, header);
+   client = HTTPClient("GET", args, header);
    initiated += ({client});
    mutex_key = 0;
    client->run();

    if (to_fetch["sync"]) {
-     if(client->status > 0) {
+     if(client->status > 0 && client->status < 400) {
        return client->data();
      } else
-       return unavailable;
+       return "";
    }
  #else
    client = Protocols.HTTP.get_url(to_fetch["url"], 0);

-   if(client && client->status > 0) {
+   if(client && client->status > 0 && client->status < 400) {
      href_database->update_data(to_fetch["url"], client->data());
      return client->data();
    } else
-     return unavailable;
+     return "";
  #endif
  }


  /* This class represents the database in which the data of the URL:s are stored */
  class HrefDatabase {
    private constant request_table_def = "url VARCHAR(255) NOT NULL,"
                                         "fetch_interval INT UNSIGNED NOT NULL,"
                                         "fresh_time INT UNSIGNED NOT NULL,"
                                         "ttl INT UNSIGNED NOT NULL,"
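Both branches of fetch_url() above now treat a fetch as successful only when the HTTP status is set and below 400, and return an empty string instead of the old "unavailable" message otherwise. A minimal Pike sketch of that success test as a standalone helper (the helper name is hypothetical, not part of the module):

    // Hypothetical helper, illustration only: a usable response has a status
    // that was actually set (> 0) and is not an HTTP error code (< 400).
    int(0..1) successful_fetch(int status) {
      return status > 0 && status < 400;
    }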
Roxen.git/server/modules/tags/insert_cached_href.pike:248:
        DWRITE("STILL initiated (should be empty!!!!!): " + (string)client->url);
      }

  #ifdef OFFLINE
      // Don't alter entries when running server without network connections.
      return;
  #endif

      remove_old_entrys();

-     if (no_requests()) {
+     if (!nr_of_requests()) {
        DWRITE("There are no requests, returning from update_db()");
        return;
      }

      array(mapping(string:mixed)) to_fetch = urls_to_fetch();

      foreach(to_fetch, mapping next) {
        fetch_url(next);
      }
Roxen.git/server/modules/tags/insert_cached_href.pike:309:
                args["timeout"], args["time-of-day"], next_fetch, time());

      sql_query("INSERT IGNORE INTO " + data_table + " values (%s, '', 0)",
                args["cached-href"]);

      result = sql_query("SELECT data FROM " + data_table + " WHERE url='" +
                         args["cached-href"] + "' AND (" + time() + " - latest_write < "
                         + args["fresh-time"] + " OR " + args["fresh-time"] + " = 0)");

      if (result && sizeof(result) && result[0]["data"] != "") {
-       DWRITE("in get_data(): Returning cached data");
+       DWRITE("get_data(): Returning cached data for " + args["cached-href"]);

        return result[0]["data"];
      } else if (!args["pure-db"]) {
-       DWRITE("in get_data(): No cached data existed so performing a synchronous fetch");
+       DWRITE("get_data(): No cached data existed for " + args["cached-href"] + " so performing a synchronous fetch");

        string data = fetch_url((["url":args["cached-href"], "timeout":args["timeout"],
                                  "sync":1]), header);

        return data;
      } else {
-       DWRITE("in get_data(): No cached data existed and pure-db data "
-              "was desired, so simply returning 'unavailable'");
+       DWRITE("get_data(): No cached data existed for " + args["cached-href"] + " and pure-db data "
+              "was desired, so simply returning the empty string");

-       return unavailable;
+       return "";
      }
    }

    private array(mapping(string:mixed)) no_duplicate_add(array(mapping(string:mixed))
                                                          to_fetch, string url,
                                                          int timeout) {
      foreach(to_fetch, mapping one) {
        if (search(one, url))
          return to_fetch;
      }

      to_fetch += ({(["url":url, "timeout":timeout])});

      return to_fetch;
    }

-   private int(0..1) no_requests() {
-     array(mapping(string:mixed)) result = sql_query("SELECT url from " + request_table);
-
-     return sizeof(result) == 0 ? 1 : 0;
+   private int nr_of_requests() {
+     return sizeof(sql_query("SELECT url from " + request_table));
    }

    private void remove_old_entrys() {
      sql_query("DELETE FROM " + request_table + " WHERE " + time() + " - latest_request "
                "> ttl");

      sql_query("DELETE " + data_table + " FROM " + data_table + " LEFT JOIN " +
                request_table + " ON " + data_table + ".url=" + request_table +
                ".url WHERE " + request_table + ".url IS NULL");
    }
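The SELECT in get_data() above only returns cached data while it is still fresh: either it was written less than fresh-time seconds ago, or fresh-time is 0, which disables the check. A small Pike sketch of that freshness rule (the helper is illustrative, not part of the module):

    // Illustrative only: mirrors the SQL condition
    // (time() - latest_write < fresh-time OR fresh-time = 0).
    int(0..1) fresh_enough(int latest_write, int fresh_time) {
      return fresh_time == 0 || time() - latest_write < fresh_time;
    }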
Roxen.git/server/modules/tags/insert_cached_href.pike:412:
            one["timeout"] = (int)row["max(timeout)"];
            break;
          }
        }
      }

      return to_fetch;
    }

    public void update_data(string url, string data) {
-     DWRITE(sprintf("in update_data(): Saving the fetched data to the db for url %s"
+     DWRITE(sprintf("update_data(): Saving the fetched data to the db for url %s"
                     , url));

      sql_query("UPDATE " + data_table + " SET data=%s, latest_write=%d WHERE url=%s",
                data, time(), url);

      sql_query("UPDATE " + request_table + " SET next_fetch=next_fetch + " + (24 * 3600)
                + " WHERE time_of_day > 0 AND " + time() + " > next_fetch AND url='"
                + url + "'");
    }
  }
Roxen.git/server/modules/tags/insert_cached_href.pike:643:
    string data() {
      if(!con->ok)
        return 0;

      return con->data();
    }

    void req_ok() {
      DWRITE("Received headers from " + (string)url + " OK");
      status = con->status;
+
+     if (status >= 400) {
+       DWRITE("HTTP status code " + (string)status + " for " + (string)url + ", aborting fetch");
+       finish_up();
+
+       if (sync)
+         queue->write("@");
+
+       return;
+     }
+
      int data_timeout = timeout - (time() - start_time);
      con->data_timeout = data_timeout >= 0 ? data_timeout : 0;
      con->timed_async_fetch(data_ok, data_fail);
    }

    void req_fail() {
      DWRITE("Receiving headers from " + (string)url + " FAILED");
      status = 0;
-     mutex_key = mutex->lock();
-     initiated -= ({this_object()});
-     mutex_key = 0;
+     finish_up();

      if (sync)
        queue->write("@");
    }

    void data_ok() {
      DWRITE("Received data from " + (string)url + " OK");
      status = con->status;
-     mutex_key = mutex->lock();
-     initiated -= ({this_object()});
-     mutex_key = 0;
+     finish_up();

      if (href_database)
        href_database->update_data((string)url, con->data());

      if (sync)
        queue->write("@");
    }

    void data_fail() {
      DWRITE("Receiving data from " + (string)url + " FAILED");
      status = 0;
-     mutex_key = mutex->lock();
-     initiated -= ({this_object()});
-     mutex_key = 0;
+     finish_up();

      if (sync)
        queue->write("@");
    }

-
+   private void finish_up() {
+     mutex_key = mutex->lock();
+     initiated -= ({this_object()});
+     mutex_key = 0;
+   }
+
    void run() {
      con->set_callbacks(req_ok, req_fail);
      con->timeout = timeout;
      start_time = time();
      con->async_request(url->host,url->port,
                         method+" "+path+(query?("?"+query):"")+" HTTP/1.0",
                         request_headers, req_data);
      status = con->status;

      if (sync) {
-       DWRITE("Waiting for fetch to complete (sync fetch)......");
+       DWRITE("Initiating synchronous fetch for " + (string)url);
        queue->read();
-       DWRITE("Done waiting for fetch.");
+       DWRITE("Synchronous fetch for " + (string)url + " completed.");
      }
    }

    void create(string method, mapping args, mapping|void headers) {
      if(method == "POST") {
        mapping vars = ([ ]);
  #if constant(roxen)
        foreach( (args["post-variables"] || "") / ",", string var) {
          array a = var / "=";
          if(sizeof(a) == 2)
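Every completion path above, including the new early abort in req_ok(), signals a waiting synchronous caller by writing a token to the queue that run() blocks on. A standalone Pike sketch of that idiom using Thread.Queue (names here are illustrative, not the module's own):

    // Illustrative sketch of the signalling idiom, assuming threads are enabled.
    Thread.Queue signal = Thread.Queue();

    void on_fetch_done() {
      signal->write("@");   // the value is irrelevant; it only unblocks the reader
    }

    void wait_for_fetch() {
      signal->read();       // blocks the synchronous caller until a callback signals
    }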