1a215d2005-08-23Tobias Liin // This is a roxen module. Copyright © 2000 - 2004, Roxen IS. // #include <module.h> inherit "module"; //<locale-token project="mod_insert_cached_href">LOCALE</locale-token> #define LOCALE(X,Y) _DEF_LOCALE("mod_insert_cached_href",X,Y)
239cf42008-02-11Jonas Wallden constant cvs_version = "$Id: insert_cached_href.pike,v 1.20 2008/02/11 10:27:24 jonasw Exp $";
8628ed2006-05-09Anders Johansson 
1a215d2005-08-23Tobias Liin constant thread_safe = 1; constant module_type = MODULE_TAG;
239cf42008-02-11Jonas Wallden LocaleString module_name = LOCALE(1, "Tags: Insert cached href"); LocaleString module_doc = LOCALE(2, "This module contains the RXML tag \"insert "
bbb9b42006-06-01Jonas Wallden  "cached-href\". Useful when implementing e.g."
64f8132005-08-26Tobias Liin  " RSS syndication.");
1a215d2005-08-23Tobias Liin  #if DEBUG_INSERT_CACHED_HREF #define DWRITE(x) report_debug("INSERT_CACHED_HREF: " + x + "\n") #else #define DWRITE(x) #endif #ifdef THREADS private roxen.BackgroundProcess bg_process; private array(HTTPClient) initiated; /* Contains initiated but unfinished data fetches */ private Thread.Mutex mutex; private Thread.MutexKey mutex_key; #endif
3ded942007-03-19Tobias Liin constant MAX_REDIRECTS = 5;
1a215d2005-08-23Tobias Liin private HrefDatabase href_database; void create() {
239cf42008-02-11Jonas Wallden  defvar("fetch-interval", "5 minutes", LOCALE(3, "Fetch interval"),
1a215d2005-08-23Tobias Liin  TYPE_STRING|VAR_MORE,
239cf42008-02-11Jonas Wallden  LOCALE(4, "States how often the data of an URL should be updated. "
1a215d2005-08-23Tobias Liin  "In seconds, minutes, hours or days."));
239cf42008-02-11Jonas Wallden  defvar("fresh-time", "0", LOCALE(5, "Fresh time"),
1a215d2005-08-23Tobias Liin  TYPE_STRING|VAR_MORE,
239cf42008-02-11Jonas Wallden  LOCALE(6, "States how long data in the database can be considered fresh enough"
30bc4c2005-12-05Tobias Liin  " to display. In seconds, minutes, hours or days. As default this" " is 0, which means that this attribute is not used and that there" " are no restrictions on data freshness."));
1a215d2005-08-23Tobias Liin 
239cf42008-02-11Jonas Wallden  defvar("ttl", "7 days", LOCALE(7, "Time to live"),
1a215d2005-08-23Tobias Liin  TYPE_STRING|VAR_MORE,
239cf42008-02-11Jonas Wallden  LOCALE(8, "States how long unrequested data can exist in the database"
1a215d2005-08-23Tobias Liin  " before being removed. In seconds, minutes, hours or days."));
239cf42008-02-11Jonas Wallden  defvar("timeout", "10 seconds", LOCALE(9, "Timeout"),
1a215d2005-08-23Tobias Liin  TYPE_STRING|VAR_MORE,
239cf42008-02-11Jonas Wallden  LOCALE(10, "The timeout when fetching data from a server. In seconds, minutes, "
1a215d2005-08-23Tobias Liin  "hours or days."));
239cf42008-02-11Jonas Wallden  defvar("update-interval", "1 minute", LOCALE(11, "Update interval"),
1a215d2005-08-23Tobias Liin  TYPE_STRING|VAR_MORE,
239cf42008-02-11Jonas Wallden  LOCALE(12, "States how often the module will check if the database needs to "
1a215d2005-08-23Tobias Liin  "be updated. In seconds, minutes, hours or days."));
239cf42008-02-11Jonas Wallden  defvar("recursion_limit", 2, LOCALE(13, "Maximum recursion depth"),
1a215d2005-08-23Tobias Liin  TYPE_INT|VAR_MORE,
239cf42008-02-11Jonas Wallden  LOCALE(14,"Maximum number of nested <tt>&lt;insert cached-href&gt;</tt>'s "
1a215d2005-08-23Tobias Liin  "allowed. May be set to zero to disable the limit.")); } void start(int occasion, Configuration conf) { DWRITE("start(), occasion: " + occasion); if (occasion == 0) { href_database = HrefDatabase(); #ifdef THREADS initiated = ({}); mutex = Thread.Mutex(); #endif } #ifdef THREADS if (occasion == 2)
7b2b4e2006-06-01Jonas Wallden  bg_process && bg_process->stop();
bbb9b42006-06-01Jonas Wallden  // Check whether setup is ok before scheduling background task if (href_database) {
7b2b4e2006-06-01Jonas Wallden  if (href_database->ready_to_run()) {
bbb9b42006-06-01Jonas Wallden  bg_process = roxen.BackgroundProcess(get_time_in_seconds(query("update-interval")), href_database->update_db, 0);
7b2b4e2006-06-01Jonas Wallden  } else {
bbb9b42006-06-01Jonas Wallden  report_error("Insert cached href: Failed to initialize SQL tables. " "Permission error?\n");
7b2b4e2006-06-01Jonas Wallden  }
bbb9b42006-06-01Jonas Wallden  }
1a215d2005-08-23Tobias Liin #endif }
16eefd2006-11-15Tobias Liin mapping(string:function) query_action_buttons() {
239cf42008-02-11Jonas Wallden  return ([LOCALE(15, "Clear database") : href_database->empty_db]);
16eefd2006-11-15Tobias Liin }
1a215d2005-08-23Tobias Liin void stop() { DWRITE("stop()"); #ifdef THREADS
7b2b4e2006-06-01Jonas Wallden  bg_process && bg_process->stop();
1a215d2005-08-23Tobias Liin  bg_process = 0; mutex_key = mutex->lock(); /* Removing registered callbacks for unfinished data fetches to avoid having the back-end thread call them after the module has been destructed: */ foreach(initiated, HTTPClient client) { client->con->request_ok = 0; client->con->request_fail = 0; } mutex_key = 0; initiated = 0; mutex = 0; #endif href_database = 0; } static int get_time_in_seconds(string input) { input = lower_case((string)input); input = String.trim_whites(input); int pos = 0; int number; string the_rest; for (int i = 0; i < sizeof(input) && is_number(input[i]); i++) { pos++; } number = (int)input[0..(pos - 1)]; the_rest = String.trim_whites(input[pos..(sizeof(input) - 1)]); if (!sizeof(the_rest)) return number; switch(the_rest[0]) { case 'd': return number * 24 * 3600; case 'h': return number * 3600; case 'm': return number * 60; case 's': return number; } return number; } static int(0..1) is_number(int char) { return (char >= 48 && char <= 57) ? 1 : 0; }
3ded942007-03-19Tobias Liin #ifdef THREADS
1a215d2005-08-23Tobias Liin public int(0..1) already_initiated(string url) { foreach(initiated, HTTPClient client) { if (url == (string)client->url) return 1; } return 0; }
3ded942007-03-19Tobias Liin #endif public int(0..1) is_redirect(int status) { /* A 304 will never happen since the GET is never conditional. */ if (status >= 300 && status < 400 && status != 304) return 1; return 0; } /* Takes action based on HTTP status codes in reply. Synchronous: */ public string get_result_sync(HTTPClient client, mapping args, mapping header) { if (!is_redirect(client->status) || !MAX_REDIRECTS) return client->data(); int counter; string location = client->con->headers->location; if (!location || !sizeof(location)) return client->data(); DWRITE("Following redirect from " + (string)client->url + " to " + location); args["cached-href"] = location; HTTPClient new_client = HTTPClient("GET", args, header); new_client->orig_url = (string)client->url; new_client->run(); counter++; while (is_redirect(new_client->status) && counter < MAX_REDIRECTS) { location = new_client->con->headers->location; if (!location || !sizeof(location)) return new_client->data(); DWRITE("Following redirect from " + (string)new_client->url + " to " + location); args["cached-href"] = location; new_client = HTTPClient("GET", args, header); new_client->orig_url = (string)client->url; new_client->run(); counter++; } return new_client->data(); } /* Takes action based on HTTP status codes in reply. Asynchronous: */ public void get_result_async(HTTPClient client, mapping args, mapping header) { if (!is_redirect(client->status)) return; int redirects = client->redirects + 1; string location = client->con->headers->location; if (redirects > MAX_REDIRECTS || !location || !sizeof(location)) return; DWRITE("Following redirect from " + (string)client->url + " to " + location); args["cached-href"] = location; HTTPClient new_client = HTTPClient("GET", args, header); new_client->orig_url = client->orig_url; new_client->redirects = redirects; new_client->run(); }
1a215d2005-08-23Tobias Liin  public void|string fetch_url(mapping(string:mixed) to_fetch, void|mapping header) {
7ec09b2006-09-24Tobias Liin  DWRITE(sprintf("fetch_url(): To fetch: %s, with timeout: %d", to_fetch["url"],
1a215d2005-08-23Tobias Liin  to_fetch["timeout"])); mapping(string:mixed) args = (["timeout":to_fetch["timeout"], "cached-href":to_fetch["url"], "sync":to_fetch["sync"]]); object client; #ifdef THREADS mutex_key = mutex->lock(); if (!to_fetch["sync"] && already_initiated(to_fetch["url"])) { mutex_key = 0; return; }
7ec09b2006-09-24Tobias Liin  client = HTTPClient("GET", args, header);
1a215d2005-08-23Tobias Liin  initiated += ({client}); mutex_key = 0;
3ded942007-03-19Tobias Liin  client->orig_url = (string)client->url;
1a215d2005-08-23Tobias Liin  client->run();
3ded942007-03-19Tobias Liin  if (to_fetch["sync"]) return get_result_sync(client, args, header);
1a215d2005-08-23Tobias Liin #else client = Protocols.HTTP.get_url(to_fetch["url"], 0);
3ded942007-03-19Tobias Liin  // In practice a server never runs unthreaded. Keep it // simple and only return when status code < 300: if(client && client->status > 0 && client->status < 300) {
1a215d2005-08-23Tobias Liin  href_database->update_data(to_fetch["url"], client->data()); return client->data(); } else
7ec09b2006-09-24Tobias Liin  return "";
1a215d2005-08-23Tobias Liin #endif } /* This class represents the database in which the data of the URL:s are stored */ class HrefDatabase { private constant request_table_def = "url VARCHAR(255) NOT NULL," "fetch_interval INT UNSIGNED NOT NULL," "fresh_time INT UNSIGNED NOT NULL," "ttl INT UNSIGNED NOT NULL," "timeout INT UNSIGNED NOT NULL," "time_of_day INT UNSIGNED NOT NULL," "next_fetch INT UNSIGNED," "latest_request INT UNSIGNED," "PRIMARY KEY (url, fetch_interval, " "fresh_time, ttl, timeout, time_of_day)"; private constant data_table_def = "url VARCHAR(255) NOT NULL," "data LONGBLOB," "latest_write INT UNSIGNED," "PRIMARY KEY (url)"; private string request_table; private string data_table; public void create() {
bbb9b42006-06-01Jonas Wallden  // Failure to create tables will lead to zero return values
1a215d2005-08-23Tobias Liin  request_table = get_my_table("request", ({request_table_def})); data_table = get_my_table("data", ({data_table_def})); }
16eefd2006-11-15Tobias Liin  public void empty_db() {
3ded942007-03-19Tobias Liin  /* Might as well clean up the database in a mutex section,
16eefd2006-11-15Tobias Liin  just to be sure. No performance issue since this function is only supposed to be used when the "Clear database" button in the admin interface is pressed. */ #ifdef THREADS mutex_key = mutex->lock(); #endif sql_query("DELETE FROM " + request_table); sql_query("DELETE FROM " + data_table); DWRITE("Database has been emptied."); #ifdef THREADS mutex_key = 0; #endif }
bbb9b42006-06-01Jonas Wallden  public int ready_to_run() { // Only ok to run if both tables are accessible return request_table && data_table && 1; }
1a215d2005-08-23Tobias Liin  public void update_db() { DWRITE(sprintf("########### update_db(): Called every %d seconds ##########" , get_time_in_seconds(query("update-interval"))));
3ded942007-03-19Tobias Liin  #ifdef THREADS
1a215d2005-08-23Tobias Liin  foreach(initiated, HTTPClient client) { DWRITE("STILL initiated (should be empty!!!!!): " + (string)client->url); }
3ded942007-03-19Tobias Liin #endif
d024412006-07-05Jonas Wallden  #ifdef OFFLINE // Don't alter entries when running server without network connections. return; #endif
1a215d2005-08-23Tobias Liin  remove_old_entrys();
7ec09b2006-09-24Tobias Liin  if (!nr_of_requests()) {
1a215d2005-08-23Tobias Liin  DWRITE("There are no requests, returning from update_db()"); return; } array(mapping(string:mixed)) to_fetch = urls_to_fetch(); foreach(to_fetch, mapping next) {
b7bc0d2007-03-08Tobias Liin  fetch_url(next, (["x-roxen-recursion-depth":1]));
1a215d2005-08-23Tobias Liin  }
3ded942007-03-19Tobias Liin #ifdef THREADS
1a215d2005-08-23Tobias Liin  foreach(initiated, HTTPClient client) { DWRITE("initiated: " + (string)client->url); }
3ded942007-03-19Tobias Liin #endif
1a215d2005-08-23Tobias Liin  DWRITE("----------------- Leaving update_db() ------------------------"); } public string get_data(mapping args, mapping header) { int next_fetch = 0; array(mapping(string:mixed)) result; /* if the tag argument time-of-day is provided, the database column next_fetch needs to be calculated: */ if (args["time-of-day"]) { mapping now = localtime(time()); now["hour"] = 0; now["min"] = 0; now["sec"] = 0; next_fetch = mktime(now) + args["time-of-day"]; if (next_fetch < time()) next_fetch += 24 * 3600; } #ifndef THREADS /* When running unthreaded the database still needs to be kept up-to-date */ remove_old_entrys(); #endif sql_query("UPDATE " + request_table + " SET latest_request=" + time() + " WHERE url='" + args["cached-href"] + "' AND fetch_interval=" + args["fetch-interval"] + " AND fresh_time=" + args["fresh-time"] + " AND ttl=" + args["ttl"] + " AND timeout=" + args["timeout"] + " AND time_of_day=" + args["time-of-day"]); sql_query("INSERT IGNORE INTO " + request_table + " values (%s, %d, %d, %d, %d, %d, %d, %d)", args["cached-href"], args["fetch-interval"], args["fresh-time"], args["ttl"], args["timeout"], args["time-of-day"], next_fetch, time()); sql_query("INSERT IGNORE INTO " + data_table + " values (%s, '', 0)", args["cached-href"]); result = sql_query("SELECT data FROM " + data_table + " WHERE url='" +
30bc4c2005-12-05Tobias Liin  args["cached-href"] + "' AND (" + time() + " - latest_write < " + args["fresh-time"] + " OR " + args["fresh-time"] + " = 0)");
1a215d2005-08-23Tobias Liin  if (result && sizeof(result) && result[0]["data"] != "") {
7ec09b2006-09-24Tobias Liin  DWRITE("get_data(): Returning cached data for " + args["cached-href"]);
1a215d2005-08-23Tobias Liin  return result[0]["data"]; } else if (!args["pure-db"]) {
7ec09b2006-09-24Tobias Liin  DWRITE("get_data(): No cached data existed for " + args["cached-href"] + " so performing a synchronous fetch");
1a215d2005-08-23Tobias Liin  string data = fetch_url((["url":args["cached-href"], "timeout":args["timeout"], "sync":1]), header); return data; } else {
7ec09b2006-09-24Tobias Liin  DWRITE("get_data(): No cached data existed for " + args["cached-href"] + " and pure-db data " "was desired, so simply returning the empty string");
1a215d2005-08-23Tobias Liin 
7ec09b2006-09-24Tobias Liin  return "";
1a215d2005-08-23Tobias Liin  } } private array(mapping(string:mixed)) no_duplicate_add(array(mapping(string:mixed)) to_fetch, string url, int timeout) { foreach(to_fetch, mapping one) { if (search(one, url)) return to_fetch; } to_fetch += ({(["url":url, "timeout":timeout])}); return to_fetch; }
7ec09b2006-09-24Tobias Liin  private int nr_of_requests() { return sizeof(sql_query("SELECT url from " + request_table));
1a215d2005-08-23Tobias Liin  }
7ec09b2006-09-24Tobias Liin 
1a215d2005-08-23Tobias Liin  private void remove_old_entrys() { sql_query("DELETE FROM " + request_table + " WHERE " + time() + " - latest_request " "> ttl"); sql_query("DELETE " + data_table + " FROM " + data_table + " LEFT JOIN " + request_table + " ON " + data_table + ".url=" + request_table + ".url WHERE " + request_table + ".url IS NULL"); } private array(mapping(string:mixed)) urls_to_fetch() { array(mapping(string:mixed)) to_fetch = ({}); array(mapping(string:mixed)) result = sql_query("SELECT " + data_table + ".url, " + request_table + ".timeout FROM " + data_table + " LEFT JOIN " + request_table + " ON " + data_table + ".url=" + request_table + ".url WHERE " + data_table + ".data='' ORDER BY " "url, timeout DESC"); foreach(result, mapping row) { to_fetch = no_duplicate_add(to_fetch, row["url"], 0); } result = sql_query("SELECT " + data_table + ".url, " + request_table + ".timeout, " + data_table + ".latest_write, " + request_table + ".fetch_interval FROM " + data_table + " LEFT JOIN " + request_table + " ON " + data_table + ".url=" + request_table + ".url WHERE " + data_table + ".data!='' AND " + request_table + ".fetch_interval > 0 AND ((" + time() + " - " + data_table + ".latest_write) > " + request_table + ".fetch_interval) ORDER BY " "url, timeout DESC"); foreach(result, mapping row) { to_fetch = no_duplicate_add(to_fetch, row["url"], 0); } result = sql_query("SELECT " + data_table + ".url, " + request_table + ".timeout, " + request_table + ".time_of_day, " + request_table + ".next_fetch FROM " + data_table + " LEFT JOIN " + request_table + " ON " + data_table + ".url=" + request_table + ".url WHERE " + data_table + ".data!='' AND " + request_table + ".time_of_day > 0 AND " + time() + " > " + request_table + ".next_fetch ORDER BY url, timeout DESC"); foreach(result, mapping row) { to_fetch = no_duplicate_add(to_fetch, row["url"], 0); } result = sql_query("SELECT url, max(timeout) FROM " + request_table + " AS url GROUP BY url"); foreach(to_fetch, mapping one) { foreach(result, mapping row) { if (one["url"] == row["url"]) { one["timeout"] = (int)row["max(timeout)"]; break; } } } return to_fetch; } public void update_data(string url, string data) {
7ec09b2006-09-24Tobias Liin  DWRITE(sprintf("update_data(): Saving the fetched data to the db for url %s"
1a215d2005-08-23Tobias Liin  , url)); sql_query("UPDATE " + data_table + " SET data=%s, latest_write=%d WHERE url=%s", data, time(), url); sql_query("UPDATE " + request_table + " SET next_fetch=next_fetch + " + (24 * 3600) + " WHERE time_of_day > 0 AND " + time() + " > next_fetch AND url='" + url + "'"); } } /* This class represents a set of attributes given to the tag 'insert cached-href' */ class Attributes { private mapping orig_args; /* The attributes given to the tag */ private mapping db_args; /* Checked attributes with relevance for the database */ void create(mapping args) { orig_args = args; db_args = (["cached-href" : 0, "fetch-interval" : 0, "fresh-time" : 0, "ttl" : 0, "timeout" : 0, "time-of-day" : 0, "pure-db" : 0]); check_args(); } private int(0..1) valid_arg(string arg) { arg = String.trim_whites(arg); if (!sizeof(arg) || !is_number(arg[0]) || arg[0] == 48) return 0; return 1; } private void check_args() { if (orig_args["cached-href"][0..6] != "http://" && orig_args["cached-href"][0..7] != "https://") RXML.run_error("An invalid URL has been provided"); else db_args["cached-href"] = orig_args["cached-href"]; if (orig_args["time-of-day"] && orig_args["fetch-interval"]) RXML.run_error("Supply either time-of-day or fetch-interval, not both"); if (orig_args["time-of-day"]) { if (sizeof(orig_args["time-of-day"]) != 5) RXML.run_error("Wrong timeformat. The correct format is hh:mm"); if (orig_args["time-of-day"][2] != ':') RXML.run_error("Wrong timeformat. The correct format is hh:mm"); int hour = (int)orig_args["time-of-day"][0..1]; int minute = (int)orig_args["time-of-day"][3..4]; if (hour < 0 || hour > 23 || minute < 0 || minute > 59) RXML.run_error("Hour must be between 0 and 23, minutes between 0 and 59"); db_args["time-of-day"] = hour * 3600 + minute * 60; } else if (orig_args["fetch-interval"] && valid_arg(orig_args["fetch-interval"])) { db_args["fetch-interval"] = orig_args["fetch-interval"]; } else { db_args["fetch-interval"] = query("fetch-interval"); }
30bc4c2005-12-05Tobias Liin  if (orig_args["fresh-time"] && (valid_arg(orig_args["fresh-time"]) || orig_args["fresh-time"] == "0"))
1a215d2005-08-23Tobias Liin  db_args["fresh-time"] = orig_args["fresh-time"]; else db_args["fresh-time"] = query("fresh-time");
30bc4c2005-12-05Tobias Liin 
1a215d2005-08-23Tobias Liin  if (orig_args["ttl"] && valid_arg(orig_args["ttl"])) db_args["ttl"] = orig_args["ttl"]; else db_args["ttl"] = query("ttl"); if (orig_args["timeout"] && valid_arg(orig_args["timeout"])) db_args["timeout"] = orig_args["timeout"]; else db_args["timeout"] = query("timeout"); db_args["fetch-interval"] = get_time_in_seconds(db_args["fetch-interval"]); db_args["fresh-time"] = get_time_in_seconds(db_args["fresh-time"]); db_args["ttl"] = get_time_in_seconds(db_args["ttl"]); db_args["timeout"] = get_time_in_seconds(db_args["timeout"]); if (orig_args["pure-db"]) db_args["pure-db"] = 1; } public mapping get_orig_args() { return orig_args; } public mapping get_db_args() { return db_args; } } class TagInsertCachedHref { inherit RXML.Tag; constant name = "insert"; constant plugin_name = "cached-href"; string get_data(string var, mapping args, RequestID id) { int recursion_depth = (int)id->request_headers["x-roxen-recursion-depth"]; if (query("recursion_limit") && (recursion_depth >= query("recursion_limit"))) RXML.run_error("Too deep insert cached-href recursion.");
7b2b4e2006-06-01Jonas Wallden  // Verify that database connection is working if (!href_database || !href_database->ready_to_run()) RXML.run_error("Insert cached href: Database connection not working. " "Permission problems?\n");
1a215d2005-08-23Tobias Liin  recursion_depth++; if(args->nocache) NOCACHE(); else CACHE(60);
5cfb0c2005-08-24Marcus Wellhardh  string res = href_database->get_data(Attributes(args)->get_db_args(), (["x-roxen-recursion-depth":recursion_depth]));
86254a2005-09-02Tobias Liin  if(args["decode-xml"]) {
5cfb0c2005-08-24Marcus Wellhardh  // Parse xml header and recode content to internal representation.
7613612007-03-15Tobias Liin  mixed result = catch { res = Parser.XML.Simple()->autoconvert(res); }; if (result) {
3ded942007-03-19Tobias Liin  werror("INSERT_CACHED_HREF: An error occurred trying to decode the data from " + args["cached-href"] + ".\n");
7613612007-03-15Tobias Liin  }
1a215d2005-08-23Tobias Liin 
86254a2005-09-02Tobias Liin  // Remove any bytes potentially still preceeding the first '<' in the xml file return res[search(res, "<")..]; }
5cfb0c2005-08-24Marcus Wellhardh  return res;
1a215d2005-08-23Tobias Liin  }
5cfb0c2005-08-24Marcus Wellhardh }
1a215d2005-08-23Tobias Liin  #ifdef THREADS /* This class represents the retrieval of data from an URL */ class HTTPClient {
3ded942007-03-19Tobias Liin  int status, timeout, start_time, redirects;
1a215d2005-08-23Tobias Liin  object con; Standards.URI url;
3ded942007-03-19Tobias Liin  string path, query, req_data,method, orig_url;
1a215d2005-08-23Tobias Liin  mapping request_headers; Thread.Queue queue = Thread.Queue(); int(0..1) sync; void do_method(string _method, string|Standards.URI _url, void|mapping query_variables, void|mapping _request_headers, void|Protocols.HTTP.Query _con, void|string _data) { if(!_con) { con = Protocols.HTTP.Query(); } else con = _con; method = _method; if(!_request_headers) request_headers = ([]); else request_headers = _request_headers; req_data = _data; if(stringp(_url)) { if (mixed err = catch (url=Standards.URI(_url))) RXML.parse_error ("Invalid URL: %s\n", describe_error (err)); } else url = _url; #if constant(SSL.sslfile) if(url->scheme!="http" && url->scheme!="https") error("Protocols.HTTP can't handle %O or any other protocols than HTTP or HTTPS\n", url->scheme); con->https= (url->scheme=="https")? 1 : 0; #else if(url->scheme!="http" ) error("Protocols.HTTP can't handle %O or any other protocol than HTTP\n", url->scheme); #endif if(!request_headers) request_headers = ([]); mapping default_headers = ([ "user-agent" : "Mozilla/4.0 compatible (Pike HTTP client)",
b7bc0d2007-03-08Tobias Liin  "host" : sprintf("%s:%d", url->host, url->port) ]);
1a215d2005-08-23Tobias Liin  if(url->user || url->passwd) default_headers->authorization = "Basic " + MIME.encode_base64(url->user + ":" + (url->password || "")); request_headers = default_headers | request_headers; query=url->query; if(query_variables && sizeof(query_variables)) { if(query) query+="&"+Protocols.HTTP.http_encode_query(query_variables); else query=Protocols.HTTP.http_encode_query(query_variables); } path=url->path; if(path=="") path="/"; } string data() { if(!con->ok)
3ded942007-03-19Tobias Liin  return "";
1a215d2005-08-23Tobias Liin 
3ded942007-03-19Tobias Liin  if(status > 0 && status < 300) return con->data(); return "";
1a215d2005-08-23Tobias Liin  } void req_ok() { DWRITE("Received headers from " + (string)url + " OK"); status = con->status;
7ec09b2006-09-24Tobias Liin 
3ded942007-03-19Tobias Liin  /* Error, abort: */
7ec09b2006-09-24Tobias Liin  if (status >= 400) {
3ded942007-03-19Tobias Liin  DWRITE("HTTP status code " + (string)status + " for " + (string)url + ", aborting.");
7ec09b2006-09-24Tobias Liin  finish_up(); if (sync) queue->write("@"); return; }
3ded942007-03-19Tobias Liin  /* Redirection: */ if (is_redirect(status)) { finish_up(); if (sync) { queue->write("@"); return; } mapping args = (["cached-href" : (string)url, "timeout" : timeout, "sync" : 0]); get_result_async(this_object(), args, request_headers); return; } /* HTTP status code OK, continuing with data fetch: */
1a215d2005-08-23Tobias Liin  int data_timeout = timeout - (time() - start_time); con->data_timeout = data_timeout >= 0 ? data_timeout : 0; con->timed_async_fetch(data_ok, data_fail); } void req_fail() { DWRITE("Receiving headers from " + (string)url + " FAILED"); status = 0;
7ec09b2006-09-24Tobias Liin  finish_up();
1a215d2005-08-23Tobias Liin  if (sync) queue->write("@"); } void data_ok() { DWRITE("Received data from " + (string)url + " OK"); status = con->status;
7ec09b2006-09-24Tobias Liin  finish_up();
1a215d2005-08-23Tobias Liin  if (href_database)
3ded942007-03-19Tobias Liin  if (orig_url) href_database->update_data(orig_url, con->data()); else href_database->update_data((string)url, con->data());
1a215d2005-08-23Tobias Liin  if (sync) queue->write("@"); } void data_fail() { DWRITE("Receiving data from " + (string)url + " FAILED"); status = 0;
7ec09b2006-09-24Tobias Liin  finish_up();
1a215d2005-08-23Tobias Liin  if (sync) queue->write("@"); }
7ec09b2006-09-24Tobias Liin  private void finish_up() { mutex_key = mutex->lock(); initiated -= ({this_object()}); mutex_key = 0; }
1a215d2005-08-23Tobias Liin  void run() { con->set_callbacks(req_ok, req_fail); con->timeout = timeout; start_time = time(); con->async_request(url->host,url->port, method+" "+path+(query?("?"+query):"")+" HTTP/1.0", request_headers, req_data); status = con->status; if (sync) {
7ec09b2006-09-24Tobias Liin  DWRITE("Initiating synchronous fetch for " + (string)url);
1a215d2005-08-23Tobias Liin  queue->read();
7ec09b2006-09-24Tobias Liin  DWRITE("Synchronous fetch for " + (string)url + " completed.");
1a215d2005-08-23Tobias Liin  } } void create(string method, mapping args, mapping|void headers) { if(method == "POST") { mapping vars = ([ ]); #if constant(roxen) foreach( (args["post-variables"] || "") / ",", string var) { array a = var / "="; if(sizeof(a) == 2) vars[String.trim_whites(a[0])] = RXML.user_get_var(String.trim_whites(a[1])); } #endif do_method("POST", args["cached-href"], vars, headers); } else do_method("GET", args["cached-href"], 0, headers); timeout = args["timeout"]; sync = args["sync"]; } } #endif TAGDOCUMENTATION; #ifdef manual constant tagdoc=([
3cf3102006-05-09Anders Johansson  "insert#cached-href":#"<desc type='plugin'>
1a215d2005-08-23Tobias Liin <p>
3cf3102006-05-09Anders Johansson  <short>This tag inserts the contents of the provided URL, as read from a database.</short> The database is updated repeatedly in the background by a background process that is initiated and run as soon as this module is loaded. If the database is empty when the tag is executed, the standard behavior is to fetch the data immediately. When providing values for the attributes fetch-interval, fresh-time, ttl, update-interval or timeout, the time can either be seconds, minutes, hours or days. If only a number is provided, it is interpreted as seconds, otherwise write the corresponding letter or word after the number, e.g: 10 days, 10d, 10 h, 10hours, 5 min, 5m, 2 hours and so on. Spaces between the number and the word are allowed. The values at the settings tab for fetch-interval, fresh-time, ttl and timeout are the standard values that the tag will be assigned if an attribute is left out. update-interval on the other hand, is central and common for all tags. </p> <note> <p> The data in the database for an URL is always shared by all tags at the same site. This means that when data for an URL is updated this affects all tags referring to this specific URL, even if the other attributes may differ. For example, if the same URL is referenced by one tag without the pure-db attribute and another tag WITH the pure-db attribute, the only guarantee is that the tag with the attribute pure-db never will generate a data fetch. The tag without the attribute still can. </p> <p> Another implication of the data being shared is for example if the same URL is referenced by two tags with different fetch intervals. The data will then be updated at the smallest of these two fetch intervals. Providing the time-of-day attribute will however always make the module fetch data when that time of day occur, without interference by any other tag referencing the same URL. </p> <p> Also important to note, is that since the fetching of the common data is performed centrally by the module, the timeout of every tag can not be used, of course. If several tags refer to the same URL but are provided with different timeouts, the longest timeout will be used. However, if there exists no data in the database for the URL of a tag when the tag is run, and the pure-db attribute is not provided, then the fetching of data will be performed for that specific tag and with the specified timeout. </p> </note> </desc>
1a215d2005-08-23Tobias Liin  <attr name='cached-href' value='string'> <p> The URL of the page to be inserted. </p> </attr> <attr name='fetch-interval' value='string'> <p>
ce1fca2005-08-24Henrik Grubbström (Grubba)  States at what interval the background process will fetch the
1a215d2005-08-23Tobias Liin  URL and update the database. </p> </attr> <attr name='time-of-day' value='string'> <p>
ce1fca2005-08-24Henrik Grubbström (Grubba)  Can be provided as an alternative to fetch-interval, if a fetch should be performed once per day at a specific time. The provided time must be of the format hh:mm.
1a215d2005-08-23Tobias Liin </p> </attr> <attr name='fresh-time' value='string'> <p> States how long the data for the URL in the database is considered fresh. </p> </attr> <attr name='ttl' value='string'> <p>
ce1fca2005-08-24Henrik Grubbström (Grubba)  States how long unrequested data will exist in the database before being removed.
1a215d2005-08-23Tobias Liin </p> </attr> <attr name='timeout' value='string'> <p>
ce1fca2005-08-24Henrik Grubbström (Grubba)  The timeout for the fetching of new data from a server.
1a215d2005-08-23Tobias Liin </p> </attr> <attr name='pure-db'> <p> If provided, the tag will only return data stored in the database, i.e never fetch data immediately if the database is empty. Instead, the
ce1fca2005-08-24Henrik Grubbström (Grubba)  data will not be available until the background process has updated the database.
1a215d2005-08-23Tobias Liin </p> </attr> <attr name='nocache' value='string'> <p> If provided the resulting page will get a zero cache time in the RAM cache. The default time is up to 60 seconds depending on the cache limit imposed by other RXML tags on the same page. </p>
5cfb0c2005-08-24Marcus Wellhardh </attr>
1a215d2005-08-23Tobias Liin 
5cfb0c2005-08-24Marcus Wellhardh <attr name='decode-xml' value='string'> <p> If provided the resulting content will be decoded to the internal charset representation by looking at a potential BOM (Byte Order Mark) and the specified encoding in the XML header. Defaults to UTF-8
ce1fca2005-08-24Henrik Grubbström (Grubba)  if no BOM or encoding was found.
5cfb0c2005-08-24Marcus Wellhardh </p> </attr>",
1a215d2005-08-23Tobias Liin ]); #endif