Roxen.git / server / modules / tags / insert_cached_href.pike

version» Context lines:

Roxen.git/server/modules/tags/insert_cached_href.pike:254:       args["cached-href"] = location;    HTTPClient new_client = HTTPClient(args, header);       new_client->orig_url = client->orig_url;    new_client->redirects = redirects;    new_client->run();   }      public void|string fetch_url(mapping(string:mixed) to_fetch, void|mapping header) { -  DWRITE(sprintf("fetch_url(): To fetch: %s, with timeout: %d", to_fetch["url"], -  to_fetch["timeout"])); +  DWRITE(sprintf("fetch_url(): To fetch: %s, with timeout: %d\nheaders: %O\n", +  to_fetch["url"], to_fetch["timeout"], header));       mapping(string:mixed) args = (["timeout":to_fetch["timeout"],    "cached-href":to_fetch["url"],    "sync":to_fetch["sync"]]);       object client;      #ifdef THREADS    mutex_key = mutex->lock();   
Roxen.git/server/modules/tags/insert_cached_href.pike:305:   class HrefDatabase {    private constant request_table_def = "url VARCHAR(768) NOT NULL,"    "fetch_interval INT UNSIGNED NOT NULL,"    "fresh_time INT UNSIGNED NOT NULL,"    "ttl INT UNSIGNED NOT NULL,"    "timeout INT UNSIGNED NOT NULL,"    "time_of_day INT UNSIGNED NOT NULL,"    "next_fetch INT UNSIGNED,"    "latest_request INT UNSIGNED,"    "out_of_date INT UNSIGNED," +  "request_headers BLOB," +  "header_delimiter VARCHAR(255),"    "PRIMARY KEY (url, fetch_interval, "    "fresh_time, ttl, timeout, time_of_day)";       private constant data_table_def = "url VARCHAR(768) NOT NULL,"    "data LONGBLOB,"    "latest_write INT UNSIGNED,"    "PRIMARY KEY (url)";       private string request_table;    private string data_table;
Roxen.git/server/modules/tags/insert_cached_href.pike:343:    if (sizeof(tbl_def) && lower_case(tbl_def[0]->Type) != "varchar(768)")    sql_query("ALTER TABLE " + request_table +    " MODIFY url VARCHAR(768) NOT NULL");    }    if (data_table) {    array(mapping) tbl_def = sql_query("DESCRIBE " + data_table + " url");    if (sizeof(tbl_def) && lower_case(tbl_def[0]->Type) != "varchar(768)")    sql_query("ALTER TABLE " + data_table +    " MODIFY url VARCHAR(768) NOT NULL");    } +  +  // Add header fields if upgrading from earlier versions +  if(request_table && !sizeof(sql_query("DESCRIBE " + request_table + " request_headers"))) { +  sql_query("ALTER TABLE " + request_table + " ADD COLUMN request_headers BLOB"); +  sql_query("ALTER TABLE " + request_table + " ADD COLUMN header_delimiter VARCHAR(255)");    } -  +  }       public void empty_db() {    /*    Might as well clean up the database in a mutex section,    just to be sure. No performance issue since this function is only    supposed to be used when the "Clear database" button in the admin interface    is pressed.    */   #ifdef THREADS    mutex_key = mutex->lock();
Roxen.git/server/modules/tags/insert_cached_href.pike:394:    remove_old_entrys();       if (!nr_of_requests()) {    DWRITE("There are no requests, returning from update_db()");    return;    }       array(mapping(string:mixed)) to_fetch = urls_to_fetch();       foreach(to_fetch, mapping next) { -  fetch_url(next, (["x-roxen-recursion-depth":1])); +  mapping headers = ([ "x-roxen-recursion-depth" : 1 ]); +  add_headers(headers, next); +  fetch_url(next, headers);    }      #ifdef THREADS    foreach(initiated, HTTPClient client) {    DWRITE("initiated: " + (string)client->url);    }   #endif       DWRITE("----------------- Leaving update_db() ------------------------");    }    -  +  private string get_db_url(mapping args) { +  if (!sizeof(args["request-headers"]||"")) { +  return args["cached-href"]; +  } +  +  string s = args["request-headers"] + (args["header-delimiter"]||","); +  return args["cached-href"] + "#" + String.string2hex(Crypto.MD5.hash(s)); +  } +  +  private mapping add_headers(mapping in_headers, mapping args) { +  if (sizeof(args["request-headers"] || "")) { +  foreach(args["request-headers"] / (args["header-delimiter"]||","), string h) { +  if (sscanf(h, "%[^=]=%s", string name, string val) == 2) { +  in_headers[name] = val; +  } +  } +  } +  return in_headers; +  } +     public string get_data(mapping args, mapping header) {    int next_fetch = 0;    array(mapping(string:mixed)) result;    int now = time();       /* if the tag argument time-of-day is provided, the database column next_fetch    needs to be calculated: */    if (args["time-of-day"]) {    mapping now_lt = localtime(now);   
Roxen.git/server/modules/tags/insert_cached_href.pike:431:       if (next_fetch < now)    next_fetch += 24 * 3600;    }      #ifndef THREADS    /* When running unthreaded the database still needs to be kept up-to-date */    remove_old_entrys();   #endif    -  string url = args["cached-href"]; +  string url = get_db_url(args);    sql_query("UPDATE " + request_table +    " SET latest_request = " + now + ", "    " out_of_date = NULL "    " WHERE url = %s "    " AND fetch_interval = %d "    " AND fresh_time = %d "    " AND ttl = %d "    " AND timeout = %d "    " AND time_of_day = %d",    url, args["fetch-interval"], args["fresh-time"], args["ttl"], -  args["timeout"], args["time-of-day"]); +  args["timeout"], args["time-of-day"], +  args["request-headers"], args["header-delimiter"]);          sql_query("INSERT IGNORE INTO " + request_table + -  " VALUES (%s, %d, %d, %d, %d, %d, %d, %d, %d)", +  " VALUES (%s, %d, %d, %d, %d, %d, %d, %d, %d, %s, %s)",    url,    args["fetch-interval"], args["fresh-time"], args["ttl"],    args["timeout"], args["time-of-day"], next_fetch, now, -  (args["ttl"] + now)); +  (args["ttl"] + now), +  args["request-headers"], args["header-delimiter"]);       sql_query("INSERT IGNORE INTO " + data_table +    " VALUES (%s, '', 0)",    url);       result = sql_query("SELECT data "    " FROM " + data_table +    " WHERE url = %s "    " AND (" + now + " - latest_write < %d "    " OR %d = 0)",    url, args["fresh-time"], args["fresh-time"]);       if (result && sizeof(result) && result[0]["data"] != "") {    DWRITE("get_data(): Returning cached data for " + url);       return utf8_to_string(result[0]["data"]);    } else if (!args["pure-db"]) {    DWRITE("get_data(): No cached data existed for " + url +    " so performing a synchronous fetch");    -  +  add_headers(header, args); +     string data = fetch_url( ([ "url" : url,    "timeout" : args["timeout"],    "sync" : 1]),    header);       return data;    } else {    DWRITE("get_data(): No cached data existed for " + url +    " and pure-db data was desired, so simply returning the "    "empty string");       return "";    }    }       private array(mapping(string:mixed)) no_duplicate_add(array(mapping(string:mixed)) -  to_fetch, string url, -  int timeout) { +  to_fetch, mapping row) { +  string url = row->url;    foreach(to_fetch, mapping one) {    if (search(one, url))    return to_fetch;    }    -  to_fetch += ({(["url":url, "timeout":timeout])}); +  to_fetch += ({ ([ "url": url, +  "timeout": 0, +  "request-headers": row["request_headers"], +  "header-delimiter": row["header_delimiter"], +  ]) });       return to_fetch;    }       private int nr_of_requests() {    return sizeof(sql_query("SELECT url from " + request_table));    }       private void remove_old_entrys() {   
Roxen.git/server/modules/tags/insert_cached_href.pike:525:    " LEFT JOIN " + request_table +    " ON " + data_table + ".url=" + request_table + ".url "    " WHERE " + request_table + ".url IS NULL");    }       private array(mapping(string:mixed)) urls_to_fetch() {    array(mapping(string:mixed)) to_fetch = ({});    int now = time();       array(mapping(string:mixed)) result = -  sql_query(" SELECT " + data_table + ".url, " + request_table + ".timeout " +  sql_query(" SELECT " + data_table + ".url," + +  request_table + ".request_headers, " + +  request_table + ".header_delimiter, " + +  request_table + ".timeout "    " FROM " + data_table +    " LEFT JOIN " + request_table +    " ON " + data_table + ".url=" + request_table + ".url "    " WHERE " + data_table + ".data='' "    " ORDER BY url, timeout DESC");       foreach(result, mapping row) { -  to_fetch = no_duplicate_add(to_fetch, row["url"], 0); +  to_fetch = no_duplicate_add(to_fetch, row);    }    -  result = sql_query(" SELECT " + data_table + ".url, " + request_table + ".timeout, " -  + data_table + ".latest_write, " + request_table + -  ".fetch_interval " +  result = sql_query(" SELECT " + data_table + ".url, " + +  data_table + ".latest_write, " + +  request_table + ".request_headers, " + +  request_table + ".header_delimiter, " + +  request_table + ".timeout, " + +  request_table + ".fetch_interval "    " FROM " + data_table +    " LEFT JOIN " + request_table +    " ON " + data_table + ".url=" + request_table + ".url "    " WHERE " + data_table + ".data!='' "    " AND " + request_table + ".fetch_interval > 0 "    " AND ((" + now + " - " + data_table + ".latest_write) > " + request_table + ".fetch_interval) "    " ORDER BY url, timeout DESC");       foreach(result, mapping row) { -  to_fetch = no_duplicate_add(to_fetch, row["url"], 0); +  to_fetch = no_duplicate_add(to_fetch, row);    }    -  result = sql_query(" SELECT " + data_table + ".url, " + request_table + ".timeout, " -  + request_table + ".time_of_day, " + request_table + -  ".next_fetch " +  result = sql_query(" SELECT " + data_table + ".url, " + +  request_table + ".request_headers, " + +  request_table + ".header_delimiter, " + +  request_table + ".timeout, " + +  request_table + ".time_of_day, " + +  request_table + ".next_fetch "    " FROM " + data_table +    " LEFT JOIN " + request_table +    " ON " + data_table + ".url=" + request_table + ".url "    " WHERE " + data_table + ".data!='' "    " AND " + request_table + ".time_of_day > 0 "    " AND " + now + " > " + request_table + ".next_fetch "    " ORDER BY url, timeout DESC");       foreach(result, mapping row) { -  to_fetch = no_duplicate_add(to_fetch, row["url"], 0); +  to_fetch = no_duplicate_add(to_fetch, row);    }       result = sql_query(" SELECT url, max(timeout) "    " FROM " + request_table + " AS url "    "GROUP BY url");       foreach(to_fetch, mapping one) {    foreach(result, mapping row) {    if (one["url"] == row["url"]) {    one["timeout"] = (int)row["max(timeout)"];
Roxen.git/server/modules/tags/insert_cached_href.pike:613:    private mapping db_args; /* Checked attributes with relevance for the database */       void create(mapping args) {    orig_args = args;    db_args = (["cached-href" : 0,    "fetch-interval" : 0,    "fresh-time" : 0,    "ttl" : 0,    "timeout" : 0,    "time-of-day" : 0, -  "pure-db" : 0]); +  "pure-db" : 0, +  "request-headers" : 0, +  "header-delimiter" : 0]);    check_args();    }       private int(0..1) valid_arg(string arg) {    arg = String.trim_whites(arg);       if (!sizeof(arg) || !is_number(arg[0]) || arg[0] == 48)    return 0;       return 1;
Roxen.git/server/modules/tags/insert_cached_href.pike:680:    else    db_args["timeout"] = query("timeout");       db_args["fetch-interval"] = get_time_in_seconds(db_args["fetch-interval"]);    db_args["fresh-time"] = get_time_in_seconds(db_args["fresh-time"]);    db_args["ttl"] = get_time_in_seconds(db_args["ttl"]);    db_args["timeout"] = get_time_in_seconds(db_args["timeout"]);       if (orig_args["pure-db"])    db_args["pure-db"] = 1; +  +  db_args["request-headers"] = orig_args["request-headers"]; +  db_args["header-delimiter"] = orig_args["header-delimiter"];    }       public mapping get_orig_args() {    return orig_args;    }       public mapping get_db_args() {    return db_args;    }   }