Roxen.git/server/modules/misc/periodic-fetcher.pike

Roxen.git/server/modules/misc/periodic-fetcher.pike:119:

   ADT.Priority_queue event_queue;
   array(Event) global_events;
   function do_fetch_co;
   function start_crawler_co;
   string crawler_status = "<font color='FFB700'><b>Waiting</b></font>";

   void create()
   {
     defvar("crawl_src", "http://localhost/periodic-crawl.txt",
            "Crawl list URL", TYPE_STRING,
-           "<p>The URL to the file that contains the list of URLs to fetch. "
-           "It should be a text file with one URL, and its periodicity in "
+           "<p>The URL to the file that contains the list of URLs or paths to fetch. "
+           "It should be a text file with one URL or path, and its periodicity in "
            "seconds separated by space, per line. It is also possible to specify "
            "an optional host header at the end of the line, e.g:</p>"
            "<pre>"
            " http://localhost:8080/ 5<br/>"
            " http://localhost:8080/ 5 mobile.roxen.com<br/>"
            " http://localhost:8080/news 10<br/>"
            " http://localhost:8080/sports 10<br/>"
-           " http://localhost:8080/rss.xml?category=3455&id=47 20"
-           "</pre>");
+           " /rss.xml?category=3455&id=47 20"
+           "</pre>"
+           "When a path is provided instead of a URL, a full URL will be constructed by "
+           "prepending the path with the URL in the 'Base URL' setting.");

-
+    defvar("base_url", "http://localhost:8080",
+           "Base URL", TYPE_STRING,
+           "For lines in the text file that contain a path instead of URL, "
+           "this URL is prepended to construct a complete URL. This is useful "
+           "if the frontends need to crawl using separate URLs.");
+
     defvar("crawl_delay", 60,
            "Crawl Delay", TYPE_INT,
            "Wait this amount of second before starting the crawler after "
            "the roxen server has started or the module has been reloaded.");

     defvar("curl_path", "/usr/bin/curl",
            "Curl Path", TYPE_STRING,
            "The path to the curl binary.");

     defvar("curl_timeout", 300,
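To make the mixed URL/path syntax concrete, here is a small illustrative crawl list (the entries are made up for this example, not taken from the commit), assuming "Base URL" is left at http://localhost:8080:

   http://localhost:8080/news 10
   /rss.xml?category=3455&id=47 20

The first line carries a scheme and is fetched as-is every 10 seconds; the second is a bare path, so it is fetched as http://localhost:8080/rss.xml?category=3455&id=47 every 20 seconds.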
Roxen.git/server/modules/misc/periodic-fetcher.pike:219:

             "</font>",
             query("crawl_src"));
     return 0;
   }

   // One URL per line.
   array(string) lines = (crawl_file-"\r") / "\n" - ({""});
   array(Event) events = ({ });
   foreach(lines, string line)
   {
+    string url;
     array fields = line / " " - ({""});
     if(sizeof(fields) < 2)
     {
       ERROR_MSG("Parse error in crawl source file:\n%s\n", crawl_file);
       crawler_status =
         sprintf("<font color='BC311B'>"
                 " <b>Parse error in crawl source file: %O.</b>"
                 "</font>",
                 query("crawl_src"));
       return 0;
     }

-    events += ({ Event(fields[0], (int)fields[1], (sizeof(fields) >= 3)? fields[2]:0) });
+    if (has_value(fields[0], "://")) {
+      url = fields[0];
+    } else {
+      url = query("base_url") + fields[0];
     }
-
+
+    events += ({ Event(url, (int)fields[1], (sizeof(fields) >= 3)? fields[2]:0) });
+  }
   return events;
 }

 void start_crawler()
 {
   DEBUG_MSG("Starting Crawler\n");
   if(!sizeof(event_queue))
   {
     ERROR_MSG("Queue empty\n");
     return;
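The path-versus-URL decision above is a plain substring test: any field containing "://" is taken as a complete URL, and everything else is treated as a path and concatenated onto the configured base. A minimal standalone Pike sketch of that step (resolve_url() and the sample values are illustrative, not part of the module):

string resolve_url(string field, string base_url)
{
  // Fields that already carry a scheme separator are used verbatim;
  // bare paths get the configured base URL prepended.
  if (has_value(field, "://"))
    return field;
  return base_url + field;
}

int main()
{
  // Hypothetical base matching the module's default "Base URL" setting.
  string base = "http://localhost:8080";
  write("%s\n", resolve_url("http://localhost:8080/news", base));
  // -> http://localhost:8080/news
  write("%s\n", resolve_url("/rss.xml?category=3455&id=47", base));
  // -> http://localhost:8080/rss.xml?category=3455&id=47
  return 0;
}

Note that the concatenation is literal, so a trailing slash on the base URL combined with a leading slash in the list file would yield a double slash in the fetched URL.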