Roxen.git
/
server
/
modules
/
misc
/
periodic-fetcher.pike
version
»
Context lines:
10
20
40
80
file
none
3
Roxen.git/server/modules/misc/periodic-fetcher.pike:119:
ADT.Priority_queue event_queue; array(Event) global_events; function do_fetch_co; function start_crawler_co; string crawler_status = "<font color='FFB700'><b>Waiting</b></font>"; void create() { defvar("crawl_src", "http://localhost/periodic-crawl.txt", "Crawl list URL", TYPE_STRING,
-
"<p>The URL to the file that contains the list of URLs to fetch. "
-
"It should be a text file with one URL, and its periodicity in "
+
"<p>The URL to the file that contains the list of URLs
or paths
to fetch. "
+
"It should be a text file with one URL
or path
, and its periodicity in "
"seconds separated by space, per line. It is also possible to specify " "an optional host header at the end of the line, e.g:</p>" "<pre>" " http://localhost:8080/ 5<br/>" " http://localhost:8080/ 5 mobile.roxen.com<br/>" " http://localhost:8080/news 10<br/>" " http://localhost:8080/sports 10<br/>"
-
"
http:
/
/localhost:8080/
rss.xml?category=3455&id=47 20"
-
"</pre>");
+
" /rss.xml?category=3455&id=47 20"
+
"</pre>"
+
"When a path is provided instead of a URL, a full URL will be constructed by "
+
"prepending the path with the URL in the 'Base URL' setting."
);
-
+
defvar("base_url", "http://localhost:8080",
+
"Base URL", TYPE_STRING,
+
"For lines in the text file that contain a path instead of URL, "
+
"this URL is prepended to construct a complete URL. This is useful "
+
"if the frontends need to crawl using separate URLs.");
+
defvar("crawl_delay", 60, "Crawl Delay", TYPE_INT, "Wait this amount of second before starting the crawler after " "the roxen server has started or the module has been reloaded."); defvar("curl_path", "/usr/bin/curl", "Curl Path", TYPE_STRING, "The path to the curl binary."); defvar("curl_timeout", 300,
Roxen.git/server/modules/misc/periodic-fetcher.pike:219:
"</font>", query("crawl_src")); return 0; } // One URL per line. array(string) lines = (crawl_file-"\r") / "\n" - ({""}); array(Event) events = ({ }); foreach(lines, string line) {
+
string url;
array fields = line / " " - ({""}); if(sizeof(fields) < 2) { ERROR_MSG("Parse error in crawl source file:\n%s\n", crawl_file); crawler_status = sprintf("<font color='BC311B'>" " <b>Parse error in crawl source file: %O.</b>" "</font>", query("crawl_src")); return 0; }
-
events
+=
(
{ Event
(fields[0],
(int
)fields[
1
]
,
(sizeof(fields)
>
=
3
)
?
fields[
2]:
0
) })
;
+
if
(
has_value
(fields[0],
"://"
)
) {
+
url =
fields[
0
]
;
+
} else {
+
url
=
query("base_url"
)
+
fields[
0
];
}
-
+
+
events += ({ Event(url, (int)fields[1], (sizeof(fields) >= 3)? fields[2]:0) });
+
}
return events; } void start_crawler() { DEBUG_MSG("Starting Crawler\n"); if(!sizeof(event_queue)) { ERROR_MSG("Queue empty\n"); return;