87e926 | 2001-06-22 | Martin Nilsson | |
|
635165 | 2009-05-25 | Martin Stjernholm | |
|
87e926 | 2001-06-22 | Martin Nilsson | |
|
de9a7c | 2010-10-06 | Henrik Grubbström (Grubba) | |
|
f67261 | 2001-07-26 | Martin Nilsson | |
// When no report_error() constant is available, map the three report_*()
// logging calls used in this file onto plain werror().
#if !constant(report_error)
#define report_error werror
#define report_debug werror
#define report_warning werror
#endif

// NOTE(review): presumably tells the hosting server not to dump this
// module to a precompiled file -- confirm against the loader.
final constant dont_dump_module=1;

// WERR(X) writes X as a "search: "-prefixed debug line when SEARCH_DEBUG
// is defined and expands to nothing otherwise. The expansion already ends
// in a semicolon.
#ifdef SEARCH_DEBUG
# define WERR(X) report_debug("search: "+(X)+"\n");
#else
# define WERR(X)
#endif
|
87e926 | 2001-06-22 | Martin Nilsson | |
|
//! Lower-cases @[what] and hands the result to
//! @[Unicode.split_words_and_normalize].
//!
//! @param what
//!   The text to split.
//! @returns
//!   The array of words produced by @[Unicode.split_words_and_normalize].
public array(string) tokenize_and_normalize( string what )
{
  string lowered = lower_case(what);
  array(string) words = Unicode.split_words_and_normalize(lowered);
  return words;
}
|
90f564 | 2001-05-17 | Johan Schön | |
|
//! Splits @[in] into words using @[Unicode.split_words]. No case folding
//! or normalization is applied here.
//!
//! @param in
//!   The text to split.
//! @returns
//!   The array of words produced by @[Unicode.split_words].
public array(string) tokenize(string in)
{
  array(string) words = Unicode.split_words(in);
  return words;
}
|
//! Lower-cases @[in] and normalizes it with @[Unicode.normalize] using
//! the "KD" form.
//!
//! @param in
//!   The text to normalize.
//! @returns
//!   The lower-cased, "KD"-normalized string.
public string normalize(string in)
{
  string lowered = lower_case(in);
  return Unicode.normalize(lowered, "KD");
}
|
c88b0b | 2001-07-04 | Martin Nilsson | |
|
74de68 | 2001-07-13 | Martin Nilsson | |
|
46a652 | 2001-08-06 | Martin Nilsson | |
|
//! One cached combination of a database profile and a query profile.
//!
//! The entry lazily fetches the value mappings of both profiles from the
//! owning @[ProfileCache] and lazily builds the database connection, the
//! ranking profile and the stop word list from those values.
class ProfileEntry {

  //! time() of creation or of the latest positive @[check_timeout].
  private int last_stat;
  private int database_profile_id;
  private int query_profile_id;
  private ProfileCache my_cache;

  //! Value mappings, fetched from @[my_cache] on first use.
  private mapping(string:mixed) database_values;
  private mapping(string:mixed) query_values;

  //! Lazily created helpers, see the corresponding get_*() methods.
  private Search.Database.MySQL db;
  private Search.RankingProfile ranking;
  private array(string) stop_words;

  //! @param _database_profile_id
  //!   Id of the database profile this entry represents.
  //! @param _query_profile_id
  //!   Id of the query profile this entry represents.
  //! @param _my_cache
  //!   The cache used to look up the value mappings of both profiles.
  void create(int _database_profile_id,
              int _query_profile_id,
              ProfileCache _my_cache) {
    database_profile_id = _database_profile_id;
    query_profile_id = _query_profile_id;
    my_cache = _my_cache;

    // Assign the member. A local declaration here would shadow it and
    // leave the member at 0, making the first check_timeout() fire
    // immediately.
    last_stat = time();

    // Build the ranking profile eagerly.
    get_ranking();
  }

  //! Checks whether five minutes have passed since @[last_stat].
  //!
  //! @returns
  //!   0 while less than five minutes have passed; otherwise 1, after
  //!   resetting the timer to the current time.
  int(0..1) check_timeout() {
    int now = time();
    if (now - last_stat < 5*60)
      return 0;
    last_stat = time();
    return 1;
  }

  //! Returns the value stored under @[index] in the database profile.
  //! The value mapping is fetched from the cache on first use.
  mixed get_database_value(string index) {
    if (!database_values)
      database_values = my_cache->get_value_mapping(database_profile_id);
    return database_values[index];
  }

  //! Returns the value stored under @[index] in the query profile.
  //! The value mapping is fetched from the cache on first use.
  mixed get_query_value(string index) {
    if (!query_values)
      query_values = my_cache->get_value_mapping(query_profile_id);
    return query_values[index];
  }

  //! Returns the search database object, creating it on first use from
  //! the "db_name" database value.
  //!
  //! @throws
  //!   An error if no database object could be created, which is always
  //!   the case when DBManager is not available.
  Search.Database.MySQL get_database() {
    if (db)
      return db;

#if constant(DBManager)
    db = Search.Database.MySQL( DBManager.db_url( get_database_value("db_name"), 1) );
#endif
    if (!db)
      error("Could not aquire the database URL to database " +
            get_database_value("db_name") + ".\n");
    return db;
  }

  //! Returns the ranking profile, creating it on first use from the
  //! query values "fi_cut", "px_rank" and "fi_rank" and the database
  //! returned by @[get_database].
  Search.RankingProfile get_ranking() {
    if (!ranking)
      ranking = Search.RankingProfile(get_query_value("fi_cut"),
                                      get_query_value("px_rank"),
                                      get_database(),
                                      get_query_value("fi_rank"));
    return ranking;
  }

  //! A small set of strings, integers and floats. The `+ and `-
  //! operators modify the set in place and return the set itself.
  class ADTSet {
    private mapping vals = ([]);

    //! Adds @[in] to the set.
    ADTSet add (string|int|float in) {
      vals[in] = 1;
      return this_object();
    }

    //! Removes @[out] from the set.
    ADTSet sub (string|int|float out) {
      m_delete(vals, out);
      return this_object();
    }

    //! Adds a single value, or every element of anything that can be
    //! cast to an array.
    ADTSet `+(mixed in) {
      if (stringp(in) || intp(in) || floatp(in))
        add(in);
      else
        map((array)in, add);
      return this_object();
    }

    //! Removes a single value, or every element of anything that can be
    //! cast to an array.
    ADTSet `-(mixed out) {
      if (stringp(out) || intp(out) || floatp(out))
        sub(out);
      else
        map((array)out, sub);
      return this_object();
    }

    //! Supports casts to "object", "array" and "multiset"; any other
    //! target type raises an error.
    mixed cast(string to) {
      switch(to) {
      case "object": return this_object();
      case "array": return indices(vals);
      case "multiset": return (multiset)indices(vals);
      default:
        error("Can not cast ADTSet to "+to+".\n");
      }
    }
  }

  // Splits text on newlines and then on spaces and adds every non-empty
  // piece to the set.
  private void add_stop_words(ADTSet set, string text) {
    array(string) pieces =
      Array.flatten(map(text/"\n",
                        lambda(string line) {
                          return line/" ";
                        })) - ({""});
    foreach(pieces, string word)
      set->add(word);
  }

  //! Returns the stop words of the query profile: the words found in
  //! every file listed in the "sw_lists" query value plus the words in
  //! the "sw_words" query value. The list is computed once and cached.
  array(string) get_stop_words() {
    if (stop_words)
      return stop_words;

    ADTSet words = ADTSet();
    foreach(get_query_value("sw_lists"), string fn) {
      string file = Stdio.read_file(fn);
      // Test the file contents, not the file name: read_file() yields 0
      // when the file cannot be read.
      if (!file)
        report_error("Could not load %O.\n", fn);
      else
        add_stop_words(words, file);
    }
    add_stop_words(words, get_query_value("sw_words"));

    stop_words = (array)words;
    return stop_words;
  }
}
|
46a652 | 2001-08-06 | Martin Nilsson | |
|
//! Cache of profile names, profile values and @[ProfileEntry] objects
//! for the profiles stored in the database named @[db_name].
//!
//! In the profile table, rows with type=2 are looked up as database
//! profiles and rows with type=1 as query profiles.
class ProfileCache (string db_name) {

  //! Entries keyed by "<query profile id>:<database profile id>".
  private mapping(string:ProfileEntry) entry_cache = ([]);

  //! Decoded value mappings keyed by profile id.
  private mapping(int:mapping(string:mixed)) value_cache = ([]);

  //! Profile name to profile id, per profile type.
  private mapping(string:int) db_profile_names = ([]);
  private mapping(string:int) query_profile_names = ([]);

  //! Last seen value of the "altered" column, keyed by profile id.
  private mapping(int:int) profile_stat = ([]);

  // Returns the database connection, or throws if none is available.
  // Without DBManager there never is one.
  private Sql.Sql get_db() {
    Sql.Sql db;
#if constant(DBManager)
    db = DBManager.cached_get(db_name);
#endif
    if (!db)
      error("Could not connect to database " + db_name + ".\n");
    return db;
  }

  //! Checks whether the cached state for @[profile_id] is current.
  //!
  //! @returns
  //!   @int
  //!     @value -1
  //!       The profile does not exist. Everything cached for profile
  //!       ids that are no longer in the profile table has been
  //!       purged.
  //!     @value 1
  //!       The "altered" value is the same as last time.
  //!     @value 0
  //!       The "altered" value has changed. It has been recorded, and
  //!       the cached values and entries of the profile were dropped.
  //!   @endint
  int(-1..1) up_to_datep(int profile_id) {
    array(mapping(string:string)) res;
    res = get_db()->query("SELECT altered,type FROM profile WHERE id=%d", profile_id);

    if (!sizeof(res)) {
      // The profile is gone; purge every cache from stale ids.
      array(int) existing = (array(int))get_db()->query("SELECT id FROM profile")->id;

      foreach(indices(value_cache), int id)
        if (!has_value(existing, id))
          m_delete(value_cache, id);

      foreach(indices(entry_cache), string key) {
        int first, second;
        sscanf(key, "%d:%d", first, second);
        if (!has_value(existing, first) || !has_value(existing, second))
          m_delete(entry_cache, key);
      }

      foreach(indices(db_profile_names), string name)
        if (!has_value(existing, db_profile_names[name]))
          m_delete(db_profile_names, name);

      foreach(indices(query_profile_names), string name)
        if (!has_value(existing, query_profile_names[name]))
          m_delete(query_profile_names, name);

      return -1;
    }

    int altered = (int)res[0]->altered;
    if (altered == profile_stat[profile_id])
      return 1;
    profile_stat[profile_id] = altered;

    // Entry keys are "<query id>:<database id>", so a database profile
    // (type 2) is matched against the second number and any other
    // profile against the first.
    int slot = ((int)res[0]->type == 2) ? 1 : 0;

    m_delete(value_cache, profile_id);
    foreach(indices(entry_cache), string key)
      if (array_sscanf(key, "%d:%d")[slot] == profile_id)
        m_delete(entry_cache, key);
    return 0;
  }

  //! Returns the id of the database profile called @[name], using the
  //! name cache when possible.
  //!
  //! @throws
  //!   An error if there is no such database profile.
  int get_db_profile_number(string name) {
    int cached = db_profile_names[name];
    if (cached)
      return cached;

    array res = get_db()->
      query("SELECT id FROM profile WHERE name=%s AND type=2", name);
    if (!sizeof(res))
      error("No database profile " + name + " found.\n");

    return db_profile_names[name] = (int)res[0]->id;
  }

  //! Returns the id of the query profile called @[name], using the name
  //! cache when possible.
  //!
  //! @throws
  //!   An error if there is no such query profile.
  int get_query_profile_number(string name)
  {
    int cached = query_profile_names[name];
    if (cached)
      return cached;

    array res = get_db()->
      query("SELECT id FROM profile WHERE name=%s AND type=1", name);
    if (!sizeof(res))
      error("No query profile " + name + " found.\n");

    return query_profile_names[name] = (int)res[0]->id;
  }

  private int last_db_prof_stat = 0;

  //! Returns the names of all database profiles and replaces the
  //! database profile name cache with the fresh listing.
  array(string) list_db_profiles() {
    /* Disabled shortcut, kept for reference:
    if (time() - last_db_prof_stat < 5*60)
      return indices(db_profile_names);*/
    array res = get_db()->query("SELECT name, id FROM profile WHERE type=2");
    db_profile_names = mkmapping(
      res->name,
      map(res->id, lambda(string s) { return (int) s; } ));
    if (sizeof(res))
      last_db_prof_stat = time();
    return res->name;
  }

  private int last_query_prof_stat = 0;

  //! Returns the names of all query profiles and replaces the query
  //! profile name cache with the fresh listing.
  array(string) list_query_profiles()
  {
    /* Disabled shortcut, kept for reference:
    if (time() - last_query_prof_stat < 5*60)
      return indices(query_profile_names);*/
    array res = get_db()->query("SELECT name, id FROM profile WHERE type=1");
    query_profile_names = mkmapping( res->name, (array(int)) res->id );
    if (sizeof(query_profile_names))
      last_query_prof_stat = time();
    // Return the listing, mirroring list_db_profiles(). Without this the
    // function falls off the end despite its declared return type.
    return res->name;
  }

  // Compile handler used when decoding stored values. It exposes only a
  // handful of aggregation functions and `+; resolving any other symbol
  // throws.
  private object compile_handler = class {
      mapping(string:mixed) get_default_module() {
        return ([ "aggregate_mapping":aggregate_mapping,
                  "aggregate_multiset":aggregate_multiset,
                  "aggregate":aggregate,
                  "allocate":allocate,
                  "`+": `+,
                  "this_program":0 ]);
      }

      mixed resolv(string id, void|string fn, void|string ch) {
        throw( ({ sprintf("Found symbol %O while trying to decode Roxen Search "
                          "settings. The database is corrupt or has been "
                          "tampered with.\n", id),
                  backtrace() }) );
      }
    }();

  // Decodes a stored value by compiling it as a Pike expression.
  // NOTE(review): this compiles text read from the database; the
  // restricted compile_handler above is the only safeguard.
  private mixed reacodec_decode(string str) {
    return compile_string("mixed foo=" + str + ";", 0, compile_handler)()->foo;
  }

  //! Returns the decoded name/value mapping of the profile with id
  //! @[profile]. The cache stores its own copy, and callers always
  //! receive a copy.
  mapping get_value_mapping(int profile) {
    mapping val = copy_value(value_cache[profile]);
    if (val)
      return val;

    array res = get_db()->
      query("SELECT name,value FROM value WHERE pid=%d", profile);

    val = mkmapping( res->name, map(res->value, reacodec_decode) );
    value_cache[profile] = copy_value(val);
    return val;
  }

  //! Returns the @[ProfileEntry] for the given database and query
  //! profile names, creating it if needed.
  //!
  //! A cached entry is returned as is until its timeout fires. After
  //! that it is returned only if both profiles are still unchanged in
  //! the database.
  //!
  //! @throws
  //!   An error if either profile name cannot be resolved.
  ProfileEntry get_profile_entry(string db_name, void|string query_name) {
    int db = get_db_profile_number(db_name);
    int query = get_query_profile_number(query_name);
    string key = query + ":" + db;

    ProfileEntry entry = entry_cache[key];
    if (entry) {
      if (!entry->check_timeout())
        return entry;

      // Compare against 1 explicitly: up_to_datep() answers -1 for a
      // profile that no longer exists, which must not count as current.
      if (up_to_datep(db) == 1 && up_to_datep(query) == 1)
        return entry;

      // Resolve the names again. For a changed profile this is a cache
      // hit giving the same ids. For a removed profile the name caches
      // were purged, so this raises the ordinary "not found" error or
      // picks up a recreated profile.
      db = get_db_profile_number(db_name);
      query = get_query_profile_number(query_name);
      key = query + ":" + db;
    }

    entry = ProfileEntry( db, query, this_object() );
    return entry_cache[key] = entry;
  }

  //! Drops everything cached for the profile with id @[p]: its values,
  //! its name mapping and every entry that involves it.
  void flush_profile(int p) {
    m_delete(value_cache, p);

    foreach(db_profile_names; string name; int dbp)
      if (dbp == p)
        m_delete(db_profile_names, name);

    foreach(query_profile_names; string name; int qp)
      if (qp == p)
        m_delete(query_profile_names, name);

    foreach(indices(entry_cache), string key) {
      array ids = array_sscanf(key, "%d:%d");
      if (ids[0] == p || ids[1] == p)
        m_delete(entry_cache, key);
    }
  }

  //! Clears the value cache and both name caches and resets the listing
  //! timestamps.
  //! NOTE(review): entry_cache and profile_stat are left untouched here,
  //! unlike in flush_profile() -- confirm that this is intended.
  void flush_cache() {
    value_cache = ([]);
    db_profile_names = ([]);
    query_profile_names = ([]);
    last_db_prof_stat = 0;
    last_query_prof_stat = 0;
  }
}
|
// One ProfileCache per database name.
private mapping(string:ProfileCache) profile_cache_cache = ([]);

//! Returns the @[ProfileCache] for the database @[db_name], creating and
//! remembering a new one the first time a name is requested.
ProfileCache get_profile_cache(string db_name) {
  ProfileCache cache = profile_cache_cache[db_name];
  if (!cache)
    cache = profile_cache_cache[db_name] = ProfileCache(db_name);
  return cache;
}
|
2db1ff | 2001-09-13 | Martin Nilsson | |
|
//! Calls flush_profile(@[p]) on every @[ProfileCache] created through
//! @[get_profile_cache].
void flush_profile(int p) {
  array(ProfileCache) caches = values(profile_cache_cache);
  caches->flush_profile(p);
}
|
// One storage mapping per database name.
private mapping(string:mapping) profile_storages = ([]);

//! Returns the storage mapping associated with @[db_name]. The first
//! request for a name creates an empty mapping; later requests return
//! that same mapping.
mapping get_profile_storage(string db_name) {
  mapping storage = profile_storages[db_name];
  if (storage)
    return storage;
  return profile_storages[db_name] = ([]);
}
// One Scheduler per database name.
private mapping(string:Scheduler) scheduler_storage = ([]);

//! Returns the @[Scheduler] for the database @[db_name], creating it on
//! first use with the profile storage of that database.
Scheduler get_scheduler(string db_name) {
  mapping dbp = get_profile_storage(db_name);
  if (scheduler_storage[db_name])
    return scheduler_storage[db_name];

  // Create exactly one instance. Scheduler.create() already calls
  // schedule(), so a second, discarded instance would leave an orphaned
  // scheduler behind.
  return scheduler_storage[db_name] = Scheduler(dbp);
}
//! Decides when the database profiles handed to @[create] should crawl,
//! index new entries and compact, and arranges for
//! @[do_scheduled_stuff] to run at the right time.
class Scheduler {
  //! Time of the next scheduled run.
  private int next_run;

  //! Profile id to due time, one queue per kind of work.
  private mapping(int:int) entry_queue = ([]);
  private mapping(int:int) crawl_queue = ([]);
  private mapping(int:int) compact_queue = ([]);

  //! Profile ids in the order they were queued.
  private array(int) priority_queue = ({});

  //! Database profile objects, keyed by profile id.
  private mapping db_profiles;

#if constant(roxen)
  //! Handle of the pending background run.
  private mixed schedule_run;
#endif

  //! @param _db_profiles
  //!   The profile objects to schedule. The mapping is used by
  //!   reference, not copied.
  void create(mapping _db_profiles) {
    db_profiles = _db_profiles;
    schedule();
  }

  //! Appends @[profile] to the priority queue unless already present.
  void check_priority_queue(int profile)
  {
    if (has_value(priority_queue, profile))
      return;
    priority_queue += ({ profile });
  }

  //! Queues the given profiles for indexing and brings the next run
  //! forward to @[latency] minutes from now, unless an earlier run that
  //! is not yet overdue is already planned.
  void new_entry(int latency, array(int) profiles) {
    int would_be_indexed = time() + latency*60;
    foreach(profiles, int profile)
    {
      entry_queue[profile] = 0;
      check_priority_queue(profile);
    }
    WERR("New entry. time: "+(would_be_indexed-time())+" profiles: "+
         (array(string))profiles*",");

    int keep_current =
      next_run && next_run < would_be_indexed && next_run >= time();
    if (keep_current)
      return;

    next_run = would_be_indexed;
    reschedule();
  }

  //! Refreshes the crawl and compact queues from every profile and
  //! schedules the next run. Destructed profiles are dropped with a
  //! warning.
  //!
  //! @param quiet
  //!   Suppresses the per-profile debug output.
  void schedule(void|int quiet) {
    foreach(indices(db_profiles), int id) {
      object dbp = db_profiles[id];
      if (!dbp) {
        report_warning("Search database profile %d destructed.\n", id);
        m_delete(db_profiles, id);
        continue;
      }
      if (!quiet) WERR("Scheduling for database profile "+dbp->name);

      int next = dbp->next_recrawl();
      if (next != -1) {
        crawl_queue[dbp->id] = next;
        check_priority_queue(id);
        if (!quiet) WERR(" Crawl: "+(next-time()));
      }

      next = dbp->next_compact();
      if (next != -1) {
        compact_queue[dbp->id] = next;
        if (!quiet) WERR(" Compact: "+(next-time()));
      }
      if (!quiet) WERR("");
    }

    if (!sizeof(crawl_queue) && !sizeof(compact_queue) && !sizeof(entry_queue))
      return;

    // Run at the earliest due time, but never sooner than ten seconds
    // from now.
    array(int) due = values(crawl_queue) + values(compact_queue) +
      values(entry_queue);
    next_run = max( min( @due ), time() + 10 );
    reschedule();
  }

#if constant (roxen)
  // Replaces any pending run with one at next_run.
  private void reschedule() {
    WERR("Scheduler runs next event in "+(next_run-time())+" seconds.");
    remove_call_out(schedule_run);
    schedule_run = roxen.background_run(next_run-time(), do_scheduled_stuff);
  }

  //! Cancels the pending run, if any.
  void unschedule() {
    remove_call_out(schedule_run);
    schedule_run = 0;
  }

  // Starts whatever work is due, then calls schedule() again.
  private void do_scheduled_stuff() {
    remove_call_out(schedule_run);
    schedule_run = 0;
    WERR("Running scheduler event.");

    // Do nothing while any profile is busy. Destructed profiles (0) are
    // skipped here; schedule() removes them.
    foreach(indices(db_profiles), int id) {
      object dbp = db_profiles[id];
      if (dbp && dbp->is_running()) {
        WERR("Postponing crawl start, profile "+id+" still running.");
        schedule(1);
        return;
      }
    }

    int t = time();

    WERR(sizeof(crawl_queue)+" profiles in crawl queue.");
    foreach(priority_queue & indices(crawl_queue), int id) {
      object dbp = db_profiles[id];
      if (crawl_queue[id] > t || !dbp) continue;
      if (dbp->ready_to_crawl()) {
        WERR("Scheduler starts crawling "+id);
        dbp->recrawl();
        m_delete(crawl_queue, id);
        m_delete(entry_queue, id);
        priority_queue -= ({ id });
      }
    }

    WERR(sizeof(entry_queue)+" profiles in entry queue.");
    foreach(priority_queue & indices(entry_queue), int id) {
      object dbp = db_profiles[id];
      if (entry_queue[id] > t || !dbp) continue;
      if (dbp->ready_to_crawl()) {
        WERR("Scheduler starts crawling "+id);
        dbp->start_indexer();
        m_delete(entry_queue, id);
        priority_queue -= ({ id });
        // Only one indexer is started per run.
        break;
      }
    }

    WERR(sizeof(compact_queue)+" profiles in compact queue.");
    foreach(indices(compact_queue), int id) {
      if (compact_queue[id] > t || !db_profiles[id]) continue;
      db_profiles[id]->start_compact();
      m_delete(compact_queue, id);
    }

    schedule();
  }
#else
  // Replaces any pending call_out with one at next_run.
  private void reschedule() {
    remove_call_out(do_scheduled_stuff);
    WERR("Scheduler runs next event in "+(next_run-time())+" seconds.");
    call_out(do_scheduled_stuff, next_run-time());
  }

  //! Cancels the pending run, if any.
  void unschedule() {
    remove_call_out(do_scheduled_stuff);
  }

  // Starts whatever work is due, then calls schedule() again.
  // NOTE(review): unlike the Roxen variant this one never removes
  // handled ids from the queues, and its second pass walks crawl_queue
  // rather than entry_queue -- confirm whether that is intended.
  private void do_scheduled_stuff() {
    remove_call_out(do_scheduled_stuff);
    WERR("Running scheduler event.");
    int t = time();

    WERR(sizeof(crawl_queue)+" profiles in crawl queue.");
    foreach(indices(crawl_queue), int id) {
      object dbp = db_profiles[id];
      if (crawl_queue[id] > t || !dbp) continue;
      if (dbp->ready_to_crawl()) {
        WERR("Scheduler starts crawling "+id);
        dbp->recrawl();
        entry_queue = ([]);
      }
    }

    WERR(sizeof(crawl_queue)+" profiles in crawl queue.");
    foreach(indices(crawl_queue), int id) {
      object dbp = db_profiles[id];
      if (crawl_queue[id] > t || !dbp) continue;
      if (dbp->ready_to_crawl()) {
        WERR("Scheduler starts crawling "+id);
        dbp->start_indexer();
      }
    }

    WERR(sizeof(compact_queue)+" profiles in compact queue.");
    foreach(indices(compact_queue), int id) {
      if (compact_queue[id] > t || !db_profiles[id]) continue;
      db_profiles[id]->start_compact();
    }

    schedule();
  }
#endif

  //! Returns an HTML table with one row per profile giving the seconds
  //! until its next crawl and compact ("Never" for -1), followed by the
  //! seconds until the next scheduler run.
  //! NOTE(review): this calls next_crawl() while schedule() calls
  //! next_recrawl(), and the "Next" header column has no matching cell
  //! -- confirm against the profile class.
  string info() {
    string res = "<table border='1' cellspacing='0' cellpadding='2'>"
      "<tr><th>Profile</th><th>Crawl</th>"
      "<th>Compact</th><th>Next</th></tr>";
    foreach(values(db_profiles), object dbp) {
      if (!dbp) continue;

      // Open the row with <tr>. The original emitted a closing tag
      // here, producing malformed markup.
      res += "<tr><td>" + dbp->name + "</td>";

      int next = dbp->next_crawl();
      if (next == -1)
        res += "<td>Never</td>";
      else
        res += "<td>"+ (next-time()) + "</td>";

      next = dbp->next_compact();
      if (next == -1)
        res += "<td>Never</td>";
      else
        res += "<td>"+ (next-time()) + "</td>";

      res += "</tr>";
    }
    res += "</table>";
    res += "<br />Next run: " + (next_run-time()) + "<br />";
    return res;
  }
}
|
f501ee | 2001-08-14 | Martin Nilsson | |
|
46a652 | 2001-08-06 | Martin Nilsson | |
|
//! Writes search events to the eventlog table of a log database and
//! reads them back as formatted messages.
class Logger {
  //! Either a database name, resolved through DBManager on every use, or
  //! an open connection.
  private string|Sql.Sql logdb;

  //! Profile id used when a log call does not supply one.
  private int profile;

  //! When set, every logged event is echoed with werror() as well.
  private int stderr_logging;

  private int last_log_purge_time;

  //! Minimum number of seconds between two purges.
  private constant log_purge_freq = 8*60*60;

  // Returns the connection to log to. A string logdb is looked up with
  // DBManager when that is available.
  private Sql.Sql get_db() {
    Sql.Sql db;
#if constant(DBManager)
    if(stringp(logdb))
      db = DBManager.get(logdb);
    else
#endif
      db = logdb;
    return db;
  }

  //! @param _logdb
  //!   Database name or connection to log to.
  //! @param _profile
  //!   Default profile id for logged events.
  //! @param _stderr_logging
  //!   Nonzero to echo events to stderr.
  //!
  //! Creates the eventlog table if a probe query against it fails.
  void create(string|Sql.Sql _logdb, int _profile, int _stderr_logging) {
    logdb = _logdb;
    profile = _profile;
    stderr_logging = _stderr_logging;

    Sql.Sql db = get_db();
    mixed probe_error = catch(db->query("SELECT code FROM eventlog WHERE event=0"));
    if (probe_error)
      db->query("CREATE TABLE eventlog ("
                "event int unsigned auto_increment primary key,"
                "at timestamp(14) not null,"
                "profile int unsigned not null,"
                "code int unsigned not null,"
                "type enum('error','warning','notice') not null,"
                "extra varchar(255))");
  }

  // Returns the message for an event code. When extra is given, the
  // message is used as a sprintf format and the newline-separated pieces
  // of extra are its arguments.
  private string format_message(int code, void|string extra) {
    if (extra)
      return sprintf(codes[code], @(extra/"\n"));
    return codes[code];
  }

  //! Writes one event to stderr in human readable form.
  //! @[log_profile] is accepted but not used.
  void werror_event( int code, string type, void|string extra, void|int log_profile )
  {
    mapping types = ([ "error" : "Error",
                       "warning" : "Warning",
                       "notice" : "Notice", ]);
    werror("%sSearch: %s: %s\n",
           " : ",
           types[type],
           format_message((int)code, extra));
  }

  //! Deletes log rows older than @[days] days, or every row if @[days]
  //! is 0, and then optimizes the table. Does nothing if the previous
  //! purge was less than @[log_purge_freq] seconds ago. Warns if the
  //! purge took more than ten seconds.
  void log_purge(int days)
  {
    Sql.Sql db = get_db();
    if (!db) return;

    int t1 = time();
    if (t1 < last_log_purge_time + log_purge_freq) return;
    last_log_purge_time = t1;

    if (days)
      db->query("DELETE FROM eventlog "
                " WHERE at <= NOW() - INTERVAL "+days+" DAY");
    else
      db->query("DELETE FROM eventlog");
    db->query("OPTIMIZE TABLE eventlog");

    int t2 = time();
    if (t2 - t1 > 10)
      report_warning("Search log purge took %d s.\n", t2-t1);
  }

  //! Stores one event in the log, and echoes it to stderr when stderr
  //! logging is enabled. Does nothing when no database is available.
  //!
  //! @param code
  //!   Event code, see @[codes].
  //! @param type
  //!   "error", "warning" or "notice".
  //! @param extra
  //!   Optional newline-separated arguments for the message format.
  //! @param log_profile
  //!   Profile id to log under; defaults to the logger's own profile.
  void log_event( int code, string type, void|string extra, void|int log_profile ) {
    Sql.Sql db = get_db();
    if (!db) return;

    if (zero_type(log_profile))
      log_profile = profile;

    if (stderr_logging)
      werror_event(code, type, extra, log_profile);

    if (extra)
      db->query("INSERT INTO eventlog (profile,code,type,extra) VALUES (%d,%d,%s,%s)",
                log_profile, code, type, extra);
    else
      db->query("INSERT INTO eventlog (profile, code,type) VALUES (%d,%d,%s)",
                log_profile, code, type);
  }

  //! Logs an event of type "error".
  void log_error( int code, void|string extra, void|int log_profile ) {
    log_event( code, "error", extra, log_profile );
  }

  //! Logs an event of type "warning".
  void log_warning( int code, void|string extra, void|int log_profile ) {
    log_event( code, "warning", extra, log_profile );
  }

  //! Logs an event of type "notice".
  void log_notice( int code, void|string extra, void|int log_profile ) {
    log_event( code, "notice", extra, log_profile );
  }

  //! Returns @[code] plus the position of @[name] in the list of known
  //! program names, so that a base code such as 10, 20, 30 or 50 selects
  //! the message for that program.
  //!
  //! @throws
  //!   An error for an unknown program name.
  int add_program_name(int code, string name) {
    array(string) programs = ({ "multiprocess_crawler", "buffer_c2f", "filter",
                                "buffer_f2i", "indexer" });
    int add = search( programs, name );
    if (add == -1)
      throw( ({ "Unknown program name \""+name+"\".\n", backtrace() }) );
    return code + add;
  }

  //! Message formats, keyed by event code.
  private mapping codes = ([
    10 : "Started crawler with %s.",
    11 : "Started crawler-to-filter buffer with %s.",
    12 : "Started filter with %s.",
    13 : "Started filter-to-indexer buffer with %s.",
    14 : "Started indexer with %s.",
    20 : "Exiting crawler due to signal.",
    21 : "Exiting crawler-to-filter buffer due to signal.",
    22 : "Exiting filter due to signal.",
    23 : "Exiting filter-to-indexer buffer due to signal.",
    24 : "Exiting indexer due to signal.",
    30 : "Crawler failed to set up pipe.",
    31 : "Crawler-to-filter buffer failed to set up pipe.",
    32 : "Filter failed to set up pipe.",
    33 : "Filter-to-indexer buffer failed to set up pipe.",
    34 : "Indexer failed to set up pipe.",
    40 : "Fetched %s.",
    41 : "Unknown language code \"%s\".",
    42 : "Crawler exited normally.",
    43 : "Cleared search database.",
    44 : "Sitebuilder commit triggered indexing of %s.",

    50 : "Crawler did not get any connection from the process.",
    51 : "Crawler-to-filter buffer did not get any connection from the process.",
    52 : "Filter did not get any connection from the process.",
    53 : "Filter-to-indexer buffer did not get any connection from the process.",
    54 : "Indexer did not get any connection from the process.",

    60 : "Starting database compactor with %s",
    61 : "Failed to find any data in the database.",
    62 : "Exiting compacter due to signal.",
    63 : "Done with database compacting and maintenance.",

    300: "300 Redirection: Multiple Choices (%s)",
    301: "301 Redirection: Moved Permanently (%s)",
    302: "302 Redirection: Found (%s)",
    303: "303 Redirection: See Other (%s)",
    304: "304 Redirection: Not Modified (%s)",
    305: "305 Redirection: Use Proxy (%s)",
    306: "306 Redirection: (Unused) (%s)",
    307: "307 Redirection: Temporary Redirect (%s)",
    400: "400 Client Error: Bad Request (%s)",
    401: "401 Client Error: Unauthorized (%s)",
    402: "402 Client Error: Payment Required (%s)",
    403: "403 Client Error: Forbidden (%s)",
    404: "404 Client Error: Not Found (%s)",
    405: "405 Client Error: Method Not Allowed (%s)",
    406: "406 Client Error: Not Acceptable (%s)",
    407: "407 Client Error: Proxy Authentication Required (%s)",
    408: "408 Client Error: Request Timeout (%s)",
    409: "409 Client Error: Conflict (%s)",
    410: "410 Client Error: Gone (%s)",
    411: "411 Client Error: Length Required (%s)",
    412: "412 Client Error: Precondition Failed (%s)",
    413: "413 Client Error: Request Entity Too Large (%s)",
    414: "414 Client Error: Request-URI Too Long (%s)",
    415: "415 Client Error: Unsupported Media Type (%s)",
    416: "416 Client Error: Requested Range Not Satisfiable (%s)",
    417: "417 Client Error: Expectation Failed (%s)",
    500: "500 Server Error: Internal Server Error (%s)",
    501: "501 Server Error: Not Implemented (%s)",
    502: "502 Server Error: Bad Gateway (%s)",
    503: "503 Server Error: Service Unavailable (%s)",
    504: "504 Server Error: Gateway Timeout (%s)",
    505: "505 Server Error: HTTP Version Not Supported (%s)",

    1000: "Disallowed by robots.txt. (%s)",
    1001: "Can't handle scheme. (%s)",
    1002: "No matching filter. (%s)",
    1003: "Too large content file -- indexing metadata only. (%s)",
    1100: "Failed to connect to %s.",
  ]);

  //! Returns logged events, newest first.
  //!
  //! @param profile
  //!   Only events of this profile; 0 for all profiles.
  //! @param types
  //!   Event types to include. An empty array yields no events, and an
  //!   array of exactly three types disables the type filter.
  //! @param from
  //!   Only events with "at" greater than this; 0 for no lower bound.
  //! @param to
  //!   Only events with "at" less than this; 0 for no upper bound.
  //! @returns
  //!   One array per event: ({ time, profile id, type, message }).
  array(array(string|int)) get_log( int profile, array(string) types,
                                    int from, int to ) {
    if (!sizeof(types))
      return ({});

    Sql.Sql db = get_db();
    if (!db) return ({});

    array(string) conds = ({});
    if (profile)
      conds += ({ "profile=" + profile });
    if (sizeof(types) != 3)
      // The type names are caller supplied strings; quote them before
      // splicing them into the statement.
      conds += ({ "(type='" + (map(types, db->quote) * "' OR type='") + "')" });
    if (from)
      conds += ({ "at>" + from });
    if (to)
      // Both bounds apply to the "at" column; the table has no "to"
      // column.
      conds += ({ "at<" + to });

    string where = sizeof(conds) ? (" WHERE " + (conds * " AND ")) : "";

    array(mapping) rows =
      db->query("SELECT unix_timestamp(at) as at,profile,code,type,extra FROM eventlog" +
                where + " ORDER BY event DESC");

    return map(rows,
               lambda(mapping in) {
                 return ({ (int)in->at, (int)in->profile, in->type,
                           format_message((int)in->code, in->extra) });
               } );
  }
}
|