pike.git / lib / modules / Search.pmod / Utils.pmod

pike.git/lib/modules/Search.pmod/Utils.pmod:34:
  //! Normalize the input string. Performs unicode NFKD normalization
  //! and then lowercases the whole string
  public string normalize(string in)
  {
    return Unicode.normalize( lower_case(in), "KD" );
  }

- #define THROW(X) throw( ({ (X), backtrace() }) )

  //! A result entry from the @[ProfileCache].
  class ProfileEntry {

    private int last_stat;

    private int database_profile_id;
    private int query_profile_id;
    private ProfileCache my_cache;

    private mapping(string:mixed) database_values;
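Note on the normalization above: case is folded before the NFKD pass, so composed and decomposed spellings index identically. A minimal usage sketch (the inputs are illustrative):

    string a = Search.Utils.normalize("Caf\u00e9");   // precomposed e-acute
    string b = Search.Utils.normalize("CAFE\u0301");  // "E" + combining acute
    // a == b: both normalize to "cafe" + U+0301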
pike.git/lib/modules/Search.pmod/Utils.pmod:62:
    //! @decl void create(int database_profile_id, int query_profile_id,@
    //!                   ProfileCache cache)
    //! @param cache
    //!   The parent cache object.
    void create(int _database_profile_id,
                int _query_profile_id,
                ProfileCache _my_cache) {
      database_profile_id = _database_profile_id;
      query_profile_id = _query_profile_id;
      my_cache = _my_cache;
-     int last_stat = time(1);
+     last_stat = time();

      // Prefetch..
      get_ranking();
    }

    //! Checks if it is time to check if the profile values are
    //! too old.
    int(0..1) check_timeout() {
-     if(time(1)-last_stat < 5*60) return 0;
-     last_stat = time(1);
+     if(time()-last_stat < 5*60) return 0;
+     last_stat = time();
      return 1;
    }

    //! Returns the database profile value @[index].
    mixed get_database_value(string index) {
      if(!database_values)
        database_values = my_cache->get_value_mapping(database_profile_id);
      return database_values[index];
    }
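The staleness gate in check_timeout() above, as a self-contained sketch (the class name StatGate is illustrative, not part of Utils.pmod): callers get at most one re-validation per five minutes.

    class StatGate {
      private int last_stat;
      int(0..1) check_timeout() {
        if (time() - last_stat < 5*60) return 0;  // checked recently, skip
        last_stat = time();                       // record this check
        return 1;                                 // caller should re-validate
      }
    }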
pike.git/lib/modules/Search.pmod/Utils.pmod:97:
      return query_values[index];
    }

    //! Returns a cached search database for the current database profile.
    Search.Database.MySQL get_database() {
      if(!db) {
  #if constant(DBManager)
        db = Search.Database.MySQL( DBManager.db_url( get_database_value("db_name"), 1) );
  #endif
        if(!db)
-         THROW("Could not aquire the database URL to database " +
+         error("Could not acquire the database URL to database " +
                get_database_value("db_name") + ".\n");
      }
      return db;
    }

    //! Returns a cached ranking profile for the current database and
    //! query profile.
    Search.RankingProfile get_ranking() {
      if(!ranking)
        ranking = Search.RankingProfile(get_query_value("fi_cut"),
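The same guarded lookup that get_database() performs above, isolated into a hedged sketch (get_search_db is an illustrative name; DBManager.db_url is the call used in the hunk):

    Search.Database.MySQL get_search_db(string name) {
      string url;
    #if constant(DBManager)
      url = DBManager.db_url(name, 1);
    #endif
      if (!url)
        error("Could not acquire the database URL to database %s.\n", name);
      return Search.Database.MySQL(url);
    }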
pike.git/lib/modules/Search.pmod/Utils.pmod:149:
        map((array)out, sub);
        return this_object();
      }

      mixed cast(string to) {
        switch(to) {
        case "object": return this_object();
        case "array": return indices(vals);
        case "multiset": return (multiset)indices(vals);
        default:
-         THROW("Can not cast ADTSet to "+to+".\n");
+         error("Cannot cast ADTSet to "+to+".\n");
        }
      }
    }

    //! Returns a cached array of stop words for the current query profile.
    array(string) get_stop_words() {
      if(!stop_words) {
        ADTSet words = ADTSet();
        foreach(get_query_value("sw_lists"), string fn) {
          string file = Stdio.read_file(fn);
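Illustrative use of the cast() above (this assumes the ADTSet add() method defined earlier in the class; only cast() appears in this hunk):

    ADTSet words = ADTSet();
    words->add("and");
    words->add("or");
    array lst = (array)words;        // ({ "and", "or" }), unordered
    multiset set = (multiset)words;  // (< "and", "or" >) for fast lookup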
pike.git/lib/modules/Search.pmod/Utils.pmod:192:
    private mapping(int:mapping(string:mixed)) value_cache = ([]);
    private mapping(string:int) db_profile_names = ([]);
    private mapping(string:int) query_profile_names = ([]);
    private mapping(int:int) profile_stat = ([]);

    private Sql.Sql get_db() {
      Sql.Sql db;
  #if constant(DBManager)
      db = DBManager.cached_get(db_name);
  #endif
-     if(!db) THROW("Could not connect to database " + db_name + ".\n");
+     if(!db) error("Could not connect to database " + db_name + ".\n");
      return db;
    }

    //! Checks if the profile @[profile_id] has been changed, and clears
    //! related caches if so.
    //! @returns
    //!   @int
    //!     @value -1
    //!       The profile is deleted.
    //!     @value 0
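A hedged sketch of reacting to the documented return values (the hunk is cut off after @value 0; up_to_datep and flush_profile are the cache's own methods, used elsewhere in this file):

    int state = cache->up_to_datep(profile_id);
    if (state == -1)
      cache->flush_profile(profile_id);  // profile deleted: drop cached data
    // state == 0 means unchanged; cached values stay valid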
pike.git/lib/modules/Search.pmod/Utils.pmod:270:
    //! Returns the profile number for the given database profile.
    int get_db_profile_number(string name) {
      int db_profile;
      if(db_profile=db_profile_names[name])
        return db_profile;

      array res = get_db()->
        query("SELECT id FROM profile WHERE name=%s AND type=2", name);
      if(!sizeof(res))
-       THROW("No database profile " + name + " found.\n");
+       error("No database profile " + name + " found.\n");

      return db_profile_names[name] = (int)res[0]->id;
    }

    //! Returns the profile number for the given query profile.
    int get_query_profile_number(string name)
    {
      int query_profile;
      if( query_profile=query_profile_names[name] )
        return query_profile;

      array res = get_db()->
        query("SELECT id FROM profile WHERE name=%s AND type=1", name);
      if(!sizeof(res))
-       THROW("No query profile " + name + " found.\n");
+       error("No query profile " + name + " found.\n");

      return query_profile_names[name] = (int)res[0]->id;
    }

    private int last_db_prof_stat = 0; // 1970

    //! Returns a list of available database profiles.
    array(string) list_db_profiles() {
      /*
-     if (time(1) - last_db_prof_stat < 5*60)
+     if (time() - last_db_prof_stat < 5*60)
        return indices(db_profile_names);*/
      array res = get_db()->query("SELECT name, id FROM profile WHERE type=2");
      db_profile_names = mkmapping( res->name,
                                    map(res->id, lambda(string s) { return (int) s; } ));
      if(sizeof(res))
-       last_db_prof_stat = time(1);
+       last_db_prof_stat = time();
      return res->name;
    }

    private int last_query_prof_stat = 0; // 1970

    //! Returns a list of available query profiles.
    array(string) list_query_profiles()
    {
      /*
-     if (time(1) - last_query_prof_stat < 5*60)
+     if (time() - last_query_prof_stat < 5*60)
        return indices(query_profile_names);*/
      array res = get_db()->query("SELECT name, id FROM profile WHERE type=1");
      query_profile_names = mkmapping( res->name, (array(int)) res->id );
      if(sizeof(query_profile_names))
-       last_query_prof_stat = time(1);
+       last_query_prof_stat = time();
+     return res->name;
    }

    // Used when decoding text encoded pike data types.
    private object compile_handler = class {
        mapping(string:mixed) get_default_module() {
          return ([ "aggregate_mapping":aggregate_mapping,
                    "aggregate_multiset":aggregate_multiset,
                    "aggregate":aggregate,
                    "allocate":allocate,
                    "this_program":0 ]);
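Usage sketch for the two lookups above (the profile names and the ProfileCache constructor argument are illustrative): the first call hits SQL, later calls are served from the name-to-id mappings until flush_profile() clears them.

    ProfileCache cache = ProfileCache("searchdb");
    int dbp = cache->get_db_profile_number("default");     // SQL on first call
    int qp  = cache->get_query_profile_number("default");  // then memoized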
pike.git/lib/modules/Search.pmod/Utils.pmod:382:
           up_to_datep(query)) return entry;
      }

      entry = ProfileEntry( db, query, this_object() );
      return entry_cache[query +":"+ db] = entry;
    }

    //! Flushes profile entry @[p] from the profile cache.
    void flush_profile(int p) {
      m_delete(value_cache, p);
-     foreach(indices(db_profile_names), string name)
-       if(db_profile_names[name]==p)
+     foreach(db_profile_names; string name; int dbp)
+       if (dbp == p)
          m_delete(db_profile_names, name);
-     m_delete(query_profile_names, p);
+     foreach(query_profile_names; string name; int qp)
+       if (qp == p)
+         m_delete(query_profile_names, name);
      foreach(indices(entry_cache), string id) {
        array ids = array_sscanf(id, "%d:%d");
        if(ids[0]==p || ids[1]==p)
          m_delete(entry_cache, id);
      }
    }

    //! Empty the whole cache.
    void flush_cache() {
      value_cache = ([]);
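For reference, the key format flush_profile() parses above: entries are cached under "query:db" (see get_profile_entry), so a flushed profile id must match either component.

    string id = "17:4";                     // query profile 17, db profile 4
    array ids = array_sscanf(id, "%d:%d");  // ({ 17, 4 })
    // flush when ids[0] == p || ids[1] == p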
pike.git/lib/modules/Search.pmod/Utils.pmod:443:
  mapping dbp = get_profile_storage(db_name);
  if(scheduler_storage[db_name])
    return scheduler_storage[db_name];
  scheduler_storage[db_name] = Scheduler(dbp);
  return scheduler_storage[db_name];
}

class Scheduler {

  private int next_run;
- private mapping(int:int) crawl_queue;
- private mapping(int:int) compact_queue;
+ private mapping(int:int) entry_queue = ([]);
+ private mapping(int:int) crawl_queue = ([]);
+ private mapping(int:int) compact_queue = ([]);
+ private array(int) priority_queue = ({});
  private mapping db_profiles;
+ private object schedule_process;

  void create(mapping _db_profiles) {
    db_profiles = _db_profiles;
    schedule();
  }

+ void check_priority_queue(int profile)
+ {
+   if (!has_value(priority_queue, profile))
+     priority_queue += ({ profile });
+ }
+
  //! Call this method to indicate that a new entry has been added
  //! to the queue. The scheduler will delay indexing with at most
  //! @[latency] minutes.
  void new_entry(int latency, array(int) profiles) {
    int would_be_indexed = time() + latency*60;
    foreach(profiles, int profile)
-     crawl_queue[profile] = 0;
-   WERR("New entry. time: "+(would_be_indexed-time(1))+" profiles: "+(array(string))profiles*",");
-   if(next_run && next_run < would_be_indexed)
+   {
+     entry_queue[profile] = 0;
+     check_priority_queue(profile);
+   }
+   WERR("New entry. time: "+(would_be_indexed-time())+" profiles: "+
+        (array(string))profiles*",");
+   if(next_run && next_run<would_be_indexed && next_run>=time())
      return;
    next_run = would_be_indexed;
    reschedule();
  }

- private void reschedule() {
-   remove_call_out(do_scheduled_stuff);
-   WERR("Scheduler runs next event in "+(next_run-time(1))+" seconds.");
-   call_out(do_scheduled_stuff, next_run-time(1));
- }
+ void schedule(void|int quiet) {

- void unschedule() {
-   remove_call_out(do_scheduled_stuff);
- }
-
- void schedule() {
-   crawl_queue = ([]);
-   compact_queue = ([]);
-
    foreach(indices(db_profiles), int id) {
      object dbp = db_profiles[id];
      if(!dbp) {
        report_warning("Search database profile %d destructed.\n", id);
        m_delete(db_profiles, id);
        continue;
      }
-     WERR("Scheduling for database profile "+dbp->name);
-     int next = dbp->next_crawl();
+     if(!quiet) WERR("Scheduling for database profile "+dbp->name);
+     int next = dbp->next_recrawl();
      if(next != -1) {
        crawl_queue[dbp->id] = next;
-       WERR(" Crawl: "+(next-time(1)));
+       check_priority_queue(id);
+       if(!quiet) WERR(" Crawl: "+(next-time()));
      }
      next = dbp->next_compact();
      if(next != -1) {
        compact_queue[dbp->id] = next;
-       WERR(" Compact: "+(next-time(1)));
+       if(!quiet) WERR(" Compact: "+(next-time()));
      }
-     WERR("\n");
+     if(!quiet) WERR("");
    }

-   if(!sizeof(crawl_queue) && !sizeof(compact_queue)) return;
-   next_run = min( @values(crawl_queue)+values(compact_queue) );
+   if(!sizeof(crawl_queue) && !sizeof(compact_queue) && !sizeof(entry_queue))
+     return;
+   next_run = max( min( @values(crawl_queue) + values(compact_queue) +
+                        values(entry_queue) ),
+                   time() + 10 );
    reschedule();
  }

+ #if constant (roxen)
+ private void reschedule() {
+   if( schedule_process )
+     schedule_process->stop();
+   WERR("Scheduler runs next event in "+(next_run-time())+" seconds.");
+   // We use BackgroundProcess since there is no support for unscheduling
+   // tasks created with background_run.
+   schedule_process =
+     roxen.BackgroundProcess(next_run-time(), do_scheduled_stuff);
+ }
+
+ void unschedule() {
+   if( schedule_process )
+     schedule_process->stop();
+ }
+
  private void do_scheduled_stuff() {
+   if( schedule_process )
+     schedule_process->stop();
+   WERR("Running scheduler event.");
+
+   foreach(indices(db_profiles), int id) {
+     if (db_profiles[id]->is_running()) {
+       WERR("Postponing crawl start, profile "+id+" still running.");
+       schedule(1);
+       return;
+     }
+   }
+
+   int t = time();
+
+   WERR(sizeof(crawl_queue)+" profiles in crawl queue.");
+   foreach(priority_queue & indices(crawl_queue), int id) {
+     if(crawl_queue[id]>t || !db_profiles[id]) continue;
+     object dbp = db_profiles[id];
+     if(dbp && dbp->ready_to_crawl()) {
+       WERR("Scheduler starts crawling "+id);
+       dbp->recrawl();
+       m_delete(crawl_queue, id);
+       m_delete(entry_queue, id);
+       priority_queue -= ({ id });
+     }
+   }
+
+   WERR(sizeof(entry_queue)+" profiles in entry queue.");
+   foreach(priority_queue & indices(entry_queue), int id) {
+     if(entry_queue[id]>t || !db_profiles[id]) continue;
+     object dbp = db_profiles[id];
+     if(dbp && dbp->ready_to_crawl()) {
+       WERR("Scheduler starts crawling "+id);
+       dbp->start_indexer();
+       m_delete(entry_queue, id);
+       priority_queue -= ({ id });
+       break;
+     }
+   }
+
+   WERR(sizeof(compact_queue)+" profiles in compact queue.");
+   foreach(indices(compact_queue), int id) {
+     if(compact_queue[id]>t || !db_profiles[id]) continue;
+     db_profiles[id]->start_compact();
+     m_delete(compact_queue, id);
+   }
+
+   schedule();
+ }
+
+ #else
+ private void reschedule() {
    remove_call_out(do_scheduled_stuff);
+   WERR("Scheduler runs next event in "+(next_run-time())+" seconds.");
+   call_out(do_scheduled_stuff, next_run-time());
+ }
+
+ void unschedule() {
+   remove_call_out(do_scheduled_stuff);
+ }
+
+ private void do_scheduled_stuff() {
+   remove_call_out(do_scheduled_stuff);
    WERR("Running scheduler event.");

    int t = time();

    WERR(sizeof(crawl_queue)+" profiles in crawl queue.");
    foreach(indices(crawl_queue), int id) {
      if(crawl_queue[id]>t || !db_profiles[id]) continue;
      object dbp = db_profiles[id];
      if(dbp && dbp->ready_to_crawl()) {
        WERR("Scheduler starts crawling "+id);
+       dbp->recrawl();
+       entry_queue = ([]);
+     }
+   }
+
+   WERR(sizeof(entry_queue)+" profiles in entry queue.");
+   foreach(indices(entry_queue), int id) {
+     if(entry_queue[id]>t || !db_profiles[id]) continue;
+     object dbp = db_profiles[id];
+     if(dbp && dbp->ready_to_crawl()) {
+       WERR("Scheduler starts crawling "+id);
        dbp->start_indexer();
      }
    }

    WERR(sizeof(compact_queue)+" profiles in compact queue.");
    foreach(indices(compact_queue), int id) {
      if(compact_queue[id]>t || !db_profiles[id]) continue;
      db_profiles[id]->start_compact();
    }

    schedule();
  }

+ #endif
+
  string info() {
    string res = "<table border='1' cellspacing='0' cellpadding='2'>"
      "<tr><th>Profile</th><th>Crawl</th>"
      "<th>Compact</th><th>Next</th></tr>";
    foreach(values(db_profiles), object dbp) {
      if(!dbp) continue;
      res += "<tr><td>" + dbp->name + "</td>";
      int next = dbp->next_crawl();
      if(next == -1)
        res += "<td>Never</td>";
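The non-Roxen branch above leans on Pike's global call_out()/remove_call_out() timers. A minimal self-contained sketch of that re-arming pattern (MiniScheduler is illustrative; the 10-second floor mirrors the time() + 10 clamp in schedule()):

    class MiniScheduler {
      private int next_run;
      private void tick() {
        remove_call_out(tick);                       // never leave a stale timer
        // ... run due work here, then recompute next_run ...
        call_out(tick, max(next_run - time(), 10));  // re-arm with a floor
      }
      void arm(int when) {
        next_run = when;
        remove_call_out(tick);                       // collapse duplicate arms
        call_out(tick, max(next_run - time(), 0));
      }
    }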
pike.git/lib/modules/Search.pmod/Utils.pmod:565:
    return res;
  }
}


//!
class Logger {

  private string|Sql.Sql logdb;
  private int profile;
+ private int stderr_logging;

  private Sql.Sql get_db() {
    Sql.Sql db;
#if constant(DBManager)
    if(stringp(logdb))
      db = DBManager.get(logdb);
    else
#endif
      db = logdb;

    // if(!logdb || !logdb->query)
    //   throw( ({ "Couldn't find any database object.\n", backtrace() }) );

    return db;
  }

- //! @decl void create(Sql.Sql db_object, int profile)
- //! @decl void create(string db_url, int profile)
- void create(string|Sql.Sql _logdb, int _profile) {
+ //! @decl void create(Sql.Sql db_object, int profile, int stderr_logging)
+ //! @decl void create(string db_url, int profile, int stderr_logging)
+ void create(string|Sql.Sql _logdb, int _profile, int _stderr_logging) {
    logdb = _logdb;
    profile = _profile;
+   stderr_logging = _stderr_logging;

    // create table eventlog (event int unsigned auto_increment primary key,
    // at timestamp(14) not null, code int unsigned not null, extra varchar(255))

    Sql.Sql db = get_db();

    if(catch(db->query("SELECT code FROM eventlog WHERE event=0")))
      db->query("CREATE TABLE eventlog ("
                "event int unsigned auto_increment primary key,"
                "at timestamp(14) not null,"
                "profile int unsigned not null,"
                "code int unsigned not null,"
                "type enum('error','warning','notice') not null,"
                "extra varchar(255))");
  }

+ void werror_event( int code, string type, void|string extra, void|int log_profile )
+ {
+   mapping types = ([ "error"   : "Error",
+                      "warning" : "Warning",
+                      "notice"  : "Notice", ]);
+
+   werror(sprintf("%sSearch: %s: %s\n",
+                  " : ",
+                  types[type],
+                  extra ? sprintf(codes[(int)code], @(extra/"\n")) : codes[(int)code]));
+ }
+
+ void log_purge(int days)
+ {
+   Sql.Sql db = get_db();
+   if(!db) return;
+   if(days)
+     db->query("DELETE FROM eventlog "
+               " WHERE at <= NOW() - INTERVAL "+days+" DAY");
+   else
+     db->query("DELETE FROM eventlog");
+ }
+
  //!
  void log_event( int code, string type, void|string extra, void|int log_profile ) {
    Sql.Sql db = get_db();
    if(!db) return;

    if(zero_type(log_profile))
      log_profile = profile;

+   if(stderr_logging)
+     werror_event(code, type, extra, log_profile);
+
    if(extra)
      db->query("INSERT INTO eventlog (profile,code,type,extra) VALUES (%d,%d,%s,%s)",
                log_profile, code, type, extra);
    else
      db->query("INSERT INTO eventlog (profile, code,type) VALUES (%d,%d,%s)",
                log_profile, code, type);
  }

  //!
  void log_error( int code, void|string extra, void|int log_profile ) {
    log_event( code, "error", extra, log_profile );
  }

  //!
  void log_warning( int code, void|string extra, void|int log_profile ) {
    log_event( code, "warning", extra, log_profile );
  }
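A hedged usage sketch of the extended Logger (the database URL and profile id are illustrative; the new third create() argument turns on stderr mirroring):

    Search.Utils.Logger log =
      Search.Utils.Logger("mysql://user:pw@localhost/search", 17, 1);
    log->log_event(42, "notice");            // "Crawler exited normally."
    log->log_error(1100, "db.example.com");  // extra fills the %s in code 1100
    log->log_purge(30);                      // drop events older than 30 days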
pike.git/lib/modules/Search.pmod/Utils.pmod:669:
    33 : "Filter-to-indexer buffer failed to set up pipe.",
    34 : "Indexer failed to set up pipe.",

    40 : "Fetched %s.",
    41 : "Unknown language code \"%s\".",
    42 : "Crawler exited normally.",
    43 : "Cleared search database.",
    44 : "Sitebuilder commit triggered indexing of %s.",

    50 : "Crawler did not get any connection from the process.",
-   51 : "Crawler-to-filter bufferdid not get any connection from the process.",
+   51 : "Crawler-to-filter buffer did not get any connection from the process.",
    52 : "Filter did not get any connection from the process.",
    53 : "Filter-to-indexer buffer did not get any connection from the process.",
    54 : "Indexer did not get any connection from the process.",

    60 : "Starting database compactor with %s",
    61 : "Failed to find any data in the database.",
    62 : "Exiting compactor due to signal.",
    63 : "Done with database compacting and maintenance.",

    300: "300 Redirection: Multiple Choices (%s)",
pike.git/lib/modules/Search.pmod/Utils.pmod:715:
    500: "500 Server Error: Internal Server Error (%s)",
    501: "501 Server Error: Not Implemented (%s)",
    502: "502 Server Error: Bad Gateway (%s)",
    503: "503 Server Error: Service Unavailable (%s)",
    504: "504 Server Error: Gateway Timeout (%s)",
    505: "505 Server Error: HTTP Version Not Supported (%s)",

    1000: "Disallowed by robots.txt. (%s)",
    1001: "Can't handle scheme. (%s)",
    1002: "No matching filter. (%s)",
+   1003: "Content file too large -- indexing metadata only. (%s)",
    1100: "Failed to connect to %s.",
  ]);


//!
array(array(string|int)) get_log( int profile, array(string) types,
                                  int from, int to ) {

  string sql = "";
#define SQLADD(X) do{sizeof(sql)?(sql+=" AND "+(X)):(sql=" WHERE "+(X));}while(0)
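For reference, the clause composition SQLADD performs above, unrolled without the macro (the conditions are illustrative):

    string sql = "";
    foreach (({ "profile=17", "type='error'" }), string cond)
      sql = sizeof(sql) ? sql + " AND " + cond : " WHERE " + cond;
    // sql == " WHERE profile=17 AND type='error'"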