pike.git / lib / modules / Search.pmod / Queue.pmod / MySQL.pike

version» Context lines:

pike.git/lib/modules/Search.pmod/Queue.pmod/MySQL.pike:1:   #pike __REAL_VERSION__      inherit .Base;    - Sql.Sql db; + //! @[Search] crawler state stored in a @[Mysql] database. +    string url, table;    -  + protected Thread.Local _db = Thread.Local(); + Sql.Sql `db() + { +  // NB: We need to have a thread local connection, +  // since the status functions may get called +  // from some other thread while we're busy +  // performing sql queries elsewhere. +  Sql.Sql ret = _db->get(); +  if (ret && !ret->ping()) return ret; +  return _db->set(Sql.Sql( url )); + } +    Web.Crawler.Stats stats;   Web.Crawler.Policy policy;   Web.Crawler.RuleSet allow, deny;    - inherit Web.Crawler.Queue; -  +    static string to_md5(string url)   {    Crypto.MD5 md5 = Crypto.MD5();    md5->update(string_to_utf8(url));    return String.string2hex(md5->digest());   }    -  + //! @param _url + //! @[Sql.Sql] URL for the database to store the queue. + //! + //! @param _table + //! @[Sql.Sql] table name to store the queue in. + //! + //! If the table doesn't exist it will be created.   void create( Web.Crawler.Stats _stats,    Web.Crawler.Policy _policy,       string _url, string _table,       void|Web.Crawler.RuleSet _allow,    void|Web.Crawler.RuleSet _deny)   {    stats = _stats; policy = _policy;    allow=_allow; deny=_deny;    table = _table; -  +  url = _url;    -  db = Sql.Sql( _url ); +     perhaps_create_table( );   }      static void perhaps_create_table( )   {    db->query(   #"    create table IF NOT EXISTS "+table+#" (    uri blob not null,    uri_md5 char(32) not null default '',
pike.git/lib/modules/Search.pmod/Queue.pmod/MySQL.pike:218:    if (stage)    uris = db->query( "select * from "+table+" where stage=%d", stage );    else    uris = db->query( "select * from "+table );    uris = map(uris->uri, utf8_to_string);    uris = map(uris, Standards.URI);       return uris;   }    + //! @returns + //! Returns an array with all URI schemes currently used in the queue.   array(string) get_schemes()   {    // FIXME: Consider using SUBSTRING_INDEX().    array(string) schemes =    db->query("SELECT DISTINCT"    " SUBSTRING(uri, 1, 20) AS scheme"    " FROM "+table)->scheme;    schemes = map(schemes,    lambda(string s) {    return (s/":")[0];
pike.git/lib/modules/Search.pmod/Queue.pmod/MySQL.pike:299:    ((array(string))stage)*"," )[ 0 ]->c;   }      void set_stage( Standards.URI uri,    int stage )   {    db->query( "update "+table+" set stage=%d where uri_md5=%s",stage,    to_md5((string)uri));   }    + //! @returns + //! Returns the current stage for the specified URI. + //! + //! @seealso + //! @[set_stage()]   int get_stage( Standards.URI uri )   {    array a = db->query( "select stage from "+table+" where uri_md5=%s", to_md5((string)uri));    if(sizeof(a))    return (int)a[0]->stage;    else    return -1;   }    -  + //! Reset the stage to @expr{0@} (zero) for all URIs with the specified + //! @[uri_prefix]. If no @[uri_prefix] is specified reset the stage for + //! all URIs.   void reset_stage(string|void uri_prefix)   {    if (uri_prefix) {    db->query("UPDATE " + table + " SET stage = 0"    " WHERE SUBSTRING(uri, 1, " + sizeof(uri_prefix) + ") = %s",    uri_prefix);    } else {    db->query("UPDATE " + table + " SET stage = 0");    }   }