pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:3:
inherit .Base;
// Creates the SQL tables we need.
//#define SEARCH_DEBUG
#define DB_MAX_WORD_SIZE 64
// Internal state shared by the methods of this database backend.
protected
{
- // This is the database object that all queries will be made to.
- Sql.Sql db;
+ // This is the database that all queries will be made to.
// Database URL; assigned from the db_url argument in create().
string host;
-
// Returns a freshly constructed Sql.Sql object for host. A new object
// is created on every call; nothing is cached here.
+ Sql.Sql get_db()
+ {
+ return Sql.Sql(host);
+ }
// Options mapping given to create() (an empty mapping if none was given).
mapping options;
// Taken from options->mergefiles in create(), defaulting to "/tmp/".
string mergefile_path;
int mergefile_counter = 0;
// Set to 1 once the standard fields have been allocated.
int init_done = 0;
};
// Stores the database URL and the options mapping, opens a connection
// once (the handle is discarded -- presumably an early connectivity
// check), and removes the files listed by get_mergefiles() when
// options->mergefiles was given.
void create(string db_url, void|mapping _options)
{
  host = db_url;
  get_db();

  options = _options || ([]);

  // Fall back to /tmp/ when no merge file location was configured.
  mergefile_path = options->mergefiles || "/tmp/";

  if (!options->mergefiles)
    return;

  foreach(get_mergefiles(), string stale_file)
    rm(stale_file);
}
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:37: Inside #if defined(SEARCH_DEBUG)
// Debug aid: writes a warning to stderr when the object is destroyed
// while blobs_dirty is still set.
void destroy()
{
  if (!blobs_dirty)
    return;

  werror("Search.Database.MySQL: WARNING: Forgot to sync before "
         "abandoning db object?\n");
}
#endif
// Describes this object for %O-style output. The database URL is
// passed through Sql.censor_sql_url() before it is printed.
string _sprintf()
{
  string shown_url = Sql.censor_sql_url(host);
  return sprintf("Search.Database.MySQL(%O,%O)", shown_url, mergefile_path);
}
// Support for old- and new-style padded blobs must be determined at
// runtime. This is because the format must be compatible with whatever
// high-level Search module is currently available, specifically the
// compactor.
int cache_supports_padded_blobs = -1;
int supports_padded_blobs()
{
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:67:
// ----------------------------------------------
// Database initialization
// ----------------------------------------------
void init_tables()
{
int use_padded_blobs = supports_padded_blobs();
+ Sql.Sql db = get_db();
db->query(
#"create table if not exists uri (id int unsigned primary key
auto_increment not null,
uri blob not null,
uri_md5 varchar(32) binary not null,
UNIQUE(uri_md5))"
);
db->query(
#"create table if not exists document (id int unsigned primary key
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:152:
db->query(
#"create table if not exists field (id tinyint unsigned primary key not null,
name varchar(127) not null,
UNIQUE(name))");
}
// Empties the word_hit, uri, document, deleted_document, metadata and
// lastmodified tables, in that order.
void clear()
{
  Sql.Sql db = get_db();
  array(string) tables = ({ "word_hit", "uri", "document",
                            "deleted_document", "metadata", "lastmodified" });
  foreach(tables, string table)
    db->query("delete from " + table);
}
// ----------------------------------------------
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:188:
#endif
}
// ----------------------------------------------
// Document handling
// ----------------------------------------------
int get_uri_id(string uri, void|int do_not_create)
{
+ Sql.Sql db = get_db();
string s=sprintf("select id from uri where uri_md5='%s'", to_md5(uri));
array a=db->query(s);
if(sizeof(a))
return (int)a[0]->id;
if(do_not_create)
return 0;
db->query("insert into uri (uri,uri_md5) "
"values (%s,%s)",
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:209:
return db->master_sql->insert_id();
}
// Returns the id of the document row for uri (restricted to language
// when one is given).
//
// uri            The document URI; resolved to a uri id via get_uri_id().
// language       Optional language fork to look for.
// do_not_create  When set, nothing is inserted: 0 is returned if either
//                the URI or the document row does not exist.
//
// Without do_not_create a missing document row is inserted and its new
// id is returned.
int get_document_id(string uri, void|string language, void|int do_not_create)
{
  int uri_id = get_uri_id(uri, do_not_create);
  if (!uri_id)
    return 0;

  Sql.Sql db = get_db();
  string s = sprintf("select id from document where "
                     "uri_id='%d'", uri_id);
  if (language)
    s += sprintf(" and language='%s'", db->quote(language));

  array a = db->query(s);
  if (sizeof(a))
    return (int)a[0]->id;

  // Honour do_not_create for the document row too. Previously the flag
  // was only forwarded to get_uri_id(), so a lookup-only call (e.g. from
  // remove_metadata() or remove_links()) could insert a document row
  // when the URI existed but the requested document did not.
  if (do_not_create)
    return 0;

  db->query("insert into document (uri_id, language) "
            "values (%d,"+(language?"%s":"NULL")+")",
            uri_id, language);
  return db->master_sql->insert_id();
}
// Looks up uri and language for one or several document ids.
//
// doc_id  Either a single document id or an array of ids.
//
// For an array, the result maps each found document id to its result
// row (id, language, uri); an empty array gives an empty mapping.
// For a single id, the result is taken from the first matching row via
// an intersection with the keys "uri" and "language", or 0 when no row
// matched.
mapping get_uri_and_language(int|array(int) doc_id)
{
  if (arrayp(doc_id))
  {
    // An empty id list would produce "IN ()", which is not valid SQL.
    if (!sizeof(doc_id))
      return ([]);

    Sql.Sql db = get_db();
    array a = db->query("select document.id,document.language, uri.uri from document, uri "
                        "where uri.id=document.uri_id and document.id IN ("+
                        ((array(string))doc_id)*","+")");
    return mkmapping( (array(int))a->id, a );
  }

  Sql.Sql db = get_db();
  array a = db->query("select document.language,uri.uri from document,uri "
                      "where uri.id=document.uri_id and document.id=%d", doc_id);
  if (!sizeof(a))
    return 0;
  return (["uri":1,"language":1]) & a[0];
}
// Deletes the uri row whose uri_md5 equals the MD5 of the given URI.
void remove_uri(string|Standards.URI uri)
{
  Sql.Sql db = get_db();
  string digest = to_md5((string)uri);
  db->query("delete from uri where uri_md5=%s", digest);
}
// Deletes every uri row whose uri starts with the given prefix.
//
// uri  The prefix, as a string or a Standards.URI (cast to string).
void remove_uri_prefix(string|Standards.URI uri)
{
  Sql.Sql db = get_db();

  // "%" and "_" are wildcards in a LIKE pattern and "_" is common in
  // URIs, so they (and the escape character itself) are escaped to make
  // the prefix match literally. Without this the delete could hit URIs
  // that do not actually share the prefix.
  string literal_prefix = replace((string)uri,
                                  ({ "\\", "%", "_" }),
                                  ({ "\\\\", "\\%", "\\_" }));

  db->query("delete from uri where uri like '" +
            db->quote(literal_prefix) + "%%'");
}
#ifdef SEARCH_DEBUG
// Debug-only state: docs is incremented in remove_document() and
// blobs_dirty is checked by destroy().
protected int docs;
protected int blobs_dirty;
#endif
void remove_document(string|Standards.URI uri, void|string language)
{
#ifdef SEARCH_DEBUG
docs++;
#endif
int uri_id=get_uri_id((string)uri, 1);
if(!uri_id)
return;
-
+ Sql.Sql db = get_db();
array a;
if(language) {
// Need to remove this particular language fork as well as any
// non-language version of the document (since they are mutually
// exclusive).
//
// Note however that a document with several language forks where
// one fork is removed will keep that entry since we cannot know
// which entries that are garbage and hence leave them in place.
// It is up to the query filter to only show valid forks.
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:298:
if(!sizeof(a))
return;
db->query("delete from document where id in ("+a->id*","+")");
db->query("insert into deleted_document (doc_id) values "+
"("+a->id*"),("+")");
}
void remove_document_prefix(string|Standards.URI uri)
{
+ Sql.Sql db = get_db();
array a =
db->query("SELECT document.id AS id"
" FROM document, uri "
" WHERE document.uri_id=uri.id "
" AND uri.uri like '" + db->quote(uri) + "%%'");
if(!sizeof(a))
return;
array ids = a->id;
#ifdef SEARCH_DEBUG
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:322:
db->query("INSERT INTO deleted_document "
"(doc_id) VALUES (" + (ids * "),(") + ")");
}
// Cached result of get_deleted_documents(), together with the
// max(doc_id) and row count that were current when it was built.
protected Search.ResultSet deleted_documents = Search.ResultSet();
protected int deleted_max, deleted_count;

// Returns the ids in deleted_document as a Search.ResultSet. The cached
// set is reused as long as both max(doc_id) and count(*) are unchanged.
Search.ResultSet get_deleted_documents()
{
  // FIXME: Make something better
  Sql.Sql db = get_db();
  mapping summary =
    db->query("select max(doc_id) as m, count(*) as c from deleted_document")[0];
  int max_id = (int)summary->m;
  int count = (int)summary->c;

  if (max_id == deleted_max && count == deleted_count)
    return deleted_documents;

  array rows = db->query("select doc_id from deleted_document "
                         "order by doc_id");
  deleted_count = count;
  deleted_max = max_id;
  deleted_documents = Search.ResultSet((array(int))rows->doc_id);
  return deleted_documents;
}
// Returns all document ids, in ascending order, as a Search.ResultSet.
Search.ResultSet get_all_documents()
{
  Sql.Sql db = get_db();
  array rows = db->query("SELECT id FROM document ORDER BY id");
  return Search.ResultSet((array(int))rows->id);
}
// ----------------------------------------------
// Field handling
// ----------------------------------------------
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:368:
init_done=1;
foreach(({"uri","path1", "path2"})+Search.get_filter_fields(), string field)
allocate_field_id(field);
}
// Returns a mapping from field name to field id. The rows of the field
// table are extended with the entry "body" => 0, and the result is kept
// in list_fields_cache for later calls.
mapping(string:int) list_fields()
{
  if (!list_fields_cache)
  {
    init_fields();
    Sql.Sql db = get_db();
    array rows = db->query("select name,id from field");
    rows += ({ ([ "name": "body", "id": "0" ]) });
    list_fields_cache = mkmapping(rows->name, (array(int))rows->id);
  }
  return list_fields_cache;
}
int allocate_field_id(string field)
{
if(!init_done)
init_fields();
if(field=="body")
return 0;
-
+ Sql.Sql db = get_db();
array a =db->query("select id from field where name=%s", field);
if(sizeof(a))
return (int)a[0]->id;
db->query("lock tables field write");
mixed err = catch {
for(int i=1; i<64; i++)
{
array a=db->query("select name from field where id=%d",i);
if(!sizeof(a))
{
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:411:
}
// Field name => field id, filled in by get_field_id().
protected mapping field_cache = ([]);

// Returns the id for field. "body" is always 0. Ids found in the field
// table are remembered in field_cache. When the field is unknown, -1 is
// returned if do_not_create is set; otherwise the field is allocated
// through allocate_field_id().
int get_field_id(string field, void|int do_not_create)
{
  // The one special case.
  if (field == "body")
    return 0;

  if (field_cache[field])
    return field_cache[field];

  init_fields();
  Sql.Sql db = get_db();
  array rows = db->query(sprintf("select id from field where name='%s'",
                                 db->quote(field)));
  if (sizeof(rows))
    return field_cache[field] = (int)rows[0]->id;

  return do_not_create ? -1 : allocate_field_id(field);
}
// Removes field from the field table and drops it from field_cache;
// list_fields_cache is reset so that list_fields() rebuilds it.
void remove_field(string field)
{
  init_fields();

  list_fields_cache = 0;
  m_delete(field_cache, field);

  get_db()->query("delete from field where name=%s", field);
}
// Removes field unless it is one of "uri", "path1", "path2" or one of
// the names returned by Search.get_filter_fields().
void safe_remove_field(string field)
{
  array keep = ({ "uri", "path1", "path2" }) + Search.get_filter_fields();
  if (!has_value(keep, field))
    remove_field( field );
}
// ----------------------------------------------
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:475:
if(blobs->memsize() > MAXMEM)
if(options->mergefiles)
mergefile_sync();
else
sync();
}
// Expands a glob into the matching words from word_hit. The glob is
// UTF-8 encoded and "*" / "?" are translated to the LIKE wildcards
// "%" / "_". A non-zero max_hits adds a LIMIT. The words are decoded
// from UTF-8 before they are returned.
array(string) expand_word_glob(string g, void|int max_hits)
{
  string pattern = replace( string_to_utf8(g), ({ "*", "?" }), ({ "%", "_" }) );
  Sql.Sql db = get_db();

  array rows;
  if (max_hits)
    rows = db->query("select distinct word from word_hit where word like %s limit %d",
                     pattern, max_hits);
  else
    rows = db->query("select distinct word from word_hit where word like %s",
                     pattern);

  return map(rows->word, utf8_to_string);
}
int get_padded_blob_length(int used_len)
{
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:515: Inside #if defined(SEARCH_DEBUG)
#ifdef SEARCH_DEBUG
times[word] = 0;
#endif
return 0;
}
#ifdef SEARCH_DEBUG
int t0 = gethrtime();
#endif
int use_padded_blobs = supports_padded_blobs();
+ Sql.Sql db = get_db();
array a =
db->query(" SELECT hits, first_doc_id " +
(use_padded_blobs ? ", used_len, real_len " : "") +
" FROM word_hit "
" WHERE word = %s "
"ORDER BY first_doc_id "
" LIMIT %d,%d",
word, num, blobs_per_select);
#ifdef SEARCH_DEBUG
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:570:
// ----------------------------------------------
// Metadata handling
// ----------------------------------------------
// Deletes all metadata rows for a document.
//
// uri       A document id (int), or the document's URI, which is
//           resolved with get_document_id() without creating anything.
// language  Optional language fork; only used when uri is not an int.
void remove_metadata(int|Standards.URI|string uri, void|string language)
{
  int doc_id;
  // The original tested intp(uri) but never used an integer argument,
  // leaving doc_id at 0; treat it as a document id, as get_metadata()
  // does.
  if (intp(uri))
    doc_id = uri;
  else
    doc_id = get_document_id((string)uri, language, 1);

  Sql.Sql db = get_db();
  db->query("delete from metadata where doc_id = %d", doc_id);
}
// Builds an " and name IN (...)" SQL fragment from wanted_fields, with
// every name passed through db->quote. Returns "" when wanted_fields is
// missing or empty.
protected string make_fields_sql(void|array(string) wanted_fields)
{
  Sql.Sql db = get_db();

  if (!wanted_fields || !sizeof(wanted_fields))
    return "";

  string quoted_names = map(wanted_fields, db->quote) * "','";
  return " and name IN ('" + quoted_names + "')";
}
// Fetches the metadata of a document as a name => value mapping.
//
// uri            A document id (int), or a URI resolved with
//                get_document_id().
// language       Optional language fork; only used when uri is not an int.
// wanted_fields  Optional list restricting which names are fetched.
//
// When Gz is available a "body" value is inflated. All values are
// decoded from UTF-8 before the mapping is returned.
mapping(string:string) get_metadata(int|Standards.URI|string uri,
                                    void|string language,
                                    void|array(string) wanted_fields)
{
  int doc_id;
  if (!intp(uri))
    doc_id = get_document_id((string)uri, language);
  else
    doc_id = uri;

  Sql.Sql db = get_db();
  array rows = db->query("select name,value from metadata where doc_id=%d"+
                         make_fields_sql(wanted_fields),
                         doc_id);
  mapping md = mkmapping(rows->name, rows->value);

#if constant(Gz)
  if (md->body)
    md->body = Gz.inflate()->inflate(md->body);
#endif

  foreach(indices(md), string name)
    md[name] = utf8_to_string(md[name]);

  return md;
}
// Fetches one metadata field for several documents at once.
//
// doc_ids       The document ids to look up.
// wanted_field  The metadata name to fetch.
//
// Returns a mapping from document id to the stored value; documents
// without that field are absent. An empty doc_ids gives an empty mapping.
mapping(int:string) get_special_metadata(array(int) doc_ids,
                                         string wanted_field)
{
  // An empty id list would produce "IN ()", which is not valid SQL.
  if (!sizeof(doc_ids))
    return ([]);

  Sql.Sql db = get_db();
  array a = db->query("select doc_id,value from metadata where doc_id IN ("+
                      ((array(string))doc_ids)*","+") and name = %s",
                      wanted_field);
  return mkmapping( (array(int))a->doc_id, a->value);
}
// ----------------------------------------------
// Date stuff
// ----------------------------------------------
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:646:
#endif
}
if(!sizeof(md))
return 0;
foreach(indices(md), string ind)
if(ind!="body")
md[ind]=string_to_utf8(md[ind]);
+ Sql.Sql db = get_db();
string s=map(Array.transpose( ({ map(indices(md),db->quote),
map(values(md), db->quote) }) ),
lambda(array a)
{
return sprintf("(%d,'%s','%s')", doc_id,
a[0], a[1]);
}) * ", ";
db->query("replace into metadata (doc_id, name, value) values "+s);
}
// Stores when as the last-modified time of the document identified by
// uri and language (resolved with get_document_id()).
void set_lastmodified(Standards.URI|string uri,
                      void|string language,
                      int when)
{
  int target_id = get_document_id((string)uri, language);
  get_db()->query("replace into lastmodified (doc_id, at) values (%d,%d)",
                  target_id, when);
}
// Returns the stored last-modified time for the document identified by
// uri and language, or 0 when no row exists.
int get_lastmodified(Standards.URI|string|array(Standards.URI|string) uri, void|string language)
{
  int doc_id = get_document_id((string)uri, language);
  Sql.Sql db = get_db();
  array rows = db->query("select at from lastmodified where doc_id=%d", doc_id);
  if (!sizeof(rows))
    return 0;
  return (int)rows[0]->at;
}
// Gives every document a random last-modified time within the 365 days
// before the current time.
void randomize_dates()
{
  Sql.Sql db = get_db();
  array(string) doc_ids = db->query("select id from document")->id;
  foreach(doc_ids, string id)
  {
    int at = random(365*24*3600)+time()-365*24*3600;
    db->query("replace into lastmodified (doc_id,at) values (%s,%d)",
              id, at);
  }
}
protected
{
  // Cached date set and the highest doc_id it was built up to.
  _WhiteFish.DateSet dateset_cache;
  int dateset_cache_max_doc_id = -1;

  // Returns the largest doc_id in lastmodified, or 0 if the table is
  // empty.
  int get_max_doc_id()
  {
    Sql.Sql db = get_db();
    array rows = db->query("select doc_id from lastmodified order by doc_id desc limit 1");
    return sizeof(rows) ? (int)rows[0]->doc_id : 0;
  }
};
_WhiteFish.DateSet get_global_dateset()
{
int max_doc_id = get_max_doc_id();
if(max_doc_id == dateset_cache_max_doc_id)
return dateset_cache;
else
{
-
+ Sql.Sql db = get_db();
array a = db->query("select doc_id,at from lastmodified where "
"doc_id > %d order by doc_id asc", dateset_cache_max_doc_id);
dateset_cache_max_doc_id = max_doc_id;
if(!dateset_cache)
dateset_cache = _WhiteFish.DateSet();
dateset_cache->add_many( (array(int))a->doc_id,
(array(int))a->at );
return dateset_cache;
}
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:729:
int publ_dateset_cache_max_doc_id = -1;
};
_WhiteFish.DateSet get_global_publ_dateset()
{
int max_doc_id = get_max_doc_id();
if(max_doc_id == publ_dateset_cache_max_doc_id)
return publ_dateset_cache;
else
{
+ Sql.Sql db = get_db();
array(mapping(string:mixed)) a =
db->query("SELECT doc_id, value FROM metadata "
" WHERE name = 'publish-time' "
" AND doc_id > %d ORDER BY doc_id ASC",
publ_dateset_cache_max_doc_id);
publ_dateset_cache_max_doc_id = max_doc_id;
if(!publ_dateset_cache)
publ_dateset_cache = _WhiteFish.DateSet();
publ_dateset_cache->add_many( (array(int))a->doc_id,
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:770:
return get_document_id( (string)uri, language);
});
string res =
"replace into link (from_id, to_id) values " +
map(to_ids,
lambda(int to_id)
{
return sprintf("(%d, %d)", doc_id, to_id);
}) * ", ";
+ Sql.Sql db = get_db();
db->query(res);
}
// Deletes the link rows whose from_id is the document identified by uri
// and language. The document is looked up with do_not_create set.
void remove_links(Standards.URI|string uri,
                  void|string language)
{
  int source_id = get_document_id((string)uri, language, 1);
  get_db()->query("delete from link where from_id=%d", source_id);
}
// Placeholder: only runs a dummy query and produces no list of links.
array(int) get_broken_links()
{
  get_db()->query("select 'Not yet done :-)'");
  // Nothing is computed yet; this is the same 0 the function yielded
  // by falling off the end.
  return 0;
}
// ----------------------------------------------
// Sync stuff
// ----------------------------------------------
protected function sync_callback;
void set_sync_callback( function f )
{
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:851:
{
Search.MergeFile mergedfile;
if(mergedfilename)
mergedfile = Search.MergeFile(Stdio.File(mergedfilename, "r"));
int use_padded_blobs = supports_padded_blobs();
int s = time();
int q;
- Sql.Sql db = Sql.Sql( host );
+ Sql.Sql db = get_db();
#ifdef SEARCH_DEBUG
werror("----------- sync() %4d docs --------------\n", docs);
#endif
db->query("LOCK TABLES word_hit LOW_PRIORITY WRITE");
mixed err = catch {
String.Buffer multi_query = String.Buffer();
do
{
pike.git/lib/modules/Search.pmod/Database.pmod/MySQL.pike:1202:
// Statistics
// ----------------------------------------------
// Returns whatever blobs->memsize() reports.
int memsize()
{
  int blob_bytes = blobs->memsize();
  return blob_bytes;
}
// Returns the number of documents per language, as a mapping from the
// language column value to the count.
mapping(string|int:int) get_language_stats()
{
  Sql.Sql db = get_db();
  array a = db->query("select count(id) as c,language from document group by language");
  // Query results arrive as strings; cast the counts so the values match
  // the declared int type, as the other statistics functions do.
  return mkmapping( a->language, (array(int))a->c );
}
// Returns the number of distinct words in word_hit (0 if the query
// returns no row).
int get_num_words()
{
  Sql.Sql db = get_db();
  array rows = db->query("select count(distinct word) as c from word_hit");
  if (!sizeof(rows))
    return 0;
  return (int)rows[0]->c;
}
// Sums Data_length and Index_length over all rows of
// "show table status".
int get_database_size()
{
  Sql.Sql db = get_db();
  array(mapping) tables = db->query("show table status");
  return `+(0, @map(tables,
                    lambda(mapping t)
                    {
                      return (int)t->Data_length + (int)t->Index_length;
                    }));
}
// Returns the number of rows in deleted_document.
int get_num_deleted_documents()
{
  Sql.Sql db = get_db();
  mapping row = db->query("select count(*) as c from deleted_document")[0];
  return (int)row->c;
}
// Decodes a UTF-8 string and normalizes it with Unicode.normalize
// using the "C" form.
protected string my_denormalize(string in)
{
  string decoded = utf8_to_string(in);
  return Unicode.normalize(decoded, "C");
}
// Returns up to count (default 10) ({ word, weight }) pairs, ordered by
// descending weight. The weight is SUM(used_len)/5 when padded blobs
// are supported and SUM(LENGTH(hits))/5 otherwise. Words are passed
// through my_denormalize().
array(array) get_most_common_words(void|int count)
{
  Sql.Sql db = get_db();
  string weight_expr = supports_padded_blobs() ?
    " SUM(used_len) / 5 AS c " :
    " SUM(LENGTH(hits)) / 5 AS c ";

  array rows =
    db->query(" SELECT word, " + weight_expr +
              " FROM word_hit "
              " GROUP BY word "
              " ORDER BY c DESC "
              " LIMIT %d", count || 10);

  if (!sizeof(rows))
    return ({ });

  array words = map(rows->word, my_denormalize);
  array(int) weights = (array(int))rows->c;
  return Array.transpose( ({ words, weights }) );
}
// Calls cb once for every uri in the uri table that starts with
// url_prefix.
//
// url_prefix  The prefix to match.
// cb          Callback that receives each matching uri.
void list_url_by_prefix(string url_prefix, function(string:void) cb)
{
  Sql.Sql db = get_db();

  // "%" and "_" are wildcards in a LIKE pattern and "_" is common in
  // URLs, so they (and the escape character itself) are escaped to make
  // the prefix match literally.
  string literal_prefix = replace(url_prefix,
                                  ({ "\\", "%", "_" }),
                                  ({ "\\\\", "\\%", "\\_" }));

  Sql.sql_result q =
    db->big_query("SELECT uri "
                  "  FROM uri "
                  " WHERE uri LIKE '"+db->quote(literal_prefix)+"%'");

  array row;
  while ((row = q->fetch_row()))
    cb(row[0]);
}