54025d2001-05-26Per Hedbor #include "global.h" #include "stralloc.h" #include "global.h" #include "pike_macros.h" #include "interpret.h" #include "program.h" #include "object.h" #include "array.h" #include "module_support.h" #include "config.h" #include "whitefish.h" #include "buffer.h" #include "blobs.h" static void exit_blobs_struct( );
/* Number of buckets in each Blobs object's word hash table. */
#define HSIZE 10007

/* The current frame's object storage, viewed as a struct blobs. */
#define THIS ((struct blobs *)Pike_fp->current_storage)

/* Bucket index for a key: its value (in this file, a string pointer),
 * truncated to unsigned int and reduced modulo HSIZE. */
#define HASH(X) ((unsigned int)(size_t)(X) % HSIZE)
54025d2001-05-26Per Hedbor  struct hash {
40a44d2004-08-07Johan Schön  unsigned int word_data_offset; int current_document; struct buffer *buffer;
54025d2001-05-26Per Hedbor  struct hash *next;
20de752001-07-03Per Hedbor  struct pike_string *id;
54025d2001-05-26Per Hedbor }; struct blobs {
57ab6d2001-05-26Per Hedbor  int next_ind;
40a44d2004-08-07Johan Schön  int size; int nwords;
57ab6d2001-05-26Per Hedbor  struct hash *next_h;
54025d2001-05-26Per Hedbor  struct hash *hash[HSIZE]; };
20de752001-07-03Per Hedbor static struct hash *new_hash( struct pike_string *id )
54025d2001-05-26Per Hedbor {
40a44d2004-08-07Johan Schön  struct hash *res = malloc( sizeof( struct hash ) ); if( !res ) Pike_error("Out of memory\n");
54025d2001-05-26Per Hedbor  res->id = id;
107c242002-01-02Johan Schön  add_ref(id);
54025d2001-05-26Per Hedbor  res->next = 0;
40a44d2004-08-07Johan Schön  res->buffer = wf_buffer_new(); res->word_data_offset = 0; res->current_document = -1;
54025d2001-05-26Per Hedbor  return res; } static void insert_hash( struct blobs *d, struct hash *h ) {
350d4b2004-07-20Henrik Grubbström (Grubba)  unsigned int r = HASH(h->id);
54025d2001-05-26Per Hedbor  h->next = d->hash[ r ]; d->hash[ r ] = h; } static void free_hash( struct hash *h ) { while( h ) { struct hash *n = h->next;
40a44d2004-08-07Johan Schön  if( h->buffer ) wf_buffer_free( h->buffer ); if( h->id ) free_string( h->id );
54025d2001-05-26Per Hedbor  free( h ); h = n; } }
20de752001-07-03Per Hedbor static struct hash *find_hash( struct blobs *d, struct pike_string *id )
54025d2001-05-26Per Hedbor {
350d4b2004-07-20Henrik Grubbström (Grubba)  unsigned int r = HASH(id);
54025d2001-05-26Per Hedbor  struct hash *h = d->hash[ r ]; while( h ) { if( h->id == id ) return h; h = h->next; } h = new_hash( id ); insert_hash( d, h );
40a44d2004-08-07Johan Schön  d->nwords++; d->size+=sizeof( struct hash )+32;
54025d2001-05-26Per Hedbor  return h; }
40a44d2004-08-07Johan Schön /*! @module Search */ /*! @class Blobs */
54025d2001-05-26Per Hedbor static void f_blobs_add_words( INT32 args )
40a44d2004-08-07Johan Schön /*! @decl void add_words( int docid, array(string) words, int field_id )
54025d2001-05-26Per Hedbor  *! *! Add all the words in the 'words' array to the blobs */ {
1d8d7d2013-02-05Jonas Walldén  int docid;
54025d2001-05-26Per Hedbor  struct array *words;
1d8d7d2013-02-05Jonas Walldén  int field_id;
54025d2001-05-26Per Hedbor  int i; struct blobs *blbl = THIS;
af27a62001-07-31Johan Schön  get_all_args( "add_words", args, "%d%a%d", &docid, &words, &field_id);
54025d2001-05-26Per Hedbor  for( i = 0; i<words->size; i++ )
017b572011-10-28Henrik Grubbström (Grubba)  if( TYPEOF(words->item[i]) != PIKE_T_STRING )
20de752001-07-03Per Hedbor  Pike_error("Illegal element %d in words array\n", i ); else
40a44d2004-08-07Johan Schön  { struct hash *x = find_hash( blbl, words->item[i].u.string ); if( !x->buffer ) Pike_error("Read already called\n"); blbl->size-=x->buffer->allocated_size; if( x->current_document != docid ) { x->current_document = docid; wf_buffer_wint( x->buffer, docid ); wf_buffer_wbyte( x->buffer, 0 ); x->word_data_offset = x->buffer->size-1; }
c9566c2013-11-03Arne Goedeke  if( (unsigned char)x->buffer->data[x->word_data_offset] < 255 )
40a44d2004-08-07Johan Schön  { unsigned short s; x->buffer->data[x->word_data_offset]++; blbl->size+=2; if( field_id ) s = (3<<14) | ((field_id-1)<<8) | (i>255?255:i); else s = i>((1<<14)-1)?((1<<14)-1):i; wf_buffer_wshort( x->buffer, s ); } blbl->size+=x->buffer->allocated_size; }
e5e4c62001-05-26Per Hedbor  pop_n_elems( args ); push_int(0);
54025d2001-05-26Per Hedbor } static void f_blobs_memsize( INT32 args )
40a44d2004-08-07Johan Schön /*! @decl int memsize()
54025d2001-05-26Per Hedbor  *! *! Returns the in-memory size of the blobs */ {
20de752001-07-03Per Hedbor  pop_n_elems( args );
40a44d2004-08-07Johan Schön  push_int( THIS->size );
54025d2001-05-26Per Hedbor }
57ab6d2001-05-26Per Hedbor static void f_blobs_read( INT32 args )
40a44d2004-08-07Johan Schön /*! @decl array read();
57ab6d2001-05-26Per Hedbor  *!
40a44d2004-08-07Johan Schön  *! returns ({ string word_id, string blob }) or ({0,0}) As a side-effect,
20de752001-07-03Per Hedbor  *! this function frees the blob and the word_id, so you can only read *! the blobs struct once. Also, once you have called @[read], *! @[add_words] will no longer work as expected.
57ab6d2001-05-26Per Hedbor  */ { struct blobs *t = THIS;
20de752001-07-03Per Hedbor  struct array *a = allocate_array( 2 );
74117a2004-08-18Henrik Grubbström (Grubba)  pop_n_elems(args);
57ab6d2001-05-26Per Hedbor  while( !t->next_h ) { if( t->next_ind >= HSIZE ) {
017b572011-10-28Henrik Grubbström (Grubba)  SET_SVAL(a->item[0], PIKE_T_INT, NUMBER_NUMBER, integer, 0); SET_SVAL(a->item[1], PIKE_T_INT, NUMBER_NUMBER, integer, 0);
20de752001-07-03Per Hedbor  push_array( a );
57ab6d2001-05-26Per Hedbor  return; } t->next_h = t->hash[ t->next_ind ]; t->next_ind++; }
017b572011-10-28Henrik Grubbström (Grubba)  SET_SVAL(a->item[0], PIKE_T_STRING, 0, string, t->next_h->id); SET_SVAL(a->item[1], PIKE_T_STRING, 0, string, make_shared_binary_string( t->next_h->buffer->data, t->next_h->buffer->size ));
40a44d2004-08-07Johan Schön  wf_buffer_free( t->next_h->buffer ); t->next_h->buffer = 0;
c7c2cb2001-07-05Per Hedbor  t->next_h->id = 0;
20de752001-07-03Per Hedbor  push_array( a );
57ab6d2001-05-26Per Hedbor  t->next_h = THIS->next_h->next; }
40a44d2004-08-07Johan Schön  static int compare_wordarrays( const void *_a, const void *_b ) { const struct svalue *a = (struct svalue *)_a; const struct svalue *b = (struct svalue *)_b; return my_quick_strcmp( a->u.array->item[0].u.string, b->u.array->item[0].u.string ); } /*! @decl array(array(string)) read_all_sorted() *! *! returns ({({ string word1_id, string blob1 }),...}), sorted by word_id in octed order. *! *! As a side-effect, *! this function frees the blobs and the word_ids, so you can only read *! the blobs struct once. Also, once you have called @[read] or @[read_all_sorted], *! @[add_words] will no longer work as expected. */
74dfe82012-12-30Jonas Walldén static void f_blobs_read_all_sorted( INT32 UNUSED(args) )
40a44d2004-08-07Johan Schön { struct array *g = allocate_array( THIS->nwords ); int i; for( i = 0; i<THIS->nwords; i++ ) { f_blobs_read(0); g->item[i]=Pike_sp[-1]; Pike_sp--; } qsort( &g->item[0], THIS->nwords, sizeof(struct svalue), compare_wordarrays ); push_array(g); } /*! @endclass */ /*! @endmodule */
54025d2001-05-26Per Hedbor static void init_blobs_struct( ) { MEMSET( THIS, 0, sizeof( struct blobs ) );
40a44d2004-08-07Johan Schön  THIS->size = sizeof( struct blobs ) + 128;
54025d2001-05-26Per Hedbor } static void exit_blobs_struct( ) { int i; for( i = 0; i<HSIZE; i++ ) if( THIS->hash[i] ) free_hash( THIS->hash[i] ); init_blobs_struct(); } static struct program *blobs_program; void init_blobs_program() { start_new_program(); ADD_STORAGE( struct blobs );
e5e4c62001-05-26Per Hedbor  add_function("add_words",f_blobs_add_words,
af27a62001-07-31Johan Schön  "function(int,array,int:void)",0 );
54025d2001-05-26Per Hedbor  add_function("memsize", f_blobs_memsize, "function(void:int)", 0 );
40a44d2004-08-07Johan Schön  add_function("read", f_blobs_read, "function(void:array(string))", 0); add_function("read_all_sorted", f_blobs_read_all_sorted, "function(void:array(array(string)))", 0);
54025d2001-05-26Per Hedbor  set_init_callback( init_blobs_struct ); set_exit_callback( exit_blobs_struct ); blobs_program = end_program( ); add_program_constant( "Blobs", blobs_program, 0 ); } void exit_blobs_program() { free_program( blobs_program ); }