pike.git / src / modules / _WhiteFish / whitefish.c

version» Context lines:

pike.git/src/modules/_WhiteFish/whitefish.c:1:   #include <math.h>      #include "global.h"   #include "stralloc.h"   #include "global.h" - RCSID("$Id: whitefish.c,v 1.31 2001/07/04 22:24:42 per Exp $"); + RCSID("$Id: whitefish.c,v 1.32 2001/07/31 15:27:18 js Exp $");   #include "pike_macros.h"   #include "interpret.h"   #include "program.h"   #include "program_id.h"   #include "object.h"   #include "operators.h"   #include "array.h"   #include "module_support.h"      #include "config.h"
pike.git/src/modules/_WhiteFish/whitefish.c:37:    struct tofree *t= (struct tofree *)_t;    int i;    if( t->res ) free_object( t->res );    for( i = 0; i<t->nblobs; i++ )    wf_blob_free( t->blobs[i] );    free( t->tmp );    free( t );   }      #define OFFSET(X) \ -  (X.type == HIT_BODY?X.u.body.pos:X.type==HIT_FIELD?(X.u.field.pos):(X.u.anchor.pos)) +  (X.type == HIT_BODY?X.u.body.pos:X.u.field.pos)    -  + #define DOFF(X) _distance_f(X) + #define MOFF(X) (X.type==HIT_BODY?0:X.u.field.type+1) +    static int _distance_f( int distance )   {    if( distance < 2 ) return 0;    if( distance < 6 ) return 1;    if( distance < 11 ) return 2;    if( distance < 22 ) return 3;    if( distance < 42 ) return 4;    if( distance < 82 ) return 5;    if( distance < 161 ) return 6;    return 7;   }    - #define DOFF(X) _distance_f(X) - #define MOFF(X) (X.type==HIT_BODY?0:X.type==HIT_FIELD?X.u.field.type+2:1) +       static void handle_hit( Blob **blobs,    int nblobs,    struct object *res,    int docid, -  double *field_c[66], +  double *field_c[65],    double *prox_c[8],    double mc, double mp,    int cutoff )   {    int i, j, k, end = 0;    Hit *hits = malloc( nblobs * sizeof(Hit) );    unsigned char *nhits = malloc( nblobs );    unsigned char *pos = malloc( nblobs );    -  int matrix[66][8]; +  int matrix[65][8];       MEMSET(matrix, 0, sizeof(matrix) );    MEMSET(hits, 0, nblobs * sizeof(Hit) );    MEMSET(pos, 0, nblobs );       for( i = 0; i<nblobs; i++ )    nhits[i] = wf_blob_nhits( blobs[i] );          for( i = 0; i<nblobs; i++ )
pike.git/src/modules/_WhiteFish/whitefish.c:106:    }       free( pos );    free( nhits );    free( hits );    /* Now we have our nice matrix. Time to do some multiplication */       {    double accum = 0.0, fc, pc;    int accum_i; -  for( i = 0; i<66; i++ ) +  for( i = 0; i<65; i++ )    if( (fc = (*field_c)[i]) != 0.0 )    for( j = 0; j<8; j++ )    if( (pc = (*prox_c)[j]) != 0.0 )    accum += (MINIMUM(matrix[i][j],cutoff)*fc*pc) / (mc*mp);       /* Limit */    if( accum > 32000.0 )    accum = 32000.0; -  accum_i = (int)(accum *100 ); -  if( accum_i > 0 ) +  accum_i = (int)(accum *100 ) + 1; +  if( accum > 0.0 )    wf_resultset_add( res, docid, accum_i );    }   }      static struct object *low_do_query_or( Blob **blobs,    int nblobs, -  double field_c[66], +  double field_c[65],    double prox_c[8],    int cutoff)   {    struct object *res = wf_resultset_new();    struct tofree *__f = malloc( sizeof( struct tofree ) );    double max_c=0.0, max_p=0.0;    ONERROR e;    int i, j;    Blob **tmp;    tmp = malloc( nblobs * sizeof( Blob *) );       __f->res = res;    __f->blobs = blobs;    __f->nblobs = nblobs;    __f->tmp = tmp;    SET_ONERROR( e, free_stuff, __f );       -  for( i = 0; i<66; i++ ) +  for( i = 0; i<65; i++ )    if( field_c[i] > max_c )    max_c = field_c[i];       for( i = 0; i<8; i++ )    if( prox_c[i] > max_p )    max_p = prox_c[i];       if( max_p != 0.0 && max_c != 0.0 )    {    /* Time to do the real work. :-) */
pike.git/src/modules/_WhiteFish/whitefish.c:190:    UNSET_ONERROR( e );    __f->res = 0;    free_stuff( __f );    return res;   }      static void handle_phrase_hit( Blob **blobs,    int nblobs,    struct object *res,    int docid, -  double *field_c[66], +  double *field_c[65],    double mc )   {    int i, j, k;    unsigned char *nhits = malloc( nblobs*2 );    unsigned char *first = nhits+nblobs; -  int matrix[66]; +  int matrix[65];    double accum = 0.0;       MEMSET(matrix, 0, sizeof(matrix) );          for( i = 0; i<nblobs; i++ )    {    nhits[i] = wf_blob_nhits( blobs[i] );    first[i] = 0;    }
pike.git/src/modules/_WhiteFish/whitefish.c:242:    accum += add/mc;    }       free( nhits );       if( accum > 0.0 )    wf_resultset_add( res, docid, (int)(accum*100) );   }      static struct object *low_do_query_phrase( Blob **blobs, int nblobs, -  double field_c[66]) +  double field_c[65])   {    struct object *res = wf_resultset_new();    struct tofree *__f = malloc( sizeof( struct tofree ) );    double max_c=0.0;    ONERROR e;    int i, j;    __f->blobs = blobs;    __f->nblobs = nblobs;    __f->res = res;    __f->tmp = 0;    SET_ONERROR( e, free_stuff, __f );       -  for( i = 0; i<66; i++ ) +  for( i = 0; i<65; i++ )    if( field_c[i] > max_c )    max_c = field_c[i];       if( max_c != 0.0 )    {    /* Time to do the real work. :-) */    for( i = 0; i<nblobs; i++ ) /* Forward to first element */    wf_blob_next( blobs[i] );       /* Main loop: Find the smallest element in the blob array. */
pike.git/src/modules/_WhiteFish/whitefish.c:302:   end:    /* Free workarea and return the result. */       UNSET_ONERROR( e );    __f->res = 0;    free_stuff( __f );    return res;   }      static struct object *low_do_query_and( Blob **blobs, int nblobs, -  double field_c[66], +  double field_c[65],    double prox_c[8],    int cutoff)   {    struct object *res = wf_resultset_new();    struct tofree *__f = malloc( sizeof( struct tofree ) );    double max_c=0.0, max_p=0.0;    ONERROR e;    int i, j;    __f->blobs = blobs;    __f->nblobs = nblobs;    __f->res = res;    __f->tmp = 0;    SET_ONERROR( e, free_stuff, __f );       -  for( i = 0; i<66; i++ ) +  for( i = 0; i<65; i++ )    if( field_c[i] > max_c )    max_c = field_c[i];       for( i = 0; i<8; i++ )    if( prox_c[i] > max_p )    max_p = prox_c[i];       if( max_c != 0.0 )    {    /* Time to do the real work. :-) */
pike.git/src/modules/_WhiteFish/whitefish.c:381:    *! array(int) field_coefficients, @    *! function(int:string) blobfeeder)    *! @[words]    *!    *! Arrays of word ids. Note that the order is significant    *! for the ranking.    *!    *! @[field_coefficients]    *!    *! An array of ranking coefficients for the different fields. -  *! In the range of [0x0000-0xffff]. The array (always) has 66 +  *! In the range of [0x0000-0xffff]. The array (always) has 65    *! elements:    *!    *! Index Coefficient for field    *! ----- ---------------------    *! 0 body -  *! 1 anchor -  *! 2..65 Special field 0..63 +  *! 1..64 Special field 0..63    *!    *! @[blobfeeder]    *!    *! This function returns a Pike string containing the word hits    *! for a certain word_id. Call repeatedly until it returns 0.    */   {    double proximity_coefficients[8]; -  double field_coefficients[66]; +  double field_coefficients[65];    int numblobs, i;    Blob **blobs;       struct svalue *cb;    struct object *res;    struct array *_words, *_field;       /* 1: Get all arguments. */    get_all_args( "do_query_phrase", args, "%a%a%*",    &_words, &_field, &cb);    -  if( _field->size != 66 ) -  Pike_error("Illegal size of field_coefficients array (expected 66)\n" ); +  if( _field->size != 65 ) +  Pike_error("Illegal size of field_coefficients array (expected 65)\n" );       numblobs = _words->size;    if( !numblobs )    {    struct object *o = wf_resultset_new( );    pop_n_elems( args );    wf_resultset_push( o );    return;    }       blobs = malloc( sizeof(Blob *) * numblobs );       for( i = 0; i<numblobs; i++ )    blobs[i] = wf_blob_new( cb, _words->item[i].u.string );    -  for( i = 0; i<66; i++ ) +  for( i = 0; i<65; i++ )    field_coefficients[i] = (double)_field->item[i].u.integer;       res = low_do_query_phrase(blobs,numblobs, field_coefficients );    pop_n_elems( args );    wf_resultset_push( res );   }      static void f_do_query_and( INT32 args )   /*! 
@decl ResultSet do_query_and( array(string) words, @    *! array(int) field_coefficients, @    *! array(int) proximity_coefficients, @    *! int cutoff, @    *! function(int:string) blobfeeder)    *! @[words]    *!    *! Arrays of word ids. Note that the order is significant    *! for the ranking.    *!    *! @[field_coefficients]    *!    *! An array of ranking coefficients for the different fields. -  *! In the range of [0x0000-0xffff]. The array (always) has 66 +  *! In the range of [0x0000-0xffff]. The array (always) has 65    *! elements:    *!    *! Index Coefficient for field    *! ----- ---------------------    *! 0 body -  *! 1 anchor -  *! 2..65 Special field 0..63 +  *! 1..64 Special field 0..63    *!    *! @[proximity_coefficients]    *!    *! An array of ranking coefficients for the different    *! proximity categories. Always has 8 elements, in the range    *! of [0x0000-0xffff].    *!    *! Index Meaning    *! ----- -------    *! 0 spread: 0 (Perfect hit)
pike.git/src/modules/_WhiteFish/whitefish.c:482:    *!    *! The 'spread' value should be defined somehow.    *!    *! @[blobfeeder]    *!    *! This function returns a Pike string containing the word hits    *! for a certain word_id. Call repeatedly until it returns 0.    */   {    double proximity_coefficients[8]; -  double field_coefficients[66]; +  double field_coefficients[65];    int numblobs, i, cutoff;    Blob **blobs;       struct svalue *cb;    struct object *res;    struct array *_words, *_field, *_prox;       /* 1: Get all arguments. */    get_all_args( "do_query_and", args, "%a%a%a%d%*",    &_words, &_field, &_prox, &cutoff, &cb);    -  if( _field->size != 66 ) -  Pike_error("Illegal size of field_coefficients array (expected 66)\n" ); +  if( _field->size != 65 ) +  Pike_error("Illegal size of field_coefficients array (expected 65)\n" );    if( _prox->size != 8 )    Pike_error("Illegal size of proximity_coefficients array (expected 8)\n" );       numblobs = _words->size;    if( !numblobs )    {    struct object *o = wf_resultset_new( );    pop_n_elems( args );    wf_resultset_push( o );    return;    }       blobs = malloc( sizeof(Blob *) * numblobs );       for( i = 0; i<numblobs; i++ )    blobs[i] = wf_blob_new( cb, _words->item[i].u.string );       for( i = 0; i<8; i++ )    proximity_coefficients[i] = (double)_prox->item[i].u.integer;    -  for( i = 0; i<66; i++ ) +  for( i = 0; i<65; i++ )    field_coefficients[i] = (double)_field->item[i].u.integer;       res = low_do_query_and(blobs,numblobs,    field_coefficients,    proximity_coefficients,    cutoff );    pop_n_elems( args );    wf_resultset_push( res );   }   
pike.git/src/modules/_WhiteFish/whitefish.c:540:    *! array(int) proximity_coefficients, @    *! int cutoff, @    *! function(int:string) blobfeeder)    *! @[words]    *!    *! Arrays of word ids. Note that the order is significant    *! for the ranking.    *!    *! @[field_coefficients]    *!    *! An array of ranking coefficients for the different fields. -  *! In the range of [0x0000-0xffff]. The array (always) has 66 +  *! In the range of [0x0000-0xffff]. The array (always) has 65    *! elements:    *!    *! Index Coefficient for field    *! ----- ---------------------    *! 0 body -  *! 1 anchor -  *! 2..65 Special field 0..63 +  *! 1..64 Special field 0..63    *!    *! @[proximity_coefficients]    *!    *! An array of ranking coefficients for the different    *! proximity categories. Always has 8 elements, in the range    *! of [0x0000-0xffff].    *!    *! Index Meaning    *! ----- -------    *! 0 spread: 0 (Perfect hit)
pike.git/src/modules/_WhiteFish/whitefish.c:575:    *!    *! The 'spread' value should be defined somehow.    *!    *! @[blobfeeder]    *!    *! This function returns a Pike string containing the word hits    *! for a certain word_id. Call repeatedly until it returns 0.    */   {    double proximity_coefficients[8]; -  double field_coefficients[66]; +  double field_coefficients[65];    int numblobs, i, cutoff;    Blob **blobs;       struct svalue *cb;    struct object *res;    struct array *_words, *_field, *_prox;       /* 1: Get all arguments. */    get_all_args( "do_query_or", args, "%a%a%a%d%*",    &_words, &_field, &_prox, &cutoff, &cb);    -  if( _field->size != 66 ) -  Pike_error("Illegal size of field_coefficients array (expected 66)\n" ); +  if( _field->size != 65 ) +  Pike_error("Illegal size of field_coefficients array (expected 65)\n" );    if( _prox->size != 8 )    Pike_error("Illegal size of proximity_coefficients array (expected 8)\n" );       numblobs = _words->size;    if( !numblobs )    {    struct object *o = wf_resultset_new( );    pop_n_elems( args );    wf_resultset_push( o );    return;    }       blobs = malloc( sizeof(Blob *) * numblobs );       for( i = 0; i<numblobs; i++ )    blobs[i] = wf_blob_new( cb, _words->item[i].u.string );       for( i = 0; i<8; i++ )    proximity_coefficients[i] = (double)_prox->item[i].u.integer;    -  for( i = 0; i<66; i++ ) +  for( i = 0; i<65; i++ )    field_coefficients[i] = (double)_field->item[i].u.integer;       res = low_do_query_or(blobs,numblobs,    field_coefficients,    proximity_coefficients,    cutoff );    pop_n_elems( args );    wf_resultset_push( res );   }