a23b4c | 2001-05-25 | Per Hedbor | | #include <math.h>
|
750251 | 2001-05-22 | Per Hedbor | | #include "global.h"
#include "stralloc.h"
#include "global.h"
#include "pike_macros.h"
#include "interpret.h"
#include "program.h"
#include "object.h"
|
4ad07e | 2001-05-22 | Per Hedbor | | #include "array.h"
#include "module_support.h"
|
fcd060 | 2003-02-18 | Martin Stjernholm | | #include "module.h"
|
750251 | 2001-05-22 | Per Hedbor | |
|
085384 | 2001-05-22 | Per Hedbor | | #include "config.h"
|
750251 | 2001-05-22 | Per Hedbor | | #include "whitefish.h"
#include "resultset.h"
|
4926f7 | 2001-05-22 | Per Hedbor | | #include "blob.h"
|
54025d | 2001-05-26 | Per Hedbor | | #include "blobs.h"
|
915723 | 2001-05-28 | Per Hedbor | | #include "linkfarm.h"
|
750251 | 2001-05-22 | Per Hedbor | |
|
fcd060 | 2003-02-18 | Martin Stjernholm | |
#ifndef PIKE_MODULE_INIT
|
750251 | 2001-05-22 | Per Hedbor | |
#include "module_magic.h"
|
fcd060 | 2003-02-18 | Martin Stjernholm | | #define PIKE_MODULE_INIT void pike_module_init(void)
#define PIKE_MODULE_EXIT void pike_module_exit(void)
#endif
|
750251 | 2001-05-22 | Per Hedbor | |
|
4ad07e | 2001-05-22 | Per Hedbor | | struct tofree
{
Blob **blobs;
|
a23b4c | 2001-05-25 | Per Hedbor | | Blob **tmp;
|
4ad07e | 2001-05-22 | Per Hedbor | | int nblobs;
struct object *res;
};
|
6aa8ef | 2001-05-23 | Per Hedbor | | static void free_stuff( void *_t )
|
4ad07e | 2001-05-22 | Per Hedbor | | {
struct tofree *t= (struct tofree *)_t;
int i;
if( t->res ) free_object( t->res );
for( i = 0; i<t->nblobs; i++ )
wf_blob_free( t->blobs[i] );
|
107c24 | 2002-01-02 | Johan Schön | | free(t->blobs);
|
a23b4c | 2001-05-25 | Per Hedbor | | free( t->tmp );
|
4ad07e | 2001-05-22 | Per Hedbor | | free( t );
}
|
/* Position of a hit; body hits and field hits store it in different
 * union members.  The argument is parenthesized so that expressions
 * such as *p or a[i] can be passed safely. */
#define OFFSET(X) \
  ((X).type == HIT_BODY ? (X).u.body.pos : (X).u.field.pos)
/* Proximity bucket for a distance, as computed by _distance_f(). */
#define DOFF(X) _distance_f(X)
/* Row index for a hit: 0 for body hits, field type + 1 otherwise. */
#define MOFF(X) ((X).type == HIT_BODY ? 0 : (X).u.field.type + 1)
|
a23b4c | 2001-05-25 | Per Hedbor | |
|
/* Map the distance between two hits onto one of eight proximity
 * buckets.  Everything below 2 (negative values included) lands in
 * bucket 0, everything from 161 upwards in bucket 7. */
static int _distance_f( int distance )
{
  /* Exclusive upper bounds of buckets 0..6. */
  static const int limit[7] = { 2, 6, 11, 22, 42, 82, 161 };
  int bucket = 0;

  while( bucket < 7 && distance >= limit[bucket] )
    bucket++;
  return bucket;
}
|
a23b4c | 2001-05-25 | Per Hedbor | |
/* Rank one document for which every blob in blobs[] has hits, and add
 * it to res when the rank is positive.
 *
 *   blobs, nblobs  the blobs positioned at the document
 *   res            result set the (docid, rank) pair is added to
 *   docid          id of the document being ranked
 *   field_c        address of the caller's pointer to 65 field weights
 *   prox_c         address of the caller's pointer to 8 proximity weights
 *   mc, mp         normalisation divisors (the callers pass the largest
 *                  field and proximity weight); mc*mp must not be zero
 *                  when any weight pair is non-zero
 *   cutoff         upper bound applied to each counter before weighting
 *
 * Throws a Pike error if the scratch buffers cannot be allocated; the
 * callers in this file have free_stuff() registered with SET_ONERROR
 * at that point.
 */
static void handle_hit( Blob **blobs,
                        int nblobs,
                        struct object *res,
                        int docid,
                        double *field_c[65],
                        double *prox_c[8],
                        double mc, double mp,
                        int cutoff )
{
  int i, j, k;
  /* hits[] must start out zeroed; calloc does that. */
  Hit *hits = calloc( nblobs, sizeof(Hit) );
  unsigned char *nhits = malloc( nblobs );
  unsigned char *pos = malloc( nblobs );
  /* Counter per (field row, proximity bucket). */
  int matrix[65][8];

  /* A zero-sized request may legitimately yield NULL, hence nblobs > 0. */
  if( nblobs > 0 && ( !hits || !nhits || !pos ) )
  {
    free( pos );
    free( nhits );
    free( hits );
    Pike_error( "Out of memory\n" );
  }

  MEMSET(matrix, 0, sizeof(matrix) );

  for( i = 0; i<nblobs; i++ )
    nhits[i] = wf_blob_nhits( blobs[i] );

  for( i = 0; i<nblobs; i++ )
  {
    /* Restart the read cursors of the other blobs for every blob i. */
    MEMSET( pos, 0, nblobs );
    for( j = 0; j<nhits[i]; j++ )
    {
      hits[i] = wf_blob_hit( blobs[i], j );
      /* Every single hit counts once in column 3 of its field row. */
      matrix[MOFF(hits[i])][3]++;

      for( k = 0; k<nblobs; k++ )
        if( k != i && pos[ k ] < nhits[ k ] )
        {
          /* Advance blob k until its current hit is not before hit i. */
          while( (hits[k].raw < hits[i].raw) && (pos[ k ] < nhits[ k ]))
            hits[k] = wf_blob_hit( blobs[k], pos[k]++ );
          /* Hits of the same type add 4 to the bucket of their distance. */
          if( (pos[ k ] < nhits[ k ]) && hits[k].type == hits[i].type )
            matrix[MOFF(hits[i])][DOFF(OFFSET(hits[k])-OFFSET(hits[i]))]+=4;
        }
    }
  }

  free( pos );
  free( nhits );
  free( hits );

  {
    double accum = 0.0, fc, pc;

    /* Weighted sum of the capped counters; zero weights are skipped. */
    for( i = 0; i<65; i++ )
      if( (fc = (*field_c)[i]) != 0.0 )
        for( j = 0; j<8; j++ )
          if( (pc = (*prox_c)[j]) != 0.0 )
            accum += (MINIMUM(matrix[i][j],cutoff)*fc*pc) / (mc*mp);

    if( accum > 32000.0 )
      accum = 32000.0;

    /* Stored rank is the sum scaled by 100, plus one. */
    if( accum > 0.0 )
      wf_resultset_add( res, docid, (int)(accum *100 ) + 1 );
  }
}
|
fdc466 | 2001-05-31 | Johan Schön | | static struct object *low_do_query_or( Blob **blobs,
|
4ad07e | 2001-05-22 | Per Hedbor | | int nblobs,
|
af27a6 | 2001-07-31 | Johan Schön | | double field_c[65],
|
73e942 | 2001-05-28 | Per Hedbor | | double prox_c[8],
int cutoff)
|
4ad07e | 2001-05-22 | Per Hedbor | | {
struct object *res = wf_resultset_new();
struct tofree *__f = malloc( sizeof( struct tofree ) );
|
a63397 | 2001-05-25 | Per Hedbor | | double max_c=0.0, max_p=0.0;
|
4ad07e | 2001-05-22 | Per Hedbor | | ONERROR e;
|
ccfc83 | 2001-05-25 | Per Hedbor | | int i, j;
|
a23b4c | 2001-05-25 | Per Hedbor | | Blob **tmp;
tmp = malloc( nblobs * sizeof( Blob *) );
|
4ad07e | 2001-05-22 | Per Hedbor | |
__f->res = res;
__f->blobs = blobs;
__f->nblobs = nblobs;
|
a23b4c | 2001-05-25 | Per Hedbor | | __f->tmp = tmp;
|
4ad07e | 2001-05-22 | Per Hedbor | | SET_ONERROR( e, free_stuff, __f );
|
369306 | 2001-05-22 | Per Hedbor | |
|
af27a6 | 2001-07-31 | Johan Schön | | for( i = 0; i<65; i++ )
|
a63397 | 2001-05-25 | Per Hedbor | | if( field_c[i] > max_c )
max_c = field_c[i];
for( i = 0; i<8; i++ )
if( prox_c[i] > max_p )
max_p = prox_c[i];
|
a23b4c | 2001-05-25 | Per Hedbor | |
|
a63397 | 2001-05-25 | Per Hedbor | | if( max_p != 0.0 && max_c != 0.0 )
|
a23b4c | 2001-05-25 | Per Hedbor | | {
|
a63397 | 2001-05-25 | Per Hedbor | |
for( i = 0; i<nblobs; i++ )
wf_blob_next( blobs[i] );
|
ccfc83 | 2001-05-25 | Per Hedbor | | while( 1 )
|
a63397 | 2001-05-25 | Per Hedbor | | {
|
740b06 | 2001-05-28 | Johan Schön | | unsigned int min = 0x7fffffff;
|
a23b4c | 2001-05-25 | Per Hedbor | |
|
a63397 | 2001-05-25 | Per Hedbor | | for( i = 0; i<nblobs; i++ )
if( !blobs[i]->eof && ((unsigned int)blobs[i]->docid) < min )
min = blobs[i]->docid;
|
369306 | 2001-05-22 | Per Hedbor | |
|
740b06 | 2001-05-28 | Johan Schön | | if( min == 0x7fffffff )
|
a63397 | 2001-05-25 | Per Hedbor | | break;
for( j = 0, i = 0; i < nblobs; i++ )
if( blobs[i]->docid == min && !blobs[i]->eof )
tmp[j++] = blobs[i];
|
ccfc83 | 2001-05-25 | Per Hedbor | |
|
73e942 | 2001-05-28 | Per Hedbor | | handle_hit( tmp, j, res, min, &field_c, &prox_c, max_c, max_p, cutoff );
|
a23b4c | 2001-05-25 | Per Hedbor | |
|
a63397 | 2001-05-25 | Per Hedbor | | for( i = 0; i<j; i++ )
wf_blob_next( tmp[i] );
}
|
a23b4c | 2001-05-25 | Per Hedbor | | }
|
369306 | 2001-05-22 | Per Hedbor | |
|
4ad07e | 2001-05-22 | Per Hedbor | | UNSET_ONERROR( e );
__f->res = 0;
free_stuff( __f );
return res;
}
|
ccfc83 | 2001-05-25 | Per Hedbor | | static void handle_phrase_hit( Blob **blobs,
int nblobs,
struct object *res,
int docid,
|
af27a6 | 2001-07-31 | Johan Schön | | double *field_c[65],
|
ccfc83 | 2001-05-25 | Per Hedbor | | double mc )
{
int i, j, k;
|
a5039e | 2001-05-30 | Per Hedbor | | unsigned char *nhits = malloc( nblobs*2 );
unsigned char *first = nhits+nblobs;
|
af27a6 | 2001-07-31 | Johan Schön | | int matrix[65];
|
ccfc83 | 2001-05-25 | Per Hedbor | | double accum = 0.0;
|
c24772 | 2013-01-29 | Jonas Walldén | | int base_hit = -1;
|
ccfc83 | 2001-05-25 | Per Hedbor | |
MEMSET(matrix, 0, sizeof(matrix) );
|
a5039e | 2001-05-30 | Per Hedbor | |
|
ccfc83 | 2001-05-25 | Per Hedbor | | for( i = 0; i<nblobs; i++ )
|
a5039e | 2001-05-30 | Per Hedbor | | {
|
ccfc83 | 2001-05-25 | Per Hedbor | | nhits[i] = wf_blob_nhits( blobs[i] );
|
a5039e | 2001-05-30 | Per Hedbor | | first[i] = 0;
}
|
ccfc83 | 2001-05-25 | Per Hedbor | |
|
a5039e | 2001-05-30 | Per Hedbor | |
for( i = 0; i<nhits[0]; i++)
|
ccfc83 | 2001-05-25 | Per Hedbor | | {
double add;
|
a5039e | 2001-05-30 | Per Hedbor | | int hit = 1;
Hit m = wf_blob_hit( blobs[0], i );
int h = m.raw;
if( (add = (*field_c)[ MOFF(m) ]) == 0.0 )
continue;
|
a34d38 | 2013-01-29 | Jonas Walldén | | if (h > base_hit)
base_hit = h;
|
a5039e | 2001-05-30 | Per Hedbor | |
for( j = 1; j<nblobs; j++)
for( k = first[j]; k<nhits[j]; k++ )
|
ccfc83 | 2001-05-25 | Per Hedbor | | {
|
a5039e | 2001-05-30 | Per Hedbor | | int h2 = wf_blob_hit_raw( blobs[j], k );
if( h2 > h )
|
ccfc83 | 2001-05-25 | Per Hedbor | | {
|
a5039e | 2001-05-30 | Per Hedbor | | first[j]=k;
if( h2-j == h )
hit++;
|
a34d38 | 2013-01-29 | Jonas Walldén | | if (h2 - j > base_hit)
base_hit = h2 - j;
|
a5039e | 2001-05-30 | Per Hedbor | | break;
|
ccfc83 | 2001-05-25 | Per Hedbor | | }
}
|
a5039e | 2001-05-30 | Per Hedbor | |
if( hit == nblobs )
accum += add/mc;
|
ccfc83 | 2001-05-25 | Per Hedbor | | }
|
a34d38 | 2013-01-29 | Jonas Walldén | |
|
c24772 | 2013-01-29 | Jonas Walldén | | if (base_hit >= 0) {
|
a34d38 | 2013-01-29 | Jonas Walldén | | int did_next = 0;
for (i = 0; i < nblobs; i++)
if (nhits[i]) {
int max_hit = wf_blob_hit_raw(blobs[i], nhits[i] - 1);
if (max_hit < base_hit + i) {
wf_blob_next(blobs[i]);
did_next = 1;
}
}
if (!did_next)
|
c24772 | 2013-01-29 | Jonas Walldén | | base_hit = -1;
|
a34d38 | 2013-01-29 | Jonas Walldén | | }
|
c24772 | 2013-01-29 | Jonas Walldén | | if (base_hit < 0) {
|
a34d38 | 2013-01-29 | Jonas Walldén | | for (i = 0; i < nblobs; i++)
wf_blob_next(blobs[i]);
}
|
ccfc83 | 2001-05-25 | Per Hedbor | | free( nhits );
|
375791 | 2001-05-31 | Johan Schön | | if( accum > 0.0 )
|
ccfc83 | 2001-05-25 | Per Hedbor | | wf_resultset_add( res, docid, (int)(accum*100) );
}
/* Run a phrase query: documents present in every blob are handed to
 * handle_phrase_hit(), which ranks them and advances the blobs.
 *
 *   blobs, nblobs  one blob per phrase word; the function takes
 *                  ownership and frees every blob and the array
 *   field_c        65 field weights
 *
 * Returns the result set object.  Nothing is scanned when no field
 * weight is positive.  Throws a Pike error when the bookkeeping memory
 * cannot be allocated.
 */
static struct object *low_do_query_phrase( Blob **blobs, int nblobs,
                                           double field_c[65])
{
  struct object *res = wf_resultset_new();
  struct tofree *__f = malloc( sizeof( struct tofree ) );
  double max_c=0.0;
  ONERROR e;
  int i;

  if( !__f )
  {
    /* Nothing is registered with ONERROR yet, so release by hand. */
    free_object( res );
    for( i = 0; i<nblobs; i++ )
      wf_blob_free( blobs[i] );
    free( blobs );
    Pike_error( "Out of memory\n" );
  }

  __f->blobs = blobs;
  __f->nblobs = nblobs;
  __f->res = res;
  __f->tmp = 0;
  SET_ONERROR( e, free_stuff, __f );

  for( i = 0; i<65; i++ )
    if( field_c[i] > max_c )
      max_c = field_c[i];

  if( max_c != 0.0 )
  {
    /* Load the first document of every blob. */
    for( i = 0; i<nblobs; i++ )
      wf_blob_next( blobs[i] );

    while( 1 )
    {
      unsigned int min = 0x7fffffff;
      int test_phrase = 1;

      /* A phrase needs every word, so one exhausted blob ends the scan. */
      for( i = 0; i<nblobs; i++ )
        if( blobs[i]->eof )
          goto end;
        else if( ((unsigned int)blobs[i]->docid) < min )
          min = blobs[i]->docid;

      if( min == 0x7fffffff )
        goto end;

      for( i = 0; i < nblobs; i++ )
        if( blobs[i]->docid != min ) {
          test_phrase = 0;
          break;
        }

      if (test_phrase) {
        /* handle_phrase_hit() advances the blobs itself. */
        handle_phrase_hit( blobs, nblobs, res, min, &field_c, max_c );
      } else {
        /* Not all blobs are at this document: move the laggards on. */
        for( i = 0; i<nblobs; i++ )
          if( blobs[i]->docid == min )
            wf_blob_next( blobs[i] );
      }
    }
  }
 end:
  UNSET_ONERROR( e );
  /* Keep the result alive; free_stuff() must only drop the blobs. */
  __f->res = 0;
  free_stuff( __f );
  return res;
}
|
73e942 | 2001-05-28 | Per Hedbor | | static struct object *low_do_query_and( Blob **blobs, int nblobs,
|
af27a6 | 2001-07-31 | Johan Schön | | double field_c[65],
|
73e942 | 2001-05-28 | Per Hedbor | | double prox_c[8],
int cutoff)
{
struct object *res = wf_resultset_new();
struct tofree *__f = malloc( sizeof( struct tofree ) );
double max_c=0.0, max_p=0.0;
ONERROR e;
int i, j;
__f->blobs = blobs;
__f->nblobs = nblobs;
__f->res = res;
__f->tmp = 0;
SET_ONERROR( e, free_stuff, __f );
|
af27a6 | 2001-07-31 | Johan Schön | | for( i = 0; i<65; i++ )
|
73e942 | 2001-05-28 | Per Hedbor | | if( field_c[i] > max_c )
max_c = field_c[i];
for( i = 0; i<8; i++ )
if( prox_c[i] > max_p )
max_p = prox_c[i];
if( max_c != 0.0 )
{
for( i = 0; i<nblobs; i++ )
wf_blob_next( blobs[i] );
while( 1 )
{
|
740b06 | 2001-05-28 | Johan Schön | | unsigned int min = 0x7fffffff;
|
73e942 | 2001-05-28 | Per Hedbor | |
for( i = 0; i<nblobs; i++ )
if( blobs[i]->eof )
goto end;
else if( ((unsigned int)blobs[i]->docid) < min )
min = blobs[i]->docid;
|
740b06 | 2001-05-28 | Johan Schön | | if( min == 0x7fffffff )
|
73e942 | 2001-05-28 | Per Hedbor | | goto end;
for( j = 0, i = 0; i < nblobs; i++ )
if( blobs[i]->docid != min )
goto next;
handle_hit( blobs, nblobs, res, min, &field_c,&prox_c, max_c,max_p,
cutoff );
next:
for( i = 0; i<nblobs; i++ )
if( blobs[i]->docid == min )
wf_blob_next( blobs[i] );
}
}
end:
UNSET_ONERROR( e );
__f->res = 0;
free_stuff( __f );
return res;
}
|
40a44d | 2004-08-07 | Johan Schön | |
|
ccfc83 | 2001-05-25 | Per Hedbor | |
static void f_do_query_phrase( INT32 args )
/*! @decl ResultSet do_query_phrase( array(string) words,          @
 *!                          array(int) field_coefficients,       @
 *!                          function(string,int,int:string) blobfeeder)
 *! @param words
 *!
 *!   Arrays of word ids. Note that the order is significant for the
 *!   ranking.
 *!
 *! @param field_coefficients
 *!
 *!   An array of ranking coefficients for the different fields. In the
 *!   range of [0x0000-0xffff]. The array (always) has 65 elements:
 *!
 *!   @array
 *!     @elem int 0
 *!       body
 *!     @elem int 1..64
 *!       Special field 0..63.
 *!   @endarray
 *!
 *! @param blobfeeder
 *!
 *!   This function returns a Pike string containing the word hits for a
 *!   certain word. Call repeatedly until it returns @expr{0@}.
 */
{
  double field_coefficients[65];
  int numblobs, i;
  Blob **blobs;
  struct svalue *cb;
  struct object *res;
  struct array *_words, *_field;

  get_all_args( "do_query_phrase", args, "%a%a%*",
                &_words, &_field, &cb);

  if( _field->size != 65 )
    Pike_error("Illegal size of field_coefficients array (expected 65)\n" );

  numblobs = _words->size;
  if( !numblobs )
  {
    /* No words: answer with an empty result set. */
    struct object *o = wf_resultset_new( );
    pop_n_elems( args );
    wf_resultset_push( o );
    return;
  }

  blobs = malloc( sizeof(Blob *) * numblobs );
  if( !blobs )
    Pike_error( "Out of memory\n" );

  /* NOTE(review): the element types are not checked here; this relies
   * on words holding strings and field_coefficients holding integers. */
  for( i = 0; i<numblobs; i++ )
    blobs[i] = wf_blob_new( cb, _words->item[i].u.string );

  for( i = 0; i<65; i++ )
    field_coefficients[i] = (double)_field->item[i].u.integer;

  /* low_do_query_phrase() frees the blobs and the blob array. */
  res = low_do_query_phrase(blobs,numblobs, field_coefficients );
  pop_n_elems( args );
  wf_resultset_push( res );
}
|
750251 | 2001-05-22 | Per Hedbor | |
|
73e942 | 2001-05-28 | Per Hedbor | | static void f_do_query_and( INT32 args )
|
2ca49e | 2001-07-05 | Per Hedbor | | |
73e942 | 2001-05-28 | Per Hedbor | | *! array(int) field_coefficients, @
*! array(int) proximity_coefficients, @
|
f84c49 | 2013-01-29 | Jonas Walldén | | *! function(string,int,int:string) blobfeeder)
|
40a44d | 2004-08-07 | Johan Schön | | *! @param words
|
73e942 | 2001-05-28 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! Arrays of word ids. Note that the order is significant for the
*! ranking.
*!
*! @param field_coefficients
|
73e942 | 2001-05-28 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! An array of ranking coefficients for the different fields. In the
*! range of [0x0000-0xffff]. The array (always) has 65 elements:
|
73e942 | 2001-05-28 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! @array
*! @elem int 0
*! body
*! @elem int 1..64
*! Special field 0..63.
*! @endarray
|
73e942 | 2001-05-28 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! @param proximity_coefficients
|
73e942 | 2001-05-28 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! An array of ranking coefficients for the different proximity
*! categories. Always has 8 elements, in the range of
*! [0x0000-0xffff].
|
73e942 | 2001-05-28 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! @array
*! @elem int 0
*! spread: 0 (Perfect hit)
*! @elem int 1
*! spread: 1-5
*! @elem int 2
*! spread: 6-10
*! @elem int 3
*! spread: 11-20
*! @elem int 4
*! spread: 21-40
*! @elem int 5
*! spread: 41-80
*! @elem int 6
*! spread: 81-160
*! @elem int 7
*! spread: 161-
*! @endarray
|
73e942 | 2001-05-28 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! @param blobfeeder
|
73e942 | 2001-05-28 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! This function returns a Pike string containing the word hits for a
|
f84c49 | 2013-01-29 | Jonas Walldén | | *! certain word. Call repeatedly until it returns @expr{0@}.
|
73e942 | 2001-05-28 | Per Hedbor | | */
{
double proximity_coefficients[8];
|
af27a6 | 2001-07-31 | Johan Schön | | double field_coefficients[65];
|
73e942 | 2001-05-28 | Per Hedbor | | int numblobs, i, cutoff;
Blob **blobs;
struct svalue *cb;
struct object *res;
struct array *_words, *_field, *_prox;
get_all_args( "do_query_and", args, "%a%a%a%d%*",
&_words, &_field, &_prox, &cutoff, &cb);
|
af27a6 | 2001-07-31 | Johan Schön | | if( _field->size != 65 )
Pike_error("Illegal size of field_coefficients array (expected 65)\n" );
|
73e942 | 2001-05-28 | Per Hedbor | | if( _prox->size != 8 )
Pike_error("Illegal size of proximity_coefficients array (expected 8)\n" );
numblobs = _words->size;
if( !numblobs )
{
struct object *o = wf_resultset_new( );
pop_n_elems( args );
|
f5ac10 | 2001-06-15 | Per Hedbor | | wf_resultset_push( o );
|
73e942 | 2001-05-28 | Per Hedbor | | return;
}
blobs = malloc( sizeof(Blob *) * numblobs );
for( i = 0; i<numblobs; i++ )
|
2ca49e | 2001-07-05 | Per Hedbor | | blobs[i] = wf_blob_new( cb, _words->item[i].u.string );
|
73e942 | 2001-05-28 | Per Hedbor | |
for( i = 0; i<8; i++ )
proximity_coefficients[i] = (double)_prox->item[i].u.integer;
|
af27a6 | 2001-07-31 | Johan Schön | | for( i = 0; i<65; i++ )
|
73e942 | 2001-05-28 | Per Hedbor | | field_coefficients[i] = (double)_field->item[i].u.integer;
res = low_do_query_and(blobs,numblobs,
field_coefficients,
proximity_coefficients,
cutoff );
|
107c24 | 2002-01-02 | Johan Schön | |
|
73e942 | 2001-05-28 | Per Hedbor | | pop_n_elems( args );
|
f5ac10 | 2001-06-15 | Per Hedbor | | wf_resultset_push( res );
|
73e942 | 2001-05-28 | Per Hedbor | | }
|
1b89a8 | 2001-05-29 | Johan Schön | | static void f_do_query_or( INT32 args )
|
2ca49e | 2001-07-05 | Per Hedbor | | |
1b89a8 | 2001-05-29 | Johan Schön | | *! array(int) field_coefficients, @
*! array(int) proximity_coefficients, @
|
f84c49 | 2013-01-29 | Jonas Walldén | | *! function(string,int,int:string) blobfeeder)
|
40a44d | 2004-08-07 | Johan Schön | | *! @param words
|
4926f7 | 2001-05-22 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! Arrays of word ids. Note that the order is significant for the
*! ranking.
|
4926f7 | 2001-05-22 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! @param field_coefficients
|
4926f7 | 2001-05-22 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! An array of ranking coefficients for the different fields. In the
*! range of [0x0000-0xffff]. The array (always) has 65 elements:
|
4926f7 | 2001-05-22 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! @array
*! @elem int 0
*! body
*! @elem int 1..64
*! Special field 0..63.
*! @endarray
|
4926f7 | 2001-05-22 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! @param proximity_coefficients
|
4926f7 | 2001-05-22 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! An array of ranking coefficients for the different proximity
*! categories. Always has 8 elements, in the range of
*! [0x0000-0xffff].
|
4926f7 | 2001-05-22 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! @array
*! @elem int 0
*! spread: 0 (Perfect hit)
*! @elem int 1
*! spread: 1-5
*! @elem int 2
*! spread: 6-10
*! @elem int 3
*! spread: 11-20
*! @elem int 4
*! spread: 21-40
*! @elem int 5
*! spread: 41-80
*! @elem int 6
*! spread: 81-160
*! @elem int 7
*! spread: 161-
*! @endarray
*!
*! @param blobfeeder
|
4926f7 | 2001-05-22 | Per Hedbor | | *!
|
40a44d | 2004-08-07 | Johan Schön | | *! This function returns a Pike string containing the word hits for a
|
f84c49 | 2013-01-29 | Jonas Walldén | | *! certain word. Call repeatedly until it returns @expr{0@}.
|
4926f7 | 2001-05-22 | Per Hedbor | | */
{
|
5cdc2c | 2001-05-25 | Per Hedbor | | double proximity_coefficients[8];
|
af27a6 | 2001-07-31 | Johan Schön | | double field_coefficients[65];
|
73e942 | 2001-05-28 | Per Hedbor | | int numblobs, i, cutoff;
|
4ad07e | 2001-05-22 | Per Hedbor | | Blob **blobs;
struct svalue *cb;
|
a23b4c | 2001-05-25 | Per Hedbor | | struct object *res;
|
4ad07e | 2001-05-22 | Per Hedbor | | struct array *_words, *_field, *_prox;
|
8dfecc | 2001-05-31 | David Norlin | | get_all_args( "do_query_or", args, "%a%a%a%d%*",
|
73e942 | 2001-05-28 | Per Hedbor | | &_words, &_field, &_prox, &cutoff, &cb);
|
4ad07e | 2001-05-22 | Per Hedbor | |
|
af27a6 | 2001-07-31 | Johan Schön | | if( _field->size != 65 )
Pike_error("Illegal size of field_coefficients array (expected 65)\n" );
|
4ad07e | 2001-05-22 | Per Hedbor | | if( _prox->size != 8 )
Pike_error("Illegal size of proximity_coefficients array (expected 8)\n" );
numblobs = _words->size;
if( !numblobs )
{
struct object *o = wf_resultset_new( );
pop_n_elems( args );
|
f5ac10 | 2001-06-15 | Per Hedbor | | wf_resultset_push( o );
|
4ad07e | 2001-05-22 | Per Hedbor | | return;
}
blobs = malloc( sizeof(Blob *) * numblobs );
for( i = 0; i<numblobs; i++ )
|
2ca49e | 2001-07-05 | Per Hedbor | | blobs[i] = wf_blob_new( cb, _words->item[i].u.string );
|
4ad07e | 2001-05-22 | Per Hedbor | |
for( i = 0; i<8; i++ )
|
a63397 | 2001-05-25 | Per Hedbor | | proximity_coefficients[i] = (double)_prox->item[i].u.integer;
|
4ad07e | 2001-05-22 | Per Hedbor | |
|
af27a6 | 2001-07-31 | Johan Schön | | for( i = 0; i<65; i++ )
|
a63397 | 2001-05-25 | Per Hedbor | | field_coefficients[i] = (double)_field->item[i].u.integer;
|
4926f7 | 2001-05-22 | Per Hedbor | |
|
fdc466 | 2001-05-31 | Johan Schön | | res = low_do_query_or(blobs,numblobs,
|
2ca49e | 2001-07-05 | Per Hedbor | | field_coefficients,
proximity_coefficients,
cutoff );
|
a23b4c | 2001-05-25 | Per Hedbor | | pop_n_elems( args );
|
f5ac10 | 2001-06-15 | Per Hedbor | | wf_resultset_push( res );
|
4926f7 | 2001-05-22 | Per Hedbor | | }
|
40a44d | 2004-08-07 | Johan Schön | |
|
73e942 | 2001-05-28 | Per Hedbor | |
|
fcd060 | 2003-02-18 | Martin Stjernholm | | PIKE_MODULE_INIT
|
750251 | 2001-05-22 | Per Hedbor | | {
init_resultset_program();
|
20c597 | 2001-05-23 | Per Hedbor | | init_blob_program();
|
54025d | 2001-05-26 | Per Hedbor | | init_blobs_program();
|
915723 | 2001-05-28 | Per Hedbor | | init_linkfarm_program();
|
4926f7 | 2001-05-22 | Per Hedbor | |
|
1b89a8 | 2001-05-29 | Johan Schön | | add_function( "do_query_or", f_do_query_or,
|
2ca49e | 2001-07-05 | Per Hedbor | | "function(array(string),array(int),array(int),int"
|
f84c49 | 2013-01-29 | Jonas Walldén | | ",function(string,int,int:string):object)",
|
73e942 | 2001-05-28 | Per Hedbor | | 0 );
add_function( "do_query_and", f_do_query_and,
|
2ca49e | 2001-07-05 | Per Hedbor | | "function(array(string),array(int),array(int),int"
|
f84c49 | 2013-01-29 | Jonas Walldén | | ",function(string,int,int:string):object)",
|
4926f7 | 2001-05-22 | Per Hedbor | | 0 );
|
ccfc83 | 2001-05-25 | Per Hedbor | |
add_function( "do_query_phrase", f_do_query_phrase,
|
2ca49e | 2001-07-05 | Per Hedbor | | "function(array(string),array(int)"
|
f84c49 | 2013-01-29 | Jonas Walldén | | ",function(string,int,int:string):object)",
|
ccfc83 | 2001-05-25 | Per Hedbor | | 0 );
|
750251 | 2001-05-22 | Per Hedbor | | }
|
fcd060 | 2003-02-18 | Martin Stjernholm | | PIKE_MODULE_EXIT
|
750251 | 2001-05-22 | Per Hedbor | | {
|
085384 | 2001-05-22 | Per Hedbor | | exit_resultset_program();
|
20c597 | 2001-05-23 | Per Hedbor | | exit_blob_program();
|
54025d | 2001-05-26 | Per Hedbor | | exit_blobs_program();
|
915723 | 2001-05-28 | Per Hedbor | | exit_linkfarm_program();
|
750251 | 2001-05-22 | Per Hedbor | | }
|