pike.git/
src/
builtin_functions.c
Branch:
Tag:
Non-build tags
All tags
No tags
1998-05-19
1998-05-19 17:25:11 by Henrik Grubbström (Grubba) <grubba@grubba.org>
f8738399a2b9241843682109cec69603eb0719f5 (
276
lines) (+
263
/-
13
)
[
Show
|
Annotate
]
Branch:
7.9
Added diff_dyn_longest_sequence().
Not very fast though...
Rev: src/builtin_functions.c:1.107
4:
||| See the files COPYING and DISCLAIMER for more information. \*/ #include "global.h"
-
RCSID("$Id: builtin_functions.c,v 1.
106
1998/05/
13
07
:
41
:
16
hubbe
Exp $");
+
RCSID("$Id: builtin_functions.c,v 1.
107
1998/05/
19
17
:
25
:
11
grubba
Exp $");
#include "interpret.h" #include "svalue.h" #include "pike_macros.h"
1757:
/**** diff ************************************************************/
-
static struct array* diff_compare_table(struct array *a,struct array *b)
+
static struct array* diff_compare_table(struct array *a,struct array *b
,int *u
)
{ struct array *res; struct mapping *map; struct svalue *pval; int i;
-
+
if (u) {
+
*u = 0; /* Unique rows in array b */
+
}
+
map=allocate_mapping(256); push_mapping(map); /* in case of out of memory */
1780:
val.u.array->item[0].u.integer=i; mapping_insert(map,b->item+i,&val); free_svalue(&val);
+
if (u) {
+
(*u)++;
}
-
+
}
else { pval->u.array=resize_array(pval->u.array,pval->u.array->size+1);
1825:
struct diff_magic_link dml[1]; };
+
/*
 * One cell of the rolling table used by diff_dyn_longest_sequence().
 *
 * The table is calloc()ed there, so a fresh cell has depth 0 and (on
 * platforms where NULL is all-bits-zero) a NULL link.
 */
struct diff_magic_link_head
+
{
+
/* Length of the common subsequence recorded for this cell; in
 * diff_dyn_longest_sequence() it equals the number of links reachable
 * from `link` via ->prev. */
unsigned int depth;
+
/* First link of the chain describing that subsequence, or NULL when
 * depth is 0. The cell holds one reference (->refs) on this link. */
struct diff_magic_link *link;
+
};
+
#define DMLPOOLSIZE 16384 static int dmls=0;
1957:
* cmptbl == diff_compare_table(a, b) * blen == sizeof(b) >= max(@(cmptbl*({}))) */
-
static struct array
*
diff_longest_sequence(struct array *cmptbl, int blen)
+
static struct array
*diff_longest_sequence(struct array *cmptbl, int blen)
{ int i,j,top=0,lsize=0; struct array *a;
2068:
dml->refs = 1; if (pos)
-
add_ref
(dml->prev = stack[pos-1]);
+
(dml->prev = stack[pos-1])
->refs++
;
else dml->prev = NULL;
2101:
dml->refs = 1; if (pos)
-
add_ref
(dml->prev = stack[pos-1]);
+
(dml->prev = stack[pos-1])
->refs++
;
else dml->prev = NULL;
2145:
return a; }
+
/*
+
* The dynamic programming Longest Common Sequence algorithm.
+
*
+
* This algorithm is O(Na * Nb), where:
+
*
+
* Na == sizeof(a)
+
* Nb == sizeof(b)
+
*
+
* This makes it faster than the G-M algorithm on binary data,
+
* but slower on ascii data.
+
*/
+
static struct array *diff_dyn_longest_sequence(struct array *a,
+
struct array *b)
+
{
+
struct array *res = NULL;
+
struct diff_magic_link_head *table = NULL;
+
struct diff_magic_link_pool *dml_pool = NULL;
+
struct diff_magic_link *dml;
+
unsigned int sa = (unsigned int)a->size;
+
unsigned int sb = (unsigned int)b->size;
+
unsigned int ia;
+
unsigned int ib;
+
unsigned int off1 = 0;
+
unsigned int off2;
+
unsigned int tmp;
+
+
if (sa <= sb) {
+
off2 = sa+1;
+
table = calloc(sizeof(struct diff_magic_link_head)*2, off2);
+
if (!table) {
+
error("diff_dyn_longest_sequence(): Out of memory");
+
}
+
+
/* FIXME: Assumes NULL is represented with all zeroes */
+
/* NOTE: Scan strings backwards to get the same result as the G-M
+
* algorithm.
+
*/
+
for (ib = sb; ib--;) {
+
tmp = off1;
+
off1 = off2;
+
off2 = tmp;
+
+
for (ia = sa; ia--;) {
+
int res = is_eq(b->item + ib, a->item + ia);
+
if (table[off1 + ia].link) {
+
if (!--(table[off1 + ia].link->refs)) {
+
dml_delete(dml_pool, table[off1 + ia].link);
+
}
+
}
+
if (res) {
+
/* Equal */
+
+
table[off1 + ia].depth = table[off2 + ia + 1].depth + 1;
+
dml = (table[off1 + ia].link = dml_new(&dml_pool));
+
if (!dml) {
+
dml_free_pools(dml_pool);
+
free(table);
+
error("diff_dyn_longest_sequence(): Out of memory");
+
}
+
dml->refs = 1;
+
dml->prev = table[off2 + ia + 1].link;
+
if (dml->prev) {
+
dml->prev->refs++;
+
}
+
dml->x = ib;
+
} else {
+
/* Differ */
+
/* FIXME: Should it be > or >= here to get the same result
+
* as with the G-M algorithm?
+
*/
+
if (table[off2 + ia].depth > table[off1 + ia + 1].depth) {
+
table[off1 + ia].depth = table[off2 + ia].depth;
+
dml = (table[off1 + ia].link = table[off2 + ia].link);
+
} else {
+
table[off1 + ia].depth = table[off1 + ia + 1].depth;
+
dml = (table[off1 + ia].link = table[off1 + ia + 1].link);
+
}
+
if (dml) {
+
dml->refs++;
+
}
+
}
+
}
+
}
+
} else {
+
/* Do the mirror version */
+
off2 = sb+1;
+
table = calloc(sizeof(struct diff_magic_link_head)*2, off2);
+
if (!table) {
+
error("diff_dyn_longest_sequence(): Out of memory");
+
}
+
+
/* FIXME: Assumes NULL is represented with all zeroes */
+
/* NOTE: Scan strings backwards to get the same result as the G-M
+
* algorithm.
+
*/
+
for (ia = sa; ia--;) {
+
tmp = off1;
+
off1 = off2;
+
off2 = tmp;
+
+
#ifdef DIFF_DEBUG
+
fprintf(stderr, " ia:%d\n", ia);
+
#endif /* DIFF_DEBUG */
+
+
for (ib = sb; ib--;) {
+
int res = is_eq(b->item + ib, a->item + ia);
+
+
#ifdef DIFF_DEBUG
+
fprintf(stderr, " ib:%d ", ib);
+
#endif /* DIFF_DEBUG */
+
+
if (table[off1 + ib].link) {
+
if (!--(table[off1 + ib].link->refs)) {
+
dml_delete(dml_pool, table[off1 + ib].link);
+
}
+
}
+
if (res) {
+
/* Equal */
+
#ifdef DIFF_DEBUG
+
fprintf(stderr, "Equal\n");
+
#endif /* DIFF_DEBUG */
+
+
table[off1 + ib].depth = table[off2 + ib + 1].depth + 1;
+
dml = (table[off1 + ib].link = dml_new(&dml_pool));
+
if (!dml) {
+
dml_free_pools(dml_pool);
+
free(table);
+
error("diff_dyn_longest_sequence(): Out of memory");
+
}
+
dml->refs = 1;
+
dml->prev = table[off2 + ib + 1].link;
+
if (dml->prev) {
+
dml->prev->refs++;
+
}
+
dml->x = ib;
+
} else {
+
/* Differ */
+
#ifdef DIFF_DEBUG
+
fprintf(stderr, "Differ\n");
+
#endif /* DIFF_DEBUG */
+
/* FIXME: Should it be > or >= here to get the same result
+
* as with the G-M algorithm?
+
*/
+
if (table[off2 + ib].depth > table[off1 + ib + 1].depth) {
+
table[off1 + ib].depth = table[off2 + ib].depth;
+
dml = (table[off1 + ib].link = table[off2 + ib].link);
+
} else {
+
table[off1 + ib].depth = table[off1 + ib + 1].depth;
+
dml = (table[off1 + ib].link = table[off1 + ib + 1].link);
+
}
+
if (dml) {
+
dml->refs++;
+
}
+
}
+
}
+
}
+
}
+
+
/* Convert table into res */
+
sa = table[off1].depth;
+
dml = table[off1].link;
+
free(table);
+
#ifdef DIFF_DEBUG
+
fprintf(stderr, "Result array size:%d\n", sa);
+
#endif /* DIFF_DEBUG */
+
+
res = allocate_array(sa);
+
if (!res) {
+
if (dml_pool) {
+
dml_free_pools(dml_pool);
+
}
+
error("diff_dyn_longest_sequence(): Out of memory");
+
}
+
+
ia = 0;
+
while(dml) {
+
#ifdef DEBUG
+
if (ia >= sa) {
+
fatal("Consistency error in diff_dyn_longest_sequence()\n");
+
}
+
#endif /* DEBUG */
+
#ifdef DIFF_DEBUG
+
fprintf(stderr, " %02d: %d\n", ia, dml->x);
+
#endif /* DIFF_DEBUG */
+
res->item[ia].type = T_INT;
+
res->item[ia].subtype = 0;
+
res->item[ia].u.integer = dml->x;
+
dml = dml->prev;
+
ia++;
+
}
+
#ifdef DEBUG
+
if (ia != sa) {
+
fatal("Consistency error in diff_dyn_longest_sequence()\n");
+
}
+
#endif /* DEBUG */
+
+
dml_free_pools(dml_pool);
+
return(res);
+
}
+
static struct array* diff_build(struct array *a, struct array *b, struct array *seq)
2219:
struct array *seq; struct array *cmptbl; struct array *diff;
+
int uniq;
if (args<2) PIKE_ERROR("diff", "Too few arguments.\n", sp, args);
2227:
sp[1-args].type!=T_ARRAY) PIKE_ERROR("diff", "Bad arguments.\n", sp, args);
-
cmptbl=diff_compare_table(sp[-args].u.array,sp[1-args].u.array);
+
cmptbl
=
diff_compare_table(sp[-args].u.array,
sp[1-args].u.array
, &uniq
);
+
+
if (uniq * 100 > sp[1-args].u.array->size) {
+
#ifdef DIFF_DEBUG
+
fprintf(stderr, "diff: Using G-M algorithm, u:%d, s:%d\n",
+
uniq, sp[1-args].u.array->size);
+
#endif /* DIFF_DEBUG */
push_array(cmptbl); seq=diff_longest_sequence(cmptbl, sp[1-1-args].u.array->size); push_array(seq); diff=diff_build(sp[-2-args].u.array,sp[1-2-args].u.array,seq);
-
+
} else {
+
#ifdef DIFF_DEBUG
+
fprintf(stderr, "diff: Using dyn algorithm, u:%d, s:%d\n",
+
uniq, sp[1-args].u.array->size);
+
#endif /* DIFF_DEBUG */
+
free_array(cmptbl);
+
seq = diff_dyn_longest_sequence(sp[-args].u.array, sp[1-args].u.array);
+
push_array(seq);
-
+
diff = diff_build(sp[-1-args].u.array, sp[1-1-args].u.array, seq);
+
}
+
pop_n_elems(2+args); push_array(diff); }
2249:
sp[1-args].type!=T_ARRAY) PIKE_ERROR("diff_compare_table", "Bad arguments.\n", sp, args);
-
cmptbl=diff_compare_table(sp[-args].u.array,sp[1-args].u.array);
+
cmptbl=diff_compare_table(sp[-args].u.array,sp[1-args].u.array
,NULL
);
pop_n_elems(args); push_array(cmptbl);
2259:
{ struct array *seq; struct array *cmptbl;
-
struct array *diff;
+
if (args<2) PIKE_ERROR("diff_longest_sequence", "Too few arguments.\n", sp, args);
2268:
sp[1-args].type!=T_ARRAY) PIKE_ERROR("diff_longest_sequence", "Bad arguments.\n", sp, args);
-
cmptbl=diff_compare_table(sp[-args].u.array,sp[1-args].u.array);
+
cmptbl=diff_compare_table(sp[-args].u.array,sp[1-args].u.array
, NULL
);
push_array(cmptbl); /* Note that the stack is one element off here. */ seq=diff_longest_sequence(cmptbl, sp[1-1-args].u.array->size);
2276:
push_array(seq); }
+
/*
 * Efun wrapper around diff_dyn_longest_sequence().
 *
 * Expects at least two arguments on the stack, the first two of which
 * must be arrays. Replaces all arguments with the array returned by
 * diff_dyn_longest_sequence().
 */
void f_diff_dyn_longest_sequence(INT32 args)
{
  struct svalue *argv;
  struct array *result;

  if (args < 2) {
    PIKE_ERROR("diff_dyn_longest_sequence", "Too few arguments.\n",
               sp, args);
  }

  /* First argument lives at sp[-args], second right after it. */
  argv = sp - args;

  if (!(argv[0].type == T_ARRAY && argv[1].type == T_ARRAY)) {
    PIKE_ERROR("diff_dyn_longest_sequence", "Bad arguments.\n", sp, args);
  }

  result = diff_dyn_longest_sequence(argv[0].u.array, argv[1].u.array);

  pop_n_elems(args);
  push_array(result);
}
+
/**********************************************************************/ static struct callback_list memory_usage_callback;
2799:
add_function("diff",f_diff,"function(array,array:array(array))",OPT_TRY_OPTIMIZE); add_function("diff_longest_sequence",f_diff_longest_sequence,"function(array,array:array(int))",OPT_TRY_OPTIMIZE);
+
add_function("diff_dyn_longest_sequence",f_diff_dyn_longest_sequence,"function(array,array:array(int))",OPT_TRY_OPTIMIZE);
add_function("diff_compare_table",f_diff_compare_table,"function(array,array:array(array))",OPT_TRY_OPTIMIZE); add_function("longest_ordered_sequence",f_longest_ordered_sequence,"function(array:array(int))",0); add_function("sort",f_sort,"function(array(mixed),array(mixed)...:array(mixed))",OPT_SIDE_EFFECT);