e576bb2002-10-11Martin Nilsson /* || This file is part of Pike. For copyright information see COPYRIGHT. || Pike is distributed under GPL, LGPL and MPL. See the file COPYING || for more information. */
aedfb12002-10-09Martin Nilsson 
5267b71995-08-09Fredrik Hübinette (Hubbe) #include "global.h" #include "stralloc.h"
bb55f81997-03-16Fredrik Hübinette (Hubbe) #include "pike_macros.h"
2d10fb2016-12-29Arne Goedeke #include "buffer.h"
bb55f81997-03-16Fredrik Hübinette (Hubbe) #include "pike_macros.h"
9aa6fa1997-05-19Fredrik Hübinette (Hubbe) #include "pike_memory.h"
b2d3e42000-12-01Fredrik Hübinette (Hubbe) #include "pike_error.h"
9c6f7d1997-04-15Fredrik Hübinette (Hubbe) #include "gc.h"
31ea271999-10-22Fredrik Noring #include "bignum.h" #include "interpret.h"
6f3ad02001-07-02Martin Stjernholm #include "operators.h"
686ef22004-11-11Henrik Grubbström (Grubba) #include "pike_float.h"
97bfe22015-10-11Henrik Grubbström (Grubba) #include "pike_types.h"
d476592013-06-12Arne Goedeke #include "block_allocator.h"
c054542016-12-17Martin Nilsson #include "whitespace.h"
5f90062017-01-10Martin Nilsson #include "stuff.h"
5267b71995-08-09Fredrik Hübinette (Hubbe) 
f3ece81999-02-28Fredrik Hübinette (Hubbe) #include <errno.h>
2043ba1998-02-10Fredrik Hübinette (Hubbe) 
ddad7a2014-10-20Martin Nilsson #define SET_HSIZE(X) htable_mask=(htable_size=(X))-1
e85df82001-09-06Fredrik Hübinette (Hubbe) #define HMODULO(X) ((X) & (htable_mask))
dbce712011-05-15Per Hedbor static unsigned INT32 htable_mask;
e85df82001-09-06Fredrik Hübinette (Hubbe) 
a549642003-03-17Henrik Grubbström (Grubba) #if (SIZEOF_LONG == 4) && defined(_LP64) /* Kludge for gcc and the system header files not using the same model... */ #undef LONG_MIN #undef LONG_MAX #undef ULONG_MAX #define LONG_MIN INT_MIN #define LONG_MAX INT_MAX #define ULONG_MAX UINT_MAX #endif
e1939c2001-03-30Fredrik Hübinette (Hubbe) 
ed2bed2013-06-14Per Hedbor #define BEGIN_HASH_SIZE 1024
2cbf7c1999-09-01Fredrik Hübinette (Hubbe) 
8282ca2013-11-02Per Hedbor static unsigned int hash_prefix_len=64;
6ed3d22005-01-17Henrik Grubbström (Grubba) static unsigned int need_more_hash_prefix_depth=0;
af93211996-10-12Fredrik Hübinette (Hubbe) 
abce512013-11-06Arne Goedeke static unsigned int need_new_hashkey_depth=0;
e9d7c52013-11-02Per Hedbor static size_t hashkey = 0;
0169c62011-12-30Henrik Grubbström (Grubba) 
bcafc32004-09-19Martin Nilsson static unsigned INT32 htable_size=0;
af93211996-10-12Fredrik Hübinette (Hubbe) static struct pike_string **base_table=0;
bcafc32004-09-19Martin Nilsson static unsigned INT32 num_strings=0;
4edb1a2002-09-11David Hedbor PMOD_EXPORT struct pike_string *empty_pike_string = 0;
5267b71995-08-09Fredrik Hübinette (Hubbe) 
/*** Main string hash function ***/

/* Hash LEN characters of an 8-bit (shift 0) buffer. */
#define StrHash(s,len) low_do_hash(s,len,0)

/* Hash a buffer of LEN characters of width 1<<SHIFT bytes.  Both the
 * length and the hashed prefix length are scaled to bytes. */
#define low_do_hash(STR,LEN,SHIFT)					\
  low_hashmem( (STR), (LEN)<<(SHIFT), hash_prefix_len<<(SHIFT), hashkey )

/* Hash the contents of a struct pike_string.  The argument is
 * parenthesized so that any pointer expression (e.g. "p + 1") works. */
#define do_hash(STR) low_do_hash((STR)->str,(STR)->len,(STR)->size_shift)
db4a401998-10-09Fredrik Hübinette (Hubbe) 
9925512013-05-31Per Hedbor /* Returns true if str could contain n. */ PMOD_EXPORT int string_range_contains( struct pike_string *str, int n ) { INT32 min, max; check_string_range( str, 1, &min, &max ); if( n >= min && n <= max ) return 1; return 0; } /* Returns true if str2 could be in str1. */ PMOD_EXPORT int string_range_contains_string( struct pike_string *str1, struct pike_string *str2 ) { INT32 max1, min1; INT32 max2, min2;
c037bf2013-06-11Martin Nilsson  if( !str2->len ) return 1; /* Empty string is part of every string */
9925512013-05-31Per Hedbor  check_string_range( str1, 1, &min1, &max1 ); check_string_range( str2, 1, &min2, &max2 ); if( (min2 < min1) || (max2 > max1) ) { if( (str1->flags & STRING_CONTENT_CHECKED) == (str2->flags & STRING_CONTENT_CHECKED) ) return 0; /* fallback to simple size-shift check. */ return str1->size_shift >= str2->size_shift; } return 1; } PMOD_EXPORT void check_string_range( struct pike_string *str, int loose, INT32 *min, INT32 *max ) { INT32 s_min = MAX_INT32; INT32 s_max = MIN_INT32;
37647e2013-11-03Tobias S. Josefowitz  ptrdiff_t i;
9925512013-05-31Per Hedbor  if( loose || ((str->flags & STRING_CONTENT_CHECKED ) && (!str->size_shift || !max)) ) { if( str->flags & STRING_CONTENT_CHECKED ) {
f1298d2014-07-02Arne Goedeke  switch (str->size_shift) { case eightbit: s_min = str->min; s_max = str->max; break; case sixteenbit: s_min = str->min; s_max = str->max; s_min *= 256; s_max *= 256; s_max += 255; break; case thirtytwobit: { unsigned INT32 tmp; tmp = str->min; tmp *= (1 << 24); s_min = tmp; tmp = str->max; tmp *= (1 << 24); tmp += (1 << 24) - 1; s_max = tmp; break; }
9925512013-05-31Per Hedbor  } } else { switch( str->size_shift ) { case 2: s_min = MIN_INT32; s_max=MAX_INT32; break; case 1: s_min = 0; s_max = 65535; break; case 0: s_min = 0; s_max = 255; break; } } } else { str->flags |= STRING_CONTENT_CHECKED; switch( str->size_shift ) { case 0: { p_wchar0 *p = (p_wchar0*)str->str; int upper = 0, lower = 0; for( i=0; i<str->len; i++,p++ ) { /* For 7-bit strings it's easy to check for * lower/uppercase, so do that here as well. */ if( *p >= 'A' && *p <= 'Z') upper++; if( *p >= 'a' && *p <= 'z') lower++; if( *p > s_max ) s_max = *p; if( *p < s_min ) s_min = *p; } if( s_max < 128 ) {
3a059b2013-06-09Chris Angelico  if( !lower )
9925512013-05-31Per Hedbor  str->flags |= STRING_IS_UPPERCASE;
3a059b2013-06-09Chris Angelico  if( !upper )
9925512013-05-31Per Hedbor  str->flags |= STRING_IS_LOWERCASE; } } str->min = s_min; str->max = s_max; break; case 1: { p_wchar1 *p = (p_wchar1*)str->str; for( i=0; i<str->len; i++,p++ ) { if( *p > s_max ) s_max = *p; if( *p < s_min ) s_min = *p; } }
f1298d2014-07-02Arne Goedeke  str->min = s_min / 256; str->max = s_max / 256;
9925512013-05-31Per Hedbor  break; case 2: { p_wchar2 *p = (p_wchar2*)str->str; for( i=0; i<str->len; i++,p++ ) { if( *p > s_max ) s_max = *p; if( *p < s_min ) s_min = *p; } }
f1298d2014-07-02Arne Goedeke  str->min = (unsigned INT32)s_min / (1 << 24); str->max = (unsigned INT32)s_max / (1 << 24);
9925512013-05-31Per Hedbor  break; } } if( min ) *min = s_min; if( max ) *max = s_max; }
db4a401998-10-09Fredrik Hübinette (Hubbe) 
5de2692012-05-28Martin Stjernholm void low_set_index(struct pike_string *s, ptrdiff_t pos, int value)
db4a401998-10-09Fredrik Hübinette (Hubbe) {
71f3a21998-11-22Fredrik Hübinette (Hubbe) #ifdef PIKE_DEBUG
b99d882003-05-15Martin Stjernholm  if(pos > s->len || pos<0) { if (s->len) { Pike_fatal("String index %"PRINTPTRDIFFT"d is out of " "range 0..%"PRINTPTRDIFFT"d.\n", pos, s->len-1); } else { Pike_fatal("Attempt to index the empty string with %"PRINTPTRDIFFT"d.\n", pos); } }
db4a401998-10-09Fredrik Hübinette (Hubbe)  if(pos == s->len && value)
5aad932002-08-15Marcus Comstedt  Pike_fatal("string zero termination foul!\n");
db4a401998-10-09Fredrik Hübinette (Hubbe) #endif
9cd0372005-11-03Henrik Grubbström (Grubba)  s->flags |= STRING_NOT_HASHED;
ed2bed2013-06-14Per Hedbor  if(!s->size_shift) STR0(s)[pos]=value; else if(s->size_shift == 1) STR1(s)[pos]=value;
13670c2015-05-25Martin Nilsson  else
ed2bed2013-06-14Per Hedbor  STR2(s)[pos]=value;
db4a401998-10-09Fredrik Hübinette (Hubbe) }
#ifdef PIKE_DEBUG
/* Debug helper: aborts unless a has the expected character width, then
 * hands the string back with the const qualifier removed. */
PMOD_EXPORT struct pike_string *debug_check_size_shift(const struct pike_string *a,
                                                       enum size_shift shift)
{
  if (shift != a->size_shift) {
    Pike_fatal("Wrong STRX macro used!\n");
  }
  return (struct pike_string *)a;
}
#endif
024adc2004-11-14Martin Stjernholm #define CONVERT(FROM,TO) \ void PIKE_CONCAT4(convert_,FROM,_to_,TO) (PIKE_CONCAT(p_wchar,TO) *to, \ const PIKE_CONCAT(p_wchar,FROM) *from, \ ptrdiff_t len) \ { \
09d0632008-07-16Martin Stjernholm  while(--len>=0) *(to++)= (PIKE_CONCAT (p_wchar, TO)) *(from++); \
024adc2004-11-14Martin Stjernholm  }
3e625c1998-10-11Fredrik Hübinette (Hubbe)  CONVERT(0,1) CONVERT(0,2) CONVERT(1,0) CONVERT(1,2) CONVERT(2,0) CONVERT(2,1)
db4a401998-10-09Fredrik Hübinette (Hubbe) #define TWO_SIZES(X,Y) (((X)<<2)+(Y))
66d9282011-05-01Per Hedbor void generic_memcpy(PCHARP to,
a9b8172014-04-05Martin Nilsson  const PCHARP from,
66d9282011-05-01Per Hedbor  ptrdiff_t len)
db4a401998-10-09Fredrik Hübinette (Hubbe) {
68d9131999-04-01Fredrik Hübinette (Hubbe) #ifdef PIKE_DEBUG if(len<0)
bd67392015-10-14Martin Nilsson  Pike_fatal("Cannot copy %ld bytes!\n", (long)len);
68d9131999-04-01Fredrik Hübinette (Hubbe) #endif
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  switch(TWO_SIZES(from.shift,to.shift))
db4a401998-10-09Fredrik Hübinette (Hubbe)  { case TWO_SIZES(0,0):
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  convert_0_to_0((p_wchar0 *)to.ptr,(p_wchar0 *)from.ptr,len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  break; case TWO_SIZES(0,1):
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  convert_0_to_1((p_wchar1 *)to.ptr,(p_wchar0 *)from.ptr,len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  break; case TWO_SIZES(0,2):
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  convert_0_to_2((p_wchar2 *)to.ptr,(p_wchar0 *)from.ptr,len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  break; case TWO_SIZES(1,0):
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  convert_1_to_0((p_wchar0 *)to.ptr,(p_wchar1 *)from.ptr,len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  break; case TWO_SIZES(1,1):
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  convert_1_to_1((p_wchar1 *)to.ptr,(p_wchar1 *)from.ptr,len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  break; case TWO_SIZES(1,2):
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  convert_1_to_2((p_wchar2 *)to.ptr,(p_wchar1 *)from.ptr,len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  break; case TWO_SIZES(2,0):
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  convert_2_to_0((p_wchar0 *)to.ptr,(p_wchar2 *)from.ptr,len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  break; case TWO_SIZES(2,1):
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  convert_2_to_1((p_wchar1 *)to.ptr,(p_wchar2 *)from.ptr,len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  break; case TWO_SIZES(2,2):
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  convert_2_to_2((p_wchar2 *)to.ptr,(p_wchar2 *)from.ptr,len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  break; } }
a9b8172014-04-05Martin Nilsson PMOD_EXPORT void pike_string_cpy(PCHARP to, const struct pike_string *from)
3e625c1998-10-11Fredrik Hübinette (Hubbe) {
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  generic_memcpy(to,MKPCHARP_STR(from),from->len);
3e625c1998-10-11Fredrik Hübinette (Hubbe) }
5014211998-10-14Fredrik Hübinette (Hubbe) 
71f3a21998-11-22Fredrik Hübinette (Hubbe) #ifdef PIKE_DEBUG
5014211998-10-14Fredrik Hübinette (Hubbe) #ifdef DEBUG_MALLOC #define DM(X) X #else #define DM(X) #endif
a9b8172014-04-05Martin Nilsson PMOD_EXPORT p_wchar2 index_shared_string(const struct pike_string *s,
e9117b2014-02-25Per Hedbor  ptrdiff_t pos) { if(pos > s->len || pos<0) { if (s->len) { Pike_fatal("String index %"PRINTPTRDIFFT"d is out of " "range 0..%"PRINTPTRDIFFT"d.\n", pos, s->len-1); } else { Pike_fatal("Attempt to index the empty string with %"PRINTPTRDIFFT"d.\n", pos); } } return generic_extract(s->str,s->size_shift,pos); } PMOD_EXPORT p_wchar2 generic_extract (const void *str, int size, ptrdiff_t pos) { switch(size) { case 0: return ((p_wchar0 *)str)[pos]; case 1: return ((p_wchar1 *)str)[pos]; case 2: return ((p_wchar2 *)str)[pos]; }
759c422015-10-17Martin Nilsson  UNREACHABLE(return 0);
e9117b2014-02-25Per Hedbor }
abe3a82014-05-06Arne Goedeke static void locate_problem(int (*isproblem)(const struct pike_string *))
2043ba1998-02-10Fredrik Hübinette (Hubbe) { unsigned INT32 e; struct pike_string *s;
9e52381998-03-01Fredrik Hübinette (Hubbe)  DM(struct memhdr *yes=alloc_memhdr()); DM(struct memhdr *no=alloc_memhdr());
2043ba1998-02-10Fredrik Hübinette (Hubbe)  for(e=0;e<htable_size;e++)
9e52381998-03-01Fredrik Hübinette (Hubbe)  {
2043ba1998-02-10Fredrik Hübinette (Hubbe)  for(s=base_table[e];s;s=s->next)
9e52381998-03-01Fredrik Hübinette (Hubbe)  { if(isproblem(s)) { fprintf(stderr,"***Guilty string:\n"); debug_dump_pike_string(s, 70); DM(add_marks_to_memhdr(yes,s)); }else{ DM(add_marks_to_memhdr(no,s)); } } }
2043ba1998-02-10Fredrik Hübinette (Hubbe) 
9e52381998-03-01Fredrik Hübinette (Hubbe)  DM(fprintf(stderr,"Plausible problem location(s):\n"));
a4033e2000-04-14Fredrik Hübinette (Hubbe)  DM(dump_memhdr_locations(yes,0,0));
9e52381998-03-01Fredrik Hübinette (Hubbe)  DM(fprintf(stderr,"More Plausible problem location(s):\n"));
a4033e2000-04-14Fredrik Hübinette (Hubbe)  DM(dump_memhdr_locations(yes,no,0));
2043ba1998-02-10Fredrik Hübinette (Hubbe) }
/* Problem predicate: non-zero when the address of s has any of the low
 * bits set that fall below the size of a pointer. */
static int bad_pointer(const struct pike_string *s)
{
  const size_t low_bits = sizeof(struct pike_string *) - 1;

  return ((ptrdiff_t)s) & low_bits;
}
a9b8172014-04-05Martin Nilsson static int has_zero_refs(const struct pike_string *s)
2043ba1998-02-10Fredrik Hübinette (Hubbe) { return s->refs<=0; }
a9b8172014-04-05Martin Nilsson static int wrong_hash(const struct pike_string *s)
2043ba1998-02-10Fredrik Hübinette (Hubbe) {
8bcb3b2001-03-28Fredrik Hübinette (Hubbe)  return s->hval != do_hash(s);
2043ba1998-02-10Fredrik Hübinette (Hubbe) }
a9b8172014-04-05Martin Nilsson static int improper_zero_termination(const struct pike_string *s)
2043ba1998-02-10Fredrik Hübinette (Hubbe) {
db4a401998-10-09Fredrik Hübinette (Hubbe)  return index_shared_string(s,s->len);
2043ba1998-02-10Fredrik Hübinette (Hubbe) } #else #define locate_problem(X)
a20d822013-06-08Martin Nilsson #endif /* PIKE_DEBUG */
5267b71995-08-09Fredrik Hübinette (Hubbe) 
fe3c9f1999-09-06Henrik Grubbström (Grubba) /* Find a string in the shared string table.
13670c2015-05-25Martin Nilsson  * This assumes that the string is minimized!!!!
fe3c9f1999-09-06Henrik Grubbström (Grubba)  */
ed2bed2013-06-14Per Hedbor static struct pike_string *internal_findstring(const char *s,
a9b8172014-04-05Martin Nilsson  ptrdiff_t len,
ed04142015-09-27Per Hedbor  enum size_shift size_shift,
a9b8172014-04-05Martin Nilsson  size_t hval)
5267b71995-08-09Fredrik Hübinette (Hubbe) {
ed2bed2013-06-14Per Hedbor  struct pike_string *curr; //,**prev, **base;
2cbf7c1999-09-01Fredrik Hübinette (Hubbe)  unsigned int depth=0;
0169c62011-12-30Henrik Grubbström (Grubba)  unsigned int prefix_depth=0;
8282ca2013-11-02Per Hedbor 
e1939c2001-03-30Fredrik Hübinette (Hubbe)  size_t h;
e85df82001-09-06Fredrik Hübinette (Hubbe)  h=HMODULO(hval);
ed2bed2013-06-14Per Hedbor  for(curr = base_table[h]; curr; curr = curr->next)
5267b71995-08-09Fredrik Hübinette (Hubbe)  {
71f3a21998-11-22Fredrik Hübinette (Hubbe) #ifdef PIKE_DEBUG
5267b71995-08-09Fredrik Hübinette (Hubbe)  if(curr->refs<1)
38d6081998-02-07Fredrik Hübinette (Hubbe)  { debug_dump_pike_string(curr, 70);
2043ba1998-02-10Fredrik Hübinette (Hubbe)  locate_problem(has_zero_refs);
5aad932002-08-15Marcus Comstedt  Pike_fatal("String with no references.\n");
38d6081998-02-07Fredrik Hübinette (Hubbe)  }
5267b71995-08-09Fredrik Hübinette (Hubbe) #endif
f43e421999-10-21Fredrik Hübinette (Hubbe)  debug_malloc_touch(curr);
5267b71995-08-09Fredrik Hübinette (Hubbe) 
ed2bed2013-06-14Per Hedbor  if ( len == curr->len && size_shift == curr->size_shift && hval == curr->hval && ( curr->str == s ||
67074e2014-09-03Martin Nilsson  !memcmp(curr->str, s,len<<size_shift))) /* found it */
ed04142015-09-27Per Hedbor  return curr;
0169c62011-12-30Henrik Grubbström (Grubba)  depth++;
8282ca2013-11-02Per Hedbor  if (curr->len > (ptrdiff_t)hash_prefix_len)
0169c62011-12-30Henrik Grubbström (Grubba)  prefix_depth++;
5267b71995-08-09Fredrik Hübinette (Hubbe)  }
0169c62011-12-30Henrik Grubbström (Grubba)  if (depth > need_new_hashkey_depth) { /* Keep track of whether the hashtable is getting unbalanced. */ need_new_hashkey_depth = depth; }
2cbf7c1999-09-01Fredrik Hübinette (Hubbe)  /* These heuruistics might require tuning! /Hubbe */
0169c62011-12-30Henrik Grubbström (Grubba)  if (prefix_depth > need_more_hash_prefix_depth)
2cbf7c1999-09-01Fredrik Hübinette (Hubbe)  {
6ed3d22005-01-17Henrik Grubbström (Grubba) #if 0 fprintf(stderr,
0169c62011-12-30Henrik Grubbström (Grubba)  "prefix_depth=%d num_strings=%d need_more_hash_prefix_depth=%d\n"
8282ca2013-11-02Per Hedbor  " hash_prefix_len=%d\n",
0169c62011-12-30Henrik Grubbström (Grubba)  prefix_depth, num_strings, need_more_hash_prefix_depth,
8282ca2013-11-02Per Hedbor  hash_prefix_len);
6ed3d22005-01-17Henrik Grubbström (Grubba) #endif /* 0 */
0169c62011-12-30Henrik Grubbström (Grubba)  need_more_hash_prefix_depth = prefix_depth;
2cbf7c1999-09-01Fredrik Hübinette (Hubbe)  }
5267b71995-08-09Fredrik Hübinette (Hubbe)  return 0; /* not found */ }
/**
 * Looks up an 8-bit string in the shared string table.
 * @param str Pointer to the first character of the string.
 * @param len Number of characters in the string.
 * @return The table entry, or 0 when the string is not in the table.
 */
struct pike_string *binary_findstring(const char *str, ptrdiff_t len)
{
  const size_t hash = StrHash(str,len);

  return internal_findstring(str, len, 0, hash);
}
/* Looks up a NUL-terminated 8-bit string in the shared string table;
 * returns whatever binary_findstring() returns for it. */
struct pike_string *findstring(const char *foo)
{
  const size_t length = strlen(foo);

  return binary_findstring(foo, length);
}
af93211996-10-12Fredrik Hübinette (Hubbe) /*** rehash ***/ static void rehash_string_backwards(struct pike_string *s)
5267b71995-08-09Fredrik Hübinette (Hubbe) {
1bfe772005-01-14Henrik Grubbström (Grubba)  struct pike_string *prev = NULL; struct pike_string *next;
af93211996-10-12Fredrik Hübinette (Hubbe)  if(!s) return;
1bfe772005-01-14Henrik Grubbström (Grubba)  /* Reverse the hash list. */ while ((next = s->next)) { s->next = prev; prev = s; s = next; } s->next = prev; /* Rehash the strings for this list. */ do { ptrdiff_t h = HMODULO(s->hval); next = s->next; s->next = base_table[h]; base_table[h] = s; } while ((s = next));
af93211996-10-12Fredrik Hübinette (Hubbe) }
aef30b1996-10-11Fredrik Hübinette (Hubbe) 
e1939c2001-03-30Fredrik Hübinette (Hubbe) static void stralloc_rehash(void)
af93211996-10-12Fredrik Hübinette (Hubbe) { int h,old; struct pike_string **old_base;
aef30b1996-10-11Fredrik Hübinette (Hubbe) 
e1939c2001-03-30Fredrik Hübinette (Hubbe)  old=htable_size; old_base=base_table;
ddad7a2014-10-20Martin Nilsson  SET_HSIZE(htable_size<<1);
e1939c2001-03-30Fredrik Hübinette (Hubbe) 
9c14f32014-04-27Martin Nilsson  base_table=xcalloc(sizeof(struct pike_string *), htable_size);
af93211996-10-12Fredrik Hübinette (Hubbe) 
6ed3d22005-01-17Henrik Grubbström (Grubba)  need_more_hash_prefix_depth = 0;
2cbf7c1999-09-01Fredrik Hübinette (Hubbe)  for(h=0;h<old;h++) rehash_string_backwards(old_base[h]);
af93211996-10-12Fredrik Hübinette (Hubbe)  if(old_base)
0ec7522014-04-27Martin Nilsson  free(old_base);
5267b71995-08-09Fredrik Hübinette (Hubbe) }
af93211996-10-12Fredrik Hübinette (Hubbe) 
b0289e2000-12-01Henrik Grubbström (Grubba) /* Allocation of strings */
af93211996-10-12Fredrik Hübinette (Hubbe) 
60e6cf2013-11-09Arne Goedeke #define STRING_BLOCK 2048
8f0b9a2014-06-19Per Hedbor 
ed04142015-09-27Per Hedbor static struct block_allocator string_allocator =
5d23192015-12-10Per Hedbor  BA_INIT_PAGES(sizeof(struct pike_string), 2);
ed04142015-09-27Per Hedbor static struct block_allocator substring_allocator =
5d23192015-12-10Per Hedbor  BA_INIT_PAGES(sizeof(struct substring_pike_string), 1);
3587242015-08-22Per Hedbor 
ed04142015-09-27Per Hedbor static void free_string_content(struct pike_string * s) { switch (s->alloc_type) { case STRING_ALLOC_STATIC: break; case STRING_ALLOC_MALLOC: free(s->str); break; case STRING_ALLOC_BA: ba_free(&string_allocator, s->str); break; case STRING_ALLOC_SUBSTRING: free_string(((struct substring_pike_string*)s)->parent); break; } }
4a5e3f2000-11-25Henrik Grubbström (Grubba) 
3587242015-08-22Per Hedbor static void free_unlinked_pike_string(struct pike_string * s) {
ed04142015-09-27Per Hedbor  free_string_content(s); switch(s->struct_type) { case STRING_STRUCT_STRING: ba_free(&string_allocator, s); break; case STRING_STRUCT_SUBSTRING: ba_free(&substring_allocator, s); break; }
60e6cf2013-11-09Arne Goedeke }
4a5e3f2000-11-25Henrik Grubbström (Grubba) 
ed04142015-09-27Per Hedbor 
5267b71995-08-09Fredrik Hübinette (Hubbe) /* note that begin_shared_string expects the _exact_ size of the string, * not the maximum size */
c8318b2000-08-03Henrik Grubbström (Grubba) PMOD_EXPORT struct pike_string *debug_begin_shared_string(size_t len)
5267b71995-08-09Fredrik Hübinette (Hubbe) {
60e6cf2013-11-09Arne Goedeke  return debug_begin_wide_shared_string(len, 0);
5267b71995-08-09Fredrik Hübinette (Hubbe) }
8bcb3b2001-03-28Fredrik Hübinette (Hubbe) static void link_pike_string(struct pike_string *s, size_t hval)
af93211996-10-12Fredrik Hübinette (Hubbe) {
8bcb3b2001-03-28Fredrik Hübinette (Hubbe)  size_t h;
9cd0372005-11-03Henrik Grubbström (Grubba) #ifdef PIKE_DEBUG if (!(s->flags & STRING_NOT_SHARED)) { debug_dump_pike_string(s, 70); Pike_fatal("String already linked.\n"); }
fc6ea02008-07-18Martin Stjernholm  if (PIKE_MEM_NOT_DEF_RANGE (s->str, (s->len + 1) << s->size_shift)) Pike_fatal ("Got undefined contents in pike string %p.\n", s);
9cd0372005-11-03Henrik Grubbström (Grubba) #endif
e85df82001-09-06Fredrik Hübinette (Hubbe)  h=HMODULO(hval);
af93211996-10-12Fredrik Hübinette (Hubbe)  s->next = base_table[h]; base_table[h] = s;
8bcb3b2001-03-28Fredrik Hübinette (Hubbe)  s->hval=hval;
9cd0372005-11-03Henrik Grubbström (Grubba)  s->flags &= ~(STRING_NOT_HASHED|STRING_NOT_SHARED);
af93211996-10-12Fredrik Hübinette (Hubbe)  num_strings++;
e1939c2001-03-30Fredrik Hübinette (Hubbe) 
ed2bed2013-06-14Per Hedbor  if(num_strings > htable_size) {
e1939c2001-03-30Fredrik Hübinette (Hubbe)  stralloc_rehash();
0169c62011-12-30Henrik Grubbström (Grubba)  }
2cbf7c1999-09-01Fredrik Hübinette (Hubbe) 
6ed3d22005-01-17Henrik Grubbström (Grubba)  /* These heuristics might require tuning! /Hubbe */
ed2bed2013-06-14Per Hedbor  if((need_more_hash_prefix_depth > 4) || (need_new_hashkey_depth > 128))
2cbf7c1999-09-01Fredrik Hübinette (Hubbe)  {
6ed3d22005-01-17Henrik Grubbström (Grubba)  /* Changed heuristic 2005-01-17: *
8282ca2013-11-02Per Hedbor  * Increase hash_prefix_len if there's some bucket containing
ed2bed2013-06-14Per Hedbor  * more than 4 strings that are longer
8282ca2013-11-02Per Hedbor  * than hash_prefix_len.
6ed3d22005-01-17Henrik Grubbström (Grubba)  * /grubba
0169c62011-12-30Henrik Grubbström (Grubba)  * * Changed heuristic 2011-12-30: * * Generate a new hash key if there's some bucket containing
ed2bed2013-06-14Per Hedbor  * more than 16 strings. This ought to
0169c62011-12-30Henrik Grubbström (Grubba)  * suffice to alleviate the #hashdos vulnerability. * * /grubba
6ed3d22005-01-17Henrik Grubbström (Grubba)  */ /* This could in theory have a pretty ugly complexity * /Hubbe
2cbf7c1999-09-01Fredrik Hübinette (Hubbe)  */
8bcb3b2001-03-28Fredrik Hübinette (Hubbe) 
abce512013-11-06Arne Goedeke  if (need_new_hashkey_depth > 128) { /* A simple mixing function. */ hashkey ^= (hashkey << 5) ^ (current_time.tv_sec ^ current_time.tv_usec); need_new_hashkey_depth = 0; }
557f742014-05-23Per Hedbor  if (need_more_hash_prefix_depth > 4)
8282ca2013-11-02Per Hedbor  hash_prefix_len=hash_prefix_len*2;
557f742014-05-23Per Hedbor 
0169c62011-12-30Henrik Grubbström (Grubba)  /* NOTE: No need to update to the correct values, since that will
6ed3d22005-01-17Henrik Grubbström (Grubba)  * be done on demand. */ need_more_hash_prefix_depth=0;
2cbf7c1999-09-01Fredrik Hübinette (Hubbe)  for(h=0;h<htable_size;h++) { struct pike_string *tmp=base_table[h]; base_table[h]=0; while(tmp) {
d3b06f2000-08-10Henrik Grubbström (Grubba)  size_t h2;
2cbf7c1999-09-01Fredrik Hübinette (Hubbe)  struct pike_string *tmp2=tmp; /* First unlink */ tmp=tmp2->next;
8bcb3b2001-03-28Fredrik Hübinette (Hubbe)  tmp2->hval=do_hash(tmp2); /* compute new hash value */
e85df82001-09-06Fredrik Hübinette (Hubbe)  h2=HMODULO(tmp2->hval);
2cbf7c1999-09-01Fredrik Hübinette (Hubbe)  tmp2->next=base_table[h2]; /* and re-hash */ base_table[h2]=tmp2; } } }
af93211996-10-12Fredrik Hübinette (Hubbe) }
ed04142015-09-27Per Hedbor PMOD_EXPORT struct pike_string *debug_begin_wide_shared_string(size_t len, enum size_shift shift)
db4a401998-10-09Fredrik Hübinette (Hubbe) {
4170b92004-11-06Henrik Grubbström (Grubba)  struct pike_string *t = NULL;
1b42052015-11-23Arne Goedeke  size_t bytes;
23bdcd2015-08-22Per Hedbor  ONERROR fe;
1b42052015-11-23Arne Goedeke  if ((ptrdiff_t)len < 0 || DO_SIZE_T_ADD_OVERFLOW(len, 1, &bytes) || DO_SIZE_T_MUL_OVERFLOW(bytes, 1 << shift, &bytes)) { Pike_error("String is too large.\n"); }
71f3a21998-11-22Fredrik Hübinette (Hubbe) #ifdef PIKE_DEBUG
db4a401998-10-09Fredrik Hübinette (Hubbe)  if(d_flag>10) verify_shared_strings_tables(); #endif
4a5e3f2000-11-25Henrik Grubbström (Grubba) #ifdef PIKE_DEBUG
d476592013-06-12Arne Goedeke  if (shift > 2)
5aad932002-08-15Marcus Comstedt  Pike_fatal("Unsupported string shift: %d\n", shift);
4a5e3f2000-11-25Henrik Grubbström (Grubba) #endif /* PIKE_DEBUG */
ed04142015-09-27Per Hedbor  t=ba_alloc(&string_allocator);
23bdcd2015-08-22Per Hedbor  /* we mark the string as static here, to avoid double free if the * allocations fail
119e1e2014-08-22Arne Goedeke  */
3587242015-08-22Per Hedbor  t->flags = STRING_NOT_HASHED|STRING_NOT_SHARED; t->alloc_type = STRING_ALLOC_STATIC;
ed04142015-09-27Per Hedbor  t->struct_type = STRING_STRUCT_STRING;
23bdcd2015-08-22Per Hedbor  SET_ONERROR(fe,free_unlinked_pike_string,t); if (bytes <= sizeof(struct pike_string)) {
119e1e2014-08-22Arne Goedeke  t->str = ba_alloc(&string_allocator);
3587242015-08-22Per Hedbor  t->alloc_type = STRING_ALLOC_BA;
4a5e3f2000-11-25Henrik Grubbström (Grubba)  } else {
23bdcd2015-08-22Per Hedbor  t->str = xalloc(bytes);
3587242015-08-22Per Hedbor  t->alloc_type = STRING_ALLOC_MALLOC;
4a5e3f2000-11-25Henrik Grubbström (Grubba)  }
dc245f2011-07-21Henrik Grubbström (Grubba)  t->refs = 0;
a495ca2014-08-23Arne Goedeke  t->size_shift=shift;
dc245f2011-07-21Henrik Grubbström (Grubba)  add_ref(t); /* For DMALLOC */
db4a401998-10-09Fredrik Hübinette (Hubbe)  t->len=len;
0e602a2006-01-12Henrik Grubbström (Grubba)  DO_IF_DEBUG(t->next = NULL);
5166152015-08-22Per Hedbor  UNSET_ONERROR(fe);
db4a401998-10-09Fredrik Hübinette (Hubbe)  low_set_index(t,len,0); return t; }
23bdcd2015-08-22Per Hedbor static struct pike_string * make_static_string(const char * str, size_t len, enum size_shift shift) {
119e1e2014-08-22Arne Goedeke  struct pike_string * t = ba_alloc(&string_allocator);
3587242015-08-22Per Hedbor  t->flags = STRING_NOT_HASHED|STRING_NOT_SHARED;
ed04142015-09-27Per Hedbor  t->size_shift = shift;
3587242015-08-22Per Hedbor  t->alloc_type = STRING_ALLOC_STATIC;
ed04142015-09-27Per Hedbor  t->struct_type = STRING_STRUCT_STRING;
5166152015-08-22Per Hedbor  t->str = (char *)str;
119e1e2014-08-22Arne Goedeke  t->refs = 0; t->len = len;
ed04142015-09-27Per Hedbor  add_ref(t); /* For DMALLOC */
119e1e2014-08-22Arne Goedeke  return t; } PMOD_EXPORT struct pike_string * make_shared_static_string(const char *str, size_t len, enum size_shift shift) { struct pike_string *s; ptrdiff_t h = StrHash(str, len); s = internal_findstring(str,len,shift,h); if (!s) { s = make_static_string(str, len, shift); link_pike_string(s, h); } else {
ed04142015-09-27Per Hedbor  if (!string_is_static(s)) { free_string_content(s); s->alloc_type = STRING_ALLOC_STATIC; s->str = (char*)str;
7ed4d82014-08-24Arne Goedeke  }
119e1e2014-08-22Arne Goedeke  add_ref(s); } return s; }
36bfbf2016-11-08Arne Goedeke PMOD_EXPORT struct pike_string * make_shared_malloc_string(char *str, size_t len, enum size_shift shift) { struct pike_string *s; ptrdiff_t h = StrHash(str, len); s = internal_findstring(str,len,shift,h); if (!s) { s = ba_alloc(&string_allocator); s->flags = STRING_NOT_HASHED|STRING_NOT_SHARED; s->size_shift = shift; s->alloc_type = STRING_ALLOC_MALLOC; s->struct_type = STRING_STRUCT_STRING; s->str = str; s->refs = 0; s->len = len; add_ref(s); link_pike_string(s, h); } else { free(str); add_ref(s); } return s; }
3e625c1998-10-11Fredrik Hübinette (Hubbe) /* * This function assumes that the shift size is already the minimum it * can be. */
66d9282011-05-01Per Hedbor struct pike_string *low_end_shared_string(struct pike_string *s)
5267b71995-08-09Fredrik Hübinette (Hubbe) {
2eed0b2000-10-08Henrik Grubbström (Grubba)  ptrdiff_t len;
ec51ce2006-04-25David Hedbor  size_t h=0;
06983f1996-09-22Fredrik Hübinette (Hubbe)  struct pike_string *s2;
5267b71995-08-09Fredrik Hübinette (Hubbe) 
e8371e2008-06-23Martin Stjernholm #ifdef PIKE_DEBUG if (d_flag) { switch (s->size_shift) { case 0:
ed04142015-09-27Per Hedbor  break;
e8371e2008-06-23Martin Stjernholm  case 1:
ed04142015-09-27Per Hedbor  if(!find_magnitude1(STR1(s),s->len)) Pike_fatal ("String %p that should have shift 1 really got 0.\n", s); break;
e8371e2008-06-23Martin Stjernholm  case 2: {
ed04142015-09-27Per Hedbor  int m = find_magnitude2 (STR2 (s), s->len); if (m != 2) Pike_fatal ("String %p that should have shift 2 really got %d.\n", s, m); break;
e8371e2008-06-23Martin Stjernholm  } default:
ed04142015-09-27Per Hedbor  Pike_fatal("ARGHEL! size_shift:%d\n", s->size_shift);
e8371e2008-06-23Martin Stjernholm  } } #endif
d3b06f2000-08-10Henrik Grubbström (Grubba)  len = s->len;
9cd0372005-11-03Henrik Grubbström (Grubba)  if (s->flags & STRING_NOT_HASHED) { h = s->hval = do_hash(s); s->flags &= ~STRING_NOT_HASHED; }
2eed0b2000-10-08Henrik Grubbström (Grubba)  s2 = internal_findstring(s->str, len, s->size_shift, h);
71f3a21998-11-22Fredrik Hübinette (Hubbe) #ifdef PIKE_DEBUG
13670c2015-05-25Martin Nilsson  if(s2==s)
5aad932002-08-15Marcus Comstedt  Pike_fatal("end_shared_string called twice! (or something like that)\n");
3e625c1998-10-11Fredrik Hübinette (Hubbe) #endif if(s2) {
9cd0372005-11-03Henrik Grubbström (Grubba)  free_string(s);
2eed0b2000-10-08Henrik Grubbström (Grubba)  s = s2;
9cd0372005-11-03Henrik Grubbström (Grubba)  add_ref(s);
3e625c1998-10-11Fredrik Hübinette (Hubbe)  }else{ link_pike_string(s, h); } return s; } /* * This function checks if the shift size can be decreased before * entering the string in the shared string table */
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT struct pike_string *end_shared_string(struct pike_string *s)
3e625c1998-10-11Fredrik Hübinette (Hubbe) { struct pike_string *s2;
23bdcd2015-08-22Per Hedbor  switch(UNLIKELY(s->size_shift))
db4a401998-10-09Fredrik Hübinette (Hubbe)  { case 2: switch(find_magnitude2(STR2(s),s->len)) {
ed04142015-09-27Per Hedbor  case 0: s2=begin_shared_string(s->len); convert_2_to_0(STR0(s2),STR2(s),s->len); free_string(s); s=s2; break; case 1: s2=begin_wide_shared_string(s->len,1); convert_2_to_1(STR1(s2),STR2(s),s->len); free_string(s); s=s2; /* Fall though */
db4a401998-10-09Fredrik Hübinette (Hubbe)  } break;
9925512013-05-31Per Hedbor 
db4a401998-10-09Fredrik Hübinette (Hubbe)  case 1: if(!find_magnitude1(STR1(s),s->len)) {
ed04142015-09-27Per Hedbor  s2=begin_shared_string(s->len); convert_1_to_0(STR0(s2),STR1(s),s->len); free_string(s); s=s2;
db4a401998-10-09Fredrik Hübinette (Hubbe)  } break; case 0: break; }
3e625c1998-10-11Fredrik Hübinette (Hubbe)  return low_end_shared_string(s);
5267b71995-08-09Fredrik Hübinette (Hubbe) }
d5b1e22000-11-29Fredrik Hübinette (Hubbe) PMOD_EXPORT struct pike_string *end_and_resize_shared_string(struct pike_string *str, ptrdiff_t len) { struct pike_string *tmp; #ifdef PIKE_DEBUG if(len > str->len)
5aad932002-08-15Marcus Comstedt  Pike_fatal("Cannot extend string here!\n");
d5b1e22000-11-29Fredrik Hübinette (Hubbe) #endif
9bb0ef2016-01-18Henrik Grubbström (Grubba)  if (len == str->len) { return end_shared_string(str); }
d5b1e22000-11-29Fredrik Hübinette (Hubbe)  tmp = make_shared_binary_pcharp(MKPCHARP_STR(str),len);
9cd0372005-11-03Henrik Grubbström (Grubba)  free_string(str);
d5b1e22000-11-29Fredrik Hübinette (Hubbe)  return tmp; }
c8318b2000-08-03Henrik Grubbström (Grubba) PMOD_EXPORT struct pike_string * debug_make_shared_binary_string(const char *str,size_t len)
5267b71995-08-09Fredrik Hübinette (Hubbe) {
06983f1996-09-22Fredrik Hübinette (Hubbe)  struct pike_string *s;
89fc4c2000-08-10Henrik Grubbström (Grubba)  ptrdiff_t h = StrHash(str, len);
5267b71995-08-09Fredrik Hübinette (Hubbe) 
db4a401998-10-09Fredrik Hübinette (Hubbe)  s = internal_findstring(str,len,0,h);
13670c2015-05-25Martin Nilsson  if (!s)
5267b71995-08-09Fredrik Hübinette (Hubbe)  { s=begin_shared_string(len);
59fc9e2014-09-03Martin Nilsson  memcpy(s->str, str, len);
af93211996-10-12Fredrik Hübinette (Hubbe)  link_pike_string(s, h);
5317302005-11-05Henrik Grubbström (Grubba)  } else { add_ref(s);
5267b71995-08-09Fredrik Hübinette (Hubbe)  } return s; }
c8318b2000-08-03Henrik Grubbström (Grubba) PMOD_EXPORT struct pike_string * debug_make_shared_binary_pcharp(const PCHARP str,size_t len)
011ad31999-10-22Fredrik Hübinette (Hubbe) { switch(str.shift) { case 0:
01a9572000-02-03Henrik Grubbström (Grubba)  return make_shared_binary_string((char *)(str.ptr), len);
011ad31999-10-22Fredrik Hübinette (Hubbe)  case 1: return make_shared_binary_string1((p_wchar1 *)(str.ptr), len); case 2: return make_shared_binary_string2((p_wchar2 *)(str.ptr), len); }
9282fd2015-09-27Martin Nilsson  UNREACHABLE(return NULL);
011ad31999-10-22Fredrik Hübinette (Hubbe) }
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT struct pike_string * debug_make_shared_pcharp(const PCHARP str)
011ad31999-10-22Fredrik Hübinette (Hubbe) { return debug_make_shared_binary_pcharp(str, pcharp_strlen(str)); }
c8318b2000-08-03Henrik Grubbström (Grubba) PMOD_EXPORT struct pike_string * debug_make_shared_binary_string0(const p_wchar0 *str,size_t len)
50d6d31999-10-31Henrik Grubbström (Grubba) {
a0d5ae1999-10-31Henrik Grubbström (Grubba)  return debug_make_shared_binary_string((const char *)str, len);
50d6d31999-10-31Henrik Grubbström (Grubba) }
c8318b2000-08-03Henrik Grubbström (Grubba) PMOD_EXPORT struct pike_string * debug_make_shared_binary_string1(const p_wchar1 *str,size_t len)
db4a401998-10-09Fredrik Hübinette (Hubbe) { struct pike_string *s;
89fc4c2000-08-10Henrik Grubbström (Grubba)  ptrdiff_t h;
db4a401998-10-09Fredrik Hübinette (Hubbe)  if(!find_magnitude1(str,len)) { /* Wrong size, convert */ s=begin_shared_string(len);
efae671998-10-21Fredrik Hübinette (Hubbe)  convert_1_to_0(STR0(s),str,len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  return end_shared_string(s); }
3e625c1998-10-11Fredrik Hübinette (Hubbe)  h=low_do_hash(str, len, 1);
db4a401998-10-09Fredrik Hübinette (Hubbe)  s = internal_findstring((char *)str,len,1,h);
13670c2015-05-25Martin Nilsson  if (!s)
db4a401998-10-09Fredrik Hübinette (Hubbe)  { s=begin_wide_shared_string(len,1);
59fc9e2014-09-03Martin Nilsson  memcpy(s->str, str, len<<1);
db4a401998-10-09Fredrik Hübinette (Hubbe)  link_pike_string(s, h);
5317302005-11-05Henrik Grubbström (Grubba)  } else { add_ref(s);
db4a401998-10-09Fredrik Hübinette (Hubbe)  } return s; }
c8318b2000-08-03Henrik Grubbström (Grubba) PMOD_EXPORT struct pike_string * debug_make_shared_binary_string2(const p_wchar2 *str,size_t len)
db4a401998-10-09Fredrik Hübinette (Hubbe) { struct pike_string *s;
89fc4c2000-08-10Henrik Grubbström (Grubba)  ptrdiff_t h;
db4a401998-10-09Fredrik Hübinette (Hubbe)  switch(find_magnitude2(str,len)) { case 0: /* Wrong size, convert */ s=begin_shared_string(len);
efae671998-10-21Fredrik Hübinette (Hubbe)  convert_2_to_0(STR0(s),str,len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  return end_shared_string(s); case 1: /* Wrong size, convert */ s=begin_wide_shared_string(len,1); convert_2_to_1(STR1(s),str,len); return end_shared_string(s); /* not entirely optimal */ }
3e625c1998-10-11Fredrik Hübinette (Hubbe)  h=low_do_hash(str, len, 2);
db4a401998-10-09Fredrik Hübinette (Hubbe)  s = internal_findstring((char *)str,len,2,h);
13670c2015-05-25Martin Nilsson  if (!s)
db4a401998-10-09Fredrik Hübinette (Hubbe)  { s=begin_wide_shared_string(len,2);
59fc9e2014-09-03Martin Nilsson  memcpy(s->str, str, len<<2);
db4a401998-10-09Fredrik Hübinette (Hubbe)  link_pike_string(s, h);
5317302005-11-05Henrik Grubbström (Grubba)  } else { add_ref(s);
db4a401998-10-09Fredrik Hübinette (Hubbe)  } return s; }
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT struct pike_string *debug_make_shared_string(const char *str)
5267b71995-08-09Fredrik Hübinette (Hubbe) { return make_shared_binary_string(str, strlen(str)); }
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT struct pike_string *debug_make_shared_string0(const p_wchar0 *str)
50d6d31999-10-31Henrik Grubbström (Grubba) {
a0d5ae1999-10-31Henrik Grubbström (Grubba)  return debug_make_shared_string((const char *)str);
50d6d31999-10-31Henrik Grubbström (Grubba) }
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT struct pike_string *debug_make_shared_string1(const p_wchar1 *str)
4d1ed11998-09-18Fredrik Hübinette (Hubbe) {
db4a401998-10-09Fredrik Hübinette (Hubbe)  INT32 len;
4d1ed11998-09-18Fredrik Hübinette (Hubbe)  for(len=0;str[len];len++);
db4a401998-10-09Fredrik Hübinette (Hubbe)  return debug_make_shared_binary_string1(str,len); }
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT struct pike_string *debug_make_shared_string2(const p_wchar2 *str)
db4a401998-10-09Fredrik Hübinette (Hubbe) { INT32 len; for(len=0;str[len];len++); return debug_make_shared_binary_string2(str,len);
4d1ed11998-09-18Fredrik Hübinette (Hubbe) }
af93211996-10-12Fredrik Hübinette (Hubbe) /*** Free strings ***/
5267b71995-08-09Fredrik Hübinette (Hubbe) 
2a3bfa2017-01-26Henrik Grubbström (Grubba) void unlink_pike_string(struct pike_string *s)
5267b71995-08-09Fredrik Hübinette (Hubbe) {
ed04142015-09-27Per Hedbor  size_t h=HMODULO(s->hval); struct pike_string *tmp=base_table[h], *p=NULL;
23bdcd2015-08-22Per Hedbor  while( tmp ) { if( tmp == s ) { if( p ) p->next = s->next; else base_table[h] = s->next; break; } p = tmp; tmp = tmp->next;
4bdf5f2001-03-30Henrik Grubbström (Grubba)  }
5166152015-08-22Per Hedbor 
23bdcd2015-08-22Per Hedbor  if( !tmp ) Pike_fatal("unlink on non-shared string\n");
5166152015-08-22Per Hedbor 
89fc4c2000-08-10Henrik Grubbström (Grubba)  s->next=(struct pike_string *)(ptrdiff_t)-1;
760b261996-12-03Fredrik Hübinette (Hubbe)  num_strings--;
9cd0372005-11-03Henrik Grubbström (Grubba)  s->flags |= STRING_NOT_SHARED;
0a3d601996-10-09Fredrik Hübinette (Hubbe) }
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT void do_free_string(struct pike_string *s)
04965a1998-12-06Fredrik Hübinette (Hubbe) {
65b6732000-07-07Martin Stjernholm  if (s) free_string(s);
04965a1998-12-06Fredrik Hübinette (Hubbe) }
fb22942008-06-16Martin Stjernholm PMOD_EXPORT void do_free_unlinked_pike_string(struct pike_string *s)
d5b1e22000-11-29Fredrik Hübinette (Hubbe) { if (s)
fb22942008-06-16Martin Stjernholm  free_unlinked_pike_string(s);
d5b1e22000-11-29Fredrik Hübinette (Hubbe) }
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT void really_free_string(struct pike_string *s)
0a3d601996-10-09Fredrik Hübinette (Hubbe) {
71f3a21998-11-22Fredrik Hübinette (Hubbe) #ifdef PIKE_DEBUG
d631b82002-12-01Martin Stjernholm  if (s->refs) { #ifdef DEBUG_MALLOC describe_something(s, T_STRING, 0,2,0, NULL); #endif Pike_fatal("Freeing string with %d references.\n", s->refs); }
f88e292007-03-29Marcus Comstedt  if(d_flag > 2 && !(s->flags & STRING_NOT_SHARED))
d631b82002-12-01Martin Stjernholm  { if(s->next == (struct pike_string *)(ptrdiff_t)-1) Pike_fatal("Freeing shared string again!\n");
7abf491998-04-17Fredrik Hübinette (Hubbe) 
d631b82002-12-01Martin Stjernholm  if(((ptrdiff_t)s->next) & 1) Pike_fatal("Freeing shared string again, memory corrupt or other bug!\n");
9367351997-01-27Fredrik Hübinette (Hubbe)  }
d476592013-06-12Arne Goedeke  if (s->size_shift > 2) {
5aad932002-08-15Marcus Comstedt  Pike_fatal("Freeing string with bad shift (0x%08x); could it be a type?\n",
ed04142015-09-27Per Hedbor  s->size_shift);
c7eadf2001-03-29Henrik Grubbström (Grubba)  }
9367351997-01-27Fredrik Hübinette (Hubbe) #endif
9cd0372005-11-03Henrik Grubbström (Grubba)  if (!(s->flags & STRING_NOT_SHARED)) unlink_pike_string(s);
8a14542008-06-29Martin Nilsson  if (s->flags & STRING_CLEAR_ON_EXIT)
0a146b2013-03-12Arne Goedeke  guaranteed_memset(s->str, 0, s->len<<s->size_shift);
fb22942008-06-16Martin Stjernholm  free_unlinked_pike_string(s);
6cb7832000-09-15Martin Stjernholm  GC_FREE_SIMPLE_BLOCK(s);
5267b71995-08-09Fredrik Hübinette (Hubbe) }
ed04142015-09-27Per Hedbor 
/* NULL-tolerant wrapper around really_free_string(). */
void do_really_free_string(struct pike_string *s)
{
  if (!s)
    return;

  really_free_string(s);
}
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT void debug_free_string(struct pike_string *s)
61e9a01998-01-25Fredrik Hübinette (Hubbe) {
50ea682003-03-14Henrik Grubbström (Grubba)  if(!sub_ref(s))
61e9a01998-01-25Fredrik Hübinette (Hubbe)  really_free_string(s); }
5267b71995-08-09Fredrik Hübinette (Hubbe) /*
af93211996-10-12Fredrik Hübinette (Hubbe)  * String table status
5267b71995-08-09Fredrik Hübinette (Hubbe)  */
06983f1996-09-22Fredrik Hübinette (Hubbe) struct pike_string *add_string_status(int verbose)
5267b71995-08-09Fredrik Hübinette (Hubbe) {
af80262013-02-07Henrik Grubbström (Grubba)  struct string_builder s; init_string_builder(&s, 0);
5267b71995-08-09Fredrik Hübinette (Hubbe)  if (verbose) {
af80262013-02-07Henrik Grubbström (Grubba)  long alloced_strings[8] = {0,0,0,0,0,0,0,0}; long alloced_bytes[8] = {0,0,0,0,0,0,0,0}; long num_distinct_strings[8] = {0,0,0,0,0,0,0,0}; long bytes_distinct_strings[8] = {0,0,0,0,0,0,0,0}; long overhead_bytes[8] = {0,0,0,0,0,0,0,0};
af93211996-10-12Fredrik Hübinette (Hubbe)  unsigned INT32 e;
06983f1996-09-22Fredrik Hübinette (Hubbe)  struct pike_string *p;
af93211996-10-12Fredrik Hübinette (Hubbe)  for(e=0;e<htable_size;e++)
5267b71995-08-09Fredrik Hübinette (Hubbe)  { for(p=base_table[e];p;p=p->next) {
ed04142015-09-27Per Hedbor  int key = p->size_shift + (string_is_malloced(p)?4:0); num_distinct_strings[key]++;
119e1e2014-08-22Arne Goedeke  alloced_bytes[key] += p->refs*sizeof(struct pike_string);
ed04142015-09-27Per Hedbor  alloced_strings[key] += p->refs;
119e1e2014-08-22Arne Goedeke  if (string_is_block_allocated(p)) {
ca76852014-08-18Arne Goedeke  alloced_bytes[key] +=
119e1e2014-08-22Arne Goedeke  p->refs*sizeof(struct pike_string);
ca76852014-08-18Arne Goedeke  } else {
119e1e2014-08-22Arne Goedeke  alloced_bytes[key] += p->refs*DO_ALIGN((p->len+3) << p->size_shift,sizeof(void *));
ca76852014-08-18Arne Goedeke  }
5267b71995-08-09Fredrik Hübinette (Hubbe)  } }
af80262013-02-07Henrik Grubbström (Grubba)  string_builder_sprintf(&s,
ed04142015-09-27Per Hedbor  "\nShared string hash table:\n" "-------------------------\n" "\n" "Type Count Distinct Bytes Actual Overhead %%\n" "------------------------------------------------------------\n");
af80262013-02-07Henrik Grubbström (Grubba)  for(e = 0; e < 8; e++) { int shift = e & 3; ptrdiff_t overhead; if (!num_distinct_strings[e]) continue; if (shift != 3) {
ed04142015-09-27Per Hedbor  if (e < 4) { string_builder_sprintf(&s, "Short/%-2d ", 8<<shift); } else { string_builder_sprintf(&s, "Long/%-2d ", 8<<shift); }
af80262013-02-07Henrik Grubbström (Grubba) 
ed04142015-09-27Per Hedbor  overhead_bytes[e] =
bd67392015-10-14Martin Nilsson  (long)sizeof(struct pike_string) * num_distinct_strings[e];
af80262013-02-07Henrik Grubbström (Grubba) 
ed04142015-09-27Per Hedbor  alloced_strings[e|3] += alloced_strings[e]; alloced_bytes[e|3] += alloced_bytes[e]; num_distinct_strings[e|3] += num_distinct_strings[e]; bytes_distinct_strings[e|3] += bytes_distinct_strings[e]; overhead_bytes[e|3] += overhead_bytes[e];
af80262013-02-07Henrik Grubbström (Grubba)  } else {
ed04142015-09-27Per Hedbor  if (e < 4) { string_builder_sprintf(&s, "Total short"); } else { string_builder_sprintf(&s, "Total long "); }
af80262013-02-07Henrik Grubbström (Grubba)  } string_builder_sprintf(&s,
ed04142015-09-27Per Hedbor  "%8ld %8ld %8ld %8ld %8ld ", alloced_strings[e], num_distinct_strings[e], alloced_bytes[e], bytes_distinct_strings[e], overhead_bytes[e]);
af80262013-02-07Henrik Grubbström (Grubba)  if (alloced_bytes[e]) {
ed04142015-09-27Per Hedbor  string_builder_sprintf(&s, "%4d\n", (bytes_distinct_strings[e] + overhead_bytes[e]) * 100 / alloced_bytes[e]);
af80262013-02-07Henrik Grubbström (Grubba)  } else {
ed04142015-09-27Per Hedbor  string_builder_strcat(&s, " -\n");
af80262013-02-07Henrik Grubbström (Grubba)  } } alloced_strings[7] += alloced_strings[3]; alloced_bytes[7] += alloced_bytes[3]; num_distinct_strings[7] += num_distinct_strings[3]; bytes_distinct_strings[7] += bytes_distinct_strings[3]; overhead_bytes[7] += overhead_bytes[3]; string_builder_sprintf(&s,
ed04142015-09-27Per Hedbor  "------------------------------------------------------------\n" "Total %8ld %8ld %8ld %8ld %8ld ", alloced_strings[7], num_distinct_strings[7], alloced_bytes[7], bytes_distinct_strings[7], overhead_bytes[7]);
af80262013-02-07Henrik Grubbström (Grubba)  if (alloced_bytes[7]) { string_builder_sprintf(&s, "%4d\n",
ed04142015-09-27Per Hedbor  (bytes_distinct_strings[7] + overhead_bytes[7]) * 100 / alloced_bytes[7]);
af80262013-02-07Henrik Grubbström (Grubba)  } else { string_builder_strcat(&s, " -\n"); }
5267b71995-08-09Fredrik Hübinette (Hubbe)  } /* sprintf(b,"Searches: %ld Average search length: %6.3f\n", (long)num_str_searches, (double)search_len / num_str_searches); my_strcat(b); */
af80262013-02-07Henrik Grubbström (Grubba)  return finish_string_builder(&s);
5267b71995-08-09Fredrik Hübinette (Hubbe) }
71f3a21998-11-22Fredrik Hübinette (Hubbe) #ifdef PIKE_DEBUG
af93211996-10-12Fredrik Hübinette (Hubbe) 
6b997c2001-09-04Fredrik Hübinette (Hubbe) static long last_stralloc_verify=0; extern long current_do_debug_cycle;
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT void check_string(struct pike_string *s)
af93211996-10-12Fredrik Hübinette (Hubbe) {
6b997c2001-09-04Fredrik Hübinette (Hubbe)  if(current_do_debug_cycle == last_stralloc_verify)
2043ba1998-02-10Fredrik Hübinette (Hubbe)  {
33e8732015-12-25Henrik Grubbström (Grubba)  if(debug_findstring(s) != s)
5aad932002-08-15Marcus Comstedt  Pike_fatal("Shared string not shared.\n");
6b997c2001-09-04Fredrik Hübinette (Hubbe)  }else{
abdf6b2005-04-02Martin Stjernholm  switch (s->size_shift) { case 0:
ed04142015-09-27Per Hedbor  break;
abdf6b2005-04-02Martin Stjernholm  case 1: {
ed04142015-09-27Per Hedbor  ptrdiff_t i; p_wchar1 *str = STR1 (s); for (i = 0; i < s->len; i++) if (str[i] > 0xff) goto size_shift_check_done; Pike_fatal ("Shared string is too wide.\n");
abdf6b2005-04-02Martin Stjernholm  } case 2: {
ed04142015-09-27Per Hedbor  ptrdiff_t i; p_wchar2 *str = STR2 (s); for (i = 0; i < s->len; i++) if ((str[i] > 0xffff) || (str[i] < 0)) goto size_shift_check_done; Pike_fatal ("Shared string is too wide.\n");
abdf6b2005-04-02Martin Stjernholm  } default:
ed04142015-09-27Per Hedbor  Pike_fatal ("Invalid size shift %d.\n", s->size_shift);
abdf6b2005-04-02Martin Stjernholm  } size_shift_check_done:;
6b997c2001-09-04Fredrik Hübinette (Hubbe)  if(do_hash(s) != s->hval) { locate_problem(wrong_hash);
5aad932002-08-15Marcus Comstedt  Pike_fatal("Hash value changed?\n");
6b997c2001-09-04Fredrik Hübinette (Hubbe)  }
9925512013-05-31Per Hedbor 
33e8732015-12-25Henrik Grubbström (Grubba)  if(debug_findstring(s) != s)
5aad932002-08-15Marcus Comstedt  Pike_fatal("Shared string not shared.\n");
af93211996-10-12Fredrik Hübinette (Hubbe) 
6b997c2001-09-04Fredrik Hübinette (Hubbe)  if(index_shared_string(s,s->len)) { locate_problem(improper_zero_termination);
5aad932002-08-15Marcus Comstedt  Pike_fatal("Shared string is not zero terminated properly.\n");
6b997c2001-09-04Fredrik Hübinette (Hubbe)  }
2043ba1998-02-10Fredrik Hübinette (Hubbe)  }
af93211996-10-12Fredrik Hübinette (Hubbe) }
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT void verify_shared_strings_tables(void)
af93211996-10-12Fredrik Hübinette (Hubbe) {
beac721998-04-16Fredrik Hübinette (Hubbe)  unsigned INT32 e, h, num=0;
af93211996-10-12Fredrik Hübinette (Hubbe)  struct pike_string *s;
6b997c2001-09-04Fredrik Hübinette (Hubbe)  last_stralloc_verify=current_do_debug_cycle;
af93211996-10-12Fredrik Hübinette (Hubbe)  for(e=0;e<htable_size;e++) { h=0; for(s=base_table[e];s;s=s->next) {
beac721998-04-16Fredrik Hübinette (Hubbe)  num++;
af93211996-10-12Fredrik Hübinette (Hubbe)  h++;
4bdf5f2001-03-30Henrik Grubbström (Grubba)  if (bad_pointer(s)) {
ed04142015-09-27Per Hedbor  Pike_fatal("Odd string pointer in string table!\n");
4bdf5f2001-03-30Henrik Grubbström (Grubba)  }
af93211996-10-12Fredrik Hübinette (Hubbe)  if(s->len < 0)
ed04142015-09-27Per Hedbor  Pike_fatal("Shared string shorter than zero bytes.\n");
af93211996-10-12Fredrik Hübinette (Hubbe)  if(s->refs <= 0)
2043ba1998-02-10Fredrik Hübinette (Hubbe)  {
ed04142015-09-27Per Hedbor  locate_problem(has_zero_refs); Pike_fatal("Shared string had too few references.\n");
2043ba1998-02-10Fredrik Hübinette (Hubbe)  }
af93211996-10-12Fredrik Hübinette (Hubbe) 
f4e1ec1998-10-22Fredrik Hübinette (Hubbe)  if(index_shared_string(s,s->len))
2043ba1998-02-10Fredrik Hübinette (Hubbe)  {
ed04142015-09-27Per Hedbor  locate_problem(improper_zero_termination); Pike_fatal("Shared string didn't end with a zero.\n");
2043ba1998-02-10Fredrik Hübinette (Hubbe)  }
af93211996-10-12Fredrik Hübinette (Hubbe) 
8bcb3b2001-03-28Fredrik Hübinette (Hubbe)  if(do_hash(s) != s->hval)
8be4822004-11-06Henrik Grubbström (Grubba)  {
ed04142015-09-27Per Hedbor  locate_problem(wrong_hash); Pike_fatal("Shared string hashed to other number.\n");
8be4822004-11-06Henrik Grubbström (Grubba)  }
8bcb3b2001-03-28Fredrik Hübinette (Hubbe) 
e85df82001-09-06Fredrik Hübinette (Hubbe)  if(HMODULO(s->hval) != e)
2043ba1998-02-10Fredrik Hübinette (Hubbe)  {
ed04142015-09-27Per Hedbor  locate_problem(wrong_hash); Pike_fatal("Shared string hashed to wrong place.\n");
2043ba1998-02-10Fredrik Hübinette (Hubbe)  }
af93211996-10-12Fredrik Hübinette (Hubbe)  if(h>10000) {
ed04142015-09-27Per Hedbor  struct pike_string *s2; for(s2=s;s2;s2=s2->next) if(s2 == s) Pike_fatal("Shared string table is cyclic.\n"); h=0;
af93211996-10-12Fredrik Hübinette (Hubbe)  } } }
beac721998-04-16Fredrik Hübinette (Hubbe)  if(num != num_strings)
5aad932002-08-15Marcus Comstedt  Pike_fatal("Num strings is wrong %d!=%d\n",num,num_strings);
af93211996-10-12Fredrik Hübinette (Hubbe) }
33e8732015-12-25Henrik Grubbström (Grubba) /* For once, this is actually a debug function! * * This function is mostly used to check that the argument * is a finished string. */ const struct pike_string *debug_findstring(const struct pike_string *s)
62971d1998-01-19Fredrik Hübinette (Hubbe) {
2f782f2015-12-17Henrik Grubbström (Grubba)  size_t h; struct pike_string *p;
3ebcf22015-12-22Henrik Grubbström (Grubba)  if(!base_table) return NULL; h = HMODULO(s->hval);
2f782f2015-12-17Henrik Grubbström (Grubba)  for(p=base_table[h];p;p=p->next)
62971d1998-01-19Fredrik Hübinette (Hubbe)  {
3ebcf22015-12-22Henrik Grubbström (Grubba)  if(p==s)
e1939c2001-03-30Fredrik Hübinette (Hubbe)  {
3ebcf22015-12-22Henrik Grubbström (Grubba)  return s; } } return NULL; } int safe_debug_findstring(const struct pike_string *foo) { unsigned INT32 e; if(!base_table) return 0; for(e=0;e<htable_size;e++) { struct pike_string *p; for(p=base_table[e];p;p=p->next) { if(p==foo) { return 1; }
e1939c2001-03-30Fredrik Hübinette (Hubbe)  }
62971d1998-01-19Fredrik Hübinette (Hubbe)  } return 0; }
fa7ecb2014-12-08Martin Nilsson PMOD_EXPORT void debug_dump_pike_string(const struct pike_string *s, INT32 max)
38d6081998-02-07Fredrik Hübinette (Hubbe) { INT32 e;
a737441998-10-11Henrik Grubbström (Grubba)  fprintf(stderr,"0x%p: %ld refs, len=%ld, size_shift=%d, hval=%lux (%lx)\n",
ed04142015-09-27Per Hedbor  s, (long)s->refs,
bd67392015-10-14Martin Nilsson  (long)s->len,
ed04142015-09-27Per Hedbor  s->size_shift,
bd67392015-10-14Martin Nilsson  (unsigned long)s->hval, (unsigned long)StrHash(s->str, s->len));
38d6081998-02-07Fredrik Hübinette (Hubbe)  fprintf(stderr," \""); for(e=0;e<s->len && max>0;e++) { int c=EXTRACT_UCHAR(s->str+e); switch(c) { case '\t': fprintf(stderr,"\\t"); max-=2; break; case '\n': fprintf(stderr,"\\n"); max-=2; break; case '\r': fprintf(stderr,"\\r"); max-=2; break; case '\b': fprintf(stderr,"\\b"); max-=2; break; default:
1a6b472017-01-10Martin Nilsson  if(isprint(c))
ed04142015-09-27Per Hedbor  { putc(c,stderr); max--; }else{ fprintf(stderr,"\\%03o",c); max-=4; }
38d6081998-02-07Fredrik Hübinette (Hubbe)  } } if(!max) fprintf(stderr,"...\n"); else fprintf(stderr,"\"\n"); }
be478c1997-08-30Henrik Grubbström (Grubba) void dump_stralloc_strings(void)
5267b71995-08-09Fredrik Hübinette (Hubbe) {
af93211996-10-12Fredrik Hübinette (Hubbe)  unsigned INT32 e;
06983f1996-09-22Fredrik Hübinette (Hubbe)  struct pike_string *p;
af93211996-10-12Fredrik Hübinette (Hubbe)  for(e=0;e<htable_size;e++)
e1939c2001-03-30Fredrik Hübinette (Hubbe)  {
024adc2004-11-14Martin Stjernholm  for(p=base_table[e];p;p=p->next) {
38d6081998-02-07Fredrik Hübinette (Hubbe)  debug_dump_pike_string(p, 70);
024adc2004-11-14Martin Stjernholm #ifdef DEBUG_MALLOC debug_malloc_dump_references (p, 2, 1, 0); #endif }
e1939c2001-03-30Fredrik Hübinette (Hubbe)  }
5267b71995-08-09Fredrik Hübinette (Hubbe) }
a20d822013-06-08Martin Nilsson #endif /* PIKE_DEBUG */
/*** String compare functions ***/

/*
 * Compare two byte buffers; does not take locale into account.
 *
 * The common prefix is compared with memcmp() and a non-zero result is
 * returned as is.  If the prefixes are equal the longer buffer sorts
 * last: 1 if a is longer, -1 if b is longer, 0 if the lengths match.
 */
int low_quick_binary_strcmp(const char *a, ptrdiff_t alen,
                            const char *b, ptrdiff_t blen)
{
  ptrdiff_t common = (alen < blen) ? alen : blen;
  int diff = memcmp(a, b, common);

  if (diff)
    return diff;

  if (alen > blen)
    return 1;
  if (alen < blen)
    return -1;
  return 0;
}
a5787d1999-03-03Fredrik Hübinette (Hubbe) 
3e625c1998-10-11Fredrik Hübinette (Hubbe) /* does not take locale into account */
66d9282011-05-01Per Hedbor ptrdiff_t generic_quick_binary_strcmp(const char *a,
d8e02f2014-09-03Martin Nilsson  ptrdiff_t alen, int asize, const char *b, ptrdiff_t blen, int bsize)
3e625c1998-10-11Fredrik Hübinette (Hubbe) {
0f0bf92014-09-04Martin Nilsson  ptrdiff_t pos;
3e625c1998-10-11Fredrik Hübinette (Hubbe)  if(!asize && !bsize)
d8e02f2014-09-03Martin Nilsson  return low_quick_binary_strcmp(a, alen, b, blen); for(pos=0;pos< MINIMUM(alen,blen) ;pos++)
3e625c1998-10-11Fredrik Hübinette (Hubbe)  {
d8e02f2014-09-03Martin Nilsson  p_wchar2 ac=generic_extract(a,asize,pos); p_wchar2 bc=generic_extract(b,bsize,pos); if(ac != bc) { if (ac < bc) return -1;
3e625c1998-10-11Fredrik Hübinette (Hubbe)  return 1; } }
d8e02f2014-09-03Martin Nilsson  return alen-blen;
3e625c1998-10-11Fredrik Hübinette (Hubbe) }
d7cc372006-03-10Henrik Grubbström (Grubba) /* Does not take locale into account * * Similar to (and could be used in place of) generic_quick_binary_strcmp(), * but returns +/- (offset + 1) to the first difference beween the strings. * * This can be used by eg replace_many() to speed up the comparisons. */
66d9282011-05-01Per Hedbor ptrdiff_t generic_find_binary_prefix(const char *a,
fa7ecb2014-12-08Martin Nilsson  ptrdiff_t alen, int asize, const char *b, ptrdiff_t blen, int bsize)
d7cc372006-03-10Henrik Grubbström (Grubba) { ptrdiff_t pos; ptrdiff_t len = MINIMUM(alen, blen); switch(TWO_SIZES(asize, bsize)) {
ed04142015-09-27Per Hedbor #define CASE(AZ, BZ) \ case TWO_SIZES(AZ, BZ): { \ PIKE_CONCAT(p_wchar, AZ) *a_arr = \ (PIKE_CONCAT(p_wchar, AZ) *)a; \ PIKE_CONCAT(p_wchar, BZ) *b_arr = \ (PIKE_CONCAT(p_wchar, BZ) *)b; \ for (pos=0; pos<len; pos++) { \ if (a_arr[pos] == b_arr[pos]) \ continue; \ if (a_arr[pos] < b_arr[pos]) \ return ~pos; \ return pos+1; \ } \
d7cc372006-03-10Henrik Grubbström (Grubba)  } break CASE(0,0); CASE(0,1); CASE(0,2); CASE(1,0); CASE(1,1); CASE(1,2); CASE(2,0); CASE(2,1); CASE(2,2); #undef CASE } if (alen == blen) return 0; if (alen < blen) return ~alen; return blen+1; }
fa7ecb2014-12-08Martin Nilsson PMOD_EXPORT int c_compare_string(const struct pike_string *s, const char *foo, int len)
a5787d1999-03-03Fredrik Hübinette (Hubbe) {
67074e2014-09-03Martin Nilsson  return s->len == len && s->size_shift == 0 && !memcmp(s->str,foo,len);
a5787d1999-03-03Fredrik Hübinette (Hubbe) }
af93211996-10-12Fredrik Hübinette (Hubbe) /* Does not take locale into account */
fa7ecb2014-12-08Martin Nilsson PMOD_EXPORT ptrdiff_t my_quick_strcmp(const struct pike_string *a,
ed04142015-09-27Per Hedbor  const struct pike_string *b)
af93211996-10-12Fredrik Hübinette (Hubbe) { if(a==b) return 0;
3e625c1998-10-11Fredrik Hübinette (Hubbe)  return generic_quick_binary_strcmp(a->str, a->len, a->size_shift,
ed04142015-09-27Per Hedbor  b->str, b->len, b->size_shift);
af93211996-10-12Fredrik Hübinette (Hubbe) }
ed04142015-09-27Per Hedbor 
5de2692012-05-28Martin Stjernholm struct pike_string *realloc_unlinked_string(struct pike_string *a,
ed04142015-09-27Per Hedbor  ptrdiff_t size)
8d28be1997-02-10Fredrik Hübinette (Hubbe) {
60e6cf2013-11-09Arne Goedeke  char * s = NULL; size_t nbytes = (size_t)(size+1) << a->size_shift;
b3ec732016-01-09Martin Nilsson  size_t obytes = (size_t)a->len << a->size_shift;
4a5e3f2000-11-25Henrik Grubbström (Grubba) 
ed04142015-09-27Per Hedbor  if( size < a->len && size-a->len<(signed)sizeof(void*) ) goto done;
3587242015-08-22Per Hedbor 
ed04142015-09-27Per Hedbor  if( nbytes < sizeof(struct pike_string) )
ced1912015-08-18Per Hedbor  {
ed04142015-09-27Per Hedbor  if( a->alloc_type == STRING_ALLOC_BA )
60e6cf2013-11-09Arne Goedeke  goto done;
ed04142015-09-27Per Hedbor  s = ba_alloc(&string_allocator);
a5b9612016-01-09Martin Nilsson  memcpy(s, a->str, MINIMUM(nbytes,obytes));
ed04142015-09-27Per Hedbor  free_string_content(a); a->alloc_type = STRING_ALLOC_BA; } else if( a->alloc_type == STRING_ALLOC_MALLOC) { s = xrealloc(a->str,nbytes); } else { s = xalloc(nbytes); memcpy(s,a->str,MINIMUM(nbytes,obytes)); free_string_content(a); a->alloc_type = STRING_ALLOC_MALLOC;
8d28be1997-02-10Fredrik Hübinette (Hubbe)  }
60e6cf2013-11-09Arne Goedeke  a->str = s; done: a->len=size; low_set_index(a,size,0);
ed04142015-09-27Per Hedbor 
b3ec732016-01-09Martin Nilsson  return a;
8d28be1997-02-10Fredrik Hübinette (Hubbe) }
ed04142015-09-27Per Hedbor 
8d28be1997-02-10Fredrik Hübinette (Hubbe) /* Returns an unlinked string ready for end_shared_string */
66d9282011-05-01Per Hedbor static struct pike_string *realloc_shared_string(struct pike_string *a, ptrdiff_t size)
8d28be1997-02-10Fredrik Hübinette (Hubbe) {
ed04142015-09-27Per Hedbor  if(string_may_modify_len(a))
8d28be1997-02-10Fredrik Hübinette (Hubbe)  { unlink_pike_string(a); return realloc_unlinked_string(a, size); }else{
60e6cf2013-11-09Arne Goedeke  struct pike_string *r=begin_wide_shared_string(size,a->size_shift);
f4ef462016-01-12Martin Nilsson  memcpy(r->str, a->str, a->len<<a->size_shift);
119e1e2014-08-22Arne Goedeke  r->flags |= a->flags & STRING_CHECKED_MASK;
ed2bed2013-06-14Per Hedbor  r->min = a->min; r->max = a->max;
7094631997-02-24Fredrik Hübinette (Hubbe)  free_string(a);
8d28be1997-02-10Fredrik Hübinette (Hubbe)  return r; } }
ed04142015-09-27Per Hedbor struct pike_string *new_realloc_shared_string(struct pike_string *a, INT32 size, enum size_shift shift)
3e625c1998-10-11Fredrik Hübinette (Hubbe) { struct pike_string *r; if(shift == a->size_shift) return realloc_shared_string(a,size); r=begin_wide_shared_string(size,shift);
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  pike_string_cpy(MKPCHARP_STR(r),a);
119e1e2014-08-22Arne Goedeke  r->flags |= (a->flags & STRING_CHECKED_MASK);
ed2bed2013-06-14Per Hedbor  r->min = a->min; r->max = a->max;
3e625c1998-10-11Fredrik Hübinette (Hubbe)  free_string(a); return r; }
0d3ea51998-01-19Fredrik Hübinette (Hubbe) /* Modify one index in a shared string * Not suitable for building new strings or changing multiple characters * within a string!
db4a401998-10-09Fredrik Hübinette (Hubbe)  * * Phew, this function become complicated when I inserted magic for wide * characters...
0d3ea51998-01-19Fredrik Hübinette (Hubbe)  */
66d9282011-05-01Per Hedbor struct pike_string *modify_shared_string(struct pike_string *a,
0d3ea51998-01-19Fredrik Hübinette (Hubbe)  INT32 index,
db4a401998-10-09Fredrik Hübinette (Hubbe)  INT32 c)
0d3ea51998-01-19Fredrik Hübinette (Hubbe) {
db4a401998-10-09Fredrik Hübinette (Hubbe)  INT32 old_value;
71f3a21998-11-22Fredrik Hübinette (Hubbe) #ifdef PIKE_DEBUG
0d3ea51998-01-19Fredrik Hübinette (Hubbe)  if(index<0 || index>=a->len)
5aad932002-08-15Marcus Comstedt  Pike_fatal("Index out of range in modify_shared_string()\n");
0d3ea51998-01-19Fredrik Hübinette (Hubbe) #endif
db4a401998-10-09Fredrik Hübinette (Hubbe)  old_value=index_shared_string(a,index); if(old_value==c) return a; /* First test if the string needs to be grown: * ie; the new value does not fit in the char size of * the old string */ if(min_magnitude(c) > a->size_shift) { /* String must be grown */ struct pike_string *b; switch(TWO_SIZES(min_magnitude(c),a->size_shift)) { case TWO_SIZES(1,0): b=begin_wide_shared_string(a->len,1);
01a9572000-02-03Henrik Grubbström (Grubba)  convert_0_to_1(STR1(b),(p_wchar0 *)a->str,a->len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  STR1(b)[index]=c;
7238061998-10-11Fredrik Hübinette (Hubbe)  free_string(a);
db4a401998-10-09Fredrik Hübinette (Hubbe)  return end_shared_string(b); case TWO_SIZES(2,0): b=begin_wide_shared_string(a->len,2);
01a9572000-02-03Henrik Grubbström (Grubba)  convert_0_to_2(STR2(b),(p_wchar0 *)a->str,a->len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  STR2(b)[index]=c;
7238061998-10-11Fredrik Hübinette (Hubbe)  free_string(a);
db4a401998-10-09Fredrik Hübinette (Hubbe)  return end_shared_string(b); case TWO_SIZES(2,1): b=begin_wide_shared_string(a->len,2); convert_1_to_2(STR2(b),STR1(a),a->len); STR2(b)[index]=c;
7238061998-10-11Fredrik Hübinette (Hubbe)  free_string(a);
db4a401998-10-09Fredrik Hübinette (Hubbe)  return end_shared_string(b);
c6b6042008-05-03Martin Nilsson #ifdef PIKE_DEBUG
db4a401998-10-09Fredrik Hübinette (Hubbe)  default:
5aad932002-08-15Marcus Comstedt  Pike_fatal("Odd wide string conversion!\n");
c6b6042008-05-03Martin Nilsson #endif
db4a401998-10-09Fredrik Hübinette (Hubbe)  } } /* Next we test if the new string can be shrunk * if all characters in the new string can fit in a string * of a lower magnitude, it must be shrunk */
13670c2015-05-25Martin Nilsson 
db4a401998-10-09Fredrik Hübinette (Hubbe)  if(min_magnitude(old_value) == a->size_shift && min_magnitude(c) < min_magnitude(old_value)) { /* We *might* need to shrink the string */ struct pike_string *b;
7001302014-09-29Martin Nilsson  unsigned int size,tmp;
db4a401998-10-09Fredrik Hübinette (Hubbe)  switch(a->size_shift) { case 0:
5aad932002-08-15Marcus Comstedt  Pike_fatal("Unshrinkable!\n");
db4a401998-10-09Fredrik Hübinette (Hubbe)  case 1: /* Test if we *actually* can shrink it.. */ if(find_magnitude1(STR1(a),index)) break; if(find_magnitude1(STR1(a)+index+1,a->len-index-1)) break;
13670c2015-05-25Martin Nilsson 
db4a401998-10-09Fredrik Hübinette (Hubbe)  b=begin_shared_string(a->len);
01a9572000-02-03Henrik Grubbström (Grubba)  convert_1_to_0((p_wchar0 *)b->str,STR1(a),a->len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  b->str[index]=c; free_string(a); return end_shared_string(b); case 2: /* Test if we *actually* can shrink it.. */ size=find_magnitude2(STR2(a),index); if(size==2) break; /* nope */ tmp=find_magnitude2(STR2(a)+index+1,a->len-index-1); if(tmp==2) break; /* nope */ size=MAXIMUM(MAXIMUM(size,tmp),min_magnitude(c)); switch(size) { case 0: b=begin_shared_string(a->len);
01a9572000-02-03Henrik Grubbström (Grubba)  convert_2_to_0((p_wchar0 *)b->str,STR2(a),a->len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  b->str[index]=c; free_string(a); return end_shared_string(b); case 1: b=begin_wide_shared_string(a->len,1);
01a9572000-02-03Henrik Grubbström (Grubba)  convert_2_to_1((p_wchar1 *)b->str,STR2(a),a->len);
db4a401998-10-09Fredrik Hübinette (Hubbe)  STR1(b)[index]=c; free_string(a); return end_shared_string(b); } } }
13670c2015-05-25Martin Nilsson 
db4a401998-10-09Fredrik Hübinette (Hubbe)  /* We now know that the string has the right character size */
119e1e2014-08-22Arne Goedeke  if(string_may_modify(a))
0d3ea51998-01-19Fredrik Hübinette (Hubbe)  {
db4a401998-10-09Fredrik Hübinette (Hubbe)  /* One ref - destructive mode */
e94f072005-01-18Henrik Grubbström (Grubba)  unlink_pike_string(a); low_set_index(a, index, c);
9925512013-05-31Per Hedbor  CLEAR_STRING_CHECKED(a);
60e6cf2013-11-09Arne Goedeke  if((((unsigned int)index) >= hash_prefix_len) && (index < a->len-8) )
0d3ea51998-01-19Fredrik Hübinette (Hubbe)  {
6ed3d22005-01-17Henrik Grubbström (Grubba)  struct pike_string *old;
8282ca2013-11-02Per Hedbor  /* Doesn't change hash value - sneak it in there */
5cc19b2005-05-18Martin Nilsson #ifdef PIKE_DEBUG
950ade2005-05-17Henrik Grubbström (Grubba)  if (wrong_hash(a)) { Pike_fatal("Broken hash optimization.\n"); }
5cc19b2005-05-18Martin Nilsson #endif
6ed3d22005-01-17Henrik Grubbström (Grubba)  old = internal_findstring(a->str, a->len, a->size_shift, a->hval); if (old) { /* The new string is equal to some old string. */
9cd0372005-11-03Henrik Grubbström (Grubba)  free_string(a);
6ed3d22005-01-17Henrik Grubbström (Grubba)  add_ref(a = old); } else { link_pike_string(a, a->hval); }
0d3ea51998-01-19Fredrik Hübinette (Hubbe)  }else{
e94f072005-01-18Henrik Grubbström (Grubba)  a = end_shared_string(a);
0d3ea51998-01-19Fredrik Hübinette (Hubbe)  }
e94f072005-01-18Henrik Grubbström (Grubba)  return a;
0d3ea51998-01-19Fredrik Hübinette (Hubbe)  }else{ struct pike_string *r;
db4a401998-10-09Fredrik Hübinette (Hubbe)  r=begin_wide_shared_string(a->len,a->size_shift);
59fc9e2014-09-03Martin Nilsson  memcpy(r->str, a->str, a->len << a->size_shift);
db4a401998-10-09Fredrik Hübinette (Hubbe)  low_set_index(r,index,c);
0d3ea51998-01-19Fredrik Hübinette (Hubbe)  free_string(a); return end_shared_string(r); } }
fa7ecb2014-12-08Martin Nilsson static void set_flags_for_add( struct pike_string *ret, unsigned char aflags, unsigned char amin, unsigned char amax, const struct pike_string *b)
9925512013-05-31Per Hedbor { if( !b->len ) {
e106dd2016-12-16Henrik Grubbström (Grubba)  ret->flags |= aflags & STRING_CHECKED_MASK;
9925512013-05-31Per Hedbor  ret->min = amin; ret->max = amax; return; }
afd5eb2013-06-03Martin Nilsson  if( aflags & b->flags & STRING_CONTENT_CHECKED )
9925512013-05-31Per Hedbor  {
6431dc2013-06-17Henrik Grubbström (Grubba)  ret->min = MINIMUM( amin, b->min ); ret->max = MAXIMUM( amax, b->max );
9925512013-05-31Per Hedbor  ret->flags |= STRING_CONTENT_CHECKED; } else ret->flags &= ~STRING_CONTENT_CHECKED;
a21b582013-06-09Martin Nilsson  ret->flags &= ~(STRING_IS_LOWERCASE | STRING_IS_UPPERCASE); ret->flags |= (aflags & b->flags & (STRING_IS_LOWERCASE | STRING_IS_UPPERCASE));
9925512013-05-31Per Hedbor }
fa7ecb2014-12-08Martin Nilsson void update_flags_for_add( struct pike_string *a, const struct pike_string *b)
9925512013-05-31Per Hedbor { if( !b->len ) return; if( a->flags & STRING_CONTENT_CHECKED ) { if(b->flags & STRING_CONTENT_CHECKED) {
9078a12016-12-16Henrik Grubbström (Grubba)  if (a->len) { if( b->min < a->min ) a->min = b->min; if( b->max > a->max ) a->max = b->max; } else { a->min = b->min; a->max = b->max; }
9925512013-05-31Per Hedbor  } else a->flags &= ~STRING_CONTENT_CHECKED; }
f3b33d2013-06-03Martin Nilsson  a->flags &= ~(STRING_IS_LOWERCASE | STRING_IS_UPPERCASE) | b->flags;
9925512013-05-31Per Hedbor }
af93211996-10-12Fredrik Hübinette (Hubbe) /*** Add strings ***/
fa7ecb2014-12-08Martin Nilsson PMOD_EXPORT struct pike_string *add_shared_strings(const struct pike_string *a, const struct pike_string *b)
5267b71995-08-09Fredrik Hübinette (Hubbe) {
06983f1996-09-22Fredrik Hübinette (Hubbe)  struct pike_string *ret;
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  PCHARP tmp;
db4a401998-10-09Fredrik Hübinette (Hubbe)  int target_size=MAXIMUM(a->size_shift,b->size_shift);
5267b71995-08-09Fredrik Hübinette (Hubbe) 
db4a401998-10-09Fredrik Hübinette (Hubbe)  ret=begin_wide_shared_string(a->len+b->len,target_size);
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  tmp=MKPCHARP_STR(ret); pike_string_cpy(tmp,a); INC_PCHARP(tmp,a->len); pike_string_cpy(tmp,b);
9925512013-05-31Per Hedbor  set_flags_for_add( ret, a->flags, a->min, a->max, b );
1743c82001-02-03Fredrik Hübinette (Hubbe)  return low_end_shared_string(ret);
5267b71995-08-09Fredrik Hübinette (Hubbe) }
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT struct pike_string *add_and_free_shared_strings(struct pike_string *a,
9925512013-05-31Per Hedbor  struct pike_string *b)
b1f4eb1998-01-13Fredrik Hübinette (Hubbe) {
89fc4c2000-08-10Henrik Grubbström (Grubba)  ptrdiff_t alen = a->len;
db4a401998-10-09Fredrik Hübinette (Hubbe)  if(a->size_shift == b->size_shift) {
9925512013-05-31Per Hedbor  a = realloc_shared_string(a, alen + b->len);
ed2bed2013-06-14Per Hedbor  update_flags_for_add( a, b );
59fc9e2014-09-03Martin Nilsson  memcpy(a->str+(alen<<a->size_shift),b->str,b->len<<b->size_shift);
db4a401998-10-09Fredrik Hübinette (Hubbe)  free_string(b);
9cd0372005-11-03Henrik Grubbström (Grubba)  a->flags |= STRING_NOT_HASHED;
db4a401998-10-09Fredrik Hübinette (Hubbe)  return end_shared_string(a); }else{ struct pike_string *ret=add_shared_strings(a,b); free_string(a); free_string(b); return ret; } }
89fc4c2000-08-10Henrik Grubbström (Grubba) PMOD_EXPORT ptrdiff_t string_search(struct pike_string *haystack, struct pike_string *needle, ptrdiff_t start)
db4a401998-10-09Fredrik Hübinette (Hubbe) {
9b1f032000-10-09Fredrik Hübinette (Hubbe)  SearchMojt mojt;
db4a401998-10-09Fredrik Hübinette (Hubbe)  char *r;
9925512013-05-31Per Hedbor  if( !string_range_contains_string( haystack, needle ) ) return -1; if(start + needle->len > haystack->len)
db4a401998-10-09Fredrik Hübinette (Hubbe)  return -1;
bfcfb02001-11-08Fredrik Hübinette (Hubbe)  if(!needle->len) return start;
9b1f032000-10-09Fredrik Hübinette (Hubbe)  mojt=compile_memsearcher(MKPCHARP_STR(needle),
db4a401998-10-09Fredrik Hübinette (Hubbe)  needle->len,
5d54232000-10-09Fredrik Hübinette (Hubbe)  haystack->len,
9b1f032000-10-09Fredrik Hübinette (Hubbe)  needle);
db4a401998-10-09Fredrik Hübinette (Hubbe) 
6e54c72004-03-24Henrik Grubbström (Grubba)  r = (char *)mojt.vtab->funcN(mojt.data, ADD_PCHARP(MKPCHARP_STR(haystack), start), haystack->len - start).ptr;
9b1f032000-10-09Fredrik Hübinette (Hubbe) 
8c11832008-06-23Martin Stjernholm  if (mojt.container) free_object (mojt.container);
db4a401998-10-09Fredrik Hübinette (Hubbe)  if(!r) return -1;
5d54232000-10-09Fredrik Hübinette (Hubbe) #ifdef PIKE_DEBUG
bd742e2000-10-19Henrik Grubbström (Grubba)  if((r < haystack->str) || (r - haystack->str)>>haystack->size_shift > haystack->len)
5aad932002-08-15Marcus Comstedt  Pike_fatal("string_search did a bobo!\n");
5d54232000-10-09Fredrik Hübinette (Hubbe) #endif
db4a401998-10-09Fredrik Hübinette (Hubbe)  return (r-haystack->str)>>haystack->size_shift;
b1f4eb1998-01-13Fredrik Hübinette (Hubbe) }
ed04142015-09-27Per Hedbor static struct pike_string *make_shared_substring( struct pike_string *s, ptrdiff_t start, ptrdiff_t len, enum size_shift shift)
3587242015-08-22Per Hedbor { struct pike_string *existing; struct substring_pike_string *res;
ed04142015-09-27Per Hedbor  void *strstart = s->str+(start<<shift); size_t hval = low_do_hash(strstart,len,shift); if( (existing = internal_findstring(strstart, len, shift, hval)) )
3587242015-08-22Per Hedbor  { add_ref(existing); return existing; } res = ba_alloc(&substring_allocator); res->parent = s; add_ref(s); existing = &res->str;
ed04142015-09-27Per Hedbor  existing->flags = STRING_NOT_SHARED; existing->size_shift = shift;
3587242015-08-22Per Hedbor  existing->alloc_type = STRING_ALLOC_SUBSTRING;
ed04142015-09-27Per Hedbor  existing->struct_type = STRING_STRUCT_SUBSTRING; existing->hval = hval; existing->str = strstart;
3587242015-08-22Per Hedbor  existing->len = len; #ifdef PIKE_DEBUG if( existing->len + start != s->len ) Pike_fatal("Substrings must be terminated at end of string for now.\n"); #endif existing->refs = 0; add_ref(existing);
ed04142015-09-27Per Hedbor  link_pike_string(existing,hval); return existing;
3587242015-08-22Per Hedbor }
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT struct pike_string *string_slice(struct pike_string *s,
ab85722000-08-04Henrik Grubbström (Grubba)  ptrdiff_t start, ptrdiff_t len)
3e625c1998-10-11Fredrik Hübinette (Hubbe) {
71f3a21998-11-22Fredrik Hübinette (Hubbe) #ifdef PIKE_DEBUG
3e625c1998-10-11Fredrik Hübinette (Hubbe)  if(start < 0 || len<0 || start+len>s->len ) {
5aad932002-08-15Marcus Comstedt  Pike_fatal("string_slice, start = %ld, len = %ld, s->len = %ld\n",
bd67392015-10-14Martin Nilsson  (long)start, (long)len, (long)s->len);
3e625c1998-10-11Fredrik Hübinette (Hubbe)  } #endif
6a12e42011-05-03Per Hedbor  if( len == 0) { add_ref(empty_pike_string); return empty_pike_string; }
3e625c1998-10-11Fredrik Hübinette (Hubbe)  if(start==0 && len==s->len) { add_ref(s); return s; }
3587242015-08-22Per Hedbor  /* Actually create a substring. */ /* If the string to take a substring of is a substring, take from the original. */ if( s->alloc_type == STRING_ALLOC_SUBSTRING ) {
ed04142015-09-27Per Hedbor  struct pike_string *pr= substring_content_string(s); /* Note: If substrings are ever anywhere except at the end, this might need to change. */
a1db062016-06-07Henrik Grubbström (Grubba)  start += (s->str-pr->str)>>s->size_shift;
ed04142015-09-27Per Hedbor  s = pr;
3587242015-08-22Per Hedbor  }
ed04142015-09-27Per Hedbor  if( (len+start == s->len) && start < (s->len>>1) && (!s->size_shift || (s->size_shift==1 && find_magnitude1(((p_wchar1*)s->str)+start,len)==1) || (s->size_shift==2 && find_magnitude2(((p_wchar2*)s->str)+start,len)==2)))
3587242015-08-22Per Hedbor  {
ed04142015-09-27Per Hedbor  /* If there is no change of maginute, make a substring. */ return make_shared_substring( s, start, len, s->size_shift );
3587242015-08-22Per Hedbor  }
3e625c1998-10-11Fredrik Hübinette (Hubbe)  switch(s->size_shift) { case 0:
01a9572000-02-03Henrik Grubbström (Grubba)  return make_shared_binary_string((char *)STR0(s)+start,len);
3e625c1998-10-11Fredrik Hübinette (Hubbe)  case 1: return make_shared_binary_string1(STR1(s)+start,len); case 2: return make_shared_binary_string2(STR2(s)+start,len); }
759c422015-10-17Martin Nilsson  UNREACHABLE(return 0);
3e625c1998-10-11Fredrik Hübinette (Hubbe) }
db4a401998-10-09Fredrik Hübinette (Hubbe) 
af93211996-10-12Fredrik Hübinette (Hubbe) /*** replace function ***/
5d54232000-10-09Fredrik Hübinette (Hubbe) typedef char *(* replace_searchfunc)(void *,void *,size_t);
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT struct pike_string *string_replace(struct pike_string *str,
fa7ecb2014-12-08Martin Nilsson  struct pike_string *del, struct pike_string *to)
5267b71995-08-09Fredrik Hübinette (Hubbe) {
06983f1996-09-22Fredrik Hübinette (Hubbe)  struct pike_string *ret;
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  char *s,*tmp,*end; PCHARP r;
ed04142015-09-27Per Hedbor  enum size_shift shift;
5d54232000-10-09Fredrik Hübinette (Hubbe)  SearchMojt mojt;
8c11832008-06-23Martin Stjernholm  ONERROR mojt_uwp;
41b0b22000-10-20Henrik Grubbström (Grubba)  replace_searchfunc f = (replace_searchfunc)0;
5267b71995-08-09Fredrik Hübinette (Hubbe) 
4dd43e2013-06-10Arne Goedeke  if(!str->len || !string_range_contains_string(str, del))
a991451997-07-08Fredrik Hübinette (Hubbe)  {
4dd43e2013-06-10Arne Goedeke  add_ref(str); return str;
a991451997-07-08Fredrik Hübinette (Hubbe)  }
3e625c1998-10-11Fredrik Hübinette (Hubbe)  shift=MAXIMUM(str->size_shift,to->size_shift);
a991451997-07-08Fredrik Hübinette (Hubbe)  if(!del->len) {
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  int e,pos;
3e625c1998-10-11Fredrik Hübinette (Hubbe)  ret=begin_wide_shared_string(str->len + to->len * (str->len -1),shift); low_set_index(ret,0,index_shared_string(str,0));
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  for(pos=e=1;e<str->len;e++)
a991451997-07-08Fredrik Hübinette (Hubbe)  {
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  pike_string_cpy(MKPCHARP_STR_OFF(ret,pos),to);
3e625c1998-10-11Fredrik Hübinette (Hubbe)  pos+=to->len; low_set_index(ret,pos++,index_shared_string(str,e));
a991451997-07-08Fredrik Hübinette (Hubbe)  } return end_shared_string(ret); }
5267b71995-08-09Fredrik Hübinette (Hubbe)  s=str->str;
3e625c1998-10-11Fredrik Hübinette (Hubbe)  end=s+(str->len<<str->size_shift);
5267b71995-08-09Fredrik Hübinette (Hubbe) 
1f515a1997-02-15Fredrik Hübinette (Hubbe)  if(del->len == to->len)
5267b71995-08-09Fredrik Hübinette (Hubbe)  {
5d54232000-10-09Fredrik Hübinette (Hubbe)  mojt=compile_memsearcher(MKPCHARP_STR(del),
3e625c1998-10-11Fredrik Hübinette (Hubbe)  del->len, str->len,
5d54232000-10-09Fredrik Hübinette (Hubbe)  del);
8c11832008-06-23Martin Stjernholm  SET_ONERROR (mojt_uwp, do_free_object, mojt.container);
3e625c1998-10-11Fredrik Hübinette (Hubbe)  ret=begin_wide_shared_string(str->len,shift);
5d54232000-10-09Fredrik Hübinette (Hubbe)  switch(str->size_shift) { case 0: f=(replace_searchfunc)mojt.vtab->func0; break; case 1: f=(replace_searchfunc)mojt.vtab->func1; break; case 2: f=(replace_searchfunc)mojt.vtab->func2; break; }
1f515a1997-02-15Fredrik Hübinette (Hubbe)  }else{ INT32 delimeters=0;
5d54232000-10-09Fredrik Hübinette (Hubbe)  mojt=compile_memsearcher(MKPCHARP_STR(del),
3e625c1998-10-11Fredrik Hübinette (Hubbe)  del->len, str->len*2,
5d54232000-10-09Fredrik Hübinette (Hubbe)  del);
8c11832008-06-23Martin Stjernholm  SET_ONERROR (mojt_uwp, do_free_object, mojt.container);
3e625c1998-10-11Fredrik Hübinette (Hubbe) 
5d54232000-10-09Fredrik Hübinette (Hubbe)  switch(str->size_shift) { case 0: f=(replace_searchfunc)mojt.vtab->func0; break; case 1: f=(replace_searchfunc)mojt.vtab->func1; break; case 2: f=(replace_searchfunc)mojt.vtab->func2; break; } while((s = f(mojt.data, s, (end-s)>>str->size_shift)))
1f515a1997-02-15Fredrik Hübinette (Hubbe)  { delimeters++;
3e625c1998-10-11Fredrik Hübinette (Hubbe)  s+=del->len << str->size_shift;
1f515a1997-02-15Fredrik Hübinette (Hubbe)  }
13670c2015-05-25Martin Nilsson 
1f515a1997-02-15Fredrik Hübinette (Hubbe)  if(!delimeters) {
8c11832008-06-23Martin Stjernholm  CALL_AND_UNSET_ONERROR (mojt_uwp);
d6ac731998-04-20Henrik Grubbström (Grubba)  add_ref(str);
1f515a1997-02-15Fredrik Hübinette (Hubbe)  return str; }
5267b71995-08-09Fredrik Hübinette (Hubbe) 
3e625c1998-10-11Fredrik Hübinette (Hubbe)  ret=begin_wide_shared_string(str->len + (to->len-del->len)*delimeters, shift);
5267b71995-08-09Fredrik Hübinette (Hubbe)  } s=str->str;
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  r=MKPCHARP_STR(ret);
5267b71995-08-09Fredrik Hübinette (Hubbe) 
5d54232000-10-09Fredrik Hübinette (Hubbe)  while((tmp = f(mojt.data, s, (end-s)>>str->size_shift)))
5267b71995-08-09Fredrik Hübinette (Hubbe)  {
68d9131999-04-01Fredrik Hübinette (Hubbe) #ifdef PIKE_DEBUG if(tmp + (del->len << str->size_shift) > end)
362d302004-03-08Martin Nilsson  Pike_fatal("SearchMojt found a match beyond end of string!!!\n");
68d9131999-04-01Fredrik Hübinette (Hubbe) #endif
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  generic_memcpy(r,MKPCHARP(s,str->size_shift),(tmp-s)>>str->size_shift); INC_PCHARP(r,(tmp-s)>>str->size_shift); pike_string_cpy(r,to); INC_PCHARP(r,to->len);
3e625c1998-10-11Fredrik Hübinette (Hubbe)  s=tmp+(del->len << str->size_shift);
5267b71995-08-09Fredrik Hübinette (Hubbe)  }
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  generic_memcpy(r,MKPCHARP(s,str->size_shift),(end-s)>>str->size_shift);
5267b71995-08-09Fredrik Hübinette (Hubbe) 
8c11832008-06-23Martin Stjernholm  CALL_AND_UNSET_ONERROR (mojt_uwp);
5267b71995-08-09Fredrik Hübinette (Hubbe)  return end_shared_string(ret); }
af93211996-10-12Fredrik Hübinette (Hubbe) /*** init/exit memory ***/
be478c1997-08-30Henrik Grubbström (Grubba) void init_shared_string_table(void)
af93211996-10-12Fredrik Hübinette (Hubbe) {
ddad7a2014-10-20Martin Nilsson  SET_HSIZE(BEGIN_HASH_SIZE);
9c14f32014-04-27Martin Nilsson  base_table=xcalloc(sizeof(struct pike_string *), htable_size);
4edb1a2002-09-11David Hedbor  empty_pike_string = make_shared_string("");
c6cfc02014-11-12Per Hedbor  empty_pike_string->flags |= STRING_IS_LOWERCASE | STRING_IS_UPPERCASE;
af93211996-10-12Fredrik Hübinette (Hubbe) }
31a8682004-09-27Martin Stjernholm #ifdef DO_PIKE_CLEANUP
00e6682006-07-05Martin Stjernholm PMOD_EXPORT struct shared_string_location *all_shared_string_locations;
61e9a01998-01-25Fredrik Hübinette (Hubbe) #endif
be478c1997-08-30Henrik Grubbström (Grubba) void cleanup_shared_string_table(void)
5267b71995-08-09Fredrik Hübinette (Hubbe) {
af93211996-10-12Fredrik Hübinette (Hubbe)  unsigned INT32 e;
06983f1996-09-22Fredrik Hübinette (Hubbe)  struct pike_string *s,*next;
61e9a01998-01-25Fredrik Hübinette (Hubbe) 
4edb1a2002-09-11David Hedbor  if (empty_pike_string) { free_string(empty_pike_string); empty_pike_string = 0;
7ff5712001-12-20Martin Stjernholm  }
31a8682004-09-27Martin Stjernholm #ifdef DO_PIKE_CLEANUP
61e9a01998-01-25Fredrik Hübinette (Hubbe)  while(all_shared_string_locations) { struct shared_string_location *x=all_shared_string_locations; all_shared_string_locations=x->next; free_string(x->s); x->s=0; }
31a8682004-09-27Martin Stjernholm  if (exit_with_cleanup)
61e9a01998-01-25Fredrik Hübinette (Hubbe)  {
f757d02008-05-01Martin Stjernholm  size_t num,size;
3c0c281998-01-26Fredrik Hübinette (Hubbe)  count_memory_in_strings(&num,&size); if(num) {
f757d02008-05-01Martin Stjernholm  fprintf(stderr,"Strings left: %"PRINTSIZET"d " "(%"PRINTSIZET"d bytes) (zapped)\n",num,size);
31a8682004-09-27Martin Stjernholm #ifdef PIKE_DEBUG
3c0c281998-01-26Fredrik Hübinette (Hubbe)  dump_stralloc_strings();
31a8682004-09-27Martin Stjernholm #endif
3c0c281998-01-26Fredrik Hübinette (Hubbe)  }
61e9a01998-01-25Fredrik Hübinette (Hubbe)  } #endif
46563d2000-12-01Henrik Grubbström (Grubba) 
af93211996-10-12Fredrik Hübinette (Hubbe)  for(e=0;e<htable_size;e++)
5267b71995-08-09Fredrik Hübinette (Hubbe)  { for(s=base_table[e];s;s=next) { next=s->next; s->next=0; } base_table[e]=0; }
0ec7522014-04-27Martin Nilsson  free(base_table);
61e9a01998-01-25Fredrik Hübinette (Hubbe)  base_table=0; num_strings=0;
46563d2000-12-01Henrik Grubbström (Grubba) 
0d70692002-11-28Martin Stjernholm #ifdef DO_PIKE_CLEANUP
22287c2013-06-16Arne Goedeke  ba_destroy(&string_allocator);
0d70692002-11-28Martin Stjernholm #endif /* DO_PIKE_CLEANUP */
5267b71995-08-09Fredrik Hübinette (Hubbe) }
c3c7031996-12-04Fredrik Hübinette (Hubbe) 
a206ac2014-08-24Arne Goedeke void count_string_types() { unsigned INT32 e;
0ddc5d2015-11-14Martin Nilsson  size_t num_static = 0, num_short = 0, num_substring = 0, num_malloc = 0;
a206ac2014-08-24Arne Goedeke  for (e = 0; e < htable_size; e++) { struct pike_string * s; for (s = base_table[e]; s; s = s->next)
3587242015-08-22Per Hedbor  switch (s->alloc_type) {
a206ac2014-08-24Arne Goedeke  case STRING_ALLOC_BA: num_short ++; break; case STRING_ALLOC_STATIC: num_static ++; break;
3587242015-08-22Per Hedbor  case STRING_ALLOC_SUBSTRING: num_substring ++; break;
0ddc5d2015-11-14Martin Nilsson  case STRING_ALLOC_MALLOC: num_malloc ++; break;
a206ac2014-08-24Arne Goedeke  } }
3587242015-08-22Per Hedbor  push_static_text("num_short_strings");
a206ac2014-08-24Arne Goedeke  push_ulongest(num_short);
3587242015-08-22Per Hedbor  push_static_text("num_static_strings");
a206ac2014-08-24Arne Goedeke  push_ulongest(num_static);
3587242015-08-22Per Hedbor  push_static_text("num_substrings"); push_ulongest(num_substring);
0ddc5d2015-11-14Martin Nilsson  push_static_text("num_malloced_strings"); push_ulongest(num_malloc);
3c08602008-05-02Martin Stjernholm }
a495ca2014-08-23Arne Goedeke size_t count_memory_in_string(const struct pike_string * s) { size_t size = sizeof(struct pike_string);
60e6cf2013-11-09Arne Goedeke 
3587242015-08-22Per Hedbor  switch (s->alloc_type) { case STRING_ALLOC_SUBSTRING: size += sizeof( struct pike_string *); break;
a495ca2014-08-23Arne Goedeke  case STRING_ALLOC_BA: size += sizeof(struct pike_string); break; case STRING_ALLOC_MALLOC: size += PIKE_ALIGNTO(((s->len + 1) << s->size_shift), 4); break;
0ddc5d2015-11-14Martin Nilsson  case STRING_ALLOC_STATIC: break;
a495ca2014-08-23Arne Goedeke  }
60e6cf2013-11-09Arne Goedeke 
a495ca2014-08-23Arne Goedeke  return size;
041a532012-03-08Henrik Grubbström (Grubba) }
a495ca2014-08-23Arne Goedeke void count_memory_in_strings(size_t *num, size_t *_size)
c3c7031996-12-04Fredrik Hübinette (Hubbe) {
f757d02008-05-01Martin Stjernholm  unsigned INT32 e;
a495ca2014-08-23Arne Goedeke  size_t size = 0; *num = num_strings; size+=htable_size * sizeof(struct pike_string *); for (e = 0; e < htable_size; e++) { struct pike_string * s; for (s = base_table[e]; s; s = s->next) { size += count_memory_in_string(s); }
c3c7031996-12-04Fredrik Hübinette (Hubbe)  }
a495ca2014-08-23Arne Goedeke  *_size = size;
c3c7031996-12-04Fredrik Hübinette (Hubbe) }
9367351997-01-27Fredrik Hübinette (Hubbe) 
d056542014-06-17Henrik Grubbström (Grubba) PMOD_EXPORT void visit_string (struct pike_string *s, int action, void *extra)
ad8d052008-05-02Martin Stjernholm {
c42e092014-06-18Henrik Grubbström (Grubba)  visit_enter(s, T_STRING, extra);
8775df2015-06-04Martin Karlgren  switch (action & VISIT_MODE_MASK) {
5e83442008-05-11Martin Stjernholm #ifdef PIKE_DEBUG default: Pike_fatal ("Unknown visit action %d.\n", action); case VISIT_NORMAL: case VISIT_COMPLEX_ONLY: break; #endif case VISIT_COUNT_BYTES:
a495ca2014-08-23Arne Goedeke  mc_counted_bytes += count_memory_in_string (s);
5e83442008-05-11Martin Stjernholm  break; }
c42e092014-06-18Henrik Grubbström (Grubba)  visit_leave(s, T_STRING, extra);
ad8d052008-05-02Martin Stjernholm }
#ifdef PIKE_DEBUG
/* Call debug_gc_touch() on every string in the string table and
 * return how many strings were touched (0 if there is no table). */
unsigned gc_touch_all_strings(void)
{
  unsigned INT32 bucket;
  unsigned touched = 0;

  if (!base_table) return 0;

  for(bucket = 0; bucket < htable_size; bucket++)
  {
    struct pike_string *cur = base_table[bucket];
    while(cur)
    {
      debug_gc_touch(cur);
      touched++;
      cur = cur->next;
    }
  }
  return touched;
}

/* Call gc_is_referenced() on every string in the string table. */
void gc_mark_all_strings(void)
{
  unsigned INT32 bucket;

  if(!base_table) return;

  for(bucket = 0; bucket < htable_size; bucket++)
  {
    struct pike_string *cur = base_table[bucket];
    while(cur)
    {
      gc_is_referenced(cur);
      cur = cur->next;
    }
  }
}
#endif
db4a401998-10-09Fredrik Hübinette (Hubbe) 
48d1592015-03-16Martin Nilsson PMOD_EXPORT struct pike_string *next_pike_string (const struct pike_string *s)
f7cfa82001-09-04Martin Stjernholm { struct pike_string *next = s->next; if (!next) { size_t h = s->hval; do { h++;
e85df82001-09-06Fredrik Hübinette (Hubbe)  h = HMODULO(h);
f7cfa82001-09-04Martin Stjernholm  next = base_table[h]; } while (!next); } return next; }
db4a401998-10-09Fredrik Hübinette (Hubbe) 
fa7ecb2014-12-08Martin Nilsson PMOD_EXPORT PCHARP MEMCHR_PCHARP(const PCHARP ptr, int chr, ptrdiff_t len)
0bc4cf1998-10-13Fredrik Hübinette (Hubbe) { switch(ptr.shift) {
98c0302014-09-03Martin Nilsson  case 0: return MKPCHARP(memchr(ptr.ptr,chr,len),0);
efae671998-10-21Fredrik Hübinette (Hubbe)  case 1: return MKPCHARP(MEMCHR1((p_wchar1 *)ptr.ptr,chr,len),1); case 2: return MKPCHARP(MEMCHR2((p_wchar2 *)ptr.ptr,chr,len),2);
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  }
9282fd2015-09-27Martin Nilsson  UNREACHABLE(MKPCHARP(0,0));
0bc4cf1998-10-13Fredrik Hübinette (Hubbe) }
/* Value of the digit character x according to the hexdecode[] table;
 * characters outside 0..255 give 16. The range test is done on the
 * unsigned value: the callers keep the character in an int, and a
 * negative value must not be used as an index into the table.
 * NOTE(review): 16 is itself a valid digit value for bases above 16 -
 * confirm that this is the intended result for wide characters. */
#define DIGIT(x) ( (unsigned int)(x)<256 ? hexdecode[x] : 16 )
/* Largest supported base: the ten digits plus the letters 'a'..'z'. */
#define MBASE ('z' - 'a' + 1 + 10)
0c73822016-12-18Martin Nilsson PMOD_EXPORT long STRTOL_PCHARP(PCHARP str, PCHARP *ptr, int base)
0bc4cf1998-10-13Fredrik Hübinette (Hubbe) {
ddecab2017-01-11Martin Nilsson  /* Note: Code duplication in pcharp_to_svalue_inumber. */
90784a2003-02-26Martin Stjernholm  unsigned long val, mul_limit; int c;
0c73822016-12-18Martin Nilsson  int xx, neg = 0, add_limit, overflow = 0;
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  if (ptr) *ptr = str;
0c73822016-12-18Martin Nilsson  if (base < 0 || base > MBASE) return 0;
bbee342000-03-31Fredrik Hübinette (Hubbe)  if (!WIDE_ISALNUM(c = EXTRACT_PCHARP(str)))
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  {
c054542016-12-17Martin Nilsson  while (wide_isspace(c))
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  { INC_PCHARP(str,1); c=EXTRACT_PCHARP(str); } switch (c) { case '-': neg++;
f74ecc2009-11-30Arne Goedeke  /* FALL_THROUGH */ case '+':
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  INC_PCHARP(str,1); c=EXTRACT_PCHARP(str); } } if (!base) { if (c != '0') base = 10; else if (INDEX_PCHARP(str,1) == 'x' || INDEX_PCHARP(str,1) == 'X') base = 16; else base = 8; }
ddecab2017-01-11Martin Nilsson  if (DIGIT(c) >= base)
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  return 0; /* no number formed */
ddecab2017-01-11Martin Nilsson  if (base == 16 && c == '0' && DIGIT(INDEX_PCHARP(str,2))<16 &&
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  (INDEX_PCHARP(str,1) == 'x' || INDEX_PCHARP(str,1) == 'X')) { INC_PCHARP(str,2); c = EXTRACT_PCHARP(str); /* skip over leading "0x" or "0X" */ }
90784a2003-02-26Martin Stjernholm 
a549642003-03-17Henrik Grubbström (Grubba)  mul_limit = ((unsigned long)LONG_MAX)/base; add_limit = (int) (LONG_MAX % base);
90784a2003-02-26Martin Stjernholm  if (neg) {
a549642003-03-17Henrik Grubbström (Grubba)  if (++add_limit == base) { add_limit = 0; mul_limit++; }
90784a2003-02-26Martin Stjernholm  }
0c73822016-12-18Martin Nilsson  val=DIGIT(c);
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  while(1) { INC_PCHARP(str,1); c=EXTRACT_PCHARP(str);
ddecab2017-01-11Martin Nilsson  if( (xx=DIGIT(c)) >= base ) break;
a549642003-03-17Henrik Grubbström (Grubba)  if (val > mul_limit || (val == mul_limit && xx > add_limit)) {
90784a2003-02-26Martin Stjernholm  overflow = 1;
a549642003-03-17Henrik Grubbström (Grubba)  } else
90784a2003-02-26Martin Stjernholm  val = base * val + xx;
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  }
90784a2003-02-26Martin Stjernholm 
0bc4cf1998-10-13Fredrik Hübinette (Hubbe)  if (ptr) *ptr = str;
90784a2003-02-26Martin Stjernholm  if (overflow) { errno = ERANGE; return neg ? LONG_MIN : LONG_MAX; } else { if (neg)
a549642003-03-17Henrik Grubbström (Grubba)  return (long)(~val + 1);
90784a2003-02-26Martin Stjernholm  else return (long) val; }
0bc4cf1998-10-13Fredrik Hübinette (Hubbe) }
efae671998-10-21Fredrik Hübinette (Hubbe) 
66d9282011-05-01Per Hedbor int wide_string_to_svalue_inumber(struct svalue *r,
3d3d722014-12-15Martin Nilsson  void * str, void *ptr, int base, ptrdiff_t maxlength,
ed04142015-09-27Per Hedbor  enum size_shift shift)
011ad31999-10-22Fredrik Hübinette (Hubbe) { PCHARP tmp; int ret=pcharp_to_svalue_inumber(r, MKPCHARP(str,shift), &tmp, base, maxlength);
f4778d2003-07-30Martin Stjernholm  if(ptr) *(p_wchar0 **)ptr=tmp.ptr;
011ad31999-10-22Fredrik Hübinette (Hubbe)  return ret; }
15f63a2008-07-23Martin Stjernholm int safe_wide_string_to_svalue_inumber(struct svalue *r, void * str, void *ptr, int base, ptrdiff_t maxlength,
ed04142015-09-27Per Hedbor  enum size_shift shift)
15f63a2008-07-23Martin Stjernholm /* For use from the lexer where we can't let errors be thrown. */ { PCHARP tmp; JMP_BUF recovery; int ret = 0; free_svalue (&throw_value); mark_free_svalue (&throw_value); if (SETJMP (recovery)) { /* We know that pcharp_to_svalue_inumber has initialized the * svalue before any error might be thrown. */ call_handle_error(); ret = 0; } else ret = pcharp_to_svalue_inumber(r, MKPCHARP(str,shift), &tmp, base, maxlength); UNSETJMP (recovery); if(ptr) *(p_wchar0 **)ptr=tmp.ptr; return ret; }
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT int pcharp_to_svalue_inumber(struct svalue *r,
8c37d12000-08-15Henrik Grubbström (Grubba)  PCHARP str, PCHARP *ptr,
0c73822016-12-18Martin Nilsson  int base,
8c37d12000-08-15Henrik Grubbström (Grubba)  ptrdiff_t maxlength)
31ea271999-10-22Fredrik Noring {
ddecab2017-01-11Martin Nilsson  /* Note: Code duplication in STRTOL_PCHARP. */
90784a2003-02-26Martin Stjernholm 
011ad31999-10-22Fredrik Hübinette (Hubbe)  PCHARP str_start;
13670c2015-05-25Martin Nilsson 
90784a2003-02-26Martin Stjernholm  unsigned INT_TYPE val, mul_limit; int c;
0c73822016-12-18Martin Nilsson  int xx, neg = 0, add_limit, overflow = 0;
31ea271999-10-22Fredrik Noring  maxlength--; /* max_length <= 0 means no max length. */ str_start = str; /* In case no number is formed. */
017b572011-10-28Henrik Grubbström (Grubba)  SET_SVAL(*r, T_INT, NUMBER_NUMBER, integer, 0);
31ea271999-10-22Fredrik Noring  if(ptr != 0) *ptr = str;
13670c2015-05-25Martin Nilsson 
0c73822016-12-18Martin Nilsson  if(base < 0 || MBASE < base)
31ea271999-10-22Fredrik Noring  return 0;
13670c2015-05-25Martin Nilsson 
bbee342000-03-31Fredrik Hübinette (Hubbe)  if(!WIDE_ISALNUM(c = EXTRACT_PCHARP(str)))
31ea271999-10-22Fredrik Noring  {
c054542016-12-17Martin Nilsson  while(wide_isspace(c))
011ad31999-10-22Fredrik Hübinette (Hubbe)  { INC_PCHARP(str,1); c = EXTRACT_PCHARP(str); }
13670c2015-05-25Martin Nilsson 
31ea271999-10-22Fredrik Noring  switch (c) { case '-': neg++; /* Fall-through. */ case '+':
011ad31999-10-22Fredrik Hübinette (Hubbe)  INC_PCHARP(str,1); c = EXTRACT_PCHARP(str);
31ea271999-10-22Fredrik Noring  } }
13670c2015-05-25Martin Nilsson 
31ea271999-10-22Fredrik Noring  if(base == 0) { if(c != '0') base = 10;
011ad31999-10-22Fredrik Hübinette (Hubbe)  else if(INDEX_PCHARP(str,1) == 'x' || INDEX_PCHARP(str,1) == 'X')
31ea271999-10-22Fredrik Noring  base = 16;
5656451999-10-26Fredrik Noring  else if(INDEX_PCHARP(str,1) == 'b' || INDEX_PCHARP(str,1) == 'B') base = 2;
31ea271999-10-22Fredrik Noring  else base = 8; }
13670c2015-05-25Martin Nilsson 
0c73822016-12-18Martin Nilsson  /* * For any base > 10, the digits incrementally following * 9 are assumed to be "abc...z" or "ABC...Z". */
ddecab2017-01-11Martin Nilsson  if(DIGIT(c) >= base)
31ea271999-10-22Fredrik Noring  return 0; /* No number formed. */
5739832013-08-02Arne Goedeke  if(c == '0' &&
5656451999-10-26Fredrik Noring  ((base==16 && (INDEX_PCHARP(str,1)=='x' || INDEX_PCHARP(str,1)=='X')) ||
5739832013-08-02Arne Goedeke  (base==2 && (INDEX_PCHARP(str,1)=='b' || INDEX_PCHARP(str,1)=='B'))) &&
ddecab2017-01-11Martin Nilsson  DIGIT(INDEX_PCHARP(str,2))<16 )
011ad31999-10-22Fredrik Hübinette (Hubbe)  {
11e89c1999-10-30Fredrik Noring  /* Skip over leading "0x", "0X", "0b" or "0B". */
011ad31999-10-22Fredrik Hübinette (Hubbe)  INC_PCHARP(str,2); c=EXTRACT_PCHARP(str); }
29406f2001-06-05Fredrik Hübinette (Hubbe)  str_start=str;
90784a2003-02-26Martin Stjernholm  if (neg) { mul_limit = (unsigned INT_TYPE) MIN_INT_TYPE / base; add_limit = (int) ((unsigned INT_TYPE) MIN_INT_TYPE % base); } else { mul_limit = MAX_INT_TYPE / base; add_limit = (int) (MAX_INT_TYPE % base); }
13670c2015-05-25Martin Nilsson 
0c73822016-12-18Martin Nilsson  for(val = DIGIT(c);
bbee342000-03-31Fredrik Hübinette (Hubbe)  (INC_PCHARP(str,1), WIDE_ISALNUM(c = EXTRACT_PCHARP(str) )) &&
011ad31999-10-22Fredrik Hübinette (Hubbe)  (xx = DIGIT(c)) < base && 0 != maxlength--; )
31ea271999-10-22Fredrik Noring  {
90784a2003-02-26Martin Stjernholm  if (val > mul_limit || (val == mul_limit && xx > add_limit)) overflow = 1; else val = base * val + xx;
31ea271999-10-22Fredrik Noring  }
13670c2015-05-25Martin Nilsson 
31ea271999-10-22Fredrik Noring  if(ptr != 0) *ptr = str;
90784a2003-02-26Martin Stjernholm  if (overflow) {
011ad31999-10-22Fredrik Hübinette (Hubbe)  push_string(make_shared_binary_pcharp(str_start, SUBTRACT_PCHARP(str,str_start)));
90784a2003-02-26Martin Stjernholm  /* Note that this can conceivably throw errors()
011ad31999-10-22Fredrik Hübinette (Hubbe)  * in some situations that might not be desirable... * take care. * /Hubbe
29406f2001-06-05Fredrik Hübinette (Hubbe)  * * It could probably also be faster...
011ad31999-10-22Fredrik Hübinette (Hubbe)  */
29406f2001-06-05Fredrik Hübinette (Hubbe)  push_int(base); convert_stack_top_with_base_to_bignum(); if(neg) o_negate();
13670c2015-05-25Martin Nilsson 
9b150a2002-05-11Martin Nilsson  *r = *--Pike_sp;
90784a2003-02-26Martin Stjernholm  dmalloc_touch_svalue (r);
31ea271999-10-22Fredrik Noring  }
90784a2003-02-26Martin Stjernholm  else { if (neg) r->u.integer = val > (unsigned INT_TYPE) MAX_INT_TYPE ? -(INT_TYPE) (val - (unsigned INT_TYPE) MAX_INT_TYPE) - MAX_INT_TYPE : -(INT_TYPE) val; else r->u.integer = (INT_TYPE) val; }
31ea271999-10-22Fredrik Noring  return 1; }
1f21332000-07-28Fredrik Hübinette (Hubbe) PMOD_EXPORT int convert_stack_top_string_to_inumber(int base)
31ea271999-10-22Fredrik Noring { struct svalue r;
775f3d1999-10-23Fredrik Noring  int i;
31ea271999-10-22Fredrik Noring 
017b572011-10-28Henrik Grubbström (Grubba)  if(TYPEOF(Pike_sp[-1]) != T_STRING)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error("Cannot convert stack top to integer number.\n");
13670c2015-05-25Martin Nilsson 
9b150a2002-05-11Martin Nilsson  i=pcharp_to_svalue_inumber(&r, MKPCHARP_STR(Pike_sp[-1].u.string), 0, base, 0);
13670c2015-05-25Martin Nilsson 
9b150a2002-05-11Martin Nilsson  free_string(Pike_sp[-1].u.string); Pike_sp[-1] = r;
775f3d1999-10-23Fredrik Noring  return i;
31ea271999-10-22Fredrik Noring }
f3ece81999-02-28Fredrik Hübinette (Hubbe) /* Convert PCHARP to a double. If ENDPTR is not NULL, a pointer to the character after the last one used in the number is put in *ENDPTR. */
3d3d722014-12-15Martin Nilsson PMOD_EXPORT double STRTOD_PCHARP(const PCHARP nptr, PCHARP *endptr)
f3ece81999-02-28Fredrik Hübinette (Hubbe) {
21b12a2014-09-03Martin Nilsson  /* Note: Code duplication in strtod. */
71d7d52003-02-26Martin Stjernholm 
636bc52014-11-01Martin Nilsson  PCHARP s;
f3ece81999-02-28Fredrik Hübinette (Hubbe)  short int sign; /* The number so far. */ double num; int got_dot; /* Found a decimal point. */ int got_digit; /* Seen any digits. */ /* The exponent of the number. */ long int exponent; if (nptr.ptr == NULL) { errno = EINVAL; goto noconv; } s = nptr; /* Eat whitespace. */
a577232016-12-17Martin Nilsson  while (wide_isspace(EXTRACT_PCHARP(s))) INC_PCHARP(s,1);
f3ece81999-02-28Fredrik Hübinette (Hubbe)  /* Get the sign. */ sign = EXTRACT_PCHARP(s) == '-' ? -1 : 1; if (EXTRACT_PCHARP(s) == '-' || EXTRACT_PCHARP(s) == '+') INC_PCHARP(s,1); num = 0.0; got_dot = 0; got_digit = 0; exponent = 0; for (;; INC_PCHARP(s,1)) {
ddecab2017-01-11Martin Nilsson  if (WIDE_ISDIGIT (EXTRACT_PCHARP(s)))
f3ece81999-02-28Fredrik Hübinette (Hubbe)  { got_digit = 1; /* Make sure that multiplication by 10 will not overflow. */ if (num > DBL_MAX * 0.1) /* The value of the digit doesn't matter, since we have already gotten as many digits as can be represented in a `double'. This doesn't necessarily mean the result will overflow. The exponent may reduce it to within range.
13670c2015-05-25Martin Nilsson 
f3ece81999-02-28Fredrik Hübinette (Hubbe)  We just need to record that there was another digit so that we can multiply by 10 later. */ ++exponent; else num = (num * 10.0) + (EXTRACT_PCHARP(s) - '0'); /* Keep track of the number of digits after the decimal point. If we just divided by 10 here, we would lose precision. */ if (got_dot) --exponent; } else if (!got_dot && (char) EXTRACT_PCHARP(s) == '.') /* Record that we have found the decimal point. */ got_dot = 1; else /* Any other character terminates the number. */ break; } if (!got_digit) goto noconv;
2b315b2017-01-11Martin Nilsson  if (EXTRACT_PCHARP(s) == 'E' || EXTRACT_PCHARP(s) == 'e')
f3ece81999-02-28Fredrik Hübinette (Hubbe)  { /* Get the exponent specified after the `e' or `E'. */ int save = errno; PCHARP end; long int exp; errno = 0; INC_PCHARP(s,1); exp = STRTOL_PCHARP(s, &end, 10); if (errno == ERANGE) { /* The exponent overflowed a `long int'. It is probably a safe assumption that an exponent that cannot be represented by a `long int' exceeds the limits of a `double'. */
2ae97e2014-09-03Martin Nilsson  /* NOTE: Don't trust the value returned from strtol.
7ad4122003-10-03Henrik Grubbström (Grubba)  * We need to find the sign of the exponent by hand. */ p_wchar2 c;
c054542016-12-17Martin Nilsson  while(wide_isspace(c = EXTRACT_PCHARP(s))) {
7ad4122003-10-03Henrik Grubbström (Grubba)  INC_PCHARP(s, 1); }
f3ece81999-02-28Fredrik Hübinette (Hubbe)  if (endptr != NULL) *endptr = end;
7ad4122003-10-03Henrik Grubbström (Grubba)  if (c == '-')
f3ece81999-02-28Fredrik Hübinette (Hubbe)  goto underflow; else goto overflow; } else if (COMPARE_PCHARP(end,==,s)) /* There was no exponent. Reset END to point to the 'e' or 'E', so *ENDPTR will be set there. */ end = ADD_PCHARP(s,-1); errno = save; s = end; exponent += exp; } if(got_dot && INDEX_PCHARP(s,-1)=='.') INC_PCHARP(s,-1); if (endptr != NULL) *endptr = s; if (num == 0.0) return 0.0; /* Multiply NUM by 10 to the EXPONENT power, checking for overflow and underflow. */ if (exponent < 0) { if (num < DBL_MIN * pow(10.0, (double) -exponent)) goto underflow; } else if (exponent > 0) { if (num > DBL_MAX * pow(10.0, (double) -exponent)) goto overflow; }
bbee342000-03-31Fredrik Hübinette (Hubbe)  if(exponent < 0 && exponent >-100) /* make sure we don't underflow */ num /= pow(10.0, (double) -exponent); else num *= pow(10.0, (double) exponent);
f3ece81999-02-28Fredrik Hübinette (Hubbe)  return num * sign; overflow:
a4a1722000-12-05Per Hedbor  /* Return an overflow error. */
f3ece81999-02-28Fredrik Hübinette (Hubbe)  errno = ERANGE;
90784a2003-02-26Martin Stjernholm  return HUGE_VAL * sign;
f3ece81999-02-28Fredrik Hübinette (Hubbe)  underflow:
a4a1722000-12-05Per Hedbor  /* Return an underflow error. */
f3ece81999-02-28Fredrik Hübinette (Hubbe)  errno = ERANGE; return 0.0;
13670c2015-05-25Martin Nilsson 
f3ece81999-02-28Fredrik Hübinette (Hubbe)  noconv: /* There was no number. */ if (endptr != NULL) *endptr = nptr; return 0.0; }
3d3d722014-12-15Martin Nilsson PMOD_EXPORT p_wchar0 *require_wstring0(const struct pike_string *s, char **to_free)
efae671998-10-21Fredrik Hübinette (Hubbe) { switch(s->size_shift) { case 0: *to_free=0; return STR0(s); case 1: case 2: return 0; }
759c422015-10-17Martin Nilsson  UNREACHABLE(return 0);
efae671998-10-21Fredrik Hübinette (Hubbe) }
3d3d722014-12-15Martin Nilsson PMOD_EXPORT p_wchar1 *require_wstring1(const struct pike_string *s, char **to_free)
efae671998-10-21Fredrik Hübinette (Hubbe) { switch(s->size_shift) { case 0: *to_free=xalloc((s->len+1)*2); convert_0_to_1((p_wchar1 *)*to_free, STR0(s),s->len+1); return (p_wchar1 *)*to_free; case 1: *to_free=0; return STR1(s); case 2: return 0; }
759c422015-10-17Martin Nilsson  UNREACHABLE(return 0);
efae671998-10-21Fredrik Hübinette (Hubbe) }
3d3d722014-12-15Martin Nilsson PMOD_EXPORT p_wchar2 *require_wstring2(const struct pike_string *s, char **to_free)
efae671998-10-21Fredrik Hübinette (Hubbe) { switch(s->size_shift) { case 0: *to_free=xalloc((s->len+1)*4); convert_0_to_2((p_wchar2 *)*to_free, STR0(s),s->len+1); return (p_wchar2 *)*to_free; case 1: *to_free=xalloc((s->len+1)*4); convert_1_to_2((p_wchar2 *)*to_free, STR1(s),s->len+1); return (p_wchar2 *)*to_free; case 2: *to_free=0; return STR2(s); }
759c422015-10-17Martin Nilsson  UNREACHABLE(return 0);
efae671998-10-21Fredrik Hübinette (Hubbe) }
c054542016-12-17Martin Nilsson  PMOD_EXPORT int wide_isspace(int c) { switch(c) { SPACECASE16; return 1; } return 0; }
48c4ef2016-12-17Martin Nilsson  PMOD_EXPORT int wide_isidchar(int c) { if(c<0) return 0;
5e6dc22017-01-10Martin Nilsson  if(c<256) return isidchar(c);
48c4ef2016-12-17Martin Nilsson  if(wide_isspace(c)) return 0; return 1; }
c77edb2017-01-25Arne Goedeke  /* * UTF8 encoding functions. This code uses the following observation: * * For codepoints c > 0x7f, the length of the resulting utf8 encoding is * 1 + (fls32(c) - 2)/5. * * This is used to calculate the resulting length without any branches. */ #if defined(HAS___BUILTIN_POPCOUNTLL) && SIZEOF_CHARP == 8 # define poptype unsigned long long # define POPCOUNT __builtin_popcountll #elif defined(HAS___BUILTIN_POPCOUNTL) # define poptype unsigned long # define POPCOUNT __builtin_popcountl #else # if SIZEOF_CHARP == 8 # define poptype unsigned INT64 # else # define poptype unsigned INT32 # endif # define POPCOUNT popcount_fallback const poptype m1 = (poptype)0x5555555555555555ULL; const poptype m2 = (poptype)0x3333333333333333ULL; const poptype m4 = (poptype)0x0f0f0f0f0f0f0f0fULL; static unsigned int popcount_fallback(poptype x) { x -= (x >> 1) & m1; x = (x & m2) + ((x >> 2) & m2); x = (x + (x >> 4)) & m4; x += x >> 8; x += x >> 16; # if SIZEOF_CHARP == 8 x += x >> 32; # endif return x & 0x7f; } #endif /* The reason for using handmade divide is that on some arm32 targets there is * no integer divisions. The reason why we can do better than the compiler is * that we know that the argument is smaller than 32. */ static inline unsigned INT32 div5_8bit(unsigned INT32 x) { return ((x * 0xCD) >> 8) >> 2; }
e678a12017-10-08Arne Goedeke ATTRIBUTE((noinline)) static size_t pike_string_utf8_decode_length_slowpath(size_t len, const unsigned char *in, const unsigned char *end, INT32 args, int extended, int *_shift) { int shift = 0; for(; in < end; in++) { unsigned int c = *in; len++; if (LIKELY(!(c & 0x80))) continue; int cont = 0; /* From table 3-6 in the Unicode standard 4.0: Well-Formed UTF-8 * Byte Sequences * * Code Points 1st Byte 2nd Byte 3rd Byte 4th Byte * 000000-00007f 00-7f * 000080-0007ff c2-df 80-bf * 000800-000fff e0 a0-bf 80-bf * 001000-00cfff e1-ec 80-bf 80-bf * 00d000-00d7ff ed 80-9f 80-bf * 00e000-00ffff ee-ef 80-bf 80-bf * 010000-03ffff f0 90-bf 80-bf 80-bf * 040000-0fffff f1-f3 80-bf 80-bf 80-bf * 100000-10ffff f4 80-8f 80-bf 80-bf */ if ((c & 0xc0) == 0x80) { bad_arg_error ("utf8_to_string", Pike_sp - args, args, 1, NULL, Pike_sp - args, "Invalid continuation character 0x%02x.\n", c); } #define GET_CHAR(in, c) do { \ in++; \ if (in >= end) \ bad_arg_error ("utf8_to_string", Pike_sp - args, args, 1, \ NULL, Pike_sp - args, \ "Truncated UTF-8 sequence at end of string.\n"); \ c = *in; \ } while(0) #define GET_CONT_CHAR(in, c) do { \ GET_CHAR(in, c); \ if ((c & 0xc0) != 0x80) \ bad_arg_error ("utf8_to_string", Pike_sp - args, args, 1, \ NULL, Pike_sp - args, \ "Expected continuation character, " \ "got 0x%02x.\n", \ c); \ } while (0) #define UTF8_SEQ_ERROR(prefix, c, problem) do { \ bad_arg_error ("utf8_to_string", Pike_sp - args, args, 1, \ NULL, Pike_sp - args, \ "UTF-8 sequence beginning with %s0x%02x " \ " %s.\n", \ prefix, c, problem); \ } while (0) if ((c & 0xe0) == 0xc0) { /* 11bit */ if (!(c & 0x1e)) UTF8_SEQ_ERROR ("", c, "is a non-shortest form"); cont = 1; if (c & 0x1c) { if (shift < 1) { shift = 1; } } } else if ((c & 0xf0) == 0xe0) { /* 16bit */ if (c == 0xe0) { GET_CONT_CHAR (in, c); if (!(c & 0x20)) UTF8_SEQ_ERROR ("0xe0 ", c, "is a non-shortest form"); cont = 1; } else if (!(extended & 1) && c == 0xed) { GET_CONT_CHAR (in, c); if (c & 
0x20) { /* Surrogate. */ if (!(extended & 2)) { UTF8_SEQ_ERROR ("0xed ", c, "would decode to " "a UTF-16 surrogate character"); } if (c & 0x10) { UTF8_SEQ_ERROR ("0xed ", c, "would decode to " "a UTF-16 low surrogate character"); } GET_CONT_CHAR(in, c); GET_CHAR (in, c); if (c != 0xed) { UTF8_SEQ_ERROR ("", c, "UTF-16 low surrogate " "character required"); } GET_CONT_CHAR (in, c); if ((c & 0xf0) != 0xb0) { UTF8_SEQ_ERROR ("0xed ", c, "UTF-16 low surrogate " "character required"); } shift = 2; } cont = 1; } else cont = 2; if (shift < 1) { shift = 1; } } else { if ((c & 0xf8) == 0xf0) { /* 21bit */ if (c == 0xf0) { GET_CONT_CHAR (in, c); if (!(c & 0x30)) UTF8_SEQ_ERROR ("0xf0 ", c, "is a non-shortest form"); cont = 2; } else if (!(extended & 1)) { if (c > 0xf4) UTF8_SEQ_ERROR ("", c, "would decode to " "a character outside the valid UTF-8 range"); else if (c == 0xf4) { GET_CONT_CHAR (in, c); if (c > 0x8f) UTF8_SEQ_ERROR ("0xf4 ", c, "would decode to " "a character outside the valid UTF-8 range"); cont = 2; } else cont = 3; } else cont = 3; } else if (c == 0xff) bad_arg_error ("utf8_to_string", Pike_sp - args, args, 1, NULL, Pike_sp - args, "Invalid character 0xff"); else if (!(extended & 1)) UTF8_SEQ_ERROR ("", c, "would decode to " "a character outside the valid UTF-8 range"); else { if ((c & 0xfc) == 0xf8) { /* 26bit */ if (c == 0xf8) { GET_CONT_CHAR (in, c); if (!(c & 0x38)) UTF8_SEQ_ERROR ("0xf8 ", c, "is a non-shortest form"); cont = 3; } else cont = 4; } else if ((c & 0xfe) == 0xfc) { /* 31bit */ if (c == 0xfc) { GET_CONT_CHAR (in, c); if (!(c & 0x3c)) UTF8_SEQ_ERROR ("0xfc ", c, "is a non-shortest form"); cont = 4; } else cont = 5; } else if (c == 0xfe) { /* 36bit */ GET_CONT_CHAR (in, c); if (!(c & 0x3e)) UTF8_SEQ_ERROR ("0xfe ", c, "is a non-shortest form"); else if (c & 0x3c) UTF8_SEQ_ERROR ("0xfe ", c, "would decode to " "a too large character value"); cont = 5; } } shift = 2; } while(cont--) GET_CONT_CHAR (in, c); #undef GET_CHAR #undef GET_CONT_CHAR 
#undef UTF8_SEQ_ERROR } *_shift = shift; return len; }
8c893d2017-10-11Arne Goedeke PMOD_EXPORT size_t pike_string_utf8_decode_length(const unsigned char *in, size_t inlen, INT32 args, int extended, int *_shift) {
e678a12017-10-08Arne Goedeke  static volatile poptype foo = (poptype)0x8080808080808080ULL; size_t len = 0; size_t elen; const poptype mask = foo;
8c893d2017-10-11Arne Goedeke  const poptype *in8 = (poptype*)in; const poptype *end8 = in8 + (inlen / sizeof(poptype)); const unsigned char *end = in + inlen;
e678a12017-10-08Arne Goedeke  if (in8 < end8) { const size_t tail = (size_t)(end8 - in8) % 4; elen = tail*sizeof(poptype); poptype a = 0, b = 0, c = 0, d = 0; in8 += tail; switch (tail) { do { case 0: in8 += 4; elen = sizeof(poptype)*4; a = in8[-4]; case 3: b = in8[-3]; case 2: c = in8[-2]; case 1: d = in8[-1]; a &= mask; b &= mask; c &= mask; d &= mask; if (UNLIKELY(a | b | c | d)) { /* we have to begin from the beginning of the last chunk */ in = (const unsigned char*)(in8) - elen; goto not_7bit; } len += elen; } while (in8 < end8); break; default: UNREACHABLE(break); } } /* process the single byte tail */
8c893d2017-10-11Arne Goedeke  elen = (size_t)inlen % sizeof(poptype);
e678a12017-10-08Arne Goedeke  if (elen) { poptype a = 0; in = (const unsigned char*)in8; switch (7-elen) { case 0: a |= in[0] & 0x80; case 1: a |= in[1] & 0x80; case 2: a |= in[2] & 0x80; case 3: a |= in[3] & 0x80; case 4: a |= in[4] & 0x80; case 5: a |= in[5] & 0x80; case 6: a |= in[6] & 0x80; break; default: UNREACHABLE(break); } if (UNLIKELY(a)) { goto not_7bit; } len += elen; } *_shift = 0; return len; not_7bit:
8c893d2017-10-11Arne Goedeke  return pike_string_utf8_decode_length_slowpath(len, in, end, args,
e678a12017-10-08Arne Goedeke  extended, _shift); }
c77edb2017-01-25Arne Goedeke PMOD_EXPORT size_t pike_string_utf8_length(const struct pike_string *s, INT32 args, int extended) { size_t len = s->len; size_t elen = s->len; /* This 8bit version counts the number of high bits in each byte. * The loop is unrolled 4 times and starts with a Duff's device style * loop entry to process the tail. */ if (LIKELY(s->size_shift == eightbit)) { /* * We force the compiler to load this constant only once. Otherwise, * GCC will decide to reload it into a register 4 times per loop. */ static volatile poptype foo = (poptype)0x8080808080808080ULL; const unsigned char *in; const poptype mask = foo; const poptype *in8 = (poptype*)STR0(s); const poptype *end8 = in8 + (s->len / sizeof(poptype)); if (in8 < end8) { const size_t tail = (size_t)(end8 - in8) % 4; poptype a = 0, b = 0, c = 0, d = 0; in8 += tail; switch (tail) { do { case 0: in8 += 4; a = in8[-4]; case 3: b = in8[-3]; case 2: c = in8[-2]; case 1: d = in8[-1]; a &= mask; b &= mask; c &= mask; d &= mask; b >>= 1; c >>= 2; d >>= 3; elen += POPCOUNT(a | b | c | d); } while (in8 < end8); break; default: UNREACHABLE(break); } } in = (unsigned char*)end8; switch ((size_t)s->len % sizeof(poptype)) { case 7: elen += (*in++) >> 7; case 6: elen += (*in++) >> 7; case 5: elen += (*in++) >> 7; case 4: elen += (*in++) >> 7; case 3: elen += (*in++) >> 7; case 2: elen += (*in++) >> 7; case 1: elen += (*in++) >> 7; case 0: break; default: UNREACHABLE(break); } return elen; #undef poptype #undef POPCOUNT } else { unsigned INT32 c; size_t i; if (s->size_shift == sixteenbit) { unsigned INT16 *in = (unsigned INT16*)STR1(s); for (i = 0; i < len; i++) { c = in[i]; if (c <= 0x7f) continue; elen += div5_8bit(fls32(c) - 2); if (extended) continue; if (UNLIKELY(c >= 0xd800 && c <= 0xdfff)) goto surrogate_error; } } else { unsigned INT32 *in = (unsigned INT32*)STR2(s); for (i = 0; i < len; i++) { c = in[i]; if (c <= 0x7f) continue; elen += div5_8bit(fls32(c) - 2); if (extended) continue; if (UNLIKELY(c >= 
0xd800 && c <= 0xdfff)) goto surrogate_error; if (UNLIKELY(c > 0x10ffff)) goto extended_error; } } return elen; surrogate_error: bad_arg_error ("string_to_utf8", Pike_sp - args, args, 1, NULL, Pike_sp - args, "Character 0x%08x at index %"PRINTPTRDIFFT"d is " "in the surrogate range and therefore invalid.\n", c, i); extended_error: bad_arg_error ("string_to_utf8", Pike_sp - args, args, 1, NULL, Pike_sp - args, "Character 0x%08x at index %"PRINTPTRDIFFT"d is " "outside the allowed range.\n", c, i); } UNREACHABLE(return 0); }
8c893d2017-10-11Arne Goedeke PMOD_EXPORT struct pike_string *pike_string_utf8_decode(const p_wchar0 *in_str, enum size_shift shift, ptrdiff_t len) { struct pike_string *out = begin_wide_shared_string(len, shift); switch (shift) { case eightbit: { p_wchar0 *out_str = STR0 (out); for(ptrdiff_t j=0; j < len; j++) { unsigned int c = *(in_str++); /* NOTE: No tests here since we've already tested the string above. */ if (c & 0x80) { /* 11bit */ unsigned int c2 = *(in_str++) & 0x3f; c &= 0x1f; c = (c << 6) | c2; } out_str[j] = c; } break; } case sixteenbit: { p_wchar1 *out_str = STR1 (out); for(ptrdiff_t j=0; j < len; j++) { unsigned int c = *(in_str++); /* NOTE: No tests here since we've already tested the string above. */ if (c & 0x80) { if ((c & 0xe0) == 0xc0) { /* 11bit */ unsigned int c2 = *(in_str++) & 0x3f; c &= 0x1f; c = (c << 6) | c2; } else { /* 16bit */ unsigned int c2 = *(in_str++) & 0x3f; unsigned int c3 = *(in_str++) & 0x3f; c &= 0x0f; c = (c << 12) | (c2 << 6) | c3; } } out_str[j] = c; } break; } case thirtytwobit: { p_wchar2 *out_str = STR2 (out); for(ptrdiff_t j=0; j < len; j++) { unsigned int c = *(in_str++); /* NOTE: No tests here since we've already tested the string above. */ if (c & 0x80) { int cont = 0; if ((c & 0xe0) == 0xc0) { /* 11bit */ cont = 1; c &= 0x1f; } else if ((c & 0xf0) == 0xe0) { /* 16bit */ cont = 2; c &= 0x0f; } else if ((c & 0xf8) == 0xf0) { /* 21bit */ cont = 3; c &= 0x07; } else if ((c & 0xfc) == 0xf8) { /* 26bit */ cont = 4; c &= 0x03; } else if ((c & 0xfe) == 0xfc) { /* 31bit */ cont = 5; c &= 0x01; } else { /* 36bit */ cont = 6; c = 0; } while(cont--) { unsigned int c2 = *(in_str++) & 0x3f; c = (c << 6) | c2; } if (/*(extended & 2) && */(c & 0xfc00) == 0xdc00) { /* Low surrogate */ c &= 0x3ff; c |= ((out_str[--j] & 0x3ff)<<10) + 0x10000; } } out_str[j] = c; } break; } default: UNREACHABLE(break); } out = low_end_shared_string(out); #ifdef PIKE_DEBUG check_string (out); #endif return out; }
c77edb2017-01-25Arne Goedeke PMOD_EXPORT unsigned char *pike_string_utf8_encode(unsigned char *dst, const struct pike_string *s) { size_t len = s->len; switch (s->size_shift) { case eightbit: { const unsigned char *in = STR0(s); for (size_t i = 0; i < len; i++) { unsigned char c = *in++; if (c & 0x80) { *dst++ = 0xc0 | (c >> 6); *dst++ = 0x80 | (c & 0x3f); } else *dst++ = c; } break; } case sixteenbit: { const unsigned INT16 *in = STR1(s); for (size_t i = 0; i < len; i++) { unsigned INT16 c = *in++; if (LIKELY(c <= 0x7f)) { *dst++ = c; continue; } if (c <= 0x7ff) { /* 11bit */ *dst++ = 0xc0 | (c >> 6); *dst++ = 0x80 | (c & 0x3f); } else { /* 16bit */ *dst++ = 0xe0 | (c >> 12); *dst++ = 0x80 | ((c >> 6) & 0x3f); *dst++ = 0x80 | (c & 0x3f); } } break; } case thirtytwobit: { const unsigned INT32 *in = (unsigned INT32*)STR2(s); for (size_t i = 0; i < len; i++) { unsigned INT32 bytes, shift, first; unsigned INT32 c = in[i]; if (c <= 0x7f) { *dst++ = c; continue; } bytes = 1 + div5_8bit(fls32(c) - 2); shift = 6 * (bytes - 1); first = -0x40 >> (bytes - 2); /* the > 31bit case */ if (UNLIKELY(bytes >= 7)) { bytes = 7; shift = 32; } *dst = first | (c >> shift); dst += bytes; bytes -= 2; switch (bytes) { case 5: dst[-6] = 0x80 | ((c >> 30) & 0x3f); case 4: dst[-5] = 0x80 | ((c >> 24) & 0x3f); case 3: dst[-4] = 0x80 | ((c >> 18) & 0x3f); case 2: dst[-3] = 0x80 | ((c >> 12) & 0x3f); case 1: dst[-2] = 0x80 | ((c >> 6) & 0x3f); case 0: dst[-1] = 0x80 | (c & 0x3f); break; default: UNREACHABLE(break); } } break; } } return dst; }