pike.git / src / stralloc.c

version» Context lines:

pike.git/src/stralloc.c:1:   /*   || This file is part of Pike. For copyright information see COPYRIGHT.   || Pike is distributed under GPL, LGPL and MPL. See the file COPYING   || for more information.   */      #include "global.h"   #include "stralloc.h"   #include "pike_macros.h"   #include "buffer.h" - #include "pike_macros.h" +    #include "pike_memory.h"   #include "pike_error.h"   #include "gc.h"   #include "bignum.h"   #include "interpret.h"   #include "operators.h"   #include "pike_float.h"   #include "pike_types.h"   #include "block_allocator.h"   #include "whitespace.h" - #include "stuff.h" + #include "pike_search.h"      #include <errno.h>      #define SET_HSIZE(X) htable_mask=(htable_size=(X))-1   #define HMODULO(X) ((X) & (htable_mask))      static unsigned INT32 htable_mask;      #if (SIZEOF_LONG == 4) && defined(_LP64)   /* Kludge for gcc and the system header files not using the same model... */
pike.git/src/stralloc.c:123:    tmp += (1 << 24) - 1;    s_max = tmp;    break;    }    }    }    else    {    switch( str->size_shift )    { -  case 2: s_min = MIN_INT32; s_max=MAX_INT32; break; -  case 1: s_min = 0; s_max = 65535; break; -  case 0: s_min = 0; s_max = 255; break; +  case thirtytwobit: s_min = MIN_INT32; s_max = MAX_INT32; break; +  case sixteenbit: s_min = 0; s_max = 65535; break; +  case eightbit: s_min = 0; s_max = 255; break;    }    }    }    else    {    str->flags |= STRING_CONTENT_CHECKED;       switch( str->size_shift )    { -  case 0: +  case eightbit:    {    p_wchar0 *p = (p_wchar0*)str->str;    int upper = 0, lower = 0;    for( i=0; i<str->len; i++,p++ )    {    /* For 7-bit strings it's easy to check for    * lower/uppercase, so do that here as well.    */    if( *p >= 'A' && *p <= 'Z') upper++;    if( *p >= 'a' && *p <= 'z') lower++;
pike.git/src/stralloc.c:163:    if( !lower )    str->flags |= STRING_IS_UPPERCASE;    if( !upper )    str->flags |= STRING_IS_LOWERCASE;    }    }    str->min = s_min;    str->max = s_max;    break;    -  case 1: +  case sixteenbit:    {    p_wchar1 *p = (p_wchar1*)str->str;    for( i=0; i<str->len; i++,p++ )    {    if( *p > s_max ) s_max = *p;    if( *p < s_min ) s_min = *p;    }    }    str->min = s_min / 256;    str->max = s_max / 256;    break;    -  case 2: +  case thirtytwobit:    {    p_wchar2 *p = (p_wchar2*)str->str;    for( i=0; i<str->len; i++,p++ )    {    if( *p > s_max ) s_max = *p;    if( *p < s_min ) s_min = *p;    }    }    str->min = (unsigned INT32)s_min / (1 << 24);    str->max = (unsigned INT32)s_max / (1 << 24);
pike.git/src/stralloc.c:319:    "range 0..%"PRINTPTRDIFFT"d.\n",    pos, s->len-1);    } else {    Pike_fatal("Attempt to index the empty string with %"PRINTPTRDIFFT"d.\n",    pos);    }    }    return generic_extract(s->str,s->size_shift,pos);   }    - PMOD_EXPORT p_wchar2 generic_extract (const void *str, int size, ptrdiff_t pos) + PMOD_EXPORT p_wchar2 generic_extract(const void *str, enum size_shift size, +  ptrdiff_t pos)   {    switch(size)    { -  case 0: return ((p_wchar0 *)str)[pos]; -  case 1: return ((p_wchar1 *)str)[pos]; -  case 2: return ((p_wchar2 *)str)[pos]; +  case eightbit: return ((p_wchar0 *)str)[pos]; +  case sixteenbit: return ((p_wchar1 *)str)[pos]; +  case thirtytwobit: return ((p_wchar2 *)str)[pos];    }    UNREACHABLE(return 0);   }      static void locate_problem(int (*isproblem)(const struct pike_string *))   {    unsigned INT32 e;    struct pike_string *s;    DM(struct memhdr *yes=alloc_memhdr());    DM(struct memhdr *no=alloc_memhdr());
pike.git/src/stralloc.c:389:      /* Find a string in the shared string table.    * This assumes that the string is minimized!!!!    */   static struct pike_string *internal_findstring(const char *s,    ptrdiff_t len,    enum size_shift size_shift,    size_t hval)   {    struct pike_string *curr; - //,**prev, **base; +     unsigned int depth=0;    unsigned int prefix_depth=0;       size_t h;    h=HMODULO(hval);    for(curr = base_table[h]; curr; curr = curr->next)    {   #ifdef PIKE_DEBUG    if(curr->refs<1)    {
pike.git/src/stralloc.c:655:    verify_shared_strings_tables();   #endif   #ifdef PIKE_DEBUG    if (shift > 2)    Pike_fatal("Unsupported string shift: %d\n", shift);   #endif /* PIKE_DEBUG */    t=ba_alloc(&string_allocator);    /* we mark the string as static here, to avoid double free if the    * allocations fail    */ + #ifdef PIKE_DEBUG +  gc_init_marker(t); + #endif    t->flags = STRING_NOT_HASHED|STRING_NOT_SHARED;    t->alloc_type = STRING_ALLOC_STATIC;    t->struct_type = STRING_STRUCT_STRING;    SET_ONERROR(fe,free_unlinked_pike_string,t);    if (bytes <= sizeof(struct pike_string))    {    t->str = ba_alloc(&string_allocator);    t->alloc_type = STRING_ALLOC_BA;    } else {    t->str = xalloc(bytes);
pike.git/src/stralloc.c:681:    DO_IF_DEBUG(t->next = NULL);    UNSET_ONERROR(fe);    low_set_index(t,len,0);    return t;   }      static struct pike_string * make_static_string(const char * str, size_t len,    enum size_shift shift)   {    struct pike_string * t = ba_alloc(&string_allocator); -  + #ifdef PIKE_DEBUG +  gc_init_marker(t); + #endif    t->flags = STRING_NOT_HASHED|STRING_NOT_SHARED;    t->size_shift = shift;    t->alloc_type = STRING_ALLOC_STATIC;    t->struct_type = STRING_STRUCT_STRING;    t->str = (char *)str;    t->refs = 0;    t->len = len;    add_ref(t); /* For DMALLOC */       return t;
pike.git/src/stralloc.c:706:   {    struct pike_string *s;    ptrdiff_t h = StrHash(str, len);       s = internal_findstring(str,len,shift,h);       if (!s) {    s = make_static_string(str, len, shift);    link_pike_string(s, h);    } else { -  if (!string_is_static(s)) +  /* NB: The following is only possible if there are no substring references +  * to the old string. +  */ +  if (!(s->flags & STRING_IS_LOCKED) && !string_is_static(s))    {    free_string_content(s);    s->alloc_type = STRING_ALLOC_STATIC;    s->str = (char*)str;    }    add_ref(s);    }       return s;   }
pike.git/src/stralloc.c:728:   PMOD_EXPORT struct pike_string * make_shared_malloc_string(char *str, size_t len,    enum size_shift shift)   {    struct pike_string *s;    ptrdiff_t h = StrHash(str, len);       s = internal_findstring(str,len,shift,h);       if (!s) {    s = ba_alloc(&string_allocator); + #ifdef PIKE_DEBUG +  gc_init_marker(s); + #endif       s->flags = STRING_NOT_HASHED|STRING_NOT_SHARED;    s->size_shift = shift;    s->alloc_type = STRING_ALLOC_MALLOC;    s->struct_type = STRING_STRUCT_STRING;    s->str = str;    s->refs = 0;    s->len = len;    add_ref(s);   
pike.git/src/stralloc.c:760:    */   struct pike_string *low_end_shared_string(struct pike_string *s)   {    ptrdiff_t len;    size_t h=0;    struct pike_string *s2;      #ifdef PIKE_DEBUG    if (d_flag) {    switch (s->size_shift) { -  case 0: +  case eightbit:    break;    -  case 1: +  case sixteenbit:    if(!find_magnitude1(STR1(s),s->len))    Pike_fatal ("String %p that should have shift 1 really got 0.\n", s);    break;    -  case 2: { +  case thirtytwobit: {    int m = find_magnitude2 (STR2 (s), s->len);    if (m != 2)    Pike_fatal ("String %p that should have shift 2 really got %d.\n",    s, m);    break;    }       default:    Pike_fatal("ARGHEL! size_shift:%d\n", s->size_shift);    }
pike.git/src/stralloc.c:815:   /*    * This function checks if the shift size can be decreased before    * entering the string in the shared string table    */   PMOD_EXPORT struct pike_string *end_shared_string(struct pike_string *s)   {    struct pike_string *s2;       switch(UNLIKELY(s->size_shift))    { -  case 2: +  case thirtytwobit:    switch(find_magnitude2(STR2(s),s->len))    { -  case 0: +  case eightbit:    s2=begin_shared_string(s->len);    convert_2_to_0(STR0(s2),STR2(s),s->len);    free_string(s);    s=s2;    break;    -  case 1: +  case sixteenbit:    s2=begin_wide_shared_string(s->len,1);    convert_2_to_1(STR1(s2),STR2(s),s->len);    free_string(s);    s=s2;    /* Fall though */    }    break;    -  case 1: +  case sixteenbit:    if(!find_magnitude1(STR1(s),s->len))    {    s2=begin_shared_string(s->len);    convert_1_to_0(STR0(s2),STR1(s),s->len);    free_string(s);    s=s2;    }    break;    -  case 0: break; +  case eightbit: break;    }       return low_end_shared_string(s);   }      PMOD_EXPORT struct pike_string *end_and_resize_shared_string(struct pike_string *str, ptrdiff_t len)   {    struct pike_string *tmp;   #ifdef PIKE_DEBUG    if(len > str->len)
pike.git/src/stralloc.c:887:    add_ref(s);    }       return s;   }      PMOD_EXPORT struct pike_string * debug_make_shared_binary_pcharp(const PCHARP str,size_t len)   {    switch(str.shift)    { -  case 0: +  case eightbit:    return make_shared_binary_string((char *)(str.ptr), len); -  case 1: +  case sixteenbit:    return make_shared_binary_string1((p_wchar1 *)(str.ptr), len); -  case 2: +  case thirtytwobit:    return make_shared_binary_string2((p_wchar2 *)(str.ptr), len);    }    UNREACHABLE(return NULL);   }      PMOD_EXPORT struct pike_string * debug_make_shared_pcharp(const PCHARP str)   {    return debug_make_shared_binary_pcharp(str, pcharp_strlen(str));   }   
pike.git/src/stralloc.c:1060: Inside #if defined(PIKE_DEBUG)
   Pike_fatal("Freeing shared string again, memory corrupt or other bug!\n");    }    if (s->size_shift > 2) {    Pike_fatal("Freeing string with bad shift (0x%08x); could it be a type?\n",    s->size_shift);    }   #endif    if (!(s->flags & STRING_NOT_SHARED))    unlink_pike_string(s);    if (s->flags & STRING_CLEAR_ON_EXIT) -  guaranteed_memset(s->str, 0, s->len<<s->size_shift); +  secure_zero(s->str, s->len<<s->size_shift);    free_unlinked_pike_string(s);    GC_FREE_SIMPLE_BLOCK(s);   }         void do_really_free_string(struct pike_string *s)   {    if (s)    really_free_string(s);   }
pike.git/src/stralloc.c:1187:    }   /*    sprintf(b,"Searches: %ld Average search length: %6.3f\n",    (long)num_str_searches, (double)search_len / num_str_searches);    my_strcat(b);   */    return finish_string_builder(&s);   }      #ifdef PIKE_DEBUG + #include <ctype.h>      static long last_stralloc_verify=0;   extern long current_do_debug_cycle;      PMOD_EXPORT void check_string(struct pike_string *s)   {    if(current_do_debug_cycle == last_stralloc_verify)    {    if(debug_findstring(s) != s)    Pike_fatal("Shared string not shared.\n");    }else{       switch (s->size_shift) { -  case 0: +  case eightbit:    break; -  case 1: { +  case sixteenbit: {    ptrdiff_t i;    p_wchar1 *str = STR1 (s);    for (i = 0; i < s->len; i++)    if (str[i] > 0xff)    goto size_shift_check_done;    Pike_fatal ("Shared string is too wide.\n");    } -  case 2: { +  case thirtytwobit: {    ptrdiff_t i;    p_wchar2 *str = STR2 (s);    for (i = 0; i < s->len; i++)    if ((str[i] > 0xffff) || (str[i] < 0))    goto size_shift_check_done;    Pike_fatal ("Shared string is too wide.\n");    }    default:    Pike_fatal ("Invalid size shift %d.\n", s->size_shift);    }
pike.git/src/stralloc.c:1650:       if(min_magnitude(old_value) == a->size_shift &&    min_magnitude(c) < min_magnitude(old_value))    {    /* We *might* need to shrink the string */    struct pike_string *b;    unsigned int size,tmp;       switch(a->size_shift)    { -  case 0: +  case eightbit:    Pike_fatal("Unshrinkable!\n");    -  case 1: +  case sixteenbit:    /* Test if we *actually* can shrink it.. */    if(find_magnitude1(STR1(a),index)) break;    if(find_magnitude1(STR1(a)+index+1,a->len-index-1))    break;       b=begin_shared_string(a->len);    convert_1_to_0((p_wchar0 *)b->str,STR1(a),a->len);    b->str[index]=c;    free_string(a);    return end_shared_string(b);    -  case 2: +  case thirtytwobit:    /* Test if we *actually* can shrink it.. */    size=find_magnitude2(STR2(a),index);    if(size==2) break; /* nope */    tmp=find_magnitude2(STR2(a)+index+1,a->len-index-1);    if(tmp==2) break; /* nope */    size=MAXIMUM(MAXIMUM(size,tmp),min_magnitude(c));       switch(size)    { -  case 0: +  case eightbit:    b=begin_shared_string(a->len);    convert_2_to_0((p_wchar0 *)b->str,STR2(a),a->len);    b->str[index]=c;    free_string(a);    return end_shared_string(b);    -  case 1: +  case sixteenbit:    b=begin_wide_shared_string(a->len,1);    convert_2_to_1((p_wchar1 *)b->str,STR2(a),a->len);    STR1(b)[index]=c;    free_string(a);    return end_shared_string(b);    }    }    }      
pike.git/src/stralloc.c:1868:    struct substring_pike_string *res;    void *strstart = s->str+(start<<shift);    size_t hval = low_do_hash(strstart,len,shift);    if( (existing =    internal_findstring(strstart, len, shift, hval)) )    {    add_ref(existing);    return existing;    }    res = ba_alloc(&substring_allocator); + #ifdef PIKE_DEBUG +  gc_init_marker(&res->str); + #endif    res->parent = s; -  +  s->flags |= STRING_IS_LOCKED; /* Make sure the string data isn't reallocated. */    add_ref(s);    existing = &res->str;       existing->flags = STRING_NOT_SHARED;    existing->size_shift = shift;    existing->alloc_type = STRING_ALLOC_SUBSTRING;    existing->struct_type = STRING_STRUCT_SUBSTRING;    existing->hval = hval;    existing->str = strstart;    existing->len = len;
pike.git/src/stralloc.c:1941:    find_magnitude1(((p_wchar1*)s->str)+start,len)==1)    || (s->size_shift==2 &&    find_magnitude2(((p_wchar2*)s->str)+start,len)==2)))    {    /* If there is no change of maginute, make a substring. */    return make_shared_substring( s, start, len, s->size_shift );    }       switch(s->size_shift)    { -  case 0: +  case eightbit:    return make_shared_binary_string((char *)STR0(s)+start,len);    -  case 1: +  case sixteenbit:    return make_shared_binary_string1(STR1(s)+start,len);    -  case 2: +  case thirtytwobit:    return make_shared_binary_string2(STR2(s)+start,len);    }    UNREACHABLE(return 0);   }      /*** replace function ***/   typedef char *(* replace_searchfunc)(void *,void *,size_t);   PMOD_EXPORT struct pike_string *string_replace(struct pike_string *str,    struct pike_string *del,    struct pike_string *to)
pike.git/src/stralloc.c:2003:    {    mojt=compile_memsearcher(MKPCHARP_STR(del),    del->len,    str->len,    del);    SET_ONERROR (mojt_uwp, do_free_object, mojt.container);       ret=begin_wide_shared_string(str->len,shift);    switch(str->size_shift)    { -  case 0: f=(replace_searchfunc)mojt.vtab->func0; break; -  case 1: f=(replace_searchfunc)mojt.vtab->func1; break; -  case 2: f=(replace_searchfunc)mojt.vtab->func2; break; +  case eightbit: f=(replace_searchfunc)mojt.vtab->func0; break; +  case sixteenbit: f=(replace_searchfunc)mojt.vtab->func1; break; +  case thirtytwobit: f=(replace_searchfunc)mojt.vtab->func2; break; +  default: Pike_fatal("Invalid size_shift: %d.\n", str->size_shift); break;    }       }else{    INT32 delimeters=0;    mojt=compile_memsearcher(MKPCHARP_STR(del),    del->len,    str->len*2,    del);    SET_ONERROR (mojt_uwp, do_free_object, mojt.container);       switch(str->size_shift)    { -  case 0: f=(replace_searchfunc)mojt.vtab->func0; break; -  case 1: f=(replace_searchfunc)mojt.vtab->func1; break; -  case 2: f=(replace_searchfunc)mojt.vtab->func2; break; +  case eightbit: f=(replace_searchfunc)mojt.vtab->func0; break; +  case sixteenbit: f=(replace_searchfunc)mojt.vtab->func1; break; +  case thirtytwobit: f=(replace_searchfunc)mojt.vtab->func2; break; +  default: Pike_fatal("Invalid size_shift: %d.\n", str->size_shift); break;    }       while((s = f(mojt.data, s, (end-s)>>str->size_shift)))    {    delimeters++;    s+=del->len << str->size_shift;    }       if(!delimeters)    {
pike.git/src/stralloc.c:2262:    } while (!next);    }    return next;   }         PMOD_EXPORT PCHARP MEMCHR_PCHARP(const PCHARP ptr, int chr, ptrdiff_t len)   {    switch(ptr.shift)    { -  case 0: return MKPCHARP(memchr(ptr.ptr,chr,len),0); -  case 1: return MKPCHARP(MEMCHR1((p_wchar1 *)ptr.ptr,chr,len),1); -  case 2: return MKPCHARP(MEMCHR2((p_wchar2 *)ptr.ptr,chr,len),2); +  case eightbit: return MKPCHARP(memchr(ptr.ptr,chr,len),0); +  case sixteenbit: return MKPCHARP(MEMCHR1((p_wchar1 *)ptr.ptr,chr,len),1); +  case thirtytwobit: return MKPCHARP(MEMCHR2((p_wchar2 *)ptr.ptr,chr,len),2);    }    UNREACHABLE(MKPCHARP(0,0));   }    -  + const unsigned char hexdecode[256] = + { +  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +  +  /* '0' - '9' */ +  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, +  +  16,16,16,16,16,16,16, +  +  /* 'A' - 'F' */ +  10, 11, 12, 13, 14, 15, +  +  16,16,16,16,16,16,16,16,16,16,16,16,16, +  16,16,16,16,16,16,16,16,16,16,16,16,16, +  +  /* 'a' - 'f' */ +  10, 11, 12, 13, 14, 15, +  +  16,16,16,16,16,16,16,16,16, +  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, +  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, + }; +    #define DIGIT(x) ( (x)<256 ? hexdecode[x] : 16 )   #define MBASE ('z' - 'a' + 1 + 10)      PMOD_EXPORT long STRTOL_PCHARP(PCHARP str, PCHARP *ptr, int base)   {    /* Note: Code duplication in pcharp_to_svalue_inumber. */       unsigned long val, mul_limit;    int c;    int xx, neg = 0, add_limit, overflow = 0;
pike.git/src/stralloc.c:2293:    {    while (wide_isspace(c))    {    INC_PCHARP(str,1);    c=EXTRACT_PCHARP(str);    }    switch (c)    {    case '-':    neg++; -  /* FALL_THROUGH */ +  /* FALLTHRU */    case '+':    INC_PCHARP(str,1);    c=EXTRACT_PCHARP(str);    }    }       if (!base)    {    if (c != '0')    base = 10;
pike.git/src/stralloc.c:2706:    *endptr = nptr;    return 0.0;   }         PMOD_EXPORT p_wchar0 *require_wstring0(const struct pike_string *s,    char **to_free)   {    switch(s->size_shift)    { -  case 0: +  case eightbit:    *to_free=0;    return STR0(s); -  case 1: -  case 2: +  case sixteenbit: +  case thirtytwobit:    return 0;    }    UNREACHABLE(return 0);   }      PMOD_EXPORT p_wchar1 *require_wstring1(const struct pike_string *s,    char **to_free)   {    switch(s->size_shift)    { -  case 0: +  case eightbit:    *to_free=xalloc((s->len+1)*2);    convert_0_to_1((p_wchar1 *)*to_free, STR0(s),s->len+1);    return (p_wchar1 *)*to_free;    -  case 1: +  case sixteenbit:    *to_free=0;    return STR1(s);    -  case 2: +  case thirtytwobit:    return 0;    }    UNREACHABLE(return 0);   }         PMOD_EXPORT p_wchar2 *require_wstring2(const struct pike_string *s,    char **to_free)   {    switch(s->size_shift)    { -  case 0: +  case eightbit:    *to_free=xalloc((s->len+1)*4);    convert_0_to_2((p_wchar2 *)*to_free, STR0(s),s->len+1);    return (p_wchar2 *)*to_free;    -  case 1: +  case sixteenbit:    *to_free=xalloc((s->len+1)*4);    convert_1_to_2((p_wchar2 *)*to_free, STR1(s),s->len+1);    return (p_wchar2 *)*to_free;    -  case 2: +  case thirtytwobit:    *to_free=0;    return STR2(s);    }    UNREACHABLE(return 0);   }      PMOD_EXPORT int wide_isspace(int c)   {    switch(c)    {    SPACECASE16;    return 1;    }    return 0;   }    -  + PMOD_EXPORT const char Pike_isidchar_vector[] = +  "0000000000000000" +  "0000000000000000" +  "0000000000000000" +  "1111111111000000" +  "0111111111111111" +  "1111111111100001" +  "0111111111111111" +  "1111111111100000" +  "0000000000000000" +  "0000000000000000" +  "0011110101100010" +  "1011011001101110" +  "1111111111111111" +  "1111111011111111" +  "1111111111111111" +  "1111111011111111"; +    PMOD_EXPORT int wide_isidchar(int c)   {    if(c<0) return 0;    if(c<256) return isidchar(c);    if(wide_isspace(c)) return 0;    return 1;   }      /*    * UTF8 encoding functions. This code uses the following observation:
pike.git/src/stralloc.c:2852:    * 000800-000fff e0 a0-bf 80-bf    * 001000-00cfff e1-ec 80-bf 80-bf    * 00d000-00d7ff ed 80-9f 80-bf    * 00e000-00ffff ee-ef 80-bf 80-bf    * 010000-03ffff f0 90-bf 80-bf 80-bf    * 040000-0fffff f1-f3 80-bf 80-bf 80-bf    * 100000-10ffff f4 80-8f 80-bf 80-bf    */       if ((c & 0xc0) == 0x80) { -  bad_arg_error ("utf8_to_string", Pike_sp - args, args, 1, -  NULL, Pike_sp - args, +  bad_arg_error ("utf8_to_string", args, 1, NULL, Pike_sp - args,    "Invalid continuation character 0x%02x.\n", c);    }      #define GET_CHAR(in, c) do { \    in++; \    if (in >= end) \ -  bad_arg_error ("utf8_to_string", Pike_sp - args, args, 1, \ -  NULL, Pike_sp - args, \ +  bad_arg_error ("utf8_to_string", args, 1, NULL, Pike_sp - args, \    "Truncated UTF-8 sequence at end of string.\n"); \    c = *in; \    } while(0)   #define GET_CONT_CHAR(in, c) do { \    GET_CHAR(in, c); \    if ((c & 0xc0) != 0x80) \ -  bad_arg_error ("utf8_to_string", Pike_sp - args, args, 1, \ -  NULL, Pike_sp - args, \ +  bad_arg_error ("utf8_to_string", args, 1, NULL, Pike_sp - args, \    "Expected continuation character, " \    "got 0x%02x.\n", \    c); \    } while (0)      #define UTF8_SEQ_ERROR(prefix, c, problem) do { \ -  bad_arg_error ("utf8_to_string", Pike_sp - args, args, 1, \ -  NULL, Pike_sp - args, \ +  bad_arg_error ("utf8_to_string", args, 1, NULL, Pike_sp - args, \    "UTF-8 sequence beginning with %s0x%02x " \    " %s.\n", \    prefix, c, problem); \    } while (0)       if ((c & 0xe0) == 0xc0) {    /* 11bit */    if (!(c & 0x1e))    UTF8_SEQ_ERROR ("", c, "is a non-shortest form");    cont = 1;
pike.git/src/stralloc.c:2966:    cont = 2;    }    else    cont = 3;    }    else    cont = 3;    }       else if (c == 0xff) -  bad_arg_error ("utf8_to_string", Pike_sp - args, args, 1, -  NULL, Pike_sp - args, +  bad_arg_error ("utf8_to_string", args, 1, NULL, Pike_sp - args,    "Invalid character 0xff");       else if (!(extended & 1))    UTF8_SEQ_ERROR ("", c, "would decode to "    "a character outside the valid UTF-8 range");       else {    if ((c & 0xfc) == 0xf8) {    /* 26bit */    if (c == 0xf8) {
pike.git/src/stralloc.c:3043:    const size_t tail = (size_t)(end8 - in8) % 4;    elen = tail*sizeof(poptype);    poptype a = 0, b = 0, c = 0, d = 0;       in8 += tail;    switch (tail) {    do {    case 0:    in8 += 4;    elen = sizeof(poptype)*4; -  a = in8[-4]; +  a = in8[-4]; /* FALLTHRU */    case 3: -  b = in8[-3]; +  b = in8[-3]; /* FALLTHRU */    case 2: -  c = in8[-2]; +  c = in8[-2]; /* FALLTHRU */    case 1:    d = in8[-1];       a &= mask;    b &= mask;    c &= mask;    d &= mask;       if (UNLIKELY(a | b | c | d)) {    /* we have to begin from the beginning of the last chunk */
pike.git/src/stralloc.c:3078:    /* process the single byte tail */       elen = (size_t)inlen % sizeof(poptype);       if (elen) {    poptype a = 0;       in = (const unsigned char*)in8;       switch (7-elen) { -  case 0: a |= in[0] & 0x80; -  case 1: a |= in[1] & 0x80; -  case 2: a |= in[2] & 0x80; -  case 3: a |= in[3] & 0x80; -  case 4: a |= in[4] & 0x80; -  case 5: a |= in[5] & 0x80; +  case 0: a |= in[0] & 0x80; /* FALLTHRU */ +  case 1: a |= in[1] & 0x80; /* FALLTHRU */ +  case 2: a |= in[2] & 0x80; /* FALLTHRU */ +  case 3: a |= in[3] & 0x80; /* FALLTHRU */ +  case 4: a |= in[4] & 0x80; /* FALLTHRU */ +  case 5: a |= in[5] & 0x80; /* FALLTHRU */    case 6: a |= in[6] & 0x80; break;    default: UNREACHABLE(break);    }       if (UNLIKELY(a)) {    goto not_7bit;    }    len += elen;    }   
pike.git/src/stralloc.c:3130:       if (in8 < end8) {    const size_t tail = (size_t)(end8 - in8) % 4;    poptype a = 0, b = 0, c = 0, d = 0;       in8 += tail;    switch (tail) {    do {    case 0:    in8 += 4; -  a = in8[-4]; +  a = in8[-4]; /* FALLTHRU */    case 3: -  b = in8[-3]; +  b = in8[-3]; /* FALLTHRU */    case 2: -  c = in8[-2]; +  c = in8[-2]; /* FALLTHRU */    case 1:    d = in8[-1];       a &= mask;    b &= mask;    c &= mask;    d &= mask;       b >>= 1;    c >>= 2;
pike.git/src/stralloc.c:3157:    elen += POPCOUNT(a | b | c | d);    } while (in8 < end8);    break;    default: UNREACHABLE(break);    }    }       in = (unsigned char*)end8;       switch ((size_t)s->len % sizeof(poptype)) { -  case 7: elen += (*in++) >> 7; -  case 6: elen += (*in++) >> 7; -  case 5: elen += (*in++) >> 7; -  case 4: elen += (*in++) >> 7; -  case 3: elen += (*in++) >> 7; -  case 2: elen += (*in++) >> 7; -  case 1: elen += (*in++) >> 7; +  case 7: elen += (*in++) >> 7; /* FALLTHRU */ +  case 6: elen += (*in++) >> 7; /* FALLTHRU */ +  case 5: elen += (*in++) >> 7; /* FALLTHRU */ +  case 4: elen += (*in++) >> 7; /* FALLTHRU */ +  case 3: elen += (*in++) >> 7; /* FALLTHRU */ +  case 2: elen += (*in++) >> 7; /* FALLTHRU */ +  case 1: elen += (*in++) >> 7; /* FALLTHRU */    case 0: break;    default: UNREACHABLE(break);    }       return elen;   #undef poptype   #undef POPCOUNT    } else {    unsigned INT32 c;    size_t i;       if (s->size_shift == sixteenbit) {    unsigned INT16 *in = (unsigned INT16*)STR1(s);       for (i = 0; i < len; i++) {    c = in[i];    if (c <= 0x7f) continue;    elen += div5_8bit(fls32(c) - 2); -  if (extended) continue; +  if (extended & 1) continue;    if (UNLIKELY(c >= 0xd800 && c <= 0xdfff)) goto surrogate_error;    }    } else {    unsigned INT32 *in = (unsigned INT32*)STR2(s);       for (i = 0; i < len; i++) {    c = in[i];    if (c <= 0x7f) continue;    elen += div5_8bit(fls32(c) - 2); -  if (extended) continue; +  if (extended & 2 && c < 0x10ffff) +  { +  /* Encode with a surrogate pair. */ +  elen += 2; +  continue; +  } +  if (extended & 1) continue;    if (UNLIKELY(c >= 0xd800 && c <= 0xdfff)) goto surrogate_error;    if (UNLIKELY(c > 0x10ffff)) goto extended_error;    }    }       return elen;   surrogate_error: -  bad_arg_error ("string_to_utf8", Pike_sp - args, args, 1, -  NULL, Pike_sp - args, +  bad_arg_error ("string_to_utf8", args, 1, NULL, Pike_sp - args,    "Character 0x%08x at index %"PRINTPTRDIFFT"d is "    "in the surrogate range and therefore invalid.\n",    c, i);   extended_error: -  bad_arg_error ("string_to_utf8", Pike_sp - args, args, 1, -  NULL, Pike_sp - args, +  bad_arg_error ("string_to_utf8", args, 1, NULL, Pike_sp - args,    "Character 0x%08x at index %"PRINTPTRDIFFT"d is "    "outside the allowed range.\n",    c, i);    }    UNREACHABLE(return 0);   }      PMOD_EXPORT struct pike_string *pike_string_utf8_decode(const p_wchar0 *in_str,    enum size_shift shift, ptrdiff_t len) {   
pike.git/src/stralloc.c:3322:       out = low_end_shared_string(out);      #ifdef PIKE_DEBUG    check_string (out);   #endif       return out;   }    - PMOD_EXPORT unsigned char *pike_string_utf8_encode(unsigned char *dst, const struct pike_string *s) { + PMOD_EXPORT unsigned char *pike_string_utf8_encode(unsigned char *dst, const struct pike_string *s, +  int extended) {    size_t len = s->len;       switch (s->size_shift) {    case eightbit:    {    const unsigned char *in = STR0(s);       for (size_t i = 0; i < len; i++) {    unsigned char c = *in++;   
pike.git/src/stralloc.c:3377:    {    const unsigned INT32 *in = (unsigned INT32*)STR2(s);       for (size_t i = 0; i < len; i++) {    unsigned INT32 bytes, shift, first;    unsigned INT32 c = in[i];       if (c <= 0x7f) {    *dst++ = c;    continue; +  } else if (UNLIKELY(extended & 2 && c >= 0xffff && c < 0x10ffff)) { +  /* Encode with surrogates. */ +  c -= 0x10000; +  /* 0xd800 | (c>>10) +  * 0b1101 10cccc cccccc +  * UTF8: 11101101 1010cccc 10cccccc +  */ +  *dst++ = 0xed; +  *dst++ = 0xa0 | (c >> 16); +  *dst++ = 0x80 | ((c >> 10) & 0x3f); +  /* 0xdc00 | (c & 0x3ff) +  * 0b1101 11cccc cccccc +  * UTF8: 11101101 1011cccc 10cccccc +  */ +  *dst++ = 0xed; +  *dst++ = 0xb0 | ((c >> 6) & 0x3f); +  *dst++ = 0x80 | (c & 0x3f);    }       bytes = 1 + div5_8bit(fls32(c) - 2);    shift = 6 * (bytes - 1);    first = -0x40 >> (bytes - 2);       /* the > 31bit case */    if (UNLIKELY(bytes >= 7)) {    bytes = 7;    shift = 32;    }       *dst = first | (c >> shift);       dst += bytes;       bytes -= 2;       switch (bytes) { -  case 5: dst[-6] = 0x80 | ((c >> 30) & 0x3f); -  case 4: dst[-5] = 0x80 | ((c >> 24) & 0x3f); -  case 3: dst[-4] = 0x80 | ((c >> 18) & 0x3f); -  case 2: dst[-3] = 0x80 | ((c >> 12) & 0x3f); -  case 1: dst[-2] = 0x80 | ((c >> 6) & 0x3f); +  case 5: dst[-6] = 0x80 | ((c >> 30) & 0x3f); /* FALLTHRU */ +  case 4: dst[-5] = 0x80 | ((c >> 24) & 0x3f); /* FALLTHRU */ +  case 3: dst[-4] = 0x80 | ((c >> 18) & 0x3f); /* FALLTHRU */ +  case 2: dst[-3] = 0x80 | ((c >> 12) & 0x3f); /* FALLTHRU */ +  case 1: dst[-2] = 0x80 | ((c >> 6) & 0x3f); /* FALLTHRU */    case 0: dst[-1] = 0x80 | (c & 0x3f); break;    default: UNREACHABLE(break);    }    }       break;    }    }       return dst;   }