Branch: Tag:

2015-09-27

2015-09-27 02:18:06 by Per Hedbor <ph@opera.com>

Split string alloc type and struct type

This makes the code somewhat cleaner. No measurable speed change.

Also change many 'int shift' declarations to 'enum size_shift shift' for
more symbolic debug info.

52:   static unsigned INT32 num_strings=0;   PMOD_EXPORT struct pike_string *empty_pike_string = 0;    + struct substring_pike_string { +  struct pike_string str; +  struct pike_string *parent; + }; +    /*** Main string hash function ***/      #define StrHash(s,len) low_do_hash(s,len,0)
74:    return s->alloc_type == STRING_ALLOC_SUBSTRING;   }    - static INLINE int string_may_modify(const struct pike_string * s) { -  return !string_is_static(s) && !string_is_substring(s) && s->refs == 1; + static struct pike_string *substring_content_string(const struct pike_string *s) + { +  return ((struct substring_pike_string*)s)->parent;   }    -  + static INLINE int string_may_modify(const struct pike_string * s) + { +  return !string_is_static(s) && !string_is_substring(s) +  && s->refs == 1; + }    -  + static INLINE int string_may_modify_len(const struct pike_string * s) + { +  return s->refs == 1; + } +  +    /* Returns true if str could contain n. */   PMOD_EXPORT int string_range_contains( struct pike_string *str, int n )   {
251:    return 0;   }    - static INLINE unsigned min_magnitude(const unsigned c) + static INLINE enum size_shift min_magnitude(const unsigned c)   {    return LIKELY(c<256) ? 0 : LIKELY(c<65536) ? 1 : 2;   }
284:   }      #ifdef PIKE_DEBUG - PMOD_EXPORT struct pike_string *debug_check_size_shift(const struct pike_string *a, int shift) + PMOD_EXPORT struct pike_string *debug_check_size_shift(const struct pike_string *a, enum size_shift shift)   {    if(a->size_shift != shift)    Pike_fatal("Wrong STRX macro used!\n");
459:    */   static struct pike_string *internal_findstring(const char *s,    ptrdiff_t len, -  int size_shift, +  enum size_shift size_shift,    size_t hval)   {    struct pike_string *curr;
486:    hval == curr->hval &&    ( curr->str == s ||    !memcmp(curr->str, s,len<<size_shift))) /* found it */ -  { -  /* *prev = curr->next; */ -  /* curr->next = *base; */ -  /* *base = curr; */ -  return curr; /* pointer to string */ -  } +  return curr; +     depth++;    if (curr->len > (ptrdiff_t)hash_prefix_len)    prefix_depth++;
615:   #define STRING_BLOCK 2048       - struct substring_pike_string { -  struct pike_string str; -  struct pike_string *parent; - }; + static struct block_allocator string_allocator = +  BA_INIT(sizeof(struct pike_string), STRING_BLOCK); + static struct block_allocator substring_allocator = +  BA_INIT(sizeof(struct substring_pike_string), STRING_BLOCK>>2);    - static struct block_allocator string_allocator = BA_INIT(sizeof(struct pike_string), STRING_BLOCK); - static struct block_allocator substring_allocator = BA_INIT(sizeof(struct substring_pike_string), STRING_BLOCK>>2); -  - static void free_unlinked_pike_string(struct pike_string * s) + static void free_string_content(struct pike_string * s)   {    switch (s->alloc_type)    { -  +  case STRING_ALLOC_STATIC: +  break;    case STRING_ALLOC_MALLOC:    free(s->str);    break;
634:    ba_free(&string_allocator, s->str);    break;    case STRING_ALLOC_SUBSTRING: -  if( ((struct substring_pike_string*)s)->parent ) +     free_string(((struct substring_pike_string*)s)->parent); -  ba_free(&substring_allocator, s); -  return; +  break;    } -  + }    -  + static void free_unlinked_pike_string(struct pike_string * s) + { +  free_string_content(s); +  switch(s->struct_type) +  { +  case STRING_STRUCT_STRING:    ba_free(&string_allocator, s); -  +  break; +  case STRING_STRUCT_SUBSTRING: +  ba_free(&substring_allocator, s); +  break;    } -  + }    -  +    /* note that begin_shared_string expects the _exact_ size of the string,    * not the maximum size    */
654:   static void link_pike_string(struct pike_string *s, size_t hval)   {    size_t h; -  +    #ifdef PIKE_DEBUG    if (!(s->flags & STRING_NOT_SHARED)) {    debug_dump_pike_string(s, 70);
734:    }   }    - PMOD_EXPORT struct pike_string *debug_begin_wide_shared_string(size_t len, int shift) + PMOD_EXPORT struct pike_string *debug_begin_wide_shared_string(size_t len, enum size_shift shift)   {    struct pike_string *t = NULL;    size_t bytes = (len+1) << shift;
747: Inside #if defined(PIKE_DEBUG)
   if (shift > 2)    Pike_fatal("Unsupported string shift: %d\n", shift);   #endif /* PIKE_DEBUG */ -  t=(struct pike_string *)ba_alloc(&string_allocator); +  t=ba_alloc(&string_allocator);    /* we mark the string as static here, to avoid double free if the    * allocations fail    */    t->flags = STRING_NOT_HASHED|STRING_NOT_SHARED;    t->alloc_type = STRING_ALLOC_STATIC; -  +  t->struct_type = STRING_STRUCT_STRING;    SET_ONERROR(fe,free_unlinked_pike_string,t);    if (bytes <= sizeof(struct pike_string))    {
778:    struct pike_string * t = ba_alloc(&string_allocator);       t->flags = STRING_NOT_HASHED|STRING_NOT_SHARED; +  t->size_shift = shift;    t->alloc_type = STRING_ALLOC_STATIC; -  +  t->struct_type = STRING_STRUCT_STRING;    t->str = (char *)str;    t->refs = 0;    t->len = len; -  t->size_shift = shift; -  add_ref(t); /* For DMALLOC */ +  add_ref(t); /* For DMALLOC */       return t;   }
800:    s = make_static_string(str, len, shift);    link_pike_string(s, h);    } else { -  if (!string_is_static(s)) { -  if (string_is_block_allocated(s)) { -  ba_free(&string_allocator, s->str); -  } else if(string_is_substring(s)) { -  if( ((struct substring_pike_string *)s)->parent ) { -  free_string( ((struct substring_pike_string *)s)->parent ); -  ((struct substring_pike_string *)s)->parent = NULL; -  } -  s->str = (char*)str; -  /* NOTE: We MUST NOT change the alloc_type, since that -  * would associate s with the wrong block allocator, -  * and cause a crash when s is eventually freed. -  */ -  goto done; -  } -  else +  if (!string_is_static(s))    { -  free(s->str); -  } +  free_string_content(s);    s->alloc_type = STRING_ALLOC_STATIC;    s->str = (char*)str;    } -  done: +     add_ref(s);    }   
885:    }       return s; -  +    }      /*
950:    return tmp;   }    -  +    PMOD_EXPORT struct pike_string * debug_make_shared_binary_string(const char *str,size_t len)   {    struct pike_string *s;
985:   #endif    }    /* NOT REACHED */ -  return NULL; /* Keep the compiler happy */ +  return NULL; /* Keep the compiler happy */   }      PMOD_EXPORT struct pike_string * debug_make_shared_pcharp(const PCHARP str)
1089:      static void unlink_pike_string(struct pike_string *s)   { -  size_t h = HMODULO(s->hval); -  struct pike_string *tmp = base_table[h], *p = NULL; +  size_t h=HMODULO(s->hval); +  struct pike_string *tmp=base_table[h], *p=NULL;       while( tmp )    {
1110:    Pike_fatal("unlink on non-shared string\n");       s->next=(struct pike_string *)(ptrdiff_t)-1; -  +     num_strings--;    s->flags |= STRING_NOT_SHARED;   }
1157:    GC_FREE_SIMPLE_BLOCK(s);   }    +    void do_really_free_string(struct pike_string *s)   {    if (s)
1169:    really_free_string(s);   }    -  +    /*    * String table status    */
1191:    {    for(p=base_table[e];p;p=p->next)    { -  int key = p->size_shift; +  int key = p->size_shift + (string_is_malloced(p)?4:0);    num_distinct_strings[key]++;    alloced_bytes[key] += p->refs*sizeof(struct pike_string);    alloced_strings[key] += p->refs;
1279:    return finish_string_builder(&s);   }    - /*** PIKE_DEBUG ***/ +    #ifdef PIKE_DEBUG      static long last_stralloc_verify=0;
1533:    ptrdiff_t pos;    ptrdiff_t len = MINIMUM(alen, blen);    switch(TWO_SIZES(asize, bsize)) { - #define CASE(AZ, BZ) \ -  case TWO_SIZES(AZ, BZ): { \ -  PIKE_CONCAT(p_wchar, AZ) *a_arr = \ -  (PIKE_CONCAT(p_wchar, AZ) *)a; \ -  PIKE_CONCAT(p_wchar, BZ) *b_arr = \ -  (PIKE_CONCAT(p_wchar, BZ) *)b; \ -  for (pos=0; pos<len; pos++) { \ -  if (a_arr[pos] == b_arr[pos]) \ -  continue; \ -  if (a_arr[pos] < b_arr[pos]) \ -  return ~pos; \ -  return pos+1; \ -  } \ + #define CASE(AZ, BZ) \ +  case TWO_SIZES(AZ, BZ): { \ +  PIKE_CONCAT(p_wchar, AZ) *a_arr = \ +  (PIKE_CONCAT(p_wchar, AZ) *)a; \ +  PIKE_CONCAT(p_wchar, BZ) *b_arr = \ +  (PIKE_CONCAT(p_wchar, BZ) *)b; \ +  for (pos=0; pos<len; pos++) { \ +  if (a_arr[pos] == b_arr[pos]) \ +  continue; \ +  if (a_arr[pos] < b_arr[pos]) \ +  return ~pos; \ +  return pos+1; \ +  } \    } break    CASE(0,0);    CASE(0,1);
1579:    b->str, b->len, b->size_shift);   }    +    struct pike_string *realloc_unlinked_string(struct pike_string *a,    ptrdiff_t size)   {
1586:    size_t nbytes = (size_t)(size+1) << a->size_shift;    size_t obytes = (size_t)(a->len+1) << a->size_shift;    - #define TWO(A,B) ((A<<8)|B) +  if( size < a->len && size-a->len<(signed)sizeof(void*) ) +  goto done;    -  -  switch (TWO(a->alloc_type, (nbytes <= sizeof(struct pike_string))) ) +  if( nbytes < sizeof(struct pike_string) )    { -  case TWO(STRING_ALLOC_MALLOC,0): // malloc->malloc -  s=xrealloc(a->str, nbytes); -  break; -  case TWO(STRING_ALLOC_BA,0): // short->malloc -  s = xalloc(nbytes); -  a->alloc_type = STRING_ALLOC_MALLOC; -  memcpy(s, a->str, obytes); -  ba_free(&string_allocator, a->str); -  break; -  case TWO(STRING_ALLOC_MALLOC,1): // malloc -> short +  if( a->alloc_type == STRING_ALLOC_BA ) +  goto done;    s = ba_alloc(&string_allocator); -  +  memcpy(s,a->str,nbytes); +  free_string_content(a);    a->alloc_type = STRING_ALLOC_BA; -  memcpy(s, a->str, nbytes); -  free(a->str); -  break; -  case TWO(STRING_ALLOC_BA,1): // both are short -  goto done; -  case TWO(STRING_ALLOC_STATIC,0): // static -> malloc +  } +  else if( a->alloc_type == STRING_ALLOC_MALLOC) +  { +  s = xrealloc(a->str,nbytes); +  } +  else +  {    s = xalloc(nbytes); -  +  memcpy(s,a->str,MINIMUM(nbytes,obytes)); +  free_string_content(a);    a->alloc_type = STRING_ALLOC_MALLOC; -  memcpy(s, a->str, obytes); -  break; -  case TWO(STRING_ALLOC_STATIC,1): // static -> short -  s = ba_alloc(&string_allocator); -  a->alloc_type = STRING_ALLOC_BA; -  memcpy(s, a->str, obytes); -  break; -  case TWO(STRING_ALLOC_SUBSTRING,0): -  case TWO(STRING_ALLOC_SUBSTRING,1): -  Pike_fatal("This should not happen, substrings are never unlinked.\n"); -  break; -  default: -  Pike_fatal("encountered string with unknown allocation type %d\n", -  a->alloc_type); -  break; +     } - #undef TWO +     a->str = s;   done:    a->len=size;
1636:    return a;   }    +    /* Returns an unlinked string ready for end_shared_string */   static struct pike_string *realloc_shared_string(struct pike_string *a,    ptrdiff_t size)   { -  if(string_may_modify(a)) +  if(string_may_modify_len(a))    {    unlink_pike_string(a);    return realloc_unlinked_string(a, size);
1655:    }   }    - struct pike_string *new_realloc_shared_string(struct pike_string *a, INT32 size, int shift) + struct pike_string *new_realloc_shared_string(struct pike_string *a, INT32 size, enum size_shift shift)   {    struct pike_string *r;    if(shift == a->size_shift) return realloc_shared_string(a,size);
1796:    unlink_pike_string(a);    low_set_index(a, index, c);    CLEAR_STRING_CHECKED(a); -  +     if((((unsigned int)index) >= hash_prefix_len) && (index < a->len-8) )    {    struct pike_string *old;
1943:    return (r-haystack->str)>>haystack->size_shift;   }    - static struct pike_string *make_shared_substring0( struct pike_string *s, + static struct pike_string *make_shared_substring( struct pike_string *s,    ptrdiff_t start, -  ptrdiff_t len) +  ptrdiff_t len, +  enum size_shift shift)   {    struct pike_string *existing;    struct substring_pike_string *res; -  if( (existing = binary_findstring( s->str+start, len )) ) +  void *strstart = s->str+(start<<shift); +  size_t hval = low_do_hash(strstart,len,shift); +  if( (existing = +  internal_findstring(strstart, len, shift, hval)) )    {    add_ref(existing);    return existing;
1959:    add_ref(s);    existing = &res->str;    -  existing->flags = STRING_NOT_HASHED|STRING_NOT_SHARED; +  existing->flags = STRING_NOT_SHARED; +  existing->size_shift = shift;    existing->alloc_type = STRING_ALLOC_SUBSTRING; -  existing->str = s->str+start; +  existing->struct_type = STRING_STRUCT_SUBSTRING; +  existing->hval = hval; +  existing->str = strstart;    existing->len = len;   #ifdef PIKE_DEBUG    if( existing->len + start != s->len )    Pike_fatal("Substrings must be terminated at end of string for now.\n");   #endif    existing->refs = 0; -  existing->size_shift = 0; +     add_ref(existing); -  link_pike_string(existing, -  StrHash(existing->str,existing->len)); -  -  return (struct pike_string *)existing; +  link_pike_string(existing,hval); +  return existing;   }      
2008:    a substring, take from the original. */    if( s->alloc_type == STRING_ALLOC_SUBSTRING )    { -  struct pike_string *pr= ((struct substring_pike_string*)s)->parent; -  if( pr ) -  { +  struct pike_string *pr= substring_content_string(s);    /* Note: If substrings are ever anywhere except at the end,    this might need to change.    */    start += s->str-pr->str;    s = pr;    } -  } +     -  if( (len+start == s->len) && !s->size_shift ) +  if( (len+start == s->len) +  && start < (s->len>>1) +  && (!s->size_shift +  || (s->size_shift==1 && +  find_magnitude1(((p_wchar1*)s->str)+start,len)==1) +  || (s->size_shift==2 && +  find_magnitude2(((p_wchar2*)s->str)+start,len)==2)))    { -  if( start < (s->len >>1) ) /* <50% waste. */ -  return make_shared_substring0( s, start, len ); +  /* If there is no change of maginute, make a substring. */ +  return make_shared_substring( s, start, len, s->size_shift );    }       switch(s->size_shift)
2051:    struct pike_string *ret;    char *s,*tmp,*end;    PCHARP r; -  int shift; +  enum size_shift shift;    SearchMojt mojt;    ONERROR mojt_uwp;    replace_searchfunc f = (replace_searchfunc)0;
2430:   PMOD_EXPORT void string_builder_putchar(struct string_builder *s, int ch)   {    ptrdiff_t i; -  int mag = min_magnitude(ch); +  enum size_shift mag = min_magnitude(ch);       string_build_mkspace(s, 1, mag);    if (mag > s->known_shift) {
2444:    ptrdiff_t count)   {    ptrdiff_t len = s->s->len; -  int mag = min_magnitude(ch); +  enum size_shift mag = min_magnitude(ch);       /* This is not really expected to happen. But since we are doing    * memset here, a negative argument should be avoided. */
2527:    const p_wchar2 *str, ptrdiff_t len)   {    if (s->s->size_shift < 2) { -  int shift = find_magnitude2 (str, len); +  enum size_shift shift = find_magnitude2 (str, len);       if (shift > s->s->size_shift) {    string_build_mkspace (s, len, shift);
2568:    const PCHARP from,    ptrdiff_t len)   { -  int shift = from.shift; +  enum size_shift shift = from.shift;    if (shift > s->s->size_shift) {    if (shift == 1) {    shift = find_magnitude1((p_wchar1 *)from.ptr, len);
2590:    ptrdiff_t offset)   {    ptrdiff_t tmp; -  int shift; +  enum size_shift shift;      #ifdef PIKE_DEBUG    if(len<=0)
3321:    void *ptr,    int base,    ptrdiff_t maxlength, -  int shift) +  enum size_shift shift)   {    PCHARP tmp;    int ret=pcharp_to_svalue_inumber(r,
3338:    void *ptr,    int base,    ptrdiff_t maxlength, -  int shift) +  enum size_shift shift)   /* For use from the lexer where we can't let errors be thrown. */   {    PCHARP tmp;