Branch: Tag:

2014-07-02

2014-07-02 17:06:29 by Arne Goedeke <el@laramies.com>

Strings: correctly store character ranges

Character ranges of strings are stored in two unsigned chars (min and
max). For wide strings, each value between 0 and 255 represents a block
of characters: 256 characters for 16-bit strings and (1<<24) characters
for 32-bit strings.

The previous code had several issues:

1) After calculating the actual min/max values of the character range,
these values were rounded up, which could overflow the unsigned char
they are stored in. The result was that both min and max could end up
being 0. An example is the string (string)({ (1<<16)-1 }): there
(65535+255)>>8 is 256, which is truncated to 0 when stored.
2) The 32-bit case used blocks of (1<<16) characters instead of
(1<<24) characters.

102:    {    if( str->flags & STRING_CONTENT_CHECKED )    { +  switch (str->size_shift) { +  case eightbit:    s_min = str->min;    s_max = str->max; -  +  break; +  case sixteenbit: +  s_min = str->min; +  s_max = str->max; +  s_min *= 256; +  s_max *= 256; +  s_max += 255; +  break; +  case thirtytwobit: { +  unsigned INT32 tmp;    -  if( str->size_shift ) -  { -  s_min <<= 8 * str->size_shift; -  s_max <<= 8 * str->size_shift; -  if( s_min ) -  s_min -= (1<<(8*str->size_shift))-1; -  s_max += str->size_shift == 1 ? 255 : 65535; +  tmp = str->min; +  tmp *= (1 << 24); +  s_min = tmp; +  +  tmp = str->max; +  tmp *= (1 << 24); +  tmp += (1 << 24) - 1; +  s_max = tmp; +  break;    }    } -  +  }    else    {    switch( str->size_shift )
167:    if( *p < s_min ) s_min = *p;    }    } -  str->min = (s_min+255) >> 8; -  str->max = (s_max+255) >> 8; +  str->min = s_min / 256; +  str->max = s_max / 256;    break;       case 2:
180:    if( *p < s_min ) s_min = *p;    }    } -  str->min = (s_min+65535) >> 16; -  str->max = (s_max+65535) >> 16; +  str->min = (unsigned INT32)s_min / (1 << 24); +  str->max = (unsigned INT32)s_max / (1 << 24);    break;    }    }