pike.git / src / modules / MIME / mime.c

version» Context lines:

pike.git/src/modules/MIME/mime.c:1:   /*   || This file is part of Pike. For copyright information see COPYRIGHT.   || Pike is distributed under GPL, LGPL and MPL. See the file COPYING   || for more information.   */      /* -  * RFC1521 functionality for Pike +  * @rfc{1521@} functionality for Pike    *    * Marcus Comstedt 1996-1999    */    - #include "global.h" -  + #include "module.h"   #include "config.h"    - #include "module.h" - #include "stralloc.h" +    #include "pike_macros.h" - #include "object.h" +    #include "program.h"   #include "interpret.h"   #include "builtin_functions.h"   #include "module_support.h"   #include "pike_error.h"      #ifdef __CHAR_UNSIGNED__   #define SIGNED signed   #else   #define SIGNED   #endif         #define sp Pike_sp      /** Forward declarations of functions implementing Pike functions **/      static void f_decode_base64( INT32 args ); -  + static void f_decode_base64url( INT32 args );   static void f_encode_base64( INT32 args ); -  + static void f_encode_base64url( INT32 args );   static void f_decode_qp( INT32 args );   static void f_encode_qp( INT32 args );   static void f_decode_uue( INT32 args );   static void f_encode_uue( INT32 args );      static void f_tokenize( INT32 args );   static void f_tokenize_labled( INT32 args );   static void f_quote( INT32 args );   static void f_quote_labled( INT32 args );         /** Global tables **/      static const char base64tab[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; -  + static const char base64urltab[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";   static SIGNED char base64rtab[(1<<(CHAR_BIT-1))-' ']; -  + static SIGNED char base64urlrtab[(1<<(CHAR_BIT-1))-' '];   static const char qptab[16] = "0123456789ABCDEF";   static SIGNED char qprtab[(1<<(CHAR_BIT-1))-'0'];      #define CT_CTL 0   #define CT_WHITE 1   #define CT_ATOM 2   #define CT_SPECIAL 3   #define CT_EQUAL 4   #define CT_LPAR 5   #define CT_RPAR 6
pike.git/src/modules/MIME/mime.c:88:   {    int i;       Pike_compiler->new_program->id = PROG_MODULE_MIME_ID;       /* Init reverse base64 mapping */    memset( base64rtab, -1, sizeof(base64rtab) );    for (i = 0; i < 64; i++)    base64rtab[base64tab[i] - ' '] = i;    +  /* Init reverse base64url mapping */ +  memset( base64urlrtab, -1, sizeof(base64urlrtab) ); +  for (i = 0; i < 64; i++) +  base64urlrtab[base64urltab[i] - ' '] = i; +     /* Init reverse qp mapping */    memset( qprtab, -1, sizeof(qprtab) );    for (i = 0; i < 16; i++)    qprtab[qptab[i]-'0'] = i;    for (i = 10; i < 16; i++)    /* Lower case hex digits */    qprtab[qptab[i] - ('0' + 'A' - 'a')] = i;       /* Init lexical properties of characters for MIME.tokenize() */    memset( rfc822ctype, CT_ATOM, sizeof(rfc822ctype) );
pike.git/src/modules/MIME/mime.c:118:    rfc822ctype['='] = CT_EQUAL;    for(i=0; i<9; i++)    rfc822ctype[(int)"<>@,;:\\/?"[i]] = CT_SPECIAL;       /* Add global functions */       /* Really tFunc(tStr7, tStr8), but cut down on warnings for now. */    ADD_FUNCTION2( "decode_base64", f_decode_base64,    tFunc(tStr, tStr8), 0, OPT_TRY_OPTIMIZE );    +  ADD_FUNCTION2( "decode_base64url", f_decode_base64url, +  tFunc(tStr, tStr8), 0, OPT_TRY_OPTIMIZE ); +     ADD_FUNCTION2( "encode_base64", f_encode_base64,    tFunc(tStr tOr(tVoid,tInt),tStr7), 0, OPT_TRY_OPTIMIZE );    -  +  ADD_FUNCTION2( "encode_base64url", f_encode_base64url, +  tFunc(tStr tOr(tVoid,tInt),tStr7), 0, OPT_TRY_OPTIMIZE ); +     add_function_constant( "decode_qp", f_decode_qp,    "function(string:string)", OPT_TRY_OPTIMIZE );       ADD_FUNCTION2( "encode_qp", f_encode_qp,    tFunc(tStr tOr(tVoid,tInt),tStr7), 0, OPT_TRY_OPTIMIZE );       add_function_constant( "decode_uue", f_decode_uue,    "function(string:string)", OPT_TRY_OPTIMIZE );       ADD_FUNCTION2( "encode_uue", f_encode_uue,
pike.git/src/modules/MIME/mime.c:155:    "function(array(array(string|int)):string)",    OPT_TRY_OPTIMIZE );   }      /* Restore and exit module */      PIKE_MODULE_EXIT   {   }    -  - /** Functions implementing Pike functions **/ -  - /*! @decl string decode_base64(string encoded_data) -  *! -  *! This function decodes data encoded using the @tt{base64@} -  *! transfer encoding. -  *! -  *! @seealso -  *! @[MIME.encode_base64()], @[MIME.decode()] -  */ - static void f_decode_base64( INT32 args ) + static void decode_base64( INT32 args, const char *name, const SIGNED char *tab)   { -  if(args != 1) -  Pike_error( "Wrong number of arguments to MIME.decode_base64()\n" ); -  else if (TYPEOF(sp[-1]) != T_STRING) -  Pike_error( "Wrong type of argument to MIME.decode_base64()\n" ); -  else if (sp[-1].u.string->size_shift != 0) -  Pike_error( "Char out of range for MIME.decode_base64()\n" ); -  else { +  /* Decode the string in sp[-1].u.string. Any whitespace etc must be +  ignored, so the size of the result can't be exactly calculated +  from the input size. We'll use a string builder instead. */    -  /* Decode the string in sp[-1].u.string. Any whitespace etc -  must be ignored, so the size of the result can't be exactly -  calculated from the input size. We'll use a string builder -  instead. */ -  +     struct string_builder buf;    SIGNED char *src;    ptrdiff_t cnt;    INT32 d = 1;    int pads = 3;    -  +  if(args != 1) +  Pike_error( "Wrong number of arguments to MIME.%s()\n",name ); +  if (TYPEOF(sp[-1]) != T_STRING) +  Pike_error( "Wrong type of argument to MIME.%s()\n",name ); +  if (sp[-1].u.string->size_shift != 0) +  Pike_error( "Char out of range for MIME.%s()\n",name ); +     init_string_builder( &buf, 0 );       for (src = (SIGNED char *)sp[-1].u.string->str, cnt = sp[-1].u.string->len;    cnt--; src++) -  if(*src>=' ' && base64rtab[*src-' ']>=0) { +  if(*src>=' ' && tab[*src-' ']>=0) {    /* 6 more bits to put into d */ -  if((d=(d<<6)|base64rtab[*src-' '])>=0x1000000) { +  if((d=(d<<6)|tab[*src-' '])>=0x1000000) {    /* d now contains 24 valid bits. Put them in the buffer */    string_builder_putchar( &buf, (d>>16)&0xff );    string_builder_putchar( &buf, (d>>8)&0xff );    string_builder_putchar( &buf, d&0xff );    d=1;    }    } else if (*src=='=') {    /* A pad character has been encountered. */ -  +  break;    }       /* If data size not an even multiple of 3 bytes, output remaining data */    if (d & 0x3f000000) {    /* NOT_REACHED, but here for symmetry. */    pads = 0;    } else if (d & 0xfc0000) {    pads = 1;    /* Remove unused bits from d. */    d >>= 2;    } else if (d & 0x3f000) {    pads = 2;    /* Remove unused bits from d. */    d >>= 4;    }    switch(pads) {    case 0:    /* NOT_REACHED, but here for symmetry. */    string_builder_putchar( &buf, (d>>16)&0xff ); -  +  /* FALLTHRU */    case 1:    string_builder_putchar( &buf, (d>>8)&0xff ); -  +  /* FALLTHRU */    case 2:    string_builder_putchar( &buf, d&0xff );    }       /* Return result */    pop_n_elems( 1 );    push_string( finish_string_builder( &buf ) );   } -  +  + /** Functions implementing Pike functions **/ +  + /*! @decl string decode_base64(string encoded_data) +  *! +  *! This function decodes data encoded using the @tt{base64@} +  *! transfer encoding. +  *! +  *! @seealso +  *! @[MIME.encode_base64()], @[MIME.decode()] +  */ + static void f_decode_base64( INT32 args ) + { +  decode_base64(args, "decode_base64", base64rtab);   }    -  + /*! @decl string decode_base64url(string encoded_data) +  *! +  *! Decode strings according to @rfc{4648@} base64url encoding. +  *! +  *! @seealso +  *! @[MIME.decode_base64] +  */ + static void f_decode_base64url( INT32 args ) + { +  decode_base64(args, "decode_base64url", base64urlrtab); + } +    /* Convenience function for encode_base64(); Encode groups*3 bytes from    * *srcp into groups*4 bytes at *destp.    */   static int do_b64_encode( ptrdiff_t groups, unsigned char **srcp, char **destp, -  int insert_crlf ) +  int insert_crlf, const char *tab )   {    unsigned char *src = *srcp;    char *dest = *destp;    int g = 0;       while (groups--) {    /* Get 24 bits from src */    INT32 d = *src++<<8;    d = (*src++|d)<<8;    d |= *src++;    /* Output in encoded from to dest */ -  *dest++ = base64tab[d>>18]; -  *dest++ = base64tab[(d>>12)&63]; -  *dest++ = base64tab[(d>>6)&63]; -  *dest++ = base64tab[d&63]; +  *dest++ = tab[d>>18]; +  *dest++ = tab[(d>>12)&63]; +  *dest++ = tab[(d>>6)&63]; +  *dest++ = tab[d&63];    /* Insert a linebreak once in a while... */    if(insert_crlf && ++g == 19) {    *dest++ = 13;    *dest++ = 10;    g=0;    }    }       /* Update pointers */    *srcp = src;    *destp = dest;    return g;   }    - /*! @decl string encode_base64(string data, void|int no_linebreaks) -  *! -  *! This function encodes data using the @tt{base64@} transfer encoding. -  *! -  *! If a nonzero value is passed as @[no_linebreaks], the result string -  *! will not contain any linebreaks. -  *! -  *! @seealso -  *! @[MIME.decode_base64()], @[MIME.encode()] -  */ - static void f_encode_base64( INT32 args ) + static void encode_base64( INT32 args, const char *name, const char *tab, +  int pad )   { -  +  ptrdiff_t groups; +  ptrdiff_t last; +  int insert_crlf; +  ptrdiff_t length; +  struct pike_string *str; +  unsigned char *src; +  char *dest; +     if(args != 1 && args != 2) -  Pike_error( "Wrong number of arguments to MIME.encode_base64()\n" ); -  else if(TYPEOF(sp[-args]) != T_STRING) -  Pike_error( "Wrong type of argument to MIME.encode_base64()\n" ); -  else if (sp[-args].u.string->size_shift != 0) -  Pike_error( "Char out of range for MIME.encode_base64()\n" ); -  else { +  Pike_error( "Wrong number of arguments to MIME.%s()\n",name ); +  if(TYPEOF(sp[-args]) != T_STRING) +  Pike_error( "Wrong type of argument to MIME.%s()\n",name ); +  if (sp[-args].u.string->size_shift != 0) +  Pike_error( "Char out of range for MIME.%s()\n",name ); +  if (sp[-args].u.string->len == 0) +  { +  pop_n_elems(args-1); +  return; +  }    -  +     /* Encode the string in sp[-args].u.string. First, we need to know -  the number of 24 bit groups in the input, and the number of -  bytes actually present in the last group. */ +  the number of 24 bit groups in the input, and the number of bytes +  actually present in the last group. */    -  ptrdiff_t groups = (sp[-args].u.string->len+2)/3; -  ptrdiff_t last = (sp[-args].u.string->len-1)%3+1; +  groups = (sp[-args].u.string->len+2)/3; +  last = (sp[-args].u.string->len-1)%3+1;    -  int insert_crlf = !(args == 2 && TYPEOF(sp[-1]) == T_INT && +  insert_crlf = !(args == 2 && TYPEOF(sp[-1]) == T_INT &&    sp[-1].u.integer != 0);       /* We need 4 bytes for each 24 bit group, and 2 bytes for each linebreak */ -  struct pike_string *str = -  begin_shared_string( groups*4+(insert_crlf? (groups/19)*2 : 0) ); +  length = groups*4+(insert_crlf? (groups/19)*2 : 0); +  str = begin_shared_string( length );    -  unsigned char *src = (unsigned char *)sp[-args].u.string->str; -  char *dest = str->str; +  src = (unsigned char *)sp[-args].u.string->str; +  dest = str->str;       if (groups) { -  /* Temporary storage for the last group, as we may have to read -  an extra byte or two and don't want to get any page-faults. */ +  /* Temporary storage for the last group, as we may have to read an +  extra byte or two and don't want to get any page-faults. */    unsigned char tmp[3], *tmpp = tmp;    int i;    -  if (do_b64_encode( groups-1, &src, &dest, insert_crlf ) == 18) +  if (do_b64_encode( groups-1, &src, &dest, insert_crlf, tab ) == 18)    /* Skip the final linebreak if it's not to be followed by anything */    str->len -= 2;       /* Copy the last group to temporary storage */    tmp[1] = tmp[2] = 0;    for (i = 0; i < last; i++)    tmp[i] = *src++;       /* Encode the last group, and replace output codes with pads as needed */ -  do_b64_encode( 1, &tmpp, &dest, 0 ); +  do_b64_encode( 1, &tmpp, &dest, 0, tab );    switch (last) {    case 1:    *--dest = '='; -  +  /* FALLTHRU */    case 2:    *--dest = '=';    }    }       /* Return the result */    pop_n_elems( args ); -  +  if( pad )    push_string( end_shared_string( str ) ); -  +  else +  push_string( end_and_resize_shared_string( str, length-(3-last) ) );   } -  +  + /*! @decl string encode_base64(string data, void|int no_linebreaks) +  *! +  *! This function encodes data using the @tt{base64@} transfer encoding. +  *! +  *! If a nonzero value is passed as @[no_linebreaks], the result string +  *! will not contain any linebreaks. +  *! +  *! @seealso +  *! @[MIME.decode_base64()], @[MIME.encode()] +  */ + static void f_encode_base64( INT32 args ) + { +  encode_base64(args, "encode_base64", base64tab, 1);   }    -  + /*! @decl string encode_base64url(string data, void|int no_linebreaks) +  *! +  *! Encode strings according to @rfc{4648@} base64url encoding. No +  *! padding is performed and no_linebreaks defaults to true. +  *! +  *! @seealso +  *! @[MIME.encode_base64] +  */ + static void f_encode_base64url( INT32 args ) + { +  if( args==1 ) +  { +  push_int(1); +  args++; +  } +  encode_base64(args, "encode_base64url", base64urltab, 0); + } +    /*! @decl string decode_qp(string encoded_data)    *!    *! This function decodes data encoded using the @tt{quoted-printable@}    *! (a.k.a. quoted-unreadable) transfer encoding.    *!    *! @seealso    *! @[MIME.encode_qp()], @[MIME.decode()]    */   static void f_decode_qp( INT32 args )   {
pike.git/src/modules/MIME/mime.c:570:   {    unsigned char *src = *srcp;    char *dest = *destp;       while (groups || last) {    /* A single line can hold at most 15 groups */    ptrdiff_t g = (groups >= 15? 15 : groups);       if (g<15) {    /* The line isn't filled completely. Add space for the "last" bytes */ -  *dest++ = ' ' + -  DO_NOT_WARN((char)(3*g + last)); +  *dest++ = ' ' + (char)(3*g + last);    last = 0;    } else -  *dest++ = ' ' + -  DO_NOT_WARN((char)(3*g)); +  *dest++ = ' ' + (char)(3*g);       groups -= g;       while (g--) {    /* Get 24 bits of data */    INT32 d = *src++<<8;    d = (*src++|d)<<8;    d |= *src++;    /* Output it in encoded form */    if((*dest++ = ' '+(d>>18)) == ' ') dest[-1]='`';
pike.git/src/modules/MIME/mime.c:681:    This will get overwritten by a fake length byte which we will    then replace with the originial character */    k = *--dest;    kp = dest;       do_uue_encode( 1, &tmpp, &dest, 0 );       /* Restore the saved character */    *kp = k;    -  /* Replace final nulls with pad characters if neccesary */ +  /* Replace final nulls with pad characters if necessary */    switch (last) {    case 1:    dest[-2] = '`'; -  +  /* FALLTHRU */    case 2:    dest[-1] = '`';    }       /* Add a final linebreak after the last group */    *dest++ = 13;    *dest++ = 10;    }       /* Put a terminating line (length byte `) and the "end" line into buffer */    memcpy( dest, "`\r\nend\r\n", 8 );       /* Return the result */    pop_n_elems( args );    push_string( end_shared_string( str ) );    }   }       - static void low_tokenize( const char *funname, INT32 args, int mode ) + static void low_tokenize( INT32 args, int mode )   {       /* Tokenize string in sp[-args].u.string. We'll just push the    tokens on the stack, and then do an aggregate_array just    before exiting. */       unsigned char *src;    int flags = 0;    struct array *arr;    struct pike_string *str;    ptrdiff_t cnt;    INT32 n = 0, l, e, d;    char *p;    -  get_all_args(funname, args, "%S.%d", &str, &flags); +  get_all_args(NULL, args, "%S.%d", &str, &flags);       src = STR0(str);    cnt = str->len;       while (cnt>0)    switch (rfc822ctype[*src]) {    case CT_EQUAL:    /* Might be an encoded word. Check it out. */    l = 0;    if (cnt>5 && src[1] == '?') {
pike.git/src/modules/MIME/mime.c:744:    else if(rfc822ctype[src[l]]<=CT_WHITE)    break;    if (nq == 3 && l<cnt && src[l] == '=')    l ++;    else    l = 0;    }    if (l>0) {    /* Yup. It's an encoded word, so it must be an atom. */    if(mode) -  push_constant_text("encoded-word"); +  push_static_text("encoded-word");    push_string( make_shared_binary_string( (char *)src, l ) );    if(mode)    f_aggregate(2);    n++;    src += l;    cnt -= l;    break;    } -  +  /* FALLTHRU */    case CT_SPECIAL:    case CT_RBRACK:    case CT_RPAR:    /* Individual special character, push as a char (= int) */    if(mode) -  push_constant_text("special"); +  push_static_text("special");    push_int( *src++ );    if(mode)    f_aggregate(2);    n++;    --cnt;    break;       case CT_ATOM:    /* Atom, find length then push as a string */    for (l=1; l<cnt; l++)    if (rfc822ctype[src[l]] != CT_ATOM)    break;       if(mode) -  push_constant_text("word"); +  push_static_text("word");    push_string( make_shared_binary_string( (char *)src, l ) );    if(mode)    f_aggregate(2);    n++;    src += l;    cnt -= l;    break;       case CT_QUOTE:    /* Quoted-string, find length then push as a string while removing
pike.git/src/modules/MIME/mime.c:796:    if (src[l] == '"')    break;    else    if ((src[l] == '\\') && !(flags & TOKENIZE_KEEP_ESCAPES)) {    e++;    l++;    }       /* Push the resulting string */    if(mode) -  push_constant_text("word"); +  push_static_text("word");       if (e) {    /* l is the distance to the ending ", and e is the number of \    escapes encountered on the way */    str = begin_shared_string( l-e-1 );       /* Copy the string and remove \ escapes */    for (p = str->str, e = 1; e < l; e++)    *p++ = (src[e] == '\\'? src[++e] : src[e]);   
pike.git/src/modules/MIME/mime.c:834:    break;    else    if(src[l] == '\\') {    e++;    l++;    }       if (l >= cnt) {    /* No ]; seems that this was no domain literal after all... */    if(mode) -  push_constant_text("special"); +  push_static_text("special");    push_int( *src++ );    if(mode)    f_aggregate(2);    n++;    --cnt;    break;    }       /* l is the distance to the ending ], and e is the number of \    escapes encountered on the way */    str = begin_shared_string( l-e+1 );       /* Copy the literal and remove \ escapes */    for (p = str->str, e = 0; e <= l; e++)    *p++ = (src[e] == '\\'? src[++e] : src[e]);       /* Push the resulting string */    if(mode) -  push_constant_text("domain-literal"); +  push_static_text("domain-literal");    push_string( end_shared_string( str ) );    if(mode)    f_aggregate(2);    n++;    src += l+1;    cnt -= l+1;    break;       case CT_LPAR:    /* Comment. Nested comments are allowed, so we'll use d to
pike.git/src/modules/MIME/mime.c:881:    if(!--d)    break;    } else    /* Skip escaped characters */    if(src[l] == '\\') {    e++;    l++;    }       if(mode) { -  push_constant_text("comment"); +  push_static_text("comment");       str = begin_shared_string( l-e-1 );       /* Copy the comment and remove \ escapes */    for (p = str->str, e = 1; e < l; e++)    *p++ = (src[e] == '\\'? src[++e] : src[e]);       push_string( end_shared_string( str ) );    f_aggregate(2);    n++;
pike.git/src/modules/MIME/mime.c:922:    }       /* Create the resulting array and push it */    arr = aggregate_array( n );    pop_n_elems( args );    push_array( arr );   }      /*! @decl array(string|int) tokenize(string header, int|void flags)    *! -  *! A structured header field, as specified by RFC822, is constructed from +  *! A structured header field, as specified by @rfc{822@}, is constructed from    *! a sequence of lexical elements.    *!    *! @param header    *! The header value to parse.    *!    *! @param flags    *! An optional set of flags. Currently only one flag is defined:    *! @int    *! @value TOKENIZE_KEEP_ESCAPES    *! Keep backslash-escapes in quoted-strings.
pike.git/src/modules/MIME/mime.c:966:    *! as strings.    *!    *! Comments are not returned in the array at all.    *!    *! @note    *! As domain-literals are returned as strings, there is no way to tell the    *! domain-literal @tt{[127.0.0.1]@} from the quoted-string    *! @tt{"[127.0.0.1]"@}. Hopefully this won't cause any problems.    *! Domain-literals are used seldom, if at all, anyway...    *! -  *! The set of special-characters is the one specified in RFC1521 +  *! The set of special-characters is the one specified in @rfc{1521@}    *! (i.e. @expr{"<", ">", "@@", ",", ";", ":", "\", "/", "?", "="@}), -  *! and not the set specified in RFC822. +  *! and not the set specified in @rfc{822@}.    *!    *! @seealso    *! @[MIME.quote()], @[tokenize_labled()],    *! @[decode_words_tokenized_remapped()].    */   static void f_tokenize( INT32 args )   { -  low_tokenize("MIME.tokenize", args, 0 ); +  low_tokenize(args, 0);   }      /*! @decl array(array(string|int)) tokenize_labled(string header, @    *! int|void flags)    *!    *! Similar to @[tokenize()], but labels the contents, by making    *! arrays with two elements; the first a label, and the second    *! the value that @[tokenize()] would have put there, except    *! for that comments are kept.    *!
pike.git/src/modules/MIME/mime.c:1017:    *! @value "comment"    *! Comment.    *! @endstring    *!    *! @seealso    *! @[MIME.quote()], @[tokenize()],    *! @[decode_words_tokenized_labled_remapped()]    */   static void f_tokenize_labled( INT32 args )   { -  low_tokenize("MIME.tokenize_labled", args, 1); +  low_tokenize(args, 1);   }         /* Convenience function for quote() which determines if a sequence of    * characters can be stored as an atom.    */   static int check_atom_chars( unsigned char *str, ptrdiff_t len )   {    /* Atoms must contain at least 1 character... */    if (len < 1)