e576bb2002-10-11Martin Nilsson /* || This file is part of Pike. For copyright information see COPYRIGHT. || Pike is distributed under GPL, LGPL and MPL. See the file COPYING || for more information. */
1b10db2002-10-08Martin Nilsson 
8f1cbc1997-03-13Marcus Comstedt /*
310a6b2015-08-21Henrik Grubbström (Grubba)  * @rfc{1521@} functionality for Pike
8f1cbc1997-03-13Marcus Comstedt  *
cebac91999-03-10Marcus Comstedt  * Marcus Comstedt 1996-1999
8f1cbc1997-03-13Marcus Comstedt  */
8e7c8e2018-01-18Martin Nilsson #include "module.h"
5565b61997-03-08Marcus Comstedt #include "config.h"
bb55f81997-03-16Fredrik Hübinette (Hubbe) #include "pike_macros.h"
5565b61997-03-08Marcus Comstedt #include "program.h" #include "interpret.h" #include "builtin_functions.h"
341f602008-01-25Henrik Grubbström (Grubba) #include "module_support.h"
b2d3e42000-12-01Fredrik Hübinette (Hubbe) #include "pike_error.h"
5565b61997-03-08Marcus Comstedt 
1fda201997-04-17Marcus Comstedt #ifdef __CHAR_UNSIGNED__ #define SIGNED signed #else #define SIGNED #endif
5565b61997-03-08Marcus Comstedt 
6ad2372002-05-11Martin Nilsson #define sp Pike_sp
8f1cbc1997-03-13Marcus Comstedt /** Forward declarations of functions implementing Pike functions **/ static void f_decode_base64( INT32 args );
d6261d2017-03-26Martin Nilsson static void f_decode_base64url( INT32 args );
8f1cbc1997-03-13Marcus Comstedt static void f_encode_base64( INT32 args );
d6261d2017-03-26Martin Nilsson static void f_encode_base64url( INT32 args );
8f1cbc1997-03-13Marcus Comstedt static void f_decode_qp( INT32 args ); static void f_encode_qp( INT32 args ); static void f_decode_uue( INT32 args ); static void f_encode_uue( INT32 args ); static void f_tokenize( INT32 args );
56e2c41999-03-07Marcus Comstedt static void f_tokenize_labled( INT32 args );
8f1cbc1997-03-13Marcus Comstedt static void f_quote( INT32 args );
9938e91999-03-09Marcus Comstedt static void f_quote_labled( INT32 args );
8f1cbc1997-03-13Marcus Comstedt 
5565b61997-03-08Marcus Comstedt 
8f1cbc1997-03-13Marcus Comstedt /** Global tables **/
c0ad8a2004-06-29Martin Nilsson static const char base64tab[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
d6261d2017-03-26Martin Nilsson static const char base64urltab[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
808e7d1999-03-09Marcus Comstedt static SIGNED char base64rtab[(1<<(CHAR_BIT-1))-' '];
d6261d2017-03-26Martin Nilsson static SIGNED char base64urlrtab[(1<<(CHAR_BIT-1))-' '];
c0ad8a2004-06-29Martin Nilsson static const char qptab[16] = "0123456789ABCDEF";
808e7d1999-03-09Marcus Comstedt static SIGNED char qprtab[(1<<(CHAR_BIT-1))-'0'];
5565b61997-03-08Marcus Comstedt  #define CT_CTL 0 #define CT_WHITE 1 #define CT_ATOM 2 #define CT_SPECIAL 3
ff7a0f1999-03-04Marcus Comstedt #define CT_EQUAL 4 #define CT_LPAR 5 #define CT_RPAR 6 #define CT_LBRACK 7 #define CT_RBRACK 8 #define CT_QUOTE 9
808e7d1999-03-09Marcus Comstedt unsigned char rfc822ctype[1<<CHAR_BIT];
5565b61997-03-08Marcus Comstedt 
341f602008-01-25Henrik Grubbström (Grubba) /*! @decl constant TOKENIZE_KEEP_ESCAPES *! *! Don't unquote backslash-sequences in quoted strings during tokenizing. *! This is used for bug-compatibility with Microsoft... *! *! @seealso *! @[tokenize()], @[tokenize_labled()] */ #define TOKENIZE_KEEP_ESCAPES 1
8f1cbc1997-03-13Marcus Comstedt  /** Externally available functions **/
61b0392001-01-06Henrik Grubbström (Grubba) /*! @module MIME */
5565b61997-03-08Marcus Comstedt /* Initialize and start module */
8f1cbc1997-03-13Marcus Comstedt 
51ef5c2002-10-21Marcus Comstedt PIKE_MODULE_INIT
5565b61997-03-08Marcus Comstedt { int i;
4879f32001-07-01Henrik Grubbström (Grubba)  Pike_compiler->new_program->id = PROG_MODULE_MIME_ID;
8f1cbc1997-03-13Marcus Comstedt  /* Init reverse base64 mapping */ memset( base64rtab, -1, sizeof(base64rtab) ); for (i = 0; i < 64; i++) base64rtab[base64tab[i] - ' '] = i;
d6261d2017-03-26Martin Nilsson  /* Init reverse base64url mapping */ memset( base64urlrtab, -1, sizeof(base64urlrtab) ); for (i = 0; i < 64; i++) base64urlrtab[base64urltab[i] - ' '] = i;
8f1cbc1997-03-13Marcus Comstedt  /* Init reverse qp mapping */ memset( qprtab, -1, sizeof(qprtab) ); for (i = 0; i < 16; i++) qprtab[qptab[i]-'0'] = i; for (i = 10; i < 16; i++) /* Lower case hex digits */ qprtab[qptab[i] - ('0' + 'A' - 'a')] = i; /* Init lexical properties of characters for MIME.tokenize() */ memset( rfc822ctype, CT_ATOM, sizeof(rfc822ctype) ); for (i = 0; i < 32; i++) rfc822ctype[i] = CT_CTL; rfc822ctype[127] = CT_CTL; rfc822ctype[' '] = CT_WHITE; rfc822ctype['\t'] = CT_WHITE; rfc822ctype['('] = CT_LPAR; rfc822ctype[')'] = CT_RPAR; rfc822ctype['['] = CT_LBRACK;
56e2c41999-03-07Marcus Comstedt  rfc822ctype[']'] = CT_RBRACK;
8f1cbc1997-03-13Marcus Comstedt  rfc822ctype['"'] = CT_QUOTE;
ff7a0f1999-03-04Marcus Comstedt  rfc822ctype['='] = CT_EQUAL;
12a17b1999-08-07Marcus Comstedt  for(i=0; i<9; i++)
ff7a0f1999-03-04Marcus Comstedt  rfc822ctype[(int)"<>@,;:\\/?"[i]] = CT_SPECIAL;
8f1cbc1997-03-13Marcus Comstedt  /* Add global functions */
c8dd5a2013-12-14Martin Nilsson  /* Really tFunc(tStr7, tStr8), but cut down on warnings for now. */ ADD_FUNCTION2( "decode_base64", f_decode_base64, tFunc(tStr, tStr8), 0, OPT_TRY_OPTIMIZE );
bff3272013-05-28Martin Nilsson 
d6261d2017-03-26Martin Nilsson  ADD_FUNCTION2( "decode_base64url", f_decode_base64url, tFunc(tStr, tStr8), 0, OPT_TRY_OPTIMIZE );
bff3272013-05-28Martin Nilsson  ADD_FUNCTION2( "encode_base64", f_encode_base64, tFunc(tStr tOr(tVoid,tInt),tStr7), 0, OPT_TRY_OPTIMIZE );
d6261d2017-03-26Martin Nilsson  ADD_FUNCTION2( "encode_base64url", f_encode_base64url, tFunc(tStr tOr(tVoid,tInt),tStr7), 0, OPT_TRY_OPTIMIZE );
8f1cbc1997-03-13Marcus Comstedt  add_function_constant( "decode_qp", f_decode_qp, "function(string:string)", OPT_TRY_OPTIMIZE );
bff3272013-05-28Martin Nilsson  ADD_FUNCTION2( "encode_qp", f_encode_qp, tFunc(tStr tOr(tVoid,tInt),tStr7), 0, OPT_TRY_OPTIMIZE );
8f1cbc1997-03-13Marcus Comstedt  add_function_constant( "decode_uue", f_decode_uue, "function(string:string)", OPT_TRY_OPTIMIZE );
bff3272013-05-28Martin Nilsson  ADD_FUNCTION2( "encode_uue", f_encode_uue, tFunc(tStr tOr(tVoid,tStr),tStr7), 0, OPT_TRY_OPTIMIZE );
8f1cbc1997-03-13Marcus Comstedt 
341f602008-01-25Henrik Grubbström (Grubba)  add_integer_constant("TOKENIZE_KEEP_ESCAPES", TOKENIZE_KEEP_ESCAPES, 0);
8f1cbc1997-03-13Marcus Comstedt  add_function_constant( "tokenize", f_tokenize,
341f602008-01-25Henrik Grubbström (Grubba)  "function(string, int|void:array(string|int))",
8f1cbc1997-03-13Marcus Comstedt  OPT_TRY_OPTIMIZE );
56e2c41999-03-07Marcus Comstedt  add_function_constant( "tokenize_labled", f_tokenize_labled,
341f602008-01-25Henrik Grubbström (Grubba)  "function(string, int|void:array(array(string|int)))",
56e2c41999-03-07Marcus Comstedt  OPT_TRY_OPTIMIZE );
8f1cbc1997-03-13Marcus Comstedt  add_function_constant( "quote", f_quote, "function(array(string|int):string)", OPT_TRY_OPTIMIZE );
9938e91999-03-09Marcus Comstedt  add_function_constant( "quote_labled", f_quote_labled, "function(array(array(string|int)):string)", OPT_TRY_OPTIMIZE );
5565b61997-03-08Marcus Comstedt } /* Restore and exit module */
8f1cbc1997-03-13Marcus Comstedt 
51ef5c2002-10-21Marcus Comstedt PIKE_MODULE_EXIT
5565b61997-03-08Marcus Comstedt { }
abdc262017-07-18Marcus Comstedt static void decode_base64( INT32 args, const char *name, const SIGNED char *tab)
d6261d2017-03-26Martin Nilsson { /* Decode the string in sp[-1].u.string. Any whitespace etc must be ignored, so the size of the result can't be exactly calculated from the input size. We'll use a string builder instead. */ struct string_builder buf; SIGNED char *src; ptrdiff_t cnt; INT32 d = 1; int pads = 3;
6505de2017-05-01Henrik Grubbström (Grubba)  if(args != 1) Pike_error( "Wrong number of arguments to MIME.%s()\n",name ); if (TYPEOF(sp[-1]) != T_STRING) Pike_error( "Wrong type of argument to MIME.%s()\n",name ); if (sp[-1].u.string->size_shift != 0) Pike_error( "Char out of range for MIME.%s()\n",name );
d6261d2017-03-26Martin Nilsson  init_string_builder( &buf, 0 ); for (src = (SIGNED char *)sp[-1].u.string->str, cnt = sp[-1].u.string->len; cnt--; src++) if(*src>=' ' && tab[*src-' ']>=0) { /* 6 more bits to put into d */ if((d=(d<<6)|tab[*src-' '])>=0x1000000) { /* d now contains 24 valid bits. Put them in the buffer */ string_builder_putchar( &buf, (d>>16)&0xff ); string_builder_putchar( &buf, (d>>8)&0xff ); string_builder_putchar( &buf, d&0xff ); d=1; } } else if (*src=='=') { /* A pad character has been encountered. */ break; } /* If data size not an even multiple of 3 bytes, output remaining data */ if (d & 0x3f000000) { /* NOT_REACHED, but here for symmetry. */ pads = 0; } else if (d & 0xfc0000) { pads = 1; /* Remove unused bits from d. */ d >>= 2; } else if (d & 0x3f000) { pads = 2; /* Remove unused bits from d. */ d >>= 4; } switch(pads) { case 0: /* NOT_REACHED, but here for symmetry. */ string_builder_putchar( &buf, (d>>16)&0xff );
5f50842018-02-12Marcus Comstedt  /* FALLTHRU */
d6261d2017-03-26Martin Nilsson  case 1: string_builder_putchar( &buf, (d>>8)&0xff );
5f50842018-02-12Marcus Comstedt  /* FALLTHRU */
d6261d2017-03-26Martin Nilsson  case 2: string_builder_putchar( &buf, d&0xff ); } /* Return result */ pop_n_elems( 1 ); push_string( finish_string_builder( &buf ) ); }
8f1cbc1997-03-13Marcus Comstedt  /** Functions implementing Pike functions **/
61b0392001-01-06Henrik Grubbström (Grubba) /*! @decl string decode_base64(string encoded_data) *! *! This function decodes data encoded using the @tt{base64@} *! transfer encoding. *! *! @seealso *! @[MIME.encode_base64()], @[MIME.decode()] */
8f1cbc1997-03-13Marcus Comstedt static void f_decode_base64( INT32 args )
5565b61997-03-08Marcus Comstedt {
d6261d2017-03-26Martin Nilsson  decode_base64(args, "decode_base64", base64rtab); }
5565b61997-03-08Marcus Comstedt 
d6261d2017-03-26Martin Nilsson /*! @decl string decode_base64url(string encoded_data) *! *! Decode strings according to @rfc{4648@} base64url encoding. *! *! @seealso *! @[MIME.decode_base64] */ static void f_decode_base64url( INT32 args ) { decode_base64(args, "decode_base64url", base64urlrtab);
5565b61997-03-08Marcus Comstedt }
/* Convenience function for encode_base64().
 *
 * Encodes groups*3 bytes read from *srcp into groups*4 characters
 * written at *destp, looking each 6-bit value up in tab.  When
 * insert_crlf is nonzero, a CR LF pair is written after every 19th
 * group.  Both *srcp and *destp are advanced past the data consumed
 * and produced.
 *
 * Returns the number of groups written since the last linebreak
 * (always 0 when insert_crlf is zero).
 */
static int do_b64_encode( ptrdiff_t groups, unsigned char **srcp, char **destp,
                          int insert_crlf, const char *tab )
{
  unsigned char *in = *srcp;
  char *out = *destp;
  int groups_on_line = 0;

  while (groups--) {
    /* Collect 24 bits of input */
    unsigned long bits = ((unsigned long)in[0] << 16) |
                         ((unsigned long)in[1] << 8) |
                         (unsigned long)in[2];
    in += 3;

    /* Output them in encoded form, six bits at a time */
    out[0] = tab[bits >> 18];
    out[1] = tab[(bits >> 12) & 63];
    out[2] = tab[(bits >> 6) & 63];
    out[3] = tab[bits & 63];
    out += 4;

    /* Insert a linebreak once in a while... */
    if (insert_crlf && ++groups_on_line == 19) {
      *out++ = 13;
      *out++ = 10;
      groups_on_line = 0;
    }
  }

  /* Hand the advanced pointers back to the caller */
  *srcp = in;
  *destp = out;
  return groups_on_line;
}
d6261d2017-03-26Martin Nilsson static void encode_base64( INT32 args, const char *name, const char *tab, int pad ) {
6505de2017-05-01Henrik Grubbström (Grubba)  ptrdiff_t groups; ptrdiff_t last; int insert_crlf; ptrdiff_t length; struct pike_string *str; unsigned char *src; char *dest;
d6261d2017-03-26Martin Nilsson  if(args != 1 && args != 2) Pike_error( "Wrong number of arguments to MIME.%s()\n",name ); if(TYPEOF(sp[-args]) != T_STRING) Pike_error( "Wrong type of argument to MIME.%s()\n",name ); if (sp[-args].u.string->size_shift != 0) Pike_error( "Char out of range for MIME.%s()\n",name );
e489df2017-05-29Martin Nilsson  if (sp[-args].u.string->len == 0)
33eb402017-05-29Martin Nilsson  { pop_n_elems(args-1);
e489df2017-05-29Martin Nilsson  return;
33eb402017-05-29Martin Nilsson  }
e489df2017-05-29Martin Nilsson 
d6261d2017-03-26Martin Nilsson  /* Encode the string in sp[-args].u.string. First, we need to know the number of 24 bit groups in the input, and the number of bytes actually present in the last group. */
6505de2017-05-01Henrik Grubbström (Grubba)  groups = (sp[-args].u.string->len+2)/3; last = (sp[-args].u.string->len-1)%3+1;
d6261d2017-03-26Martin Nilsson 
6505de2017-05-01Henrik Grubbström (Grubba)  insert_crlf = !(args == 2 && TYPEOF(sp[-1]) == T_INT && sp[-1].u.integer != 0);
d6261d2017-03-26Martin Nilsson  /* We need 4 bytes for each 24 bit group, and 2 bytes for each linebreak */
6505de2017-05-01Henrik Grubbström (Grubba)  length = groups*4+(insert_crlf? (groups/19)*2 : 0); str = begin_shared_string( length );
d6261d2017-03-26Martin Nilsson 
6505de2017-05-01Henrik Grubbström (Grubba)  src = (unsigned char *)sp[-args].u.string->str; dest = str->str;
d6261d2017-03-26Martin Nilsson  if (groups) { /* Temporary storage for the last group, as we may have to read an extra byte or two and don't want to get any page-faults. */ unsigned char tmp[3], *tmpp = tmp; int i; if (do_b64_encode( groups-1, &src, &dest, insert_crlf, tab ) == 18) /* Skip the final linebreak if it's not to be followed by anything */ str->len -= 2; /* Copy the last group to temporary storage */ tmp[1] = tmp[2] = 0; for (i = 0; i < last; i++) tmp[i] = *src++; /* Encode the last group, and replace output codes with pads as needed */ do_b64_encode( 1, &tmpp, &dest, 0, tab ); switch (last) { case 1: *--dest = '=';
5f50842018-02-12Marcus Comstedt  /* FALLTHRU */
d6261d2017-03-26Martin Nilsson  case 2: *--dest = '='; } } /* Return the result */ pop_n_elems( args ); if( pad ) push_string( end_shared_string( str ) ); else push_string( end_and_resize_shared_string( str, length-(3-last) ) ); }
61b0392001-01-06Henrik Grubbström (Grubba) /*! @decl string encode_base64(string data, void|int no_linebreaks) *! *! This function encodes data using the @tt{base64@} transfer encoding. *! *! If a nonzero value is passed as @[no_linebreaks], the result string *! will not contain any linebreaks. *! *! @seealso *! @[MIME.decode_base64()], @[MIME.encode()] */
8f1cbc1997-03-13Marcus Comstedt static void f_encode_base64( INT32 args )
5565b61997-03-08Marcus Comstedt {
d6261d2017-03-26Martin Nilsson  encode_base64(args, "encode_base64", base64tab, 1); }
8f1cbc1997-03-13Marcus Comstedt 
d6261d2017-03-26Martin Nilsson /*! @decl string encode_base64url(string data, void|int no_linebreaks) *!
33eb402017-05-29Martin Nilsson  *! Encode strings according to @rfc{4648@} base64url encoding. No *! padding is performed and no_linebreaks defaults to true.
d6261d2017-03-26Martin Nilsson  *! *! @seealso *! @[MIME.encode_base64] */ static void f_encode_base64url( INT32 args ) {
33eb402017-05-29Martin Nilsson  if( args==1 ) { push_int(1); args++; }
d6261d2017-03-26Martin Nilsson  encode_base64(args, "encode_base64url", base64urltab, 0);
5565b61997-03-08Marcus Comstedt }
61b0392001-01-06Henrik Grubbström (Grubba) /*! @decl string decode_qp(string encoded_data) *! *! This function decodes data encoded using the @tt{quoted-printable@} *! (a.k.a. quoted-unreadable) transfer encoding. *! *! @seealso *! @[MIME.encode_qp()], @[MIME.decode()] */
8f1cbc1997-03-13Marcus Comstedt static void f_decode_qp( INT32 args )
5565b61997-03-08Marcus Comstedt { if(args != 1)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong number of arguments to MIME.decode_qp()\n" );
017b572011-10-28Henrik Grubbström (Grubba)  else if(TYPEOF(sp[-1]) != T_STRING)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong type of argument to MIME.decode_qp()\n" );
808e7d1999-03-09Marcus Comstedt  else if (sp[-1].u.string->size_shift != 0)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Char out of range for MIME.decode_qp()\n" );
5565b61997-03-08Marcus Comstedt  else {
8f1cbc1997-03-13Marcus Comstedt  /* Decode the string in sp[-1].u.string. We have absolutely no idea how much of the input is raw data and how much is encoded data,
cebac91999-03-10Marcus Comstedt  so we'll use a string builder to hold the result. */
8f1cbc1997-03-13Marcus Comstedt 
cebac91999-03-10Marcus Comstedt  struct string_builder buf;
1fda201997-04-17Marcus Comstedt  SIGNED char *src;
45372f2000-08-04Henrik Grubbström (Grubba)  ptrdiff_t cnt;
5565b61997-03-08Marcus Comstedt 
cebac91999-03-10Marcus Comstedt  init_string_builder(&buf, 0);
5565b61997-03-08Marcus Comstedt 
e1a2e71997-04-20Henrik Grubbström (Grubba)  for (src = (SIGNED char *)sp[-1].u.string->str, cnt = sp[-1].u.string->len; cnt--; src++)
8f1cbc1997-03-13Marcus Comstedt  if (*src == '=') { /* Encoded data */ if (cnt > 0 && (src[1] == 10 || src[1] == 13)) { /* A '=' followed by CR, LF or CRLF will be simply ignored. */ if (src[1] == 13) { --cnt; src++; } if (cnt>0 && src[1]==10) { --cnt; src++; } } else if (cnt >= 2 && src[1] >= '0' && src[2] >= '0' && qprtab[src[1]-'0'] >= 0 && qprtab[src[2]-'0'] >= 0) { /* A '=' followed by a hexadecimal number. */
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, (qprtab[src[1]-'0']<<4)|qprtab[src[2]-'0'] );
5565b61997-03-08Marcus Comstedt  cnt -= 2; src += 2;
a48c961997-03-12Marcus Comstedt  } } else
8f1cbc1997-03-13Marcus Comstedt  /* Raw data */
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, *(unsigned char *)src );
5565b61997-03-08Marcus Comstedt 
8f1cbc1997-03-13Marcus Comstedt  /* Return the result */ pop_n_elems( 1 );
cebac91999-03-10Marcus Comstedt  push_string( finish_string_builder( &buf ) );
5565b61997-03-08Marcus Comstedt  } }
61b0392001-01-06Henrik Grubbström (Grubba) /*! @decl string encode_qp(string data, void|int no_linebreaks) *! *! This function encodes data using the @tt{quoted-printable@} *! (a.k.a. quoted-unreadable) transfer encoding. *! *! If a nonzero value is passed as @[no_linebreaks], the result *! string will not contain any linebreaks. *! *! @note *! Please do not use this function. QP is evil, and there's no *! excuse for using it. *! *! @seealso *! @[MIME.decode_qp()], @[MIME.encode()] */
8f1cbc1997-03-13Marcus Comstedt static void f_encode_qp( INT32 args )
5565b61997-03-08Marcus Comstedt {
f48b831997-05-06Marcus Comstedt  if (args != 1 && args != 2)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong number of arguments to MIME.encode_qp()\n" );
017b572011-10-28Henrik Grubbström (Grubba)  else if (TYPEOF(sp[-args]) != T_STRING)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong type of argument to MIME.encode_qp()\n" );
808e7d1999-03-09Marcus Comstedt  else if (sp[-args].u.string->size_shift != 0)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Char out of range for MIME.encode_qp()\n" );
5565b61997-03-08Marcus Comstedt  else {
8f1cbc1997-03-13Marcus Comstedt 
f48b831997-05-06Marcus Comstedt  /* Encode the string in sp[-args].u.string. We don't know how
8f1cbc1997-03-13Marcus Comstedt  much of the data has to be encoded, so let's use that trusty
cebac91999-03-10Marcus Comstedt  string builder once again. */
8f1cbc1997-03-13Marcus Comstedt 
cebac91999-03-10Marcus Comstedt  struct string_builder buf;
f48b831997-05-06Marcus Comstedt  unsigned char *src = (unsigned char *)sp[-args].u.string->str;
45372f2000-08-04Henrik Grubbström (Grubba)  ptrdiff_t cnt;
8f1cbc1997-03-13Marcus Comstedt  int col = 0;
017b572011-10-28Henrik Grubbström (Grubba)  int insert_crlf = !(args == 2 && TYPEOF(sp[-1]) == T_INT &&
f48b831997-05-06Marcus Comstedt  sp[-1].u.integer != 0);
5565b61997-03-08Marcus Comstedt 
cebac91999-03-10Marcus Comstedt  init_string_builder( &buf, 0 );
5565b61997-03-08Marcus Comstedt 
f48b831997-05-06Marcus Comstedt  for (cnt = sp[-args].u.string->len; cnt--; src++) {
8f1cbc1997-03-13Marcus Comstedt  if ((*src >= 33 && *src <= 60) || (*src >= 62 && *src <= 126)) /* These characters can always be encoded as themselves */
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, *(unsigned char *)src );
5565b61997-03-08Marcus Comstedt  else {
8f1cbc1997-03-13Marcus Comstedt  /* Better safe than sorry, eh? Use the dreaded hex escape */
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, '=' ); string_builder_putchar( &buf, qptab[(*src)>>4] ); string_builder_putchar( &buf, qptab[(*src)&15] );
5565b61997-03-08Marcus Comstedt  col += 2; }
8f1cbc1997-03-13Marcus Comstedt  /* We'd better not let the lines get too long */
f48b831997-05-06Marcus Comstedt  if (++col >= 73 && insert_crlf) {
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, '=' ); string_builder_putchar( &buf, 13 ); string_builder_putchar( &buf, 10 );
5565b61997-03-08Marcus Comstedt  col = 0; } }
13670c2015-05-25Martin Nilsson 
8f1cbc1997-03-13Marcus Comstedt  /* Return the result */
5c8ddc1997-06-26Marcus Comstedt  pop_n_elems( args );
cebac91999-03-10Marcus Comstedt  push_string( finish_string_builder( &buf ) );
5565b61997-03-08Marcus Comstedt  } }
8f1cbc1997-03-13Marcus Comstedt /* MIME.decode_uue() */
5565b61997-03-08Marcus Comstedt 
61b0392001-01-06Henrik Grubbström (Grubba) /*! @decl string decode_uue(string encoded_data) *! *! This function decodes data encoded using the @tt{x-uue@} transfer encoding. *! It can also be used to decode generic UUEncoded files. *! *! @seealso *! @[MIME.encode_uue()], @[MIME.decode()] */
8f1cbc1997-03-13Marcus Comstedt static void f_decode_uue( INT32 args )
5565b61997-03-08Marcus Comstedt {
8f1cbc1997-03-13Marcus Comstedt  if (args != 1)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong number of arguments to MIME.decode_uue()\n" );
017b572011-10-28Henrik Grubbström (Grubba)  else if(TYPEOF(sp[-1]) != T_STRING)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong type of argument to MIME.decode_uue()\n" );
808e7d1999-03-09Marcus Comstedt  else if (sp[-1].u.string->size_shift != 0)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Char out of range for MIME.decode_uue()\n" );
5565b61997-03-08Marcus Comstedt  else {
8f1cbc1997-03-13Marcus Comstedt  /* Decode string in sp[-1].u.string. This is done much like in the base64 case, but we'll look for the "begin" line first. */
cebac91999-03-10Marcus Comstedt  struct string_builder buf;
5565b61997-03-08Marcus Comstedt  char *src;
45372f2000-08-04Henrik Grubbström (Grubba)  ptrdiff_t cnt;
5565b61997-03-08Marcus Comstedt 
cebac91999-03-10Marcus Comstedt  init_string_builder( &buf, 0 );
5565b61997-03-08Marcus Comstedt  src = sp[-1].u.string->str; cnt = sp[-1].u.string->len;
8f1cbc1997-03-13Marcus Comstedt  while (cnt--)
5565b61997-03-08Marcus Comstedt  if(*src++=='b' && cnt>5 && !memcmp(src, "egin ", 5)) break;
8f1cbc1997-03-13Marcus Comstedt  if (cnt>=0) /* We found a the string "begin". Now skip to EOL */ while (cnt--) if (*src++=='\n')
5565b61997-03-08Marcus Comstedt  break;
8f1cbc1997-03-13Marcus Comstedt  if (cnt<0) { /* Could not find "begin.*\n", return 0 */ pop_n_elems( 1 ); push_int( 0 );
5565b61997-03-08Marcus Comstedt  return; }
8f1cbc1997-03-13Marcus Comstedt  for (;;) {
5565b61997-03-08Marcus Comstedt  int l, g;
8f1cbc1997-03-13Marcus Comstedt 
02979f1997-04-12Marcus Comstedt  /* If we run out of input, or the line starts with "end", we are done */ if (cnt<=0 || *src=='e')
5565b61997-03-08Marcus Comstedt  break;
8f1cbc1997-03-13Marcus Comstedt 
02979f1997-04-12Marcus Comstedt  /* Get the length byte, calculate the number of groups, and
8f1cbc1997-03-13Marcus Comstedt  check that we have sufficient data */
02979f1997-04-12Marcus Comstedt  l=(*src++-' ')&63;
5565b61997-03-08Marcus Comstedt  --cnt;
8f1cbc1997-03-13Marcus Comstedt  g = (l+2)/3; l -= g*3; if ((cnt -= g*4) < 0)
5565b61997-03-08Marcus Comstedt  break;
8f1cbc1997-03-13Marcus Comstedt  while (g--) { /* Read 24 bits of data */
5565b61997-03-08Marcus Comstedt  INT32 d = ((*src++-' ')&63)<<18; d |= ((*src++-' ')&63)<<12; d |= ((*src++-' ')&63)<<6; d |= ((*src++-' ')&63);
8f1cbc1997-03-13Marcus Comstedt  /* Output it into the buffer */
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, (d>>16)&0xff ); string_builder_putchar( &buf, (d>>8)&0xff ); string_builder_putchar( &buf, d&0xff );
5565b61997-03-08Marcus Comstedt  }
8f1cbc1997-03-13Marcus Comstedt  /* If the line didn't contain an even multiple of 24 bits, remove spurious bytes from the buffer */
cebac91999-03-10Marcus Comstedt  /* while (l++) string_builder_allocate( &buf, -1 ); */ /* Hmm... string_builder_allocate is static. Cheat a bit... */ if (l<0) buf.s->len += l;
8f1cbc1997-03-13Marcus Comstedt  /* Skip to EOL */ while (cnt-- && *src++!=10);
5565b61997-03-08Marcus Comstedt  }
8f1cbc1997-03-13Marcus Comstedt  /* Return the result */ pop_n_elems( 1 );
cebac91999-03-10Marcus Comstedt  push_string( finish_string_builder( &buf ) );
5565b61997-03-08Marcus Comstedt  } }
/* Convenience function for encode_uue().
 *
 * Encodes groups*3 bytes read from *srcp into uuencoded lines written
 * at *destp.  Every line starts with a length character and holds at
 * most 15 groups; lines are separated by CR LF, but no linebreak is
 * written after the final line.
 *
 * last is added to the byte count in the length character of the first
 * line that has fewer than 15 groups, which reserves room in that line
 * for a trailing group the caller appends itself.  If every line is
 * full, a separate line consisting only of that length character is
 * started.  No input is read for those last bytes.
 *
 * Both *srcp and *destp are advanced past the data consumed and
 * produced.
 */
static void do_uue_encode(ptrdiff_t groups, unsigned char **srcp, char **destp,
                          ptrdiff_t last )
{
  unsigned char *in = *srcp;
  char *out = *destp;

  while (groups || last) {
    /* A single line can hold at most 15 groups */
    ptrdiff_t line_groups = (groups < 15) ? groups : 15;
    ptrdiff_t line_bytes = 3 * line_groups;

    if (line_groups < 15) {
      /* The line isn't filled completely, so this is where the "last"
         bytes are accounted for */
      line_bytes += last;
      last = 0;
    }
    *out++ = ' ' + (char)line_bytes;

    groups -= line_groups;

    while (line_groups--) {
      int shift;
      /* Get 24 bits of data */
      unsigned long bits = ((unsigned long)in[0] << 16) |
                           ((unsigned long)in[1] << 8) |
                           (unsigned long)in[2];
      in += 3;

      /* Output it in encoded form; a zero value is written as '`'
         rather than as a space */
      for (shift = 18; shift >= 0; shift -= 6) {
        char c = ' ' + (char)((bits >> shift) & 63);
        *out++ = (c == ' ') ? '`' : c;
      }
    }

    if (groups || last) {
      /* There's more data to be written, so add a linebreak before
         looping */
      *out++ = 13;
      *out++ = 10;
    }
  }

  /* Hand the advanced pointers back to the caller */
  *srcp = in;
  *destp = out;
}
8f1cbc1997-03-13Marcus Comstedt /* MIME.encode_uue() */
61b0392001-01-06Henrik Grubbström (Grubba) /*! @decl string encode_uue(string encoded_data, void|string filename) *! *! This function encodes data using the @tt{x-uue@} transfer encoding. *! *! The optional argument @[filename] specifies an advisory filename to include *! in the encoded data, for extraction purposes. *! *! This function can also be used to produce generic UUEncoded files. *! *! @seealso *! @[MIME.decode_uue()], @[MIME.encode()] */
8f1cbc1997-03-13Marcus Comstedt static void f_encode_uue( INT32 args )
5565b61997-03-08Marcus Comstedt {
8f1cbc1997-03-13Marcus Comstedt  if (args != 1 && args != 2)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong number of arguments to MIME.encode_uue()\n" );
017b572011-10-28Henrik Grubbström (Grubba)  else if (TYPEOF(sp[-args]) != T_STRING || (args == 2 && TYPEOF(sp[-1]) != T_VOID && TYPEOF(sp[-1]) != T_STRING && TYPEOF(sp[-1]) != T_INT))
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong type of argument to MIME.encode_uue()\n" );
808e7d1999-03-09Marcus Comstedt  else if (sp[-args].u.string->size_shift != 0 ||
017b572011-10-28Henrik Grubbström (Grubba)  (args == 2 && TYPEOF(sp[-1]) == T_STRING &&
808e7d1999-03-09Marcus Comstedt  sp[-1].u.string->size_shift != 0))
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Char out of range for MIME.encode_uue()\n" );
5565b61997-03-08Marcus Comstedt  else {
8f1cbc1997-03-13Marcus Comstedt  /* Encode string in sp[-args].u.string. If args == 2, there may be a filename in sp[-1].u.string. If we don't get a filename, use the generic filename "attachment"... */ char *dest, *filename = "attachment";
5565b61997-03-08Marcus Comstedt  struct pike_string *str; unsigned char *src = (unsigned char *) sp[-args].u.string->str;
8f1cbc1997-03-13Marcus Comstedt  /* Calculate number of 24 bit groups, and actual # of bytes in last grp */
45372f2000-08-04Henrik Grubbström (Grubba)  ptrdiff_t groups = (sp[-args].u.string->len + 2)/3; ptrdiff_t last= (sp[-args].u.string->len - 1)%3 + 1;
8f1cbc1997-03-13Marcus Comstedt  /* Get the filename if provided */
017b572011-10-28Henrik Grubbström (Grubba)  if (args == 2 && TYPEOF(sp[-1]) == T_STRING)
5565b61997-03-08Marcus Comstedt  filename = sp[-1].u.string->str;
8f1cbc1997-03-13Marcus Comstedt  /* Allocate the space we need. This included space for the actual data, linebreaks and the "begin" and "end" lines (including filename) */ str = begin_shared_string( groups*4 + ((groups + 14)/15)*3 + strlen( filename ) + 20 );
5565b61997-03-08Marcus Comstedt  dest = str->str;
8f1cbc1997-03-13Marcus Comstedt  /* Write the begin line containing the filename */
5565b61997-03-08Marcus Comstedt  sprintf(dest, "begin 644 %s\r\n", filename); dest += 12 + strlen(filename);
8f1cbc1997-03-13Marcus Comstedt  if (groups) { /* Temporary storage for the last group, as we may have to read an extra byte or two and don't want to get any page-faults. */
5565b61997-03-08Marcus Comstedt  unsigned char tmp[3], *tmpp=tmp; char *kp, k; int i;
8f1cbc1997-03-13Marcus Comstedt  do_uue_encode( groups-1, &src, &dest, last ); /* Copy the last group into temporary storage */ tmp[1] = tmp[2] = 0; for (i = 0; i < last; i++) tmp[i] = *src++;
5565b61997-03-08Marcus Comstedt 
8f1cbc1997-03-13Marcus Comstedt  /* Remember the address and contents of the last character written. This will get overwritten by a fake length byte which we will then replace with the originial character */
5565b61997-03-08Marcus Comstedt  k = *--dest; kp = dest;
8f1cbc1997-03-13Marcus Comstedt  do_uue_encode( 1, &tmpp, &dest, 0 ); /* Restore the saved character */
5565b61997-03-08Marcus Comstedt  *kp = k;
8f1cbc1997-03-13Marcus Comstedt 
4d27b62017-08-11Chris Angelico  /* Replace final nulls with pad characters if necessary */
8f1cbc1997-03-13Marcus Comstedt  switch (last) {
5565b61997-03-08Marcus Comstedt  case 1:
8f1cbc1997-03-13Marcus Comstedt  dest[-2] = '`';
5f50842018-02-12Marcus Comstedt  /* FALLTHRU */
5565b61997-03-08Marcus Comstedt  case 2:
8f1cbc1997-03-13Marcus Comstedt  dest[-1] = '`';
5565b61997-03-08Marcus Comstedt  }
8f1cbc1997-03-13Marcus Comstedt  /* Add a final linebreak after the last group */
5565b61997-03-08Marcus Comstedt  *dest++ = 13; *dest++ = 10; }
8f1cbc1997-03-13Marcus Comstedt  /* Put a terminating line (length byte `) and the "end" line into buffer */ memcpy( dest, "`\r\nend\r\n", 8 ); /* Return the result */ pop_n_elems( args ); push_string( end_shared_string( str ) );
5565b61997-03-08Marcus Comstedt  } }
8f1cbc1997-03-13Marcus Comstedt 
391ac52018-08-05Martin Nilsson static void low_tokenize( INT32 args, int mode )
5565b61997-03-08Marcus Comstedt {
aec58f1998-04-04Mirar (Pontus Hagland) 
56e2c41999-03-07Marcus Comstedt  /* Tokenize string in sp[-args].u.string. We'll just push the tokens on the stack, and then do an aggregate_array just before exiting. */
341f602008-01-25Henrik Grubbström (Grubba)  unsigned char *src; int flags = 0;
56e2c41999-03-07Marcus Comstedt  struct array *arr; struct pike_string *str;
341f602008-01-25Henrik Grubbström (Grubba)  ptrdiff_t cnt;
45372f2000-08-04Henrik Grubbström (Grubba)  INT32 n = 0, l, e, d;
56e2c41999-03-07Marcus Comstedt  char *p;
391ac52018-08-05Martin Nilsson  get_all_args(NULL, args, "%S.%d", &str, &flags);
341f602008-01-25Henrik Grubbström (Grubba)  src = STR0(str); cnt = str->len;
56e2c41999-03-07Marcus Comstedt  while (cnt>0) switch (rfc822ctype[*src]) { case CT_EQUAL: /* Might be an encoded word. Check it out. */ l = 0; if (cnt>5 && src[1] == '?') { int nq = 0; for (l=2; l<cnt && nq<3; l++) if (src[l]=='?') nq ++; else if(rfc822ctype[src[l]]<=CT_WHITE) break; if (nq == 3 && l<cnt && src[l] == '=') l ++; else l = 0; } if (l>0) { /* Yup. It's an encoded word, so it must be an atom. */ if(mode)
5e9fc02015-08-18Per Hedbor  push_static_text("encoded-word");
56e2c41999-03-07Marcus Comstedt  push_string( make_shared_binary_string( (char *)src, l ) ); if(mode) f_aggregate(2); n++; src += l; cnt -= l; break; }
5f50842018-02-12Marcus Comstedt  /* FALLTHRU */
56e2c41999-03-07Marcus Comstedt  case CT_SPECIAL: case CT_RBRACK: case CT_RPAR: /* Individual special character, push as a char (= int) */ if(mode)
5e9fc02015-08-18Per Hedbor  push_static_text("special");
56e2c41999-03-07Marcus Comstedt  push_int( *src++ ); if(mode) f_aggregate(2); n++; --cnt; break;
aec58f1998-04-04Mirar (Pontus Hagland) 
56e2c41999-03-07Marcus Comstedt  case CT_ATOM: /* Atom, find length then push as a string */ for (l=1; l<cnt; l++) if (rfc822ctype[src[l]] != CT_ATOM) break;
8f1cbc1997-03-13Marcus Comstedt 
56e2c41999-03-07Marcus Comstedt  if(mode)
5e9fc02015-08-18Per Hedbor  push_static_text("word");
56e2c41999-03-07Marcus Comstedt  push_string( make_shared_binary_string( (char *)src, l ) ); if(mode) f_aggregate(2); n++; src += l; cnt -= l; break; case CT_QUOTE: /* Quoted-string, find length then push as a string while removing escapes. */ for (e = 0, l = 1; l < cnt; l++) if (src[l] == '"') break; else
341f602008-01-25Henrik Grubbström (Grubba)  if ((src[l] == '\\') && !(flags & TOKENIZE_KEEP_ESCAPES)) {
56e2c41999-03-07Marcus Comstedt  e++; l++; }
8f1cbc1997-03-13Marcus Comstedt 
56e2c41999-03-07Marcus Comstedt  /* Push the resulting string */ if(mode)
5e9fc02015-08-18Per Hedbor  push_static_text("word");
341f602008-01-25Henrik Grubbström (Grubba)  if (e) { /* l is the distance to the ending ", and e is the number of \ escapes encountered on the way */ str = begin_shared_string( l-e-1 ); /* Copy the string and remove \ escapes */ for (p = str->str, e = 1; e < l; e++) *p++ = (src[e] == '\\'? src[++e] : src[e]); push_string( end_shared_string( str ) ); } else { /* No escapes. */ push_string(make_shared_binary_string( (char *)src+1, l-1)); }
56e2c41999-03-07Marcus Comstedt  if(mode) f_aggregate(2); n++; src += l+1; cnt -= l+1; break; case CT_LBRACK: /* Domain literal. Handled just like quoted-string, except that ] marks the end of the token, not ". */ for (e = 0, l = 1; l < cnt; l++) if(src[l] == ']')
ff7a0f1999-03-04Marcus Comstedt  break;
56e2c41999-03-07Marcus Comstedt  else if(src[l] == '\\') { e++; l++; } if (l >= cnt) { /* No ]; seems that this was no domain literal after all... */ if(mode)
5e9fc02015-08-18Per Hedbor  push_static_text("special");
8f1cbc1997-03-13Marcus Comstedt  push_int( *src++ );
56e2c41999-03-07Marcus Comstedt  if(mode) f_aggregate(2);
5565b61997-03-08Marcus Comstedt  n++; --cnt; break;
56e2c41999-03-07Marcus Comstedt  }
8f1cbc1997-03-13Marcus Comstedt 
56e2c41999-03-07Marcus Comstedt  /* l is the distance to the ending ], and e is the number of \ escapes encountered on the way */ str = begin_shared_string( l-e+1 ); /* Copy the literal and remove \ escapes */ for (p = str->str, e = 0; e <= l; e++) *p++ = (src[e] == '\\'? src[++e] : src[e]); /* Push the resulting string */ if(mode)
5e9fc02015-08-18Per Hedbor  push_static_text("domain-literal");
56e2c41999-03-07Marcus Comstedt  push_string( end_shared_string( str ) ); if(mode) f_aggregate(2); n++; src += l+1; cnt -= l+1; break; case CT_LPAR: /* Comment. Nested comments are allowed, so we'll use d to keep track of the nesting level. */ for (e = 0, d = 1, l = 1; l < cnt; l++) if (src[l] == '(') /* One level deeper nesting */ d++; else if(src[l] == ')') { /* End of comment level. If nesting reaches 0, we're done */ if(!--d)
5565b61997-03-08Marcus Comstedt  break;
56e2c41999-03-07Marcus Comstedt  } else /* Skip escaped characters */ if(src[l] == '\\') { e++; l++; }
8f1cbc1997-03-13Marcus Comstedt 
56e2c41999-03-07Marcus Comstedt  if(mode) {
5e9fc02015-08-18Per Hedbor  push_static_text("comment");
8f1cbc1997-03-13Marcus Comstedt 
5565b61997-03-08Marcus Comstedt  str = begin_shared_string( l-e-1 );
8f1cbc1997-03-13Marcus Comstedt 
56e2c41999-03-07Marcus Comstedt  /* Copy the comment and remove \ escapes */
8f1cbc1997-03-13Marcus Comstedt  for (p = str->str, e = 1; e < l; e++) *p++ = (src[e] == '\\'? src[++e] : src[e]); push_string( end_shared_string( str ) );
56e2c41999-03-07Marcus Comstedt  f_aggregate(2);
5565b61997-03-08Marcus Comstedt  n++;
56e2c41999-03-07Marcus Comstedt  }
8f1cbc1997-03-13Marcus Comstedt 
56e2c41999-03-07Marcus Comstedt  /* Skip the comment altogether */ src += l+1; cnt -= l+1; break;
8f1cbc1997-03-13Marcus Comstedt 
56e2c41999-03-07Marcus Comstedt  case CT_WHITE: /* Whitespace, just ignore it */ src++; --cnt; break;
8f1cbc1997-03-13Marcus Comstedt 
56e2c41999-03-07Marcus Comstedt  default:
b189741999-08-17Marcus Comstedt  if(*src == '\0') { /* Multiple occurance header. Ignore all but first. */ cnt = 0; break; }
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Invalid character in header field\n" );
56e2c41999-03-07Marcus Comstedt  }
8f1cbc1997-03-13Marcus Comstedt 
56e2c41999-03-07Marcus Comstedt  /* Create the resulting array and push it */ arr = aggregate_array( n );
341f602008-01-25Henrik Grubbström (Grubba)  pop_n_elems( args );
56e2c41999-03-07Marcus Comstedt  push_array( arr ); }
8f1cbc1997-03-13Marcus Comstedt 
341f602008-01-25Henrik Grubbström (Grubba) /*! @decl array(string|int) tokenize(string header, int|void flags)
a86cc82001-02-14Henrik Grubbström (Grubba)  *!
310a6b2015-08-21Henrik Grubbström (Grubba)  *! A structured header field, as specified by @rfc{822@}, is constructed from
a86cc82001-02-14Henrik Grubbström (Grubba)  *! a sequence of lexical elements. *!
341f602008-01-25Henrik Grubbström (Grubba)  *! @param header *! The header value to parse. *! *! @param flags *! An optional set of flags. Currently only one flag is defined: *! @int
87213c2008-03-25Martin Bähr  *! @value TOKENIZE_KEEP_ESCAPES
341f602008-01-25Henrik Grubbström (Grubba)  *! Keep backslash-escapes in quoted-strings. *! @endint *! *! The lexical elements parsed are:
a86cc82001-02-14Henrik Grubbström (Grubba)  *! @dl *! @item *! individual special characters *! @item *! quoted-strings *! @item *! domain-literals *! @item *! comments *! @item *! atoms *! @enddl *! *! This function will analyze a string containing the header value, *! and produce an array containing the lexical elements. *! *! Individual special characters will be returned as characters (i.e.
cbe8c92003-04-07Martin Nilsson  *! @expr{int@}s).
a86cc82001-02-14Henrik Grubbström (Grubba)  *! *! Quoted-strings, domain-literals and atoms will be decoded and returned *! as strings. *! *! Comments are not returned in the array at all. *! *! @note *! As domain-literals are returned as strings, there is no way to tell the *! domain-literal @tt{[127.0.0.1]@} from the quoted-string *! @tt{"[127.0.0.1]"@}. Hopefully this won't cause any problems. *! Domain-literals are used seldom, if at all, anyway...
13670c2015-05-25Martin Nilsson  *!
310a6b2015-08-21Henrik Grubbström (Grubba)  *! The set of special-characters is the one specified in @rfc{1521@}
cbe8c92003-04-07Martin Nilsson  *! (i.e. @expr{"<", ">", "@@", ",", ";", ":", "\", "/", "?", "="@}),
310a6b2015-08-21Henrik Grubbström (Grubba)  *! and not the set specified in @rfc{822@}.
a86cc82001-02-14Henrik Grubbström (Grubba)  *! *! @seealso
b7be082002-11-25Marcus Comstedt  *! @[MIME.quote()], @[tokenize_labled()], *! @[decode_words_tokenized_remapped()].
a86cc82001-02-14Henrik Grubbström (Grubba)  */
56e2c41999-03-07Marcus Comstedt static void f_tokenize( INT32 args ) {
391ac52018-08-05Martin Nilsson  low_tokenize(args, 0);
5565b61997-03-08Marcus Comstedt }
341f602008-01-25Henrik Grubbström (Grubba) /*! @decl array(array(string|int)) tokenize_labled(string header, @ *! int|void flags)
a86cc82001-02-14Henrik Grubbström (Grubba)  *!
b7be082002-11-25Marcus Comstedt  *! Similar to @[tokenize()], but labels the contents, by making
a86cc82001-02-14Henrik Grubbström (Grubba)  *! arrays with two elements; the first a label, and the second *! the value that @[tokenize()] would have put there, except *! that comments are kept. *!
341f602008-01-25Henrik Grubbström (Grubba)  *! @param header *! The header value to parse. *! *! @param flags *! An optional set of flags. Currently only one flag is defined: *! @int
87213c2008-03-25Martin Bähr  *! @value TOKENIZE_KEEP_ESCAPES
341f602008-01-25Henrik Grubbström (Grubba)  *! Keep backslash-escapes in quoted-strings. *! @endint *!
b7be082002-11-25Marcus Comstedt  *! The following labels exist:
a86cc82001-02-14Henrik Grubbström (Grubba)  *! @string *! @value "encoded-word" *! Word encoded according to =?... *! @value "special" *! Special character. *! @value "word" *! Word. *! @value "domain-literal" *! Domain literal. *! @value "comment" *! Comment. *! @endstring *! *! @seealso
b7be082002-11-25Marcus Comstedt  *! @[MIME.quote()], @[tokenize()], *! @[decode_words_tokenized_labled_remapped()]
a86cc82001-02-14Henrik Grubbström (Grubba)  */
56e2c41999-03-07Marcus Comstedt static void f_tokenize_labled( INT32 args ) {
391ac52018-08-05Martin Nilsson  low_tokenize(args, 1);
56e2c41999-03-07Marcus Comstedt }
8f1cbc1997-03-13Marcus Comstedt /* Convenience function for quote() which determines if a sequence of * characters can be stored as an atom. */
45372f2000-08-04Henrik Grubbström (Grubba) static int check_atom_chars( unsigned char *str, ptrdiff_t len )
5565b61997-03-08Marcus Comstedt { /* Atoms must contain at least 1 character... */
8f1cbc1997-03-13Marcus Comstedt  if (len < 1) return 0;
5565b61997-03-08Marcus Comstedt 
8f1cbc1997-03-13Marcus Comstedt  /* Check the individual characters */ while (len--) if (*str >= 0x80 || rfc822ctype[*str] != CT_ATOM)
5565b61997-03-08Marcus Comstedt  return 0; else str++;
8f1cbc1997-03-13Marcus Comstedt  /* Ok, it's safe */
5565b61997-03-08Marcus Comstedt  return 1; }
1a3b191999-03-07Marcus Comstedt /* This one check is a sequence of charactes is actually an encoded word. */
45372f2000-08-04Henrik Grubbström (Grubba) static int check_encword( unsigned char *str, ptrdiff_t len )
1a3b191999-03-07Marcus Comstedt { int q = 0; /* An encoded word begins with =?, ends with ?= and contains 2 internal ? */ if (len < 6 || str[0] != '=' || str[1] != '?' || str[len-2] != '?' || str[len-1] != '=') return 0; /* Remove =? and ?= */ len -= 4; str += 2; /* Count number of internal ? */ while (len--) if (*str++ == '?') if(++q > 2) return 0; /* If we found exactly 2, this is an encoded word. */ return q == 2; }
8f1cbc1997-03-13Marcus Comstedt /* MIME.quote() */
61b0392001-01-06Henrik Grubbström (Grubba) /*! @decl string quote(array(string|int) lexical_elements); *! *! This function is the inverse of the @[MIME.tokenize] function. *! *! A header field value is constructed from a sequence of lexical elements.
cbe8c92003-04-07Martin Nilsson  *! Characters (@expr{int@}s) are taken to be special-characters, whereas
61b0392001-01-06Henrik Grubbström (Grubba)  *! strings are encoded as atoms or quoted-strings, depending on whether *! they contain any special characters. *! *! @note
a86cc82001-02-14Henrik Grubbström (Grubba)  *! There is no way to construct a domain-literal using this function. *! Neither can it be used to produce comments.
61b0392001-01-06Henrik Grubbström (Grubba)  *! *! @seealso
a86cc82001-02-14Henrik Grubbström (Grubba)  *! @[MIME.tokenize()]
61b0392001-01-06Henrik Grubbström (Grubba)  */
8f1cbc1997-03-13Marcus Comstedt static void f_quote( INT32 args )
5565b61997-03-08Marcus Comstedt { struct svalue *item; INT32 cnt;
cebac91999-03-10Marcus Comstedt  struct string_builder buf;
5565b61997-03-08Marcus Comstedt  int prev_atom = 0;
8f1cbc1997-03-13Marcus Comstedt  if (args != 1)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong number of arguments to MIME.quote()\n" );
017b572011-10-28Henrik Grubbström (Grubba)  else if (TYPEOF(sp[-1]) != T_ARRAY)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong type of argument to MIME.quote()\n" );
8f1cbc1997-03-13Marcus Comstedt  /* Quote array in sp[-1].u.array. Once again we'll rely on a
cebac91999-03-10Marcus Comstedt  string_builder to collect the output string. */
8f1cbc1997-03-13Marcus Comstedt 
cebac91999-03-10Marcus Comstedt  init_string_builder( &buf, 0 );
8f1cbc1997-03-13Marcus Comstedt  for (cnt=sp[-1].u.array->size, item=sp[-1].u.array->item; cnt--; item++) {
017b572011-10-28Henrik Grubbström (Grubba)  if (TYPEOF(*item) == T_INT) {
8f1cbc1997-03-13Marcus Comstedt  /* Single special character */
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, item->u.integer );
8f1cbc1997-03-13Marcus Comstedt  prev_atom = 0;
017b572011-10-28Henrik Grubbström (Grubba)  } else if (TYPEOF(*item) != T_STRING) {
8f1cbc1997-03-13Marcus Comstedt  /* Neither int or string. Too bad... */
cebac91999-03-10Marcus Comstedt  free_string_builder( &buf );
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong type of argument to MIME.quote()\n" );
8f1cbc1997-03-13Marcus Comstedt 
808e7d1999-03-09Marcus Comstedt  } else if (item->u.string->size_shift != 0) {
cebac91999-03-10Marcus Comstedt  free_string_builder( &buf );
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Char out of range for MIME.quote()\n" );
808e7d1999-03-09Marcus Comstedt 
5565b61997-03-08Marcus Comstedt  } else {
8f1cbc1997-03-13Marcus Comstedt  /* It's a string, so we'll store it either as an atom, or as a quoted-string */ struct pike_string *str = item->u.string; /* In case the previous item was also a string, we'll add a single whitespace as a delimiter */ if (prev_atom)
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, ' ' );
8f1cbc1997-03-13Marcus Comstedt 
1a3b191999-03-07Marcus Comstedt  if ((str->len>5 && str->str[0]=='=' && str->str[1]=='?' && check_encword((unsigned char *)str->str, str->len)) || check_atom_chars((unsigned char *)str->str, str->len)) {
8f1cbc1997-03-13Marcus Comstedt  /* Valid atom without quotes... */
cebac91999-03-10Marcus Comstedt  string_builder_binary_strcat( &buf, str->str, str->len );
8f1cbc1997-03-13Marcus Comstedt 
5565b61997-03-08Marcus Comstedt  } else {
8f1cbc1997-03-13Marcus Comstedt  /* Have to use quoted-string */
c3dbe52000-08-09Henrik Grubbström (Grubba)  ptrdiff_t len = str->len;
8f1cbc1997-03-13Marcus Comstedt  char *src = str->str;
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, '"' );
5565b61997-03-08Marcus Comstedt  while(len--) { if(*src=='"' || *src=='\\' || *src=='\r')
8f1cbc1997-03-13Marcus Comstedt  /* Some characters have to be escaped even within quotes... */
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, '\\' ); string_builder_putchar( &buf, (*src++)&0xff );
5565b61997-03-08Marcus Comstedt  }
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, '"' );
8f1cbc1997-03-13Marcus Comstedt 
5565b61997-03-08Marcus Comstedt  }
8f1cbc1997-03-13Marcus Comstedt  prev_atom = 1;
5565b61997-03-08Marcus Comstedt  } }
8f1cbc1997-03-13Marcus Comstedt  /* Return the result */ pop_n_elems( 1 );
cebac91999-03-10Marcus Comstedt  push_string( finish_string_builder( &buf ) );
5565b61997-03-08Marcus Comstedt }
9938e91999-03-09Marcus Comstedt 
a86cc82001-02-14Henrik Grubbström (Grubba) /*! @decl string quote_labled(array(array(string|int)) tokens) *! *! This function performs the reverse operation of @[tokenize_labled()]. *! *! @seealso *! @[MIME.quote()], @[MIME.tokenize_labled()] */
9938e91999-03-09Marcus Comstedt static void f_quote_labled( INT32 args ) { struct svalue *item; INT32 cnt;
cebac91999-03-10Marcus Comstedt  struct string_builder buf;
9938e91999-03-09Marcus Comstedt  int prev_atom = 0; if (args != 1)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong number of arguments to MIME.quote_labled()\n" );
017b572011-10-28Henrik Grubbström (Grubba)  else if (TYPEOF(sp[-1]) != T_ARRAY)
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong type of argument to MIME.quote_labled()\n" );
9938e91999-03-09Marcus Comstedt  /* Quote array in sp[-1].u.array. Once again we'll rely on a
cebac91999-03-10Marcus Comstedt  string_builder to collect the output string. */
9938e91999-03-09Marcus Comstedt 
cebac91999-03-10Marcus Comstedt  init_string_builder( &buf, 0 );
9938e91999-03-09Marcus Comstedt  for (cnt=sp[-1].u.array->size, item=sp[-1].u.array->item; cnt--; item++) {
017b572011-10-28Henrik Grubbström (Grubba)  if (TYPEOF(*item) != T_ARRAY || item->u.array->size<2 || TYPEOF(item->u.array->item[0]) != T_STRING) {
cebac91999-03-10Marcus Comstedt  free_string_builder( &buf );
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong type of argument to MIME.quote_labled()\n" );
9938e91999-03-09Marcus Comstedt  } if (c_compare_string( item->u.array->item[0].u.string, "special", 7 )) {
017b572011-10-28Henrik Grubbström (Grubba)  if(TYPEOF(item->u.array->item[1]) != T_INT) {
cebac91999-03-10Marcus Comstedt  free_string_builder( &buf );
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong type of argument to MIME.quote_labled()\n" );
9938e91999-03-09Marcus Comstedt  } /* Single special character */
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, item->u.array->item[1].u.integer );
9938e91999-03-09Marcus Comstedt  prev_atom = 0;
017b572011-10-28Henrik Grubbström (Grubba)  } else if(TYPEOF(item->u.array->item[1]) != T_STRING) {
9938e91999-03-09Marcus Comstedt  /* All the remaining lexical items require item[1] to be a string */
cebac91999-03-10Marcus Comstedt  free_string_builder( &buf );
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Wrong type of argument to MIME.quote_labled()\n" );
9938e91999-03-09Marcus Comstedt 
808e7d1999-03-09Marcus Comstedt  } else if (item->u.array->item[1].u.string->size_shift != 0) {
cebac91999-03-10Marcus Comstedt  free_string_builder( &buf );
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Char out of range for MIME.quote_labled()\n" );
808e7d1999-03-09Marcus Comstedt 
9938e91999-03-09Marcus Comstedt  } else if (c_compare_string( item->u.array->item[0].u.string, "word", 4 )){ /* It's a word, so we'll store it either as an atom, or as a quoted-string */ struct pike_string *str = item->u.array->item[1].u.string; /* In case the previous item was also a string, we'll add a single whitespace as a delimiter */ if (prev_atom)
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, ' ' );
9938e91999-03-09Marcus Comstedt  if ((str->len>5 && str->str[0]=='=' && str->str[1]=='?' && check_encword((unsigned char *)str->str, str->len)) || check_atom_chars((unsigned char *)str->str, str->len)) { /* Valid atom without quotes... */
cebac91999-03-10Marcus Comstedt  string_builder_binary_strcat( &buf, str->str, str->len );
9938e91999-03-09Marcus Comstedt  } else { /* Have to use quoted-string */
45372f2000-08-04Henrik Grubbström (Grubba)  ptrdiff_t len = str->len;
9938e91999-03-09Marcus Comstedt  char *src = str->str;
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, '"' );
9938e91999-03-09Marcus Comstedt  while(len--) { if(*src=='"' || *src=='\\' || *src=='\r') /* Some characters have to be escaped even within quotes... */
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, '\\' ); string_builder_putchar( &buf, (*src++)&0xff );
9938e91999-03-09Marcus Comstedt  }
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, '"' );
9938e91999-03-09Marcus Comstedt  } prev_atom = 1; } else if (c_compare_string( item->u.array->item[0].u.string, "encoded-word", 12 )) { struct pike_string *str = item->u.array->item[1].u.string; /* Insert 'as is'. */
cebac91999-03-10Marcus Comstedt  string_builder_binary_strcat( &buf, str->str, str->len );
9938e91999-03-09Marcus Comstedt  prev_atom = 1; } else if (c_compare_string( item->u.array->item[0].u.string, "comment", 7 )) { struct pike_string *str = item->u.array->item[1].u.string; /* Encode comment */
45372f2000-08-04Henrik Grubbström (Grubba)  ptrdiff_t len = str->len;
9938e91999-03-09Marcus Comstedt  char *src = str->str;
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, '(' );
9938e91999-03-09Marcus Comstedt  while(len--) { if(*src=='(' || *src==')' || *src=='\\' || *src=='\r') /* Some characters have to be escaped even within comments... */
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, '\\' ); string_builder_putchar( &buf, (*src++)&0xff );
9938e91999-03-09Marcus Comstedt  }
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, ')' );
9938e91999-03-09Marcus Comstedt  prev_atom = 0;
13670c2015-05-25Martin Nilsson 
9938e91999-03-09Marcus Comstedt  } else if (c_compare_string( item->u.array->item[0].u.string, "domain-literal", 14 )) { struct pike_string *str = item->u.array->item[1].u.string; /* Encode domain-literal */
45372f2000-08-04Henrik Grubbström (Grubba)  ptrdiff_t len = str->len;
9938e91999-03-09Marcus Comstedt  char *src = str->str; if (len<2 || src[0] != '[' || src[len-1] != ']') {
cebac91999-03-10Marcus Comstedt  free_string_builder( &buf );
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Illegal domain-literal passed to MIME.quote_labled()\n" );
9938e91999-03-09Marcus Comstedt  } len -= 2; src++;
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, '[' );
9938e91999-03-09Marcus Comstedt  while(len--) { if(*src=='[' || *src==']' || *src=='\\' || *src=='\r') /* Some characters have to be escaped within domain-literals... */
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, '\\' ); string_builder_putchar( &buf, (*src++)&0xff );
9938e91999-03-09Marcus Comstedt  }
cebac91999-03-10Marcus Comstedt  string_builder_putchar( &buf, ']' );
9938e91999-03-09Marcus Comstedt  prev_atom = 0; } else { /* Unknown label. Too bad... */
cebac91999-03-10Marcus Comstedt  free_string_builder( &buf );
b2d3e42000-12-01Fredrik Hübinette (Hubbe)  Pike_error( "Unknown label passed to MIME.quote_labled()\n" );
9938e91999-03-09Marcus Comstedt  } } /* Return the result */ pop_n_elems( 1 );
cebac91999-03-10Marcus Comstedt  push_string( finish_string_builder( &buf ) );
9938e91999-03-09Marcus Comstedt }
61b0392001-01-06Henrik Grubbström (Grubba)  /*! @endmodule */