1ab4ac2008-01-26Martin Stjernholm /* -*- c -*-
23a7db2005-11-12Martin Nilsson || This file is part of Pike. For copyright information see COPYRIGHT. || Pike is distributed under GPL, LGPL and MPL. See the file COPYING || for more information. */ #include "config.h"
a26f5d2018-03-17Martin Nilsson #include "global.h"
23a7db2005-11-12Martin Nilsson  #include "pike_macros.h" #include "stralloc.h" #include "object.h" #include "interpret.h" #include "mapping.h" #include "program.h" #include "array.h" #include "builtin_functions.h" #include "module_support.h" #include "operators.h" #include "pike_error.h" #include "bignum.h"
d476592013-06-12Arne Goedeke #include "block_allocator.h"
#define sp Pike_sp

/* FIXME:
 * recursive parsing will make the parser lock up or
 * use up all memory and then crash...
 */

/* #define VERBOSE_XMLDEBUG */
f12fc62006-05-05Henrik Grubbström (Grubba) static struct svalue location_string_svalue;
23a7db2005-11-12Martin Nilsson 
f12fc62006-05-05Henrik Grubbström (Grubba) /*! @module Parser */
23a7db2005-11-12Martin Nilsson 
f12fc62006-05-05Henrik Grubbström (Grubba) /*! @module XML */
23a7db2005-11-12Martin Nilsson 
f12fc62006-05-05Henrik Grubbström (Grubba) DECLARATIONS
23a7db2005-11-12Martin Nilsson  /* FIXME: Make all these functions available inside pike */
97bdb62006-05-02Henrik Grubbström (Grubba) /* FIXME: Ought to be generated from UnicodeData.txt. */
/*
 * Return 1 if the code point c is one of the base characters listed
 * below, 0 otherwise.  The list appears to mirror the BaseChar
 * production of the XML 1.0 specification (character classes appendix).
 *
 * The code point is dispatched on c>>8 so that only the ranges of the
 * matching 256-character page are tested.  Every range under a case
 * label must therefore lie completely inside that page.  The range
 * 0x3105-0x312C used to be listed under page 0x30, where it could
 * never match (c>>8 is 0x31 for those code points); it now has its own
 * page 0x31 label.
 */
static int isBaseChar(INT32 c)
{
/* Inclusive range test against the parameter c. */
#define BASECHAR_IN(LO, HI) (c >= (LO) && c <= (HI))

  switch(c>>8)
  {
    case 0x00:
      return BASECHAR_IN(0x0041, 0x005A) || BASECHAR_IN(0x0061, 0x007A) ||
	BASECHAR_IN(0x00C0, 0x00D6) || BASECHAR_IN(0x00D8, 0x00F6) ||
	BASECHAR_IN(0x00F8, 0x00FF);

    case 0x01:
      return BASECHAR_IN(0x0100, 0x0131) || BASECHAR_IN(0x0134, 0x013E) ||
	BASECHAR_IN(0x0141, 0x0148) || BASECHAR_IN(0x014A, 0x017E) ||
	BASECHAR_IN(0x0180, 0x01C3) || BASECHAR_IN(0x01CD, 0x01F0) ||
	BASECHAR_IN(0x01F4, 0x01F5) || BASECHAR_IN(0x01FA, 0x0217);

    case 0x02:
      return BASECHAR_IN(0x0250, 0x02A8) || BASECHAR_IN(0x02BB, 0x02C1);

    case 0x03:
      return c == 0x0386 || BASECHAR_IN(0x0388, 0x038A) || c == 0x038C ||
	BASECHAR_IN(0x038E, 0x03A1) || BASECHAR_IN(0x03A3, 0x03CE) ||
	BASECHAR_IN(0x03D0, 0x03D6) || c == 0x03DA || c == 0x03DC ||
	c == 0x03DE || c == 0x03E0 || BASECHAR_IN(0x03E2, 0x03F3);

    case 0x04:
      return BASECHAR_IN(0x0401, 0x040C) || BASECHAR_IN(0x040E, 0x044F) ||
	BASECHAR_IN(0x0451, 0x045C) || BASECHAR_IN(0x045E, 0x0481) ||
	BASECHAR_IN(0x0490, 0x04C4) || BASECHAR_IN(0x04C7, 0x04C8) ||
	BASECHAR_IN(0x04CB, 0x04CC) || BASECHAR_IN(0x04D0, 0x04EB) ||
	BASECHAR_IN(0x04EE, 0x04F5) || BASECHAR_IN(0x04F8, 0x04F9);

    case 0x05:
      return BASECHAR_IN(0x0531, 0x0556) || c == 0x0559 ||
	BASECHAR_IN(0x0561, 0x0586) || BASECHAR_IN(0x05D0, 0x05EA) ||
	BASECHAR_IN(0x05F0, 0x05F2);

    case 0x06:
      return BASECHAR_IN(0x0621, 0x063A) || BASECHAR_IN(0x0641, 0x064A) ||
	BASECHAR_IN(0x0671, 0x06B7) || BASECHAR_IN(0x06BA, 0x06BE) ||
	BASECHAR_IN(0x06C0, 0x06CE) || BASECHAR_IN(0x06D0, 0x06D3) ||
	c == 0x06D5 || BASECHAR_IN(0x06E5, 0x06E6);

    case 0x09:
      return BASECHAR_IN(0x0905, 0x0939) || c == 0x093D ||
	BASECHAR_IN(0x0958, 0x0961) || BASECHAR_IN(0x0985, 0x098C) ||
	BASECHAR_IN(0x098F, 0x0990) || BASECHAR_IN(0x0993, 0x09A8) ||
	BASECHAR_IN(0x09AA, 0x09B0) || c == 0x09B2 ||
	BASECHAR_IN(0x09B6, 0x09B9) || BASECHAR_IN(0x09DC, 0x09DD) ||
	BASECHAR_IN(0x09DF, 0x09E1) || BASECHAR_IN(0x09F0, 0x09F1);

    case 0x0a:
      return BASECHAR_IN(0x0A05, 0x0A0A) || BASECHAR_IN(0x0A0F, 0x0A10) ||
	BASECHAR_IN(0x0A13, 0x0A28) || BASECHAR_IN(0x0A2A, 0x0A30) ||
	BASECHAR_IN(0x0A32, 0x0A33) || BASECHAR_IN(0x0A35, 0x0A36) ||
	BASECHAR_IN(0x0A38, 0x0A39) || BASECHAR_IN(0x0A59, 0x0A5C) ||
	c == 0x0A5E || BASECHAR_IN(0x0A72, 0x0A74) ||
	BASECHAR_IN(0x0A85, 0x0A8B) || c == 0x0A8D ||
	BASECHAR_IN(0x0A8F, 0x0A91) || BASECHAR_IN(0x0A93, 0x0AA8) ||
	BASECHAR_IN(0x0AAA, 0x0AB0) || BASECHAR_IN(0x0AB2, 0x0AB3) ||
	BASECHAR_IN(0x0AB5, 0x0AB9) || c == 0x0ABD || c == 0x0AE0;

    case 0x0b:
      return BASECHAR_IN(0x0B05, 0x0B0C) || BASECHAR_IN(0x0B0F, 0x0B10) ||
	BASECHAR_IN(0x0B13, 0x0B28) || BASECHAR_IN(0x0B2A, 0x0B30) ||
	BASECHAR_IN(0x0B32, 0x0B33) || BASECHAR_IN(0x0B36, 0x0B39) ||
	c == 0x0B3D || BASECHAR_IN(0x0B5C, 0x0B5D) ||
	BASECHAR_IN(0x0B5F, 0x0B61) || BASECHAR_IN(0x0B85, 0x0B8A) ||
	BASECHAR_IN(0x0B8E, 0x0B90) || BASECHAR_IN(0x0B92, 0x0B95) ||
	BASECHAR_IN(0x0B99, 0x0B9A) || c == 0x0B9C ||
	BASECHAR_IN(0x0B9E, 0x0B9F) || BASECHAR_IN(0x0BA3, 0x0BA4) ||
	BASECHAR_IN(0x0BA8, 0x0BAA) || BASECHAR_IN(0x0BAE, 0x0BB5) ||
	BASECHAR_IN(0x0BB7, 0x0BB9);

    case 0x0c:
      return BASECHAR_IN(0x0C05, 0x0C0C) || BASECHAR_IN(0x0C0E, 0x0C10) ||
	BASECHAR_IN(0x0C12, 0x0C28) || BASECHAR_IN(0x0C2A, 0x0C33) ||
	BASECHAR_IN(0x0C35, 0x0C39) || BASECHAR_IN(0x0C60, 0x0C61) ||
	BASECHAR_IN(0x0C85, 0x0C8C) || BASECHAR_IN(0x0C8E, 0x0C90) ||
	BASECHAR_IN(0x0C92, 0x0CA8) || BASECHAR_IN(0x0CAA, 0x0CB3) ||
	BASECHAR_IN(0x0CB5, 0x0CB9) || c == 0x0CDE ||
	BASECHAR_IN(0x0CE0, 0x0CE1);

    case 0x0d:
      return BASECHAR_IN(0x0D05, 0x0D0C) || BASECHAR_IN(0x0D0E, 0x0D10) ||
	BASECHAR_IN(0x0D12, 0x0D28) || BASECHAR_IN(0x0D2A, 0x0D39) ||
	BASECHAR_IN(0x0D60, 0x0D61);

    case 0x0e:
      return BASECHAR_IN(0x0E01, 0x0E2E) || c == 0x0E30 ||
	BASECHAR_IN(0x0E32, 0x0E33) || BASECHAR_IN(0x0E40, 0x0E45) ||
	BASECHAR_IN(0x0E81, 0x0E82) || c == 0x0E84 ||
	BASECHAR_IN(0x0E87, 0x0E88) || c == 0x0E8A || c == 0x0E8D ||
	BASECHAR_IN(0x0E94, 0x0E97) || BASECHAR_IN(0x0E99, 0x0E9F) ||
	BASECHAR_IN(0x0EA1, 0x0EA3) || c == 0x0EA5 || c == 0x0EA7 ||
	BASECHAR_IN(0x0EAA, 0x0EAB) || BASECHAR_IN(0x0EAD, 0x0EAE) ||
	c == 0x0EB0 || BASECHAR_IN(0x0EB2, 0x0EB3) || c == 0x0EBD ||
	BASECHAR_IN(0x0EC0, 0x0EC4);

    case 0x0f:
      return BASECHAR_IN(0x0F40, 0x0F47) || BASECHAR_IN(0x0F49, 0x0F69);

    case 0x10:
      return BASECHAR_IN(0x10A0, 0x10C5) || BASECHAR_IN(0x10D0, 0x10F6);

    case 0x11:
      return c == 0x1100 || BASECHAR_IN(0x1102, 0x1103) ||
	BASECHAR_IN(0x1105, 0x1107) || c == 0x1109 ||
	BASECHAR_IN(0x110B, 0x110C) || BASECHAR_IN(0x110E, 0x1112) ||
	c == 0x113C || c == 0x113E || c == 0x1140 || c == 0x114C ||
	c == 0x114E || c == 0x1150 || BASECHAR_IN(0x1154, 0x1155) ||
	c == 0x1159 || BASECHAR_IN(0x115F, 0x1161) || c == 0x1163 ||
	c == 0x1165 || c == 0x1167 || c == 0x1169 ||
	BASECHAR_IN(0x116D, 0x116E) || BASECHAR_IN(0x1172, 0x1173) ||
	c == 0x1175 || c == 0x119E || c == 0x11A8 || c == 0x11AB ||
	BASECHAR_IN(0x11AE, 0x11AF) || BASECHAR_IN(0x11B7, 0x11B8) ||
	c == 0x11BA || BASECHAR_IN(0x11BC, 0x11C2) || c == 0x11EB ||
	c == 0x11F0 || c == 0x11F9;

    case 0x1e:
      return BASECHAR_IN(0x1E00, 0x1E9B) || BASECHAR_IN(0x1EA0, 0x1EF9);

    case 0x1f:
      return BASECHAR_IN(0x1F00, 0x1F15) || BASECHAR_IN(0x1F18, 0x1F1D) ||
	BASECHAR_IN(0x1F20, 0x1F45) || BASECHAR_IN(0x1F48, 0x1F4D) ||
	BASECHAR_IN(0x1F50, 0x1F57) || c == 0x1F59 || c == 0x1F5B ||
	c == 0x1F5D || BASECHAR_IN(0x1F5F, 0x1F7D) ||
	BASECHAR_IN(0x1F80, 0x1FB4) || BASECHAR_IN(0x1FB6, 0x1FBC) ||
	c == 0x1FBE || BASECHAR_IN(0x1FC2, 0x1FC4) ||
	BASECHAR_IN(0x1FC6, 0x1FCC) || BASECHAR_IN(0x1FD0, 0x1FD3) ||
	BASECHAR_IN(0x1FD6, 0x1FDB) || BASECHAR_IN(0x1FE0, 0x1FEC) ||
	BASECHAR_IN(0x1FF2, 0x1FF4) || BASECHAR_IN(0x1FF6, 0x1FFC);

    case 0x21:
      return c == 0x2126 || BASECHAR_IN(0x212A, 0x212B) || c == 0x212E ||
	BASECHAR_IN(0x2180, 0x2182);

    case 0x30:
      return BASECHAR_IN(0x3041, 0x3094) || BASECHAR_IN(0x30A1, 0x30FA);

    case 0x31:
      /* These code points have c>>8 == 0x31, not 0x30. */
      return BASECHAR_IN(0x3105, 0x312C);

    default:
      /* Spans many pages, none of which has a label of its own. */
      return BASECHAR_IN(0xAC00, 0xD7A3);
  }

#undef BASECHAR_IN
}
/*
 * Return 1 if c is 0x3007, lies in 0x3021-0x3029 or lies in
 * 0x4E00-0x9FA5; otherwise return 0.
 */
static inline int isIdeographic(INT32 c)
{
  return (c == 0x3007) ||
    (c >= 0x3021 && c <= 0x3029) ||
    (c >= 0x4E00 && c <= 0x9FA5);
}
/* A letter is anything accepted by isBaseChar() or isIdeographic(). */
static inline int isLetter(INT32 c)
{
  if (isBaseChar(c))
    return 1;
  return isIdeographic(c);
}

/*
 * Return 1 if the code point c is one of the combining characters
 * listed below, 0 otherwise.
 *
 * The code point is dispatched on c>>8, so each case only tests the
 * ranges that belong to that 256-character page.
 */
static int isCombiningChar(INT32 c)
{
/* Inclusive range test against the parameter c. */
#define COMBINING_IN(LO, HI) (c >= (LO) && c <= (HI))

  switch(c>>8)
  {
    case 0x03:
      return COMBINING_IN(0x0300, 0x0345) || COMBINING_IN(0x0360, 0x0361);

    case 0x04:
      return COMBINING_IN(0x0483, 0x0486);

    case 0x05:
      return COMBINING_IN(0x0591, 0x05A1) || COMBINING_IN(0x05A3, 0x05B9) ||
	COMBINING_IN(0x05BB, 0x05BD) || c == 0x05BF ||
	COMBINING_IN(0x05C1, 0x05C2) || c == 0x05C4;

    case 0x06:
      return COMBINING_IN(0x064B, 0x0652) || c == 0x0670 ||
	COMBINING_IN(0x06D6, 0x06DC) || COMBINING_IN(0x06DD, 0x06DF) ||
	COMBINING_IN(0x06E0, 0x06E4) || COMBINING_IN(0x06E7, 0x06E8) ||
	COMBINING_IN(0x06EA, 0x06ED);

    case 0x09:
      return COMBINING_IN(0x0901, 0x0903) || c == 0x093C ||
	COMBINING_IN(0x093E, 0x094C) || c == 0x094D ||
	COMBINING_IN(0x0951, 0x0954) || COMBINING_IN(0x0962, 0x0963) ||
	COMBINING_IN(0x0981, 0x0983) || c == 0x09BC || c == 0x09BE ||
	c == 0x09BF || COMBINING_IN(0x09C0, 0x09C4) ||
	COMBINING_IN(0x09C7, 0x09C8) || COMBINING_IN(0x09CB, 0x09CD) ||
	c == 0x09D7 || COMBINING_IN(0x09E2, 0x09E3);

    case 0x0a:
      return c == 0x0A02 || c == 0x0A3C || c == 0x0A3E || c == 0x0A3F ||
	COMBINING_IN(0x0A40, 0x0A42) || COMBINING_IN(0x0A47, 0x0A48) ||
	COMBINING_IN(0x0A4B, 0x0A4D) || COMBINING_IN(0x0A70, 0x0A71) ||
	COMBINING_IN(0x0A81, 0x0A83) || c == 0x0ABC ||
	COMBINING_IN(0x0ABE, 0x0AC5) || COMBINING_IN(0x0AC7, 0x0AC9) ||
	COMBINING_IN(0x0ACB, 0x0ACD);

    case 0x0b:
      return COMBINING_IN(0x0B01, 0x0B03) || c == 0x0B3C ||
	COMBINING_IN(0x0B3E, 0x0B43) || COMBINING_IN(0x0B47, 0x0B48) ||
	COMBINING_IN(0x0B4B, 0x0B4D) || COMBINING_IN(0x0B56, 0x0B57) ||
	COMBINING_IN(0x0B82, 0x0B83) || COMBINING_IN(0x0BBE, 0x0BC2) ||
	COMBINING_IN(0x0BC6, 0x0BC8) || COMBINING_IN(0x0BCA, 0x0BCD) ||
	c == 0x0BD7;

    case 0x0c:
      return COMBINING_IN(0x0C01, 0x0C03) || COMBINING_IN(0x0C3E, 0x0C44) ||
	COMBINING_IN(0x0C46, 0x0C48) || COMBINING_IN(0x0C4A, 0x0C4D) ||
	COMBINING_IN(0x0C55, 0x0C56) || COMBINING_IN(0x0C82, 0x0C83) ||
	COMBINING_IN(0x0CBE, 0x0CC4) || COMBINING_IN(0x0CC6, 0x0CC8) ||
	COMBINING_IN(0x0CCA, 0x0CCD) || COMBINING_IN(0x0CD5, 0x0CD6);

    case 0x0d:
      return COMBINING_IN(0x0D02, 0x0D03) || COMBINING_IN(0x0D3E, 0x0D43) ||
	COMBINING_IN(0x0D46, 0x0D48) || COMBINING_IN(0x0D4A, 0x0D4D) ||
	c == 0x0D57;

    case 0x0e:
      return c == 0x0E31 || COMBINING_IN(0x0E34, 0x0E3A) ||
	COMBINING_IN(0x0E47, 0x0E4E) || c == 0x0EB1 ||
	COMBINING_IN(0x0EB4, 0x0EB9) || COMBINING_IN(0x0EBB, 0x0EBC) ||
	COMBINING_IN(0x0EC8, 0x0ECD);

    case 0x0f:
      return COMBINING_IN(0x0F18, 0x0F19) || c == 0x0F35 || c == 0x0F37 ||
	c == 0x0F39 || c == 0x0F3E || c == 0x0F3F ||
	COMBINING_IN(0x0F71, 0x0F84) || COMBINING_IN(0x0F86, 0x0F8B) ||
	COMBINING_IN(0x0F90, 0x0F95) || c == 0x0F97 ||
	COMBINING_IN(0x0F99, 0x0FAD) || COMBINING_IN(0x0FB1, 0x0FB7) ||
	c == 0x0FB9;

    case 0x20:
      return COMBINING_IN(0x20D0, 0x20DC) || c == 0x20E1;

    case 0x30:
      return COMBINING_IN(0x302A, 0x302F) || c == 0x3099 || c == 0x309A;
  }
  return 0;

#undef COMBINING_IN
}
/*
 * Return 1 if the code point c lies in one of the digit ranges listed
 * below, 0 otherwise.  Dispatches on c>>8 so that each case only
 * tests the ranges of that 256-character page.
 */
static inline int isDigit(INT32 c)
{
/* Inclusive range test against the parameter c. */
#define DIGIT_IN(LO, HI) (c >= (LO) && c <= (HI))

  switch(c>>8)
  {
    case 0x00:
      return DIGIT_IN(0x0030, 0x0039);
    case 0x06:
      return DIGIT_IN(0x0660, 0x0669) || DIGIT_IN(0x06F0, 0x06F9);
    case 0x09:
      return DIGIT_IN(0x0966, 0x096F) || DIGIT_IN(0x09E6, 0x09EF);
    case 0x0a:
      return DIGIT_IN(0x0A66, 0x0A6F) || DIGIT_IN(0x0AE6, 0x0AEF);
    case 0x0b:
      return DIGIT_IN(0x0B66, 0x0B6F) || DIGIT_IN(0x0BE7, 0x0BEF);
    case 0x0c:
      return DIGIT_IN(0x0C66, 0x0C6F) || DIGIT_IN(0x0CE6, 0x0CEF);
    case 0x0d:
      return DIGIT_IN(0x0D66, 0x0D6F);
    case 0x0e:
      return DIGIT_IN(0x0E50, 0x0E59) || DIGIT_IN(0x0ED0, 0x0ED9);
    case 0x0f:
      return DIGIT_IN(0x0F20, 0x0F29);
  }
  return 0;

#undef DIGIT_IN
}

/*
 * Return 1 if the code point c is one of the extender characters
 * listed below, 0 otherwise.
 */
static int isExtender(INT32 c)
{
  return c == 0x00B7 ||
    (c >= 0x02D0 && c <= 0x02D1) ||
    c == 0x0387 ||
    c == 0x0640 ||
    c == 0x0E46 ||
    c == 0x0EC6 ||
    c == 0x3005 ||
    (c >= 0x3031 && c <= 0x3035) ||
    (c >= 0x309D && c <= 0x309E) ||
    (c >= 0x30FC && c <= 0x30FE);
}
/*
 * Return 1 if c is one of the four whitespace code points
 * 0x20, 0x09, 0x0d or 0x0a; otherwise return 0.
 */
static inline int isSpace(INT32 c)
{
  return c == 0x20 || c == 0x09 || c == 0x0d || c == 0x0a;
}
01b9212016-01-12Per Hedbor static inline int isNameChar(INT32 c)
23a7db2005-11-12Martin Nilsson {
13670c2015-05-25Martin Nilsson  return isLetter(c) || isDigit(c) ||
23a7db2005-11-12Martin Nilsson  c=='.' || c=='-' || c=='_' || c==':' || isCombiningChar(c) || isExtender(c); }
01b9212016-01-12Per Hedbor static inline int isFirstNameChar(INT32 c)
23a7db2005-11-12Martin Nilsson { return isLetter(c) || c=='_' || c==':'; }
/*
 * Despite the is-prefix this is not a predicate: it returns the
 * numeric value (0-15) of the hexadecimal digit c, accepting both
 * upper and lower case letters, or -1 if c is not a hexadecimal digit.
 */
static inline int isHexChar(INT32 c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  return -1;
}

/*
 * ISWRAP(X) defines the function f_X(INT32 args), which reads one
 * integer argument with get_all_args(), pops the arguments and pushes
 * the integer result of X() applied to that argument.
 */
#define ISWRAP(X)				\
void PIKE_CONCAT(f_,X) (INT32 args)		\
{						\
  INT_TYPE i;					\
  get_all_args(NULL, args, "%i", &i);		\
  pop_n_elems(args);				\
  push_int( X (i) );				\
}

ISWRAP(isBaseChar)
ISWRAP(isIdeographic)
ISWRAP(isLetter)
ISWRAP(isCombiningChar)
ISWRAP(isDigit)
ISWRAP(isExtender)
ISWRAP(isSpace)
ISWRAP(isNameChar)
ISWRAP(isFirstNameChar)
ISWRAP(isHexChar)

/*
 * IF_XMLDEBUG(X) expands to X only when VERBOSE_XMLDEBUG is defined.
 * XMLDEBUGUSE(X) expands to X in that case and to UNUSED(X) otherwise.
 * Defining VERBOSE_XMLDEBUG also forces PIKE_DEBUG on.
 */
#ifdef VERBOSE_XMLDEBUG
#define IF_XMLDEBUG(X) X
#define XMLDEBUGUSE(X) X
#ifndef PIKE_DEBUG
#define PIKE_DEBUG
#endif
#else
#define IF_XMLDEBUG(X)
#define XMLDEBUGUSE(X) UNUSED(X)
#endif

/* IF_PIKEDEBUG(X) expands to X only when PIKE_DEBUG is defined. */
#ifdef PIKE_DEBUG
#define IF_PIKEDEBUG(X) X
#else
#define IF_PIKEDEBUG(X)
#endif
eb1b5f2006-05-03Henrik Grubbström (Grubba)  /*! @class Simple */ PIKECLASS Simple {
ecc9382008-06-29Martin Nilsson  PIKEVAR mapping entities flags ID_PROTECTED|ID_PRIVATE; PIKEVAR mapping attributes flags ID_PROTECTED|ID_PRIVATE; PIKEVAR mapping is_cdata flags ID_PROTECTED|ID_PRIVATE;
eb1b5f2006-05-03Henrik Grubbström (Grubba)  CVAR int flags;
3d6ad42008-01-21Henrik Grubbström (Grubba)  DECLARE_STORAGE
eb1b5f2006-05-03Henrik Grubbström (Grubba) 
  /* One entry in the chain of inputs a Context reads from.
   * POP() removes the entry in THIS->input and continues with its
   * next pointer. */
  struct xmlinput
  {
    /* The entry that becomes current when this one is popped. */
    struct xmlinput *next;
    /* Current read pointer; xmlread() advances it. */
    PCHARP datap;
    /* Number of characters left; xmlread() decreases it and XMLEOF()
     * tests it for <= 0. */
    ptrdiff_t len;
    /* Running position counter; xmlread() increases it. */
    ptrdiff_t pos;
    /* Optional mapping; POP() releases it with free_mapping() if set. */
    struct mapping *callbackinfo;
    /* Optional string; POP() releases it with free_string() if set.
     * new_string_xmlinput() stores a reference to the source string
     * here. */
    struct pike_string *to_free;
    /* Optional string; POP() releases it with free_string() if set. */
    struct pike_string *entity;
  };
d476592013-06-12Arne Goedeke  static struct block_allocator xmlinput_allocator = BA_INIT(sizeof(struct xmlinput), 64);
f12fc62006-05-05Henrik Grubbström (Grubba) 
d476592013-06-12Arne Goedeke  static struct xmlinput * alloc_xmlinput() { struct xmlinput * i = ba_alloc(&xmlinput_allocator); i->next = NULL; i->callbackinfo = NULL; i->to_free = NULL; i->entity = NULL; return i; } static void really_free_xmlinput(struct xmlinput * i) { ba_free(&xmlinput_allocator, i); }
f12fc62006-05-05Henrik Grubbström (Grubba)  struct xmlinput *new_string_xmlinput(struct pike_string *s) { struct xmlinput *i = alloc_xmlinput(); copy_shared_string(i->to_free, s); i->datap = MKPCHARP_STR(s); i->len = s->len; i->pos = 0; return i; }
/* Flag bits. */

/* Toggled by allow_rxml_entities(). */
#define ALLOW_RXML_ENTITIES	0x01
/* Set by compat_allow_errors("7.2"). */
#define COMPAT_ALLOW_7_2_ERRORS	0x02
/* Set by compat_allow_errors("7.2") and compat_allow_errors("7.6"). */
#define COMPAT_ALLOW_7_6_ERRORS	0x04
/* Added to the flags that parse_dtd() hands to the Context. */
#define ALLOW_PESMEG_EVERYWHERE	0x08
4d5ebc2006-06-15Henrik Grubbström (Grubba) 
  /* callback:
   *   string type
   *   string name
   *   mapping args
   *   array|string data
   *   mixed ... xtra
   *
   * The type below additionally lists a mapping(string:mixed) argument
   * between data and the extra arguments.
   */
#define CALLBACKTYPE							\
  tFuncV(tStr tStr tMapping tOr(tArray,tStr) tMap(tStr,tMix), tMix,	\
	 tSetvar(0, tMix))
  /*"function(string,string,mapping,array|string,mapping(string:mixed),mixed...:0=mixed)"*/

  /* Type shared by parse() and parse_dtd(): the data string, an
   * optional context string, an optional callback and any number of
   * extra arguments. */
#define PARSETYPE							\
  tOr(tFuncV(tStr tStr tOr(CALLBACKTYPE, tVoid), tMix, tArr(tVar(0))),	\
      tFuncV(tStr tOr(CALLBACKTYPE, tVoid), tMix, tArr(tVar(0))))
  /* "function(string," CALLBACKTYPE ",mixed...:array(0))" */
  /*! @decl array parse(string xml, string context, @
   *!                   function cb, mixed ... extra_args)
   *! @decl array parse(string xml, function cb, mixed ... extra_args)
   */
  /* Implementation notes:
   *
   * The arguments are passed on to a new Context object, with this
   * object's flags inserted as an extra integer argument directly
   * after the xml string, or after the context string when one is
   * given.  The Context's parse_xml() is then called without
   * arguments and its result is left on the stack.
   */
  PIKEFUN array parse(string s, string|function cb, mixed ... UNUSED)
    rawtype PARSETYPE;
  {
    int d;

    /* Move cb and extras one step to make place for flags. */
    for (d = 1; d < args; d++) {
      sp[1-d] = sp[-d];
    }
    /* sp[1-args] is the slot directly after s. */
    d = 1-args;
    if (TYPEOF(*cb) == T_STRING) {
      /* We have a context. Keep it in place.
       * NOTE: We have copied one element too much in this case,
       *       but it doesn't matter, since we overwrite the copy.
       */
      d++;
    }
    SET_SVAL(sp[d], T_INT, NUMBER_NUMBER, integer, THIS->flags);
    /* Account for the inserted flags argument. */
    args++;
    sp++;

    /* Instantiate Context with all of the arguments. */
    apply_current(Simple_Context_program_fun_num, args);

    if ((TYPEOF(Pike_sp[-1]) != T_OBJECT) || (!Pike_sp[-1].u.object->prog)) {
      Pike_error("Unexpected return value from Parser.XML.Low.Context().\n");
    }
    apply(Pike_sp[-1].u.object, "parse_xml", 0);
    /* Drop the Context object and keep the result of parse_xml(). */
    stack_swap();
    pop_stack();
  }
eb1b5f2006-05-03Henrik Grubbström (Grubba) 
  /*! @decl mixed parse_dtd(string dtd, string context, @
   *!                       function cb, mixed ... extras)
   *! @decl mixed parse_dtd(string dtd, function cb, mixed ... extras)
   */
  /* Implementation notes:
   *
   * Works like parse(), except that ALLOW_PESMEG_EVERYWHERE is added
   * to the flags handed to the Context and that the Context's
   * parse_dtd() is called instead of parse_xml().
   */
  PIKEFUN mixed parse_dtd(string s, string|function cb, mixed ... UNUSED)
    rawtype PARSETYPE;
  {
    int d;

    /* Move cb and extras one step to make place for flags. */
    for (d=1; d < args; d++) {
      sp[1-d] = sp[-d];
    }
    /* sp[1-args] is the slot directly after s. */
    d = 1-args;
    if (TYPEOF(*cb) == T_STRING) {
      /* We have a context. Keep it in place.
       * NOTE: We have copied one element too much in this case,
       *       but it doesn't matter, since we overwrite the copy.
       */
      d++;
    }
    SET_SVAL(sp[d], T_INT, NUMBER_NUMBER, integer,
	     THIS->flags | ALLOW_PESMEG_EVERYWHERE);
    /* Account for the inserted flags argument. */
    args++;
    sp++;

    /* Instantiate Context with all of the arguments. */
    apply_current(Simple_Context_program_fun_num, args);
    if ((TYPEOF(Pike_sp[-1]) != T_OBJECT) || (!Pike_sp[-1].u.object->prog)) {
      Pike_error("Unexpected return value from Parser.XML.Low.Context().\n");
    }
    apply(Pike_sp[-1].u.object, "parse_dtd", 0);
    /* Drop the Context object and keep the result of parse_dtd(). */
    stack_swap();
    pop_stack();
  }

  /* Store the integer 1 under the keys tag and attr in the two-level
   * is_cdata mapping. */
  PIKEFUN void set_attribute_cdata(string tag, string attr)
  {
    /* The pushed 1 becomes a third stack element, at sp+2-args once
     * args has been increased. */
    push_int(1);
    args++;
    /* NOTE(review): the final argument 1 presumably asks
     * mapping_mapping_lookup() to create missing entries -- confirm. */
    assign_svalue(mapping_mapping_lookup(THIS->is_cdata,
					 sp-args, sp+1-args, 1),
		  sp+2-args);
  }

  /* Return a copy of the mapping stored for tag in is_cdata, or
   * UNDEFINED if there is no mapping stored for tag. */
  PIKEFUN mapping(string:int(0..1)) get_tag_attribute_cdata(string tag)
  {
    struct svalue *s = low_mapping_lookup(THIS->is_cdata, sp-1);
    if (s && TYPEOF(*s) == T_MAPPING) {
      struct mapping *m = copy_mapping(s->u.mapping);
      pop_stack();
      push_mapping(m);
    } else {
      pop_stack();
      push_undefined();
    }
  }

  /* Store val under the keys tag and attr in the two-level attributes
   * mapping. */
  PIKEFUN void set_default_attribute(string tag, string attr, string val)
  {
    /* sp-args, sp+1-args and sp+2-args are tag, attr and val. */
    assign_svalue(mapping_mapping_lookup(THIS->attributes,
					 sp-args, sp+1-args, 1),
		  sp+2-args);
  }

  /* Return a copy of the mapping stored for tag in attributes, or a
   * new empty mapping if there is no mapping stored for tag. */
  PIKEFUN mapping(string:string) get_default_attributes(string tag)
  {
    struct svalue *s = low_mapping_string_lookup(THIS->attributes, tag);
    if (s && TYPEOF(*s) == T_MAPPING) {
      struct mapping *m = copy_mapping(s->u.mapping);
      pop_n_elems(args);
      push_mapping(m);
    } else {
      pop_n_elems(args);
      push_mapping(allocate_mapping(0));
    }
  }

  /*! @decl string lookup_entity(string entity)
   *!
   *! @returns
   *!   Returns the verbatim expansion of the entity.
   */
  /* Pushes UNDEFINED when there is no entities mapping or when the
   * entity is not found in it. */
  PIKEFUN string lookup_entity(string entity)
  {
    struct svalue *s = NULL;
    if (THIS->entities) {
      s = low_mapping_lookup(THIS->entities, sp-args);
    }
    pop_n_elems(args);
    if (s) {
      push_svalue(s);
    } else {
      push_undefined();
    }
  }

  /*! @decl void define_entity_raw(string entity, string raw)
   *!
   *! Define an entity or an SMEG.
   *!
   *! @param entity
   *!   Entity name, or SMEG name (if preceded by a @expr{"%"@}).
   *!
   *! @param raw
   *!   Verbatim expansion of the entity.
   *!
   *! @seealso
   *!   @[define_entity()]
   */
  PIKEFUN void define_entity_raw(string entity, string raw)
  {
    if(!THIS->entities)
    {
      /* No entities mapping yet: build one from the two arguments on
       * the stack and take it over from the stack. */
      f_aggregate_mapping(2);
      THIS->entities=sp[-1].u.mapping;
      sp--;
    }else{
      /* sp-2 is entity and sp-1 is raw. */
      mapping_insert(THIS->entities, sp-2, sp-1);
    }
  }
23a7db2005-11-12Martin Nilsson 
9d1f782008-11-21Henrik Grubbström (Grubba)  /*! @decl void define_entity(string entity, string s, @
4d5ebc2006-06-15Henrik Grubbström (Grubba)  *! function cb, mixed ... extras)
9d1f782008-11-21Henrik Grubbström (Grubba)  *! *! Define an entity or an SMEG. *! *! @param entity *! Entity name, or SMEG name (if preceeded by a @expr{"%"@}). *! *! @param s *! Expansion of the entity. Entity evaluation will be performed. *! *! @seealso *! @[define_entity_raw()]
4d5ebc2006-06-15Henrik Grubbström (Grubba)  */ PIKEFUN void define_entity(string entity, string s, function cb, mixed ... extras) { int d; /* Move cb and extras one step to make place for flags. */ for (d = 2; d < args; d++) { sp[2-d] = sp[-d]; }
017b572011-10-28Henrik Grubbström (Grubba)  SET_SVAL(sp[2-args], T_INT, NUMBER_NUMBER, integer, THIS->flags);
4d5ebc2006-06-15Henrik Grubbström (Grubba)  args++; sp++;
9d1f782008-11-21Henrik Grubbström (Grubba)  /* FIXME: Add a proper context argument generated from entity. */
4d5ebc2006-06-15Henrik Grubbström (Grubba)  apply_current(Simple_Context_program_fun_num, args-1); args = 2;
017b572011-10-28Henrik Grubbström (Grubba)  if ((TYPEOF(Pike_sp[-1]) != T_OBJECT) || (!Pike_sp[-1].u.object->prog)) {
4d5ebc2006-06-15Henrik Grubbström (Grubba)  Pike_error("Unexpected return value from Parser.XML.Low.Context().\n"); } apply(Pike_sp[-1].u.object, "parse_entity", 0); stack_swap(); pop_stack(); f_Simple_define_entity_raw(2); } /*! @decl void allow_rxml_entities(int(0..1) yes_no) */ PIKEFUN void allow_rxml_entities(int(0..1) yes_no) { if (!yes_no) THIS->flags &= ~ALLOW_RXML_ENTITIES; else THIS->flags |= ALLOW_RXML_ENTITIES; }
217d892014-11-17Stephen R. van den Berg  /*! @decl void compat_allow_errors(string version) *! *! Set whether the parser should allow certain errors for *! compatibility with earlier versions. @[version] can be: *! *! @string *! @value "7.2" *! Allow more data after the root element. *! @value "7.6" *! Allow multiple and invalidly placed "<?xml ... ?>" and *! "<!DOCTYPE ... >" declarations (invalid "<?xml ... ?>" *! declarations are otherwise treated as normal PI:s). Allow *! "<![CDATA[ ... ]]>" outside the root element. Allow the root *! element to be absent. *! @endstring *! *! @[version] can also be zero to enable all error checks. */
06bd612016-01-26Martin Nilsson  PIKEFUN void compat_allow_errors(void|string version)
217d892014-11-17Stephen R. van den Berg  { if (!version) { THIS->flags &= ~(COMPAT_ALLOW_7_2_ERRORS|COMPAT_ALLOW_7_6_ERRORS); } else if (version == MK_STRING("7.2")) { THIS->flags |= COMPAT_ALLOW_7_2_ERRORS|COMPAT_ALLOW_7_6_ERRORS; } else if (version == MK_STRING("7.6")) { THIS->flags &= ~COMPAT_ALLOW_7_2_ERRORS; THIS->flags |= COMPAT_ALLOW_7_6_ERRORS; } else { Pike_error ("Got unknown version string.\n"); } }
4d5ebc2006-06-15Henrik Grubbström (Grubba)  INIT {
5e9fc02015-08-18Per Hedbor  push_static_text("lt"); push_static_text("&#60;"); push_static_text("gt"); push_static_text(">"); push_static_text("amp"); push_static_text("&#38;"); push_static_text("apos"); push_static_text("'"); push_static_text("quot"); push_static_text("\"");
13670c2015-05-25Martin Nilsson 
4d5ebc2006-06-15Henrik Grubbström (Grubba)  f_aggregate_mapping(10); THIS->entities=sp[-1].u.mapping; sp--; dmalloc_touch_svalue(sp); f_aggregate_mapping(0); THIS->attributes=sp[-1].u.mapping; sp--; dmalloc_touch_svalue(sp); f_aggregate_mapping(0); THIS->is_cdata=sp[-1].u.mapping; sp--; dmalloc_touch_svalue(sp); THIS->flags = 0; } EXTRA { /* Compat */ /* function(string:string) */ ADD_FUNCTION("autoconvert", f_autoconvert, tFunc(tStr,tStr), 0); } /*! @class Context */ PIKECLASS Context
ecc9382008-06-29Martin Nilsson  flags ID_PROTECTED;
4d5ebc2006-06-15Henrik Grubbström (Grubba)  program_flags PROGRAM_USES_PARENT; { CVAR struct xmlinput *input; CVAR struct svalue func; CVAR struct array *extra_args; CVAR int flags; CVAR int doc_seq_pos;
3d6ad42008-01-21Henrik Grubbström (Grubba)  DECLARE_STORAGE
/* POP() removes the current entry from THIS->input: it releases the
 * entity, to_free and callbackinfo references the entry holds (those
 * that are set), makes the entry's next pointer the current input and
 * hands the entry back with really_free_xmlinput().
 * THIS->input must not be NULL when POP() is used. */
#define POP() do {							\
    struct xmlinput *i = THIS->input;					\
    IF_XMLDEBUG(fprintf(stderr,"POP\n"));				\
    if (i->entity)							\
      free_string(i->entity);						\
    if(i->to_free)							\
      free_string(i->to_free);						\
    if(i->callbackinfo)							\
      free_mapping(i->callbackinfo);					\
    THIS->input = i->next;						\
    really_free_xmlinput(i);						\
    IF_XMLDEBUG(if (THIS->input)					\
		  fprintf(stderr,					\
			  "ptr=%p len=%ld pos=%ld to_free=%p\n",	\
			  THIS->input->datap.ptr,			\
			  THIS->input->len,				\
			  THIS->input->pos,				\
			  THIS->input->to_free));			\
  } while(0)

    /* Start out with no input, an UNDEFINED callback, no extra
     * arguments and all flags cleared. */
    INIT
    {
      THIS->input = NULL;
      SET_SVAL(THIS->func, PIKE_T_INT, NUMBER_UNDEFINED, integer, 0);
      THIS->extra_args = NULL;
      THIS->flags = 0;
      THIS->doc_seq_pos = 0;
    }

    /* Release every remaining input entry, the extra arguments array
     * and the callback. */
    EXIT
      gc_trivial;
    {
      while (THIS->input) POP();
      if (THIS->extra_args) {
	free_array(THIS->extra_args);
	THIS->extra_args = NULL;
      }
      free_svalue(&THIS->func);
    }

    /* Export the flag bits as integer constants with the same names. */
    EXTRA
    {
#define XML_ADD_INTEGER_CONSTANT(X) add_integer_constant(#X, X, 0)
      XML_ADD_INTEGER_CONSTANT(ALLOW_RXML_ENTITIES);
      XML_ADD_INTEGER_CONSTANT(COMPAT_ALLOW_7_2_ERRORS);
      XML_ADD_INTEGER_CONSTANT(COMPAT_ALLOW_7_6_ERRORS);
      XML_ADD_INTEGER_CONSTANT(ALLOW_PESMEG_EVERYWHERE);
    }

/* PEEK(X) yields the character X positions ahead in the current input.
 * With PIKE_DEBUG it is a fatal error if X is greater than the
 * remaining length; X equal to the remaining length is accepted.
 * THIS->input must not be NULL. */
#define PEEK(X) \
( IF_PIKEDEBUG(((X) > THIS->input->len)? \
  (Pike_fatal("PEEK out of bounds (%ld/%ld)\n", \
	      (long)(X), (long)THIS->input->len), 0):) \
  INDEX_PCHARP(THIS->input->datap, X) )
/* SAFE_PEEK(X) is PEEK(X) with bounds checking: it yields 0 when there
 * is no current input or when X is at or beyond the remaining length.
 * (The NULL test used to be inverted, so that a NULL THIS->input fell
 * through to PEEK() and was dereferenced there.) */
#define SAFE_PEEK(X) ((!THIS->input || ((X)>=THIS->input->len)) ? 0 : PEEK(X))
/* Append the character Y to the string builder X. */
#define POKE(X,Y) string_builder_putchar(&X,Y)
/* True when there is no current input or the current input has no
 * characters left. */
#define XMLEOF() (!THIS->input || (THIS->input->len <= 0))

/* CHECK_INPUT(INPUT) is a debug-only sanity check: with PIKE_DEBUG it
 * is a fatal error if the input's remaining length is negative;
 * without PIKE_DEBUG it expands to nothing. */
#ifdef PIKE_DEBUG
#define CHECK_INPUT(INPUT) do {						\
    if ((INPUT)->len < 0) {						\
      Pike_fatal("Negative input length: %"PRINTPTRDIFFT"d\n", (INPUT)->len); \
    }									\
  } while(0)
#else /* !PIKE_DEBUG */
#define CHECK_INPUT(INPUT)
#endif /* PIKE_DEBUG */
372c532006-09-19Martin Nilsson 
e1c8132016-12-08Martin Nilsson static inline int xmlread(int z, int XMLDEBUGUSE(line))
23a7db2005-11-12Martin Nilsson { int popped=0;
4d5ebc2006-06-15Henrik Grubbström (Grubba)  THIS->input->pos += z;
e1c8132016-12-08Martin Nilsson  IF_XMLDEBUG(fprintf(stderr,"Step(%d) (at %d) to %ld/%ld '%c%c%c%c'\n",
23a7db2005-11-12Martin Nilsson  z, line,
4d5ebc2006-06-15Henrik Grubbström (Grubba)  THIS->input->pos, THIS->input->len,
23a7db2005-11-12Martin Nilsson  SAFE_PEEK(0), SAFE_PEEK(1), SAFE_PEEK(2), SAFE_PEEK(3)));
4d5ebc2006-06-15Henrik Grubbström (Grubba)  THIS->input->len-=z; INC_PCHARP(THIS->input->datap, z); while(THIS->input->next && XMLEOF()) { POP(); popped++; }
23a7db2005-11-12Martin Nilsson  return popped;
13670c2015-05-25Martin Nilsson }
23a7db2005-11-12Martin Nilsson 
/* READ(X) advances the current input by X characters through
 * xmlread(), passing the line of the call site for the debug trace. */
#define READ(X) xmlread((X),__LINE__)

/* Macro spellings of the simple_* reader functions. */
#define SIMPLE_READ_ATTVALUE(X) simple_read_system_attvalue(X)
#define SIMPLE_READ_SYSTEMLITERAL() simple_read_system_literal()
#define SIMPLE_READ_PUBIDLITERAL() simple_read_pubid_literal()
#define SIMPLE_READNAME() simple_readname()
#define SIMPLE_READNAME_PERIOD() simple_readname_period()
#define SIMPLE_READNMTOKEN() simple_readnmtoken()

/* Forward declarations of functions that are used before they are
 * defined. */
static void sys(void);
static int low_parse_dtd(void);
static void simple_readname(void);
static int simple_readname_period(void);
static void simple_read_system_literal(void);
static void simple_read_pubid_literal(void);
static int low_parse_xml(struct pike_string *end);
static void xmlerror(char *desc, struct pike_string *tag_name);

/* XMLERROR() reports desc through xmlerror() and then skips one
 * character of input; XMLERROR_STAY() reports without skipping. */
#define XMLERROR(desc) do {xmlerror(desc,NULL); READ (1);} while (0)
#define XMLERROR_STAY(desc) xmlerror(desc,NULL)

/* Skip input while the next character is whitespace according to
 * isSpace().  SMEG() is invoked before each whitespace test. */
#define SKIPSPACE() \
  do { while((!XMLEOF()) && (SMEG(), isSpace(PEEK(0)))) READ(1); }while(0)

/* Like SKIPSPACE(), but without invoking SMEG(). */
#define SKIPSPACE_NO_SMEG() \
  do { while((!XMLEOF()) && isSpace(PEEK(0))) READ(1); }while(0)

/* Skip input until the next character is X or the input ends. */
#define SKIPTO(X) \
  do { while( (!XMLEOF()) && (PEEK(0) != (X))) READ(1); }while(0)
4d5ebc2006-06-15Henrik Grubbström (Grubba) static int gobble(char *s)
23a7db2005-11-12Martin Nilsson { int e; for(e=0;s[e];e++) {
bdf7082008-06-29Marcus Comstedt  if(((p_wchar2)(EXTRACT_UCHAR(s+e)))!=PEEK(e))
23a7db2005-11-12Martin Nilsson  return 0; } if(isNameChar(PEEK(e))) return 0; READ(e); return 1; }
4d5ebc2006-06-15Henrik Grubbström (Grubba) #define GOBBLE(X) gobble(X)

/*
 * Append the name that starts at the current position to the string
 * builder X and consume it. If the current character cannot start a
 * name, "Name expected" is reported and one character is skipped.
 */
#define READNAME(X) do {                                                \
    if(isFirstNameChar(PEEK(0)))                                        \
    {                                                                   \
      int e = 1;                                                        \
      while ((e < THIS->input->len) &&                                  \
             isNameChar(PEEK(e))) {                                     \
        e++;                                                            \
      }                                                                 \
      string_builder_append(&X, THIS->input->datap,                     \
                            e);                                         \
      READ(e);                                                          \
    } else {                                                            \
      XMLERROR("Name expected");                                        \
    }                                                                   \
  }while(0)

/* Almost identical to READNAME but returns whether any periods were
   found. Used to detect RXML variable entities. */
#define READNAME_PERIOD(X, FOUND_PERIOD) do {                           \
    FOUND_PERIOD = 0;                                                   \
    if(isFirstNameChar(PEEK(0)))                                        \
    {                                                                   \
      int e = 1;                                                        \
      while ((e < THIS->input->len) &&                                  \
             isNameChar(PEEK(e))) {                                     \
        if (PEEK(e) == '.') FOUND_PERIOD = 1;                           \
        e++;                                                            \
      }                                                                 \
      string_builder_append(&X, THIS->input->datap,                     \
                            e);                                         \
      READ(e);                                                          \
    } else {                                                            \
      XMLERROR("Name expected");                                        \
    }                                                                   \
  }while(0)

/* Like READNAME, but the first character only has to be a name
 * character (Nmtoken rather than Name). */
#define READNMTOKEN(X) do {                                             \
    if(isNameChar(PEEK(0)))                                             \
    {                                                                   \
      int e = 1;                                                        \
      while ((e < THIS->input->len) &&                                  \
             isNameChar(PEEK(e))) {                                     \
        e++;                                                            \
      }                                                                 \
      string_builder_append(&X, THIS->input->datap,                     \
                            e);                                         \
      READ(e);                                                          \
    }else{                                                              \
      XMLERROR("Nametoken expected");                                   \
    }                                                                   \
  }while(0)

/*
 * BEGIN_STRING(STR) opens a block with a fresh string builder named STR
 * that is freed if an error is thrown. END_STRING(STR) closes that
 * block and pushes the finished string on the Pike stack. The two must
 * always be used as a pair in the same scope.
 */
#define BEGIN_STRING(STR)                                               \
  do{                                                                   \
    struct string_builder STR;                                          \
    ONERROR tmp_ ## STR;                                                \
    init_string_builder(&STR,0);                                        \
    SET_ONERROR(tmp_ ## STR, free_string_builder, &STR)

#define END_STRING(STR)                                                 \
    check_stack(1);                                                     \
    UNSET_ONERROR(tmp_ ## STR);                                         \
    push_string(finish_string_builder(&STR));                           \
  }while(0)

/* Replace the string on top of the stack with the result of
 * f_string_normalize_space(). */
#define STRIP_SPACES() do {                                             \
    IF_XMLDEBUG(fprintf(stderr,"STRIPPING SPACES (%s)\n",sp[-1].u.string->str)); \
    f_string_normalize_space(1);                                        \
    IF_XMLDEBUG(fprintf(stderr,"SPACES STRIPPED (%s)\n",sp[-1].u.string->str)); \
  } while(0)

/*
 * Parse a character reference and append the character to X. The
 * caller has consumed the '&'; the first READ(1) here skips the '#'.
 * NOTE(review): num is not range checked, so a very long digit string
 * overflows INT32.
 */
#define READ_CHAR_REF(X) do {                                           \
    /* Character reference */                                           \
    INT32 num=0;                                                        \
                                                                        \
    READ(1);                                                            \
    if(PEEK(0)=='x')                                                    \
    {                                                                   \
      READ(1);                                                          \
      while(isHexChar(PEEK(0))>=0)                                      \
      {                                                                 \
        num*=16;                                                        \
        num+=isHexChar(PEEK(0));                                        \
        READ(1);                                                        \
      }                                                                 \
    }else{                                                              \
      while(PEEK(0)>='0' && PEEK(0)<='9')                               \
      {                                                                 \
        num*=10;                                                        \
        num+=PEEK(0)-'0';                                               \
        READ(1);                                                        \
      }                                                                 \
    }                                                                   \
    if(PEEK(0)!=';')                                                    \
      XMLERROR("Missing ';' after character reference.");               \
    READ(1);                                                            \
    POKE(X, num);                                                       \
  }while(0)

/*
 * Expand the entity whose name is on top of the Pike stack.
 *
 * The name is always removed from the stack, on success as well as on
 * every error path, so the macro is stack neutral apart from consuming
 * the name. On success a new input holding the replacement text is
 * linked in front of THIS->input. An unknown entity is first offered
 * to the user callback; a string returned from there is registered
 * with define_entity_raw() and then used as the replacement text.
 *
 * ATTR is passed to the callback as the "in_attribute" value.
 * PARSE_RECURSIVELY is unused.
 */
#define PARSE_REF(ATTR,PARSE_RECURSIVELY) do {                          \
    /* Entity reference */                                              \
    struct xmlinput *i;                                                 \
    /* Ensure that we aren't already parsing this ref. */               \
    for (i = THIS->input; i; i = i->next) {                             \
      if (i->entity == sp[-1].u.string) {                               \
        XMLERROR("Recursive entity reference definition.");             \
        break;                                                          \
      }                                                                 \
    }                                                                   \
    if (i) {                                                            \
      /* Drop the entity name so that the stack stays balanced. */      \
      pop_stack();                                                      \
      break;                                                            \
    }                                                                   \
                                                                        \
    /* lookup entry in mapping and parse it recursively */              \
    /* Generate error if entity is not defined */                       \
    {                                                                   \
      struct pike_string *name=0;                                       \
      struct mapping *callbackinfo;                                     \
      ONERROR tmp3,tmp4;                                                \
                                                                        \
      copy_shared_string(name, sp[-1].u.string);                        \
      SET_ONERROR(tmp3, do_free_string, name);                          \
                                                                        \
      apply_external(1, f_Simple_lookup_entity_fun_num, 1);             \
                                                                        \
      UPDATE_LOCATION(THIS->input->pos, THIS->input->callbackinfo);     \
      callbackinfo=copy_mapping(THIS->input->callbackinfo);             \
      SET_ONERROR(tmp4, do_free_mapping, callbackinfo);                 \
      push_constant_text("previous");                                   \
      ref_push_mapping(THIS->input->callbackinfo);                      \
      mapping_insert(callbackinfo, sp-2, sp-1);                         \
      pop_n_elems(2);                                                   \
                                                                        \
      do {                                                              \
        if(UNSAFE_IS_ZERO(sp-1))                                        \
        {                                                               \
          pop_stack();                                                  \
          if(index_shared_string(name,0)=='%')                          \
          {                                                             \
            push_constant_text("%");                                    \
            ref_push_string(name);                                      \
            push_int(1);                                                \
            o_range2(RANGE_LOW_FROM_BEG|RANGE_HIGH_OPEN);               \
          }else{                                                        \
            push_constant_text("&");                                    \
            ref_push_string(name);                                      \
          }                                                             \
          push_constant_text("in_attribute");                           \
          push_int(ATTR);                                               \
          f_aggregate_mapping(2); /* attributes */                      \
          push_int(0); /* no data */                                    \
          very_low_sys(callbackinfo);                                   \
          if(TYPEOF(sp[-1]) != T_STRING)                                \
          {                                                             \
            pop_stack();                                                \
            XMLERROR("No such entity.");                                \
            break;                                                      \
          }                                                             \
          ref_push_string(name);                                        \
          ref_push_string(sp[-2].u.string);                             \
          apply_external(1, f_Simple_define_entity_raw_fun_num, 2);     \
          /* Discard the return value; the replacement string */        \
          /* from the callback is now on top of the stack again. */     \
          pop_stack();                                                  \
        }                                                               \
        else                                                            \
        {                                                               \
          if(TYPEOF(sp[-1]) != T_STRING)                                \
          {                                                             \
            /* Drop the unusable lookup result before reporting. */     \
            pop_stack();                                                \
            XMLERROR("lookup_entity() returned non-string!");           \
            break;                                                      \
          }                                                             \
        }                                                               \
                                                                        \
        {                                                               \
          struct pike_string *s = sp[-1].u.string;                      \
          struct xmlinput *new_input = new_string_xmlinput(s);          \
          new_input->next = THIS->input;                                \
          THIS->input = new_input;                                      \
          pop_stack();                                                  \
          check_stack(10);                                              \
          IF_XMLDEBUG(fprintf(stderr,"Entity expands to: %s\n",s->str)); \
          CHECK_INPUT(new_input);                                       \
          add_ref(new_input->callbackinfo = callbackinfo);              \
          copy_shared_string(new_input->entity, name);                  \
        }                                                               \
      }while(0);                                                        \
      CALL_AND_UNSET_ONERROR(tmp4);                                     \
      CALL_AND_UNSET_ONERROR(tmp3);                                     \
    }                                                                   \
  }while(0)

/*
 * Handle a reference starting with '&' at the current position:
 * character references are appended to X, names containing a period
 * (only when ALLOW_RXML_ENTITIES is set) are copied to X untouched,
 * and everything else is expanded with PARSE_REF.
 */
#define READ_REFERENCE(ATTR,X,PARSE_RECURSIVELY) do {                   \
    READ(1); /* Assume '&' for now */                                   \
    if(PEEK(0)=='#')                                                    \
    {                                                                   \
      READ_CHAR_REF(X);                                                 \
    }else{                                                              \
      int found_period = 0;                                             \
      if (THIS->flags & ALLOW_RXML_ENTITIES) {                          \
        found_period = SIMPLE_READNAME_PERIOD();                        \
      } else {                                                          \
        SIMPLE_READNAME();                                              \
      }                                                                 \
      IF_XMLDEBUG(fprintf(stderr,"Found entity: %s\n",sp[-1].u.string->str)); \
      if(PEEK(0)!=';')                                                  \
        XMLERROR("Missing ';' after entity reference.");                \
      READ(1);                                                          \
      if(found_period) {                                                \
        /* RXML variable reference; output untouched */                 \
        IF_XMLDEBUG(fprintf(stderr,"RXML entity: %s\n",sp[-1].u.string->str));\
        POKE(X, '&');                                                   \
        string_builder_shared_strcat(&X, sp[-1].u.string);              \
        POKE(X, ';');                                                   \
        pop_stack();                                                    \
      } else                                                            \
        PARSE_REF(ATTR, PARSE_RECURSIVELY);                             \
    }                                                                   \
    IF_XMLDEBUG(fprintf(stderr,"Read reference at %d done.\n",__LINE__)); \
  }while(0)

/*
 * Handle a parameter entity reference starting with '%' at the current
 * position. The entity is looked up under its name prefixed with "%".
 * X is unused.
 */
#define READ_PEREFERENCE(ATTR,X,PARSE_RECURSIVELY) do {                 \
    DO_IF_DEBUG(struct svalue *spsave=sp;)                              \
    READ(1); /* Assume '%' */                                           \
    push_constant_text("%");                                            \
    SIMPLE_READNAME();                                                  \
    f_add(2);                                                           \
    if(PEEK(0)!=';')                                                    \
      XMLERROR("Missing ';' after parsed entity reference.");           \
    READ(1);                                                            \
    PARSE_REF(ATTR,PARSE_RECURSIVELY);                                  \
    DO_IF_DEBUG(if(sp<spsave) Pike_fatal("Stack underflow\n");)         \
  }while(0)

/* Expand a parameter entity reference at the current position, but
 * only when ALLOW_PESMEG_EVERYWHERE is set and the current character
 * is '%'. Evaluates to the result of read_smeg_pereference(), else 0. */
#define SMEG()                                                          \
  ((THIS->flags & ALLOW_PESMEG_EVERYWHERE) &&                           \
   (PEEK(0)=='%') && read_smeg_pereference())
13670c2015-05-25Martin Nilsson 
/*
 * Read a quoted attribute value into the string builder SB.
 *
 * Leading whitespace is skipped, then the opening quote selects the
 * terminator handed to read_attvalue(). Anything other than a quote is
 * reported as an error. IS_CDATA is accepted for symmetry with the
 * callers but is not used here.
 */
#define READ_ATTVALUE(SB,IS_CDATA) do {                                 \
    SKIPSPACE();                                                        \
    switch(PEEK(0))                                                     \
    {                                                                   \
      case '\'':                                                        \
        READ(1);                                                        \
        read_attvalue(&SB, '\'', 0);                                    \
        break;                                                          \
                                                                        \
      case '\"':                                                        \
        READ(1);                                                        \
        read_attvalue(&SB, '\"', 0);                                    \
        break;                                                          \
                                                                        \
      case '%':                                                         \
        XMLERROR("PEReference outside quotes!.");                       \
        SKIPTO(';');                                                    \
        break;                                                          \
                                                                        \
      case '&':                                                         \
        IF_XMLDEBUG(READ(0));                                           \
        XMLERROR("Reference outside quotes!.");                         \
        SKIPTO(';');                                                    \
        break;                                                          \
                                                                        \
      default:                                                          \
        IF_XMLDEBUG(READ(0));                                           \
        XMLERROR("Unquoted attribute value.");                          \
    }                                                                   \
  } while (0)

/*
 * Read a quoted entity value into the string builder SB. Works like
 * READ_ATTVALUE but hands the text to read_entityvalue().
 */
#define READ_ENTITYVALUE(SB) do {                                       \
    SKIPSPACE();                                                        \
    switch(PEEK(0))                                                     \
    {                                                                   \
      case '\'':                                                        \
        READ(1);                                                        \
        read_entityvalue(&SB, '\'');                                    \
        break;                                                          \
                                                                        \
      case '\"':                                                        \
        READ(1);                                                        \
        read_entityvalue(&SB, '\"');                                    \
        break;                                                          \
                                                                        \
      case '%':                                                         \
        IF_XMLDEBUG(READ(0));                                           \
        XMLERROR("PEReference outside quotes!.");                       \
        SKIPTO(';');                                                    \
        break;                                                          \
                                                                        \
      case '&':                                                         \
        IF_XMLDEBUG(READ(0));                                           \
        XMLERROR("Reference outside quotes!.");                         \
        SKIPTO(';');                                                    \
        break;                                                          \
                                                                        \
      default:                                                          \
        XMLERROR("Unquoted attribute value.");                          \
    }                                                                   \
  } while (0)
/*
 * Collect everything up to, but not including, the closing "-->" and
 * push it as a string. The "-->" itself is consumed unless the input
 * ended first.
 */
#define READ_COMMENT() do {                                             \
    BEGIN_STRING(com);                                                  \
    while (!XMLEOF() &&                                                 \
           (PEEK(0)!='-' || PEEK(1)!='-' || PEEK(2)!='>'))              \
    {                                                                   \
      POKE(com, PEEK(0));                                               \
      READ(1);                                                          \
    }                                                                   \
    if (!XMLEOF()) {                                                    \
      READ(3);                                                          \
    }                                                                   \
    END_STRING(com);                                                    \
  } while (0)

/*
 * Flush the text collected so far in string builder X to the callback:
 * if X is non-empty it is reported with an empty type string, no name
 * and no attributes, and X is reinitialised for further use.
 */
#define INTERMISSION(X) do {                                            \
    if ((X).s->len) {                                                   \
      check_stack(4);                                                   \
      push_empty_string();                                              \
      push_int(0); /* No name */                                        \
      push_int(0); /* No attributes */                                  \
      push_string(finish_string_builder(&(X)));                         \
      init_string_builder(&(X), 0);                                     \
      SYS();                                                            \
    }                                                                   \
  } while (0)

/* Store position P in mapping m under the key held in
 * location_string_svalue. */
#define UPDATE_LOCATION(P,m) do {                                       \
    push_int64((P));                                                    \
    mapping_insert((m), &location_string_svalue, sp-1);                 \
    pop_stack();                                                        \
  } while (0)
23a7db2005-11-12Martin Nilsson 
4d5ebc2006-06-15Henrik Grubbström (Grubba) static inline void very_low_sys(struct mapping *callbackinfo)
23a7db2005-11-12Martin Nilsson {
4d5ebc2006-06-15Henrik Grubbström (Grubba)  check_stack(1 + THIS->extra_args->size);
734e7f2008-11-22Henrik Grubbström (Grubba)  if (callbackinfo) ref_push_mapping(callbackinfo); else f_aggregate_mapping(0);
4d5ebc2006-06-15Henrik Grubbström (Grubba)  assign_svalues_no_free(sp, THIS->extra_args->item, THIS->extra_args->size, THIS->extra_args->type_field); sp += THIS->extra_args->size; apply_svalue(&THIS->func, 5+THIS->extra_args->size);
23a7db2005-11-12Martin Nilsson }
3d6ad42008-01-21Henrik Grubbström (Grubba) static inline void low_sys(void)
23a7db2005-11-12Martin Nilsson {
4d5ebc2006-06-15Henrik Grubbström (Grubba)  struct xmlinput *i = THIS->input;
734e7f2008-11-22Henrik Grubbström (Grubba)  if (i) { UPDATE_LOCATION(i->pos, i->callbackinfo); very_low_sys(i->callbackinfo); } else { very_low_sys(NULL); }
23a7db2005-11-12Martin Nilsson }
3d6ad42008-01-21Henrik Grubbström (Grubba) static void sys(void)
23a7db2005-11-12Martin Nilsson {
4d5ebc2006-06-15Henrik Grubbström (Grubba)  low_sys();
23a7db2005-11-12Martin Nilsson  if(SAFE_IS_ZERO(sp-1)) pop_stack(); }
4d5ebc2006-06-15Henrik Grubbström (Grubba) #define SYS() sys()
23a7db2005-11-12Martin Nilsson 
/*
 * Report an error to the user callback.
 *
 * desc is the error description. tag_name is the tag that triggered
 * the error, or NULL when there is none. The callback's return value
 * is discarded, so the stack is left unchanged.
 */
static void xmlerror(char *desc, struct pike_string *tag_name)
{
  push_constant_text("error");

  if (!tag_name) {
    push_int(0);                /* no name */
  } else {
    ref_push_string(tag_name);  /* Name of tag that triggered the error. */
  }

  push_int(0);                  /* no attributes */
  push_text(desc);

  low_sys();
  pop_stack();
}
3d6ad42008-01-21Henrik Grubbström (Grubba)  static int read_smeg_pereference(void)
4d5ebc2006-06-15Henrik Grubbström (Grubba)  { struct mapping *callbackinfo; struct pike_string *name=0; struct pike_string *full_name=0; struct xmlinput *i; ONERROR tmp3,tmp4,tmp5; READ(1); /* this is '%' */ IF_XMLDEBUG( fprintf(stderr, "SMEG reference found.\n") );
5e9fc02015-08-18Per Hedbor  push_constant_text("%");
4d5ebc2006-06-15Henrik Grubbström (Grubba)  SIMPLE_READNAME(); add_ref(name=sp[-1].u.string); SET_ONERROR(tmp3, do_free_string, name); f_add(2); add_ref(full_name=sp[-1].u.string); SET_ONERROR(tmp5, do_free_string, full_name); if(PEEK(0)!=';') XMLERROR("Missing ';' after parsed entity reference."); READ(1); /* lookup entry in mapping and parse it recursively */ /* Generate error if entity is not defined */ /* Ensure that we aren't already parsing this peref. */ for (i = THIS->input; i; i = i->next) { if (i->entity == full_name) { XMLERROR("Recursive parsed entity reference definition."); CALL_AND_UNSET_ONERROR(tmp5); CALL_AND_UNSET_ONERROR(tmp3); /* FIXME: Adjust stack? */ return 1; } }
23a7db2005-11-12Martin Nilsson 
4d5ebc2006-06-15Henrik Grubbström (Grubba)  apply_external(1, f_Simple_lookup_entity_fun_num, 1);
23a7db2005-11-12Martin Nilsson 
4d5ebc2006-06-15Henrik Grubbström (Grubba)  UPDATE_LOCATION(THIS->input->pos, THIS->input->callbackinfo); callbackinfo = copy_mapping(THIS->input->callbackinfo); SET_ONERROR(tmp4, do_free_mapping, callbackinfo); push_constant_text("previous"); ref_push_mapping(THIS->input->callbackinfo); mapping_insert(callbackinfo, sp-2, sp-1); pop_n_elems(2);
9d1f782008-11-21Henrik Grubbström (Grubba)  mapping_string_insert_string(callbackinfo, MK_STRING("context"), full_name);
23a7db2005-11-12Martin Nilsson  if(UNSAFE_IS_ZERO(sp-1)) { pop_stack(); push_constant_text("%"); ref_push_string(name); f_aggregate_mapping(0); push_int(0); /* no data */
4d5ebc2006-06-15Henrik Grubbström (Grubba)  very_low_sys(callbackinfo);
23a7db2005-11-12Martin Nilsson  }
017b572011-10-28Henrik Grubbström (Grubba)  if(TYPEOF(sp[-1]) == T_STRING)
23a7db2005-11-12Martin Nilsson  { struct pike_string *s=sp[-1].u.string;
e1c8132016-12-08Martin Nilsson  IF_XMLDEBUG(fprintf(stderr, "ptr=%p len=%ld pos=%ld to_free=%p\n",
4d5ebc2006-06-15Henrik Grubbström (Grubba)  THIS->input->datap.ptr, THIS->input->len, THIS->input->pos, THIS->input->to_free));
23a7db2005-11-12Martin Nilsson  /* PUSH(s) */ {
4d5ebc2006-06-15Henrik Grubbström (Grubba)  i = new_string_xmlinput(s); i->next = THIS->input; THIS->input = i; CHECK_INPUT(THIS->input); THIS->input->callbackinfo = callbackinfo; copy_shared_string(THIS->input->entity, full_name);
23a7db2005-11-12Martin Nilsson  } UNSET_ONERROR(tmp4);
9075242006-02-17Martin Nilsson  CALL_AND_UNSET_ONERROR(tmp5);
23a7db2005-11-12Martin Nilsson  READ(0); /* autopop empty strings */ pop_stack(); CALL_AND_UNSET_ONERROR(tmp3); return 1; }
4d5ebc2006-06-15Henrik Grubbström (Grubba)  pop_stack();
23a7db2005-11-12Martin Nilsson 
4d5ebc2006-06-15Henrik Grubbström (Grubba)  XMLERROR("No such entity in pereference."); CALL_AND_UNSET_ONERROR(tmp4); CALL_AND_UNSET_ONERROR(tmp5); CALL_AND_UNSET_ONERROR(tmp3); return 0; }
23a7db2005-11-12Martin Nilsson 
4d5ebc2006-06-15Henrik Grubbström (Grubba) static void read_attvalue(struct string_builder *X,
23a7db2005-11-12Martin Nilsson  p_wchar2 Y, int keepspace) {
372c532006-09-19Martin Nilsson  struct xmlinput *input = THIS->input;
4d5ebc2006-06-15Henrik Grubbström (Grubba)  while(THIS->input)
23a7db2005-11-12Martin Nilsson  {
4d5ebc2006-06-15Henrik Grubbström (Grubba)  if(XMLEOF()) { POP(); continue;
23a7db2005-11-12Martin Nilsson  }
4d5ebc2006-06-15Henrik Grubbström (Grubba)  if((THIS->input == input) && (PEEK(0) == Y))
23a7db2005-11-12Martin Nilsson  { READ(1);
4d5ebc2006-06-15Henrik Grubbström (Grubba)  return;
23a7db2005-11-12Martin Nilsson  } switch(PEEK(0)) { case '&':
4d5ebc2006-06-15Henrik Grubbström (Grubba)  READ_REFERENCE(1,(*X), read_attvalue(X, 0,1));
23a7db2005-11-12Martin Nilsson  break;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  case 0x0d:
c4b82a2006-05-06Henrik Grubbström (Grubba)  if (!keepspace && PEEK(1)==0x0a) READ(1); /* cr/lf */
3595ea2018-02-12Marcus Comstedt  /* FALLTHRU */
23a7db2005-11-12Martin Nilsson  case 0x20: case 0x0a: case 0x09: READ(1); POKE(*X, 0x20); break;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  default: POKE(*X, PEEK(0)); READ(1); } }
734e7f2008-11-22Henrik Grubbström (Grubba)  XMLERROR_STAY("End of file while looking for end of attribute value.");
23a7db2005-11-12Martin Nilsson }
4d5ebc2006-06-15Henrik Grubbström (Grubba) static void read_pubid(struct string_builder *X,
23a7db2005-11-12Martin Nilsson  p_wchar2 Y) { while(1) { if(XMLEOF()) { if(Y)
734e7f2008-11-22Henrik Grubbström (Grubba)  XMLERROR_STAY("End of file while looking for end of pubid.");
23a7db2005-11-12Martin Nilsson  break; } if(PEEK(0)==Y) { READ(1); break; } switch(PEEK(0)) {
3595ea2018-02-12Marcus Comstedt  case 0x0d: if(PEEK(1)==0x0a) READ(1); /* FALLTHRU */
23a7db2005-11-12Martin Nilsson  case 0x20: case 0x0a: case 0x09: READ(1); POKE(*X, 0x20); break;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  default: POKE(*X, PEEK(0)); READ(1); } } }
4d5ebc2006-06-15Henrik Grubbström (Grubba) static void read_entityvalue(struct string_builder *X, p_wchar2 Y)
23a7db2005-11-12Martin Nilsson {
6c35f82008-11-18Henrik Grubbström (Grubba)  while(THIS->input)
23a7db2005-11-12Martin Nilsson  { if(XMLEOF()) {
6c35f82008-11-18Henrik Grubbström (Grubba)  POP(); continue;
23a7db2005-11-12Martin Nilsson  } if(PEEK(0)==Y) { READ(1);
6c35f82008-11-18Henrik Grubbström (Grubba)  return;
23a7db2005-11-12Martin Nilsson  } switch(PEEK(0)) { case '%':
4d5ebc2006-06-15Henrik Grubbström (Grubba)  READ_PEREFERENCE(1,(*X), read_entityvalue(X, 0));
23a7db2005-11-12Martin Nilsson  break; case 0x0d: if(PEEK(1)==0x0a) READ(1); #if 0 case 0x20: case 0x0a: case 0x09: POKE(*X, 0x20); READ(1); #endif
5de7a62015-04-19Henrik Grubbström (Grubba)  break;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson #if 0 case '&':
4d5ebc2006-06-15Henrik Grubbström (Grubba)  READ_REFERENCE(1,(*X), read_entityvalue(X, 0));
23a7db2005-11-12Martin Nilsson  break; #else case '&': if(PEEK(1)=='#') { READ(1); READ_CHAR_REF((*X)); break; } #endif
3595ea2018-02-12Marcus Comstedt  /* FALLTHRU */
23a7db2005-11-12Martin Nilsson  default: POKE(*X, PEEK(0)); READ(1); } }
6c35f82008-11-18Henrik Grubbström (Grubba)  if (Y)
734e7f2008-11-22Henrik Grubbström (Grubba)  XMLERROR_STAY("End of file while looking for end of entity value.");
23a7db2005-11-12Martin Nilsson }
3d6ad42008-01-21Henrik Grubbström (Grubba) static void simple_read_system_literal(void)
23a7db2005-11-12Martin Nilsson { BEGIN_STRING(name); SKIPSPACE(); READ_ATTVALUE(name,1); END_STRING(name); }
3d6ad42008-01-21Henrik Grubbström (Grubba) static void simple_read_pubid_literal(void)
23a7db2005-11-12Martin Nilsson { BEGIN_STRING(name); SKIPSPACE(); switch(PEEK(0)) { case '\'': READ(1);
4d5ebc2006-06-15Henrik Grubbström (Grubba)  read_pubid(&name,'\'');
23a7db2005-11-12Martin Nilsson  break; case '\"': READ(1);
4d5ebc2006-06-15Henrik Grubbström (Grubba)  read_pubid(&name,'\"');
23a7db2005-11-12Martin Nilsson  break; case '%': XMLERROR("PEReference outside quotes!."); SKIPTO(';'); break; case '&': XMLERROR("Reference outside quotes!."); SKIPTO(';'); break; default: XMLERROR("Unquoted public id."); } END_STRING(name); }
4d5ebc2006-06-15Henrik Grubbström (Grubba) static void simple_read_system_attvalue(int is_cdata)
23a7db2005-11-12Martin Nilsson {
4d5ebc2006-06-15Henrik Grubbström (Grubba)  BEGIN_STRING(attvalue);
23a7db2005-11-12Martin Nilsson  SKIPSPACE(); IF_XMLDEBUG(READ(0));
4d5ebc2006-06-15Henrik Grubbström (Grubba)  READ_ATTVALUE(attvalue,is_cdata); END_STRING(attvalue);
23a7db2005-11-12Martin Nilsson  if(!is_cdata) STRIP_SPACES(); }
3d6ad42008-01-21Henrik Grubbström (Grubba) static void simple_readname(void)
23a7db2005-11-12Martin Nilsson { check_stack(1); BEGIN_STRING(name); SMEG(); READNAME(name); END_STRING(name); }
3d6ad42008-01-21Henrik Grubbström (Grubba) static int simple_readname_period(void)
23a7db2005-11-12Martin Nilsson { /* Returns whether '.' is included somewhere in the name. */ int found_period = 0; check_stack(1); BEGIN_STRING(name); SMEG(); READNAME_PERIOD(name, found_period); END_STRING(name); return found_period; }
3d6ad42008-01-21Henrik Grubbström (Grubba) static void simple_readnmtoken(void)
23a7db2005-11-12Martin Nilsson { BEGIN_STRING(name); READNMTOKEN(name); END_STRING(name); }
4d5ebc2006-06-15Henrik Grubbström (Grubba) #define SIMPLE_READ_ATTRIBUTES(CD) simple_read_attributes(CD);
23a7db2005-11-12Martin Nilsson 
4d5ebc2006-06-15Henrik Grubbström (Grubba) static void simple_read_attributes(struct mapping *is_cdata)
23a7db2005-11-12Martin Nilsson { SKIPSPACE();
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  /* Read unordered attributes */
4d5ebc2006-06-15Henrik Grubbström (Grubba)  while(THIS->input->len>0 && isFirstNameChar(PEEK(0)))
23a7db2005-11-12Martin Nilsson  { int iscd; SIMPLE_READNAME(); SKIPSPACE(); if(PEEK(0)!='=') XMLERROR("Missing '=' in attribute."); READ(1); iscd=1; if(is_cdata) { struct svalue *s=low_mapping_lookup(is_cdata,sp-1); if(s && SAFE_IS_ZERO(s)) iscd=0; } SIMPLE_READ_ATTVALUE(iscd); #ifdef VERBOSE_XMLDEBUG fprintf(stderr,"Attribute %s = %s (iscd = %d)\n", sp[-2].u.string->str, sp[-1].u.string->str,iscd); #endif assign_lvalue(sp-3, sp-1); pop_n_elems(2); SKIPSPACE(); } }
3d6ad42008-01-21Henrik Grubbström (Grubba) static void parse_optional_xmldecl(void)
23a7db2005-11-12Martin Nilsson { if(GOBBLE("<?xml")) { struct mapping *m;
faa2942005-12-28Henrik Grubbström (Grubba) #if 0
4d5ebc2006-06-15Henrik Grubbström (Grubba)  if(THIS->input->pos != 5)
d73b482005-12-18Martin Nilsson  XMLERROR( "XML header must be at the first byte in the document." );
faa2942005-12-28Henrik Grubbström (Grubba) #endif
d73b482005-12-18Martin Nilsson 
23a7db2005-11-12Martin Nilsson  push_constant_text("<?xml"); push_int(0);
25a6dc2005-12-18Martin Nilsson  push_mapping(m = allocate_mapping(3)); /* Attributes */
23a7db2005-11-12Martin Nilsson  SIMPLE_READ_ATTRIBUTES(0);
25a6dc2005-12-18Martin Nilsson 
23a7db2005-11-12Martin Nilsson  if(PEEK(0) != '?' && PEEK(1)!='>') XMLERROR("Missing '?>' at end of XML header."); else READ(2);
217d892014-11-17Stephen R. van den Berg  if (!(THIS->flags & COMPAT_ALLOW_7_6_ERRORS)) {
23a7db2005-11-12Martin Nilsson  struct pike_string *str_version; MAKE_CONST_STRING (str_version, "version"); if (!low_mapping_string_lookup (m, str_version)) XMLERROR ("Required version attribute missing in XML header."); } push_int(0); /* No data */ SYS(); } }
4d5ebc2006-06-15Henrik Grubbström (Grubba) void read_choice_seq_or_name(int maybe_pcdata)
23a7db2005-11-12Martin Nilsson { #ifdef PIKE_DEBUG struct svalue * save_sp2=sp; #endif
bdf7082008-06-29Marcus Comstedt  p_wchar2 seq=0;
23a7db2005-11-12Martin Nilsson  int num=0; SKIPSPACE(); if(PEEK(0) == '(') { READ(1); SKIPSPACE(); if(PEEK(0) == '#') { if(!maybe_pcdata) XMLERROR("Found '#' (#PCDATA ?) in the wrong place."); if(!GOBBLE("#PCDATA")) { XMLERROR("Expected #PCDATA, found something else"); READ(1); } num=1; push_constant_text("#PCDATA"); SKIPSPACE(); while(PEEK(0) == '|') { READ(1); num++; SKIPSPACE(); SIMPLE_READNAME(); SKIPSPACE(); } if(!GOBBLE(")")) XMLERROR("Expected \")\" at the end of #PCDATA"); f_aggregate(num); if(!GOBBLE("*") && num>1) XMLERROR("Expected \"*\" at the end of #PCDATA"); seq = '#'; /* special #PCDATA marker */ }else{ push_int(0); /* allocate a little room */ num++; while(1) {
4d5ebc2006-06-15Henrik Grubbström (Grubba)  read_choice_seq_or_name(0);
23a7db2005-11-12Martin Nilsson  num++; SKIPSPACE();
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  switch(PEEK(0)) { case 0:
734e7f2008-11-22Henrik Grubbström (Grubba)  XMLERROR_STAY("End of xml while reading ELEMENT declaration.");
5f50842018-02-12Marcus Comstedt  /* FALLTHRU */
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  default: XMLERROR("Expected | or ,");
5de7a62015-04-19Henrik Grubbström (Grubba)  break;
23a7db2005-11-12Martin Nilsson  case ')': READ(1); #if 0 if(num == 1) { /* We could be nice about this.... Naaahhh :) /Hubbe */ XMLERROR("Choice or sequence list with only one entry.\n"); } else #endif { #ifdef PIKE_DEBUG if(!SAFE_IS_ZERO(sp-num)) Pike_fatal("num not in sync with arguments on stack!\n"); #endif if(seq == ',') sp[-num].u.string=make_shared_string(","); else sp[-num].u.string=make_shared_string("|");
017b572011-10-28Henrik Grubbström (Grubba)  SET_SVAL_TYPE(sp[-num], T_STRING);
23a7db2005-11-12Martin Nilsson  f_aggregate(num); } break;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  case '|': if(!seq) { seq=PEEK(0); }else{ if(seq != PEEK(0)) XMLERROR("choice in sequence list!"); } READ(1); continue;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  case ',': if(!seq) { seq=PEEK(0); }else{ if(seq != PEEK(0)) XMLERROR("comma in choice list!"); } READ(1); continue;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  } break; } } }else{ SIMPLE_READNAME(); } IF_XMLDEBUG( READ(0) ); if(seq != '#') switch(PEEK(0)) { case '?': READ(1); push_constant_text("?"); stack_swap(); f_aggregate(2); break; case '*': READ(1); push_constant_text("*"); stack_swap(); f_aggregate(2); break; case '+': READ(1); push_constant_text("+"); stack_swap(); f_aggregate(2); break; } #ifdef PIKE_DEBUG if(sp != save_sp2+1) Pike_fatal("XML: Stack corruption in read_choice_seq_or_name.\n"); #endif }
3d6ad42008-01-21Henrik Grubbström (Grubba) static int really_low_parse_dtd(void)
23a7db2005-11-12Martin Nilsson { int done=0; #ifdef PIKE_DEBUG struct svalue *save_sp=sp; #endif while(!done && !XMLEOF()) { #ifdef PIKE_DEBUG if(sp<save_sp) Pike_fatal("Stack underflow.\n"); #endif switch(PEEK(0)) { default: if(!isSpace(PEEK(0))) { #ifdef VERBOSE_XMLDEBUG fprintf(stderr,"Non-space character on DTD top level: %c.",PEEK(0)); #endif XMLERROR("Non-space character on DTD top level."); while( (!XMLEOF()) && PEEK(0) != '>' && PEEK(0) != '<' && PEEK(0)!=']') READ(1); break; } READ(1); SKIPSPACE(); break; case '%': /* PEReference */
4d5ebc2006-06-15Henrik Grubbström (Grubba)  READ_PEREFERENCE(0,guggel, really_low_parse_dtd());
23a7db2005-11-12Martin Nilsson  break; case '<': switch(PEEK(1)) { case '!': switch(PEEK(2)) { case '-': /* Comment */ if(PEEK(3)=='-') { /* Comment */ push_constant_text("<!--"); push_int(0); /* No name */ push_int(0); /* No attribues */ READ(4); READ_COMMENT(); SYS(); }else{ XMLERROR("Expected <!-- but got something else."); } break; case 'E': /* ELEMENT or ENTITY */ if(GOBBLE("<!ENTITY")) { int may_have_ndata=0; int attributes=0; SKIPSPACE_NO_SMEG(); push_constant_text("<!ENTITY"); if(PEEK(0)=='%') { READ(1); SKIPSPACE(); push_constant_text("%"); SKIPSPACE(); SIMPLE_READNAME(); f_add(2); }else{ may_have_ndata=1; SIMPLE_READNAME(); } SKIPSPACE(); switch(PEEK(0)) { case '\'': case '"': push_int(0); /* no attributes */ BEGIN_STRING(value); READ_ENTITYVALUE(value); END_STRING(value);
4d5ebc2006-06-15Henrik Grubbström (Grubba)  ref_push_string(sp[-3].u.string); ref_push_string(sp[-2].u.string); apply_external(1, f_Simple_define_entity_raw_fun_num, 2); pop_stack();
23a7db2005-11-12Martin Nilsson  SYS(); break; case 'S': /* SYSTEM */ if(PEEK(1)=='Y' && PEEK(2)=='S' && PEEK(3)=='T' && PEEK(4)=='E' && PEEK(5)=='M' && isSpace(PEEK(6))) { READ(7); SKIPSPACE(); push_constant_text("SYSTEM"); SIMPLE_READ_SYSTEMLITERAL(); attributes++; goto check_ndata; } goto not_system;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  case 'P': /* PUBLIC */ if(PEEK(1)=='U' && PEEK(2)=='B' && PEEK(3)=='L' && PEEK(4)=='I' && PEEK(5)=='C' && isSpace(PEEK(6))) { READ(7); attributes++; push_constant_text("PUBLIC"); SKIPSPACE(); SIMPLE_READ_PUBIDLITERAL(); SKIPSPACE(); attributes++; push_constant_text("SYSTEM"); SIMPLE_READ_SYSTEMLITERAL();
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  check_ndata: SKIPSPACE(); if(GOBBLE("NDATA")) { if(!may_have_ndata) { XMLERROR("This entity is not allowed to have an NDATA keyword."); }
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  attributes++; push_constant_text("NDATA"); SKIPSPACE(); SIMPLE_READNAME(); SKIPSPACE(); } f_aggregate_mapping(attributes*2); push_int(0); /* no data */ SYS(); break; }
3595ea2018-02-12Marcus Comstedt  /* FALLTHRU */
23a7db2005-11-12Martin Nilsson  default: not_system: /* FIXME, DTD's are IGNORED! */ XMLERROR("Unexpected data in <!ENTITY"); SKIPTO('>'); }
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  SKIPSPACE(); if(PEEK(0)!='>') XMLERROR("Missing '>' in <!ENTITY."); READ(1); break; } if(GOBBLE("<!ELEMENT")) { push_constant_text("<!ELEMENT"); SKIPSPACE(); SIMPLE_READNAME(); SKIPSPACE(); push_int(0); /* no attributes */ switch(PEEK(0)) { case 'E': /* EMPTY */ if(!GOBBLE("EMPTY")) { XMLERROR("Expected EMPTY, found something else"); READ(1); } push_constant_text("EMPTY"); break; case 'A': /* ANY */ if(!GOBBLE("ANY")) { XMLERROR("Expected ANY, found something else"); READ(1); } push_constant_text("ANY"); break; case '(': /* Mixed or children */
4d5ebc2006-06-15Henrik Grubbström (Grubba)  read_choice_seq_or_name(1);
23a7db2005-11-12Martin Nilsson  break; default: XMLERROR("Unknown stuff in <!ELEMENT>"); push_int(0); }
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  SKIPSPACE(); if(PEEK(0)!='>') XMLERROR("Missing '>' in <!ELEMENT."); READ(1); SYS(); break; }
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  goto unknown_entry_in_dtd; case 'A': if(GOBBLE("<!ATTLIST")) { SKIPSPACE(); push_constant_text("<!ATTLIST"); SIMPLE_READNAME(); push_mapping(allocate_mapping(10)); /* Attributes */ while(1) { int is_cdata=0; struct svalue *save; if(XMLEOF()) {
734e7f2008-11-22Henrik Grubbström (Grubba)  XMLERROR_STAY("End of file while parsing ATTLIST.");
23a7db2005-11-12Martin Nilsson  break; } SKIPSPACE(); if(PEEK(0)=='>') { READ(1); break; } SIMPLE_READNAME(); SKIPSPACE(); save=sp; switch(PEEK(0)) { case 'C': /* CDATA */ case 'I': /* ID, IDREF or IDREFS */ case 'E': /* ENTITY or ENTITIES */ case 'N': /* NOTATION, NMTOKEN or NMTOKENS */ SIMPLE_READNAME(); if(!strcmp(sp[-1].u.string->str,"CDATA")) { IF_XMLDEBUG(fprintf(stderr,"IS CDATA\n")); is_cdata=1; }else{ IF_XMLDEBUG(fprintf(stderr,"IS NOT CDATA\n")); is_cdata=0; } if(!strcmp(sp[-1].u.string->str,"NOTATION")) { SKIPSPACE(); if(PEEK(0)!='(') XMLERROR("Expected '(' after NOTATION."); READ(1); SIMPLE_READNAME(); SKIPSPACE(); while(PEEK(0)=='|') { READ(1); SKIPSPACE(); SIMPLE_READNAME(); SKIPSPACE(); check_stack(1); } if(PEEK(0)!=')') XMLERROR("Expected ')' after NOTATION enumeration."); READ(1); } break; case '(': /* Enumeration */ push_empty_string(); READ(1); SKIPSPACE();
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  SIMPLE_READNMTOKEN(); SKIPSPACE(); while(PEEK(0)=='|') { READ(1);
8b9dfc2008-11-30Henrik Grubbström (Grubba)  SKIPSPACE();
23a7db2005-11-12Martin Nilsson  SIMPLE_READNMTOKEN(); SKIPSPACE(); check_stack(1); } if(PEEK(0)!=')') XMLERROR("Expected ')' after enumeration."); READ(1); break; } #ifdef PIKE_DEBUG if(sp<save) Pike_fatal("Stack underflow.\n"); #endif
cc7cf42015-10-14Martin Nilsson  f_aggregate(sp - save);
23a7db2005-11-12Martin Nilsson  SKIPSPACE(); save=sp; switch(PEEK(0)) { case '#': switch(PEEK(1)) { case 'R':
13670c2015-05-25Martin Nilsson  if(PEEK(2)=='E' && PEEK(3)=='Q' && PEEK(4)=='U' && PEEK(5)=='I' && PEEK(6)=='R' && PEEK(7)=='E' &&
23a7db2005-11-12Martin Nilsson  PEEK(8)=='D') { READ(9); push_constant_text("#REQUIRED"); break; } goto bad_defaultdecl;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  case 'I':
13670c2015-05-25Martin Nilsson  if(PEEK(2)=='M' && PEEK(3)=='P' && PEEK(4)=='L' && PEEK(5)=='I' && PEEK(6)=='E' &&
23a7db2005-11-12Martin Nilsson  PEEK(7)=='D') { READ(8); push_constant_text("#IMPLIED"); break; } goto bad_defaultdecl; case 'F':
13670c2015-05-25Martin Nilsson  if(PEEK(2)=='I' && PEEK(3)=='X' && PEEK(4)=='E' &&
23a7db2005-11-12Martin Nilsson  PEEK(5)=='D') { READ(6); push_constant_text("#FIXED"); SKIPSPACE(); goto comefrom_fixed; }
5f50842018-02-12Marcus Comstedt  /* FALLTHRU */
8265532015-05-13Henrik Grubbström (Grubba) 
23a7db2005-11-12Martin Nilsson  default: bad_defaultdecl: XMLERROR("Bad default declaration."); break; } break; default: push_empty_string(); comefrom_fixed: #ifdef VERBOSE_XMLDEBUG fprintf(stderr,"READING ATTVALUE (is_cdata = %d)\n", is_cdata); #endif SIMPLE_READ_ATTVALUE(is_cdata);
4d5ebc2006-06-15Henrik Grubbström (Grubba)  /* Stack is:
13670c2015-05-25Martin Nilsson  * -6 tagname
4d5ebc2006-06-15Henrik Grubbström (Grubba)  * -5 attmapping * -4 attname * -3 type * -2 "#FIXED" or "" * -1 attvalue */
13670c2015-05-25Martin Nilsson #ifdef VERBOSE_XMLDEBUG
23a7db2005-11-12Martin Nilsson 
4d5ebc2006-06-15Henrik Grubbström (Grubba)  fprintf(stderr, "Inserting default for %s %s = '%s'\n", sp[-6].u.string->str, sp[-4].u.string->str, sp[-1].u.string->str);
23a7db2005-11-12Martin Nilsson #endif
4d5ebc2006-06-15Henrik Grubbström (Grubba)  push_svalue(Pike_sp-6); push_svalue(Pike_sp-5); push_svalue(Pike_sp-3); apply_external(1, f_Simple_set_default_attribute_fun_num, 3); pop_stack();
23a7db2005-11-12Martin Nilsson  break; } /* Update is_cdata */
4d5ebc2006-06-15Henrik Grubbström (Grubba)  if (is_cdata) { /* Stack is a bit unknown...
13670c2015-05-25Martin Nilsson  * x-6 tagname
4d5ebc2006-06-15Henrik Grubbström (Grubba)  * x-5 attmapping * x-4 attname */
13670c2015-05-25Martin Nilsson #ifdef VERBOSE_XMLDEBUG
4d5ebc2006-06-15Henrik Grubbström (Grubba)  fprintf(stderr, "Inserting CDATA for %s %s = %ld\n", save[-4].u.string->str, save[-2].u.string->str, (long)is_cdata);
23a7db2005-11-12Martin Nilsson #endif
4d5ebc2006-06-15Henrik Grubbström (Grubba)  push_svalue(save-4); push_svalue(save-2); apply_external(1, f_Simple_set_attribute_cdata_fun_num, 2); pop_stack();
23a7db2005-11-12Martin Nilsson  } #ifdef PIKE_DEBUG if(sp<save) Pike_fatal("Stack underflow.\n"); #endif
cc7cf42015-10-14Martin Nilsson  f_aggregate(sp - save);
23a7db2005-11-12Martin Nilsson  f_aggregate(2); assign_lvalue(sp-3, sp-1); pop_n_elems(2); } push_int(0); /* No data */ SYS(); break; } goto unknown_entry_in_dtd; case 'N': /* NOTATION */ if(GOBBLE("<!NOTATION")) { int n=0; push_constant_text("<!NOTATION"); SKIPSPACE(); SIMPLE_READNAME(); SKIPSPACE(); switch(PEEK(0)) { case 'P': if(GOBBLE("PUBLIC")) { push_constant_text("PUBLIC"); SIMPLE_READ_PUBIDLITERAL(); n++; SKIPSPACE(); if(PEEK(0) != '>') { read_system:
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  push_constant_text("SYSTEM"); SIMPLE_READ_SYSTEMLITERAL(); n++; } f_aggregate_mapping(n*2); break;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  }
3595ea2018-02-12Marcus Comstedt  /* FALLTHRU */
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  case 'S': if(GOBBLE("SYSTEM")) goto read_system;
5f50842018-02-12Marcus Comstedt  /* FALLTHRU */
5de7a62015-04-19Henrik Grubbström (Grubba) 
23a7db2005-11-12Martin Nilsson  default: XMLERROR("Expected PUBLIC or SYSTEM, found something else."); push_int(0);
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  } SKIPSPACE(); if(PEEK(0)!='>') XMLERROR("Missing '>' in <!NOTATION."); READ(1); push_int(0); /* no data */ SYS(); break; } goto unknown_entry_in_dtd; case '[': /* Conditional section */ READ(3); SKIPSPACE(); if(GOBBLE("IGNORE")) { int parlev=1; BEGIN_STRING(foo); push_constant_text("<![IGNORE["); push_int(0); push_int(0);
0d82772008-11-26Marcus Comstedt  SKIPSPACE(); if(XMLEOF() || PEEK(0)!='[') XMLERROR_STAY("Missing '[' in <![IGNORE."); else READ(1);
23a7db2005-11-12Martin Nilsson  while(1) { if(XMLEOF()) { XMLERROR("Expected to find ']]>', found EOF."); break; } if(PEEK(0) == '<' && PEEK(1)=='!' && PEEK(2)=='[') parlev++; else if(PEEK(0) == ']' && PEEK(1)==']' && PEEK(2)=='>') if(!--parlev) break; POKE(foo, PEEK(0)); READ(1); } END_STRING(foo); READ(3); SYS(); break; } if(GOBBLE("INCLUDE")) { push_constant_text("<![INCLUDE["); push_int(0); push_int(0); SKIPSPACE();
734e7f2008-11-22Henrik Grubbström (Grubba)  if(XMLEOF() || PEEK(0)!='[') XMLERROR_STAY("Missing '[' in <![INCLUDE."); else READ(1);
13670c2015-05-25Martin Nilsson 
4d5ebc2006-06-15Henrik Grubbström (Grubba)  low_parse_dtd();
23a7db2005-11-12Martin Nilsson 
734e7f2008-11-22Henrik Grubbström (Grubba)  if(XMLEOF() || PEEK(0) != ']' || PEEK(1)!= ']' || PEEK(2)!='>') { XMLERROR_STAY("Missing ]]> in INCLUDE tag."); SYS(); break; }
23a7db2005-11-12Martin Nilsson  READ(3); SYS(); break; } XMLERROR("Expected INCLUDE or IGNORE in conditional section."); SKIPTO('>'); READ(1); break; /* Fall through */ default: unknown_entry_in_dtd: XMLERROR("Unknown entry in DTD."); /* Try to recover */ SKIPTO('>'); break; } #ifdef PIKE_DEBUG if(sp<save_sp) Pike_fatal("Stack underflow.\n"); #endif break; case '?': /* Processing Info */ READ(2); push_constant_text("<?"); SIMPLE_READNAME(); push_int(0); /* No attributes */ SKIPSPACE(); BEGIN_STRING(foo); while((!XMLEOF()) && !(PEEK(0)=='?' && PEEK(1)=='>')) { if(PEEK(0)=='\r') { READ(1); if(PEEK(0)=='\n') READ(1); POKE(foo,'\n'); }else{ POKE(foo, PEEK(0)); READ(1); } } READ(2); END_STRING(foo); SYS(); break; default: XMLERROR("Unknown entry in DTD."); break; } break; case ']': #ifdef VERBOSE_XMLDEBUG fprintf(stderr,"low_parse_dtd found ']'\n"); #endif done=1; } } return done; }
3d6ad42008-01-21Henrik Grubbström (Grubba) static int low_parse_dtd(void)
23a7db2005-11-12Martin Nilsson { int done; struct svalue *save_sp=sp;
2325a82006-07-21Henrik Grubbström (Grubba)  int old_flags = THIS->flags;
23a7db2005-11-12Martin Nilsson 
2325a82006-07-21Henrik Grubbström (Grubba)  THIS->flags |= ALLOW_PESMEG_EVERYWHERE;
4d5ebc2006-06-15Henrik Grubbström (Grubba)  done=really_low_parse_dtd();
2325a82006-07-21Henrik Grubbström (Grubba)  THIS->flags = old_flags;
23a7db2005-11-12Martin Nilsson #ifdef PIKE_DEBUG if(sp<save_sp) Pike_fatal("Stack underflow.\n"); #endif #ifdef VERBOSE_XMLDEBUG fprintf(stderr,"Exiting low_parse_dtd %p %p\n",sp,save_sp); #endif
cc7cf42015-10-14Martin Nilsson  f_aggregate(sp - save_sp);
23a7db2005-11-12Martin Nilsson #ifdef VERBOSE_XMLDEBUG fprintf(stderr,"Exiting low_parse_dtd done\n"); #endif /* There is now one value on the stack */ return done;
13670c2015-05-25Martin Nilsson }
/* Values stored in THIS->doc_seq_pos. very_low_parse_xml() compares against
 * them and advances the position as the DOCTYPE and the root element are
 * encountered. */
23a7db2005-11-12Martin Nilsson  #define DOC_BEGINNING 0 #define DOC_GOT_DOCTYPE 1 #define DOC_IN_ROOT_ELEM 2 #define DOC_AFTER_ROOT_ELEM 3
/* Main scanning loop for XML content.
 *
 * Runs until an end tag has been read (done is set) or XMLEOF() is true,
 * dispatching on PEEK(0):
 *   - ordinary characters are appended to *text with POKE(); at top level
 *     they are not stored, and non-space data is reported with XMLERROR.
 *   - a carriage return is copied as is when keepspaces is set, otherwise
 *     it is stored as a newline and a directly following newline is
 *     consumed.
 *   - an ampersand is handed to the READ_REFERENCE macro.
 *   - a less-than sign starts markup: INTERMISSION(*text) is invoked and
 *     PEEK(1) selects between processing instruction or XML declaration,
 *     comment, CDATA section, DOCTYPE, end tag and start tag.
 *
 * end        - tag name string that the name in an end tag is compared
 *              against (pointer comparison).
 * text       - string builder that receives character data.
 * keepspaces - when nonzero a carriage return is stored unchanged.
 *
 * Returns end, which is cleared to 0 when an end tag whose name equals end
 * was read. An end tag with any other name is reported as unmatched and
 * still terminates the loop.
 */
4d5ebc2006-06-15Henrik Grubbström (Grubba) static struct pike_string *very_low_parse_xml(struct pike_string *end,
23a7db2005-11-12Martin Nilsson  struct string_builder *text,
4d5ebc2006-06-15Henrik Grubbström (Grubba)  int keepspaces)
23a7db2005-11-12Martin Nilsson {
4d5ebc2006-06-15Henrik Grubbström (Grubba)  int done=0, toplevel = THIS->doc_seq_pos != DOC_IN_ROOT_ELEM;
/* toplevel is fixed at entry: true unless we are inside the root element.
 * CHECK_TOPLEVEL_EPILOG reports ERRMSG through XMLERROR_STAY when the root
 * element has already been closed and COMPAT_ALLOW_7_2_ERRORS is not set. */
23a7db2005-11-12Martin Nilsson  #define CHECK_TOPLEVEL_EPILOG(ERRMSG) do { \
217d892014-11-17Stephen R. van den Berg  if (THIS->doc_seq_pos == DOC_AFTER_ROOT_ELEM && \ !(THIS->flags & COMPAT_ALLOW_7_2_ERRORS)) { \
23a7db2005-11-12Martin Nilsson  XMLERROR_STAY (ERRMSG); \ } \ } while (0) while(!done && !XMLEOF()) { switch(PEEK(0)) { default: if(toplevel) { if(!isSpace(PEEK(0))) XMLERROR("All data must be inside tags"); SKIPSPACE(); break; } POKE(*text, PEEK(0)); READ(1);
4d5ebc2006-06-15Henrik Grubbström (Grubba)  break;
23a7db2005-11-12Martin Nilsson  /* Strangely enough, \r and \r\n should be reported as \n, * but \n\r should be reported as \n\n */ case '\r': if(keepspaces) { POKE(*text,'\r'); READ(1); break; } if(toplevel) { SKIPSPACE(); break; } POKE(*text,'\n'); READ(1); if(PEEK(0) == '\n') READ(1);
4d5ebc2006-06-15Henrik Grubbström (Grubba)  break;
/* Reference: delegated to READ_REFERENCE, which is given a recursive
 * very_low_parse_xml() call expression with no end tag and keepspaces set.
 * NOTE(review): how the macro uses that expression is defined outside this
 * chunk; confirm there. */
23a7db2005-11-12Martin Nilsson  case '&':
4d5ebc2006-06-15Henrik Grubbström (Grubba)  READ_REFERENCE(0,*text,very_low_parse_xml(NULL, text, 1)); break;
/* Markup. INTERMISSION(*text) runs before the tag is examined.
 * NOTE(review): presumably it hands over the character data collected so
 * far; confirm against the macro definition. */
23a7db2005-11-12Martin Nilsson  case '<': INTERMISSION(*text);
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  switch(PEEK(1)) { case '?': /* Ends with ?> */
25a6dc2005-12-18Martin Nilsson  if (PEEK(2)=='x' &&
23a7db2005-11-12Martin Nilsson  PEEK(3)=='m' && PEEK(4)=='l' && isSpace(PEEK(5))) {
4d5ebc2006-06-15Henrik Grubbström (Grubba)  parse_optional_xmldecl();
25a6dc2005-12-18Martin Nilsson  break;
23a7db2005-11-12Martin Nilsson  }else{ READ(2); push_constant_text("<?"); SIMPLE_READNAME(); push_int(0); /* No attributes */ SKIPSPACE(); BEGIN_STRING(foo); while(!XMLEOF() && !(PEEK(0)=='?' && PEEK(1)=='>')) { if(PEEK(0)=='\r') { READ(1); if(PEEK(0)=='\n') READ(1); POKE(foo,'\n'); }else{ POKE(foo, PEEK(0)); READ(1); } } READ(2); END_STRING(foo); } SYS(); break; case '!': switch(PEEK(2)) { case '-': /* Comment */ if(PEEK(3)=='-') { /* Comment */ push_constant_text("<!--"); push_int(0); /* No name */ push_int(0); /* No attributes */ READ(4); READ_COMMENT(); SYS(); }else{ XMLERROR("Expected <!-- but got something else."); } break; case 'A': /* ATTLIST? */ case 'E': /* ENTITY? ELEMENT? */ XMLERROR("Invalid entry outside DTD."); break; case '[': if(PEEK(3)=='C' && PEEK(4)=='D' && PEEK(5)=='A' && PEEK(6)=='T' && PEEK(7)=='A' && PEEK(8)=='[') { READ(9);
/* A CDATA section counts as data: without COMPAT_ALLOW_7_6_ERRORS it is an
 * error anywhere at top level, with it only the epilog check applies. */
217d892014-11-17Stephen R. van den Berg  if (!(THIS->flags & COMPAT_ALLOW_7_6_ERRORS)) { if (toplevel) XMLERROR_STAY ("All data must be inside tags"); } else CHECK_TOPLEVEL_EPILOG ("All data must be inside tags");
23a7db2005-11-12Martin Nilsson  push_constant_text("<![CDATA["); push_int(0); push_int(0); BEGIN_STRING(cdata); while((!XMLEOF()) && !(PEEK(0)==']' && PEEK(1)==']' && PEEK(2)=='>')) { if(PEEK(0)=='\r') { READ(1); if(PEEK(0)=='\n') READ(1); POKE(cdata,'\n'); }else{ POKE(cdata, PEEK(0)); READ(1); } } READ(3); END_STRING(cdata); SYS(); break; }
3595ea2018-02-12Marcus Comstedt  /* FALLTHRU */
23a7db2005-11-12Martin Nilsson  default: XMLERROR("Invalid entry."); break; case 'D': /* DOCTYPE? */ /* fprintf(stderr,"FOO: %c%c%c%c\n", SAFE_PEEK(3),SAFE_PEEK(4),SAFE_PEEK(5),SAFE_PEEK(6)); */ if(PEEK(3)!='O' || PEEK(4)!='C' || PEEK(5)!='T' || PEEK(6)!='Y' || PEEK(7)!='P' || PEEK(8)!='E' || !isSpace(PEEK(9))) { XMLERROR("Expected 'DOCTYPE', got something else."); }else{ READ(9);
/* Unless COMPAT_ALLOW_7_6_ERRORS is set, a second DOCTYPE and a DOCTYPE
 * after the root element has started are both reported. */
217d892014-11-17Stephen R. van den Berg  if (!(THIS->flags & COMPAT_ALLOW_7_6_ERRORS)) { switch (THIS->doc_seq_pos) { case DOC_GOT_DOCTYPE: XMLERROR_STAY ( "Multiple DOCTYPE declarations are not allowed."); break; case DOC_IN_ROOT_ELEM: case DOC_AFTER_ROOT_ELEM: XMLERROR_STAY ( "DOCTYPE must occur before the root element."); break; }
23a7db2005-11-12Martin Nilsson  }
217d892014-11-17Stephen R. van den Berg  else CHECK_TOPLEVEL_EPILOG ( "DOCTYPE must occur before the root element.");
23a7db2005-11-12Martin Nilsson  SKIPSPACE(); push_constant_text("<!DOCTYPE"); SIMPLE_READNAME(); /* NAME */ SKIPSPACE(); switch(SAFE_PEEK(0)) { case 'P':
4d5ebc2006-06-15Henrik Grubbström (Grubba)  if((THIS->input->len > 5) &&
23a7db2005-11-12Martin Nilsson  PEEK(1)=='U' && PEEK(2)=='B' && PEEK(3)=='L' && PEEK(4)=='I' && PEEK(5)=='C') { SIMPLE_READNAME(); SIMPLE_READ_PUBIDLITERAL(); push_constant_text("SYSTEM"); SIMPLE_READ_SYSTEMLITERAL(); SKIPSPACE(); f_aggregate_mapping(4); }else{ XMLERROR("Expected PUBLIC, found something else."); f_aggregate_mapping(0); } break; case 'S':
4d5ebc2006-06-15Henrik Grubbström (Grubba)  if((THIS->input->len > 5) &&
23a7db2005-11-12Martin Nilsson  PEEK(1)=='Y' && PEEK(2)=='S' && PEEK(3)=='T' && PEEK(4)=='E' && PEEK(5)=='M') { SIMPLE_READNAME(); SIMPLE_READ_SYSTEMLITERAL(); SKIPSPACE(); f_aggregate_mapping(2); }else{ XMLERROR("Expected SYSTEM, found something else."); f_aggregate_mapping(0); } break; default: f_aggregate_mapping(0); } if(SAFE_PEEK(0)=='[') {
/* Internal DTD subset. The start pointer and input position are recorded
 * before low_parse_dtd() runs so that the verbatim text can afterwards be
 * stored in the attribute mapping under the key internal_subset. */
2325a82006-07-21Henrik Grubbström (Grubba)  PCHARP startp; PCHARP endp; int pos; struct mapping *attrs = Pike_sp[-1].u.mapping;
23a7db2005-11-12Martin Nilsson  READ(1);
2325a82006-07-21Henrik Grubbström (Grubba)  startp = endp = THIS->input->datap; pos = -THIS->input->pos;
4d5ebc2006-06-15Henrik Grubbström (Grubba)  low_parse_dtd();
23a7db2005-11-12Martin Nilsson #ifdef VERBOSE_XMLDEBUG fprintf(stderr,"FOO: %c%c%c%c\n",SAFE_PEEK(0), SAFE_PEEK(1),SAFE_PEEK(2),SAFE_PEEK(3)); #endif
2325a82006-07-21Henrik Grubbström (Grubba)  pos += THIS->input->pos; INC_PCHARP(endp, pos); if (LOW_COMPARE_PCHARP(endp, ==, THIS->input->datap)) { /* Still in the same string. * * Add the verbatim string to the attributes * so that the user can reconstruct the tag. */ push_string(make_shared_binary_pcharp(startp, pos)); mapping_string_insert(attrs, MK_STRING("internal_subset"), Pike_sp-1); pop_stack(); }
23a7db2005-11-12Martin Nilsson  if(SAFE_PEEK(0) != ']') XMLERROR("Missing ] in DOCTYPE tag."); READ(1); SKIPSPACE(); }else{ push_int(0); } if(SAFE_PEEK(0)!='>') XMLERROR("Missing '>' in DOCTYPE tag."); READ(1); SYS();
/* Record that a DOCTYPE has been seen, without moving doc_seq_pos
 * backwards if the root element was already reached. */
4d5ebc2006-06-15Henrik Grubbström (Grubba)  if (THIS->doc_seq_pos < DOC_GOT_DOCTYPE) THIS->doc_seq_pos = DOC_GOT_DOCTYPE;
23a7db2005-11-12Martin Nilsson  } break; } break; case '/': /* End tag */ READ(2); SIMPLE_READNAME(); SKIPSPACE(); if(PEEK(0)!='>')
4d5ebc2006-06-15Henrik Grubbström (Grubba)  xmlerror("Missing > in end tag.", sp[-1].u.string);
23a7db2005-11-12Martin Nilsson  else READ(1); if(end!=sp[-1].u.string) {
4d5ebc2006-06-15Henrik Grubbström (Grubba)  xmlerror("Unmatched end tag.", sp[-1].u.string);
23a7db2005-11-12Martin Nilsson  }else{ end=0; } done=1; pop_stack(); break; default: /* 'Normal' tag (we hope) */ CHECK_TOPLEVEL_EPILOG ( "There can not be more than one element on the top level.");
/* Start tag: push a marker string and the tag name, then fetch the per-tag
 * CDATA information and the default attributes through apply_external()
 * before the attributes themselves are read. */
4d5ebc2006-06-15Henrik Grubbström (Grubba)  STACK_LEVEL_START(0);
13670c2015-05-25Martin Nilsson  push_constant_text(">");
23a7db2005-11-12Martin Nilsson  READ(1); SIMPLE_READNAME();
4d5ebc2006-06-15Henrik Grubbström (Grubba)  STACK_LEVEL_CHECK(2); /* FIXME: Consider adding flags to indicate if the following * two apply_externals are needed. */ stack_dup(); stack_dup(); apply_external(1, f_Simple_get_tag_attribute_cdata_fun_num, 1); STACK_LEVEL_CHECK(4); stack_swap(); apply_external(1, f_Simple_get_default_attributes_fun_num, 1); STACK_LEVEL_CHECK(4);
23a7db2005-11-12Martin Nilsson  {
4d5ebc2006-06-15Henrik Grubbström (Grubba)  struct mapping *m = NULL;
017b572011-10-28Henrik Grubbström (Grubba)  if (TYPEOF(sp[-2]) == T_MAPPING)
4d5ebc2006-06-15Henrik Grubbström (Grubba)  m = sp[-2].u.mapping;
23a7db2005-11-12Martin Nilsson  SIMPLE_READ_ATTRIBUTES(m);
4d5ebc2006-06-15Henrik Grubbström (Grubba)  /* Get rid of the cdata mapping. */ stack_swap(); pop_stack();
23a7db2005-11-12Martin Nilsson  } /* At this point the stack contains the following: * * sp[-3]: ">". * sp[-2]: tag_name. * sp[-1]: attributes. */
4d5ebc2006-06-15Henrik Grubbström (Grubba)  STACK_LEVEL_DONE(3);
23a7db2005-11-12Martin Nilsson  switch(PEEK(0)) { default:
4d5ebc2006-06-15Henrik Grubbström (Grubba)  xmlerror("Failed to find end of tag.", sp[-2].u.string);
23a7db2005-11-12Martin Nilsson  pop_n_elems(3); break; case '>': READ(1); { struct svalue *save_sp=sp; push_constant_text("<"); assign_svalues_no_free(sp,sp-3,2,BIT_STRING|BIT_MAPPING); sp+=2; push_int(0); SYS();
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  pop_n_elems(sp-save_sp); }
/* Parse the element content recursively. A true return value from
 * low_parse_xml() means no end tag matching this tag name was read. */
4d5ebc2006-06-15Henrik Grubbström (Grubba)  if (toplevel) THIS->doc_seq_pos = DOC_IN_ROOT_ELEM; if(low_parse_xml(sp[-2].u.string)) {
23a7db2005-11-12Martin Nilsson  /* NOTE: low_parse_xml() pushes an element on the stack. */
4d5ebc2006-06-15Henrik Grubbström (Grubba)  xmlerror("Unmatched tag.", sp[-3].u.string);
23a7db2005-11-12Martin Nilsson  } SYS();
4d5ebc2006-06-15Henrik Grubbström (Grubba)  if (toplevel) THIS->doc_seq_pos = DOC_AFTER_ROOT_ELEM;
23a7db2005-11-12Martin Nilsson  break; case '/': READ(1); if(PEEK(0)!='>')
4d5ebc2006-06-15Henrik Grubbström (Grubba)  xmlerror("Missing '>' in empty tag.", sp[-2].u.string);
23a7db2005-11-12Martin Nilsson  else READ(1); /* Self-contained tag */ free_string(sp[-3].u.string); sp[-3].u.string=make_shared_string("<>"); push_int(0); /* No data */ SYS();
4d5ebc2006-06-15Henrik Grubbström (Grubba)  if (toplevel) THIS->doc_seq_pos = DOC_AFTER_ROOT_ELEM;
23a7db2005-11-12Martin Nilsson  break;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  } } } } return end; }
4d5ebc2006-06-15Henrik Grubbström (Grubba) static int low_parse_xml(struct pike_string *end)
23a7db2005-11-12Martin Nilsson { struct svalue *save_sp=sp; BEGIN_STRING(text);
4d5ebc2006-06-15Henrik Grubbström (Grubba)  end=very_low_parse_xml(end,&text, 0);
23a7db2005-11-12Martin Nilsson  INTERMISSION(text); END_STRING(text); pop_stack(); #ifdef PIKE_DEBUG if(sp<save_sp) Pike_fatal("Stack underflow.\n"); #endif
cc7cf42015-10-14Martin Nilsson  f_aggregate(sp - save_sp);
23a7db2005-11-12Martin Nilsson  /* There is now one value on the stack */ return !!end; }
4d5ebc2006-06-15Henrik Grubbström (Grubba)  /*! @decl mixed parse_xml() */ PIKEFUN mixed parse_xml()
eb1b5f2006-05-03Henrik Grubbström (Grubba)  {
4d5ebc2006-06-15Henrik Grubbström (Grubba)  if (!THIS->input) { push_undefined(); return; } THIS->doc_seq_pos = DOC_BEGINNING; low_parse_xml(NULL);
217d892014-11-17Stephen R. van den Berg  if (THIS->doc_seq_pos != DOC_AFTER_ROOT_ELEM && !(THIS->flags & COMPAT_ALLOW_7_6_ERRORS))
4d5ebc2006-06-15Henrik Grubbström (Grubba)  xmlerror ("Root element missing.", NULL);
eb1b5f2006-05-03Henrik Grubbström (Grubba)  }
23a7db2005-11-12Martin Nilsson 
4d5ebc2006-06-15Henrik Grubbström (Grubba)  /*! @decl mixed parse_dtd() */ PIKEFUN mixed parse_dtd() { if (!THIS->input) { push_undefined(); return; } parse_optional_xmldecl();
3d6ad42008-01-21Henrik Grubbström (Grubba)  low_parse_dtd();
4d5ebc2006-06-15Henrik Grubbström (Grubba)  }
eb1b5f2006-05-03Henrik Grubbström (Grubba) 
4d5ebc2006-06-15Henrik Grubbström (Grubba)  /*! @decl string parse_entity() */ PIKEFUN string parse_entity() { if (!THIS->input) { push_undefined(); return; } parse_optional_xmldecl(); push_string(make_shared_binary_pcharp(THIS->input->datap, THIS->input->len));
eb1b5f2006-05-03Henrik Grubbström (Grubba)  }
23a7db2005-11-12Martin Nilsson 
4d5ebc2006-06-15Henrik Grubbström (Grubba)  /*! @decl void push_string(string s)
9d1f782008-11-21Henrik Grubbström (Grubba)  *! @decl void push_string(string s, string context) *! *! Add a string to parse at the current position. *! *! @param s *! String to insert at the current parsing position. *! *! @param context *! Optional context used to refer to the inserted string. *! This is typically an URL, but may also be an entity *! (preceeded by an @expr{"&"@}) or a SMEG reference *! (preceeded by a @expr{"%"@}). *! Not used by the XML parser as such, but is simply *! passed into the callbackinfo mapping as *! the field @expr{"context"@} where it can be useful *! for eg resolving relative URLs when parsing DTDs, *! or for determining where errors occur.
4d5ebc2006-06-15Henrik Grubbström (Grubba)  */
9d1f782008-11-21Henrik Grubbström (Grubba)  PIKEFUN void push_string(string s, string|void context)
4d5ebc2006-06-15Henrik Grubbström (Grubba)  { struct xmlinput *i = new_string_xmlinput(s); i->next = THIS->input; THIS->input = i; if (i->next) { UPDATE_LOCATION(i->next->pos, i->next->callbackinfo); i->callbackinfo = copy_mapping(i->next->callbackinfo);
5e9fc02015-08-18Per Hedbor  push_static_text("previous");
4d5ebc2006-06-15Henrik Grubbström (Grubba)  ref_push_mapping(i->next->callbackinfo); mapping_insert(i->callbackinfo, sp-2, sp-1); } else { THIS->input->callbackinfo = allocate_mapping(0); }
9d1f782008-11-21Henrik Grubbström (Grubba)  if (context) { mapping_string_insert_string(THIS->input->callbackinfo, MK_STRING("context"), context); }
4d5ebc2006-06-15Henrik Grubbström (Grubba)  CHECK_INPUT(THIS->input);
65810c2017-12-28Martin Nilsson  pop_n_elems(args);
4d5ebc2006-06-15Henrik Grubbström (Grubba)  }
23a7db2005-11-12Martin Nilsson 
9d1f782008-11-21Henrik Grubbström (Grubba)  /*! @decl void create(string s, string context, int flags, @ *! function cb, mixed ... extra_args) *! @decl void create(string s, int flags, @ *! function cb, mixed ... extra_args) *! *! @param s *! @param context *! These two arguments are passed along to @[push_string()]. *! *! @param flags *! Parser flags. *! *! @param cb *! Callback function. This function gets called at various *! stages during the parsing. */ PIKEFUN void create(string s, string|int flags, int|function cb, mixed ... extra_args) rawtype tOr(tFuncV(tString tInt CALLBACKTYPE, tMix, tVoid), tFuncV(tString tString tInt CALLBACKTYPE, tMix, tVoid));
4d5ebc2006-06-15Henrik Grubbström (Grubba)  {
017b572011-10-28Henrik Grubbström (Grubba)  if (TYPEOF(*flags) == T_STRING) {
9d1f782008-11-21Henrik Grubbström (Grubba)  /* We have a context argument. * Advance the other parameters, and type check. */ flags++;
017b572011-10-28Henrik Grubbström (Grubba)  if (TYPEOF(*flags) != T_INT) {
f982742016-01-26Martin Nilsson  SIMPLE_ARG_TYPE_ERROR("create", 3, "int");
9d1f782008-11-21Henrik Grubbström (Grubba)  } if (args < 4) { wrong_number_of_args_error("create", args, 4); } cb++; f_aggregate(args-4); args = 5; } else { f_aggregate(args-3); args = 4; } THIS->flags = flags->u.integer;
97bdb62006-05-02Henrik Grubbström (Grubba) 
4d5ebc2006-06-15Henrik Grubbström (Grubba)  assign_svalue(&THIS->func, cb);
97bdb62006-05-02Henrik Grubbström (Grubba) 
4d5ebc2006-06-15Henrik Grubbström (Grubba)  if (THIS->extra_args) { free_array(THIS->extra_args); } add_ref(THIS->extra_args = sp[-1].u.array);
97bdb62006-05-02Henrik Grubbström (Grubba) 
9d1f782008-11-21Henrik Grubbström (Grubba)  pop_n_elems(3); /* flags, cb, extra_args */ args -= 3;
97bdb62006-05-02Henrik Grubbström (Grubba) 
9d1f782008-11-21Henrik Grubbström (Grubba)  apply_current(f_Simple_Context_push_string_fun_num, args);
4d5ebc2006-06-15Henrik Grubbström (Grubba)  }
eb1b5f2006-05-03Henrik Grubbström (Grubba)  }
c939352006-08-09Marek Habersack  /*! @endclass
4d5ebc2006-06-15Henrik Grubbström (Grubba)  */
eb1b5f2006-05-03Henrik Grubbström (Grubba) }
97bdb62006-05-02Henrik Grubbström (Grubba) /*! @endclass */
23a7db2005-11-12Martin Nilsson /*! @decl string autoconvert(string xml) */
61640a2006-05-03Henrik Grubbström (Grubba) PIKEFUN string autoconvert(string s) optflags OPT_TRY_OPTIMIZE;
23a7db2005-11-12Martin Nilsson { INT32 e; struct string_builder b; if(!s->size_shift) { int pos = 0; if (STR0(s)[2] && STR0(s)[3]) { switch((STR0(s)[0]<<8) | STR0(s)[1]) { case 0xfffe: /* UTF-16, little-endian */ { struct pike_string *t=begin_shared_string(s->len); IF_XMLDEBUG(fprintf(stderr,"UTF-16, little endian detected.\n")); for(e=0;e<s->len;e+=1) t->str[e]=s->str[e^1]; pop_stack(); push_string(end_shared_string(t)); }
5f50842018-02-12Marcus Comstedt  /* FALLTHRU */
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  case 0xfeff: /* UTF-16, big-endian */ IF_XMLDEBUG(fprintf(stderr,"UTF-16, big endian detected.\n")); push_int(2);
ff5cd72016-12-08Martin Nilsson  o_range2(RANGE_LOW_FROM_BEG|RANGE_HIGH_OPEN);
23a7db2005-11-12Martin Nilsson  f_unicode_to_string(1); return; case 0xefbb: /* UTF-8 */
ff5cd72016-12-08Martin Nilsson  if (STR0(s)[2] != 0xbf) break;
23a7db2005-11-12Martin Nilsson  IF_XMLDEBUG(fprintf(stderr, "UTF-8 detected.\n")); push_int(3);
ff5cd72016-12-08Martin Nilsson  o_range2(RANGE_LOW_FROM_BEG|RANGE_HIGH_OPEN);
23a7db2005-11-12Martin Nilsson  f_utf8_to_string(1); return; } } switch((STR0(s)[0]<<24) | (STR0(s)[1]<<16) | (STR0(s)[2]<<8) | STR0(s)[3]) { case 0x0000feff: pos = 4;
5f50842018-02-12Marcus Comstedt  /* FALLTHRU */
23a7db2005-11-12Martin Nilsson  case 0x0000003c: /* UCS4 1234 byte order (big endian) */ IF_XMLDEBUG(fprintf(stderr,"UCS4(1234) detected.\n")); init_string_builder(&b,4); for(e=pos;e<s->len;e+=4) string_builder_putchar(&b, (STR0(s)[e+0]<<24) | (STR0(s)[e+1]<<16) | (STR0(s)[e+2]<<8) | STR0(s)[e+3]); pop_stack(); push_string(finish_string_builder(&b)); return; case 0xfffe0000: pos = 4;
5f50842018-02-12Marcus Comstedt  /* FALLTHRU */
23a7db2005-11-12Martin Nilsson  case 0x3c000000: /* UCS4 4321 byte order (little endian)*/ IF_XMLDEBUG(fprintf(stderr,"UCS4(4321) detected.\n")); init_string_builder(&b,4); for(e=pos;e<s->len;e+=4) string_builder_putchar(&b, (STR0(s)[e+3]<<24) | (STR0(s)[e+2]<<16) | (STR0(s)[e+1]<<8) | STR0(s)[e+0]); pop_stack(); push_string(finish_string_builder(&b)); return; case 0x0000fffe: pos = 4;
5f50842018-02-12Marcus Comstedt  /* FALLTHRU */
23a7db2005-11-12Martin Nilsson  case 0x00003c00: /* UCS4 2143 byte order */ IF_XMLDEBUG(fprintf(stderr,"UCS4(2143) detected.\n")); init_string_builder(&b,4); for(e=pos;e<s->len;e+=4) string_builder_putchar(&b, (STR0(s)[e+1]<<24) | (STR0(s)[e+0]<<16) | (STR0(s)[e+3]<<8) | STR0(s)[e+2]); pop_stack(); push_string(finish_string_builder(&b)); return; case 0xfeff0000: pos = 4;
5f50842018-02-12Marcus Comstedt  /* FALLTHRU */
23a7db2005-11-12Martin Nilsson  case 0x003c0000: /* UCS4 3412 byte order */ IF_XMLDEBUG(fprintf(stderr,"UCS4(3412) detected.\n")); init_string_builder(&b,4); for(e=pos;e<s->len;e+=4) string_builder_putchar(&b, (STR0(s)[e+2]<<24) | (STR0(s)[e+3]<<16) | (STR0(s)[e+0]<<8) | STR0(s)[e+1]); pop_stack(); push_string(finish_string_builder(&b)); return; case 0x003c003f: /* UTF-16, big-endian, no byte order mark */ IF_XMLDEBUG(fprintf(stderr,"UTF-16, bit-endian, no byte order mark detected.\n")); f_unicode_to_string(1); return; case 0x3c003f00: /* UTF-16, little endian, no byte order mark */ IF_XMLDEBUG(fprintf(stderr,"UTF-16, little-endian, no byte order mark detected.\n")); Pike_error("XML: Little endian byte order not supported yet.\n"); case 0x3c3f786d: /* ASCII? UTF-8? ISO-8859? */ { int encstart; IF_XMLDEBUG(fprintf(stderr,"Extended ASCII detected (assuming UTF8).\n")); pos = 5; /* <?xml. version */ while(isSpace(STR0(s)[pos])) pos++; /* "autoconvert: <?xml .version */
67074e2014-09-03Martin Nilsson  if(memcmp(STR0(s)+pos,"version",7)) break;
23a7db2005-11-12Martin Nilsson  pos+=7; /* <?xml version. = "1.0" */ while(isSpace(STR0(s)[pos])) pos++; /* <?xml version .= "1.0" */ if(STR0(s)[pos]!='=') break; pos++; /* <?xml version =. "1.0" */ while(isSpace(STR0(s)[pos])) pos++;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  /* <?xml version = ."1.0" */ if(STR0(s)[pos]=='\'') { pos++; while(STR0(s)[pos] && STR0(s)[pos]!='\'') pos++; pos++; } else if(STR0(s)[pos]=='"') { pos++; while(STR0(s)[pos] && STR0(s)[pos]!='\"') pos++; pos++; }else{ break; /* No encoding detected */ }
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  /* <?xml version = "1.0". encoding="encname" */ while(isSpace(STR0(s)[pos])) pos++;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  /* <?xml version = "1.0" .encoding="encname" */
67074e2014-09-03Martin Nilsson  if(memcmp("encoding",STR0(s)+pos,8)) break;
23a7db2005-11-12Martin Nilsson  pos+=8;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  /* <?xml version = "1.0" encoding. ="encname" */ while(isSpace(STR0(s)[pos])) pos++;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  /* <?xml version = "1.0" encoding .="encname" */ if(STR0(s)[pos]!='=') break; pos++;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  /* <?xml version = "1.0" encoding =. "encname" */ while(isSpace(STR0(s)[pos])) pos++;
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  /* <?xml version = "1.0" encoding = ."encname" */ encstart=pos+1; if(STR0(s)[pos]=='\'') { pos++; while(STR0(s)[pos] && STR0(s)[pos]!='\'') pos++; } else if(STR0(s)[pos]=='"') { pos++; while(STR0(s)[pos] && STR0(s)[pos]!='\"') pos++; }else{ break; /* No encoding detected */ } /* <?xml version = "1.0" encoding = "encname." */
5e9fc02015-08-18Per Hedbor  push_static_text("Charset.decoder");
23a7db2005-11-12Martin Nilsson  APPLY_MASTER("resolv",1); push_string(make_shared_binary_string0(STR0(s)+encstart, pos-encstart)); f_call_function(2);
5e9fc02015-08-18Per Hedbor  push_static_text("feed");
23a7db2005-11-12Martin Nilsson  f_index(2); stack_swap(); f_call_function(2);
5e9fc02015-08-18Per Hedbor  push_static_text("drain");
23a7db2005-11-12Martin Nilsson  f_index(2); f_call_function(1); return; }
13670c2015-05-25Martin Nilsson 
23a7db2005-11-12Martin Nilsson  case 0x4c6fa794: /* EBCDIC */ IF_XMLDEBUG(fprintf(stderr,"EBCDIC detected.\n")); Pike_error("XML: EBCDIC not supported yet.\n"); } } IF_XMLDEBUG(fprintf(stderr,"No encoding detected.\n")); f_utf8_to_string(1); } /*! @endmodule */
25a6dc2005-12-18Martin Nilsson /*! @endmodule */
23a7db2005-11-12Martin Nilsson void init_parser_xml(void) {
5e9fc02015-08-18Per Hedbor  push_static_text("location");
23a7db2005-11-12Martin Nilsson  location_string_svalue=sp[-1]; sp--;
61640a2006-05-03Henrik Grubbström (Grubba)  INIT;
23a7db2005-11-12Martin Nilsson  ADD_FUNCTION("isbasechar",f_isBaseChar,tFunc(tInt,tInt),0); ADD_FUNCTION("isidographic",f_isIdeographic,tFunc(tInt,tInt),0); ADD_FUNCTION("isletter",f_isLetter,tFunc(tInt,tInt),0); ADD_FUNCTION("iscombiningchar",f_isCombiningChar,tFunc(tInt,tInt),0); ADD_FUNCTION("isdigit",f_isDigit,tFunc(tInt,tInt),0); ADD_FUNCTION("isextender",f_isExtender,tFunc(tInt,tInt),0); ADD_FUNCTION("isspace",f_isSpace,tFunc(tInt,tInt),0); ADD_FUNCTION("isnamechar",f_isNameChar,tFunc(tInt,tInt),0); ADD_FUNCTION("isfirstnamechar",f_isFirstNameChar,tFunc(tInt,tInt),0); ADD_FUNCTION("ishexchar",f_isHexChar,tFunc(tInt,tInt),0); } void exit_parser_xml(void) {
61640a2006-05-03Henrik Grubbström (Grubba)  EXIT;
d476592013-06-12Arne Goedeke  ba_destroy(&xmlinput_allocator);
23a7db2005-11-12Martin Nilsson  free_svalue(&location_string_svalue); }