835c6c2001-06-17Martin Nilsson // This file is part of Roxen WebServer. // Copyright © 1996 - 2001, Roxen IS.
24c6c12000-02-20Martin Nilsson //
835c6c2001-06-17Martin Nilsson // The Roxen RXML Parser. See also the RXML Pike modules.
24c6c12000-02-20Martin Nilsson //
07c09d2001-11-27Martin Stjernholm // $Id: rxml.pike,v 1.321 2001/11/27 17:32:44 mast Exp $
e5bab52000-04-06Kenneth Johansson 
7bf1451999-07-27David Hedbor 
b8b31d2000-01-10Martin Nilsson inherit "rxmlhelp";
cd7d5f2000-02-16Martin Stjernholm #include <config.h>
94892e2000-01-05Martin Stjernholm 
36aaa12000-01-23Martin Nilsson  // ------------------------- RXML Parser ------------------------------
RXML.TagSet rxml_tag_set = class
// The top level RXML tag set. It always has the highest priority and
// imports the tag sets of all registered parser modules.
{
  inherit RXML.TagSet;

  string prefix = RXML_NAMESPACE;

#if constant (thread_create)
  // Held while the modules and imported arrays are changed
  // destructively.
  Thread.Mutex lists_mutex = Thread.Mutex();
#endif

  // Runs in parallel with the imported array: modules[n] is the
  // module object whose registered tag set is imported[n].
  array(RoxenModule) modules;

  void sort_on_priority()
  // Reorders imported (and modules in step with it) so that the tag
  // sets come in descending module priority, with
  // Roxen.entities_tag_set kept last. Nothing is assigned if the
  // order already is the wanted one.
  {
#if constant (thread_create)
    Thread.MutexKey lock = lists_mutex->lock();
#endif

    // Take the entities tag set out of the arrays before sorting.
    int ent_pos = search (imported, Roxen.entities_tag_set);
    array(RXML.TagSet) sorted_sets =
      imported[..ent_pos-1] + imported[ent_pos+1..];
    array(RoxenModule) sorted_mods =
      modules[..ent_pos-1] + modules[ent_pos+1..];

    // First pass: order on the module identifiers, to get a well
    // defined order among modules with the same priority. get_hash
    // depends on that to return a stable value.
    array(string) ids = sorted_mods->module_identifier();
    sort (ids, sorted_sets, sorted_mods);

    // Second pass: order on priority, where a priority of 0 counts
    // as 4.
    array(int) prios = replace (sorted_mods->query ("_priority", 1), 0, 4);
    sort (prios, sorted_sets, sorted_mods);

    sorted_sets = reverse (sorted_sets) + ({imported[ent_pos]});
    if (equal (imported, sorted_sets))
      return;

    sorted_mods = reverse (sorted_mods) + ({modules[ent_pos]});
    `->= ("imported", sorted_sets);
    modules = sorted_mods;
  }

  mixed `->= (string var, mixed val)
  // Handles assignment to modules here and passes everything else on
  // to the inherited tag set. Currently necessary due to a misfeature
  // in Pike.
  {
    if (var != "modules")
      ::`->= (var, val);
    else
      modules = val;
    return val;
  }

  void create (object rxml_object)
  // Starts out with only the entities tag set imported, paired with
  // rxml_object in the modules array.
  {
    ::create (rxml_object, "rxml_tag_set");
    imported = ({Roxen.entities_tag_set});
    modules = ({rxml_object});
  }

  void prepare_context (RXML.Context ctx)
  // Hooks ctx->misc into id->misc->defines and resets the per-parse
  // state variables in ctx->misc.
  {
    RequestID id = ctx->id;

    PROF_ENTER( "rxml", "overhead" );

    // id->misc->defines is handled in a fairly ugly way: In a nested
    // parse it's overridden temporarily with ctx->misc to get a parse
    // local scope (the old mapping is kept in ctx->id_defines and put
    // back by eval_finish). Otherwise it's replaced for good, to be
    // compatible with top level code that uses id->misc->defines
    // after the rxml evaluation.
    mapping ctx_misc = ctx->misc;
    mapping prev_defines = id->misc->defines;

    if (!prev_defines)
      id->misc->defines = ctx_misc;
    else if (prev_defines != ctx_misc) {
      if (prev_defines->rxml_misc)
        ctx->id_defines = prev_defines;
      ctx_misc->rxml_misc = 1;
      // The following settings belong in id->misc but are kept in
      // this mapping for historical reasons.
      ctx_misc->language = prev_defines->language;
      ctx_misc->present_languages = prev_defines->present_languages;
      id->misc->defines = ctx_misc;
    }

#if ROXEN_COMPAT <= 1.3
    if (old_rxml_compat) ctx->compatible_scope = 1;
#endif

    ctx_misc[" _ok"] = ctx_misc[" _prev_ok"] = 1;
    ctx_misc[" _error"] = 200;
    ctx_misc[" _extra_heads"] = ([ ]);
    if (id->misc->stat)
      ctx_misc[" _stat"] = id->misc->stat;
  }

  void eval_finish (RXML.Context ctx)
  // Moves the extra headers collected during the parse over to
  // id->misc->moreheads and, if prepare_context saved an earlier
  // defines mapping, reinstates it.
  {
    RequestID id = ctx->id;

    mapping extra_heads = ctx->misc[" _extra_heads"];
    if (sizeof (extra_heads)) {
      if (id->misc->moreheads)
        id->misc->moreheads |= extra_heads;
      else
        id->misc->moreheads = extra_heads;
    }

    mapping saved_defines = ctx->id_defines;
    if (saved_defines) {
      // These values look like they're stored in the wrong place, but
      // they are copied back to the saved mapping all the same.
      if (int err_code = ctx->misc[" _error"])
        saved_defines[" _error"] = err_code;
      if (string ret_text = ctx->misc[" _rettext"])
        saved_defines[" _rettext"] = ret_text;
      id->misc->defines = saved_defines;
    }

    PROF_LEAVE( "rxml", "overhead" );
  }
} (this_object());
e7ed2e2000-01-08Martin Stjernholm 
// The type (HTML handled by the RXML.PXml parser) that parse_rxml uses to get its parser.
cede492000-02-15Martin Stjernholm RXML.Type default_content_type = RXML.t_html (RXML.PXml);
// NOTE(review): presumably the default type for tag arguments; it is not referenced in the visible part of this file - confirm against its users.
5a31102000-02-15Martin Stjernholm RXML.Type default_arg_type = RXML.t_text (RXML.PEnt);
49a74b2000-01-14Martin Stjernholm 
// When nonzero and ROXEN_COMPAT <= 1.3, prepare_context in rxml_tag_set sets ctx->compatible_scope.
8e29c32000-03-04Martin Stjernholm int old_rxml_compat;
f599312000-01-19Martin Stjernholm 
68d85b2000-02-11Martin Stjernholm // A note on tag overriding: It's possible for old style tags to
76b5932000-06-23Martin Stjernholm // propagate their results to the tags they have overridden (new style // tags can use RXML.Frame.propagate_tag()). This is done by an // extension to the return value:
68d85b2000-02-11Martin Stjernholm // // If an array of the form // // ({int 1, string name, mapping(string:string) args, void|string content}) // // is returned, the tag function with the given name is called with // these arguments. If the name is the same as the current tag, the // overridden tag function is called. If there's no overridden // function, the tag is generated in the output. Any argument may be // left out to default to its value in the current tag. ({1, 0, 0}) or // ({1, 0, 0, 0}) may be shortened to ({1}). // // Note that there's no other way to handle tag overriding -- the page // is no longer parsed multiple times.
string parse_rxml(string what, RequestID id,
                  void|Stdio.File file,
                  void|mapping defines )
// Parses and evaluates the RXML source in what for the request id and
// returns the result. If an RXML context for the same request is
// active it is reused, otherwise a new parser and context is created
// from rxml_tag_set. If defines is given it replaces both ctx->misc
// and id->misc->defines. If file is given it is made available as
// "_source file" in the defines mapping during the evaluation.
//
// Note: Don't use this function to do recursive parsing inside an
// rxml parse session. The RXML module provides several different ways
// to accomplish that.
{
  RXML.Context ctx = RXML_CONTEXT;
  RXML.PXml parser;
  // A value of -1 means that no existing context was inherited and
  // hence that there's nothing to restore afterwards.
  int saved_state_updated = -1;
  int saved_dont_cache_result;

  if (!ctx || ctx->id != id) {
    // No usable context; start from scratch.
    parser = rxml_tag_set->get_parser (default_content_type, id);
    ctx = parser->context;
  }
  else {
    // Parse within the already active context. Remember the state
    // that has to be put back after the evaluation.
    parser = default_content_type->get_parser (ctx, ctx->tag_set, 0);
    saved_state_updated = ctx->state_updated;
    if (ctx->frame)
      saved_dont_cache_result =
        ctx->frame->flags & RXML.FLAG_DONT_CACHE_RESULT;
#ifdef RXML_PCODE_UPDATE_DEBUG
    report_debug ("%O: Saved p-code update count %d before parse_rxml "
                  "with inherited context\n", ctx, saved_state_updated);
#endif
  }
  parser->recover_errors = 1;

  if (!defines)
    defines = ctx->misc;
  else {
    id->misc->defines = defines;
    ctx->misc = defines;
    // Fill in the entries that the caller left out.
    if (!defines[" _error"])
      defines[" _error"] = 200;
    if (!defines[" _extra_heads"])
      defines[" _extra_heads"] = ([ ]);
    if (!defines[" _stat"] && id->misc->stat)
      defines[" _stat"] = id->misc->stat;
  }

  if (file) {
    if (!defines[" _stat"])
      defines[" _stat"] = file->stat();
    defines["_source file"] = file;
  }

  mixed failure = catch {
    if (ctx != RXML_CONTEXT)
      parser->write_end (what);
    else
      // Already inside the context, so the extra work done by
      // write_end isn't necessary. DDTAH.
      parser->finish (what);
    what = parser->eval();
  };

  // Clean up regardless of whether the evaluation succeeded.
  if (file)
    m_delete (defines, "_source file");

  if (saved_state_updated >= 0) {
#ifdef RXML_PCODE_UPDATE_DEBUG
    report_debug ("%O: Restoring p-code update count from %d to %d "
                  "after parse_rxml with inherited context\n",
                  ctx, ctx->state_updated, saved_state_updated);
#endif
    ctx->state_updated = saved_state_updated;
    // Only clear the flag if it wasn't set before this call.
    if (ctx->frame && !saved_dont_cache_result)
      ctx->frame->flags &= ~RXML.FLAG_DONT_CACHE_RESULT;
  }

  if (failure) {
#ifdef DEBUG
    if (!parser) {
      report_debug("RXML: Parser destructed!\n");
#if constant(_describe)
      _describe(parser);
#endif /* constant(_describe) */
      error("Parser destructed!\n");
    }
#endif
    // Unwinding can't be continued from here; everything else is
    // rethrown as it is.
    if (!objectp (failure) || !failure->thrown_at_unwind)
      throw (failure);
    error ("Can't handle RXML parser unwinding in "
           "compatibility mode (error=%O).\n", failure);
  }

  return what;
}
// Signature of an old style tag callback (no content argument).
#define COMPAT_TAG_TYPE \
  function(string,mapping(string:string),RequestID,void|Stdio.File,void|mapping: \
           string|array(int|string))

// Signature of an old style container callback (with content argument).
#define COMPAT_CONTAINER_TYPE \
  function(string,mapping(string:string),string,RequestID,void|Stdio.File,void|mapping: \
           string|array(int|string))

class CompatTag
// Wraps an old style tag or container callback (or a constant result
// string) as an RXML.Tag.
{
  inherit RXML.Tag;
  constant is_compat_tag=1;

  string name;
  int flags;
  // A constant result string, a callback, or zero.
  string|COMPAT_TAG_TYPE|COMPAT_CONTAINER_TYPE fn;

  // The content isn't preparsed.
  RXML.Type content_type = RXML.t_same;

  void create (string _name, int empty, string|COMPAT_TAG_TYPE|COMPAT_CONTAINER_TYPE _fn)
  // A nonzero empty makes this an empty element tag, i.e. one that
  // calls _fn without any content argument.
  {
    name = _name;
    fn = _fn;
    flags = empty ? RXML.FLAG_EMPTY_ELEMENT : 0;
    // The result is postparsed.
    result_types = result_types(RXML.PXml);
  }

  class Frame
  {
    inherit RXML.Frame;
    string raw_tag_text;

    array do_enter (RequestID id)
    // Turns on preparsing of the content if the tag has a preparse
    // argument.
    {
      if (args->preparse)
        content_type = content_type (RXML.PXml);
    }

    array do_return (RequestID id)
    // Calls fn and converts its old style return value to what the
    // RXML parser expects.
    {
      // There's no working system for line numbers yet.
      id->misc->line = "0";

      if (!content)
        content = "";

      // A string instead of a function is the result as it is.
      if (stringp (fn))
        return ({fn});

      // Nothing to call; leave it to the overridden tag without
      // parsing the result.
      if (!fn) {
        result_type = result_type (RXML.PNone);
        return ({propagate_tag()});
      }

      mapping defines = RXML_CONTEXT->misc;
      Stdio.File source_file = defines["_source file"];

      string|array(string) result;
      if (flags & RXML.FLAG_EMPTY_ELEMENT)
        result = fn (name, args, id, source_file, defines);
      else {
        if (args->trimwhites)
          content = String.trim_all_whites(content);
        result = fn (name, args, content, id, source_file, defines);
      }

      // A zero result means that the tag wasn't handled.
      if (!result) {
        result_type = result_type (RXML.PNone);
        return ({propagate_tag()});
      }

      // An ordinary string result, which is postparsed unless the
      // tag has a noparse argument.
      if (!arrayp (result)) {
        if (args->noparse)
          result_type = result_type (RXML.PNone);
        return ({result});
      }

      // Array results are never parsed further.
      result_type = result_type (RXML.PNone);
      if (!sizeof (result) || result[0] != 1)
        return result;

      // ({1, name, args, content}): Propagate to another tag. Missing
      // elements default to zero, which means the value in this tag.
      [string pname, mapping(string:string) pargs, string pcontent] =
        (result[1..] + ({0, 0, 0}))[..2];

      if (pname && pname != name)
        return ({RXML.make_unparsed_tag (
                   pname, pargs || args, pcontent || content)});

      if (!pargs && !pcontent)
        return ({propagate_tag ()});
      return ({propagate_tag (pargs || args, pcontent || content)});
    }
  }
}
class GenericTag
// An RXML.Tag whose result is produced by a single callback function.
{
  inherit RXML.Tag;
  constant is_generic_tag=1;

  string name;
  int flags;

  // The callback. It is called with the tag name, the arguments, the
  // content, the request and the frame.
  function(string,mapping(string:string),string,RequestID,RXML.Frame:
           array|string) _do_return;

  void create(string _name, int _flags,
              function(string,mapping(string:string),string,RequestID,RXML.Frame:
                       array|string) __do_return)
  {
    name = _name;
    flags = _flags;
    _do_return = __do_return;
    // RXML.t_same stops the content from being preparsed.
    if (flags & RXML.FLAG_DONT_PREPARSE)
      content_type = RXML.t_same;
  }

  class Frame
  {
    inherit RXML.Frame;

    array do_return(RequestID id, void|mixed piece)
    // Note: args may be zero here since this function is inherited
    // by GenericPITag.
    {
      if (flags & RXML.FLAG_POSTPARSE)
        result_type = result_type (RXML.PXml);

      // Unless the content is streamed, the callback gets all of it
      // (or an empty string) instead of the piece.
      if (!(flags & RXML.FLAG_STREAM_CONTENT))
        piece = content || "";

      array|string outcome =
        _do_return(name, args, piece, id, this_object());
      if (stringp (outcome))
        return ({outcome});
      return outcome;
    }
  }
}
class GenericPITag
// A GenericTag for processing instructions; RXML.FLAG_PROC_INSTR is
// always added to the flags.
{
  inherit GenericTag;

  void create (string _name, int _flags,
               function(string,mapping(string:string),string,RequestID,RXML.Frame:
                        array|string) __do_return)
  {
    ::create (_name, _flags | RXML.FLAG_PROC_INSTR, __do_return);

    // The content is always treated literally, so
    // RXML.FLAG_DONT_PREPARSE has no effect here.
    content_type = RXML.t_text;
  }
}
void add_parse_module (RoxenModule mod)
// Registers the tags of mod: Its old style callbacks are wrapped and
// added to its tag set, and the tag set is then imported into
// rxml_tag_set unless it's there already.
{
  RXML.TagSet tag_set;
  if (mod->query_tag_set)
    tag_set = mod->query_tag_set();
  else
    tag_set = RXML.TagSet (mod, "");

  mapping(string:mixed) defs;

  // Old style tags, which take no content.
  if (mod->query_tag_callers &&
      mappingp (defs = mod->query_tag_callers()) &&
      sizeof (defs))
    tag_set->add_tags (map (indices (defs),
                            lambda (string name) {
                              return CompatTag (name, 1, defs[name]);
                            }));

  // Old style containers.
  if (mod->query_container_callers &&
      mappingp (defs = mod->query_container_callers()) &&
      sizeof (defs))
    tag_set->add_tags (map (indices (defs),
                            lambda (string name) {
                              return CompatTag (name, 0, defs[name]);
                            }));

  // Simple tags; each value holds the flags and the callback.
  if (mod->query_simpletag_callers &&
      mappingp (defs = mod->query_simpletag_callers()) &&
      sizeof (defs))
    tag_set->add_tags (Array.map (indices (defs),
                                  lambda (string tag) {
                                    return GenericTag (tag, @defs[tag]);
                                  }));

  // Simple processing instruction tags.
  if (mod->query_simple_pi_tag_callers &&
      mappingp (defs = mod->query_simple_pi_tag_callers()) &&
      sizeof (defs))
    tag_set->add_tags (map (indices (defs),
                            lambda (string name) {
                              return GenericPITag (name, @defs[name]);
                            }));

  // Nothing more to do if the tag set is imported already.
  if (search (rxml_tag_set->imported, tag_set) >= 0)
    return;

#ifdef THREADS
  Thread.MutexKey lock = rxml_tag_set->lists_mutex->lock();
#endif
  rxml_tag_set->modules += ({mod});
  rxml_tag_set->imported += ({tag_set});
#ifdef THREADS
  lock = 0;
#endif

  // Schedule a single resort, replacing any that is pending.
  remove_call_out (rxml_tag_set->sort_on_priority);
  call_out (rxml_tag_set->sort_on_priority, 0);
}
void remove_parse_module (RoxenModule mod)
// Removes mod and its tag set from rxml_tag_set and destructs the tag
// set. Does nothing if mod isn't registered.
{
  RXML.TagSet tag_set;

  // The lookup and the array updates are done with lists_mutex held,
  // like the other destructive changes to modules and imported.
  // Otherwise a sort_on_priority running in another thread could
  // write back arrays that still contain the removed entry.
#ifdef THREADS
  Thread.MutexKey lock = rxml_tag_set->lists_mutex->lock();
#endif
  int i = search (rxml_tag_set->modules, mod);
  if (i >= 0) {
    tag_set = rxml_tag_set->imported[i];
    rxml_tag_set->modules =
      rxml_tag_set->modules[..i - 1] + rxml_tag_set->modules[i + 1..];
    rxml_tag_set->imported =
      rxml_tag_set->imported[..i - 1] + rxml_tag_set->imported[i + 1..];
  }
#ifdef THREADS
  lock = 0;
#endif

  // Destruct outside the lock so that nothing triggered by the
  // destruct runs with the mutex held.
  if (tag_set) destruct (tag_set);
}
b796b51998-11-18Per Hedbor 
void ready_to_receive_requests (object this)
// Sorts the imported tag sets at once instead of waiting for a
// pending call out to do it. The argument isn't used.
{
  function resort = rxml_tag_set->sort_on_priority;
  remove_call_out (resort);
  resort();
}