ed81751999-12-11Martin Stjernholm //! RXML parser and compiler framework. //! //! Created 1999-07-30 by Martin Stjernholm. //!
5bcbe82000-01-18Martin Stjernholm //! $Id: module.pmod,v 1.23 2000/01/18 10:38:21 mast Exp $
//! Kludge: Must use "RXML.refs" somewhere for the whole module to be
//! loaded correctly.

//! WARNING: This API is not yet set in stone; expect incompatible
//! changes.

#pragma strict_types

class Tag
//! Interface class for the static information about a tag.
{
  constant is_RXML_Tag = 1;

  //! Interface.

  //!string name;
  //! The name of the tag. Required and considered constant.

  /*extern*/ int flags;
  //! Various bit flags that affect parsing; see the FLAG_* constants.
  //! RXML.Frame.flags is initialized from this.

  mapping(string:Type) req_arg_types;
  mapping(string:Type) opt_arg_types;
  //! Define to declare the names and types of the required and
  //! optional arguments. If a type specifies a parser, it'll be used
  //! on the argument value. Note that the order in which arguments
  //! are parsed is arbitrary.

  Type content_type = t_text (PHtml);
  //! The handled type of the content, if the tag is used as a
  //! container. It's taken from the actual result type if set to
  //! zero.
  //!
  //! This default says it's text, but the HTML parser is used to read
  //! it, which means that the content is preparsed with HTML syntax.
  //! Use t_text directly with no parser to get the raw text.

  array(Type) result_types = ({t_text});
  //! The possible types of the result, in order of precedence. If a
  //! result type has a parser, it'll be used to parse any strings
  //! gotten from Frame.do_return (see that function for details).

  string scope_name;
  //! RXML.Frame.scope_name is initialized from this.

  TagSet additional_tags, local_tags;
  //! RXML.Frame.additional_tags and RXML.Frame.local_tags are
  //! initialized from these.

  function(:Frame) frame();
  //! This function should return an object to be used as a frame. The
  //! frame object must (in practice) inherit RXML.Frame.

  //! Services.

  inline Frame `() (mapping(string:mixed) args, void|mixed|PCode content)
  //! Make an initialized frame for the tag. Typically useful when
  //! returning generated tags from e.g. RXML.Frame.do_return(). The
  //! argument values and the content are not parsed; see
  //! RXML.Frame.do_return() for details. Note: Never reuse the same
  //! frame object.
  {
    Tag this = this_object();
    Frame frame = ([function(:Frame)] this->frame)();
    frame->tag = this;
    frame->flags = flags;
    // Only override the frame defaults for the settings this tag defines.
    if (scope_name) frame->scope_name = scope_name;
    if (additional_tags) frame->additional_tags = additional_tags;
    if (local_tags) frame->local_tags = local_tags;
    frame->args = args;
    // An omitted content argument leaves the frame's own default untouched.
    if (!zero_type (content)) frame->content = content;
    return frame;
  }

  // Internals.

  array _handle_tag (TagSetParser parser, mapping(string:string) args,
                     void|string content)
  // Callback for tag set parsers. Returns a sequence of result values
  // to be added to the result queue. Note that this function handles
  // an unwind frame for the parser.
  {
    Context ctx = parser->context;
    // FIXME: P-code generation.

    // Continue with the frame saved for this parser in the unwind
    // state, if there is one; otherwise start with a fresh frame.
    Frame frame;
    if (mapping(string:mixed)|mapping(object:array) ustate = ctx->unwind_state)
      if (ustate[parser]) frame = [object(Frame)] ustate[parser][0];
      else frame = `() (args, Void);
    else frame = `() (args, Void);

    mixed err = catch {
      frame->_eval (parser, args, content);
      return frame->result == Void ? ({}) : ({frame->result});
    };

    if (objectp (err) && ([object] err)->thrown_at_unwind) {
      // An unwind is in progress: record the thrown object as the
      // state for this parser and continue the unwind with the parser
      // itself as the thrown object.
      mapping(string:mixed)|mapping(object:array) ustate = ctx->unwind_state;
      if (!ustate) ustate = ctx->unwind_state = ([]);
#ifdef DEBUG
      if (err != frame)
        error ("Internal error: Unexpected unwind object catched.\n");
      if (ustate[parser])
        error ("Internal error: Clobbering unwind state for parser.\n");
#endif
      ustate[parser] = ({err});
      err = parser;
    }

    throw (err);
  }

  string _sprintf()
  {
    return "RXML.Tag(" + [string] this_object()->name + ")";
  }
}


class TagSet
//! Contains a set of tags. Tag sets can import other tag sets, and
//! later changes are propagated. Parser instances (contexts) to parse
//! data are created from this. TagSet objects may somewhat safely be
//! destructed explicitly; the tags in a destructed tag set will not
//! be active in parsers that are instantiated later, but will work in
//! current instances.
{
  string name;
  //! Used for identification only.

  string prefix;
  //! A prefix that may precede the tags. If zero, it's up to the
  //! importing tag set(s).

  int prefix_required;
  //! The prefix must precede the tags.

  array(TagSet) imported = ({});
  //! Other tag sets that will be used. The precedence is local tags
  //! first, then imported from left to right. It's not safe to
  //! destructively change entries in this array.

  function(Context:void) prepare_context;
  //! If set, this is a function that will be called before a new
  //! Context object is taken into use. It'll typically prepare
  //! predefined scopes and variables. The functions will be called in
  //! order of precedence; highest last.

  int generation = 1;
  //! A number that is increased every time something changes in this
  //! object or in some tag set it imports.

#define LOW_TAG_TYPE                                                    \
  string|array|                                                         \
  function(:int(1..1)|string|array)|                                    \
  function(object,mapping(string:string):int(1..1)|string|array)

#define LOW_CONTAINER_TYPE                                              \
  string|array|                                                         \
  function(:int(1..1)|string|array)|                                    \
  function(object,mapping(string:string),string:int(1..1)|string|array)

#define LOW_ENTITY_TYPE                                                 \
  string|array|                                                         \
  function(:int(1..1)|string|array)|                                    \
  function(object:int(1..1)|string|array)

  mapping(string:LOW_TAG_TYPE) low_tags;
  mapping(string:LOW_CONTAINER_TYPE) low_containers;
  mapping(string:LOW_ENTITY_TYPE) low_entities;
  //! Passed directly to Parser.HTML when that parser is used. This is
  //! intended for compatibility only and might eventually be removed.
  //! Note: Changes in these aren't tracked; changed() must be called.

  static void create (string _name, void|array(Tag) _tags)
  //! Creates a tag set called _name. If _tags is given, the set is
  //! initialized with those tags, indexed on their names.
  {
    name = _name;
    if (_tags) tags = mkmapping ([array(string)] _tags->name, _tags);
  }

  void add_tag (Tag tag)
  //! Stores tag under its name, replacing any local tag with the same
  //! name, and calls changed().
  {
    tags[tag->name] = tag;
    changed();
  }

  void add_tags (array(Tag) _tags)
  //! Stores all the given tags under their names and calls changed().
  {
    tags += mkmapping (/*[array(string)]HMM*/ _tags->name, _tags);
    changed();
  }

  void remove_tag (string|Tag tag)
  //! Removes a local tag, given either by name or as the Tag object
  //! itself, and calls changed(). In the latter case every name the
  //! object is registered under is removed.
  {
    if (stringp (tag))
      m_delete (tags, tag);
    else
      for (string n; !zero_type (n = search (tags, [object(Tag)] tag));)
        m_delete (tags, n);
    changed();
  }

  local Tag|array(LOW_TAG_TYPE|LOW_CONTAINER_TYPE) get_local_tag (string name)
  //! Returns the tag definition for the given name in this tag set.
  //! The return value is either a Tag object or an array ({low_tag,
  //! low_container}), where one element always is zero.
  {
    if (Tag tag = tags[name])
      return tag;
    else if (LOW_CONTAINER_TYPE cdef = low_containers && low_containers[name])
      return ({0, cdef});
    else if (LOW_TAG_TYPE tdef = low_tags && low_tags[name])
      return ({tdef, 0});
    else
      return 0;
  }

  array(Tag) get_local_tags()
  //! Doesn't return the low tag/container definitions.
  {
    return values (tags);
  }

  Tag|array(LOW_TAG_TYPE|LOW_CONTAINER_TYPE) get_tag (string name)
  //! Returns the active tag definition for the given name. The return
  //! value is the same as for get_local_tag().
  {
    if (object(Tag)|array(LOW_TAG_TYPE|LOW_CONTAINER_TYPE) def =
        get_local_tag (name))
      return def;
    // An imported set may answer with a low tag/container array as
    // well as a Tag object, so the full union type is used here.
    foreach (imported, TagSet tag_set)
      if (object(Tag)|array(LOW_TAG_TYPE|LOW_CONTAINER_TYPE) def =
          tag_set->get_tag (name))
        return def;
    return 0;
  }

  Tag|array(LOW_TAG_TYPE|LOW_CONTAINER_TYPE) get_overridden_tag (
    Tag|LOW_TAG_TYPE|LOW_CONTAINER_TYPE tagdef, void|string name)
  //! Returns the tag definition that the given one overrides, or zero
  //! if none. tagdef is a Tag object or a low tag/container
  //! definition. In the latter case, the tag name must be given as
  //! the second argument. The return value is the same as for
  //! get_local_tag().
  {
    if (objectp (tagdef) && ([object] tagdef)->is_RXML_Tag)
      name = [string] ([object] tagdef)->name;
#ifdef MODULE_DEBUG
    if (!name) error ("Need tag name.\n");
#endif

    if (tags[name] == tagdef ||
        (low_containers && low_containers[name] == tagdef) ||
        (low_tags && low_tags[name] == tagdef)) {
      // The definition is local here, so what it overrides is the
      // first definition found among the imported tag sets.
      foreach (imported, TagSet tag_set)
        if (object(Tag)|array(LOW_TAG_TYPE|LOW_CONTAINER_TYPE) overridden =
            tag_set->get_tag (name))
          return overridden;
    }
    else {
      // The definition comes from an imported tag set. Look for what
      // it overrides inside that set first; failing that, it's the
      // next definition found in a later imported set.
      int found = 0;
      foreach (imported, TagSet tag_set)
        if (object(Tag)|array(LOW_TAG_TYPE|LOW_CONTAINER_TYPE) subtag =
            tag_set->get_tag (name))
          if (found)
            return subtag;
          else if (arrayp (subtag) ?
                   subtag[0] == tagdef || subtag[1] == tagdef :
                   subtag == tagdef)
            if ((subtag = tag_set->get_overridden_tag (tagdef, name)))
              return subtag;
            else
              found = 1;
    }

    return 0;
  }

  array(Tag|array(LOW_TAG_TYPE|LOW_CONTAINER_TYPE)) get_overridden_tags (
    string name)
  //! Returns all tag definitions for the given name, i.e. including
  //! the overridden ones. A tag to the left overrides one to the
  //! right. The elements in the returned array are the same as for
  //! get_local_tag().
  {
    // Collect recursively from the imported tag sets and flatten the
    // per-set arrays into one. (This used to call a get_all_tags()
    // method that TagSet doesn't have.)
    array(Tag|array(LOW_TAG_TYPE|LOW_CONTAINER_TYPE)) inherited =
      imported->get_overridden_tags (name) * ({});
    if (object(Tag)|array(LOW_TAG_TYPE|LOW_CONTAINER_TYPE) def =
        get_local_tag (name))
      return ({def}) + inherited;
    else
      return inherited;
  }

  multiset(string) get_tag_names()
  //! Returns the names of all tags defined in this tag set and in the
  //! tag sets it imports, including the low tags and containers.
  {
    multiset(string) res = (multiset) indices (tags);
    if (low_tags) res |= (multiset) indices (low_tags);
    if (low_containers) res |= (multiset) indices (low_containers);
    return `| (res, @imported->get_tag_names());
  }

  mixed `->= (string var, mixed val)
  // Assignment hook: every assignment calls changed(), and the change
  // callback is moved from the old to the new tag sets when
  // "imported" is replaced.
  {
    switch (var) {
      case "imported":
        (imported - ({0}))->dont_notify (changed);
        imported = [array(TagSet)] val;
        imported->do_notify (changed);
        break;
      default:
        ::`->= (var, val);
    }
    changed();
    return val;
  }

  mixed `[]= (string var, mixed val) {return `->= (var, val);}

  Parser `() (Type top_level_type, void|RequestID id)
  //! Creates a new context for parsing content of the specified type,
  //! and returns the parser object for it. id is put into the
  //! context.
  {
    Context ctx = Context (this_object(), id);
    // The list of prepare functions is cached here until changed()
    // clears it.
    if (!prepare_funs) prepare_funs = get_prepare_funs();
    prepare_funs -= ({0});
    prepare_funs (ctx);
    return ctx->new_parser (top_level_type);
  }

  void changed()
  //! Should be called whenever something is changed. Done
  //! automatically most of the time, however.
  {
    generation++;
    prepare_funs = 0;
    (notify_funcs -= ({0}))();
    set_weak_flag (notify_funcs, 1);
  }

  // Internals.

  void do_notify (function(:void) func)
  // Registers func to be called from changed().
  {
    notify_funcs |= ({func});
    set_weak_flag (notify_funcs, 1);
  }

  void dont_notify (function(:void) func)
  // Unregisters a function registered with do_notify().
  {
    notify_funcs -= ({func});
    set_weak_flag (notify_funcs, 1);
  }

  void destroy()
  {
    // Best effort only; errors from changed() are ignored here.
    catch (changed());
  }

  private mapping(string:Tag) tags = ([]);
  // Private since we want to track changes in this.

  private array(function(:void)) notify_funcs = ({});
  // Weak (when nonempty).

  private array(function(Context:void)) prepare_funs;

  /*private*/ array(function(Context:void)) get_prepare_funs()
  // Returns the prepare_context functions of the imported tag sets
  // (rightmost import first) followed by the one in this tag set.
  {
    if (prepare_funs) return prepare_funs;
    array(function(Context:void)) funs = ({});
    for (int i = sizeof (imported) - 1; i >= 0; i--)
      funs += imported[i]->get_prepare_funs();
    if (prepare_context) funs += ({prepare_context});
    // We don't cache in prepare_funs; do that only at the top level.
    return funs;
  }

  string _sprintf()
  {
    return name ? "RXML.TagSet(" + name + ")" : "RXML.TagSet";
  }
}
TagSet empty_tag_set;
//! The empty tag set.


class Value
//! Interface for objects that are stored as variable values and get
//! evaluated at the time they are referenced.
{
  mixed rxml_var_eval (Context ctx, string var, string scope_name);
}

class Scope
//! Interface for objects that emulate a scope mapping. Every
//! operation throws an RXML error in this base implementation.
{
  mixed `[] (string var, void|Context ctx, void|string scope_name)
  {
    rxml_error ("Cannot query variable" + _in_the_scope (scope_name) + ".\n");
  }

  mixed `[]= (string var, mixed val, void|Context ctx, void|string scope_name)
  {
    rxml_error ("Cannot set variable" + _in_the_scope (scope_name) + ".\n");
  }

  array(string) _indices (void|Context ctx, void|string scope_name)
  {
    rxml_error ("Cannot list variables" + _in_the_scope (scope_name) + ".\n");
  }

  void m_delete (string var, void|Context ctx, void|string scope_name)
  {
    rxml_error ("Cannot delete variable" + _in_the_scope (scope_name) + ".\n");
  }

  private string _in_the_scope (string scope_name)
  // Formats the scope part of the error messages above: empty when no
  // scope name is known, and "_" is reported as the current scope.
  {
    if (!scope_name) return "";
    if (scope_name == "_") return " in the current scope";
    return " in the scope " + scope_name;
  }
}

#define SCOPE_TYPE mapping(string:mixed)|object(Scope)
af06d52000-01-12Martin Stjernholm 
class Context
//! A parser context. This contains the current variable bindings and
//! so on. The current context can always be retrieved with
//! get_context().
//!
//! Note: Don't store pointers to this object since that will likely
//! introduce circular references. It can be retrieved easily through
//! get_context() or parser->context.
{
  Frame frame;
  //! The currently evaluating frame.

  RequestID id;
  //!

  int type_check;
  //! Whether to do type checking.

  int error_count;
  //! Number of RXML errors that have occurred.

  TagSet tag_set;
  //! The current tag set that will be inherited by subparsers.

  int tag_set_is_local;
  //! Nonzero if tag_set is a copy local to this context. A local tag
  //! set that imports the old tag set is created whenever need be.

  mixed get_var (string var, void|string scope_name)
  //! Returns the value of a variable in the specified scope, or the
  //! current scope if none is given. Returns zero with zero_type 1 if
  //! there's no such variable.
  {
    if (SCOPE_TYPE vars = scopes[scope_name || "_"]) {
      mixed val;
      if (objectp (vars)) {
        val = ([object(Scope)] vars)->`[] (var, this_object(),
                                           scope_name || "_");
        // A scope object answers Void for a nonexistent variable.
        if (val == Void) return ([])[0];
      }
      else
        if (zero_type (val = vars[var])) return ([])[0];
      // Values implementing rxml_var_eval are evaluated on reference.
      if (objectp (val) && ([object] val)->rxml_var_eval)
        return ([object(Value)] val)->
          rxml_var_eval (this_object(), var, scope_name || "_");
      else
        return val;
    }
    else if (scope_name) rxml_error ("Unknown scope %O.\n", scope_name);
    else rxml_error ("No current scope.\n");
  }

  mixed set_var (string var, mixed val, void|string scope_name)
  //! Sets the value of a variable in the specified scope, or the
  //! current scope if none is given. Returns val.
  {
    if (SCOPE_TYPE vars = scopes[scope_name || "_"])
      if (objectp (vars))
        return ([object(Scope)] vars)->`[]= (var, val, this_object(),
                                             scope_name || "_");
      else
        return vars[var] = val;
    else if (scope_name) rxml_error ("Unknown scope %O.\n", scope_name);
    else rxml_error ("No current scope.\n");
  }

  void delete_var (string var, void|string scope_name)
  //! Removes a variable in the specified scope, or the current scope
  //! if none is given.
  {
    if (SCOPE_TYPE vars = scopes[scope_name || "_"])
      if (objectp (vars))
        ([object(Scope)] vars)->m_delete (var, this_object(),
                                          scope_name || "_");
      else
        m_delete ([mapping(string:mixed)] vars, var);
    else if (scope_name) rxml_error ("Unknown scope %O.\n", scope_name);
    else rxml_error ("No current scope.\n");
  }

  array(string) list_var (void|string scope_name)
  //! Returns the names of all variables in the specified scope, or
  //! the current scope if none is given.
  {
    if (SCOPE_TYPE vars = scopes[scope_name || "_"])
      if (objectp (vars))
        return ([object(Scope)] vars)->_indices (this_object(),
                                                 scope_name || "_");
      else
        return indices ([mapping(string:mixed)] vars);
    else if (scope_name) rxml_error ("Unknown scope %O.\n", scope_name);
    else rxml_error ("No current scope.\n");
  }

  array(string) list_scopes()
  //! Returns the names of all defined scopes.
  {
    return indices (scopes) - ({"_"});
  }

  int exist_scope (void|string scope_name)
  //! Returns nonzero if the specified scope exists, or if there is a
  //! current scope when no name is given.
  {
    return !!scopes[scope_name || "_"];
  }

  void add_scope (string scope_name, SCOPE_TYPE vars)
  //! Adds or replaces the specified scope at the global level. A
  //! scope can be an object. It must implement the methods `[],
  //! _indices(), and m_delete().
  {
    if (scopes[scope_name])
      if (scope_name == "_") {
        // Follow the chain of hidden "_" scopes to its outermost entry
        // and hang the new scope beyond that.
        SCOPE_TYPE inner = scopes["_"];
        while (SCOPE_TYPE outer = hidden[inner]) inner = outer;
        hidden[inner] = vars;
      }
      else {
        // If some frame on the stack uses the name, the global level
        // is whatever the outermost such frame hides.
        Frame outermost;
        for (Frame f = frame; f; f = f->up)
          if (f->scope_name == scope_name) outermost = f;
        if (outermost) hidden[outermost] = vars;
        else scopes[scope_name] = vars;
      }
    else scopes[scope_name] = vars;
  }

  void remove_scope (string scope_name)
  //! Removes the named scope from the global level, if it exists.
  {
#ifdef MODULE_DEBUG
    if (scope_name == "_") error ("Cannot remove current scope.\n");
#endif
    Frame outermost;
    for (Frame f = frame; f; f = f->up)
      if (f->scope_name == scope_name) outermost = f;
    if (outermost) m_delete (hidden, outermost);
    else m_delete (scopes, scope_name);
  }

  string current_scope()
  //! Returns the name of the current scope, if it has any.
  {
    if (SCOPE_TYPE vars = scopes["_"]) {
      // Look for a named entry that refers to the same scope as "_".
      string scope_name;
      while (scope_name = search (scopes, vars, scope_name))
        if (scope_name != "_") return scope_name;
    }
    return 0;
  }

  void add_runtime_tag (Tag tag)
  //! Adds a tag that will exist from this point forward in the
  //! current context only. It will have effect in the current parser
  //! and parent parsers up to the point where tag_set changes.
  {
    if (!new_runtime_tags) new_runtime_tags = RuntimeTags();
    new_runtime_tags->add_tags[tag] = 1;
    // By doing the following, we can let remove_tags take precedence.
    new_runtime_tags->remove_tags[tag] = 0;
    new_runtime_tags->remove_tags[tag->name] = 0;
  }

  void remove_runtime_tag (string|Tag tag)
  //! Removes a tag added by add_runtime_tag(). It will have effect in
  //! the current parser and parent parsers up to the point where
  //! tag_set changes.
  {
    if (!new_runtime_tags) new_runtime_tags = RuntimeTags();
    new_runtime_tags->remove_tags[tag] = 1;
  }

  void rxml_error (string msg, mixed... args)
  //! Throws an RXML error with a dump of the parser stack.
  {
    if (sizeof (args)) msg = sprintf (msg, @args);
    msg = rxml_errmsg_prefix + ": " + msg;
    if (current_var) msg += " | &" + current_var + ";\n";
    // One line for every frame from the current one and outwards.
    for (Frame f = frame; f; f = f->up) {
      if (f->tag) msg += " | <" + f->tag->name;
      else if (!f->up) break;
      else msg += " | <(unknown tag)";
      if (f->args)
        foreach (sort (indices (f->args)), string arg) {
          mixed val = f->args[arg];
          msg += " " + arg + "=";
          if (arrayp (val)) msg += map (val, error_print_val) * ",";
          else msg += error_print_val (val);
        }
      else msg += " (no argmap)";
      msg += ">\n";
    }
    // Leave this function itself out of the backtrace.
    array b = backtrace();
    throw (({msg, b[..sizeof (b) - 2]}));
  }

  void handle_exception (mixed err, PCode|Parser evaluator)
  //! This function gets any exception that is caught during
  //! evaluation. evaluator is the object that caught the error.
  {
    error_count++;
    string msg = describe_error (err);
    if (msg[..sizeof (rxml_errmsg_prefix) - 1] == rxml_errmsg_prefix) {
      // An RXML error.
      // Count the error in each evaluator that has a parent, and
      // continue with the topmost one.
      while (evaluator->_parent) {
        evaluator->error_count++;
        evaluator = evaluator->_parent;
      }
      if (id && id->conf)
        msg = ([function(mixed,Type:string)]
               ([object] id->conf)->handle_rxml_error) (err, evaluator->type);
      else {
#ifdef MODULE_DEBUG
        report_notice (describe_backtrace (err));
#else
        report_notice (msg);
#endif
      }
      if (evaluator->type->free_text && evaluator->report_error)
        evaluator->report_error (msg);
    }
    else throw (err);
  }

  // Internals.

  constant rxml_errmsg_prefix = "RXML parser error";

  private string error_print_val (mixed val)
  // Formats an argument value for the stack dump in rxml_error().
  // Aggregates are shown by type name only.
  {
    if (arrayp (val)) return "array";
    else if (mappingp (val)) return "mapping";
    else if (multisetp (val)) return "multiset";
    else return sprintf ("%O", val);
  }

  string current_var;
  // Used to get the parsed variable into the RXML error backtrace.

  Parser new_parser (Type top_level_type)
  // Returns a new parser object to start parsing with this context.
  // Normally TagSet.`() should be used instead of this.
  {
#ifdef MODULE_DEBUG
    if (in_use || frame) error ("Context already in use.\n");
#endif
    return top_level_type->get_parser (this_object());
  }

  mapping(string:SCOPE_TYPE) scopes = ([]);
  // The variable mappings for every currently visible scope. A
  // special entry "_" points to the current local scope.

  mapping(SCOPE_TYPE|Frame:SCOPE_TYPE) hidden = ([]);
  // The currently hidden variable mappings in scopes. The old "_"
  // entries are indexed by the replacing variable mapping. The old
  // named scope entries are indexed by the frame object which
  // replaced them. An entry with a zero value means that the scope
  // didn't exist before it was introduced.

  void enter_scope (Frame frame)
  // Makes the variables of the frame the current scope, and also the
  // named scope if the frame has a scope name. Whatever was there
  // before is remembered in hidden.
  {
    SCOPE_TYPE vars;
#ifdef DEBUG
    if (!frame->vars) error ("Internal error: Frame has no variables.\n");
#endif
    if ((vars = [SCOPE_TYPE] frame->vars) != scopes["_"]) {
      hidden[vars] = scopes["_"];
      scopes["_"] = vars;
      if (string scope_name = [string] frame->scope_name) {
        hidden[frame] = scopes[scope_name];
        scopes[scope_name] = vars;
      }
    }
  }

  void leave_scope (Frame frame)
  // Undoes enter_scope(): The scopes that the frame hid are restored.
  // If a scope didn't exist before the frame introduced it, it's
  // removed again instead of being left pointing at the variables of
  // the finished frame. Nothing is done for scopes that have no entry
  // in hidden.
  {
    if (string scope_name = [string] frame->scope_name)
      if (!zero_type (hidden[frame])) {
        if (SCOPE_TYPE outer = hidden[frame]) scopes[scope_name] = outer;
        else m_delete (scopes, scope_name);
        m_delete (hidden, frame);
      }
    SCOPE_TYPE vars = [SCOPE_TYPE] frame->vars;
    if (!zero_type (hidden[vars])) {
      if (SCOPE_TYPE outer = hidden[vars]) scopes["_"] = outer;
      else m_delete (scopes, "_");
      m_delete (hidden, vars);
    }
  }

#define ENTER_SCOPE(ctx, frame) (frame->vars && ctx->enter_scope (frame))
#define LEAVE_SCOPE(ctx, frame) (frame->vars && ctx->leave_scope (frame))

  void make_tag_set_local()
  // Replaces tag_set with a new tag set that imports the old one,
  // unless that has been done already.
  {
    if (!tag_set_is_local) {
      TagSet new_tag_set = TagSet (tag_set->name + " (local)"); // FIXME: Cache this?
      new_tag_set->imported = ({tag_set});
      tag_set = new_tag_set;
      tag_set_is_local = 1;
    }
  }

  class RuntimeTags
  {
    multiset(Tag) add_tags = (<>);
    multiset(Tag|string) remove_tags = (<>);
  }

  RuntimeTags new_runtime_tags;
  // Used to record the result of any add_runtime_tag() and
  // remove_runtime_tag() calls since the last time the parsers ran.

  void create (TagSet _tag_set, void|RequestID _id)
  // Normally TagSet.`() should be used instead of this.
  {
    tag_set = _tag_set;
    id = _id;
  }

  mapping(string:mixed)|mapping(object:array) unwind_state;
  // If this is a mapping, we have an unwound stack state. It contains
  // strings with arbitrary exception info, and the objects being
  // unwound with arrays containing the extra state info they need.
  // The first entry in these arrays are always the subobject. The
  // special entries are:
  //
  // "top": ({Frame|Parser|PCode (top object)})
  // "stream_piece": mixed (When continuing, do a streaming
  //     do_return() with this stream piece.)
  // "exec_left": array (Exec array left to evaluate. Only used
  //     between Frame._exec_array() and Frame._eval().)

  string _sprintf() {return "RXML.Context";}

#ifdef MODULE_DEBUG
  int in_use;
#endif
}


//! Current context.

//! It's set before any function in RXML.Tag or RXML.Frame is called.

#if constant (thread_create)
// With threads: the current context is kept in thread local storage.
private Thread.Local _context = thread_local();

inline void set_context (Context ctx)
{
  _context->set (ctx);
}

inline Context get_context()
{
  return [object(Context)] _context->get();
}

#else

// Without threads: a plain module variable holds the current context.
private Context _context;

inline void set_context (Context ctx)
{
  _context = ctx;
}

inline Context get_context()
{
  return _context;
}

#endif

// ENTER_CONTEXT(ctx) makes ctx the current context and remembers the
// previous one in a local variable __old_ctx; LEAVE_CONTEXT() puts the
// previous one back. They must therefore be used in the same block.

#ifdef MODULE_DEBUG

// The in_use check below is subject to races, but it looks like we
// have to live with that.

#define ENTER_CONTEXT(ctx)                                              \
  Context __old_ctx = get_context();                                    \
  set_context (ctx);                                                    \
  if (ctx) {                                                            \
    if (ctx->in_use && __old_ctx != ctx)                                \
      error ("Attempt to use context asynchronously.\n");               \
    ctx->in_use = 1;                                                    \
  }

#define LEAVE_CONTEXT()                                                 \
  if (Context ctx = get_context())                                      \
    if (__old_ctx != ctx) ctx->in_use = 0;                              \
  set_context (__old_ctx);

#else

#define ENTER_CONTEXT(ctx)                                              \
  Context __old_ctx = get_context();                                    \
  set_context (ctx);

#define LEAVE_CONTEXT()                                                 \
  set_context (__old_ctx);

#endif
void rxml_error (string msg, mixed... args)
//! Throws an RXML error through rxml_error() in the current context
//! when there is one. Otherwise an error marked "(no context)" is
//! thrown directly.
{
  Context ctx = get_context();
  if (ctx && ctx->rxml_error) {
    ctx->rxml_error (msg, @args);
    return;
  }

  if (sizeof (args)) msg = sprintf (msg, @args);
  msg = Context.rxml_errmsg_prefix + " (no context): " + msg;
  // Leave this function itself out of the backtrace.
  array b = backtrace();
  throw (({msg, b[..sizeof (b) - 2]}));
}


//! Constants for the bit field RXML.Frame.flags.

//! Static flags (i.e. tested in the Tag object).

constant FLAG_CONTAINER = 0x00000001;
//! If set, the tag accepts non-empty content. E.g. with the standard
//! HTML parser this defines whether the tag is a container or not.

constant FLAG_NO_PREFIX = 0x00000002;
//! Never apply any prefix to this tag.

//! The rest of the flags are dynamic (i.e. tested in the Frame object).

constant FLAG_PARENT_SCOPE = 0x00000100;
//! If set, the array from do_return() and cached_return() will be
//! interpreted in the scope of the parent tag, rather than in the
//! current one.

constant FLAG_NO_IMPLICIT_ARGS = 0x00000200;
//! If set, the parser won't apply any implicit arguments. FIXME: Not
//! yet implemented.

constant FLAG_STREAM_RESULT = 0x00000400;
//! If set, the do_return() function will be called repeatedly until
//! it returns 0 or no more content is wanted.

constant FLAG_STREAM_CONTENT = 0x00000800;
//! If set, the tag supports getting its content in streaming mode:
//! do_return() will then be called repeatedly with successive parts
//! of the content. Can't be changed from do_return().

//! Note: It might be obvious, but using streaming is significantly
//! less effective than nonstreaming, so it should only be done when
//! big delays are expected.

constant FLAG_STREAM = FLAG_STREAM_RESULT | FLAG_STREAM_CONTENT;

//! The following flags specify whether certain conditions must be
//! met for a cached frame to be considered (if RXML.Frame.is_valid()
//! is defined). They may be read directly after do_return() returns.
//! The tag name is always the same. FIXME: These are ideas only; not
//! yet implemented.

constant FLAG_CACHE_DIFF_ARGS = 0x00010000;
//! If set, the arguments to the tag need not be the same (using
//! equal()) as the cached args.

constant FLAG_CACHE_DIFF_CONTENT = 0x00020000;
//! If set, the content need not be the same.

constant FLAG_CACHE_DIFF_RESULT_TYPE = 0x00040000;
//! If set, the result type need not be the same. (Typically
//! not useful unless cached_return() is used.)

constant FLAG_CACHE_DIFF_VARS = 0x00080000;
//! If set, the variables with external scope in vars (i.e. normally
//! those that have been accessed with get_var()) need not have the
//! same values (using equal()) as the actual variables.
constant FLAG_CACHE_SAME_STACK = 0x00100000; //! If set, the stack of call frames needs to be the same. constant FLAG_CACHE_EXECUTE_RESULT = 0x00200000; //! If set, an array to execute will be stored in the frame instead of //! the final result. On a cache hit it'll be executed like the return //! value from do_return() to produce the result. class Frame //! A tag instance. { constant is_RXML_Frame = 1;
56532d1999-12-19Martin Stjernholm  constant thrown_at_unwind = 1;
ed81751999-12-11Martin Stjernholm  //! Interface. Frame up; //! The parent frame. This frame is either created from the content //! inside the up frame, or it's in the array returned from //! do_return() in the up frame. Tag tag; //! The RXML.Tag object this frame was created from. int flags; //! Various bit flags that affect parsing. See the FLAG_* constants. mapping(string:mixed) args; //! The arguments passed to the tag. Set before //! do_enter()/do_return() are called. Type content_type; //! The type of the content. mixed content = Void; //! The content. Set before do_return() is called, but only when the //! tag is actually used with container syntax. Type result_type; //! The required result type. Set before do_enter()/do_return() are //! called. do_return() should produce a result of this type. mixed result = Void; //! The result. //!mapping(string:mixed) vars; //! Set this to introduce a new variable scope that will be active //! during parsing of the content and return values (but see also //! FLAG_PARENT_SCOPE). Don't replace or remove the mapping later. //!string scope_name; //! The scope name for the variables. Don't change this later. //!TagSet additional_tags; //! If set, the tags in this tag set will be used in addition to the //! tags inherited from the surrounding parser. The additional tags //! will in turn be inherited by subparsers. //!TagSet local_tags; //! If set, the tags in this tag set will be used in the parser for //! the content, instead of the one inherited from the surrounding //! parser. The tags are not inherited by subparsers.
c6245b1999-12-31Martin Stjernholm  optional int|function(RequestID:int|function) do_enter (RequestID id);
ed81751999-12-11Martin Stjernholm  //! Called before the content (if any) is processed. This function //! typically only initializes vars. Return values: //! //! int - Do this many passes through the content. do_return() //! will be called after each pass. //! function(RequestID:int|function) - A function that is handled //! just like do_enter(), only repeatedly until it returns //! 0 or another function. //! //! If this function is missing, one pass is done. //!array do_return (RequestID id, void|mixed piece); //! Called after the content (if any) has been processed. //! //! The result_type variable is set to the type of result the parser //! wants. It's any type that is valid by tag->result_type. If the //! result type is sequential, it's spliced into the surrounding //! content, otherwise it replaces the previous value of the //! content, if any. //! //! Return values: //! //! array - A so-called execution array to be handled by the parser: //! //! string - Added or put into the result. If the result type has //! a parser, the string will be parsed with it before //! it's assigned to the result variable and passed on. //! RXML.Frame - Already initialized frame to process. Neither //! arguments nor content will be parsed. It's result is //! added or put into the result of this tag. //! mapping(string:mixed) - Fields to merge into the headers. //! FIXME: Not yet implemented. FIXME: Somehow represent //! removal of headers? //! object - Treated as a file object to read in blocking or //! nonblocking mode. FIXME: Not yet implemented, details //! not decided. //! multiset(mixed) - Should only contain one element that'll be //! added or put into the result. Normally not necessary; //! assign it directly to the result variable instead. //! //! 0 - Do nothing special. Ends the stream when //! FLAG_STREAM_RESULT is set. //! //! Note that the intended use is not to postparse by returning //! strings, but instead to return an array with literal strings and //! 
RXML.Frame objects where parsing (or, more accurately, //! evaluation) needs to be done. //! //! piece is used when the tag is operating in streaming mode (i.e. //! FLAG_STREAM_CONTENT is set). It's then set to each successive //! part of the content in the stream, and the content variable is //! never touched. do_return() is also called "normally" with no //! piece argument afterwards. Note that tags that support streaming //! mode might still be used nonstreaming (it might also vary //! between iterations). //! //! As long as FLAG_STREAM_RESULT is set, do_return() will be called //! repeatedly until it returns 0. It's only the result piece from //! the execution array that is propagated after each turn; the //! result variable only accumulates all these pieces. //! //! If this function is an array, it's executed as above. If it's //! zero, the value in the result variable is simply used. If the //! result variable is Void, content is used as result if it's of a //! compatible type. //!int|function(:int) is_valid; //! When defined, the frame may be cached. First the name of the tag //! must be the same. Then the conditions specified by the cache //! bits in flag are checked. Then, if this is a function, it's //! called. If it returns 1, the frame is reused. FIXME: Not yet //! implemented.
c6245b1999-12-31Martin Stjernholm  optional array cached_return (Context ctx, void|mixed piece);
ed81751999-12-11Martin Stjernholm  //! If defined, this will be called to get the value from a cached //! frame (that's still valid) instead of using the cached result. //! It's otherwise handled like do_return(). Note that the cached //! frame may be used from several threads. FIXME: Not yet //! implemented. //! Services.
void rxml_error (string msg, mixed... args)
//! Throws an RXML error from the current context.
//!
//! @[msg] and @[args] are passed on unchanged to the rxml_error()
//! function of the object returned by get_context().
{
  get_context()/*HMM*/->rxml_error (msg, @args);
}

void terminate()
//! Makes the parser abort. The data parsed so far will be returned.
//! Does not return; throws a special exception instead.
//!
//! FIXME: Not yet implemented. The current body only throws a
//! generic "FIXME" error.
{
  error ("FIXME\n");
}

void suspend()
//! Used together with resume() for nonblocking mode. May be called
//! from do_enter() or do_return() to suspend the parser: The parser
//! will just stop, leaving the context intact. If it returns, the
//! parser is used in a place that doesn't support nonblocking, so
//! just go ahead and block.
//!
//! FIXME: Not yet implemented. The current body only throws a
//! generic "FIXME" error.
{
  error ("FIXME\n");
}

void resume()
//! Makes the parser continue where it left off. The function that
//! called suspend() will be called again.
//!
//! FIXME: Not yet implemented. The current body only throws a
//! generic "FIXME" error.
{
  error ("FIXME\n");
}

// Internals.
mixed _exec_array (TagSetParser parser, array exec)
// Executes an execution array (see the do_return() documentation).
//
// parser is the tag set parser this frame is being evaluated in; it
// becomes the parent of any subparser created here and is handed on
// to nested frames. exec is the array to execute; it is never
// modified.
//
// Returns the value produced by the array: the concatenation of all
// pieces for sequential result types, otherwise the last piece that
// wasn't Void. The result variable is updated accordingly.
//
// Any exception is rethrown. If it is an unwind object, the part of
// exec that remains to be executed is first stored in
// ctx->unwind_state->exec_left so that the caller can resume later.
{
  Frame this = this_object();
  Context ctx = parser->context;
  int i = 0;
  mixed res = Void;
  Parser subparser = 0;

  mixed err = catch {
    if (flags & FLAG_PARENT_SCOPE) LEAVE_SCOPE (ctx, this);

    for (; i < sizeof (exec); i++) {
      mixed elem = exec[i], piece = Void;

      switch (sprintf ("%t", elem)) {
        case "string":
          if (result_type->_parser_prog == PNone)
            // No parser for the result type; use the string as it is.
            piece = elem;
          else {
            subparser = result_type->get_parser (ctx);
            subparser->_parent = parser;
            subparser->finish ([string] elem); // Might unwind.
            piece = subparser->eval(); // Might unwind.
            subparser = 0;
          }
          break;

        case "object":
          if (([object] elem)->is_RXML_Frame) {
            // _eval() reads parser->context right away, so it must be
            // given the real parser and not zero.
            ([object(Frame)] elem)->_eval (parser); // Might unwind.
            piece = ([object(Frame)] elem)->result;
          }
          else if (([object] elem)->is_RXML_Parser) {
            // The subparser above unwound.
            ([object(Parser)] elem)->finish(); // Might unwind.
            piece = ([object(Parser)] elem)->eval(); // Might unwind.
          }
          else
            error ("File objects not yet implemented.\n");
          break;

        case "mapping":
          error ("Header mappings not yet implemented.\n");
          break;

        case "multiset":
          if (sizeof ([multiset] elem) == 1)
            piece = ((array) elem)[0];
          else if (sizeof ([multiset] elem) > 1)
            error (sizeof ([multiset] elem) + " values in multiset in exec array.\n");
          else
            error ("No value in multiset in exec array.\n");
          break;

        default:
          error ("Invalid type %t in exec array.\n", elem);
      }

      if (result_type->sequential) res += piece;
      else if (piece != Void) result = res = piece;
    }

    if (result_type->sequential) result += res;
    if (flags & FLAG_PARENT_SCOPE) ENTER_SCOPE (ctx, this);
    return res;
  };

  // Only reached when something was thrown above.
  // NOTE(review): if FLAG_PARENT_SCOPE is set, the scope left at the
  // top of the catch block is not re-entered on this path - confirm
  // that the LEAVE_SCOPE() in _eval() copes with that.
  if (result_type->sequential) result += res;

  if (objectp (err) && ([object] err)->thrown_at_unwind) {
    mapping(string:mixed)|mapping(object:array) ustate = ctx->unwind_state;
    if (!ustate)
      // An unwind object may arrive before any unwind state has been
      // recorded; create the mapping so exec_left can be stored.
      ustate = ctx->unwind_state = ([]);
    else if (!zero_type (ustate->stream_piece)) {
      // Subframe wants to stream. Update stream_piece and send it on.
      if (result_type->sequential)
        ustate->stream_piece = res + ustate->stream_piece;
      else if (ustate->stream_piece == Void)
        ustate->stream_piece = res;
    }

    ustate->exec_left = exec[i..]; // Left to execute.
    if (subparser)
      // Replace the string with the subparser object so that we'll
      // continue in it later. It's done here to keep the original
      // exec array untouched.
      ([array] ustate->exec_left)[0] = subparser;
  }

  throw (err);
}
private void _handle_runtime_tags (TagSetParser parser,
                                   Context.RuntimeTags runtime_tags)
// Applies the pending runtime tag changes in runtime_tags to parser
// and to every parent parser above it that evaluates with a tag set.
// A tag that is both added and removed (by object or by name) is
// only removed.
{
  // FIXME: PCode handling.
  multiset(string|Tag) to_remove = runtime_tags->remove_tags;
  multiset(Tag) to_add = runtime_tags->add_tags - to_remove;

  if (sizeof (to_remove)) {
    // The subtraction above only caught tags removed by object;
    // also drop those that are removed by name.
    foreach (indices (to_add), Tag candidate) {
      if (!to_remove[candidate->name]) continue;
      to_add[candidate] = 0;
    }
  }

  array(string|Tag) remove_list = (array) to_remove;
  array(Tag) add_list = (array) to_add;

  Parser cur = parser;
  while (cur) {
    if (cur->tag_set_eval) {
      foreach (add_list, Tag new_tag)
        ([object(TagSetParser)] cur)->add_runtime_tag (new_tag);
      foreach (remove_list, string|object(Tag) old_tag)
        ([object(TagSetParser)] cur)->remove_runtime_tag (old_tag);
    }
    cur = cur->_parent;
  }
}
void _eval (TagSetParser parser,
            void|mapping(string:string) raw_args,
            void|string raw_content)
// Evaluates this frame in the context of parser.
//
// raw_args are the unparsed tag arguments and raw_content the
// unparsed content; both are left out when the frame is resumed
// after an unwind, in which case the saved state is fetched from
// ctx->unwind_state[this] instead.
//
// The function evaluates the arguments, selects result_type and
// content_type, calls do_enter(), parses the content once per
// iteration and runs do_return() (or the array stored in its place)
// through _exec_array(). If something below unwinds, the local state
// is saved in ctx->unwind_state[this] and this frame is either
// thrown to the parent or re-entered through tail recursion.
//
// Note: It might be somewhat tricky to override this function.
{
  Frame this = this_object();
  Context ctx = parser->context;

  // Unwind state data:
  //raw_content
  int|function(RequestID:int|function) fn, iter;
  Parser subparser;
  mixed piece;
  array exec;
  int tags_added; // Flag that we added additional_tags to ctx->tag_set.
  //ctx->new_runtime_tags

  // Until ctx->frame is set below, errors must set it themselves so
  // that they are reported with this frame as the current one.
#define PRE_INIT_ERROR(X) (ctx->frame = this, error (X))
#ifdef DEBUG
  // Internal sanity checks.
  if (ctx != get_context())
    PRE_INIT_ERROR ("Internal error: Context not current.\n");
  if (!parser->tag_set_eval)
    PRE_INIT_ERROR ("Internal error: Calling _eval() with non-tag set parser.\n");
#endif
#ifdef MODULE_DEBUG
  if (ctx->new_runtime_tags)
    PRE_INIT_ERROR ("Looks like Context.add_runtime_tag() or "
                    "Context.remove_runtime_tag() was used outside any parser.\n");
#endif

  if (array state = ctx->unwind_state && ctx->unwind_state[this]) {
    // Resuming after an unwind: restore the saved local state.
#ifdef DEBUG
    if (!up)
      PRE_INIT_ERROR ("Internal error: Resuming frame without up pointer.\n");
    if (raw_args || raw_content)
      PRE_INIT_ERROR ("Internal error: Can't feed new arguments or content "
                      "when resuming parse.\n");
#endif
    object ignored;
    [ignored, fn, iter, raw_content, subparser, piece, exec, tags_added,
     ctx->new_runtime_tags] = state;
    m_delete (ctx->unwind_state, this);
    if (!sizeof (ctx->unwind_state)) ctx->unwind_state = 0;
  }
  else {
#ifdef MODULE_DEBUG
    if (up && up != ctx->frame)
      PRE_INIT_ERROR ("Reuse of frame in different context.\n");
#endif
    up = ctx->frame;
    piece = Void;
  }

#undef PRE_INIT_ERROR
  ctx->frame = this;

  if (raw_args) {
    // Check the required arguments and evaluate those that have a
    // declared type.
    args = raw_args;
    mapping(string:Type) atypes;
    if (tag->req_arg_types) {
      atypes = raw_args & tag->req_arg_types;
      if (sizeof (atypes) < sizeof (tag->req_arg_types)) {
        array(string) missing = sort (indices (tag->req_arg_types - atypes));
        rxml_error ("Required " +
                    (sizeof (missing) > 1 ?
                     "arguments " + String.implode_nicely (missing) + " are" :
                     "argument " + missing[0] + " is") + " missing.\n");
      }
    }
    if (tag->opt_arg_types)
      if (atypes) atypes += raw_args & tag->opt_arg_types;
      else atypes = raw_args & tag->opt_arg_types;
    if (atypes)
      if (mixed err = catch {
        foreach (indices (atypes), string arg)
          args[arg] = atypes[arg]->eval (
            raw_args[arg], ctx, 0, parser, 1); // Should currently NOT unwind.
      }) {
        if (objectp (err) && ([object] err)->thrown_at_unwind)
          error ("Can't save parser state when evaluating arguments.\n");
        throw (err);
      }
  }
#ifdef DEBUG
  if (!args) error ("Internal error: args not set.\n");
#endif

  if (TagSet add_tags = raw_content && [object(TagSet)] this->additional_tags) {
    // Make additional_tags available while the content is parsed.
    if (!ctx->tag_set_is_local) ctx->make_tag_set_local();
    if (search (ctx->tag_set->imported, add_tags) < 0) {
      ctx->tag_set->imported = ({add_tags}) + ctx->tag_set->imported;
      tags_added = 1;
    }
  }

  if (!result_type) {
    // Pick the first result type of the tag that the type expected
    // by the parser is a subtype of.
    Type ptype = parser->type;
    foreach (tag->result_types, Type rtype)
      if (ptype->subtype_of (rtype)) {result_type = rtype; break;}
    if (!result_type) // Sigh..
      rxml_error (
        "Tag returns " +
        String.implode_nicely ([array(string)] tag->result_types->name, "or") +
        " but " + [string] parser->type->name + " is expected.\n");
  }
  if (!content_type) content_type = tag->content_type || result_type;

  mixed err = catch {
    if (!fn) fn = this->do_enter ? this->do_enter (ctx->id) : 1; // Might unwind.

    do {
      if (!iter) {
        iter = fn, fn = 0;
        while (functionp (iter)) { // Got a function from do_enter.
          int|function(RequestID:int|function) newiter =
            [int|function(RequestID:int|function)] iter (ctx->id); // Might unwind.
          fn = iter, iter = newiter;
        }
      }
      ENTER_SCOPE (ctx, this);

      for (; iter > 0; iter--) {
        if (raw_content) { // Got nested parsing to do.
          if (ctx->new_runtime_tags) {
            // Empty this first in case do_enter() set it.
            _handle_runtime_tags (parser, ctx->new_runtime_tags);
            ctx->new_runtime_tags = 0;
          }

          int finished = 0;
          if (!subparser) { // The nested content is not yet parsed.
            subparser = content_type->get_parser (
              ctx, [object(TagSet)] this->local_tags);
            subparser->_parent = parser;
            subparser->finish (raw_content); // Might unwind.
            finished = 1;
          }

          do {
            if (flags & FLAG_STREAM_CONTENT && subparser->read) {
              // Handle a stream piece.
              // Squeeze out any free text from the subparser first.
              mixed res = subparser->read();
              if (content_type->sequential) piece = res + piece;
              else if (piece == Void) piece = res;
              if (piece != Void) {
                int|array|function(RequestID,void|mixed:array) do_return;
                if ((do_return =
                     [int|array|function(RequestID,void|mixed:array)]
                     this->do_return) &&
                    functionp (do_return)) {
                  if (!exec) exec = do_return (ctx->id, piece); // Might unwind.
                  if (exec) {
                    mixed res = _exec_array (parser, exec); // Might unwind.
                    if (flags & FLAG_STREAM_RESULT) {
                      // NOTE(review): ctx->unwind_state is indexed
                      // here without a zero check - confirm that it's
                      // always a mapping at this point.
#ifdef DEBUG
                      if (!zero_type (ctx->unwind_state->stream_piece))
                        error ("Internal error: "
                               "Clobbering unwind_state->stream_piece.\n");
#endif
                      ctx->unwind_state->stream_piece = res;
                      throw (this);
                    }
                    exec = 0;
                  }
                  else if (flags & FLAG_STREAM_RESULT) {
                    // do_return() finished the stream. Ignore remaining content.
                    ctx->unwind_state = 0;
                    piece = Void;
                    break;
                  }
                }
                piece = Void;
              }
              if (finished) break;
            }
            else { // The frame doesn't handle streamed content.
              piece = Void;
              if (finished) {
                mixed res = subparser->eval(); // Might unwind.
                if (content_type->sequential) content += res;
                else if (res != Void) content = res;
                break;
              }
            }

            subparser->finish(); // Might unwind.
            finished = 1;
          } while (1); // Only loops when an unwound subparser has been recovered.
          subparser = 0;
        }

        if (int|array|function(RequestID,void|mixed:array) do_return =
            [int|array|function(RequestID,void|mixed:array)] this->do_return) {
          if (!exec)
            exec = functionp (do_return) ?
              ([function(RequestID,void|mixed:array)] do_return) (
                ctx->id) : // Might unwind.
              [array] do_return;
          if (exec) {
            mixed res = _exec_array (parser, exec); // Might unwind.
            if (flags & FLAG_STREAM_RESULT) {
#ifdef DEBUG
              if (ctx->unwind_state)
                error ("Internal error: Clobbering unwind_state to do streaming.\n");
              if (piece != Void)
                error ("Internal error: Thanks, we think about how nice it must "
                       "be to play the harmonica...\n");
#endif
              ctx->unwind_state = (["stream_piece": res]);
              throw (this);
            }
          }
        }
        else if (result == Void && content_type->subtype_of (result_type))
          // No do_return; fall back to the content if it's compatible.
          result = content;
      }
    } while (fn);

    if (ctx->new_runtime_tags) {
      _handle_runtime_tags (parser, ctx->new_runtime_tags);
      ctx->new_runtime_tags = 0;
    }
  };

  LEAVE_SCOPE (ctx, this);

  if (err) {
    string action;
    if (objectp (err) && ([object] err)->thrown_at_unwind) {
      mapping(string:mixed)|mapping(object:array) ustate = ctx->unwind_state;
      if (!ustate) ustate = ctx->unwind_state = ([]);
#ifdef DEBUG
      if (ustate[this])
        error ("Internal error: Frame already has an unwind state.\n");
#endif

      if (ustate->exec_left) {
        // _exec_array() left the unexecuted tail here for us.
        exec = [array] ustate->exec_left;
        m_delete (ustate, "exec_left");
      }

      if (err == this || exec && sizeof (exec) && err == exec[0])
        // This frame or a frame in the exec array wants to stream.
        if (parser->unwind_safe) {
          // Rethrow to continue in parent since we've already done
          // the appropriate do_return stuff in this frame in either
          // case.
          if (err == this) err = 0;
          if (tags_added) {
            ctx->tag_set->imported -= ({/*[object(TagSet)]HMM*/ this->additional_tags});
            tags_added = 0;
          }
          action = "break";
        }
        else {
          // Can't stream since the parser isn't unwind safe. Just
          // continue.
          m_delete (ustate, "stream_piece");
          action = "continue";
        }
      else if (!zero_type (ustate->stream_piece)) {
        // Got a stream piece from a subframe. We handle it above;
        // store the state and tail recurse.
        piece = ustate->stream_piece;
        m_delete (ustate, "stream_piece");
        action = "continue";
      }
      else action = "break"; // Some other reason - back up to the top.

      // Save the local state; it's restored at the top of this
      // function when the frame is resumed.
      ustate[this] = ({err, fn, iter, raw_content, subparser, piece, exec, tags_added,
                       ctx->new_runtime_tags});
    }
    else {
      ctx->handle_exception (err, parser); // May throw.
      action = "return";
    }

    switch (action) {
      case "break": // Throw and handle in parent frame.
#ifdef MODULE_DEBUG
        // The parser property is unwind_safe; unwind_state belongs
        // to the context and is never set in a parser, so testing it
        // here would make every unwind fail in MODULE_DEBUG mode.
        if (!parser->unwind_safe)
          error ("Trying to unwind inside a parser that isn't unwind safe.\n");
#endif
        throw (this);
      case "continue": // Continue in this frame through tail recursion.
        _eval (parser);
        return;
      case "return": // A normal return.
        break;
      default:
        error ("Internal error: Don't you come here and %O on me!\n", action);
    }
  }

  if (tags_added)
    ctx->tag_set->imported -= ({/*[object(TagSet)]HMM*/ this->additional_tags});
  ctx->frame = up;
}

string _sprintf()
{
2bd21a2000-01-05Martin Stjernholm  return "RXML.Frame(" + (tag && [string] tag->name) + ")";
ed81751999-12-11Martin Stjernholm  } } //! Parsers. class Parser //! Interface class for a syntax parser that scans, parses and //! evaluates an input stream. Access to a parser object is assumed to //! be done in a thread safe way except where noted. { constant is_RXML_Parser = 1;
56532d1999-12-19Martin Stjernholm  constant thrown_at_unwind = 1;
ed81751999-12-11Martin Stjernholm  //! Services.
int error_count;
//! Number of RXML errors that occurred during evaluation. If this
//! is nonzero, the value from eval() shouldn't be trusted.

function(Parser:void) data_callback;
//! A function to be called when data is likely to be available from
//! eval(). It's always called when the source stream closes.

//! write() and write_end() are the functions to use from outside
//! the parser system, not feed() or finish().

private void _rewind_top()
// Drops the "top" entry of the context's unwind state, if there is
// one, before this parser continues where it previously unwound.
{
  if (context && context->unwind_state && context->unwind_state->top) {
#ifdef MODULE_DEBUG
    if (context->unwind_state->top != this_object())
      error ("The context got an unwound state from another parser. Can't rewind.\n");
#endif
    m_delete (context->unwind_state, "top");
    if (!sizeof (context->unwind_state)) context->unwind_state = 0;
  }
}

private void _handle_write_error (mixed err)
// Takes care of something caught in write() or write_end(): an
// unwind object is recorded as the "top" entry of the context's
// unwind state; anything else goes to the context's exception
// handler, or is rethrown if there's no context.
{
  if (objectp (err) && ([object] err)->thrown_at_unwind) {
#ifdef DEBUG
    if (err != this_object())
      error ("Internal error: Unexpected unwind object catched.\n");
#endif
    if (!context->unwind_state) context->unwind_state = ([]);
    context->unwind_state->top = err;
  }
  else if (context)
    context->handle_exception (err, this_object()); // May throw.
  else throw (err);
}

int write (string in)
//! Writes some source data to the parser. Returns nonzero if there
//! might be data available in eval().
{
  int res;
  ENTER_CONTEXT (context);
  mixed err = catch {
    _rewind_top();
    if (this_object()/*HMM*/->feed (in)) res = 1; // Might unwind.
    if (res && data_callback) data_callback (this_object());
  };
  LEAVE_CONTEXT();
  if (err) _handle_write_error (err);
  return res;
}

void write_end (void|string in)
//! Closes the source data stream, optionally with a last bit of
//! data.
{
  ENTER_CONTEXT (context);
  mixed err = catch {
    _rewind_top();
    this_object()/*HMM*/->finish (in); // Might unwind.
    if (data_callback) data_callback (this_object());
  };
  LEAVE_CONTEXT();
  if (err) _handle_write_error (err);
}

array handle_var (string varref)
// Parses and evaluates a possible variable reference, with the
// appropriate error handling.
//
// varref is only looked up if it consists of exactly two parts
// separated by a "."; the first part is passed as the second
// argument to context->get_var() and the second part as the first.
// Returns an array with the value, or an empty array if the lookup
// gave no value or failed. If varref doesn't have that form, zero is
// returned when the type allows free text and an empty array
// otherwise.
{
  // We're always evaluating here, so context is always set.
  array(string) split = varref / ".";
  if (sizeof (split) == 2)
    if (mixed err = catch {
      context->current_var = varref;
      mixed val;
      if (zero_type (val = context->get_var (split[1], split[0]))) { // May throw.
        context->current_var = 0;
        return ({});
      }
      context->current_var = 0;
      if (type->free_text) val = (string) val;
      return val == Void ? ({}) : ({val});
    }) {
      context->current_var = 0;
      context->handle_exception (err, this_object()); // May throw.
      return ({});
    }
  return type->free_text ? 0 : ({});
}

//! Interface.

Context context;
//! The context to do evaluation in. It's assumed to never be
//! modified asynchronously during the time the parser is working on
//! an input stream.

Type type;
//! The expected result type of the current stream. (The parser
//! should not do any type checking on this.)

int compile;
//! Must be set to nonzero before a stream is fed which should be
//! compiled to p-code.
56532d1999-12-19Martin Stjernholm  //!mixed unwind_safe; //! If nonzero, the parser supports unwinding with throw()/catch(). //! Whenever an exception is thrown from some evaluation function, //! it should be able to call that function again with identical //! arguments the next time it continues.
ed81751999-12-11Martin Stjernholm  mixed feed (string in); //! Feeds some source data to the parse stream. The parser may do //! scanning and parsing before returning. If context is set, it may //! also do evaluation in that context. Returns nonzero if there //! could be new data to get from eval(). void finish (void|string in); //! Like feed(), but also finishes the parse stream. A last bit of //! data may be given. It should work to call this on an already //! finished stream if no argument is given to it.
01e43b2000-01-14Martin Stjernholm  optional void report_error (string msg); //! Used to report errors to the end user through the output. This //! is only called when the type allows free text. msg should be //! stored in the output queue to be returned by eval().
c6245b1999-12-31Martin Stjernholm  optional mixed read();
ed81751999-12-11Martin Stjernholm  //! Define to allow streaming operation. Returns the evaluated //! result so far, but does not do any evaluation. Returns Void if //! there's no data (for sequential types the empty value is also //! ok).
56532d1999-12-19Martin Stjernholm  mixed eval();
ed81751999-12-11Martin Stjernholm  //! Evaluates the data fed so far and returns the result. The result //! returned by previous eval() calls should not be returned again //! as (part of) this return value. Returns Void if there's no data //! (for sequential types the empty value is also ok).
c6245b1999-12-31Martin Stjernholm  optional PCode p_compile();
ed81751999-12-11Martin Stjernholm  //! Define this to return a p-code representation of the current //! stream, which always is finished.
c6245b1999-12-31Martin Stjernholm  optional void reset (Context ctx, Type type, mixed... args);
ed81751999-12-11Martin Stjernholm  //! Define to support reuse of a parser object. It'll be called //! instead of making a new object for a new stream. It keeps the
c6245b1999-12-31Martin Stjernholm  //! static configuration, i.e. the type. Note that this function //! needs to deal with leftovers from add_runtime_tag() for //! TagSetParser objects.
ed81751999-12-11Martin Stjernholm 
c6245b1999-12-31Martin Stjernholm  optional Parser clone (Context ctx, Type type, mixed... args);
ed81751999-12-11Martin Stjernholm  //! Define to create new parser objects by cloning instead of //! creating from scratch. It returns a new instance of this parser
c6245b1999-12-31Martin Stjernholm  //! with the same static configuration, i.e. the type. The instance //! this function is called in is never actually used for parsing.
ed81751999-12-11Martin Stjernholm 
2bd21a2000-01-05Martin Stjernholm  static void create (Context ctx, Type _type, mixed... args)
ed81751999-12-11Martin Stjernholm  { context = ctx; type = _type; } // Internals. Parser _next_free; // Used to link together unused parser objects for reuse.
c6245b1999-12-31Martin Stjernholm  Parser _parent; // The parent parser if this one is nested.
2bd21a2000-01-05Martin Stjernholm 
01e43b2000-01-14Martin Stjernholm  Stdio.File _source_file; mapping _defines; // These two are compatibility kludges for use with parse_rxml().
2bd21a2000-01-05Martin Stjernholm  string _sprintf() {return "RXML.Parser";}
}

class TagSetParser
//! Base interface for parsers whose evaluation is driven by a tag
//! set. It supplies the evaluation and compilation functionality.
//! An implementation is expected to call Tag._handle_tag() from
//! feed() and finish() for each tag it encounters, and
//! Parser.handle_var() for each variable reference. It must also be
//! able to carry on cleanly after a throw() from Tag._handle_tag().
{
  inherit Parser;

  constant tag_set_eval = 1;
  // Marks the program as a tag set parser; Type.`() looks at this
  // constant to decide whether a per tag set parser cache is needed.

  // Services.

  mixed eval()
  //! Evaluation is the same thing as reading for a tag set parser, so
  //! this simply forwards to read() in the actual object.
  {
    return this_object()/*HMM*/->read();
  }

  // Interface.

  TagSet tag_set;
  //! The tag set this parser instance parses with.

  optional void reset (Context ctx, Type type, TagSet tag_set, mixed... args);
  optional Parser clone (Context ctx, Type type, TagSet tag_set, mixed... args);
  //! Same roles as in the inherited parser interface, but with the
  //! tag set as an additional argument after the type.

  static void create (Context ctx, Type type, TagSet tset, mixed... args)
  //! The static configuration of a tag set parser consists of both
  //! the type and the tag set. The extra arguments are not passed on
  //! to the inherited create().
  {
    ::create (ctx, type);
    tag_set = tset;
  }

  mixed read();
  //! Must be implemented. The evaluation takes place in
  //! Tag._handle_tag() or similar, so this always does the same
  //! thing as eval().

  void add_runtime_tag (Tag tag);
  //! Adds a tag that exists from this point onwards, but only in the
  //! current parser instance.

  void remove_runtime_tag (string|Tag tag);
  //! Removes a tag that was added with add_runtime_tag().

  string _sprintf()
  {
    return "RXML.TagSetParser";
  }
}
class PNone
//! The identity parser. It only returns its input.
{
  inherit Parser;

  string data = "";
  // Everything that has been fed so far.

  int evalpos = 0;
  // Position in data up to which eval() has already returned text.

  int feed (string in)
  //! Appends in to the buffered data. Always returns 1.
  {
    data += in;
    return 1;
  }

  void finish (void|string in)
  //! Appends the last piece of data, if any is given.
  {
    if (in) data += in;
  }

  string eval()
  //! Returns the data that has been added since the previous call.
  {
    string res = data[evalpos..];
    evalpos = sizeof (data);
    return res;
  }

  string byte_compile()
  //! The "compiled" form is simply the buffered data itself.
  {
    return data;
  }

  string byte_interpret (string byte_code, Context ctx)
  //! Returns byte_code unchanged; the context is not used.
  {
    return byte_code;
  }

  void reset (Context ctx)
  //! Prepares the object for reuse in the given context by dropping
  //! all buffered data.
  {
    context = ctx;
    data = "";
    evalpos = 0;
  }

  string _sprintf() {return "RXML.PNone";}
}

mixed simple_parse (string in, void|program parser)
//! A convenience function to parse a string with no type info, no tag
//! set, and no variable references. The parser defaults to PExpr.
//! The string is evaluated in a freshly created context that uses
//! the empty tag set.
{
  // FIXME: Recycle contexts?
  return t_any (parser || PExpr)->eval (in, Context (empty_tag_set));
}

//! Types.

class Type
//! A static type definition. It does type checking and specifies some
//! properties of the type. It may also contain a Parser program that
//! will be used to read text and evaluate values of this type. Note
//! that the parser is not relevant for type checking.
{
  constant is_RXML_Type = 1;

  //! Interface.

  //!string name;
  //! Unique type identifier. Required and considered constant. Type
  //! hierarchies are currently implemented with glob patterns, e.g.
  //! "image/png" is a subtype of "image/*", and "array(string)" is a
  //! subtype of "array(*)".

  //!mixed sequential;
  //! Nonzero if data of this type is sequential, defined as:
  //!   o  One or more data items can be concatenated with `+.
  //!   o  (Sane) parsers are homomorphic on the type, i.e.
  //!         eval ("da") + eval ("ta") == eval ("da" + "ta")
  //!      and
  //!         eval ("data") + eval ("") == eval ("data")
  //!      provided the data is only split between (sensibly defined)
  //!      atomic elements.

  //!mixed empty_value;
  //! The empty value for sequential data types, i.e. what eval ("")
  //! would produce.

  //!mixed free_text;
  //! Nonzero if the type keeps the free text between parsed tokens,
  //! e.g. the plain text between tags in HTML. The type must be
  //! sequential and use strings.

  void type_check (mixed val);
  //! Checks whether the given value is a valid one of this type. Type
  //! errors are thrown with RXML.rxml_error().

  Type clone()
  //! Returns a copy of the type. The copy is a new instance of the
  //! same program with the same parser program and parser arguments.
  //! It shares the type object cache with the original, but starts
  //! out with no cached parser objects of its own.
  {
    Type newtype = object_program (this_object())();
    newtype->_parser_prog = _parser_prog;
    newtype->_parser_args = _parser_args;
    newtype->_t_obj_cache = _t_obj_cache;
    return newtype;
  }

  //! Services.

  int `== (mixed other)
  //! Two types are equal if they have the same name; the parser is
  //! not taken into account.
  {
    return objectp (other) && ([object] other)->is_RXML_Type &&
      ([object(Type)] other)->name == this_object()->name;
  }

  int subtype_of (Type other)
  //! Returns nonzero if the name of this type matches the name of
  //! other when the latter is used as a glob pattern.
  {
    return glob ([string] other->name, [string] this_object()->name);
  }

  Type `() (program/*(Parser)HMM*/ newparser, mixed... parser_args)
  //! Returns a type identical to this one, but which has the given
  //! parser. parser_args is passed as extra arguments to the
  //! create()/reset()/clone() functions.
  //!
  //! Types without parser arguments are cached per parser program, so
  //! the same object is returned for repeated calls. A type with
  //! parser arguments is always a fresh object.
  {
    Type newtype;
    if (sizeof (parser_args)) {	// Can't cache this.
      newtype = clone();
      newtype->_parser_prog = newparser;
      newtype->_parser_args = parser_args;
      if (newparser->tag_set_eval) newtype->_p_cache = ([]);
    }
    else {
      if (!_t_obj_cache) _t_obj_cache = ([]);
      if (!(newtype = _t_obj_cache[newparser]))
        // The cache may be shared with other type objects through
        // clone(), so only types without parser arguments may be put
        // into it. This object itself qualifies only if it has no
        // arguments, and a clone must not keep the arguments that
        // were given for the parser of this object.
        if (newparser == _parser_prog && !sizeof (_parser_args))
          _t_obj_cache[newparser] = newtype = this_object();
        else {
          _t_obj_cache[newparser] = newtype = clone();
          newtype->_parser_prog = newparser;
          newtype->_parser_args = ({});
          if (newparser->tag_set_eval) newtype->_p_cache = ([]);
        }
    }
    return newtype;
  }

  inline Parser get_parser (Context ctx, void|TagSet tag_set)
  //! Returns a parser instance initialized with the given context.
  //! For tag set parsers, tag_set is used if given and the tag set of
  //! the context otherwise.
  //!
  //! A parser object is obtained in this order of preference: An
  //! unused object from the free list is reset(), a prototype object
  //! is clone()d, or a new object is created from the parser program.
  //! In the last case the new object becomes the prototype for
  //! future clones if it implements clone().
  {
    Parser p;
    if (_p_cache) {		// It's a tag set parser.
      TagSet tset;
      // vvv Using interpreter lock from here.
      PCacheObj pco = _p_cache[tset = tag_set || ctx->tag_set];
      // A cache entry is only usable as long as the tag set hasn't
      // changed generation since the entry was made.
      if (pco && pco->tag_set_gen == tset->generation) {
        if ((p = pco->free_parser)) {
          pco->free_parser = p->_next_free;
          // ^^^ Using interpreter lock to here.
          p->data_callback = p->compile = 0;
          p->reset (ctx, this_object(), tset, @_parser_args);
        }
        else
          // ^^^ Using interpreter lock to here.
          if (pco->clone_parser)
            p = pco->clone_parser->clone (ctx, this_object(), tset, @_parser_args);
          else if ((p = _parser_prog (ctx, this_object(), tset, @_parser_args))->clone)
            // pco->clone_parser might already be initialized here due
            // to race, but that doesn't matter.
            p = (pco->clone_parser = p)->clone (ctx, this_object(), tset, @_parser_args);
      }
      else {
        // ^^^ Using interpreter lock to here.
        pco = PCacheObj();
        pco->tag_set_gen = tset->generation;
        _p_cache[tset] = pco;	// Might replace an object due to race, but that's ok.
        if ((p = _parser_prog (ctx, this_object(), tset, @_parser_args))->clone)
          // pco->clone_parser might already be initialized here due
          // to race, but that doesn't matter.
          p = (pco->clone_parser = p)->clone (ctx, this_object(), tset, @_parser_args);
      }
    }
    else {
      if ((p = free_parser)) {
        // Relying on interpreter lock here.
        free_parser = p->_next_free;
        p->data_callback = p->compile = 0;
        p->reset (ctx, this_object(), @_parser_args);
      }
      else if (clone_parser)
        // Relying on interpreter lock here.
        p = clone_parser->clone (ctx, this_object(), @_parser_args);
      else if ((p = _parser_prog (ctx, this_object(), @_parser_args))->clone)
        // clone_parser might already be initialized here due to race,
        // but that doesn't matter.
        p = (clone_parser = p)->clone (ctx, this_object(), @_parser_args);
    }
    return p;
  }

  mixed eval (string in, void|Context ctx, void|TagSet tag_set,
              void|Parser|PCode parent, void|int dont_switch_ctx)
  //! Convenience function to parse and evaluate the value in the
  //! given string. If a context isn't given, the current one is used.
  //! The current context and ctx are assumed to be the same if
  //! dont_switch_ctx is nonzero.
  //!
  //! parent is stored as the parent of the parser that is used. The
  //! result is checked with type_check() if the context has type
  //! checking enabled.
  {
    mixed res;
    if (!ctx) ctx = get_context();
    // The identity parser would only return the input, so no parser
    // object is needed at all in that case.
    if (_parser_prog == PNone) res = in;
    else {
      Parser p = get_parser (ctx, tag_set);
      p->_parent = parent;
      if (dont_switch_ctx) p->finish (in); // Optimize the job in p->write_end().
      else p->write_end (in);
      res = p->eval();
      // Parsers that implement reset() are put back first in the
      // relevant free list so that get_parser() can reuse them.
      if (p->reset)
        if (_p_cache) {
          // Relying on interpreter lock in this block.
          PCacheObj pco = _p_cache[tag_set || ctx->tag_set];
          p->_next_free = pco->free_parser;
          pco->free_parser = p;
        }
        else {
          // Relying on interpreter lock in this block.
          p->_next_free = free_parser;
          free_parser = p;
        }
    }
    if (ctx->type_check) type_check (res);
    return res;
  }

  // Internals.

  program/*(Parser)HMM*/ _parser_prog = PNone;
  // The parser to use. Should never be changed in a type object.

  /*private*/ array(mixed) _parser_args = ({});
  // Extra arguments passed to create()/reset()/clone() in the parser.

  /*private*/ mapping(program:Type) _t_obj_cache;
  // To avoid creating new type objects all the time in `(). Only
  // types without parser arguments are stored here.

  // Cache used for parsers that don't depend on the tag set.
  private Parser clone_parser;	// Used with Parser.clone().
  private Parser free_parser;	// The list of objects to reuse with Parser.reset().

  // Cache used for parsers that depend on the tag set.
  private class PCacheObj
  {
    int tag_set_gen;		// The tag set generation this entry is valid for.
    Parser clone_parser;
    Parser free_parser;
  }
  /*private*/ mapping(TagSet:PCacheObj) _p_cache;
  // Initialized by `() to an empty mapping iff the parser program is
  // a tag set parser; get_parser() and eval() test it for that.

  string _sprintf() {return "RXML.Type";}
}
static class TypeAny
//! The wildcard type: Completely unspecified and nonsequential.
{
  inherit Type;

  constant name = "*";

  string _sprintf()
  {
    return "RXML.t_any";
  }
}

TypeAny t_any = TypeAny();
//! The instance of the completely unspecified type.

static class TypeText
//! The standard type for generic document text. It is sequential
//! with the empty string as empty value, and it keeps the free text
//! between parsed tokens.
{
  inherit Type;

  constant name = "text/*";
  constant free_text = 1;
  constant sequential = 1;
  constant empty_value = "";

  string _sprintf()
  {
    return "RXML.t_text";
  }
}

TypeText t_text = TypeText();
//! The instance of the generic text type.

static class TypeHtml
//! The type for HTML text. It has the same properties as the generic
//! text type, which it inherits, but is named "text/html".
{
  inherit TypeText;

  constant name = "text/html";

  string _sprintf()
  {
    return "RXML.t_html";
  }
}

TypeHtml t_html = TypeHtml();
//! The instance of the HTML text type.
// P-code compilation and evaluation.

class VarRef
//! A helper for representing variable reference tokens. An object
//! holds the scope name and variable name of a reference, and the
//! access functions forward to the given context.
{
  constant is_RXML_VarRef = 1;

  string scope, var;

  static void create (string _scope, string _var) {scope = _scope, var = _var;}

  int valid (Context ctx) {return ctx->exist_scope (scope);}
  //! Returns what ctx->exist_scope() says about the scope.

  mixed get (Context ctx) {return ctx->get_var (var, scope);}
  //! Returns the value of the variable in the given context.

  mixed set (Context ctx, mixed val) {return ctx->set_var (var, val, scope);}
  //! Sets the variable in the given context and returns the result
  //! from ctx->set_var().

  void delete (Context ctx) {ctx->delete_var (var, scope);}
  //! Deletes the variable in the given context.

  string name() {return scope + "." + var;}
  //! Returns the reference on the form "scope.var".

  string _sprintf() {return "RXML.VarRef(" + scope + "." + var + ")";}
}

class PCode
//! Holds p-code and evaluates it. P-code is the intermediate form
//! after parsing and before evaluation.
{
  constant is_RXML_PCode = 1;
  constant thrown_at_unwind = 1;

  array p_code = ({});

  int error_count;
  //! Number of RXML errors that occurred during evaluation. If this
  //! is nonzero, the value from eval() shouldn't be trusted.

  mixed eval (Context ctx)
  //! Evaluates the p-code in the given context. Not implemented yet.
  {
    // FIXME
  }

  function(Context:mixed) compile();
  //! Returns a compiled function for doing the evaluation. The
  //! function will receive a context to do the evaluation in.

  string _sprintf() {return "RXML.PCode";}

  // Internals.

  void report_error (string msg)
  // Not implemented yet.
  {
    // FIXME
  }

  PCode|Parser _parent;
  // The parent evaluator if this one is nested.
}

//! Some parser tools.

static class VoidType
// The class of the Void object. It disappears in additions and is
// false in a boolean context.
{
  // Void + a + b + ... gives a + b + ..., and Void alone stays Void.
  mixed `+ (mixed... vals) {return sizeof (vals) ? predef::`+ (@vals) : this_object();}
  // val + Void gives val.
  mixed ``+ (mixed val) {return val;}
  int `!() {return 1;}
  string _sprintf() {return "RXML.Void";}
};

VoidType Void = VoidType();
//! An object representing the void value. Works as initializer for
//! sequences, since Void + anything == anything + Void == anything.

class ScanStream
//! A helper class for the input and scanner stage in a parser. It's a
//! stream that takes unparsed strings and splits them into tokens
//! which are queued. Intended to be inherited in a Parser class.
{
  // The token queue is a singly linked list of token arrays.
  private class Link
  {
    array data;
    Link next;
  }
  private Link head = Link();	// Last link is an empty eof marker.
  private Link tail = head;
  private int next_token = 0;	// Index in head->data of the next token to read.
  private string end = "";	// Unparsed data left over from the last scan.
  private int fin = 0;		// Nonzero when finish() has been called.

  array scan (string in, int finished);
  //! The scanner function. It gets an unparsed string and should
  //! return an array of tokens. If the second argument is nonzero,
  //! there won't be any more data later. If the second argument is
  //! zero, the last item in the returned array is handled as unparsed
  //! data that will be passed back to the scanner later. Tokens may
  //! be of any type. Use VarRef objects for variables.

  void feed (string in)
  //! Scans in together with any unparsed data left from earlier and
  //! queues the resulting tokens. The last element returned by the
  //! scanner is kept as unparsed data for the next call.
  {
#ifdef MODULE_DEBUG
    if (fin) error ("Cannot feed data to a finished stream.\n");
#endif
    array tokens = scan (end + in, 0);
    end = [string] tokens[-1];
    if (sizeof (tokens) > 1) {
      tail->data = tokens[..sizeof (tokens) - 2];
      tail = tail->next = Link();
    }
  }

  void finish (void|string in)
  //! Marks the stream as finished. The given data, if any, and the
  //! unparsed data left from earlier are scanned a last time and the
  //! resulting tokens are queued.
  {
    if (in || !fin && sizeof (end)) {
#ifdef MODULE_DEBUG
      if (in && fin) error ("Cannot feed data to a finished stream.\n");
#endif
      fin = 1;
      if (in) end += in;
      tail->data = scan (end, 1);
      tail = tail->next = Link();
    }
    else
      // Nothing is left to scan, but the stream is finished all the
      // same; finished() must report that.
      fin = 1;
  }

  void reset()
  //! Returns the stream to its initial empty and unfinished state.
  {
    head = Link();
    tail = head;
    next_token = 0;
    end = "";
    fin = 0;
  }

  mixed read()
  //! Returns the next token, or Void if there's no more data.
  {
    // Only the eof marker link lacks a next link.
    while (head->next)
      if (next_token >= sizeof (head->data)) {
        next_token = 0;
        head = head->next;
      }
      else
        return head->data[next_token++];
    return Void;
  }

  void unread (mixed... put_back)
  //! Puts back tokens and variable references at the beginning of the
  //! stream so that the leftmost argument will be read first.
  {
    int i = sizeof (put_back);
    // Fill the already consumed slots in the first link from the end
    // of put_back. This must stop at the start of the array; a
    // negative index would address the array from its end and
    // overwrite tokens that haven't been read yet.
    while (i && next_token) head->data[--next_token] = put_back[--i];
    if (i) {
      // The remaining i tokens don't fit, so a new link is put first
      // in the list. It's padded at the start to a total of at least
      // 32 slots to leave room for later calls.
      Link l = Link();
      l->next = head, head = l;
      l->data = allocate (next_token = max (32 - i, 0)) + put_back[..i - 1];
    }
  }

  array read_all()
  //! Returns all tokens that are left in the queue and removes them
  //! from it.
  {
    array data;
    if (next_token) {
      // The first link is partly consumed.
      data = head->data[next_token..];
      head = head->next;
      next_token = 0;
    }
    else
      data = ({});
    while (head->next) {
      data += head->data;
      head = head->next;
    }
    return data;
  }

  int finished()
  //! Returns nonzero if the write end is finished.
  {
    return fin;
  }

  string _sprintf() {return "RXML.ScanStream";}
}

// Various internal stuff.

// Argh!
static program PHtml;
static program PExpr;

void _fix_module_ref (string name, mixed val)
// Sets the module level variable with the given name to val. Only
// the names in the switch below are accepted. Errors, including the
// one for an unknown name, are caught and written to stderr with a
// backtrace instead of being thrown to the caller.
{
  mixed err = catch {
    switch (name) {
      case "PHtml": PHtml = [program] val; break;
      case "PExpr": PExpr = [program] val; break;
      case "empty_tag_set": empty_tag_set = [object(TagSet)] val; break;
      default: error ("Herk\n");
    }
  };
  if (err) werror (describe_backtrace (err));
}