Branch: Tag:

2000-08-12

2000-08-12 04:49:08 by Martin Stjernholm <mast@lysator.liu.se>

Cleaned up various details in the type system and documented some
things more clearly. Incompatible change: t_none is now t_nil. Void is
now called nil, but the old name still works.

Rev: server/etc/modules/RXML.pmod/module.pmod:1.99

2:   //!   //! Created 1999-07-30 by Martin Stjernholm.   //! - //! $Id: module.pmod,v 1.98 2000/08/09 13:25:53 mast Exp $ + //! $Id: module.pmod,v 1.99 2000/08/12 04:49:08 mast Exp $      //! Kludge: Must use "RXML.refs" somewhere for the whole module to be   //! loaded correctly.
92:    //! result type has a parser, it'll be used to parse any strings in    //! the exec array returned from Frame.do_enter() and similar    //! callbacks. +  //! +  //! When the tag is used in content of some type, it suffices that +  //! the content type is a subtype of any type in result_types. The +  //! tag must therefore be prepared to produce result of more +  //! specific types than those declared here. I.e. the extreme case, +  //! t_any, means that this tag takes the responsibility to produce +  //! result of any type that's asked for, not that it has the liberty +  //! to produce results of any type it chooses.       //!program Frame;    //!object(Frame) Frame();
141:    //! Make an initialized frame for the tag. Typically useful when    //! returning generated tags from e.g. RXML.Frame.do_process(). The    //! argument values and the content are normally not parsed. -  //! -  //! Note: Never reuse the same frame object. +     {    Tag this = this_object();    object/*(Frame)HMM*/ frame = ([function(:object/*(Frame)HMM*/)] this->Frame)();
228:    m_delete (ustate, parser);    if (!sizeof (ustate)) ctx->unwind_state = 0;    } -  else frame = `() (args, Void); -  else frame = `() (args, Void); +  else frame = `() (args, nil); +  else frame = `() (args, nil);       if (!zero_type (frame->raw_tag_text)) {    if (splice_args)
241:    mixed err = catch {    frame->_eval (parser, args, content);    mixed res; -  if ((res = frame->result) == Void) return ({}); -  if (frame->result_type->quoting_scheme != parser->type->quoting_scheme) -  res = parser->type->quote (res); +  if ((res = frame->result) == nil) return ({}); +  if (!parser->type->encoding_type || +  frame->result_type->encoding_type != parser->type->encoding_type) +  res = parser->type->convert (res, frame->result_type);    return ({res});    };   
269:    // Callback similar to _handle_tag() for tag set parsers to handle    // PI tags.    { -  sscanf (content, "%[ \t\n\r]%s", string ws, content); -  if (ws == "" && content != "") +  sscanf (content, "%[ \t\n\r]%s", string ws, string rest); +  if (ws == "" && rest != "")    // The parser didn't match a complete name, so this is a false    // alarm for an unknown PI tag.    return 0;
833:    if (objectp (vars)) {    if (zero_type (val = ([object(Scope)] vars)->`[] (    var, this_object(), scope_name || "_")) || -  val == Void) +  val == nil)    return ([])[0];    }    else
843:    return    zero_type (val = ([object(Value)] val)->rxml_var_eval (    this_object(), var, scope_name || "_", want_type)) || -  val == Void ? ([])[0] : val; +  val == nil ? ([])[0] : val;    }    else    if (want_type)    return    // FIXME: Some system to find out the source type?    zero_type (val = want_type->convert (val)) || -  val == Void ? ([])[0] : val; +  val == nil ? ([])[0] : val;    else    return val;    }
1479:   //! The following flags specifies whether certain conditions must be   //! met for a cached frame to be considered (if RXML.Frame.is_valid()   //! is defined). They may be read directly after do_return() returns. - //! The tag name is always the same. FIXME: These are ideas only; not - //! yet implemented. + //! The tag name is always the same. FIXME: These are ideas only; + //! nothing is currently implemented and they might change + //! arbitrarily.      constant FLAG_CACHE_DIFF_ARGS = 0x00010000;   //! If set, the arguments to the tag need not be the same (using
1498:   //! those that has been accessed with get_var()) need not have the   //! same values (using equal()) as the actual variables.    - constant FLAG_CACHE_SAME_STACK = 0x00100000; - //! If set, the stack of call frames needs to be the same. + constant FLAG_CACHE_DIFF_TAG_INSTANCE = 0x00100000; + //! If set, the tag in the source document needs to be the same, so + //! the same frame may be used when the tag occurs in another context.      constant FLAG_CACHE_EXECUTE_RESULT = 0x00200000;   //! If set, an exec array will be stored in the frame instead of the
1507:   //! result.      class Frame - //! A tag instance. + //! A tag instance. A new frame is normally created for every parsed + //! tag in the source document. It might be reused both when the + //! document is requested again and when the tag is reevaluated in a + //! loop, but it's not certain in either case. Therefore, be careful + //! about using variable initializers.   {    constant is_RXML_Frame = 1;    constant thrown_at_unwind = 1;
1524:       int flags;    //! Various bit flags that affect parsing. See the FLAG_* constants. +  //! It's copied from Tag.flag when the frame is created.       mapping(string:mixed) args; -  //! The arguments passed to the tag. Set before any frame callbacks -  //! are called. Not set for processing instruction (FLAG_PROC_INSTR) +  //! The (parsed and evaluated) arguments passed to the tag. Set +  //! every time the frame is executed, before any frame callbacks are +  //! called. Not set for processing instruction (FLAG_PROC_INSTR)    //! tags.       Type content_type;    //! The type of the content.    -  mixed content = Void; +  mixed content;    //! The content, if any. Set before do_process() and do_return() are -  //! called. +  //! called. Initialized to RXML.nil every time the frame executed.       Type result_type; -  //! The required result type. Set before any frame callbacks are -  //! called. The frame should produce a result of this type. +  //! The required result type. If it has a parser, it will affect how +  //! execution arrays are handled; see the return value for +  //! do_return() for details. +  //! +  //! This is set by the type inference from Tag.result_types before +  //! any frame callbacks are called. The frame may change this type, +  //! but it must produce a result value which matches it. The value +  //! is converted before being inserted into the parent content if +  //! necessary. An exception (which this frame can't catch) is thrown +  //! if conversion is impossible.    -  mixed result = Void; -  //! The result, which is assumed to be either Void or a valid value -  //! according to result_type. The exec arrays returned by e.g. +  mixed result; +  //! The result, which is assumed to be either RXML.nil or a valid +  //! value according to result_type. The exec arrays returned by e.g.    //! do_return() changes this. It may also be set directly. -  +  //! Initialized to RXML.nil every time the frame executed. +  //! +  //! If result_type has a parser set, it will be used by do_return() +  //! etc before assigning to this variable. Thus it contains the +  //! value after any parsing and will not be parsed again.       //!mapping(string:mixed) vars;    //! Set this to introduce a new variable scope that will be active
1583:    //! do_return() is called after the last call to do_process().    //!    //! The result_type variable is set to the type of result the parser -  //! wants. It's any type or subtype that is valid by -  //! tag->result_type. If the result type is sequential, it's spliced -  //! into the surrounding content, otherwise it replaces the previous -  //! value of the content, if any. If the result is Void, it does not -  //! affect the surrounding content at all. +  //! wants. The tag may change it; the value will then be converted +  //! to the type that the parser wants. If the result type is +  //! sequential, it's spliced into the surrounding content, otherwise +  //! it replaces the previous value of the content, if any. If the +  //! result is RXML.nil, it does not affect the surrounding content +  //! at all.    //!    //! Return values:    //!
1624:    //! accurately, evaluation) needs to be done.    //!    //! If an array instead of a function is given, the array is handled -  //! as above. If the result variable is Void (which it defaults to), -  //! content is used as result if it's of a compatible type. +  //! as above. If the result variable is RXML.nil (which it defaults +  //! to), content is used as result if it's of a compatible type.    //!    //! If there is no do_return() and the result from parsing the -  //! content is not Void, it's assigned to or added to the content -  //! variable. Assignment is used if the content type is +  //! content is not RXML.nil, it's assigned to or added to the +  //! content variable. Assignment is used if the content type is    //! nonsequential, addition otherwise. Thus earlier values are    //! simply overridden for nonsequential types.    //!    //! Regarding do_process only:    //!    //! Normally the content variable is set to the parsed content of -  //! the tag before do_process() is called. This may be Void if the -  //! content parsing didn't produce any result. +  //! the tag before do_process() is called. This may be RXML.nil if +  //! the content parsing didn't produce any result.    //!    //! piece is used when the tag is operating in streaming mode (i.e.    //! FLAG_STREAM_CONTENT is set). It's then set to each successive
1828:    Frame this = this_object();    Context ctx = parser->context;    int i = 0; -  mixed res = Void; +  mixed res = nil;    Parser subparser = 0;       mixed err = catch {    if (parent_scope) LEAVE_SCOPE (ctx, this);       for (; i < sizeof (exec); i++) { -  mixed elem = exec[i], piece = Void; +  mixed elem = exec[i], piece = nil;       switch (sprintf ("%t", elem)) {    case "string":
1878:    }       if (result_type->sequential) res += piece; -  else if (piece != Void) result = res = piece; +  else if (piece != nil) result = res = piece;    }       if (result_type->sequential) result += res;
1893:    mapping(string:mixed)|mapping(object:array) ustate;    if ((ustate = ctx->unwind_state) && !zero_type (ustate->stream_piece)) {    // Subframe wants to stream. Update stream_piece and send it on. -  if (result_type->quoting_scheme != parser->type->quoting_scheme) -  res = parser->type->quote (res); +  if (!result_type->encoding_type || +  result_type->encoding_type != parser->type->encoding_type) +  res = parser->type->convert (res, result_type);    if (result_type->sequential)    ustate->stream_piece = res + ustate->stream_piece; -  else if (ustate->stream_piece == Void) +  else if (ustate->stream_piece == nil)    ustate->stream_piece = res;    }    ustate->exec_left = exec[i..]; // Left to execute.
1975:    "unparsed frame.\n");   #endif    raw_args = args, args = 0; -  raw_content = content, content = Void; +  raw_content = content, content = nil;   #ifdef MODULE_DEBUG    if (!stringp (raw_content))    PRE_INIT_ERROR ("Content is not a string in unparsed tag frame.\n");
2002:    PRE_INIT_ERROR ("Reuse of frame in different context.\n");   #endif    up = ctx->frame; -  piece = Void; +  content = result = piece = nil;    if (++ctx->frame_depth >= ctx->max_frame_depth) {    ctx->frame = this;    ctx->frame_depth--;
2140: Inside #if defined(DEBUG)
   if (ctx->unwind_state)    fatal_error ("Internal error: Clobbering unwind_state "    "to do streaming.\n"); -  if (piece != Void) +  if (piece != nil)    fatal_error ("Internal error: Thanks, we think about how nice "    "it must be to play the harmonica...\n");   #endif -  if (result_type->quoting_scheme != parser->type->quoting_scheme) -  res = parser->type->quote (res); +  if (!result_type->encoding_type || +  result_type->encoding_type != parser->type->encoding_type) +  res = parser->type->convert (res, result_type);    ctx->unwind_state = (["stream_piece": res]);    throw (this);    }
2196:    // Squeeze out any free text from the subparser first.    mixed res = subparser->read();    if (content_type->sequential) piece = res + piece; -  else if (piece == Void) piece = res; -  if (piece != Void) { +  else if (piece == nil) piece = res; +  if (piece != nil) {    array|function(RequestID,void|mixed:array) do_process;    if ((do_process =    [array|function(RequestID,void|mixed:array)]
2218: Inside #if defined(DEBUG)
   fatal_error ("Internal error: "    "Clobbering unwind_state->stream_piece.\n");   #endif -  if (result_type->quoting_scheme != -  parser->type->quoting_scheme) -  res = parser->type->quote (res); +  if (!result_type->encoding_type || +  result_type->encoding_type != +  parser->type->encoding_type) +  res = parser->type->convert (res, result_type);    ctx->unwind_state->stream_piece = res;    throw (this);    }
2229:    else if (flags & FLAG_STREAM_RESULT) {    // do_process() finished the stream. Ignore remaining content.    ctx->unwind_state = 0; -  piece = Void; +  piece = nil;    break;    }    } -  piece = Void; +  piece = nil;    }    if (finished) break;    }    else { // The frame doesn't handle streamed content. -  piece = Void; +  piece = nil;    if (finished) {    mixed res = subparser->eval(); // Might unwind.    if (content_type->sequential) content += res; -  else if (res != Void) content = res; +  else if (res != nil) content = res;    break;    }    }
2271: Inside #if defined(DEBUG)
   if (ctx->unwind_state)    fatal_error ("Internal error: Clobbering unwind_state "    "to do streaming.\n"); -  if (piece != Void) +  if (piece != nil)    fatal_error ("Internal error: Thanks, we think about how nice "    "it must be to play the harmonica...\n");   #endif -  if (result_type->quoting_scheme != parser->type->quoting_scheme) -  res = parser->type->quote (res); +  if (!result_type->encoding_type || +  result_type->encoding_type != parser->type->encoding_type) +  res = parser->type->convert (res, result_type);    ctx->unwind_state = (["stream_piece": res]);    throw (this);    }
2304:    exec = 0;    }    } -  else if (result == Void && !(flags & FLAG_EMPTY_ELEMENT)) +  else if (result == nil && !(flags & FLAG_EMPTY_ELEMENT))    if (result_type->_parser_prog == PNone) {    if (content_type->subtype_of (result_type))    result = content;
2375:    if (id->misc->trace_leave && tag)    TRACE_LEAVE ("exception");    ctx->handle_exception (err, parser); // Will rethrow unknown errors. -  result = Void; +  result = nil;    action = "return";    }   
2677:    //! Must be set to nonzero before a stream is fed which should be    //! compiled to p-code.    -  //!mixed unwind_safe; +  //!int unwind_safe;    //! If nonzero, the parser supports unwinding with throw()/catch().    //! Whenever an exception is thrown from some evaluation function,    //! it should be able to call that function again with identical
2703:       optional mixed read();    //! Define to allow streaming operation. Returns the evaluated -  //! result so far, but does not do any evaluation. Returns Void if -  //! there's no data (for sequential types the empty value is also +  //! result so far, but does not do any evaluation. Returns RXML.nil +  //! if there's no data (for sequential types the empty value is also    //! ok).       mixed eval();    //! Evaluates the data fed so far and returns the result. The result    //! returned by previous eval() calls should not be returned again -  //! as (part of) this return value. Returns Void if there's no data -  //! (for sequential types the empty value is also ok). +  //! as (part of) this return value. Returns RXML.nil if there's no +  //! data (for sequential types the empty value is also ok).       optional PCode p_compile();    //! Define this to return a p-code representation of the current
2902:    //!string name;    //! Unique type identifier. Required and considered constant. Type    //! hierarchies are currently implemented with glob patterns, e.g. -  //! "image/png" is a subtype of "image/*", and "array(string)" is a -  //! subtype of "array(*)". +  //! "image/png" is a subtype of "image/*". However, this syntax will +  //! be developed further, so for now use only characters [a-zA-Z/] +  //! to write MIME-like types and use only * for globbing.    -  //!mixed sequential; +  //!int sequential;    //! Nonzero if data of this type is sequential, defined as:    //! o One or more data items can be concatenated with `+.    //! o (Sane) parsers are homomorphic on the type, i.e.
2918:    //!mixed empty_value;    //! The empty value, i.e. what eval ("") would produce.    -  //!mixed free_text; +  //!int free_text;    //! Nonzero if the type keeps the free text between parsed tokens,    //! e.g. the plain text between tags in XML. The type must be    //! sequential and use strings.
2927:    //! Checks whether the given value is a valid one of this type. Type    //! errors are thrown with RXML.parse_error().    -  //!string quoting_scheme; -  //! An identifier for the quoting scheme this type uses, if any. The -  //! quoting scheme specifies how literals needs to be quoted for the -  //! type. Values converted between types with the same quoting -  //! scheme are not quoted. +  //!string encoding_type; +  //! A type name identifying the encoding in this type, if +  //! applicable. Conversion between two types with identical +  //! encoding_type is always a nop, so the call to convert() may be +  //! skipped.    -  mixed quote (mixed val) -  //! Quotes the given value according to the quoting scheme for this -  //! type. -  { -  return val; -  } -  +     mixed convert (mixed val, void|Type from);    //! Converts the given value to this type. If the from type is    //! given, it's the type of the value. Since it's not always known, -  //! this function should try to do something sensible based on the -  //! primitive pike type. If the type can't be reasonably converted, -  //! an RXML fatal should be thrown. +  //! this function should try to do something sensible(*) based on +  //! the primitive pike type, e.g. a string should be considered a +  //! raw literal and be encoded if necessary. If the type can't be +  //! converted, an RXML fatal should be thrown.    //! -  //! Quoting should be done if the from type is missing or has a -  //! different quoting scheme. +  //! *) Beware: This is not yet defined.       Type clone()    //! Returns a copy of the type.
3182:         static class TAny - //! A completely unspecified nonsequential type. + //! A completely unspecified nonsequential type. Every type is a + //! subtype of this one.   {    inherit Type;    constant name = "*"; -  constant quoting_scheme = "none"; -  +     mixed convert (mixed val) {return val;} -  +     string _sprintf() {return "RXML.t_any" + OBJ_COUNT;}   }   TAny t_any = TAny();    - static class TNone - //! A sequential type accepting only the empty value. + static class TNil + //! A sequential type accepting only the value nil. This type is by + //! definition a subtype of every sequential type.   {    inherit Type; -  constant name = "none"; +  constant name = "nil";    constant sequential = 1; -  VoidType empty_value = Void; -  constant quoting_scheme = "none"; -  +  Nil empty_value = nil;    void type_check (mixed val) -  { -  if (val != Void) parse_error ("A value is not accepted.\n"); +  {if (val != nil) parse_error ("A non-nil value is not accepted.\n");} +  mixed convert (mixed val) {type_check (val); return nil;} +  int subtype_of (Type other) {return other->sequential || other == t_any;} +  string _sprintf() {return "RXML.t_nil" + OBJ_COUNT;}   } -  + TNil t_nil = TNil();    -  mixed convert (mixed val) -  { -  type_check (val); -  return Void; -  } -  -  string _sprintf() {return "RXML.t_none" + OBJ_COUNT;} - } - TNone t_none = TNone(); -  +    static class TSame   //! A magic type used in Tag.content_type.   {
3235:    constant sequential = 1;    constant empty_value = "";    constant free_text = 1; -  constant quoting_scheme = "none"; +  constant encoding_type = "none";       string convert (mixed val)    {
3252:   {    inherit TText;    constant name = "text/xml"; -  constant quoting_scheme = "xml"; +  constant encoding_type = "xml";    -  string quote (string val) +  string convert (mixed val, void|Type from)    { -  +  if (mixed err = catch {val = (string) val;}) +  parse_error ("Couldn't convert value to text: " + describe_error (err)); +  if (!from || from->encoding_type != encoding_type)    return replace ( -  val, +  [string] val,    // FIXME: This ignores the invalid Unicode character blocks.    ({"&", "<", ">", "\"", "\'",    "\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007",
3271:    "&#16;", "&#17;", "&#18;", "&#19;", "&#20;", "&#21;", "&#22;", "&#23;",    "&#24;", "&#25;", "&#26;", "&#27;", "&#28;", "&#29;", "&#30;", "&#31;",    })); +  return [string] val;    }    -  string convert (mixed val, void|Type from) -  { -  if (mixed err = catch {val = (string) val;}) -  parse_error ("Couldn't convert value to text: " + describe_error (err)); -  if (!from || from->quoting_scheme != quoting_scheme) -  val = quote ([string] val); -  return val; -  } -  +     string format_tag (string|Tag tag, void|mapping(string:string) args,    void|string content, void|int flags)    //! Returns a formatted XML tag. The flags argument contains a flag
3397:      //! Some parser tools.    - static class VoidType + static class Nil   {    mixed `+ (mixed... vals) {return sizeof (vals) ? predef::`+ (@vals) : this_object();}    mixed ``+ (mixed val) {return val;}    int `!() {return 1;} -  string _sprintf() {return "RXML.Void";} +  string _sprintf() {return "RXML.nil";}    mixed cast(string type)    {    switch(type)
3420:    case "mapping":    return ([]);    default: -  throw( ({ "Cannot cast RXML.Void to "+type+".\n", backtrace() }) ); +  error ("Cannot cast RXML.nil to "+type+".\n");    }    }   };    - VoidType Void = VoidType(); - //! An object representing the void value. Works as initializer for - //! sequences, since Void + anything == anything + Void == anything. + Nil nil = Nil(); + //! An object representing the empty value. Works as initializer for + //! sequences, since nil + anything == anything + nil == anything. It + //! can cast itself to the empty value for the basic Pike types. It + //! also evaluates to false in a boolean context, but it's not equal + //! to 0.    -  + Nil Void = nil; // Compatibility. +    class ScanStream   //! A helper class for the input and scanner stage in a parser. It's a   //! stream that takes unparsed strings and splits them into tokens
3487:    }       mixed read() -  //! Returns the next token, or Void if there's no more data. +  //! Returns the next token, or RXML.nil if there's no more data.    {    while (head->next)    if (next_token >= sizeof (head->data)) {
3495:    head = head->next;    }    else return head->data[next_token++]; -  return Void; +  return nil;    }       void unread (mixed... put_back)