2001-03-01
2001-03-01 03:06:00 by Martin Stjernholm <mast@lysator.liu.se>
-
fe628df80aef3f224234c0353fe3e4eaae046f79
(602 lines)
(+485/-117)
[
Show
| Annotate
]
Branch: 5.2
Improved the design of the type system. Should not give any noticeable
compatibility problems, though. Added types t_int and t_float. Added
upper_case, lower_case and capitalize to the text types.
Rev: server/etc/modules/RXML.pmod/module.pmod:1.134
2:
//!
//! Created 1999-07-30 by Martin Stjernholm.
//!
- //! $Id: module.pmod,v 1.133 2001/02/17 17:57:34 mast Exp $
+ //! $Id: module.pmod,v 1.134 2001/03/01 03:06:00 mast Exp $
//! Kludge: Must use "RXML.refs" somewhere for the whole module to be
//! loaded correctly.
292:
frame->_eval (parser, args, content);
mixed res;
if ((res = frame->result) == nil) return ({});
- if (frame->result_type->encoding_type ?
- frame->result_type->encoding_type != parser->type->encoding_type :
- frame->result_type != parser->type) {
+ if (frame->result_type->name != parser->type->name) {
TAG_DEBUG (frame, sprintf (
"Converting result from %s to %s of surrounding content\n",
frame->result_type->name, parser->type->name));
563:
}
void add_string_entities (mapping(string:string) entities)
- //! Adds a set of entitity replacements that are used foremost by
- //! the PXml parser to decode simple entities like &. The
- //! indices are the entity names without & and ;.
+ //! Adds a set of entity replacements that are used foremost by the
+ //! @[PXml] parser to decode simple entities like @tt{&amp;@}. The
+ //! indices are the entity names without @tt{&@} and @tt{;@}.
{
if (string_entities) string_entities |= entities;
else string_entities = entities + ([]);
833:
//! which is preferably optimized like this:
//!
//! @example
- //! return type && type != RXML.TText ?
- //! type->encode (my_string) : my_string;
+ //! return type && type != RXML.t_text ?
+ //! type->encode (my_string, RXML.t_text) : my_string;
//! @endexample
//!
//! Also, by letting the producer know the type context of the value
842:
//! to adapt the value according to the context it'll be used in,
//! e.g. to return a powerful object if no type conversion is
//! wanted, a simple text representation of it when the type is
- //! @[RXML.TText], and a more nicely formatted representation when
- //! it's @[RXML.THtml].
+ //! @[RXML.t_text], and a more nicely formatted representation when
+ //! it's @[RXML.t_html].
//!
//! @note The @[type] argument being @tt{void|Type@} means that the
//! caller is free to leave out that argument, not that the function
1708:
constant FLAG_DONT_REPORT_ERRORS = FLAG_DONT_RECOVER; // For compatibility.
constant FLAG_RAW_ARGS = 0x00004000;
- //! Special flag to TXml.format_tag(); only defined here as a
- //! placeholder. When this is given to TXml.format_tag(), it only
+ //! Special flag to @[t_xml.format_tag]; only defined here as a
+ //! placeholder. When this is given to @[t_xml.format_tag], it only
//! encodes the argument quote character with the "Roxen encoding"
//! when writing argument values, instead of encoding with entity
//! references. It's intended for reformatting a tag which has been
1896:
//! to), content is used as result if it's of a compatible type.
//!
//! If there is no do_return() and the result from parsing the
- //! content is not RXML.nil, it's assigned to or added to the
- //! content variable. Assignment is used if the content type is
+ //! content is not RXML.nil, it's assigned to or added to the result
+ //! variable. Assignment is used if the content type is
//! nonsequential, addition otherwise. Thus earlier values are
//! simply overridden for nonsequential types.
//!
2220:
mapping(string:mixed)|mapping(object:array) ustate;
if ((ustate = ctx->unwind_state) && !zero_type (ustate->stream_piece)) {
// Subframe wants to stream. Update stream_piece and send it on.
- if (result_type->encoding_type ?
- result_type->encoding_type != parser->type->encoding_type :
- result_type != parser->type)
+ if (result_type->name != parser->type->name)
res = parser->type->encode (res, result_type);
if (result_type->sequential)
ustate->stream_piece = res + ustate->stream_piece;
2547: Inside #if defined(DEBUG)
fatal_error ("Internal error: Thanks, we think about how nice "
"it must be to play the harmonica...\n");
#endif
- if (result_type->encoding_type ?
- result_type->encoding_type != parser->type->encoding_type :
- result_type != parser->type) {
+ if (result_type->name != parser->type->name) {
THIS_TAG_DEBUG (sprintf ("Converting result from %s to %s of "
"surrounding content\n",
result_type->name, parser->type->name));
2674: Inside #if defined(DEBUG)
fatal_error ("Internal error: Clobbering "
"unwind_state->stream_piece.\n");
#endif
- if (result_type->encoding_type ?
- result_type->encoding_type !=
- parser->type->encoding_type :
- result_type != parser->type) {
+ if (result_type->name != parser->type->name) {
THIS_TAG_DEBUG (
sprintf ("Iter[%d]: Converting result from %s to %s"
" of surrounding content\n", debug_iter,
2767: Inside #if defined(DEBUG)
fatal_error ("Internal error: Thanks, we think about how nice "
"it must be to play the harmonica...\n");
#endif
- if (result_type->encoding_type ?
- result_type->encoding_type != parser->type->encoding_type :
- result_type != parser->type) {
+ if (result_type->name != parser->type->name) {
THIS_TAG_DEBUG (sprintf ("Iter[%d]: Converting result from "
"type %s to type %s of surrounding "
"content\n", debug_iter,
2819:
else if (result == nil && !(flags & FLAG_EMPTY_ELEMENT)) {
if (result_type->_parser_prog == PNone) {
- if (content_type->encoding_type ?
- content_type->encoding_type != result_type->encoding_type :
- content_type != result_type) {
+ if (content_type->name != result_type->name) {
THIS_TAG_DEBUG (sprintf ("Assigning content to result after "
"converting from %s to %s\n",
content_type->name, result_type->name));
3092:
else if (val == nil)
parse_error ("%s produced no value to index with %O.\n", scope_name, index);
else if( objectp( val ) && val->`[] ) {
+ #ifdef MODULE_DEBUG
+ Scope scope = [object(Scope)] val;
+ #endif
if (zero_type (
val = ([object(Scope)] val)->`[](
index, ctx, scope_name,
i == sizeof (idxpath) && (scope_got_type = 1, want_type))))
val = nil;
-
+ #ifdef MODULE_DEBUG
+ else if (mixed err = scope_got_type && want_type &&
+ catch (want_type->type_check (val)))
+ if (([object] err)->is_RXML_Backtrace)
+ error ("%O->`[] didn't return a value of the correct type:\n%s",
+ scope, err->msg);
+ else throw (err);
+ #endif
}
else if( mappingp( val ) || objectp (val) ) {
if (zero_type (val = val[ index ])) val = nil;
3154: Inside #if defined(MODULE_DEBUG)
error ("Cyclic rxml_var_eval chain detected in %O.\n"
"All called objects:%{ %O%}\n", val, indices (called));
called[val] = 1;
+ Value val_obj = [object(Value)] val;
#endif
if (zero_type (val = ([object(Value)] val)->rxml_var_eval (
ctx, index, scope_name, want_type)) ||
val == nil)
return ([])[0];
-
+ #ifdef MODULE_DEBUG
+ else if (mixed err = want_type && catch (want_type->type_check (val)))
+ if (([object] err)->is_RXML_Backtrace)
+ error ("%O->rxml_var_eval didn't return a value of the correct type:\n%s",
+ val_obj, err->msg);
+ else throw (err);
+ #endif
} while (objectp (val) && ([object] val)->rxml_var_eval);
return val;
}
3602:
//! Interface.
- //!string name;
- //! Unique type identifier. Required and considered constant. Type
- //! hierarchies are currently implemented with glob patterns, e.g.
- //! "image/png" is a subtype of "image/*". However, this syntax will
- //! be developed further, so for now use only characters [a-zA-Z/]
- //! to write MIME-like types and use only * for globbing.
+ //! @decl constant string name;
+ //!
+ //! Unique type identifier. Required and considered constant.
+ //!
+ //! If it contains a "/", it's treated as a MIME type and should
+ //! then follow the rules for a MIME type with subtype (RFC 2045,
+ //! section 5.1). Among other things, that means that the valid
+ //! characters are, besides the "/", US-ASCII values 33-126 except
+ //! "(", ")", "<", ">", "@@", ",", ";", ":", "\", """, "/", "[",
+ //! "]", "?" and "=".
+ //!
+ //! If it doesn't contain a "/", it's treated as a type outside the
+ //! MIME system, e.g. "int" for an integer. Any type that can be
+ //! mapped to a MIME type should be so.
+ //!
+ //! Type hierarchies are currently implemented with glob patterns,
+ //! e.g. "image/png" is a subtype of "image/*". However, this syntax
+ //! will be developed further.
+ //!
+ //! A type in the type hierarchy should be able to express any value
+ //! that any of its subtypes can express without (or with at most
+ //! negligible) loss of information, but not necessarily on the same
+ //! form. This is different from the type tree described by
+ //! @[conversion_type], although it's always preferable if a
+ //! supertype also can be used as @[conversion_type] in its
+ //! subtypes.
//!int sequential;
//! Nonzero if data of this type is sequential, defined as:
3622:
//!mixed empty_value;
//! The empty value, i.e. what eval ("") would produce.
+ Type conversion_type;
+ //! The type to use as intermediate step in indirect conversions. Required
+ //! and considered constant. As a last fallback, it should be set to
+ //! @[t_any] (for any type but @[t_any] itself). The
+ //! @[conversion_type] references must never produce a cycle between
+ //! types.
+ //!
+ //! It's values of the conversion type that @[decode] tries to
+ //! return, and also that @[encode] must handle without resorting to
+ //! indirect conversions. It's used as a fallback between types
+ //! that don't have explicit conversion functions for each other;
+ //! see @[indirect_convert].
+ //!
+ //! @note The tree described by the conversion types is not a proper
+ //! type hierarchy in the sense of value set sizes, as opposed to
+ //! the relations expressed by the glob patterns in @[name]. The
+ //! conversion type is chosen purely on pragmatic grounds for doing
+ //! indirect conversions. It's better if the conversion type is a
+ //! supertype (i.e. has a larger value set), but in lack of proper
+ //! supertypes it may also be a subtype, to make it possible to use
+ //! indirect conversion for at least a subset of the values. See the
+ //! example in @[decode].
+
//!int free_text;
//! Nonzero if the type keeps the free text between parsed tokens,
//! e.g. the plain text between tags in XML. The type must be
//! sequential and use strings.
- void type_check (mixed val);
- //! Checks whether the given value is a valid one of this type. Type
- //! errors are thrown with RXML.parse_error().
-
- //!string encoding_type;
- //! A type name identifying the encoding in this type, if
- //! applicable. Conversion between two types with identical
- //! encoding_type is always a nop, so the call to encode() may be
- //! skipped.
+ //! @decl optional constant mixed entity_syntax;
//!
- //! Types that have no encoding_type is taken to represent values in
- //! internal "raw" form, e.g. for strings this means that they are
- //! literals with no encoding scheme, so that every character
- //! represents only itself.
+ //! Nonzero constant for all types with string values that use
+ //! entity syntax, like XML or HTML.
-
+ void type_check (mixed val, void|string msg, mixed... args);
+ //! Checks whether the given value is a valid one of this type.
+ //! Errors are thrown as RXML parse errors, and in that case @[msg],
+ //! if given, is prepended to the error message with ": " between
+ //! them. If there are any more arguments on the line, the prepended
+ //! message is formatted with @tt{sprintf (@[msg], @@@[args])@}.
+ //! There's a @[type_check_error] helper that can be used to handle
+ //! the message formatting and error throwing.
+
mixed encode (mixed val, void|Type from);
- //! Converts the given value to this type. If the from type is
- //! given, it's the type of the value. If it's not given, the value
- //! is assumed to be in raw form (see encoding_type) If the type
- //! can't be converted, an RXML parse error should be thrown.
+ //! Converts the given value to this type.
+ //!
+ //! If the @[from] type isn't given, the function should try to
+ //! convert it to the required internal form for this type, using a
+ //! cast as a last resort if the type of @[val] isn't recognized. It
+ //! should then encode it, if necessary, as though it were a literal
+ //! (typically only applicable for types using strings with
+ //! encodings, like the @[t_xml] type). Any conversion error,
+ //! including in the cast, should be thrown as an RXML parse error.
+ //!
+ //! If the @[from] type is given, it's the type of the value. If the
+ //! encode function doesn't have routines to explicitly handle a
+ //! conversion from that type, then indirect conversion using
+ //! @[conversion_type] should be done. The @[indirect_convert]
+ //! function implements that. The encode function should at least be
+ //! able to convert values of @[conversion_type] to this type, or
+ //! else throw an RXML parse error if it isn't possible.
+ //!
+ //! @note Remember to override @[convertible] if this function can
+ //! convert directly from any type besides the conversion type.
+ //! Don't rely on the conversion type tree being constant so that
+ //! the default implementation would return true anyway.
- mixed decode (mixed val);
- //! Converts the value, which is of this type, to the raw form (see
- //! encoding_type). If the type can't be converted, an RXML parse
- //! error should be thrown. That might happen if the value contains
- //! markup or similar that can't be represented in raw form.
+ optional mixed decode (mixed val);
+ //! Converts the value, which is of this type, to a value of type
+ //! @[conversion_type]. If this function isn't defined, then any
+ //! value of this type works directly in the conversion type.
//!
- //! E.g. when converting some XML text, the function should return a
- //! literal string only if the text doesn't contain tags, otherwise
- //! it should throw an error (since there currently exists no
- //! internal representation of an XML node tree). It should never
- //! both decode e.g. "<" to "<" and just leave literal "<" in the
- //! string. It should also not parse the value with some evaluating
- //! parser (see get_parser) since the value should not be evaluated,
- //! it should only change representation.
+ //! If the type can't be converted, an RXML parse error should be
+ //! thrown. That might happen if the value contains markup or
+ //! similar that can't be represented in the conversion type.
+ //!
+ //! E.g. in a type for XML markup which has @[t_text] as the
+ //! conversion type, this function should return a literal string
+ //! only if the text doesn't contain tags, otherwise it should throw
+ //! an error. It should never both decode e.g. "&lt;" to "<" and
+ //! just leave literal "<" in the string. It should also not parse
+ //! the value with some evaluating parser (see @[get_parser]) since
+ //! the value should only change representation. (This example shows
+ //! that a more fitting conversion type for XML markup would be a
+ //! DOM type that can represent XML node trees, since values
+ //! containing tags could be decoded then.)
Type clone()
//! Returns a copy of the type. Exists only for overriding purposes;
3694:
//! Services.
int `== (mixed other)
- //!
+ //! Returns nonzero iff this type is the same as @[other], i.e. has
+ //! the same name. If @[other] is known to be a type, it's somewhat
+ //! faster to compare the names directly.
{
- return objectp (other) && ([object] other)->is_RXML_Type &&
+ return /*::`== (this_object(), other) ||*/
+ objectp (other) && ([object] other)->is_RXML_Type &&
([object(Type)] other)->name == this_object()->name;
}
int subtype_of (Type other)
- //!
+ //! Returns nonzero iff this type is the same as or a subtype of
+ //! @[other], meaning that any value of this type can be expressed
+ //! by @[other] without (or with at most negligible) loss of
+ //! information.
{
return glob ([string] other->name, [string] this_object()->name);
}
-
+ int `< (mixed other)
+ //! Ordering operator: a type sorts before each of its supertypes.
+ //! Two types with the same name are never less than each other. An
+ //! @[other] that isn't a type is compared against @[name] instead.
+ {
+   // Non-type operand: fall back to comparing with our name.
+   if (!objectp (other) || !([object] other)->is_RXML_Type)
+     return this_object()->name < other;
+   // Same type: not strictly less.
+   if (([object(Type)] other)->name == this_object()->name)
+     return 0;
+   return subtype_of (other);
+ }
+
+ int convertible (Type from)
+ //! Returns nonzero iff it's possible to convert values of the type
+ //! @[from] to this type using some chain of conversions.
+ //!
+ //! Either type may lack a conversion type (zero), as the root type
+ //! @[t_any] does; such a type simply contributes no ancestors to the
+ //! search instead of causing an indexing error.
+ {
+   string my_name = this_object()->name;
+   Type from_conv = from->conversion_type;
+
+   // Cheap special cases: same type, or the two types are at most one
+   // conversion step apart.
+   if (my_name == from->name ||
+       (conversion_type && conversion_type->name == from->name) ||
+       (from_conv &&
+        (my_name == from_conv->name ||
+         (conversion_type && conversion_type->name == from_conv->name))))
+     return 1;
+
+   // The following is not terribly efficient, but most situations
+   // should be handled by the special cases above. It looks for a
+   // common type among the conversion type chains of both types.
+   for (Type tconv = conversion_type; tconv; tconv = tconv->conversion_type)
+     for (Type fconv = from_conv; fconv; fconv = fconv->conversion_type)
+       if (fconv->name == tconv->name)
+         return 1;
+   return 0;
+ }
+
Type `() (program/*(Parser)HMM*/ newparser, mixed... parser_args)
//! Returns a type identical to this one, but which has the given
//! parser. parser_args is passed as extra arguments to the
3866:
return res;
}
+ static final void type_check_error (string msg1, array args1,
+                                     string msg2, mixed... args2)
+ //! Helper for @[type_check] implementations: formats the messages
+ //! and throws an RXML parse error.
+ //!
+ //! @[msg1] and @[args1] are the optional caller supplied prefix (the
+ //! @tt{msg@} and @tt{args@} arguments of @[type_check]); @[msg2] and
+ //! @[args2] describe the actual type error. Each message is run
+ //! through sprintf only if it has arguments. If @[msg1] is given,
+ //! the result is @tt{msg1 + ": " + msg2@}. Typical use:
+ //!
+ //! @example
+ //!   if (value is bogus)
+ //!     type_check_error (msg, args, "My error message with %O %O.\n", foo, bar);
+ //! @endexample
+ {
+   string detail = sizeof (args2) ? sprintf (msg2, @args2) : msg2;
+   if (!msg1)
+     parse_error (detail);
+   else {
+     string prefix = sizeof (args1) ? sprintf (msg1, @args1) : msg1;
+     parse_error (prefix + ": " + detail);
+   }
+ }
+
+ /*static*/ final mixed indirect_convert (mixed val, Type from)
+ //! Converts @[val], which is a value of the type @[from], to this
+ //! type. Uses indirect conversion via @[conversion_type] as
+ //! necessary. Only intended as a helper function for @[encode], so
+ //! it won't do a direct conversion from @[conversion_type] to this
+ //! type. Throws RXML parse error on any conversion error.
+ //!
+ //! Types without a conversion type (i.e. the root type @[t_any])
+ //! are handled on both sides; a zero @[conversion_type] is never
+ //! indexed.
+ {
+   string name = this_object()->name;
+   Type from_conv = from->conversion_type;
+
+ #ifdef MODULE_DEBUG
+   // encode() is required to handle its own conversion type directly.
+   if (conversion_type && conversion_type->name == from->name)
+     error ("Won't convert the conversion type %s to %s; "
+            "encode() should handle that.\n", from->name, name);
+ #endif
+
+   if (from_conv) {
+     // Both types share the same conversion type: decode to it and
+     // let encode() take it from there.
+     if (conversion_type && conversion_type->name == from_conv->name)
+       return encode (from->decode ? from->decode (val) : val, conversion_type);
+     // This type is the conversion type of from: one decode suffices.
+     if (name == from_conv->name)
+       return from->decode ? from->decode (val) : val;
+   }
+
+   if (name == from->name)
+     return val;
+
+   // The following is not terribly efficient, but most situations
+   // should be handled by the special cases above.
+
+   // If this type is an ancestor of from in the conversion tree,
+   // decode step by step up to it. levels counts the number of decode
+   // steps needed to reach conv.
+   int levels = 1;
+   for (Type conv = from_conv; conv; conv = conv->conversion_type, levels++)
+     if (conv->name == name) {
+       while (levels--) {
+         val = from->decode ? from->decode (val) : val;
+         from = from->conversion_type;
+       }
+       return val;
+     }
+
+   // Otherwise convert to our conversion type first, directly if it
+   // can take from without indirection, else recursively.
+   if (conversion_type) {
+     Type via = conversion_type->conversion_type;
+     if (via && via->name == from->name)
+       return encode (conversion_type->encode (val, from), conversion_type);
+     return encode (conversion_type->indirect_convert (val, from), conversion_type);
+   }
+
+   parse_error ("Cannot convert type %s to %s.\n", from, this_object());
+ }
+
// Internals.
program/*(Parser)HMM*/ _parser_prog = PNone;
3885:
MARK_OBJECT_ONLY;
- string _sprintf() {return "RXML.Type" + OBJ_COUNT;}
+ string _sprintf() {return "RXML.Type(" + this_object()->name + ")" + OBJ_COUNT;}
}
static class PCacheObj
3895:
Parser free_parser;
}
+ // Special types.
+
TAny t_any = TAny();
//! A completely unspecified nonsequential type. Every type is a
- //! subtype of this one.
+ //! subtype of this one, and it's also the root of the tree that
+ //! @[Type.conversion_type] describes.
static class TAny
{
inherit Type;
constant name = "*";
-
+ constant conversion_type = 0;
+
+ mixed encode (mixed val, void|Type from)
+ {
+ if (from && from->name != local::name)
+ return indirect_convert (val, from);
+ return val;
+ }
+
string _sprintf() {return "RXML.t_any" + OBJ_COUNT;}
}
TNil t_nil = TNil();
//! A sequential type accepting only the value nil. This type is by
- //! definition a subtype of every sequential type.
+ //! definition a subtype of every other type.
static class TNil
{
3916:
constant name = "nil";
constant sequential = 1;
Nil empty_value = nil;
+ Type conversion_type = t_any;
- void type_check (mixed val)
+ void type_check (mixed val, void|string msg, mixed... args)
{
- if (val != nil) parse_error ("A non-nil value is not accepted.\n");
+ if (val != nil)
+ type_check_error (msg, args, "Expected nil, got %t.\n", val);
}
- mixed encode (mixed val)
+ Nil encode (mixed val, void|Type from)
{
- #ifdef MODULE_DEBUG
+ if (from)
+ if (from->name == TAny.name)
type_check (val);
- #endif
- return nil;
- }
-
- mixed decode (mixed val)
- {
+ else {
+ val = indirect_convert (val, from);
#ifdef MODULE_DEBUG
type_check (val);
#endif
-
+ }
return nil;
}
- int subtype_of (Type other) {return other->sequential || other == t_any;}
+ int subtype_of (Type other) {return 1;}
string _sprintf() {return "RXML.t_nil" + OBJ_COUNT;}
}
3953:
string _sprintf() {return "RXML.t_same" + OBJ_COUNT;}
}
+ // Basic types. Even though most of these have a `+ that fulfills
+ // requirements to make them sequential, we don't want all those to be
+ // treated that way. It would imply that a sequence of e.g. integers
+ // are implicitly added together, which would be nonintuitive.
+
+ TNum t_num = TNum();
+ //! Type for any number, currently integer or float.
+ //!
+ //! FIXME: This is currently not labeled as a supertype for t_int and
+ //! t_float, so it's only marginally useful. Its name will probably
+ //! change.
+
+ static class TNum
+ // Nonsequential type whose values are integers or floats. Its
+ // conversion type is t_any.
+ {
+   inherit Type;
+   constant name = "number";
+   constant sequential = 0;
+   constant empty_value = 0;
+   Type conversion_type = t_any;
+
+   void type_check (mixed val, void|string msg, mixed... args)
+   // Accepts integers and floats; anything else is reported through
+   // type_check_error.
+   {
+     if (intp (val) || floatp (val)) return;
+     type_check_error (msg, args, "Expected numeric value, got %t.\n", val);
+   }
+
+   int|float encode (mixed val, void|Type from)
+   // With a from type: pass through values of this type, type check
+   // values of the any type, and convert indirectly from other types.
+   // Without one: scan strings for an integer and then for a float,
+   // and cast other values to float or, failing that, to int.
+   {
+     if (from) {
+       if (from->name == local::name) return [int|float] val;
+       if (from->name == TAny.name) {
+         type_check (val);
+         return [int|float] val;
+       }
+       return [int|float] indirect_convert (val, from);
+     }
+     if (stringp (val)) {
+       if (sscanf (val, "%d%*c", int i) == 1) return i;
+       if (sscanf (val, "%f%*c", float f) == 1) return f;
+       parse_error ("String contains neither integer nor float.\n");
+     }
+     // Somewhat awkward case: the int cast is only tried if the float
+     // cast fails, and err ends up with the error from the int cast.
+     mixed err = catch {return (float) val;} && catch {return (int) val;};
+     parse_error ("Cannot convert value to number: " + describe_error (err));
+   }
+
+   string _sprintf() {return "RXML.t_num" + OBJ_COUNT;}
+ }
+
+ TInt t_int = TInt();
+ //! Type for integers.
+
+ static class TInt
+ // Nonsequential type whose values are integers. Its conversion type
+ // is t_num.
+ {
+   inherit Type;
+   constant name = "int";
+   constant sequential = 0;
+   constant empty_value = 0;
+   Type conversion_type = t_num;
+
+   void type_check (mixed val, void|string msg, mixed... args)
+   // Accepts integers only; anything else is reported through
+   // type_check_error.
+   {
+     if (intp (val)) return;
+     type_check_error (msg, args, "Expected integer value, got %t.\n", val);
+   }
+
+   int encode (mixed val, void|Type from)
+   // With a from type: pass through values of this type, type check
+   // values of the number type, and convert indirectly from other
+   // types. Without one: scan strings for an integer and cast other
+   // values to int.
+   {
+     if (from) {
+       if (from->name == local::name) return [int] val;
+       if (from->name == TNum.name) {
+         type_check (val);
+         return [int] val;
+       }
+       return [int] indirect_convert (val, from);
+     }
+     if (stringp (val)) {
+       if (sscanf (val, "%d%*c", int i) == 1) return i;
+       parse_error ("String does not contain an integer.\n");
+     }
+     mixed err = catch {return (int) val;};
+     parse_error ("Cannot convert value to integer: " + describe_error (err));
+   }
+
+   string _sprintf() {return "RXML.t_int" + OBJ_COUNT;}
+ }
+
+ TFloat t_float = TFloat();
+ //! Type for floats.
+
+ static class TFloat
+ // Nonsequential type whose values are floats. Its conversion type is
+ // t_num.
+ {
+   inherit Type;
+   constant name = "float";
+   constant sequential = 0;
+   constant empty_value = 0;
+   Type conversion_type = t_num;
+
+   void type_check (mixed val, void|string msg, mixed... args)
+   // Accepts floats only; anything else is reported through
+   // type_check_error.
+   {
+     if (floatp (val)) return;
+     type_check_error (msg, args, "Expected float value, got %t.\n", val);
+   }
+
+   float encode (mixed val, void|Type from)
+   // With a from type: pass through values of this type, type check
+   // values of the number type, and convert indirectly from other
+   // types. Without one: scan strings for a float and cast other
+   // values to float.
+   {
+     if (from) {
+       if (from->name == local::name) return [float] val;
+       if (from->name == TNum.name) {
+         type_check (val);
+         return [float] val;
+       }
+       return [float] indirect_convert (val, from);
+     }
+     if (stringp (val)) {
+       if (sscanf (val, "%f%*c", float f) == 1) return f;
+       parse_error ("String does not contain a float.\n");
+     }
+     mixed err = catch {return (float) val;};
+     parse_error ("Cannot convert value to float: " + describe_error (err));
+   }
+
+   string _sprintf() {return "RXML.t_float" + OBJ_COUNT;}
+ }
+
+ // Text types.
+
TText t_text = TText();
- //! The standard type for generic document text.
+ //! The type for generic document text. Labelled "text/*" and thus
+ //! acts as a supertype for all text types.
static class TText
{
inherit Type;
- constant name = "text/plain";
+ constant name = "text/*";
constant sequential = 1;
constant empty_value = "";
-
+ Type conversion_type = t_any;
constant free_text = 1;
- constant encoding_type = "none";
+
- void type_check (mixed val)
+ void type_check (mixed val, void|string msg, mixed... args)
{
- if (!stringp (val)) parse_error ("The text value is not a string.\n");
+ if (!stringp (val))
+ type_check_error (msg, args, "Expected string for %s, got %t.\n", name, val);
}
string encode (mixed val, void|Type from)
{
- if (mixed err = catch {val = (string) val;})
- parse_error ("Cannot convert value to text: " + describe_error (err));
- if (from && from->encoding_type != encoding_type)
- val = from->decode ([string] val);
- return [string] val;
+ if (from)
+ if (from->name == local::name) return [string] val;
+ else if (from->name == TAny.name) {type_check (val); return [string] val;}
+ else return [string] indirect_convert (val, from);
+ mixed err = catch {return (string) val;};
+ parse_error ("Cannot convert value to %s: %s", name, describe_error (err));
}
- string decode (mixed val)
- {
- #ifdef MODULE_DEBUG
- type_check (val);
- #endif
- return val;
- }
+ string lower_case (string val) {return predef::lower_case (val);}
+ //! Converts all literal uppercase characters in @[val] to lowercase.
+ // The predef:: qualifier is required: an unqualified lower_case here
+ // would refer to this method itself and recurse without end.
-
+ string upper_case (string val) {return predef::upper_case (val);}
+ //! Converts all literal lowercase characters in @[val] to uppercase.
+ // The predef:: qualifier is required: an unqualified upper_case here
+ // would refer to this method itself and recurse without end.
+
+ string capitalize (string val) {return String.capitalize (val);}
+ //! Converts the first literal character in @[val] to uppercase.
+
string _sprintf() {return "RXML.t_text" + OBJ_COUNT;}
}
3997:
{
inherit TText;
constant name = "text/xml";
- constant encoding_type = "xml";
+ Type conversion_type = t_text;
+ constant entity_syntax = 1;
+ constant encoding_type = "xml"; // For compatibility.
-
+ // FIXME: Perhaps stricter type_check?
+
string encode (mixed val, void|Type from)
{
-
+ if (!from) {
if (mixed err = catch {val = (string) val;})
- parse_error ("Cannot convert value to text: " + describe_error (err));
- if (!from) from = t_text;
- if (from->encoding_type != encoding_type)
+ parse_error ("Cannot convert value to %s: %s", name, describe_error (err));
+ from = t_text;
+ }
+ if (from->name == local::name)
+ return [string] val;
+ else if (from->name == TText.name)
return replace (
- from->decode ([string] val),
+ [string] val,
// FIXME: This ignores the invalid Unicode character blocks.
({"&", "<", ">", "\"", "\'",
"\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007",
4020:
"", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "",
}));
- return [string] val;
+ else
+ return [string] indirect_convert (val, from);
}
string decode (mixed val)
{
- #ifdef MODULE_DEBUG
- type_check (val);
- #endif
- return charref_decode_parser->clone()->finish (val)->read();
+ return charref_decode_parser->clone()->finish ([string] val)->read();
}
-
+ string lower_case (string val)
+ {return lowercaser->clone()->finish (val)->read();}
+
+ string upper_case (string val)
+ {return uppercaser->clone()->finish (val)->read();}
+
+ string capitalize (string val)
+ {return capitalizer->clone()->finish (val)->read();}
+
string format_tag (string|Tag tag, void|mapping(string:string) args,
void|string content, void|int flags)
//! Returns a formatted XML tag. The flags argument contains a flag
4083:
{
inherit TXml;
constant name = "text/html";
+ Type conversion_type = t_xml;
+
+ string encode (mixed val, void|Type from)
+ // Delegates to the inherited encode. A value that already is of
+ // this type is passed on labelled as t_xml; any other from type
+ // (or none) is passed on unchanged.
+ {
+   Type effective = from && from->name == local::name ? t_xml : from;
+   return ::encode (val, effective);
+ }
+
+ string decode (mixed val)
+ {
+ return [string] val;
+ }
+
string _sprintf() {return "RXML.t_html" + OBJ_COUNT;}
}
4315:
// Various internal kludges.
- static object/*(Parser.HTML)*/ charref_decode_parser;
+ static object/*(Parser.HTML)*/
+ charref_decode_parser, lowercaser, uppercaser, capitalizer;
- static void init_charref_decode_parser()
+ static void init_parsers()
{
// Pretty similar to PEnt..
object/*(Parser.HTML)*/ p = Parser_HTML();
4348:
"since it contains a tag %O.\n", p->current());
});
charref_decode_parser = p;
+
+ p = Parser_HTML();
+ p->_set_data_callback (
+ lambda (object/*(Parser.HTML)*/ p, string data) {
+ return ({lower_case (data)});
+ });
+ p->_set_entity_callback (
+ lambda (object/*(Parser.HTML)*/ p, string data) {
+ if (string char = Roxen->decode_charref (data))
+ return ({Roxen->encode_charref (lower_case (char))});
+ return 0;
+ });
+ lowercaser = p;
+
+ p = Parser_HTML();
+ p->_set_data_callback (
+ lambda (object/*(Parser.HTML)*/ p, string data) {
+ return ({upper_case (data)});
+ });
+ p->_set_entity_callback (
+ lambda (object/*(Parser.HTML)*/ p, string data) {
+ if (string char = Roxen->decode_charref (data))
+ return ({Roxen->encode_charref (upper_case (char))});
+ return 0;
+ });
+ uppercaser = p;
+
+ p = Parser_HTML();
+ p->_set_data_callback (
+ lambda (object/*(Parser.HTML)*/ p, string data) {
+ p->_set_data_callback (0);
+ p->_set_entity_callback (0);
+ return ({String.capitalize (data)});
+ });
+ p->_set_entity_callback (
+ lambda (object/*(Parser.HTML)*/ p, string data) {
+ p->_set_data_callback (0);
+ p->_set_entity_callback (0);
+ if (string char = Roxen->decode_charref (data))
+ return ({Roxen->encode_charref (upper_case (char))});
+ return 0;
+ });
+ capitalizer = p;
}
static function(string,mixed...:void) _run_error = run_error;
4366:
case "PXml": PXml = [program] val; break;
case "PEnt": PEnt = [program] val; break;
case "PExpr": PExpr = [program] val; break;
- case "Roxen": Roxen = [object] val; init_charref_decode_parser(); break;
+ case "Roxen": Roxen = [object] val; init_parsers(); break;
case "empty_tag_set": empty_tag_set = [object(TagSet)] val; break;
default: error ("Herk\n");
}
};
if (err) werror (describe_backtrace (err));
}