4e331a | 1999-06-12 | Mirar (Pontus Hagland) | | //! module Parser
class SGML
//! class SGML
//! This is a handy, simple parser for SGML-like
//! syntax such as HTML. It doesn't do anything advanced;
//! it only finds the corresponding end tags.
//!
//! It's used like this:
//! <pre>array res=Parser.SGML()->feed(string)->finish()->result();</pre>
//!
//! The resulting structure is an array of atoms,
//! where the atom can be a string or a tag.
//! A tag contains a similar array, as data.
//!
//! Example:
//! A string
//! <tt>"<gat> <gurka> </gurka> <banan> <kiwi> </gat>"</tt>
//! results in
//! <pre>
//! ({
//! tag "gat" object with data:
//! ({
//! tag "gurka" object with data:
//! ({
//! " "
//! })
//! tag "banan" object with data:
//! ({
//! " "
//! tag "kiwi" object with data:
//! ({
//! " "
//! })
//! })
//! })
//! })
//! </pre>
//!
//! i.e., simple "tags" (not containers) are not detected,
//! but containers are ended implicitly by a surrounding
//! container _with_ an end tag.
//!
//! The 'tag' is an object with the following variables:
//! <pre>
//! string name; - name of tag
//! mapping args; - argument to tag
//! int line,char,column; - position of tag
//! string file; - filename (see <ref>create</ref>)
//! array(SGMLatom) data; - contained data
//! </pre>
//!
{
// File name stamped onto every tag atom that got_tag() creates.
// Where it is assigned is not visible in this part of the file.
string file;
// One tag in the parse result.
class SGMLatom
{
   // Name of the tag.
   string name;

   // Arguments given to the tag.
   mapping args;

   // Position of the tag.
   int line;
   int char;
   int column;

   // File name the tag was found in.
   string file;

   // Contents of the tag.
   array(SGMLatom) data;
}
// Stack of content arrays, innermost first. res[0] collects the
// content of the innermost open tag; got_tag() pushes an empty array
// when a tag is opened and pops one per tag that is closed. It starts
// with a single empty array for the top level.
static array(array(object(SGMLatom)|string)) res=({({})});
// Tags that are currently open, innermost first (got_tag() prepends).
static array(SGMLatom) tagstack=({});
// Not referenced in the visible part of the file.
static array(object) errors;
// Structure that debug() prints when called without an array.
// It is not assigned in the visible part of the file.
array(object(SGMLatom)|string) data;
// Updates the tree under construction for one tag.
//
// g is the object describing the tag; only its tag_name(),
// tag_args() and at() methods are used here.
//
// An end tag ("/name") whose name matches an open tag closes that
// tag together with every tag opened after it. Any other tag --
// including an end tag with no matching open tag, whose name is kept
// with its leading "/" -- is stored as a new atom in the current
// content and becomes the innermost open tag.
//
// Always returns an empty array.
static private array(string) got_tag(object g)
{
   string tag=g->tag_name();

   if (tag!="" && tag[0]=='/')
   {
      // tagstack is ordered innermost first, so this locates the
      // most recently opened tag with that name.
      int depth=search(tagstack->name,tag[1..]);
      if (depth!=-1)
      {
         // Close the matching tag and the depth tags nested in it.
         int remaining;
         for (remaining=depth+1; remaining>0; remaining--)
         {
            tagstack[0]->data=res[0];
            tagstack=tagstack[1..];
            res=res[1..];
         }
         return ({});
      }
   }

   object atom=SGMLatom();
   atom->name=tag;
   atom->args=g->tag_args();
   [atom->line,atom->char,atom->column]=g->at();
   atom->file=file;

   // The atom is content of the enclosing tag ...
   res[0]+=({atom});
   // ... and gets a fresh content array of its own as the new
   // innermost open tag.
   res=({({})})+res;
   tagstack=({atom})+tagstack;

   return ({});
}
// Writes an indented dump of a parsed structure with write().
//
// arr:   atoms to print; when omitted (or zero) the object's data
//        array is printed instead. If that is unset too, nothing is
//        printed.
// level: indentation of the caller; every nesting level adds two
//        columns.
//
// Strings are printed in %O form. Tags are printed as a "tag <name>"
// line, followed by their arguments (when there are any) and,
// recursively, their contents.
void debug(array|void arr,void|int level)
{
   level+=2;
   if (!arr) arr=data;
   // Nothing to dump; foreach over zero would throw.
   if (!arr) return;
   foreach (arr,string|object(SGMLatom) t)
      if (stringp(t))
         write("%*s%-=*s\n",level,"",79-level,sprintf("%O",t));
      else
      {
         write("%*stag %O\n",level,"",t->name);
         if (t->args && sizeof(t->args))
            write("%*s%-=*s\n",level+4,"",75-level,sprintf("%O",t->args));
         // A tag whose data is unset must not be passed on: a zero
         // argument means "use the top-level data", which would
         // restart the dump from the top and recurse without end.
         if (t->data)
            debug(t->data,level);
      }
}
// Instance of HTML created together with this object. HTML is
// resolved outside this view -- presumably the sibling Parser.HTML
// class that does the low-level tag scanning; TODO confirm.
private static object p=HTML();
|