Branch: Tag:

1999-06-12

1999-06-12 19:13:08 by Mirar (Pontus Hagland) <pike@sort.mirar.org>

it works, a bit

Rev: src/modules/Parser/html.c:1.18
Rev: src/modules/Parser/module.pmod.in:1.2

/*
 * $Id: module.pmod.in,v 1.2 1999/06/12 19:13:08 mirar Exp $
 *
 */

inherit @module@;

//! module Parser

class SGML
//! class SGML
//!	This is a handy simple parser of SGML-like
//!	syntax like HTML. It doesn't do anything advanced,
//!	but finding the corresponding end-tags.
//!
//!	It's used like this:
//!	<pre>array res=Parser.SGML()->feed(string)->finish()->result();</pre>
//!
//!	The resulting structure is an array of atoms,
//!	where the atom can be a string or a tag.
//!	A tag contains a similar array, as data.
//!
//!	Example:
//!	A string
//!	<tt>"<gat>&nbsp;<gurka>&nbsp;</gurka>&nbsp;<banan>&nbsp;<kiwi>&nbsp;</gat>"</tt>
//!	results in
//!	<pre>
//!	({
//!	    tag "gat" object with data:
//!	    ({
//!	        tag "gurka" object with data:
//!		({
//!                 " "
//!             })
//!	        tag "banan" object with data:
//!		({
//!                 " "
//!	            tag "kiwi" object with data:
//!		    ({
//!                    " "
//!                 })
//!             })
//!         })
//!     })
//!	</pre>
//!
//!	ie, simple "tags" (not containers) are not detected,
//!	but containers are ended implicitly by a surrounding
//!	container _with_ an end tag. Containers that are still
//!	open when the stream is finished are ended implicitly
//!	by finish().
//!
//! 	The 'tag' is an object with the following variables:
//!	<pre>
//!	 string name;           - name of tag
//!	 mapping args;          - argument to tag
//!	 int line,char,column;  - position of tag
//!	 string file;           - filename (see <ref>create</ref>)
//!	 array(SGMLatom) data;  - contained data
//!     </pre>
//!
{
   // Filename given to create(); copied into every tag created.
   string file;

   // One parsed tag. 'data' is filled in when the tag is closed
   // (explicitly, implicitly by an outer end tag, or by finish()).
   class SGMLatom
   {
      string name;
      mapping args;
      int line,char,column;
      string file;
      array(SGMLatom) data;
   }

   // Stack of atom lists under construction: res[0] collects the atoms
   // of the innermost open tag, the last element is the top level.
   static array(array(object(SGMLatom)|string)) res=({({})});

   // Stack of currently open tags, innermost first; tagstack[i] owns
   // the atom list res[i].
   static array(SGMLatom) tagstack=({});

   static array(object) errors;

   // Final result; set by finish().
   array(object(SGMLatom)|string) data;

   // Close the 'count' innermost open tags: each one gets the atoms
   // collected since it was opened as its data, and collection
   // continues in the enclosing list.
   static private void close_tags(int count)
   {
      while (count--)
      {
	 tagstack[0]->data=res[0];
	 res=res[1..];
	 tagstack=tagstack[1..];
      }
   }

   // Tag callback for the underlying parser.
   //
   // An end tag ("/name") that matches an open tag closes that tag and
   // every tag opened inside it. Anything else - including an end tag
   // with no matching open tag - is recorded as a new open tag.
   static private array(string) got_tag(object g)
   {
      string name=g->tag_name();

      if (name!="" && name[0]=='/')
      {
	 int i=search(tagstack->name,name[1..]);
	 if (i!=-1)
	 {
	    // tagstack[0..i] are the matching tag and all tags nested in it
	    close_tags(i+1);
	    return ({});
	 }
      }

      object t=SGMLatom();
      t->name=name;
      t->args=g->tag_args();
      [t->line,t->char,t->column]=g->at();
      t->file=file;

      // the tag itself belongs to the current list...
      res[0]+=({t});
      // ...and everything that follows is collected as its contents
      tagstack=({t})+tagstack;
      res=({({})})+res;

      return ({}); // don't care
   }

   // Write an indented dump of 'arr' (default: the finished result)
   // using write(); 'level' is the current indentation.
   void debug(array|void arr,void|int level)
   {
      level+=2;
      if (!arr) arr=data;
      foreach (arr,string|object(SGMLatom) t)
	 if (stringp(t))
	    write("%*s%-=*s\n",level,"",79-level,sprintf("%O",t));
	 else
	 {
	    write("%*stag %O\n",level,"",t->name);
	    if (sizeof(t->args))
	       write("%*s%-=*s\n",level+4,"",75-level,sprintf("%O",t->args));
	    debug(t->data,level);
	 }
   }


   // The underlying parser; HTML comes from the inherited module.
   private static object p=HTML();

//! static void create()
//! static void create(string filename)
//!	This object is created with this filename.
//!	It's passed to all created tags, for debug and trace purposes.
//! note:
//! 	No, it doesn't read the file itself. See <ref>feed</ref>.

   void create(void|string _file)
   {
      file=_file;

      p->_set_tag_callback(got_tag);
      // non-empty text between tags is stored as a plain string atom
      p->_set_data_callback(lambda(object g,string text)
			    {
			       if (text!="") res[0]+=({text});
			       return ({});
			    });
   }

//! static object feed(string s)
//! static array finish()
//! static array result()
//!	Feed new data to the object, or finish the stream.
//!	No result can be used until finish() is called.
//!
//! 	Both finish() and result() return the computed data.
//!
//!	feed() returns the called object.

   object feed(string s)
   {
      p->feed(s);
      return this_object();
   }

   array finish()
   {
      // already finished; res is gone, so just hand back the result
      if (!res) return data;

      p->finish();

      // Tags still open at end of stream are ended here, so that res[0]
      // below is the top-level list and not the contents of the
      // innermost unclosed tag.
      close_tags(sizeof(tagstack));

      data=res[0];
      res=0;
      return data;
   }

   array(object(SGMLatom)|string) result()
   {
      return data;
   }
}