pike.git / lib / modules / Parser.pmod / Tabular.pike

version» Context lines:

pike.git/lib/modules/Parser.pmod/Tabular.pike:1:
//! This is a parser for line and block oriented data.
//! It provides a flexible yet concise record-description language to parse
//! character/column/delimiter-organised records.
//!
//! @seealso
- //! @[Parser.LR]
+ //! @[Parser.LR], @url{http://www.wikipedia.org/wiki/Comma-separated_values@},
+ //! @url{http://www.wikipedia.org/wiki/EDIFACT@}

#pike __REAL_VERSION__

// Input stream. create() stores either the caller's stream as-is (when it
// offers unread()) or a Stdio.FILE that the input was assigned to.
- private Stdio.FILE in;
+ Stdio.FILE _in;
// Set to 1 once the end of the current line has been consumed;
// getrecord() clears it before it starts parsing a record.
+ int _eol;
// Default number of characters read per chunk by the delimiter and regexp
// matchers; a field's own "prefetch" setting takes precedence over it.
private int prefetch=1024; // TODO: Document and make this available
                           // through compile().
// Input consumed so far. A checkpoint that is destroyed without having
// been released pushes this content back onto the input stream.
private String.Buffer alread=String.Buffer(prefetch);
// Default format: set by create(), used by fetch() when it is called
// without a format.
private mapping|array fms;
- private int eol;
+
// Matches strings that contain no regexp metacharacters. getrecord()
// reads such "match" fields as fixed-width text of the same length.
private Regexp simple=Regexp("^[^[\\](){}<>^$|+*?\\\\]+$");
// Matches a line that holds nothing but space, tab, VT, CR or 0x1a
// characters (or nothing at all).
private Regexp emptyline=Regexp("^[ \t\v\r\x1a]*$");
// Value thrown on a parse mismatch: 1 normally, raised to 2 by
// getrecord() while a mandatory record has not been found yet.
private mixed severity=1;
// Verbosity level set by create(); negative values switch on tracing
// through werror().
private int verb=0;
// Record counter shown in the trace output; skipemptylines() increments
// it for every empty line it skips.
private int recordcount=1;

//! This function initialises the parser.
//!
//! @param input
//! The input stream or string.
pike.git/lib/modules/Parser.pmod/Tabular.pike:56:   { if(zero_type(verbose)&&intp(format))    verbose=format;    else    fms=stringp(format)||objectp(format)?compile(format):format;    verb=verbose==1?70:verbose;    if(!input)    input=" ";    if(stringp(input))    input=Stdio.FakeFile(input);    if(!input->unread) -  (in=Stdio.FILE())->assign(input); +  (_in=Stdio.FILE())->assign(input);    else -  in=input; +  _in=input;   }      #if 0 // Currently unused function   private int getchar() - { int c=in->getchar(); + { int c=_in->getchar();    if(c<0)    throw(severity);    alread->putchar(c);    return c;   }   #endif      private string read(int n)   { string s; -  s=in->read(n); +  s=_in->read(n);    alread->add(s);    if(sizeof(s)!=n)    throw(severity);    return s;   }      private string gets(int n)   { string s;    if(n)    { s=read(n);    if(has_value(s,"\n")||has_value(s,"\r"))    throw(severity);    }    else -  { s=in->gets(); +  { s=_in->gets();    if(!s)    throw(severity);    if(has_value(s,"\r")) // Retrofix \r-only line endings    { array t;    t=s/"\r"; -  s=t[0];in->unread(t[1..]*"\n"); +  s=t[0];_in->unread(t[1..]*"\n");    }    alread->add(s);alread->putchar('\n');    if(has_suffix(s,"\r"))    s=s[..<1]; -  eol=1; +  _eol=1;    }    return s;   }    - private class checkpoint + class _checkpoint   { private string oldalread;       void create()    { oldalread=alread->get();    }       final void release()    { string s=alread->get();    alread->add(oldalread);    alread->add(s);    oldalread=0;    }       protected void destroy()    { if(oldalread)    { string back=alread->get();    if(sizeof(back)) -  { in->unread(back); +  { _in->unread(back);    if(verb<0)    { back-="\n";    if(sizeof(back))    werror("Backtracking %O\n",back);    }    }    alread->add(oldalread);    }    }   }      #define FETCHAR(c,buf,i) (catch((c)=(buf)[(i)++])?((c)=-1):(c))    - private mapping getrecord(array fmt,int found) - { mapping ret=([]),options; -  
if(stringp(fmt[0])) -  { options=(["name":fmt[0]]); -  if(fmt[1]) -  options+=fmt[1]; -  else -  fmt[1]=0; -  } -  else -  options=fmt[0]; -  if(found) -  { if(options->single) -  throw(severity); // early exit, already found one -  } -  else if(options->mandatory) -  severity=2; -  if(verb<0) -  werror("Checking record %d for %O\n",recordcount,options->name); -  eol=0; -  foreach(fmt;int fi;array|mapping m) -  { if(fi<2) -  continue; -  string value; -  if(arrayp(m)) -  { array field=m; -  fmt[fi]=m=(["name":field[0]]); -  mixed nm=field[1]; -  if(!mappingp(nm)) -  { if(arrayp(nm)) -  ret+=getrecord(nm,found); -  else -  m+=([(intp(nm)?"width":(stringp(nm)?"match":"delim")):nm]); -  if(sizeof(field)>2) -  m+=field[2]; -  } -  fmt[fi]=m; -  } -  if(eol) -  throw(severity); -  if(!zero_type(m->width)) -  value=gets(m->width); -  if(m->delim) + string _getdelimword(mapping m)   { multiset delim=m->delim;    int i,pref=m->prefetch || prefetch;    String.Buffer word=String.Buffer(pref);    string buf,skipclass;    skipclass="%[^"+(string)indices(delim)+"\"\r\x1a\n]";    if(sizeof(delim-(<',',';','\t',' '>)))   delimready:    for(;;)    { i=0; -  buf=in->read(pref); +  buf=_in->read(pref);    int c;    FETCHAR(c,buf,i);    while(c>=0)    { if(delim[c])    break delimready;    else switch(c)    { default:    { string s;    sscanf(buf[--i..],skipclass,s);    word->add(s);    i+=sizeof(s);    break;    }    case '\n':    FETCHAR(c,buf,i);    switch(c)    { default:i--;    case '\r':case '\x1a':;    } -  eol=1; +  _eol=1;    break delimready;    case '\r':    FETCHAR(c,buf,i);    if(c!='\n')    i--; -  eol=1; +  _eol=1;    break delimready;    case '\x1a':;    }    FETCHAR(c,buf,i);    }    if(!sizeof(buf))    throw(severity);    alread->add(buf);    }    else    { int leadspace=1,inquotes=0;   csvready:    for(;;)    { i=0; -  buf=in->read(pref); +  buf=_in->read(pref);    int c;    FETCHAR(c,buf,i);    while(c>=0)    { if(delim[c])    { if(!inquotes)    break csvready;    
word->putchar(c);    } -  else switch(c) +  else +  switch(c)    { case '"':leadspace=0;    if(!inquotes)    inquotes=1;    else if(FETCHAR(c,buf,i)=='"')    word->putchar(c);    else    { inquotes=0;    continue;    }    break;
pike.git/lib/modules/Parser.pmod/Tabular.pike:265:    i+=sizeof(s);    }    break;    case '\n':    FETCHAR(c,buf,i);    switch(c)    { default:i--;    case '\r':case '\x1a':;    }    if(!inquotes) -  { eol=1; +  { _eol=1;    break csvready;    }    word->putchar('\n');    break;    case '\r':    FETCHAR(c,buf,i);    if(c!='\n')    i--;    if(!inquotes) -  { eol=1; +  { _eol=1;    break csvready;    }    word->putchar('\n');    case '\x1a':;    }    FETCHAR(c,buf,i);    }    if(!sizeof(buf))    throw(severity);    alread->add(buf);    }    }    alread->add(buf[..i-1]); -  in->unread(buf[i..]); -  value=word->get(); +  _in->unread(buf[i..]); +  return word->get();   } -  +  + private mapping getrecord(array fmt,int found) + { mapping ret=([]),options; +  if(stringp(fmt[0])) +  { options=(["name":fmt[0]]); +  if(fmt[1]) +  options+=fmt[1]; +  else +  fmt[1]=0; +  } +  else +  options=fmt[0]; +  if(found) +  { if(options->single) +  throw(severity); // early exit, already found one +  } +  else if(options->mandatory) +  severity=2; +  if(verb<0) +  werror("Checking record %d for %O\n",recordcount,options->name); +  _eol=0; +  foreach(fmt;int fi;array|mapping m) +  { if(fi<2) +  continue; +  string value; +  if(arrayp(m)) +  { array field=m; +  fmt[fi]=m=(["name":field[0]]); +  mixed nm=field[1]; +  if(!mappingp(nm)) +  { if(arrayp(nm)) +  ret+=getrecord(nm,found); +  else +  m+=([(intp(nm)?"width":(stringp(nm)?"match":"delim")):nm]); +  if(sizeof(field)>2) +  m+=field[2]; +  } +  fmt[fi]=m; +  } +  if(_eol) +  throw(severity); +  if(!zero_type(m->width)) +  value=gets(m->width); +  if(m->delim) +  value=_getdelimword(m);    if(m->match)    { Regexp rgx;    if(stringp(m->match))    { if(!value && simple->match(m->match))    { m->width=sizeof(m->match);    value=gets(m->width);    }    m->match=Regexp("^("+m->match+")"+(value?"$":""));    }    rgx=m->match;    if(value)    { if(!rgx->match(value))    { if(verb<-3)    werror(sprintf("Mismatch %O!=%O\n",value,rgx)    
-"Regexp.SimpleRegexp");    throw(severity);    }    }    else -  { string buf=in->read(m->prefetch || prefetch); +  { string buf=_in->read(m->prefetch || prefetch);    { array spr;    if(!buf || !(spr=rgx->split(buf)))    { alread->add(buf);    if(verb<-3)    werror(sprintf("Mismatch %O!=%O\n",buf[..32],rgx)    -"Regexp.SimpleRegexp");    throw(severity);    } -  in->unread(buf[sizeof(value=spr[0])..]); +  _in->unread(buf[sizeof(value=spr[0])..]);    }    alread->add(value);    value-="\r";    if(has_suffix(value,"\n"))    value=value[..<1];    }    }    if(!m->drop)    ret[m->name]=value;    } -  if(!eol && gets(0)!="") +  if(!_eol && gets(0)!="")    throw(severity);    severity=1;    if(verb&&verb!=-1)    { array s=({options->name,"::"});    foreach(sort(indices(ret)),string name)    { string value=ret[name];    if(sizeof(value))    { if(verb<-2)    s+=({name,":"});    s+=({value,","});
pike.git/lib/modules/Parser.pmod/Tabular.pike:379:
//! the input. This is unnecessary if no argument is
//! specified for @[fetch()].
//!
//! @returns
//! It returns true if EOF has been reached.
//!
//! @seealso
//! @[fetch()]
int skipemptylines()
{ string line; int eof=1;
   // Consume lines for as long as they are 8-bit strings matching the
   // emptyline pattern; each skipped line still counts as a record.
   // NOTE(review): the String.width()==8 test presumably exists because
   // the regexp cannot be applied to wide strings -- confirm.
-  while((line=in->gets()) && String.width(line)==8 && emptyline->match(line))
+  while((line=_in->gets()) && String.width(line)==8 && emptyline->match(line))
   recordcount++;
   // A line was read that is not empty: clear the EOF indication and push
   // the line back onto the input stream with a newline appended.
   if(line)
-  eof=0,in->unread(line+"\n");
+  eof=0,_in->unread(line+"\n");
   // Still 1 here only if gets() ran out of input.
   return eof;
}

//! This function consumes as much input as needed to parse
//! the full tabular structures at once.
//!
//! @param format
//! Describes (precompiled only) formats to be parsed.
//! If no format is specified,
//! the format specified on @[create()] is used, and empty lines are
pike.git/lib/modules/Parser.pmod/Tabular.pike:416:   { mapping ret=([]);    int skipempty=0;    if(!format)    { if(skipemptylines())    return UNDEFINED;    skipempty=1;format=fms;    }   ret:    { if(arrayp(format))    { mixed err=catch -  { checkpoint checkp=checkpoint(); +  { _checkpoint checkp=_checkpoint();    foreach(format;;array|mapping fmt)    if(arrayp(fmt))    for(int found=0;;found=1)    { mixed err=catch -  { checkpoint checkp=checkpoint(); +  { _checkpoint checkp=_checkpoint();    mapping rec=getrecord(fmt,found);    foreach(rec;string name;mixed value)    add2map(ret,name,value);    checkp->release();    continue;    };    severity=1;    switch(err)    { case 2:    err=1;
pike.git/lib/modules/Parser.pmod/Tabular.pike:456:    case 1:    return 0;    }    if(skipempty)    skipemptylines();    }    else    { int found;    do    { found=0; +  if(!mappingp(format)) +  error("Empty format definition\n");    foreach(format;string name;array|mapping subfmt)    for(;;)    { if(verb<0)    werror("Trying format %O\n",name);    mapping m;    if(m=fetch(subfmt))    { found=1;add2map(ret,name,m);    continue;    }    break;
pike.git/lib/modules/Parser.pmod/Tabular.pike:485:

//! @param content
//! Is injected into the input stream.
//!
//! @returns
//! This object.
//!
//! @seealso
//! @[fetch()]
object feed(string content)
   // unread() puts content back into the input buffer, where subsequent
   // reads will see it.
- { in->unread(content);
+ { _in->unread(content);
   // Hand back this object so that calls can be chained.
   return this;
}

//! @param format
//! Replaces the default (precompiled only) format.
//!
//! @returns
//! The previous default format.
//!
//! @seealso