pike.git / lib / modules / Parser.pmod / C.pmod

version» Context lines:

pike.git/lib/modules/Parser.pmod/C.pmod:1:   //   // This file needs to support old pikes that e.g. don't understand   // "#pike". Some of them fail when they see an unknown cpp directive.   //   // #pike __REAL_VERSION__   //    - //! Splits the @[data] string into an array of tokens. An additional - //! element with a newline will be added to the resulting array of - //! tokens. If the optional argument @[state] is provided the split - //! function is able to pause and resume splitting inside /**/ tokens. - //! The @[state] argument should be an initially empty mapping, in - //! which split will store its state between successive calls. - array(string) split(string data, void|mapping state) - { -  int start; -  int line=1; -  array(string) ret=({}); -  int pos; -  if(data=="") return ({"\n"}); -  data += "\n\0"; /* End sentinel. */ + protected constant splitter = Parser._parser._Pike.tokenize;    -  if(state && state->in_token) { -  switch(state->remains[0..1]) { -  -  case "/*": -  if(sizeof(state->remains)>2 && state->remains[-1]=='*' -  && data[0]=='/') { -  ret += ({ state->remains + "/" }); -  pos++; -  m_delete(state, "remains"); -  break; + // NB: This module is used by several of the precompilers, + // and may thus be used before the Unicode module has + // been compiled! + #if constant(Unicode.is_whitespace) + protected constant is_whitespace = Unicode.is_whitespace; + #else + protected constant whitespace_tab = (< ' ', '\t', '\14', '\r', '\n', >); + protected int is_whitespace(int c) + { +  return whitespace_tab[c];   } -  pos = search(data, "*/"); -  if(pos==-1) { -  state->in_token = 1; -  state->remains += data[..<1]; -  return ret; -  } -  ret += ({ state->remains + data[..pos+1] }); -  m_delete(state, "remains"); -  pos+=2; -  break; -  } -  state->in_token = 0; -  } + #endif    -  while(1) + class UnterminatedStringError + //! Error thrown when an unterminated string token is encountered.   { -  int start=pos; +  inherit Error.Generic; +  constant error_type = "unterminated_string"; +  constant is_unterminated_string_error = 1;    -  switch(data[pos]) -  { -  case '\0': -  return ret; +  string err_str; +  //! The string that failed to be tokenized    -  case '#': +  protected void create(string pre, string post)    { -  pos=search(data,"\n",pos); -  if(pos==-1) -  error("Failed to find end of preprocessor statement.\n"); -  -  while(data[pos-1]=='\\' || (data[pos-1]=='\r' && data[pos-2]=='\\')) -  pos=search(data,"\n",pos+1); -  break; -  -  case 'a'..'z': -  case 'A'..'Z': -  case 128..65536: // Lets simplify things for now... -  case '_': -  while(1) -  { -  switch(data[pos]) -  { -  case '$': // allowed in some C (notably digital) -  case 'a'..'z': -  case 'A'..'Z': -  case '0'..'9': -  case 128..65536: // Lets simplify things for now... -  case '_': -  pos++; -  continue; +  int line = String.count(pre, "\n")+1; +  err_str = pre+post; +  if( sizeof(post) > 100 ) +  ::create(sprintf("Unterminated string %O[%d] at line %d\n", +  post[..100], sizeof(post)-100, line)); +  else +  ::create(sprintf("Unterminated string %O at line %d\n", +  post, line));    } -  break; +    } -  break; +     -  case '.': -  if(data[start..start+2]=="...") + /* accessed from testsuite */ + /*private*/ array(string) low_split(string data, void|mapping(string:string) state)   { -  pos+=3; -  break; -  } -  if(data[start..start+1]=="..") -  { -  pos+=3; -  break; -  } +  if(state && state->remains) +  data = (string)m_delete(state, "remains") + data; +  // Cast to string above to work around old Pike 7.0 bug.    -  case '0'..'9': -  if(data[pos]=='0' && (data[pos+1]=='x' || data[pos+1]=='X')) -  { -  pos+=2; -  while(1) -  { -  switch(data[pos]) -  { -  case '0'..'9': -  case 'a'..'f': -  case 'A'..'F': -  pos++; -  continue; +  array(string) ret; +  string rem; +  [ret, rem] = splitter(data); +  if(sizeof(rem)) { +  if(rem[0]=='"') +  throw(UnterminatedStringError(ret*"", rem)); +  if(state) state->remains=rem;    } -  break; -  } -  break; -  } -  while(data[pos]>='0' && data[pos]<='9') pos++; -  if(data[pos]=='.') -  { -  pos++; -  while(data[pos]>='0' && data[pos]<='9') pos++; -  if(data[pos]=='e' || data[pos]=='E') -  { -  pos++; -  if(data[pos]=='-') pos++; -  while(data[pos]>='0' && data[pos]<='9') pos++; -  } -  break; -  } -  if(data[pos]=='e' || data[pos]=='E') -  { -  pos++; -  while(data[pos]>='0' && data[pos]<='9') pos++; -  } -  break; -  -  default: -  error("Unknown token %O\n",data[pos..pos+20]); -  -  case '`': -  while(data[pos]=='`') data[pos]++; -  -  case '\\': pos++; continue; /* IGNORED */ -  -  case '/': -  case '{': case '}': -  case '[': case ']': -  case '(': case ')': -  case ';': -  case ',': -  case '*': case '%': -  case '?': case ':': -  case '&': case '|': case '^': -  case '!': case '~': -  case '=': -  case '@': -  case '+': -  case '-': -  case '<': case '>': -  switch(data[pos..pos+1]) -  { -  case "//": -  pos=search(data,"\n",pos); -  break; -  -  case "/*": -  pos=search(data,"*/",pos); -  if(pos==-1) { -  if(state) { -  state->remains = data[start..<2]; -  state->in_token = 1; +     return ret;   } -  error("Failed to find end of comment.\n"); -  } -  pos+=2; -  break; +     -  case "<<": case ">>": -  if(data[pos+2]=='=') pos++; -  case "==": case "!=": case "<=": case ">=": -  case "*=": case "/=": case "%=": -  case "&=": case "|=": case "^=": -  case "+=": case "-=": -  case "++": case "--": -  case "&&": case "||": -  case "->": -  pos++; -  default: -  pos++; -  } -  break; + //! Splits the @[data] string into an array of tokens. An additional + //! element with a newline will be added to the resulting array of + //! tokens. If the optional argument @[state] is provided the split + //! function is able to pause and resume splitting inside #"" and + //! /**/ tokens. The @[state] argument should be an initially empty + //! mapping, in which split will store its state between successive + //! calls. + array(string) split(string data, void|mapping(string:string) state) { +  array r = low_split(data, state);    -  +  array new = ({}); +  for(int i; i<sizeof(r); i++) +  if(r[i][..1]=="//" && r[i][-1]=='\n') +  new += ({ r[i][..<1], "\n" }); +  else +  new += ({ r[i] });    -  case ' ': -  case '\n': -  case '\r': -  case '\t': -  case '\14': -  while(1) -  { -  switch(data[pos]) -  { -  case ' ': -  case '\n': -  case '\r': -  case '\t': -  case '\14': -  pos++; -  continue; +  if(sizeof(new) && (< "\n", " " >)[new[-1]]) +  new[-1] += "\n"; +  else +  new += ({ "\n" }); +  return new;   } -  break; -  } -  break; +     -  case '\'': -  pos++; -  if(data[pos]=='\\') pos+=2; -  int end=search(data, "'", pos)+1; -  if(!end) -  throw( ({sprintf("Unknown token: %O\n",data[pos-1..pos+19]) }) ); -  pos=end; -  break; -  -  case '"': -  { -  int q,s; -  while(1) -  { -  q=search(data,"\"",pos+1); -  s=search(data,"\\",pos+1); -  if(q==-1) q=sizeof(data)-1; -  if(s==-1) s=sizeof(data)-1; -  -  if(q<s) -  { -  pos=q+1; -  break; -  }else{ -  pos=s+1; -  } -  } -  break; -  } -  } -  } -  -  ret+=({ data[start..pos-1] }); -  } - } -  +    //! Represents a C token, along with a selection of associated data and   //! operations.   class Token   {    //! The line where the token was found.    int line;       //! The actual token.    string text;       //! The file in which the token was found.    string file;       //! Trailing whitespaces.    string trailing_whitespaces="";       //! @decl void create(string text, void|int line, void|string file,@    //! void|string trailing_whitespace) -  void create(string t, void|int l, void|string f, void|string space) +  protected void create(string t, void|int l, void|string f, void|string space)    {    text=t;    line=l;    file=f;    if(space) trailing_whitespaces=space;    }       //! If the object is printed as %s it will only output its text contents. -  string _sprintf(int how) +  protected string _sprintf(int how)    {    switch(how)    {    case 's':    return text;    case 'O':    return sprintf("%O(%O,%O,%d)",this_program,text,file,line);    }    }       //! Tokens are considered equal if the text contents are equal. It    //! is also possible to compare the Token object with a text string    //! directly. -  int `==(mixed foo) +  protected int `==(mixed foo)    {    return (objectp(foo) ? foo->text : foo) == text;    }       //! A string can be added to the Token, which will be added to the    //! text contents. -  string `+(string ... s) +  protected string `+(string s)    { -  return predef::`+(text,@s); +  return predef::`+(text, s);    }       //! A string can be added to the Token, which will be added to the    //! text contents. -  string ``+(string ... s) +  protected string ``+(string s)    { -  return predef::`+(@s,text); +  return predef::`+(s, text);    }       //! It is possible to case a Token object to a string. The text content    //! will be returned. -  mixed cast(string to) +  protected mixed cast(string to)    {    if(to=="string") return text; -  +  return UNDEFINED;    }       //! Characters and ranges may be indexed from the text contents of the token. -  int|string `[](int a, void|int b) { -  if(zero_type(b)) return text[a]; +  protected int|string `[](int a, void|int b) { +  if(undefinedp(b)) return text[a];    return text[a..b];    }   }      //! Returns an array of @[Token] objects given an array of string tokens.   array(Token) tokenize(array(string) s, void|string file)   {    array(Token) ret=allocate(sizeof(s));    int line=1;    foreach(s; int e; string str)
pike.git/lib/modules/Parser.pmod/C.pmod:370:    actions[y]=2;    }       foreach(tokens, Token token)    {    switch(actions[(string)token])    {    case 0: ret+=({token}); break;    case 1: stack->push(ret); ret=({token}); break;    case 2: -  if (!sizeof(ret) || !stack->ptr || +  if (!sizeof(ret) || !sizeof(stack) ||    (groupings[(string)ret[0]] != (string)token)) { -  + #if 0    // Mismatch    werror ("%s:%d: Expected %O, got %O\n",    token->file||"-", token->line,    groupings[(string)ret[0]], (string) token); -  + #endif    return ret;    }    ret=stack->pop()+({ ret + ({token}) });    }    } -  +  while (sizeof(stack)) { +  Token token = ret[0]; + #if 0 +  werror("%s:%d: Missing %O.\n", +  token->file||"-", token->line, +  groupings[(string)token]); + #endif +  ret = stack->pop() + +  ({ ret + +  ({ Token(groupings[(string)token], !stringp(token) && token->line, +  !stringp(token) && token->file) }) }); +  }    return ret;   }      /* FIXME:    * This actually strips all preprocessing tokens    */      //! Strips off all (preprocessor) line statements from a token array.   array(Token|array) strip_line_statements(array(Token|array) tokens)   {
pike.git/lib/modules/Parser.pmod/C.pmod:414:      //! Folds all whitespace tokens into the previous token's trailing_whitespaces.   array hide_whitespaces(array tokens)   {    array(Token) ret=({tokens[0]});    foreach(tokens[1..], array|object(Token) t)    {    if(arrayp(t))    {    ret+=({ hide_whitespaces(t) }); -  }else{ -  switch( ((string)t) [0]) +  } +  else if( is_whitespace(t->text[0]) )    { -  case ' ': -  case '\t': -  case '\14': -  case '\r': -  case '\n': +     mixed tmp=ret[-1];    while(arrayp(tmp)) tmp=tmp[-1]; -  tmp->trailing_whitespaces+=(string)t; -  break; -  -  default: +  tmp->trailing_whitespaces+=t->text+t->trailing_whitespaces; +  } +  else    ret+=({t});    } -  } -  } +     return ret;   }      //! Reconstitutes the token array into a plain string again; essentially   //! reversing @[split()] and whichever of the @[tokenize], @[group] and   //! @[hide_whitespaces] methods may have been invoked.   string simple_reconstitute(array(string|object(Token)|array) tokens)   {    string ret="";    foreach(Array.flatten(tokens), mixed tok)