5d445f2002-03-20Martin Nilsson // // This file needs to support old pikes that e.g. don't understand // "#pike". Some of them fail when they see an unknown cpp directive. // // #pike __REAL_VERSION__ //
f133a52003-01-18Martin Nilsson // $Id: Pike.pmod,v 1.30 2003/01/18 18:38:12 nilsson Exp $
//! This module parses and tokenizes Pike source code.
a20af62000-09-26Fredrik Hübinette (Hubbe) 
74c57a2000-07-14Andreas Lange inherit "C.pmod";
// Throws an error that quotes up to 21 characters of the input starting
// at the current position. Expands in place, so it relies on the locals
// `data' and `pos' of the function it is used in.
#define UNKNOWN_TOKEN \
  error("Unknown pike token: %O\n", data[pos..pos+20])

// Operator spellings that may follow the backquote(s) of a backquote
// identifier such as `+ or `[]=, mapped to the number of characters the
// operator occupies.
static mapping(string : int) backquoteops = ([
  // Three characters.
  "->=" : 3,
  "[]=" : 3,

  // Two characters.
  "!=" : 2,
  "()" : 2,
  "*=" : 2,
  "+=" : 2,
  "->" : 2,
  "<<" : 2,
  "<=" : 2,
  "==" : 2,
  ">=" : 2,
  ">>" : 2,
  "[]" : 2,

  // One character.
  "!" : 1,
  "%" : 1,
  "&" : 1,
  "*" : 1,
  "+" : 1,
  "-" : 1,
  "/" : 1,
  "<" : 1,
  "=" : 1,
  ">" : 1,
  "^" : 1,
  "|" : 1,
  "~" : 1,
]);
//! Splits the @[data] string into an array of tokens. An additional
//! element with a newline will be added to the resulting array of
//! tokens. If the optional argument @[state] is provided the split
//! function is able to pause and resume splitting inside #"" and
//! /**/ tokens. The @[state] argument should be an initially empty
//! mapping, in which split will store its state between successive
//! calls.
//!
//! @param data
//!   The Pike source text to tokenize.
//! @param state
//!   Optional mapping used to carry an unfinished multiline string or
//!   block comment over to the next call. The entries @tt{in_token@}
//!   and @tt{remains@} are read and written here.
//!
//! @returns
//!   The tokens in order of appearance. Whitespace, comments and
//!   preprocessor lines are returned as tokens too. For empty @[data]
//!   the result is @expr{({ "\n" })@}. When splitting pauses inside an
//!   unfinished token, only the tokens completed so far are returned.
//!
//! @throws
//!   An error is thrown for an unknown token, an unterminated string,
//!   a newline inside a string, and, when no @[state] is given, for an
//!   unterminated multiline string or block comment.
array(string) split(string data, void|mapping state)
{
  array(string) ret = ({});
  int pos;

  if (data == "") return ({ "\n" });

  // End sentinel. The scanning loops below index one or two characters
  // ahead without bounds checks; the "\n\0" tail keeps that safe and the
  // '\0' ends the main loop.
  data += "\n\0";

  if (state && state->in_token) {
    // Resume the token that was left unfinished by the previous call.
    switch (state->remains[0..1]) {

      case "/*":
        // The "*/" terminator itself was split between the two chunks.
        if (sizeof(state->remains) > 2 && state->remains[-1] == '*' &&
            data[0] == '/') {
          ret += ({ state->remains + "/" });
          pos++;
          m_delete(state, "remains");
          break;
        }
        pos = search(data, "*/");
        if (pos == -1) {
          state->in_token = 1;
          // Drop both sentinel characters, like every other place that
          // saves to state->remains. Keeping the sentinel newline would
          // add a character that was never in the input and would break
          // the split-terminator check above on the next call.
          state->remains += data[..sizeof(data)-3];
          return ret;
        }
        ret += ({ state->remains + data[..pos+1] });
        m_delete(state, "remains");
        pos += 2;
        break;

      case "#\"":
      {
        int q, s;
        pos = -1;
        while (1) {
          q = search(data, "\"", pos+1);
          s = search(data, "\\", pos+1);

          if (q == -1 || (s == sizeof(data)-2 && s < q)) {
            // Still no closing quote; keep accumulating.
            state->in_token = 1;
            state->remains += data[..sizeof(data)-3];
            return ret;
          }

          if (s == -1 || s > q) {
            pos = q+1;
            break;
          }
          // Backslash before the quote: skip the escaped character.
          pos = s+1;
        }
        ret += ({ state->remains + data[..pos-1] });
        m_delete(state, "remains");
        break;
      }
    }
    state->in_token = 0;
  }

  while (1)
  {
    int start = pos;

    switch (data[pos])
    {
      case '\0':
        return ret;

      case '#':
      {
        pos += 1;

        // Multiline string: #"..."
        if (data[pos] == '\"') {
          int q, s;
          while (1) {
            q = search(data, "\"", pos+1);
            s = search(data, "\\", pos+1);

            if (q == -1 || (s == sizeof(data)-2 && s < q)) {
              if (state) {
                // Save the partial token (from the '#', minus the
                // sentinel) and pause.
                state->in_token = 1;
                state->remains = data[pos-1..sizeof(data)-3];
                return ret;
              }
              error("Failed to find end of multiline string.\n");
            }

            if (s == -1 || s > q) {
              pos = q+1;
              break;
            }
            pos = s+1;
          }
          break;
        }

        // Preprocessor directive: runs to the end of the line.
        pos = search(data, "\n", pos);
        if (pos == -1)
          error("Failed to find end of preprocessor statement.\n");

        // A backslash right before the newline continues the directive
        // on the next line.
        while (data[pos-1] == '\\') {
          int next = search(data, "\n", pos+1);
          // Input ended on a continuation backslash: end the directive
          // at the sentinel newline. Letting pos become -1 here would
          // restart the tokenizer from the beginning of data and loop
          // forever.
          if (next == -1) break;
          pos = next;
        }

        // A "#charset" directive re-decodes the rest of the input.
        sscanf(data[start..pos], "#%*[ \t]charset%*[ \t\\]%s%*[ \n]",
               string charset);
        if (charset)
          data = (data[0..pos] +
                  master()->decode_charset(data[pos+1..sizeof(data)-3],
                                           charset) +
                  "\n\0");  // New end sentinel.
        pos++;
        break;
      }

      // Identifiers and keywords.
      case 'a'..'z':
      case 'A'..'Z':
      case 128..:  // Lets simplify things for now...
      case '_':
        while (1)
        {
          switch (data[pos])
          {
            case 'a'..'z':
            case 'A'..'Z':
            case '0'..'9':
            case 128..:  // Lets simplify things for now...
            case '_':
              pos++;
              continue;
          }
          break;
        }
        break;

      // ".", ".." and "...".
      case '.':
        if (data[start..start+2] == "...")
        {
          pos += 3;
          break;
        }
        if (data[start..start+1] == "..")
        {
          pos += 2;
          break;
        }
        pos++;
        break;

      // Numeric literals.
      case '0'..'9':
        if (data[pos] == '0') {
          if (data[pos+1] == 'x' || data[pos+1] == 'X')
          {
            // Hexadecimal.
            pos += 2;
            while (1)
            {
              switch (data[pos])
              {
                case '0'..'9':
                case 'a'..'f':
                case 'A'..'F':
                  pos++;
                  continue;
              }
              break;
            }
            break;
          }
          else if (data[pos+1] == 'b' || data[pos+1] == 'B') {
            // Binary.
            pos += 2;
            while (1) {
              if (data[pos] != '0' && data[pos] != '1') break;
              pos++;
            }
          }
        }

        while (data[pos] >= '0' && data[pos] <= '9') pos++;

        // Fraction; the digit requirement keeps "1..2" a range.
        if (data[pos] == '.' && data[pos+1] >= '0' && data[pos+1] <= '9')
        {
          pos++;
          while (data[pos] >= '0' && data[pos] <= '9') pos++;
          if (data[pos] == 'e' || data[pos] == 'E')
          {
            pos++;
            // Accept a signed exponent ("1.0e-5") as part of the
            // literal, but only when digits actually follow the sign.
            if ((data[pos] == '-' || data[pos] == '+') &&
                data[pos+1] >= '0' && data[pos+1] <= '9')
              pos++;
            while (data[pos] >= '0' && data[pos] <= '9') pos++;
          }
          break;
        }

        // Exponent without fraction ("1e5", "1e-5"). The 'e' is only
        // consumed when it is followed by digits, optionally signed.
        if (data[pos] == 'e' || data[pos] == 'E')
        {
          int digits_at = pos+1;
          if (data[digits_at] == '-' || data[digits_at] == '+')
            digits_at++;
          if (data[digits_at] >= '0' && data[digits_at] <= '9')
          {
            pos = digits_at;
            while (data[pos] >= '0' && data[pos] <= '9') pos++;
          }
        }
        break;

      default:
        UNKNOWN_TOKEN;

      // Backquote identifiers, e.g. `+ and ``[]=.
      case '`':
      {
        int bqstart = pos;
        while (data[pos] == '`') ++pos;
        if (pos - bqstart > 3)  // At most three backquotes.
        {
          pos = bqstart;
          UNKNOWN_TOKEN;
        }
        // Longest operator spelling wins.
        int chars = backquoteops[data[pos..pos+2]]
          || backquoteops[data[pos..pos+1]]
          || backquoteops[data[pos..pos]];
        if (chars)
          pos += chars;
        else
        {
          pos = bqstart;
          UNKNOWN_TOKEN;
        }
        break;
      }

      // Comments, operators and punctuation.
      case '/':
      case '{':
      case '}':
      case '[':
      case ']':
      case '(':
      case ')':
      case ';':
      case ',':
      case '*':
      case '%':
      case '?':
      case ':':
      case '&':
      case '|':
      case '^':
      case '!':
      case '~':
      case '=':
      case '+':
      case '-':
      case '@':
      case '<':
      case '>':
        switch (data[pos..pos+1])
        {
          case "//":
            // Line comment; the newline itself is not part of it. The
            // sentinel guarantees that a newline is found.
            pos = search(data, "\n", pos);
            break;

          case "/*":
            // Start looking after the opening "/*" so that its '*'
            // cannot double as the terminator ("/*/" is not a complete
            // comment).
            pos = search(data, "*/", pos+2);
            if (pos == -1) {
              if (state) {
                state->remains = data[start..sizeof(data)-3];
                state->in_token = 1;
                return ret;
              }
              error("Failed to find end of comment.\n");
            }
            pos += 2;
            break;

          case "<<":
          case ">>":
            // "<<=" and ">>=" are one character longer.
            if (data[pos+2] == '=') pos++;
            // Fall through.
          case "==":
          case "!=":
          case "<=":
          case ">=":
          case "*=":
          case "/=":
          case "%=":
          case "&=":
          case "|=":
          case "^=":
          case "+=":
          case "-=":
          case "++":
          case "--":
          case "&&":
          case "||":
          case "->":
          case "::":
            pos++;
            // Fall through.
          default:
            pos++;
        }
        break;

      // A run of whitespace is a single token.
      case ' ':
      case '\n':
      case '\r':
      case '\t':
        while (1)
        {
          switch (data[pos])
          {
            case ' ':
            case '\n':
            case '\r':
            case '\t':
              pos++;
              continue;
          }
          break;
        }
        break;

      // Character literal.
      case '\'':
      {
        pos++;
        // Skip an escaped character so that '\'' is handled.
        if (data[pos] == '\\') pos += 2;
        int end = search(data, "'", pos) + 1;
        if (!end) {
          --pos;
          UNKNOWN_TOKEN;
        }
        pos = end;
        break;
      }

      // String literal.
      case '"':
      {
        int q, s;
        while (1)
        {
          q = search(data, "\"", pos+1);
          s = search(data, "\\", pos+1);
          if (q == -1 || (s == sizeof(data)-2 && s < q))
            error("Unterminated string.\n");

          if (s == -1 || s > q)
          {
            pos = q+1;
            break;
          }
          // Backslash before the quote: skip the escaped character.
          pos = s+1;
        }
        if (has_value(data[start..pos-1], "\n"))
          error("Newline in string.\n");
        break;
      }
    }

    ret += ({ data[start..pos-1] });
  }
}