f9f5e42017-03-18Martin Nilsson #include "whitespace.h"
/*
 * Scanning helpers for the tokenizer. Both expand in a scope that provides
 * the character buffer `data`, the cursor `pos` and the input length `len`.
 */

/* Advance pos to the next '\n' in data, or to len when there is none. */
#define NEWLINE() do { while( pos<len && data[pos]!='\n' ) pos++; } while(0)

/* Advance pos past a run of spaces and tabs, never moving beyond len. */
#define SKIPWHT() do { while( pos<len && (data[pos]==' ' || data[pos]=='\t') ) pos++; } while(0)
55355a2004-10-05Martin Nilsson  static unsigned int TOKENIZE(struct array **res, CHAR *data, unsigned int len) { unsigned int start=0; unsigned int pos; for( pos=0;pos<len; pos++ ) { switch(data[pos]) { case '.': if( data[pos+1]=='.' ) { pos++; if( data[pos+1] == '.') pos++; break; }
2334ee2005-03-16Martin Nilsson  break;
55355a2004-10-05Martin Nilsson  case '0': if( data[pos+1]=='x' || data[pos+1]=='X' ) { pos+=2; while( (pos < len) && ((data[pos]>='0' && data[pos] <='9') || (data[pos]>='a' && data[pos] <='f') || (data[pos]>='A' && data[pos] <='F'))) pos++; if( pos != len ) pos--; break; } else if( data[pos+1]=='b' || data[pos+1]=='B' ) { pos+=2;
b424972005-03-28Martin Nilsson  while( pos<len && (data[pos]=='0' || data[pos]=='1') )
55355a2004-10-05Martin Nilsson  pos++; if( pos != len ) pos--; break; }
a9c93c2005-03-25Henrik Grubbström (Grubba)  /* FALL_THROUGH */
55355a2004-10-05Martin Nilsson  case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': while(pos<len && data[pos]>='0' && data[pos]<='9') pos++; if( pos == len ) break;
2334ee2005-03-16Martin Nilsson  if(data[pos]=='.' && data[pos+1]>='0' && data[pos+1]<='9')
55355a2004-10-05Martin Nilsson  { pos++; while(pos<len && data[pos]>='0' && data[pos]<='9') pos++; if(data[pos]=='e' || data[pos]=='E') { pos++; if(data[pos]=='-' || data[pos]=='+') pos++; while(data[pos]>='0' && data[pos]<='9') pos++; } } if(data[pos]=='e' || data[pos]=='E') { pos++;
b424972005-03-28Martin Nilsson  if(data[pos]=='-' || data[pos]=='+') pos++;
55355a2004-10-05Martin Nilsson  while(data[pos]>='0' && data[pos]<='9') pos++; }
3f7dec2016-05-25Per Hedbor  if(data[pos]=='b') { pos++; if(data[pos]=='i') pos++; if(data[pos]=='t') pos++; }
55355a2004-10-05Martin Nilsson  if( pos != len ) pos--; break; case '`': if(data[pos+1]=='`') pos++;
3f7dec2016-05-25Per Hedbor  if(m_isidchar(data[pos+1])) { do { pos++;
ea06cd2016-08-01Henrik Grubbström (Grubba)  } while(m_isidchar2(data[pos+1]));
3f7dec2016-05-25Per Hedbor  if(data[pos+1] == '=') pos++; break; // NOTE: Depends on string having null at end. }
55355a2004-10-05Martin Nilsson  switch(data[pos+1]) { case '<': case '>':
2334ee2005-03-16Martin Nilsson  pos++; if(data[pos+1]==data[pos]) pos++;
55355a2004-10-05Martin Nilsson  break; case '-':
2334ee2005-03-16Martin Nilsson  pos++; if(data[pos+1]=='>') pos++;
55355a2004-10-05Martin Nilsson  break; case '(':
2334ee2005-03-16Martin Nilsson  pos++; if(data[pos+1]==')') pos++;
55355a2004-10-05Martin Nilsson  break; case '[':
2334ee2005-03-16Martin Nilsson  pos++; if(data[pos+1]=='.') pos++; if(data[pos+1]=='.') pos++; if(data[pos+1]==']') pos++; break; case '/': case '%': case '*': case '&': case '|':
ccfbec2005-03-16Martin Nilsson  case '^': case '+': case '!': case '=': case '~':
2334ee2005-03-16Martin Nilsson  pos++;
3f7dec2016-05-25Per Hedbor  if(data[pos+1] == '*') pos++;
55355a2004-10-05Martin Nilsson  break; }
2334ee2005-03-16Martin Nilsson  if(data[pos+1]=='=') pos++;
55355a2004-10-05Martin Nilsson  break; case '\\': continue; /* IGNORED */ case '/': if( pos == len-1 ) break; switch( data[pos+1] ) { case '/': NEWLINE(); /* line comment */ break; case '*': pos += 2; while( pos < len && !(data[pos] == '/' && data[pos-1] == '*') ) pos++; if( pos == len ) goto failed_to_find_end; break;
dd29582005-03-28Martin Nilsson  case '=': pos++; break;
55355a2004-10-05Martin Nilsson  default: break; }
dd29582005-03-28Martin Nilsson 
55355a2004-10-05Martin Nilsson  case '{': case '}': case '[': case ']': case '(': case ')':
2334ee2005-03-16Martin Nilsson  case ';':
55355a2004-10-05Martin Nilsson  case ',': case '?': case '@': /* Hm. Pike specific if I ever saw one. */ break; /* all done, one character token */
2334ee2005-03-16Martin Nilsson  case ':': if( data[pos+1] == ':' ) pos++; break;
55355a2004-10-05Martin Nilsson  case '<': if( data[pos+1] == '<' ) pos++; if( data[pos+1] == '=' ) pos++; break; case '-':
2334ee2005-03-16Martin Nilsson  if( data[pos+1] == '-' ) pos++;
55355a2004-10-05Martin Nilsson  else {
2334ee2005-03-16Martin Nilsson  if( data[pos+1] == '>' ) pos++;
55355a2004-10-05Martin Nilsson  if( data[pos+1] == '=' ) pos++; } break; case '>': if( data[pos+1] == '>' ) pos++; if( data[pos+1] == '=' ) pos++; break; case '+': case '&': case '|': if( data[pos+1] == data[pos] ) pos++; else if( data[pos+1] == '=' ) pos++;
6555062015-04-21Henrik Grubbström (Grubba)  /* FALL_THROUGH */
55355a2004-10-05Martin Nilsson 
13670c2015-05-25Martin Nilsson  case '*': case '%':
55355a2004-10-05Martin Nilsson  case '^': case '!': case '~': case '=': if( data[pos+1] == '=' ) pos++; break;
13670c2015-05-25Martin Nilsson 
f9f5e42017-03-18Martin Nilsson  SPACECASE8 pos++;
55355a2004-10-05Martin Nilsson  while( pos < len ) { switch(data[pos])
f9f5e42017-03-18Martin Nilsson  { SPACECASE8 pos++;
55355a2004-10-05Martin Nilsson  continue; } break; } if( pos != len ) pos--; break; case '\'': pos++; while( pos < len ) { if( data[pos] == '\\' ) pos++; else if( data[pos] == '\'' ) break; pos++; } if( pos >= len ) goto failed_to_find_end; break; case '"': pos++; while( pos < len ) { if( data[pos] == '\\' ) pos++; else if( data[pos] == '"' ) break;
a9c93c2005-03-25Henrik Grubbström (Grubba)  /* FIXME: Newline in string detection! */
55355a2004-10-05Martin Nilsson  pos++; } if( pos >= len ) goto failed_to_find_end; break;
13670c2015-05-25Martin Nilsson 
55355a2004-10-05Martin Nilsson  case '#':
a9c93c2005-03-25Henrik Grubbström (Grubba)  pos++;
a604c42005-03-29Martin Nilsson  SKIPWHT();
a9c93c2005-03-25Henrik Grubbström (Grubba)  if (data[pos] == '\"') { /* Support for #"" */ for (pos++; pos < len; pos++) { if (data[pos] == '\"') break; if (data[pos] == '\\') pos++; } if (pos >= len) goto failed_to_find_end; break;
eae5082015-08-23Martin Nilsson  } { char end = 0;
b70e782015-08-27Martin Nilsson  switch( data[pos] ) { case '(': end=')'; break; case '[': end=']'; break; case '{': end='}'; break; } if(end)
eae5082015-08-23Martin Nilsson  { for (pos++; pos<len-1; pos++) if (data[pos] == '#' && data[pos+1] == end) { pos++; end=0; break; } if (end) goto failed_to_find_end; break; } }
b424972005-03-28Martin Nilsson  if( data[pos] == 's' && data[pos+1] == 't' && data[pos+2] == 'r' && data[pos+3] == 'i' && data[pos+4] == 'n' &&
a604c42005-03-29Martin Nilsson  data[pos+5] == 'g' ) {
b424972005-03-28Martin Nilsson  pos += 6;
a604c42005-03-29Martin Nilsson  SKIPWHT(); if(data[pos]=='\"') { for (pos++; pos < len; pos++) { if (data[pos] == '\"') break; if (data[pos] == '\\') pos++; } } else if(data[pos]=='<') { for(pos++; pos<len; pos++) if(data[pos]=='>') break;
9cd8e22012-03-24Henrik Grubbström (Grubba)  } else if (!data[pos]) { pos--; break;
b424972005-03-28Martin Nilsson  }
a604c42005-03-29Martin Nilsson  else Pike_error("Illegal character after #string\n");
b424972005-03-28Martin Nilsson  if (pos >= len) goto failed_to_find_end; break; }
55355a2004-10-05Martin Nilsson  NEWLINE();
a9c93c2005-03-25Henrik Grubbström (Grubba)  while( data[pos-1]=='\\' || (pos>2 && data[pos-1]=='\r' && data[pos-2]=='\\') )
55355a2004-10-05Martin Nilsson  { pos++; NEWLINE(); } break;
2334ee2005-03-16Martin Nilsson  case 0: goto failed_to_find_end;
55355a2004-10-05Martin Nilsson  default: if( m_isidchar( data[pos] ) ) { pos++; while( m_isidchar2( data[pos] ) && pos < len) pos++; if( pos != len ) pos--; } else Pike_error("Unexpected character %x (%c) at position %d.\n", data[pos], (isprint(data[pos])?data[pos]:'?'), pos ); } PUSH_TOKEN( res, data+start, ( pos == len )?pos-start:pos-start+1 ); start = pos+1; } failed_to_find_end: return MINIMUM(start,len); }