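// Helpers for tokenizing C-like source text: splitting text into token
// strings, wrapping them in Token objects, grouping bracketed ranges,
// hiding whitespace and reconstituting the original text.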
//! Splits C-like source text into an array of token strings.
//!
//! Whitespace runs, comments and preprocessor directives are returned as
//! tokens of their own. Backticks and backslashes outside of literals are
//! silently dropped.
//!
//! @param data
//!   The text to split. A newline is appended internally as a sentinel, so
//!   the result normally ends with a whitespace token containing that extra
//!   newline (an empty @[data] yields @expr{({"\n"})@}).
//!
//! @param state
//!   Optional mapping used to carry an unterminated block comment over to
//!   the next call. When a @expr{/*@} comment is not closed within @[data],
//!   the text seen so far is stored in @expr{state->remains@} and
//!   @expr{state->in_token@} is set; the next call completes the comment.
//!
//! @returns
//!   The tokens found, in order.
//!
//! @throws
//!   An error on unknown characters, unterminated string or character
//!   literals, and on an unterminated block comment when no @[state]
//!   mapping was given.
array(string) split(string data, void|mapping state)
{
  array(string) ret = ({});
  int pos;
  if(data == "") return ({"\n"});

  // Sentinels: the newline guarantees that "//" comments and "#" lines
  // always find an end; the NUL terminates the main loop.
  data += "\n\0";

  if(state && state->in_token) {
    switch(state->remains[0..1]) {

    case "/*":
      // The "*/" terminator itself may have been cut in half between
      // the previous chunk and this one.
      if(sizeof(state->remains) > 2 && state->remains[-1] == '*'
         && data[0] == '/') {
        ret += ({ state->remains + "/" });
        pos++;
        m_delete(state, "remains");
        break;
      }
      pos = search(data, "*/");
      if(pos == -1) {
        state->in_token = 1;
        // Strip both sentinel characters so that no spurious newline
        // ends up inside the comment.
        state->remains += data[..<2];
        return ret;
      }
      ret += ({ state->remains + data[..pos+1] });
      m_delete(state, "remains");
      pos += 2;
      break;
    }
    state->in_token = 0;
  }

  while(1)
  {
    int start = pos;

    switch(data[pos])
    {
    case '\0':
      return ret;

    case '#':
      // Preprocessor directive: runs to the end of the line, including
      // any backslash-continued lines.
      pos = search(data, "\n", pos);
      if(pos == -1)
        error("Failed to find end of preprocessor statement.\n");

      while(data[pos-1] == '\\' ||
            (data[pos-1] == '\r' && data[pos-2] == '\\'))
      {
        int next = search(data, "\n", pos+1);
        // A continuation on the very last line: stop at the sentinel
        // newline instead of running off the end of the data.
        if(next == -1) break;
        pos = next;
      }
      break;

    case 'a'..'z':
    case 'A'..'Z':
    case 128..65536:
    case '_':
      // Identifier or keyword.
      while(1)
      {
        switch(data[pos])
        {
        case '$':
        case 'a'..'z':
        case 'A'..'Z':
        case '0'..'9':
        case 128..65536:
        case '_':
          pos++;
          continue;
        }
        break;
      }
      break;

    case '.':
      if(data[start..start+2] == "...")
      {
        pos += 3;
        break;
      }
      if(data[start..start+1] == "..")
      {
        // Two characters only; consuming three swallowed the character
        // following the "..".
        pos += 2;
        break;
      }
      // FALL THROUGH: a lone "." or a float such as ".5".

    case '0'..'9':
      if(data[pos] == '0' && (data[pos+1] == 'x' || data[pos+1] == 'X'))
      {
        pos += 2;
        while(1)
        {
          switch(data[pos])
          {
          case '0'..'9':
          case 'a'..'f':
          case 'A'..'F':
            pos++;
            continue;
          }
          break;
        }
        break;
      }
      while(data[pos] >= '0' && data[pos] <= '9') pos++;
      if(data[pos] == '.')
      {
        pos++;
        while(data[pos] >= '0' && data[pos] <= '9') pos++;
        // NOTE(review): only a '-' sign is accepted here, and no sign at
        // all in the integer-mantissa case below -- confirm whether '+'
        // and signed exponents like "1e-5" should be supported.
        if(data[pos] == 'e' || data[pos] == 'E')
        {
          pos++;
          if(data[pos] == '-') pos++;
          while(data[pos] >= '0' && data[pos] <= '9') pos++;
        }
        break;
      }
      if(data[pos] == 'e' || data[pos] == 'E')
      {
        pos++;
        while(data[pos] >= '0' && data[pos] <= '9') pos++;
      }
      break;

    default:
      error("Unknown token %O\n", data[pos..pos+20]);

    case '`':
    case '\\':
      // Ignored: skipped without producing a token.
      pos++;
      continue;

    case '/':
    case '{': case '}':
    case '[': case ']':
    case '(': case ')':
    case ';':
    case ',':
    case '*': case '%':
    case '?': case ':':
    case '&': case '|': case '^':
    case '!': case '~':
    case '=':
    case '@':
    case '+':
    case '-':
    case '<': case '>':
      switch(data[pos..pos+1])
      {
      case "//":
        // Line comment; the sentinel newline guarantees a match.
        pos = search(data, "\n", pos);
        break;

      case "/*":
        pos = search(data, "*/", pos);
        if(pos == -1) {
          if(state) {
            // Remember the partial comment (minus the sentinels) so the
            // next call can finish it.
            state->remains = data[start..<2];
            state->in_token = 1;
            return ret;
          }
          error("Failed to find end of comment.\n");
        }
        pos += 2;
        break;

      case "<<": case ">>":
        if(data[pos+2] == '=') pos++;
        // FALL THROUGH
      case "==": case "!=": case "<=": case ">=":
      case "*=": case "/=": case "%=":
      case "&=": case "|=": case "^=":
      case "+=": case "-=":
      case "++": case "--":
      case "&&": case "||":
      case "->":
        pos++;
        // FALL THROUGH
      default:
        pos++;
      }
      break;

    case ' ':
    case '\n':
    case '\r':
    case '\t':
    case '\14':
      // A run of whitespace becomes a single token.
      while(1)
      {
        switch(data[pos])
        {
        case ' ':
        case '\n':
        case '\r':
        case '\t':
        case '\14':
          pos++;
          continue;
        }
        break;
      }
      break;

    case '\'':
    {
      pos++;
      // Skip an escaped character so that '\'' is handled.
      if(data[pos] == '\\') pos += 2;
      int end = search(data, "'", pos) + 1;
      if(!end)
        error("Unknown token: %O\n", data[pos-1..pos+19]);
      pos = end;
      break;
    }

    case '"':
    {
      int q, s;
      while(1)
      {
        q = search(data, "\"", pos+1);
        // Without a closing quote the loop can never terminate
        // normally; report it instead of failing inside search().
        if(q == -1)
          error("Failed to find end of string.\n");
        s = search(data, "\\", pos+1);
        if(s == -1) s = sizeof(data)-1;

        if(q < s)
        {
          pos = q+1;
          break;
        }
        // A backslash comes first: step onto the escaped character so
        // that the next search starts after it.
        pos = s+1;
      }
      break;
    }
    }

    ret += ({ data[start..pos-1] });
  }
}
|
|
|
//! A single token together with its source position and any whitespace
//! that followed it.
class Token
{
  //! Line number the token was found on (0 when not given).
  int line;

  //! The token text itself.
  string text;

  //! Name of the file the token came from (0 when not given).
  string file;

  //! Whitespace following the token; empty unless supplied to
  //! @[create] or appended later.
  string trailing_whitespaces = "";

  //! @param t
  //!   Token text.
  //! @param l
  //!   Line number.
  //! @param f
  //!   File name.
  //! @param space
  //!   Trailing whitespace; the default empty string is kept when this
  //!   argument is omitted or false.
  void create(string t, void|int l, void|string f, void|string space)
  {
    text = t;
    line = l;
    file = f;
    if(space)
      trailing_whitespaces = space;
  }

  //! Formats the token as its text for @expr{%s@} and as a
  //! constructor-like description for @expr{%O@}. Other format
  //! characters yield 0.
  string _sprintf(int how)
  {
    if(how == 's')
      return text;
    if(how == 'O')
      return sprintf("%O(%O,%O,%d)",this_program,text,file,line);
    return 0;
  }

  //! Compares the token text with @[foo]; when @[foo] is an object its
  //! @expr{text@} member is used for the comparison.
  int `==(mixed foo)
  {
    if(objectp(foo))
      return foo->text == text;
    return foo == text;
  }

  //! Returns the token text with the strings in @[s] appended.
  string `+(string ... s)
  {
    return predef::`+(text,@s);
  }

  //! Returns the strings in @[s] followed by the token text.
  string ``+(string ... s)
  {
    return predef::`+(@s,text);
  }

  //! Casting to string gives the token text; any other target type
  //! yields 0.
  mixed cast(string to)
  {
    return to == "string" ? text : 0;
  }

  //! Indexes the token text: one argument returns the character at
  //! @[a], two arguments return the substring @expr{text[a..b]@}.
  int|string `[](int a, void|int b) {
    return zero_type(b) ? text[a] : text[a..b];
  }
}
|
|
//! Wraps each string in @[s] in a @[Token] carrying line and file
//! information.
//!
//! Line counting starts at 1 and advances by the number of newlines in
//! each token. Tokens starting with @expr{#@} are inspected for
//! @expr{# <line> "<file>"@} and @expr{#line <line> "<file>"@}
//! directives, which reset the current line and file for the tokens
//! that follow.
//!
//! @param s
//!   Token strings.
//! @param file
//!   Initial file name recorded in the tokens.
//!
//! @returns
//!   One @[Token] per input string, in the same order.
array(Token) tokenize(array(string) s, void|string file)
{
  int count = sizeof(s);
  array(Token) tokens = allocate(count);
  int lineno = 1;

  for(int i = 0; i < count; i++)
  {
    string text = s[i];

    // The token itself is stamped with the position in effect before
    // any directive it contains is applied.
    tokens[i] = Token(text, lineno, file);

    if(text[0] == '#')
    {
      if( (sscanf(text,"#%*[ \t\14]%d%*[ \t\14]\"%s\"", lineno,file) == 4) ||
          (sscanf(text,"#%*[ \t\14]line%*[ \t\14]%d%*[ \t\14]\"%s\"",lineno,file)==5))
      {
        // Decremented so that the newline which follows the directive
        // brings the counter to the stated line.
        lineno--;
      }
    }

    lineno += String.count(text, "\n");
  }

  return tokens;
}
|
// Default bracket pairs used by group() when no groupings mapping is
// given: each opening token maps to its closing token.
protected constant global_groupings = ([ "{":"}", "(":")", "[":"]" ]);
|
|
|
|
|
|
|
|
|
|
|
//! Folds bracketed ranges of @[tokens] into nested arrays.
//!
//! Every range that starts with an opening token and ends with the
//! matching closing token is replaced by one array containing the
//! opener, the (recursively grouped) contents and the closer.
//!
//! @param tokens
//!   Tokens, either as plain strings or as @[Token] objects.
//! @param groupings
//!   Mapping from opening token to closing token. Defaults to
//!   @expr{{ }@}, @expr{( )@} and @expr{[ ]@}.
//!
//! @returns
//!   The grouped tokens. When a closing token does not match, a
//!   diagnostic is written with @expr{werror@} and the tokens collected
//!   at the current nesting level are returned.
//!
//! @note
//!   NOTE(review): groups that are still open when the input ends are
//!   not unwound; only the innermost unfinished level is returned.
array(Token|array) group(array(string|Token) tokens,
                         void|mapping(string:string) groupings)
{
  ADT.Stack stack = ADT.Stack();
  array(string|Token|array) ret = ({});
  mapping(string:int) actions = ([]);

  if(!groupings) groupings = global_groupings;

  // 1 marks an opening token, 2 a closing token, 0 (absent) anything else.
  foreach(groupings; string opener; string closer)
  {
    actions[opener] = 1;
    actions[closer] = 2;
  }

  foreach(tokens, string|Token token)
  {
    string text = (string)token;

    switch(actions[text])
    {
    case 0:
      ret += ({ token });
      break;

    case 1:
      // Start a new nesting level with the opener as its first element.
      stack->push(ret);
      ret = ({ token });
      break;

    case 2:
    {
      // Only inside a group is ret[0] guaranteed to be the opener; at
      // top level it may be missing or a nested array, so it must not
      // be indexed or cast there.
      string expected =
        (stack->ptr && sizeof(ret)) ? groupings[(string)ret[0]] : 0;

      if(expected != text) {
        // Plain string tokens carry no position information.
        werror ("%s:%d: Expected %O, got %O\n",
                (objectp(token) && token->file) || "-",
                objectp(token) ? token->line : 0,
                expected, text);
        return ret;
      }
      ret = stack->pop() + ({ ret + ({ token }) });
      break;
    }
    }
  }
  return ret;
}
|
|
|
|
|
|
//! Returns a copy of @[tokens] without the tokens whose text starts
//! with @expr{#@}. Nested arrays are processed recursively and keep
//! their place in the result.
array(Token|array) strip_line_statements(array(Token|array) tokens)
{
  array(Token|array) kept = ({});

  for(int i = 0; i < sizeof(tokens); i++)
  {
    array|object(Token) item = tokens[i];

    if(arrayp(item))
      kept += ({ strip_line_statements(item) });
    else if( ((string)item)[0] != '#' )
      kept += ({ item });
  }

  return kept;
}
|
|
//! Removes whitespace tokens from @[tokens] and appends their text to
//! the @expr{trailing_whitespaces@} member of the token preceding them.
//!
//! Nested arrays are processed recursively, including one in the first
//! position. A whitespace token with no preceding token object to
//! attach to (for example leading whitespace) is kept in place.
//!
//! @param tokens
//!   Tokens and nested token arrays; may be empty.
//!
//! @returns
//!   A new array without the attached whitespace tokens. The token
//!   objects themselves are modified in place.
array hide_whitespaces(array tokens)
{
  array ret = ({});

  foreach(tokens, mixed t)
  {
    if(arrayp(t))
    {
      ret += ({ hide_whitespaces(t) });
      continue;
    }

    switch( ((string)t) [0])
    {
    case ' ':
    case '\t':
    case '\14':
    case '\r':
    case '\n':
    {
      // Find the last real token before this whitespace, descending
      // into nested groups (whose last element is their closer).
      mixed prev = sizeof(ret) ? ret[-1] : 0;
      while(arrayp(prev))
        prev = sizeof(prev) ? prev[-1] : 0;

      if(objectp(prev))
        prev->trailing_whitespaces += (string)t;
      else
        ret += ({ t });   // Nothing to attach to; keep the token.
      break;
    }

    default:
      ret += ({ t });
    }
  }
  return ret;
}
|
|
|
|
//! Rebuilds source text from a (possibly nested) token array.
//!
//! The array is flattened; each token object contributes its
//! @expr{text@} followed by its @expr{trailing_whitespaces@}, and every
//! other element is appended as it is.
//!
//! @returns
//!   The concatenated text.
string simple_reconstitute(array(string|object(Token)|array) tokens)
{
  string text = "";
  array flat = Array.flatten(tokens);

  for(int i = 0; i < sizeof(flat); i++)
  {
    mixed piece = flat[i];
    text += objectp(piece)
      ? piece->text + piece->trailing_whitespaces
      : piece;
  }

  return text;
}
|
|
|
|
//! Rebuilds source text from a (possibly nested) token array, inserting
//! @expr{#line@} directives wherever a token object's recorded line or
//! file differs from the position reached so far.
//!
//! Plain strings are appended as they are and only advance the line
//! counter by the newlines they contain.
//!
//! @returns
//!   The concatenated text including the inserted directives.
string reconstitute_with_line_numbers(array(string|object(Token)|array) tokens)
{
  int cur_line = 1;
  string cur_file;
  string out = "";

  foreach(Array.flatten(tokens), mixed item)
  {
    string chunk;

    if(!objectp(item))
    {
      chunk = item;
    }
    else
    {
      int line_differs = item->line && item->line != cur_line;

      if(line_differs || (item->file && item->file != cur_file))
      {
        // A directive has to start on a line of its own.
        if(sizeof(out) && out[-1]!='\n')
          out += "\n";

        cur_line = item->line;
        if(item->file)
          cur_file = item->file;

        out += sprintf("#line %d %O\n",cur_line,cur_file||"-");
      }
      chunk = item->text + item->trailing_whitespaces;
    }

    out += chunk;
    cur_line += String.count(chunk,"\n");
  }

  return out;
}
|
|