// 
// This file needs to support old pikes that e.g. don't understand 
// "#pike". Some of them fail when they see an unknown cpp directive. 
// 
// #pike __REAL_VERSION__ 
// 
 
protected constant splitter = Parser._parser._Pike.tokenize; 
 
// NB: This module is used by several of the precompilers, 
//     and may thus be used before the Unicode module has 
//     been compiled! 
#if constant(Unicode.is_whitespace) 
protected constant is_whitespace = Unicode.is_whitespace; 
#else 
protected constant whitespace_tab = (< ' ', '\t', '\14', '\r', '\n', >); 
protected int is_whitespace(int c) 
{ 
  return whitespace_tab[c]; 
} 
#endif 
 
class UnterminatedStringError 
//! Error thrown when an unterminated string token is encountered. 
{ 
  inherit Error.Generic; 
  constant error_type = "unterminated_string"; 
  constant is_unterminated_string_error = 1; 
 
  string err_str; 
  //! The string that failed to be tokenized.
 
  protected void create(string pre, string post) 
  { 
    int line = String.count(pre, "\n")+1; 
    err_str = pre+post; 
    if( sizeof(post) > 100 ) 
      ::create(sprintf("Unterminated string %O[%d] at line %d\n", 
                       post[..100], sizeof(post)-100, line)); 
    else 
      ::create(sprintf("Unterminated string %O at line %d\n", 
                       post, line)); 
  } 
} 
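// Illustrative sketch (not part of the module; assumes this file is mounted
// as Parser.Pike): the error can be caught and recognized either by its
// class or by the marker constant above.
//
//   mixed err = catch { Parser.Pike.split("string s = \"no closing quote"); };
//   if (err && err->is_unterminated_string_error)
//     werror("Failed to tokenize: %O\n", err->err_str);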
 
/* accessed from testsuite */ 
/*private*/ array(string) low_split(string data, void|mapping(string:string) state) 
{ 
  if(state && state->remains) 
    data = (string)m_delete(state, "remains") + data; 
  // Cast to string above to work around old Pike 7.0 bug. 
 
  array(string) ret; 
  string rem; 
  [ret, rem] = splitter(data); 
  if(sizeof(rem)) { 
    if(rem[0]=='"') 
      throw(UnterminatedStringError(ret*"", rem)); 
    if(state) state->remains=rem; 
  } 
  return ret; 
} 
 
//! Splits the @[data] string into an array of tokens. An additional
//! element with a newline will be added to the resulting array of
//! tokens. If the optional argument @[state] is provided, the split
//! function is able to pause and resume splitting inside #"" string
//! literals and /**/ comments. The @[state] argument should be an
//! initially empty mapping, in which split will store its state
//! between successive calls.
array(string) split(string data, void|mapping(string:string) state) { 
  array r = low_split(data, state); 
 
  array new = ({}); 
  for(int i; i<sizeof(r); i++) 
    if(r[i][..1]=="//" && r[i][-1]=='\n') 
      new += ({ r[i][..<1], "\n" }); 
    else 
      new += ({ r[i] }); 
 
  if(sizeof(new) && (< "\n", " " >)[new[-1]]) 
    new[-1] += "\n"; 
  else 
    new += ({ "\n" }); 
  return new; 
} 
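// Illustrative usage sketch (not part of the module; assumes this file is
// mounted as Parser.Pike). The exact whitespace tokens depend on the
// low-level tokenizer.
//
//   Parser.Pike.split("int answer = 42;\n");
//   // => string tokens such as "int", " ", "answer", ..., "42", ";",
//   //    plus the trailing newline described above.
//
//   // Resuming a split that stopped inside a #"..." literal by passing
//   // the same state mapping to consecutive calls:
//   mapping(string:string) state = ([]);
//   Parser.Pike.split("string s = #\"first half", state);  // kept in state->remains
//   Parser.Pike.split(" and second half\";\n", state);     // literal completed here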
 
//! Represents a Pike token, along with a selection of associated data and
//! operations. 
class Token 
{ 
  //! The line where the token was found. 
  int line; 
 
  //! The actual token. 
  string text; 
 
  //! The file in which the token was found. 
  string file; 
 
  //! Trailing whitespaces. 
  string trailing_whitespaces=""; 
 
  //! @decl void create(string text, void|int line, void|string file,@ 
  //!   void|string trailing_whitespace) 
  protected void create(string t, void|int l, void|string f, void|string space) 
    { 
      text=t; 
      line=l; 
      file=f; 
      if(space) trailing_whitespaces=space; 
    } 
 
  //! If the object is printed as %s it will only output its text contents. 
  protected string _sprintf(int how) 
    { 
      switch(how) 
      { 
        case 's': 
          return text; 
        case 'O': 
          return sprintf("%O(%O,%O,%d)",this_program,text,file,line); 
      } 
    } 
 
  //! Tokens are considered equal if the text contents are equal. It 
  //! is also possible to compare the Token object with a text string 
  //! directly. 
  protected int `==(mixed foo) 
    { 
      return (objectp(foo) ? foo->text : foo) == text; 
    } 
 
  //! A string can be appended to the Token; the result is the Token's
  //! text contents followed by the string.
  protected string `+(string s) 
    { 
      return predef::`+(text, s); 
    } 
 
  //! A string can be prepended to the Token; the result is the string
  //! followed by the Token's text contents.
  protected string ``+(string s) 
    { 
      return predef::`+(s, text); 
    } 
 
  //! It is possible to cast a Token object to a string; the text contents
  //! will be returned.
  protected mixed cast(string to) 
    { 
      if(to=="string") return text; 
      return UNDEFINED; 
    } 
 
  //! Characters and ranges may be indexed from the text contents of the token. 
  protected int|string `[](int a, void|int b) { 
    if(undefinedp(b)) return text[a]; 
    return text[a..b]; 
  } 
} 
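// Illustrative sketch of the Token operations documented above (not part
// of the module); the values are hypothetical:
//
//   Token t = Token("foo", 4, "demo.pike");
//   t == "foo";          // 1; compares against the text contents
//   (string)t;           // "foo"
//   t[0];                // 'f'
//   t[1..2];             // "oo"
//   sprintf("%s is on line %d", t, t->line);  // "foo is on line 4"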
 
//! Returns an array of @[Token] objects given an array of string tokens. 
array(Token) tokenize(array(string) s, void|string file) 
{ 
  array(Token) ret=allocate(sizeof(s)); 
  int line=1; 
  foreach(s; int e; string str) 
  { 
    ret[e]=Token(str,line,file); 
    if(str[0]=='#') 
    { 
      if( (sscanf(str,"#%*[ \t\14]%d%*[ \t\14]\"%s\"", line,file) == 4) || 
          (sscanf(str,"#%*[ \t\14]line%*[ \t\14]%d%*[ \t\14]\"%s\"",line,file)==5)) 
        line--; 
    } 
    line+=sizeof(str/"\n")-1; 
  } 
  return ret; 
} 
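// Illustrative sketch (not part of the module; Parser.Pike paths and the
// file name are hypothetical): line numbers are counted from 1 and reset
// by any #line directives found among the tokens.
//
//   array(Parser.Pike.Token) t =
//     Parser.Pike.tokenize(Parser.Pike.split("int a;\nint b;\n"), "demo.pike");
//   // t[0]->text == "int", t[0]->line == 1, t[0]->file == "demo.pike";
//   // the tokens after the first newline report line 2.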
 
protected constant global_groupings = ([ "{":"}", "(":")", "[":"]" ]); 
 
//! Fold sub blocks of an array of tokens into sub arrays, 
//! for grouping purposes. 
//! @param tokens 
//!   The token array to fold. 
//! @param groupings 
//!   Supplies the tokens marking the boundaries of blocks to fold. 
//!   The indices of the mapping mark the start of a block, the 
//!   corresponding values mark where the block ends. The sub arrays 
//!   will start and end in these tokens. If no groupings mapping is 
//!   provided, {}, () and [] are used as block boundaries. 
array(Token|array) group(array(string|Token) tokens, 
                         void|mapping(string:string) groupings) 
{ 
  ADT.Stack stack=ADT.Stack(); 
  array(Token) ret=({}); 
  mapping actions=([]); 
 
  if(!groupings) groupings=global_groupings; 
 
  foreach((array)groupings,[string x, string y]) 
  { 
    actions[x]=1; 
    actions[y]=2; 
  } 
 
  foreach(tokens, Token token) 
  { 
    switch(actions[(string)token]) 
    { 
      case 0: ret+=({token}); break; 
      case 1: stack->push(ret); ret=({token}); break; 
      case 2: 
        if (!sizeof(ret) || !sizeof(stack) || 
            (groupings[(string)ret[0]] != (string)token)) { 
#if 0
          // Mismatch
          werror ("%s:%d: Expected %O, got %O\n",
                  token->file||"-", token->line,
                  groupings[(string)ret[0]], (string) token);
#endif
          return ret;
        }
        ret=stack->pop()+({ ret + ({token}) }); 
    } 
  } 
  while (sizeof(stack)) { 
    Token token = ret[0]; 
#if 0 
    werror("%s:%d: Missing %O.\n", 
           token->file||"-", token->line, 
           groupings[(string)token]); 
#endif 
    ret = stack->pop() + 
      ({ ret + 
         ({ Token(groupings[(string)token], !stringp(token) && token->line, 
                  !stringp(token) && token->file) }) }); 
  } 
  return ret; 
} 
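// Illustrative sketch (not part of the module; Parser.Pike paths are an
// assumption): with the default groupings, each (), [] or {} block is
// folded into a nested sub-array that starts and ends with the delimiter
// tokens.
//
//   array g = Parser.Pike.group(
//     Parser.Pike.tokenize(Parser.Pike.split("foo(1, 2);\n")));
//   // Roughly: ({ "foo", ({ "(", "1", ",", " ", "2", ")" }), ";", ... })
//   // where every leaf element is a Token.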
 
/* FIXME: 
 * This actually strips all preprocessing tokens 
 */ 
 
//! Strips off all (preprocessor) line statements from a token array. 
array(Token|array) strip_line_statements(array(Token|array) tokens) 
{ 
  array(Token|array) ret=({}); 
  foreach(tokens, array|object(Token) t) 
    { 
      if(arrayp(t)) 
      { 
        ret+=({ strip_line_statements(t) }); 
      }else{ 
        if( ((string)t) [0] != '#') 
          ret+=({t}); 
      } 
    } 
  return ret; 
} 
 
//! Folds all whitespace tokens into the previous token's trailing_whitespaces. 
array hide_whitespaces(array tokens) 
{ 
  array(Token) ret=({tokens[0]}); 
  foreach(tokens[1..], array|object(Token) t) 
  { 
    if(arrayp(t)) 
    { 
      ret+=({ hide_whitespaces(t) }); 
    } 
    else if( is_whitespace(t->text[0]) ) 
    { 
      mixed tmp=ret[-1]; 
      while(arrayp(tmp)) tmp=tmp[-1]; 
      tmp->trailing_whitespaces+=t->text+t->trailing_whitespaces; 
    } 
    else 
      ret+=({t}); 
  } 
  return ret; 
} 
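// Illustrative sketch (not part of the module): after hide_whitespaces()
// the pure-whitespace tokens are gone, but their text survives in the
// preceding token's trailing_whitespaces, so nothing is lost on
// reconstitution.
//
//   array t = Parser.Pike.hide_whitespaces(
//     Parser.Pike.tokenize(Parser.Pike.split("int a;\n")));
//   // t[0]->text == "int", t[0]->trailing_whitespaces == " "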
 
//! Reconstitutes the token array into a plain string again; essentially 
//! reversing @[split()] and whichever of the @[tokenize], @[group] and 
//! @[hide_whitespaces] methods may have been invoked. 
string simple_reconstitute(array(string|object(Token)|array) tokens) 
{ 
  string ret=""; 
  foreach(Array.flatten(tokens), mixed tok) 
    { 
      if(objectp(tok)) 
        tok=tok->text + tok->trailing_whitespaces; 
      ret+=tok; 
    } 
 
  return ret; 
} 
 
//! Like @[simple_reconstitute], but adds @tt{#line n "file"@}
//! preprocessor statements to the output wherever a new line or
//! file starts.
string reconstitute_with_line_numbers(array(string|object(Token)|array) tokens) 
{ 
  int line=1; 
  string file; 
  string ret=""; 
  foreach(Array.flatten(tokens), mixed tok) 
    { 
      if(objectp(tok)) 
      { 
        if((tok->line && tok->line != line) || 
           (tok->file && tok->file != file)) 
        { 
          if(sizeof(ret) && ret[-1]!='\n') ret+="\n"; 
          line=tok->line; 
          if(tok->file) file=tok->file; 
          ret+=sprintf("#line %d %O\n",line,file||"-"); 
        } 
        tok=tok->text + tok->trailing_whitespaces; 
      } 
      ret+=tok; 
      line+=String.count(tok,"\n"); 
    } 
 
  return ret; 
}
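
// Illustrative round-trip sketch (not part of the module; Parser.Pike paths
// and the file name are assumptions): split, tokenize, group and
// hide_whitespaces are undone by simple_reconstitute(), while
// reconstitute_with_line_numbers() additionally emits a #line directive
// whenever the line or file recorded on a token changes.
//
//   string src = "int a;\nint b;\n";
//   array t = Parser.Pike.hide_whitespaces(Parser.Pike.group(
//     Parser.Pike.tokenize(Parser.Pike.split(src), "demo.pike")));
//   Parser.Pike.simple_reconstitute(t);            // src plus a trailing "\n"
//   Parser.Pike.reconstitute_with_line_numbers(t); // same, preceded by
//                                                  // #line 1 "demo.pike"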