1
  
2
  
3
  
4
  
5
  
6
  
7
  
8
  
9
  
10
  
11
  
12
  
13
  
14
  
15
  
16
  
17
  
18
  
19
  
20
  
21
  
22
  
23
  
24
  
25
  
26
  
27
  
28
  
29
  
30
  
31
  
32
  
33
  
34
  
35
  
36
  
37
  
38
  
39
  
40
  
41
  
42
  
43
  
44
  
45
  
46
  
47
  
48
  
49
  
50
  
51
  
52
  
53
  
54
  
55
  
56
  
57
  
58
  
59
  
60
  
61
  
62
  
63
  
64
  
65
  
66
  
67
  
68
  
69
  
70
  
71
  
72
  
73
  
74
  
75
  
76
  
77
  
78
  
79
  
80
  
81
  
82
  
83
  
84
  
85
  
86
  
87
  
88
  
89
  
90
  
91
  
92
  
93
  
94
  
95
  
96
  
97
  
98
  
99
  
100
  
101
  
102
  
103
  
104
  
105
  
106
  
107
  
108
  
109
  
110
  
111
  
112
  
113
  
114
  
115
  
116
  
117
  
118
  
119
  
120
  
121
  
122
  
123
  
124
  
125
  
126
  
127
  
128
  
129
  
130
  
131
  
132
  
133
  
134
  
135
  
136
  
137
  
138
  
139
  
140
  
141
  
142
  
143
  
144
  
145
  
146
  
147
  
148
  
149
  
150
  
151
  
152
  
153
  
154
  
155
  
156
  
157
  
158
  
159
  
160
  
161
  
162
  
163
  
164
  
165
  
166
  
167
  
168
  
169
  
170
  
171
  
172
  
173
  
174
  
175
  
176
  
177
  
178
  
179
  
180
  
181
  
182
  
183
  
184
  
185
  
186
  
187
  
188
  
189
  
190
  
191
  
192
  
193
  
194
  
195
  
196
  
197
  
198
  
199
  
200
  
201
  
202
  
203
  
204
  
205
  
206
  
207
  
208
  
209
  
210
  
211
  
212
  
213
  
214
  
215
  
216
  
217
  
218
  
219
  
220
  
221
  
222
  
223
  
224
  
225
  
226
  
227
  
228
  
229
  
230
  
231
  
232
  
233
  
234
  
235
  
236
  
237
  
238
  
239
  
240
  
241
  
242
  
243
  
244
  
245
  
246
  
247
  
248
  
249
  
250
  
251
  
252
  
253
  
254
  
255
  
256
  
257
  
258
  
259
  
260
  
261
  
262
  
263
  
264
  
265
  
266
  
267
  
268
  
269
  
270
  
271
  
272
  
273
  
274
  
275
  
276
  
277
  
278
  
279
  
280
  
281
  
282
  
283
  
284
  
285
  
286
  
287
  
288
  
289
  
290
  
291
  
292
  
293
  
294
  
295
  
296
  
297
  
298
  
299
  
300
  
301
  
302
  
303
  
304
  
305
  
306
  
307
  
308
  
309
  
310
  
311
  
312
  
313
  
314
  
315
  
316
  
317
  
318
  
319
  
320
  
321
  
322
  
323
  
324
  
325
  
326
  
327
  
328
  
329
  
330
  
331
  
332
  
333
  
334
  
335
  
336
  
337
  
338
  
339
  
340
  
341
  
342
  
343
  
344
  
345
  
346
  
347
  
348
  
349
  
350
  
351
  
352
  
353
  
354
  
355
  
356
  
357
  
358
  
359
  
360
  
361
  
362
  
363
  
364
  
365
  
366
  
367
  
368
  
369
  
370
  
371
  
372
  
373
  
374
  
375
  
376
  
377
  
378
  
379
  
380
  
381
  
382
  
383
  
384
  
385
  
386
  
387
  
388
  
389
  
390
  
391
  
392
  
393
  
394
  
395
  
396
  
397
  
398
  
399
  
400
  
401
  
402
  
403
  
404
  
405
  
406
  
407
  
408
  
409
  
410
  
411
  
412
  
413
  
414
  
415
  
416
  
417
  
418
  
419
  
420
  
421
  
422
  
423
  
424
  
425
  
426
  
427
  
428
  
429
  
430
  
431
  
432
  
433
  
434
  
435
  
436
  
437
  
438
  
439
  
440
  
441
  
442
  
443
  
444
  
445
  
446
  
447
  
448
  
449
  
450
  
451
  
452
  
453
  
454
  
455
  
456
  
457
  
458
  
459
  
460
  
461
  
462
  
463
  
464
  
465
  
466
  
467
  
468
  
469
  
470
  
471
  
472
  
473
  
474
  
475
  
476
  
477
  
478
  
479
  
480
  
481
  
482
  
483
  
484
  
485
  
486
  
487
  
488
  
489
  
490
  
491
  
492
  
493
  
494
  
495
  
496
  
497
  
498
  
499
  
500
  
501
  
502
  
503
  
504
  
505
  
506
  
507
  
508
  
509
  
510
  
511
  
512
  
513
  
514
  
515
  
516
  
517
  
518
  
519
  
520
  
521
  
522
  
523
  
524
  
525
  
526
  
527
  
528
  
529
  
530
  
531
  
532
  
533
  
534
  
535
  
536
  
537
  
538
  
539
  
540
  
541
  
542
  
543
  
544
  
545
  
546
  
547
  
548
  
549
  
550
  
551
  
552
  
553
  
554
  
555
  
556
  
557
  
558
  
559
  
560
  
561
  
562
  
563
  
564
  
565
  
566
  
567
  
568
  
569
  
570
  
571
  
572
  
573
  
574
  
575
  
576
  
577
  
578
  
579
  
580
  
581
  
582
  
583
  
584
  
585
  
586
  
587
  
588
  
589
  
590
  
591
  
592
  
593
  
594
  
595
  
596
  
597
  
598
  
599
  
600
  
601
  
602
  
603
  
604
  
605
  
606
  
607
  
608
  
609
  
610
  
611
  
612
  
613
  
614
  
615
  
616
  
617
  
618
  
619
  
620
  
621
  
622
  
623
  
624
  
625
  
626
  
627
  
628
  
629
  
630
  
631
  
632
  
633
  
634
  
635
  
636
  
637
  
638
  
639
  
640
  
641
  
642
  
643
  
644
  
645
  
646
  
647
  
648
  
649
  
650
  
651
  
652
  
653
  
654
  
655
  
656
  
657
  
658
  
659
  
660
  
661
  
662
  
663
  
664
  
665
  
666
  
667
  
668
  
669
  
670
  
671
  
672
  
673
  
674
  
675
  
676
  
677
  
678
  
679
  
680
  
681
  
682
  
683
  
684
  
685
  
686
  
687
  
688
  
689
  
690
  
691
  
692
  
693
  
694
  
695
  
696
  
697
  
698
  
699
  
700
  
701
  
702
  
703
  
704
  
705
  
706
  
707
  
708
  
709
  
710
  
711
  
712
  
713
  
714
  
715
  
716
  
717
  
718
  
719
  
720
  
721
  
722
  
723
  
724
  
725
  
726
  
727
  
728
  
729
  
730
  
731
  
732
  
733
  
734
  
735
  
736
  
737
  
738
  
739
  
740
  
741
  
742
  
743
  
744
  
745
  
746
  
747
  
748
  
749
  
750
  
751
  
752
  
753
  
754
  
755
  
756
  
757
  
758
  
759
  
760
  
761
  
762
  
763
  
764
  
765
  
766
  
767
  
768
  
769
  
770
  
771
  
772
  
773
  
774
  
775
  
776
  
777
  
778
  
779
  
780
  
781
  
782
  
783
  
784
  
785
  
786
  
787
  
788
  
789
  
790
  
791
  
792
  
793
  
794
  
795
  
796
  
797
  
798
  
799
  
800
  
801
  
802
  
803
  
804
  
805
  
806
  
807
  
808
  
809
  
810
  
811
  
812
  
813
  
814
  
815
  
816
  
817
  
818
  
819
  
820
  
821
  
822
  
823
  
824
  
825
  
/* -*- c -*- 
|| This file is part of Pike. For copyright information see COPYRIGHT. 
|| Pike is distributed under GPL, LGPL and MPL. See the file COPYING 
|| for more information. 
|| (original author: mirar) 
*/ 
 
#include "global.h" 
#include "pcre_machine.h" 
 
#include "pike_macros.h" 
#include "object.h" 
#include "constants.h" 
#include "interpret.h" 
#include "svalue.h" 
#include "threads.h" 
#include "array.h" 
#include "mapping.h" 
#include "pike_error.h" 
#include "stralloc.h" 
#include "builtin_functions.h" 
#include "module_support.h" 
#include "operators.h" 
#include "sprintf.h" 
 
/*! @module Regexp 
 */ 
 
/*! @module PCRE 
 */ 
 
DECLARATIONS 
 
#ifdef HAVE_LIBPCRE 
 
#ifdef HAVE_PCRE_PCRE_H 
#include <pcre/pcre.h> 
#else 
#include <pcre.h> 
#endif 
 
/*** _pcre the regexp object ***********************************/ 
 
/*! @class _pcre 
 */ 
 
PIKECLASS _pcre 
{ 
   CVAR pcre *re; 
   CVAR pcre_extra *extra; 
   PIKEVAR string pattern; 
 
   /*! @decl void create(string pattern, void|int options, void|object table) 
    *! 
    *! The option bits are: 
    *! @int 
    *!   @value  OPTION.ANCHORED 
    *!     Force pattern anchoring 
    *!   @value  OPTION.CASELESS 
    *!     Do caseless matching 
    *!   @value  OPTION.DOLLAR_ENDONLY 
    *!     $ not to match newline at end 
    *!   @value  OPTION.DOTALL 
    *!     . matches anything including NL 
    *!   @value  OPTION.EXTENDED 
    *!     Ignore whitespace and # comments 
    *!   @value  OPTION.EXTRA 
    *!     PCRE extra features (not much use currently) 
    *!   @value  OPTION.MULTILINE 
    *!     ^ and $ match newlines within data 
    *!   @value  OPTION.NO_AUTO_CAPTURE 
    *!     Disable numbered capturing parentheses (named ones available) 
    *!   @value  OPTION.UNGREEDY 
    *!     Invert greediness of quantifiers 
    *!   @value  OPTION.UTF8 
    *!     Run in UTF-8 mode 
    *! @endint 
    */ 
 
   /* Compiles the given pattern with pcre_compile() and stores the
    * result in this object, replacing any previously compiled regexp
    * and discarding any earlier study data.
    *
    * pattern: the regexp source. Its raw bytes are handed to
    *          pcre_compile(), so it must be an 8-bit string.
    * options: optional bitwise or of OPTION.* flags; 0 if left out.
    * table:   accepted but currently ignored; pcre_compile() is always
    *          given a NULL character table.
    *
    * Throws a Pike error if the pattern is a wide string or if
    * pcre_compile() fails (the message carries the error offset and
    * text reported by PCRE).
    *
    * NOTE(review): pcre_compile() takes a NUL-terminated C string, so a
    * pattern containing a NUL byte is presumably cut short there.
    */
   PIKEFUN void create(string pattern,
                       void|int options,
                       void|object table)
   {
     const char *errptr = NULL;
     int erroffset = 0;

     /* Reject wide strings before touching any state: ->str of a wide
      * string is not byte-per-character data. */
     if (pattern->size_shift)
       SIMPLE_ARG_TYPE_ERROR("create",1,"string (8bit)");

     if (THIS->pattern) { free_string(THIS->pattern); THIS->pattern=NULL; }
     THIS->pattern = pattern;
     add_ref(THIS->pattern);

     if (THIS->re) (*pcre_free)(THIS->re); /* -> free() usually */
     if (THIS->extra) (*pcre_free)(THIS->extra); /* -> free() usually */
     THIS->extra=NULL;

     /* pcre_compile() returns NULL on failure, so THIS->re never keeps
      * pointing at the block freed above. */
     THIS->re=pcre_compile(
       THIS->pattern->str,
       options ? options->u.integer : 0,
       &errptr,&erroffset,
       NULL /* table */ );

     if (!THIS->re)
       Pike_error("error calling pcre_compile [%d]: %s\n",
                  erroffset,errptr);
   }
 
   /*! @decl object study() 
    *! 
    *!  (from the pcreapi man-page) "When a pattern is going to be 
    *!  used several times, it is worth spending more time analyzing 
    *!  it in order to speed up the time taken for matching."
    */ 
 
   /* Runs pcre_study() on the compiled regexp and keeps the result for
    * later use. Any previous study data is released first. Returns
    * this object; throws if no regexp has been compiled yet or if
    * pcre_study() reports an error message.
    */
   PIKEFUN object study()
   {
     const char *study_error = NULL;

     if (THIS->re == NULL) {
       Pike_error("need to initialize before study() is called\n");
     }

     if (THIS->extra != NULL) {
       (*pcre_free)(THIS->extra); /* -> free() usually */
     }

     THIS->extra = pcre_study(THIS->re, 0, &study_error);

     if (study_error != NULL) {
       Pike_error("error calling pcre_study: %s\n", study_error);
     }

     RETURN this_object();
   }
 
   /*! @decl protected string _sprintf(int c, mapping flags) 
    */ 
   /* sprintf() hook. Depending on the conversion character c it leaves
    * one of the following on the Pike stack:
    *   't'  the fixed type name "Regexp.PCRE._pcre"
    *   's'  the stored pattern, or UNDEFINED if there is none
    *   'O'  the result of sprintf("%t(%O)", this object, pattern)
    *   any other character: UNDEFINED
    */
   PIKEFUN string _sprintf(int c, mapping flags)
     flags ID_PROTECTED;
   {
     if (c == 't') {
       push_static_text("Regexp.PCRE._pcre");
       return;
     }

     if (c == 's') {
       if (!THIS->pattern)
         push_undefined();
       else
         ref_push_string(THIS->pattern);
       return;
     }

     if (c == 'O') {
       /* Format string, then its two arguments, then format. */
       push_static_text("%t(%O)");
       ref_push_object(Pike_fp->current_object);
       if (!THIS->pattern)
         push_undefined();
       else
         ref_push_string(THIS->pattern);
       f_sprintf(3);
       return;
     }

     push_undefined();
     return;
   }
 
#ifdef HAVE_PCRE_FULLINFO 
 
   /*! @decl mapping info() 
    *! 
    *! Returns additional information about a compiled pattern. Only 
    *! available if PCRE was compiled with Fullinfo. 
    *! 
    *! @returns 
    *! @mapping 
    *!   @member int "backrefmax" 
    *!     Return the number of the highest back reference in the 
    *!     pattern. The fourth argument should point to an int 
    *!     variable. Zero is returned if there are no back references. 
    *!   @member int "capturecount" 
    *!     Return the number of capturing subpatterns in the pattern. 
    *!     The fourth argument should point to an int variable. 
    *!   @member int "firstbyte" 
    *!     Return information about the first byte of any matched 
    *!     string, for a non-anchored pattern. (This option used to be 
    *!     called PCRE_INFO_FIRSTCHAR; the old name is still 
    *!     recognized for backwards compatibility.) 
    *! 
    *!     If there is a fixed first byte, e.g. from a pattern such as 
    *!     (cat|cow|coyote), it is returned in the integer pointed to 
    *!     by where. Otherwise, if either 
    *! 
    *!     (a) the pattern was compiled with the PCRE_MULTILINE 
    *!     option, and every branch starts with @expr{"^"@}, or 
    *! 
    *!     (b) every branch of the pattern starts with @expr{".*"@} 
    *!     and PCRE_DOTALL is not set (if it were set, the pattern 
    *!     would be anchored), 
    *! 
    *!     @expr{-1@} is returned, indicating that the pattern matches 
    *!     only at the start of a subject string or after any newline 
    *!     within the string. Otherwise @expr{-2@} is returned. For 
    *!     anchored patterns, @expr{-2@} is returned. 
    *!   @member int "lastliteral" 
    *!     Return the value of the rightmost literal byte that must 
    *!     exist in any matched string, other than at its start, if 
    *!     such a byte has been recorded. The fourth argument should 
    *!     point to an int variable. If there is no such byte, 
    *!     @expr{-1@} is returned. For anchored patterns, a last 
    *!     literal byte is recorded only if it follows something of 
    *!     variable length. For example, for the pattern /^a\d+z\d+/ 
    *!     the returned value is @expr{"z"@}, but for /^a\dz\d/ the 
    *!     returned value is @expr{-1@}. 
    *!   @member int "namecount" 
    *!   @member int "nameentrysize" 
    *! 
    *!   @member int "options" 
    *!     Return a copy of the options with which the pattern was 
    *!     compiled. The fourth argument should point to an unsigned 
    *!     long int variable. These option bits are those specified in 
    *!     the call to pcre_compile(), modified by any top-level 
    *!     option settings within the pattern itself. 
    *! 
    *!     A pattern is automatically anchored by PCRE if all of its 
    *!     top-level alternatives begin with one of the following: 
    *! 
    *! @xml{<matrix> 
    *! <r><c>^</c><c>unless PCRE_MULTILINE is set</c></r> 
    *! <r><c>\A</c><c>always</c></r> 
    *! <r><c>\G</c><c>always</c></r> 
    *! <r><c>.*</c><c>if PCRE_DOTALL is set and there are no back 
    *! references to the subpattern in which .* appears</c></r> 
    *! </matrix>@} 
    *! 
    *!     For such patterns, the PCRE_ANCHORED bit is set in the 
    *!     options returned. 
    *!   @member int "size" 
    *!      Return the size of the compiled pattern, that is, the 
    *!      value that was passed as the argument to pcre_malloc() 
    *!      when PCRE was getting memory in which to place the 
    *!      compiled data. The fourth argument should point to a 
    *!      size_t variable. 
    *!   @member int "studysize" 
    *!      Returns the size of the data block pointed to by the 
    *!      study_data field in a pcre_extra block. That is, it is the 
    *!      value that was passed to pcre_malloc() when PCRE was 
    *!      getting memory into which to place the data created by 
    *!      pcre_study(). The fourth argument should point to a size_t 
    *!      variable. 
    *! @endmapping 
    */ 
   /* Queries pcre_fullinfo() for every supported field of the compiled
    * regexp (together with its study data, if any) and returns the
    * values as a mapping from field name to integer.
    *
    * Fields whose PCRE_INFO_* macro is missing from the PCRE headers
    * are left out of the mapping. The "firsttable" and "nametable"
    * entries are always 0; the tables themselves are not exported.
    *
    * Throws if no regexp has been compiled yet or if any
    * pcre_fullinfo() call returns non-zero.
    */
   PIKEFUN mapping info()
   {
     int backrefmax=0,firstbyte=0,lastliteral=0,capturecount=0;
     void *firsttable=NULL,*nametable=NULL;
     int namecount=0,nameentrysize=0;
     /* PCRE_INFO_OPTIONS stores through an unsigned long int pointer;
      * a plain int here would be overrun where long is wider than
      * int. */
     unsigned long int options=0;
     size_t size=0,studysize=0;
     struct svalue *save_sp;

     if (!THIS->re)
       Pike_error("need to initialize before info() is called\n");

     /* Each call returns 0 on success, so the chain stops at the first
      * failing query. */
     if (pcre_fullinfo(THIS->re,THIS->extra,
                       PCRE_INFO_BACKREFMAX,&backrefmax) ||
         pcre_fullinfo(THIS->re,THIS->extra,
                       PCRE_INFO_CAPTURECOUNT,&capturecount) ||
#ifdef PCRE_INFO_FIRSTBYTE
         pcre_fullinfo(THIS->re,THIS->extra,
                       PCRE_INFO_FIRSTBYTE,&firstbyte) ||
#endif /* PCRE_INFO_FIRSTBYTE */
         pcre_fullinfo(THIS->re,THIS->extra,
                       PCRE_INFO_FIRSTTABLE,&firsttable) ||
         pcre_fullinfo(THIS->re,THIS->extra,
                       PCRE_INFO_LASTLITERAL,&lastliteral) ||
#ifdef PCRE_INFO_NAMECOUNT
         pcre_fullinfo(THIS->re,THIS->extra,
                       PCRE_INFO_NAMECOUNT,&namecount) ||
#endif /* PCRE_INFO_NAMECOUNT */
#ifdef PCRE_INFO_NAMEENTRYSIZE
         pcre_fullinfo(THIS->re,THIS->extra,
                       PCRE_INFO_NAMEENTRYSIZE,&nameentrysize) ||
#endif /* PCRE_INFO_NAMEENTRYSIZE */
#ifdef PCRE_INFO_NAMETABLE
         pcre_fullinfo(THIS->re,THIS->extra,
                       PCRE_INFO_NAMETABLE,&nametable) ||
#endif /* PCRE_INFO_NAMETABLE */
         pcre_fullinfo(THIS->re,THIS->extra,
                       PCRE_INFO_OPTIONS,&options) ||
         pcre_fullinfo(THIS->re,THIS->extra,
                       PCRE_INFO_SIZE,&size) ||
#ifdef PCRE_INFO_STUDYSIZE
         pcre_fullinfo(THIS->re,THIS->extra,
                       PCRE_INFO_STUDYSIZE,&studysize) ||
#endif /* PCRE_INFO_STUDYSIZE */
         0)
       Pike_error("pcre_fullinfo gave errors (unexpected)\n");

     pop_n_elems(args);
     /* Remember where the key/value pairs start so that the number of
      * pushed svalues can be computed whatever the #ifdefs include. */
     save_sp = Pike_sp;

     push_static_text("backrefmax");    push_int(backrefmax);
     push_static_text("capturecount");  push_int(capturecount);
#ifdef PCRE_INFO_FIRSTBYTE
     push_static_text("firstbyte");     push_int(firstbyte);
#endif /* PCRE_INFO_FIRSTBYTE */
     push_static_text("firsttable");    push_int(0); /* FIXME: table */
     push_static_text("lastliteral");   push_int(lastliteral);
#ifdef PCRE_INFO_NAMECOUNT
     push_static_text("namecount");     push_int(namecount);
#endif /* PCRE_INFO_NAMECOUNT */
#ifdef PCRE_INFO_NAMEENTRYSIZE
     push_static_text("nameentrysize");         push_int(nameentrysize);
#endif /* PCRE_INFO_NAMEENTRYSIZE */
#ifdef PCRE_INFO_NAMETABLE
     push_static_text("nametable");     push_int(0); /* FIXME: table */
#endif /* PCRE_INFO_NAMETABLE */
     push_static_text("options");               push_int(options);
     push_static_text("size");          push_int(size);
#ifdef PCRE_INFO_STUDYSIZE
     push_static_text("studysize");     push_int(studysize);
#endif /* PCRE_INFO_STUDYSIZE */
     f_aggregate_mapping(Pike_sp - save_sp);
   }
 
#endif /* HAVE_PCRE_FULLINFO */ 
 
/*! @decl int|array exec(string subject,void|int startoffset) 
 *!     Matches the regexp against @[subject], starting at 
 *!     @[startoffset] if it is given. 
 *! 
 *!     If the match is successful, the return value is an array that 
 *!     holds the offsets of all matches: 
 *! 
 *!     Elements 0 and 1 have the start and end offsets, respectively, 
 *!     of the match for the whole regexp. The start offset is 
 *!     inclusive while the end is exclusive, i.e. the matching string 
 *!     is @expr{@[subject][res[0]..res[1]-1]@}. 
 *! 
 *!     Elements 2 and 3 have the offsets of the first capturing 
 *!     submatch (if any) in the same way, elements 4 and 5 are for 
 *!     the second capturing submatch, etc. If a submatch didn't match 
 *!     anything, the corresponding elements are set to -1. If a 
 *!     submatch has matched successfully several times, the offsets 
 *!     of the last match are returned. 
 *! 
 *!     The returned array is always of length 2*(n + 1), where n is the
 *!     total number of capturing submatches in the regexp.
 *! 
 *!     If there is an error, an integer error code is returned: 
 *! 
 *!  @int 
 *!     @value ERROR.NOMATCH 
 *! 
 *!   The subject string did not match the pattern. 
 *! 
 *!     @value ERROR.NULL 
 *! 
 *!   Either code or subject was passed as NULL, or ovector was NULL
 *!   and ovecsize was not zero.
 *! 
 *!     @value ERROR.BADOPTION 
 *! 
 *!   An unrecognized bit was set in the options argument. 
 *! 
 *!     @value ERROR.BADMAGIC 
 *! 
 *!   PCRE stores a 4-byte "magic number" at the start of the compiled 
 *!   code, to catch the case when it is passed a junk pointer. This 
 *!   is the error it gives when the magic number isn't present. 
 *! 
 *!     @value ERROR.UNKNOWN_NODE 
 *! 
 *!   While running the pattern match, an unknown item was encountered 
 *!   in the compiled pattern. This error could be caused by a bug in 
 *!   PCRE or by overwriting of the compiled pattern. 
 *! 
 *!     @value ERROR.NOMEMORY 
 *! 
 *!   If a pattern contains back references, but the ovector that is 
 *!   passed to pcre_exec() is not big enough to remember the 
 *!   referenced substrings, PCRE gets a block of memory at the start 
 *!   of matching to use for this purpose. If the call via 
 *!   pcre_malloc() fails, this error is given. The memory is freed at 
 *!   the end of matching. 
 *! 
 *!     @value ERROR.NOSUBSTRING 
 *! 
 *!   This error is used by the pcre_copy_substring(), 
 *!   pcre_get_substring(), and pcre_get_substring_list() functions 
 *!   (see below). It is never returned by pcre_exec(). 
 *! 
 *!     @value ERROR.MATCHLIMIT 
 *! 
 *!   The recursion and backtracking limit, as specified by the 
 *!   match_limit field in a pcre_extra structure (or defaulted) was 
 *!   reached. See the description above. 
 *! 
 *!     @value ERROR.CALLOUT 
 *! 
 *!   This error is never generated by pcre_exec() itself. It is 
 *!   provided for use by callout functions that want to yield a 
 *!   distinctive error code. See the pcrecallout documentation for 
 *!   details. 
 *! @endint 
 */ 
 
/* Number of ints in the offset vector handed to pcre_exec(). Must be a
 * multiple of three: pcre_exec() reports at most OVECTOR_SIZE/3 offset
 * pairs and uses the remaining third as workspace. */
#define OVECTOR_SIZE 3000 /* multiple of three; possible hits*3 */

   /* Matches the compiled regexp against subject, starting at byte
    * offset startoffset (0 if left out).
    *
    * Leaves on the Pike stack either a negative PCRE error code, or an
    * array of (start, end) offset pairs: pair 0 for the whole match,
    * pair k for capturing submatch k. Pairs that pcre_exec() did not
    * report are filled with -1.
    *
    * Throws if no regexp has been compiled yet or if subject is a wide
    * string (its raw bytes are what pcre_exec() would scan).
    */
   PIKEFUN int|array(int) exec(string subject,
                               void|int startoffset)
   {
     struct array *res;

     int ovector[OVECTOR_SIZE];
     /* Kept at full integer width so that the range checks below see
      * the caller's real value rather than a truncated one. */
     INT_TYPE off=0;
     int opts=0;

     if (!THIS->re)
       Pike_error("need to initialize before exec() is called\n");

     if (subject->size_shift)
       SIMPLE_ARG_TYPE_ERROR("exec",1,"string (8bit)");

     if (startoffset)
       off = startoffset->u.integer;

     /* FIXME: also MATCH_LIMIT and CALLOUT_DATA ... later */

     if (off > subject->len) {
       /* If we don't check this then pcre_exec might return an empty
        * string match past the end of the input string.  That's
        * arguably a bug in the pcre lib (observed in version 7.8). */
       push_int (PCRE_ERROR_NOMATCH);
     }

     else if (off < 0) {
       /* Never hand a negative offset to pcre_exec(). Report it with
        * the library's own code for bad offsets when the headers have
        * one, otherwise as a plain non-match. */
#ifdef PCRE_ERROR_BADOFFSET
       push_int (PCRE_ERROR_BADOFFSET);
#else
       push_int (PCRE_ERROR_NOMATCH);
#endif
     }

     else {
       /* off is within [0, subject->len] here. */
       int rc=pcre_exec(THIS->re,THIS->extra,
                        subject->str,subject->len,
                        (int)off,opts,
                        ovector,OVECTOR_SIZE);

       if (rc<0)
       {
         push_int(rc);
       }
       else
       {
         int i, len = 0;
#ifdef HAVE_PCRE_FULLINFO
         /* fullinfo only returns an error if this->re is invalid. */
         pcre_fullinfo( THIS->re, NULL, PCRE_INFO_CAPTURECOUNT, &len );
         len= (len + 1)*2;
#else
         len = (pcre_info (THIS->re, NULL, NULL) + 1) * 2;
#endif
         /* A return value of 0 means the match succeeded but ovector
          * was too small for all pairs; the pairs that did fit are
          * still valid, so keep them instead of dropping the match. */
         if (rc == 0)
           rc = OVECTOR_SIZE/3;
         rc*=2;
         /* Make sure the result can hold everything pcre_exec()
          * reported, so the copy loop below stays inside the array
          * even if the capture count could not be determined. */
         if (len < rc)
           len = rc;
         res=allocate_array(len);
         for (i=0; i<rc; i++)
         {
           SET_SVAL(ITEM(res)[i], T_INT, NUMBER_NUMBER, integer,
                    ovector[i]);
         }
         for (; i < len; i++) {
           SET_SVAL(ITEM(res)[i], T_INT, NUMBER_NUMBER, integer, -1);
         }
         push_array(res);
       }
     }
   }
 
/*! @decl int get_stringnumber(string stringname) 
 *!    returns the number of a named subpattern 
 */ 
 
#ifdef HAVE_PCRE_GET_STRINGNUMBER
   /* Returns whatever pcre_get_stringnumber() reports for the named
    * subpattern stringname in the compiled regexp.
    *
    * Throws if no regexp has been compiled yet (like study(), info()
    * and exec() do) or if stringname is a wide string.
    */
   PIKEFUN int get_stringnumber(string stringname)
   {
     if (!THIS->re)
       Pike_error("need to initialize before get_stringnumber() is called\n");
     if (stringname->size_shift)
       SIMPLE_ARG_TYPE_ERROR("get_stringnumber",1,"string (8bit)");
     RETURN pcre_get_stringnumber(THIS->re,stringname->str);
   }
#endif /* HAVE_PCRE_GET_STRINGNUMBER */
 
/* init and exit */ 
 
#ifdef PIKE_NULL_IS_SPECIAL
   /* Only built when PIKE_NULL_IS_SPECIAL is defined: explicitly clears
    * the pattern, the study data and the compiled regexp pointers. */
   INIT
   {
     THIS->pattern = NULL;
     THIS->extra = NULL;
     THIS->re = NULL;
   }
#endif
 
   /* Hands the study data and the compiled regexp, when present, back
    * to PCRE's deallocator. */
   EXIT
     gc_trivial;
   {
     if (THIS->extra != NULL) {
       (*pcre_free)(THIS->extra); /* -> free() usually */
     }
     if (THIS->re != NULL) {
       (*pcre_free)(THIS->re); /* -> free() usually */
     }
   }
} 
 
/*! @endclass 
 */ 
 
#endif /* HAVE_LIBPCRE */ 
 
/*! @decl array(string) split_subject(string subject, @
 *!                        array(int) previous_result)
 *! Convenience function that
 *! splits a subject string on the result from _pcre->exec()
 *!
 *! equal to
 *! map(previous_result/2,
 *!     lambda(array v) { return subject[v[0]..v[1]-1]; })
 *!
 *! except that a pair which is not a valid range inside @[subject]
 *! (for instance the @expr{-1@} offsets of a submatch that didn't
 *! match, or offsets past the end of @[subject]) is not sliced; the
 *! corresponding element of the result keeps its initial value.
 */

PIKEFUN array(string) split_subject(string subject,array(int) previous_result)
{
  int i,j;
  /* Number of complete (start, end) pairs; a trailing odd element is
   * ignored. */
  int sz=previous_result->size/2;
  struct array *res;

  /* Validate every element that will be read before allocating. */
  for (i=0; i<sz*2; i++)
    if (TYPEOF(ITEM(previous_result)[i]) != T_INT)
      SIMPLE_ARG_TYPE_ERROR("split_subject",2,"array(int)");

  res=allocate_array(sz);
  for (i=j=0; j<sz; i+=2,j++)
  {
    INT_TYPE start = ITEM(previous_result)[i].u.integer;
    INT_TYPE end = ITEM(previous_result)[i+1].u.integer;
    /* Only slice ranges that lie entirely inside subject; anything
     * else would make string_slice() read outside the string. */
    if (start >= 0 && end >= start && end <= subject->len) {
      SET_SVAL(ITEM(res)[j], T_STRING, 0, string,
               string_slice(subject, start, end - start));
    }
  }

   RETURN res;
}
 
/*** module init & exit & stuff *****************************************/ 
 
/* Module teardown hook. The body consists solely of the precompiler's
 * EXIT marker; presumably that expands to the generated cleanup for
 * the declarations in this file -- confirm against the cmod
 * precompiler documentation.
 */
PIKE_MODULE_EXIT
{
  EXIT
}
 
/* Module setup hook. When HAVE_LIBPCRE is defined it
 *   - adds UTF8_SUPPORTED (only if pcre_config() reports UTF-8
 *     support) and the buildconfig_* integer constants read from
 *     pcre_config(),
 *   - builds the OPTION submodule holding the PCRE_* compile options,
 *   - builds the ERROR submodule holding the PCRE_ERROR_* codes,
 *   - and finally runs the precompiler's INIT marker.
 * Without HAVE_LIBPCRE the body is empty.
 */
PIKE_MODULE_INIT
{
#ifdef HAVE_LIBPCRE

/*! @decl constant buildconfig_UTF8
 *!     (from the pcreapi man-page)
 *!     "The output is an integer that is set to one if UTF-8 support
 *!     is available; otherwise it is set to zero."
 *!     This constant is calculated when the module is initiated
 *!     by using pcre_config(3).
 *!
 *! @decl constant buildconfig_NEWLINE
 *!     (from the pcreapi man-page)
 *!     "The output is an integer that is set to  the  value  of  the
 *!     code  that  is  used for the newline character. It is either
 *!     linefeed (10) or carriage return (13), and  should  normally
 *!     be the standard character for your operating system."
 *!     This constant is calculated when the module is initiated
 *!     by using pcre_config(3).
 *!
 *! @decl constant buildconfig_LINK_SIZE
 *!     (from the pcreapi man-page)
 *!     "The output is an integer that contains the number  of  bytes
 *!     used  for  internal linkage in compiled regular expressions.
 *!     The value is 2, 3, or 4. Larger values allow larger  regular
 *!     expressions  to be compiled, at the expense of slower
 *!     matching. The default value of 2 is sufficient for  all  but  the
 *!     most  massive patterns, since it allows the compiled pattern
 *!     to be up to 64K in size."
 *!     This constant is calculated when the module is initiated
 *!     by using pcre_config(3).
 *!
 *! @decl constant buildconfig_POSIX_MALLOC_THRESHOLD
 *!     (from the pcreapi man-page)
 *!     "The output is an integer that contains the  threshold  above
 *!     which  the POSIX interface uses malloc() for output vectors.
 *!     Further details are given in the pcreposix documentation."
 *!     This constant is calculated when the module is initiated
 *!     by using pcre_config(3).
 *!
 *! @decl constant buildconfig_MATCH_LIMIT
 *!     (from the pcreapi man-page)
 *!     "The output is an integer that gives the  default  limit  for
 *!     the   number  of  internal  matching  function  calls  in  a
 *!     pcre_exec()  execution.  Further  details  are  given   with
 *!     pcre_exec() below."
 *!     This constant is calculated when the module is initiated
 *!     by using pcre_config(3).
 *!
 */

/* we need a constant that *isn't there* if we don't have UTF8 support */
#ifdef PCRE_CONFIG_UTF8
  {
    int outcome;
    /* UTF8_SUPPORTED is only added when the query succeeds and reports
     * a non-zero value. */
    if (pcre_config(PCRE_CONFIG_UTF8,&outcome)==0 && outcome)
      add_integer_constant("UTF8_SUPPORTED",1,0);
  }
#endif

/* Queries pcre_config(PCRE_CONFIG_<X>) into a variable of type T and,
 * if the call returns 0, adds the value as the integer constant
 * "buildconfig_<X>". */
#define FIGURE_BUILD_TIME_OPTION(X,T)                                   \
  do                                                                    \
  {                                                                     \
    T outcome;                                                          \
    if (pcre_config(PCRE_CONFIG_##X,&outcome)==0)                       \
      add_integer_constant("buildconfig_"#X,outcome,0);                 \
  }                                                                     \
  while (0)

#ifdef PCRE_CONFIG_UTF8
  FIGURE_BUILD_TIME_OPTION(UTF8,int);
#endif
#ifdef PCRE_CONFIG_NEWLINE
  FIGURE_BUILD_TIME_OPTION(NEWLINE,int);
#endif
#ifdef PCRE_CONFIG_LINK_SIZE
  FIGURE_BUILD_TIME_OPTION(LINK_SIZE,int);
#endif
#ifdef PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
  FIGURE_BUILD_TIME_OPTION(POSIX_MALLOC_THRESHOLD,int);
#endif
#ifdef PCRE_CONFIG_MATCH_LIMIT
  FIGURE_BUILD_TIME_OPTION(MATCH_LIMIT,unsigned long int);
#endif

  /*! @module OPTION
   *!  contains all option constants
   */

  /* Constants added from here up to END_PROGRAM_MAKE_SUBMODULE("OPTION")
   * go into the program started here. */
  start_new_program();

  /*! @decl constant ANCHORED
   *!  (from the pcreapi manpage) If this bit is set, the pattern is
   *!  forced to be "anchored", that is, it is constrained to match
   *!  only at the first matching point in the string which is being
   *!  searched (the "subject string"). This effect can also be
   *!  achieved by appropriate constructs in the pattern itself, which
   *!  is the only way to do it in Perl.
   */
  add_integer_constant("ANCHORED",PCRE_ANCHORED,0);

  /*! @decl constant CASELESS
   *!  (from the pcreapi manpage) If this bit is set, letters in the
   *!  pattern match both upper and lower case letters. It is
   *!  equivalent to Perl's /i option, and it can be changed within a
   *!  pattern by a (?i) option setting.
   */
   add_integer_constant("CASELESS",PCRE_CASELESS,0);

   /*! @decl constant DOLLAR_ENDONLY
    *!  (from the pcreapi manpage) If this bit is set, a dollar
    *!  metacharacter in the pattern matches only at the end of the
    *!  subject string. Without this option, a dollar also matches
    *!  immediately before the final character if it is a newline (but
    *!  not before any other newlines).  The PCRE_DOLLAR_ENDONLY
    *!  option is ignored if PCRE_MULTILINE is set. There is no
    *!  equivalent to this option in Perl, and no way to set it within
    *!  a pattern.
    */
   add_integer_constant("DOLLAR_ENDONLY",PCRE_DOLLAR_ENDONLY,0);

   /*! @decl constant DOTALL
    *!  (from the pcreapi manpage) If this bit is set, a dot
    *!  metacharacter in the pattern matches all characters, including
    *!  newlines. Without it, newlines are excluded. This option is
    *!  equivalent to Perl's /s option, and it can be changed within a
    *!  pattern by a (?s) option setting. A negative class such as
    *!  [^a] always matches a newline character, independent of the
    *!  setting of this option.
    */
   add_integer_constant("DOTALL",PCRE_DOTALL,0);

   /*! @decl constant EXTENDED
    *!  (from the pcreapi manpage) If this bit is set, whitespace data
    *!  characters in the pattern are totally ignored except when
    *!  escaped or inside a character class.  Whitespace does not
    *!  include the VT character (code 11). In addition, characters
    *!  between an unescaped # outside a character class and the next
    *!  newline character, inclusive, are also ignored. This is
    *!  equivalent to Perl's /x option, and it can be changed within a
    *!  pattern by a (?x) option setting.
    *!
    *!  This option makes it possible to include comments inside
    *!  complicated patterns.  Note, however, that this applies only
    *!  to data characters.  Whitespace characters may never appear
    *!  within special character sequences in a pattern, for example
    *!  within the sequence (?( which introduces a conditional
    *!  subpattern.
    */
   add_integer_constant("EXTENDED",PCRE_EXTENDED,0);

   /*! @decl constant EXTRA
    *!  (from the pcreapi manpage) This option was invented in order
    *!  to turn on additional functionality of PCRE that is
    *!  incompatible with Perl, but it is currently of very little
    *!  use. When set, any backslash in a pattern that is followed by
    *!  a letter that has no special meaning causes an error, thus
    *!  reserving these combinations for future expansion.  By
    *!  default, as in Perl, a backslash followed by a letter with no
    *!  special meaning is treated as a literal.  There are at present
    *!  no other features controlled by this option. It can also be
    *!  set by a (?X) option setting within a pattern.
    */
   add_integer_constant("EXTRA",PCRE_EXTRA,0);

   /*! @decl constant MULTILINE
    *!  (from the pcreapi manpage) By default, PCRE treats the subject
    *!  string as consisting of a single "line" of characters (even if
    *!  it actually contains several newlines).  The "start of line"
    *!  metacharacter (^) matches only at the start of the string,
    *!  while the "end of line" metacharacter ($) matches only at the
    *!  end of the string, or before a terminating newline (unless
    *!  PCRE_DOLLAR_ENDONLY is set). This is the same as Perl.
    *!
    *!  When PCRE_MULTILINE is set, the "start of line" and "end of
    *!  line" constructs match immediately following or immediately
    *!  before any newline in the subject string, respectively, as
    *!  well as at the very start and end. This is equivalent to
    *!  Perl's /m option, and it can be changed within a pattern by a
    *!  (?m) option setting. If there are no "\n" characters in a
    *!  subject string, or no occurrences of ^ or $ in a pattern,
    *!  setting PCRE_MULTILINE has no effect.
    */
   add_integer_constant("MULTILINE",PCRE_MULTILINE,0);

#ifdef PCRE_NO_AUTO_CAPTURE
   /*! @decl constant NO_AUTO_CAPTURE
    *!  (from the pcreapi manpage) If this option is set, it disables
    *!  the use of numbered capturing parentheses in the
    *!  pattern. Any opening parenthesis that is not followed by ?
    *!  behaves as if it were followed by ?: but named parentheses can
    *!  still be used for capturing (and they acquire numbers in the
    *!  usual way).  There is no equivalent of this option in Perl.
    */
   add_integer_constant("NO_AUTO_CAPTURE",PCRE_NO_AUTO_CAPTURE,0);
#endif

   /*! @decl constant UNGREEDY
    *!  (from the pcreapi manpage) This option inverts the
    *!  "greediness" of the quantifiers so that they are not greedy by
    *!  default, but become greedy if followed by "?". It is not
    *!  compatible with Perl. It can also be set by a (?U) option
    *!  setting within the pattern.
    */
   add_integer_constant("UNGREEDY",PCRE_UNGREEDY,0);

#ifdef PCRE_UTF8
   /*! @decl constant UTF8
    *!  (from the pcreapi manpage) This option causes PCRE to regard
    *!  both the pattern and the subject as strings of UTF-8
    *!  characters instead of single-byte character strings.  However,
    *!  it is available only if PCRE has been built to include UTF-8
    *!  support.  If not, the use of this option provokes an
    *!  error. Details of how this option changes the behaviour of
    *!  PCRE are given in the section on UTF-8 support in the main
    *!  pcre page.
    */
   add_integer_constant("UTF8",PCRE_UTF8,0);
#endif

/* Ends the program opened by start_new_program(), clones one object
 * from it, adds that object as the constant named X, and then drops
 * the local references to the object and the program. */
#define END_PROGRAM_MAKE_SUBMODULE(X)                                   \
   do                                                                   \
   {                                                                    \
     struct program *p=end_program();                                   \
     struct object *obj=clone_object(p,0);                              \
     add_object_constant(X,obj,0);                                      \
     free_object(obj);                                                  \
     free_program(p);                                                   \
   }                                                                    \
   while (0)

   END_PROGRAM_MAKE_SUBMODULE("OPTION");

   /*! @endmodule OPTION
    */

   /*! @module ERROR
    */

   /*! @decl constant NOMATCH
    *! @decl constant NULL
    *! @decl constant BADOPTION
    *! @decl constant BADMAGIC
    *! @decl constant UNKNOWN_NODE
    *! @decl constant NOMEMORY
    *! @decl constant NOSUBSTRING
    *! @decl constant MATCHLIMIT
    *! @decl constant CALLOUT
    *!   Documented in @[exec].
    */

   /* Second submodule: the error codes. MATCHLIMIT and CALLOUT are
    * only added when the PCRE headers define them. */
   start_new_program();
   /* documented in exec() */
   add_integer_constant("NOMATCH",PCRE_ERROR_NOMATCH,0);
   add_integer_constant("NULL",PCRE_ERROR_NULL,0);
   add_integer_constant("BADOPTION",PCRE_ERROR_BADOPTION,0);
   add_integer_constant("BADMAGIC",PCRE_ERROR_BADMAGIC,0);
   add_integer_constant("UNKNOWN_NODE",PCRE_ERROR_UNKNOWN_NODE,0);
   add_integer_constant("NOMEMORY",PCRE_ERROR_NOMEMORY,0);
   add_integer_constant("NOSUBSTRING",PCRE_ERROR_NOSUBSTRING,0);
#ifdef PCRE_ERROR_MATCHLIMIT
   add_integer_constant("MATCHLIMIT",PCRE_ERROR_MATCHLIMIT,0);
#endif
#ifdef PCRE_ERROR_CALLOUT
   add_integer_constant("CALLOUT",PCRE_ERROR_CALLOUT,0);
#endif

   END_PROGRAM_MAKE_SUBMODULE("ERROR");

   /*! @endmodule ERROR
    */

   /*! @endmodule PCRE
    */

   /*! @endmodule Regexp
    */

   /* Precompiler marker; presumably expands to the generated
    * registration code for the declarations in this file -- confirm
    * against the cmod precompiler documentation. */
   INIT

#endif /* HAVE_LIBPCRE */
}