1
  
2
  
3
  
4
  
5
  
6
  
7
  
8
  
9
  
10
  
11
  
12
  
13
  
14
  
15
  
16
  
17
  
18
  
19
  
20
  
21
  
22
  
23
  
24
  
25
  
26
  
27
  
28
  
29
  
30
  
31
  
32
  
33
  
34
  
35
  
36
  
37
  
38
  
39
  
40
  
41
  
42
  
43
  
44
  
45
  
46
  
47
  
48
  
49
  
50
  
51
  
52
  
53
  
54
  
55
  
56
  
57
  
58
  
59
  
60
  
61
  
62
  
63
  
64
  
65
  
66
  
67
  
68
  
69
  
70
  
71
  
72
  
73
  
74
  
75
  
76
  
77
  
78
  
79
  
80
  
81
  
82
  
83
  
84
  
85
  
86
  
87
  
88
  
89
  
90
  
91
  
92
  
93
  
94
  
95
  
96
  
97
  
98
  
99
  
100
  
101
  
102
  
103
  
104
  
105
  
106
  
107
  
108
  
109
  
110
  
111
  
112
  
113
  
114
  
115
  
116
  
117
  
118
  
119
  
120
  
121
  
122
  
123
  
124
  
125
  
126
  
127
  
128
  
129
  
130
  
131
  
132
  
133
  
134
  
135
  
136
  
137
  
138
  
139
  
140
  
141
  
142
  
143
  
144
  
145
  
146
  
147
  
148
  
149
  
150
  
151
  
152
  
153
  
154
  
155
  
156
  
157
  
158
  
159
  
160
  
161
  
162
  
163
  
164
  
165
  
166
  
167
  
168
  
169
  
170
  
171
  
172
  
173
  
174
  
175
  
176
  
177
  
178
  
179
  
180
  
181
  
182
  
183
  
184
  
185
  
186
  
187
  
188
  
189
  
190
  
191
  
192
  
193
  
194
  
195
  
196
  
197
  
198
  
199
  
200
  
201
  
202
  
203
  
204
  
205
  
206
  
207
  
208
  
209
  
210
  
211
  
212
  
213
  
214
  
215
  
216
  
217
  
218
  
219
  
220
  
221
  
222
  
223
  
224
  
225
  
226
  
227
  
228
  
229
  
230
  
231
  
232
  
233
  
234
  
235
  
236
  
237
  
238
  
239
  
240
  
241
  
242
  
243
  
244
  
245
  
246
  
247
  
248
  
249
  
250
  
251
  
252
  
253
  
254
  
255
  
256
  
257
  
258
  
259
  
260
  
261
  
262
  
263
  
264
  
265
  
266
  
267
  
268
  
269
  
270
  
271
  
272
  
273
  
274
  
275
  
276
  
277
  
278
  
279
  
280
  
281
  
282
  
283
  
284
  
285
  
286
  
287
  
288
  
289
  
290
  
291
  
292
  
293
  
294
  
295
  
296
  
297
  
298
  
299
  
300
  
301
  
302
  
303
  
304
  
305
  
306
  
307
  
308
  
309
  
310
  
311
  
312
  
313
  
314
  
315
  
316
  
317
  
318
  
319
  
320
  
321
  
322
  
323
  
324
  
325
  
326
  
327
  
328
  
329
  
330
  
331
  
332
  
333
  
334
  
335
  
336
  
337
  
338
  
339
  
340
  
341
  
342
  
343
  
344
  
345
  
346
  
347
  
348
  
349
  
350
  
351
  
352
  
353
  
354
  
355
  
356
  
357
  
358
  
359
  
360
  
361
  
362
  
363
  
364
  
365
  
366
  
367
  
368
  
369
  
370
  
371
  
372
  
373
  
374
  
375
  
376
  
377
  
378
  
379
  
380
  
381
  
382
  
383
  
384
  
385
  
386
  
387
  
388
  
389
  
390
  
391
  
392
  
393
  
394
  
395
  
396
  
397
  
398
  
399
  
400
  
401
  
402
  
403
  
404
  
405
  
406
  
407
  
408
  
409
  
410
  
411
  
412
  
413
  
414
  
415
  
416
  
417
  
418
  
419
  
420
  
421
  
422
  
423
  
424
  
425
  
426
  
427
  
428
  
429
  
430
  
431
  
432
  
433
  
434
  
435
  
436
  
437
  
438
  
439
  
440
  
441
  
442
  
443
  
444
  
445
  
446
  
447
  
448
  
449
  
450
  
451
  
452
  
453
  
454
  
455
  
456
  
457
  
458
  
459
  
460
  
461
  
462
  
463
  
464
  
465
  
466
  
467
  
468
  
469
  
470
  
471
  
472
  
473
  
474
  
475
  
476
  
477
  
478
  
479
  
480
  
481
  
482
  
483
  
484
  
485
  
486
  
487
  
488
  
489
  
490
  
491
  
492
  
493
  
494
  
495
  
496
  
497
  
498
  
499
  
500
  
501
  
502
  
503
  
504
  
505
  
506
  
507
  
508
  
509
  
510
  
511
  
512
  
513
  
514
  
515
  
516
  
517
  
518
  
519
  
520
  
521
  
522
  
523
  
524
  
525
  
526
  
527
  
528
  
529
  
530
  
531
  
532
  
533
  
534
  
535
  
536
  
537
  
538
  
539
  
540
  
541
  
542
  
543
  
544
  
545
  
546
  
547
  
548
  
549
  
550
  
551
  
552
  
553
  
554
  
555
  
556
  
557
  
558
  
559
  
560
  
561
  
562
  
563
  
564
  
565
  
566
  
567
  
568
  
569
  
570
  
571
  
572
  
573
  
574
  
575
  
576
  
577
  
578
  
579
  
580
  
581
  
582
  
583
  
584
  
585
  
586
  
587
  
588
  
589
  
590
  
591
  
592
  
593
  
594
  
// 
// Unified file system garbage collector. 
// 
// 2013-09-12 Henrik Grubbström 
// 
 
#if constant(Filesystem.Monitor.basic) 
 
// #define FSGC_DEBUG 
// #define FSGC_PRETEND 
 
#ifdef FSGC_DEBUG 
#define GC_WERR(X...)       werror(X) 
#else 
#define GC_WERR(X...) 
#endif 
 
// The following are mixed in when inherited by roxen.pike. 
Variable.Variable getvar( string name ); 
 
/* Some notes: 
 * 
 *   There are multiple data for a file that may affect the garbage policy: 
 * 
 *     * The age of the file. 
 * 
 *     * The size of the file. 
 * 
 *   Garbage collection for a root may be triggered by several factors: 
 * 
 *     * A maximum age for a file has been reached.
 * 
 *     * Too many files under a root. 
 * 
 *     * The total size of the files under a root is too large. 
 * 
 *   Symlinks are not followed and not garbage collected due 
 *   to the inherent risks of escaping directory structures 
 *   and/or removing manually added stuff. 
 */ 
 
//! Filesystem garbage collector for a single root directory. 
class FSGarb 
{ 
  inherit Filesystem.Monitor.basic : basic; 
 
  //! Number of regular files currently accounted for by the monitors.
  int num_files;
  //! Sum of the stat sizes of the regular files counted in @[num_files].
  int total_size;

  //! Identifier of the owning module; used with Roxen.get_module()
  //! when logging removals.
  string modid;
  //! Canonic path of the directory being garbage collected.
  string root;
  //! Maximum file age; passed on as the stable time of the monitor.
  int max_age;
  //! Maximum number of files allowed (@expr{0@} disables the limit).
  int max_files;
  //! Maximum total size allowed (@expr{0@} disables the limit).
  int max_size;

  //! If set, parent directories below @[root] are removed (when
  //! possible) after a file has been removed.
  int(0..1) cleanup_parent_dirs;

  //! Maps the path of a queued file to the handle returned by
  //! @expr{pending_gc->push()@}.
  mapping(string:object) handle_lookup = ([]);
  //! Monitors of regular files that are candidates for threshold-based
  //! removal, prioritized with @[st_to_pri()].
  ADT.Priority_queue pending_gc = ADT.Priority_queue();

  //! If set, move files to this directory instead of deleting them.
  //!
  //! If set to @[root] or @expr{""@} keep the files as is.
  string quarantine;

  //! Configuration of the owning module; looked up lazily and cached
  //! by @[log_remove()].
  Configuration owner_mod_conf;
 
  //! Record a removal event in the event log of the configuration
  //! that owns the module @[modid].
  //!
  //! @param path
  //!   Path of the affected file.
  //! @param op
  //!   Operation label stored with the event.
  //!
  //! Nothing is logged if the owning configuration can't be found.
  protected void log_remove(string path, string op)
  {
    // Resolve the owning configuration on first use and cache it.
    if (!owner_mod_conf) {
      RoxenModule mod = Roxen.get_module(modid);
      if (mod) owner_mod_conf = mod->my_configuration();
    }

    if (!owner_mod_conf) return;

    owner_mod_conf->log_event("fsgc", op, path, ([ ]) );
  }
 
  //! Delete @[path] and optionally remove the parent directories
  //! that become removable as a result.
  //!
  //! @param path
  //!   File to delete.
  //! @param is_quarantined
  //!   Set when the file has already been moved or copied to the
  //!   quarantine. A failed delete is then acceptable, and logging
  //!   and parent cleanup are performed anyway.
  //!
  //! @returns
  //!   The result from @expr{predef::rm()@} for the file itself.
  protected int rm_and_parent_cleanup(string path, int(0..1) is_quarantined)
  {
    //  Canonic form, so that a leaf directory isn't visited twice.
    path = canonic_path(path);

    int res = predef::rm(path);

    //  Nothing more to do if a plain delete failed.
    if (!res && !is_quarantined) return res;

    log_remove(path, is_quarantined ? "quarantined-file" : "delete-file");

    if (!cleanup_parent_dirs) return res;

    //  Walk upward one directory at a time. dirname() gives the parent
    //  without trailing slash, and the root is already canonic (i.e. no
    //  trailing slash), so the prefix check keeps us strictly below the
    //  root. Stop at the first directory that can't be removed.
    string root_prefix = root + "/";
    string dir = dirname(path);
    while (has_prefix(dir, root_prefix) && predef::rm(dir)) {
      dir = dirname(dir);
    }

    return res;
  }
 
  //! Remove @[path], either by deleting it or by moving it to the
  //! @[quarantine] directory.
  //!
  //! With a quarantine configured the file is first moved with
  //! @expr{mv()@} (creating the destination directory if needed), and
  //! as a last resort copied and then deleted.
  //!
  //! @returns
  //!   Non-zero if the file was removed from its original location,
  //!   and @expr{0@} if it was kept (quarantine set to @[root] or
  //!   @expr{""@}, path not below @[root], or failure).
  protected int rm(string path)
  {
    GC_WERR("FSGC: Zap %O\n", path);

    // No quarantine: plain delete.
    if (!quarantine) return rm_and_parent_cleanup(path, 0);

    // Quarantine in place: keep the file as is.
    if ((quarantine == "") || (quarantine == root)) return 0;

    // Only files below the root are handled.
    if (!has_prefix(path, root + "/")) return 0;

    string rel = path[sizeof(root)..];
    string target = quarantine + rel;

    // First try the trivial case.
    int moved = mv(path, target);
    if (!moved) {
      // Make sure the destination directory exists, and try again.
      string rel_dir = dirname(rel);
      if (sizeof(rel_dir) && Stdio.mkdirhier(quarantine + rel_dir)) {
        moved = mv(path, target);
      }
    }
    if (moved) {
      rm_and_parent_cleanup(path, 1);
      return 1;
    }

    // Different filesystems? Fall back to copy + delete.
    if (Stdio.cp(path, target)) {
      return rm_and_parent_cleanup(path, 1);
    }

    werror("FSGC: Failed to copy file %O to %O: %s.\n",
           path, quarantine + rel, strerror(errno()));
    return 0;
  }
 
  //! Enforce the size and count limits.
  //!
  //! As long as @[total_size] exceeds @[max_size] or @[num_files]
  //! exceeds @[max_files] (a limit of @expr{0@} is ignored), files are
  //! taken from the pending queue via @[zap_one_file()]. The loop ends
  //! when the limits are met or the queue is empty.
  void check_threshold()
  {
    GC_WERR("FSGC: Checking thresholds...\n"
            "      total_size: %d max_size: %d\n"
            "      num_files: %d max_files: %d\n",
            total_size, max_size,
            num_files, max_files);

    while (1) {
      int too_large = max_size && (total_size > max_size);
      int too_many = max_files && (num_files > max_files);
      if (!too_large && !too_many) return;

      GC_WERR("FSGC: Filesystem limits exceeded forcing early removal.\n");
      if (!zap_one_file()) return;
    }
  }
 
  //! Attempt to remove the first file in the pending queue.
  //!
  //! The monitor is taken off the queue regardless of whether the
  //! removal succeeds.
  //!
  //! @returns
  //!   @expr{0@} if the queue was empty, and @expr{1@} if a removal
  //!   was attempted (successful or not).
  protected int zap_one_file()
  {
    if (!sizeof(pending_gc)) return 0;

    // Take the next candidate off the queue.
    Monitor victim = pending_gc->pop();
    m_delete(handle_lookup, victim->path);

    // Clear the cached stat right away so that the file isn't
    // accounted for a second time when the deletion is detected.
    int bytes = victim->st->size;
    victim->st->size = 0;
    victim->st->isreg = 0;

    GC_WERR("Deleting file %O...\n", victim->path);
    if (!rm(victim->path)) {
      GC_WERR("Failed to delete file %O: %s\n",
              victim->path, strerror(errno()));
      // Put the cached stat back in case the file is altered externally.
      victim->st->size = bytes;
      victim->st->isreg = 1;
      return 1;
    }

    total_size -= bytes;
    num_files--;

    // Schedule a poll now so that the deletion is notified soon.
    victim->next_poll = time(1);
    monitor_queue->adjust(victim);
    return 1;
  }
 
  //! Calculate the queue priority for a file.
  //!
  //! The priority is the modification time minus one unit per full
  //! KiB of file size, so older and larger files get lower values.
  //! NOTE(review): ADT.Priority_queue is expected to pop the lowest
  //! value first, which @[remove_pending()] relies on too; confirm.
  int st_to_pri(Stdio.Stat st)
  {
    int size_kb = st->size / 1024;
    return st->mtime - size_kb;
  }
 
  //! Unregister the monitor @[m] from threshold-based deletion.
  //!
  //! Does nothing if the path of @[m] has no handle in
  //! @[handle_lookup].
  protected void remove_pending(Monitor m)
  {
    object handle = m_delete(handle_lookup, m->path);
    if (!handle) return;

    // The queue can only pop its first entry, so force the entry
    // to the front before popping it.
    pending_gc->adjust_pri(handle, -0x80000000);
    pending_gc->pop();
  }
 
  protected class Monitor { 
    inherit basic::Monitor; 
 
    //! Create a monitor for @[path].
    //!
    //! All arguments are passed on unmodified to the inherited
    //! constructor; the only addition is a debug trace (active when
    //! FSGC_DEBUG is defined).
    protected void create(string path,
                          MonitorFlags flags,
                          int max_dir_check_interval,
                          int file_interval_factor,
                          int stable_time)
    {
      ::create(path, flags, max_dir_check_interval,
               file_interval_factor, stable_time);
      // Traced after the inherited create() so that the %O rendering
      // of this object shows initialized state.
      GC_WERR("%O->create(%O, %O, %O, %O, %O)\n",
              this_object(), path, flags, max_dir_check_interval,
              file_interval_factor, stable_time);
    }
 
    //! Perform the inherited release check, and if this monitor is no
    //! longer registered afterwards, schedule the monitor of the
    //! parent directory (if there is one) for an immediate poll.
    //!
    //! @param mask
    //! @param flags
    //!   Passed on unmodified to the inherited implementation.
    void check_for_release(int mask, int flags)
    {
      GC_WERR("%O->check_for_release(0x%x, 0x%x)\n",
              this_object(), mask, flags);
      ::check_for_release(mask, flags);
      if (!monitors[path]) {
        // We've been released.
        // Make sure to update our parent (if any) soon.
        //
        // The parent path is our path with the last segment removed.
        array a = path/"/";
        Monitor m = monitors[canonic_path(a[..sizeof(a)-2]*"/")];
        if (m) {
          GC_WERR("Waking up our parent dir: %O\n", m);
          // A poll time in the past puts the parent first in line.
          m->next_poll = time(1)-1;
          monitor_queue->adjust(m);
        }
      }
    }
 
    //! Callback for a file that already existed when monitoring
    //! started.
    //!
    //! Regular files are added to the accounting and queued for
    //! threshold-based deletion.
    protected void file_exists(string path, Stdio.Stat st)
    {
      ::file_exists(path, st);

      // Make sure we get the stable change callback...
      last_change = st->mtime;

      // Only regular files are accounted for.
      if (!st->isreg) return;

      total_size += st->size;
      num_files++;

      // Register us for threshold-based deletion.
      handle_lookup[path] = pending_gc->push(st_to_pri(st), this);

      check_threshold();
    }
 
    //! Update the monitor state and calculate the time for the next
    //! poll.
    //!
    //! The next poll is scheduled at the check interval from now, or
    //! at the time the file is due to become stable if that is sooner,
    //! but always at least one second ahead.
    //!
    //! @param st
    //!   Current stat for the path; passed on to the inherited
    //!   implementation. May be zero.
    // NB: Needs to be visible so that reconfigure() can call it.
    void update(Stdio.Stat st)
    {
      // Use the interval of this monitor, or fall back to the global one.
      int delta = max_dir_check_interval || basic::max_dir_check_interval;
      if (!next_poll) {
        // Attempt to distribute polls evenly at startup.
        delta = 1 + random(delta);
        if (st) {
          last_change = st->mtime;
        }
      }

      ::update(st);

      // We're only interested in stable time, so there's no reason
      // to scan as frequently as the default implementation.

      if (last_change <= time(1)) {
        // Time until stable.
        int d = last_change + (stable_time || basic::stable_time) - time(1);

        GC_WERR("%O: last: %s, d: %d, delta: %d\n",
                this_object(), ctime(last_change) - "\n", d, delta);
        // Already stable: poll again in a second.
        if (d < 0) d = 1;
        if (d < delta) delta = d;
      }
      // Never schedule a poll for the current second.
      next_poll = time(1) + (delta || 1);
      GC_WERR("%O->update(%O) ==> next: %s\n",
              this_object(), st, ctime(next_poll) - "\n");
      // Reposition us in the poll queue according to the new time.
      monitor_queue->adjust(this);
    }
 
    //! Describe the monitor for debug output: path, flags, time of
    //! last change, time of next poll and the cached stat.
    protected string _sprintf(int c)
    {
      // ctime() ends its result with a newline; strip it.
      string next = ctime(next_poll) - "\n";
      return sprintf("FSGarb.Monitor(%O, %O, last: %d, next: %s, st: %O)",
                     path, flags, last_change, next, st);
    }
 
    //! Check the monitored path for changes.
    //!
    //! Plain pass-through to the inherited implementation.
    //!
    //! @returns
    //!   The result from the inherited @expr{check()@}.
    int(0..1) check(MonitorFlags|void flags)
    {
      return ::check(flags);
    }
 
    //! Callback for a change of stat for the monitored path.
    //!
    //! In addition to the inherited processing this recreates lost
    //! monitors for directory entries, keeps @[num_files] and
    //! @[total_size] in sync with the new stat, maintains the entry in
    //! the pending queue, and finally enforces the thresholds.
    //!
    //! @returns
    //!   The result from the inherited implementation, or @expr{1@}
    //!   if a lost submonitor was recreated.
    int(0..1) status_change(Stdio.Stat old_st, Stdio.Stat st,
                            MonitorFlags old_flags, MonitorFlags flags)
    {
      GC_WERR("Status change %O(0x%x) ==> %O(0x%x) for %O!\n",
              old_st, old_flags, st, flags, this_object());
      int res = ::status_change(old_st, st, old_flags, flags);
      if (st->isdir && (flags & MF_RECURSE)) {
        foreach(files, string file) {
          file = canonic_path(Stdio.append_path(path, file));
          if (!monitors[file]) {
            // Lost update due to race-condition:
            //
            //   Exist ==> Deleted ==> Exists
            //
            // with no update of directory inbetween.
            //
            // Create the lost submonitor again.
            res = 1;
            monitor(file, old_flags | MF_AUTO | MF_HARD,
                    max_dir_check_interval,
                    file_interval_factor,
                    stable_time);
            monitors[file]->check();
          }
        }
      }

      // +1 when the path became a regular file, -1 when it stopped
      // being one, and 0 otherwise.
      num_files += st->isreg - old_st->isreg;

      if (old_st->isreg) {
        // Retract the old size; the new size (if any) is added below.
        total_size -= old_st->size;

        if (!st->isreg) {
          // No longer a regular file, so no longer a candidate.
          remove_pending(this);
        }
      }
      if (st->isreg) {
        total_size += st->size;

        // Register us for threshold-based deletion.
        if (!old_st->isreg) {
          handle_lookup[path] = pending_gc->push(st_to_pri(st), this);
        } else {
          // Already a regular file: reposition the existing queue
          // entry (if there is one) when the priority has changed.
          object handle = handle_lookup[path];
          if (handle && (st_to_pri(st) != st_to_pri(old_st))) {
            pending_gc->adjust_pri(handle, st_to_pri(st));
          }
        }
      }

      check_threshold();
      return res;
    }
 
    //! Callback for a file that has been created.
    //!
    //! Regular files are added to the accounting and queued for
    //! threshold-based deletion; other file types are ignored.
    void file_created(string path, Stdio.Stat st)
    {
      GC_WERR("File %O %O created (%O).\n", path, st, this_object());

      if (!st->isreg) return;

      total_size += st->size;
      num_files++;

      // Register us for threshold-based deletion.
      handle_lookup[path] = pending_gc->push(st_to_pri(st), this);

      check_threshold();
    }
 
    //! Callback for a file that has been deleted.
    //!
    //! Regular files are removed from the accounting and from the
    //! pending queue; other file types are ignored.
    void file_deleted(string path, Stdio.Stat old_st)
    {
      GC_WERR("File %O %O deleted (%O).\n", path, old_st, this_object());

      if (!old_st->isreg) return;

      total_size -= old_st->size;
      num_files--;

      // The queue entry is looked up using the path of this monitor.
      remove_pending(this);

      check_threshold();
    }
  } 
 
  // Expose our Monitor subclass under the name DefaultMonitor.
  // NOTE(review): presumably this is the hook that makes the inherited
  // Filesystem.Monitor.basic code instantiate our Monitor -- confirm.
  constant DefaultMonitor = Monitor;
 
  //! Create a garbage collector for the directory @[path].
  //!
  //! @param modid
  //!   Identifier of the owning module (used when logging removals).
  //! @param path
  //!   Root directory to garbage collect; stored in canonic form.
  //! @param max_age
  //!   Maximum file age; used as the stable time of the monitor.
  //! @param max_size
  //!   Maximum total size of the files (@expr{0@} for no limit).
  //! @param max_files
  //!   Maximum number of files (@expr{0@} for no limit).
  //! @param quarantine
  //!   Directory to move files to instead of deleting them.
  //! @param cleanup_parent_dirs
  //!   Remove parent directories (when possible) after a removal.
  protected void create(string modid, string path, int max_age,
                        int|void max_size, int|void max_files,
                        string|void quarantine,
                        int(0..1)|void cleanup_parent_dirs)
  {
    GC_WERR("FSGC: Max age: %d\n", max_age);
    GC_WERR("FSGC: Max size: %d\n", max_size);
    GC_WERR("FSGC: Max files: %d\n", max_files);

    // Copy the settings to the corresponding member variables.
    this_program::modid = modid;
    this_program::max_age = max_age;
    this_program::max_files = max_files;
    this_program::max_size = max_size;
    this_program::cleanup_parent_dirs = cleanup_parent_dirs;

    root = canonic_path(path);

    // An empty quarantine path is kept as is (it means "keep files").
    if (quarantine) {
      this::quarantine =
        sizeof(quarantine) ? canonic_path(quarantine) : quarantine;
    }

    //  If the max age is on the scale of months the file check interval
    //  will likely exceed typical server uptime (e.g. every 36 days if
    //  the stable time is 180 days). We cap this to a much lower number
    //  to ensure it's run regularly even if the server reboots frequently.
    int dir_check_interval = min(max_age, 24 * 3600) / file_interval_factor;
    ::create(dir_check_interval, 0, max_age);

    // Workaround for too strict type-check in Pike 7.8.
    int flags = 3;

    monitor(root, flags);
  }
 
  //! Callback for a path that has become stable; removes the path
  //! via @[rm()].
  //!
  //! The root directory itself is never removed.
  //!
  //! @param path
  //!   The stable path.
  //! @param st
  //!   Stat for the path (only used by the disabled code below).
  void stable_data_change(string path, Stdio.Stat st)
  {
    if (path == root) return;
    GC_WERR("FSGC: Deleting stale file: %O\n", path);

#if 0
    // If we ever use accelerated notifications again.

    // Override accelerated stable change notification.
    if (st->mtime >= time(1) - stable_time) {
      GC_WERR("FSGC: Keeping file: %O\n", path);
      // Remove the stable notification marker, and reschedule.
      Monitor m = monitor(path, MF_AUTO);
      m->last_change = st->mtime;
      // m->update(st);
      m->check();       // Force an update().
      return;
    }
#endif
    rm(path);
  }
 
  //! Change the limits of a running garbage collector.
  //!
  //! @param new_max_age
  //!   New maximum file age (stable time).
  //! @param new_max_size
  //!   New maximum total size; left unchanged if omitted.
  //! @param new_max_files
  //!   New maximum number of files; left unchanged if omitted.
  //!
  //! The thresholds are enforced with the new limits before returning.
  void reconfigure(int new_max_age, int|void new_max_size,
                   int|void new_max_files)
  {
    if (!zero_type(new_max_size)) {
      GC_WERR("FSGC: New max size: %d\n", new_max_size);
      max_size = new_max_size;
    }
    if (!zero_type(new_max_files)) {
      GC_WERR("FSGC: New max files: %d\n", new_max_files);
      max_files = new_max_files;
    }
    if (new_max_age != max_age) {
      GC_WERR("FSGC: New max age: %d\n", new_max_age);
      this_program::max_age = new_max_age;
      int old_stable_time = stable_time;
      stable_time = new_max_age;

      //  Derive the check interval the same way as create() does, so
      //  that a reconfigured collector scans as often as a freshly
      //  started one. In particular the interval stays capped for max
      //  ages on the scale of months, where it could otherwise exceed
      //  typical server uptime.
      set_max_dir_check_interval(min(new_max_age, 24 * 3600) /
                                 file_interval_factor);

      if (stable_time < old_stable_time) {
        // We need to adjust the scan times for the monitors.
        foreach(values(monitors), Monitor m) {
          // A zero next_poll makes update() reschedule from scratch.
          m->next_poll = 0;
          m->update(m->st);
        }
      }
    }

    check_threshold();
  }
 
  //! Check the monitors for changes.
  //!
  //! Pass-through to the inherited implementation with a debug trace
  //! of the arguments and the result.
  //!
  //! @returns
  //!   The result from the inherited @expr{check()@}. The caller in
  //!   this file uses it as a number of seconds to sleep.
  int check(mixed ... args)
  {
    int res = ::check(@args);
    GC_WERR("FSGC: check(%{%O, %}) ==> %O\n", args, res);
    return res;
  }
 
  //! Describe the garbage collector for debug output: the root
  //! directory and the current stable time.
  protected string _sprintf(int c, mapping|void opts)
  {
    return sprintf("FSGarb(%O, %d)", root, stable_time);
  }
 
  //! Get the cached stats for all monitored regular files.
  //!
  //! @returns
  //!   The stat objects of the monitors that have a stat which
  //!   refers to a regular file.
  array(Stdio.Stat) get_stats()
  {
    array(Stdio.Stat) all_stats = values(monitors)->st;

    // Monitors without a stat and non-regular files are skipped.
    function(Stdio.Stat:int) is_regular_file =
      lambda(Stdio.Stat st) {
        return st && st->isreg;
      };

    return filter(all_stats, is_regular_file);
  }
} 
 
// All active garbage collectors, indexed on the id generated by
// register_fsgarb(). Entries are removed when the corresponding
// FSGarbWrapper is destructed.
mapping(string:FSGarb) fsgarbs = ([]);

// The thread running meta_fsgc(). Also serves as the run flag for
// that thread: it terminates once this variable has been cleared.
Thread.Thread meta_fsgc_thread;
 
//! Main loop for the filesystem garbage collector thread.
//!
//! Runs until @[meta_fsgc_thread] is cleared. In every round the
//! schedule variables "fsgc_starttime" and "fsgc_stoptime" are
//! consulted to decide whether garbage collection is disabled, has
//! to wait for the next start time, or may run now. When it may run,
//! all registered @[FSGarb]s are checked.
//!
//! The thread never sleeps more than one second at a time between
//! checks of @[meta_fsgc_thread], so that it terminates promptly.
void meta_fsgc()
{
  // Sleep a bit to avoid the startup race.
  // NOTE(review): presumably this also gives start_fsgarb() time to
  // assign meta_fsgc_thread before the loop condition below is
  // evaluated -- confirm before making this sleep interruptible.
  sleep(60);
  while(meta_fsgc_thread) {
    int max_sleep = 300;
    int next_start = getvar("fsgc_starttime")->get_next(0);
    int next_stop = getvar("fsgc_stoptime")->get_next(0);

    if (next_start < 0) {
      // FSGC Disabled
      GC_WERR("FSGC: Disabled.\n");
    } else if (next_start < next_stop) {
      // FSGC Not allowed to run now.
      // Sleep until next start time, but max 5 minutes
      // at a time in case the settings are changed.
      max_sleep = next_start - time(1);
      if (max_sleep > 300) {
        max_sleep = 300;
      }
      // Always sleep at least a second. A zero count would make this
      // loop spin without sleeping, and a negative count would never
      // reach zero in the countdown below.
      if (max_sleep < 1) max_sleep = 1;
    } else {
      // FSGC Allowed to run.
      max_sleep = 60;
      foreach(fsgarbs; string id; FSGarb g) {
        // The result from check() is used as the number of seconds
        // until the garb needs attention again.
        int seconds = g && g->check();
        if (seconds < max_sleep) max_sleep = seconds;
      }
      if (max_sleep < 1) max_sleep = 1;
    }
    GC_WERR("FSGC: Sleeping %d seconds...\n", max_sleep);
    // Sleep in steps of one second to notice a stop request quickly.
    while(meta_fsgc_thread && max_sleep--) {
      sleep(1);
    }
  }
}
 
//! Wrapper keeping a @[FSGarb] alive. 
//! 
//! When this object is destructed (eg by refcount), the corresponding 
//! @[FSGarb] will be killed. This is to make sure stale @[FSGarb]s aren't 
//! left running after module reloads or reconfigurations. 
class FSGarbWrapper(string id)
{
  //! Unregister and destruct the @[FSGarb] registered under @[id]
  //! (if it still exists) when the wrapper goes away.
  protected void destroy()
  {
    GC_WERR("FSGC: FSGarbWrapper %O destructed.\n", id);
    if (FSGarb garb = m_delete(fsgarbs, id)) {
      destruct(garb);
    }
  }

  //! Describe the wrapper for debug output.
  protected string _sprintf(int c, mapping|void opts)
  {
    return sprintf("FSGarbWrapper(%O)", id);
  }

  //! Pass new limits on to the wrapped @[FSGarb].
  //!
  //! Does nothing if the @[FSGarb] is no longer registered.
  void reconfigure(int max_age, int|void max_size, int|void max_files)
  {
    if (FSGarb garb = fsgarbs[id]) {
      garb->reconfigure(max_age, max_size, max_files);
    }
  }
}
 
//! Create and register a garbage collector for the directory @[path].
//!
//! The arguments are the same as for @[FSGarb()->create()].
//!
//! @returns
//!   A wrapper object that keeps the garbage collector alive, or
//!   @expr{0@} if @[path] is empty or the filesystem root, or if
//!   @[max_age] isn't positive.
FSGarbWrapper register_fsgarb(string modid, string path, int max_age,
                              int|void max_size, int|void max_files,
                              string|void quarantine,
                              int(0..1)|void cleanup_parent_dirs)
{
  // Refuse to garbage collect without an age limit.
  if (max_age <= 0) return 0;

  // Refuse to garbage collect the empty path and the filesystem root.
  if ((path == "") || (path == "/")) return 0;

  // The high resolution timestamp gives every registration its own id.
  string id = modid + "\0" + path + "\0" + gethrtime();

  FSGarb garb = FSGarb(modid, path, max_age, max_size, max_files,
                       quarantine, cleanup_parent_dirs);
  fsgarbs[id] = garb;

  GC_WERR("FSGC: Register garb on %O ==> id: %O\n", path, id);
  return FSGarbWrapper(id);
}
 
//! Start the filesystem garbage collector thread (running
//! @[meta_fsgc()]), and register it under the name "Filesystem GC".
protected void start_fsgarb()
{
  meta_fsgc_thread = Thread.Thread(meta_fsgc);
  Roxen.name_thread(meta_fsgc_thread, "Filesystem GC");
}
 
//! Stop the filesystem garbage collector thread and wait for it
//! to terminate.
//!
//! Does nothing if the thread isn't running.
protected void stop_fsgarb()
{
  Thread.Thread th = meta_fsgc_thread;
  if (!th) return;

  // Clearing the variable tells meta_fsgc() to leave its loop.
  meta_fsgc_thread = UNDEFINED;
  th->wait();

  // Drop the thread name registration.
  Roxen.name_thread(th, UNDEFINED);
}
 
#endif /* Filesystem.Monitor.basic */