Branch: Tag:

1997-03-20

1997-03-20 16:09:43 by Wilhelm Köhler <wk@cs.tu-berlin.de>

Applied WK's mega-patch

Rev: extern/shuffle.c:1.1.1.1.2.1
Rev: server/base_server/disk_cache.pike:1.12.2.3
Rev: server/base_server/roxen.pike:1.31.2.9
Rev: server/bin/garbagecollector.pike:1.5.2.1

1:   //#include <stdio.h>   #include <simulate.h>    - string cvs_version = "$Id: garbagecollector.pike,v 1.5 1996/12/07 11:37:46 neotron Exp $"; + string cvs_version = "$Id: garbagecollector.pike,v 1.5.2.1 1997/03/20 16:09:43 grubba Exp $";      //#define DEBUG      string version = cvs_version;      #define MAX_LOG_SIZE 512 -  + #define MAX_STAT_CACHE_SIZE 1000*MAX_LOG_SIZE    -  +    string lp;   int last_log, first_log=0x7fffffff;   
36:    string file;    mapping log;    -  mkdir(lp); +     file = lp+"cachelog"+_order(num);    if(!(s=read_bytes(file)))    { -  log = ([]); +  mkdir(lp); +  s=read_bytes(file); +  } +  if(!s) { +  if(first_log == num) +  first_log++; +  if(last_log == num) +  last_log--;    return 0; -  } else { -  if(catch(log = decode_value( s ))) +  } +  +  if(catch(log = decode_value( s ))) { + #ifdef DEBUG +  perror("Could not decode cachelog file ("+file+") - removed\n"); + #endif +  rm_log(num);    return 0; -  +  } +  if(sizeof(log)<=0) { +  rm_log(num); +  return 0; +  } +     return log;   } -  +  + void unparse_log(mapping log, int num) + { +  string file; +  file = lp+"cachelog"+_order(num); +  rm(file); +  if(sizeof(log)<=0) +  return; +  if(!write_file(file, encode_value(log))) +  perror("Could not write cachelog file ("+file+")\n");   }    -  + #define BLOCK_SIZE 2048 +  + #define FILE_SIZE_TO_BLOCK(X) (((X)+(BLOCK_SIZE-1))/BLOCK_SIZE) + // allow cache sizes of more than 2 GB 1-Nov-96-wk + #define BLOCK_TO_KB(X) ((((float)(X))*BLOCK_SIZE)/1024) +  + int max_cache_size; + int cache_normal_garb; + int cache_size; + int num_files, max_num_files; + int garbage_time; + int removed, removed_files, lastgc; + int disk_max, disk_used, disk_avail, disk_capacity, +  disk_i_used, disk_i_avail, disk_i_capacity, disk_time; + string disk_name; +  + #define LOGGER(x) if(gc_log)gc_log->write(x);else perror(x) + object gc_log; +  + string disk_info() + { +  return +  (disk_name? +  sprintf("Disk(%s): %1dkb (%s)\n" +  "Disk(%s): %1dkb (%1d%%) used, %1dkb avail\n", +  ctime(disk_time)-"\n", disk_max, disk_name, +  ctime(disk_time)-"\n", +  disk_used, disk_capacity, disk_avail):"") + +  (disk_i_used>0? +  sprintf("Disk(%s): %1d (%1d%%) files used, %1d files avail\n", +  ctime(disk_time)-"\n", +  disk_i_used, disk_i_capacity, disk_i_avail):""); + } +  + void current_cache_message() + { +  if(!gc_log) +  return; +  string now = ctime(time())-"\n"; +  +  LOGGER(sprintf("Cache(%s): %1.3f MB data (%1.2f%%)\n", +  now, +  (float)BLOCK_TO_KB(cache_size)/1024.0, +  (float)cache_size*100/max_cache_size +  )); +  if(max_num_files>0) +  LOGGER(sprintf("Cache(%s): %1d files (%1.2f%%)\n", +  now, num_files, (float)num_files*100/max_num_files)); +  else +  LOGGER(sprintf("Cache(%s): %1d files\n", +  now, num_files)); +  +  if(disk_name) +  LOGGER(disk_info()); +  +  if(lastgc>0) { +  string gctime = ctime(lastgc)-"\n"; +  LOGGER(sprintf("Gc(%s): %1.3f MB (%d files) removed in last gc run\n" +  "Gc(%s): removed files were last accessed %s\n", +  gctime, (float)BLOCK_TO_KB(removed)/1024.0, removed_files, +  gctime, ctime(garbage_time)-"\n")); +  } + } +  + int read_cache_status() + { +  mapping status = ([]); +  string file, s; +  file = lp+"cache_status"; +  mkdir(lp); +  if(file_size(file)<=0) { +  perror("read_cache_status: "+file+" is missing\n"); +  return 0; +  } +  if(!(s=read_bytes(file))) { +  perror("read_cache_status: "+file+" could not be read\n"); +  rm(file); +  return 0; +  } +  if(catch(status = decode_value(s))) { +  perror("read_cache_status: "+file+" could not be decoded\n"); +  rm(file); +  return 0; +  } +  last_log = status->last_log; +  first_log = status->first_log; +  cache_size = status->cache_size; +  num_files = status->num_files; +  garbage_time = status->garbage_time; +  removed = status->removed; +  removed_files = status->removed_files; +  lastgc = status->lastgc; +  +  if((last_log < first_log) || +  (cache_size <= 0)|| +  (num_files <= 0)|| +  (first_log <= 0)) { +  perror("read_cache_status: "+file+" contains rubbish\n"); +  rm(file); +  return 0; +  } +  +  return 1; + } +    void create_cache(string logprefix)   {    lp = logprefix; -  int i; +  int li;    string file; -  array (string) files; +  array (string) allfiles; +  cache_size = 0;       mkdir(lp); - #if 0 -  files = map_array(get_dir(lp), lambda(string s) { -  if(!search(s, "cachelog")) return s; +  +  allfiles = map_array(get_dir(lp), lambda(string s) { +  if(search(s, "cachelog") != -1) return s;    return 0;    }) - ({ 0 });    -  foreach(files, file) +  foreach(allfiles, file)    { -  if((i=_num(file)) > last_log) -  last_log = i; -  if(i < first_log) -  first_log = i; +  if((li=_num(file)) > last_log) +  last_log = li; +  if(li < first_log) +  first_log = li;    }       if(!last_log) {
76: Inside #if 0
   return; // Ok, no old log.    }    +  if(read_cache_status()) { +  current_cache_message(); +  return; +  } +  +  first_log = last_log = 0; +  + #if 0    while(!((log=parse_log(last_log)) && last_log>=0))    last_log--;    if(!log)
86:   #endif   }    + void write_cache_status() + { +  mapping status = ([]); +  string file; +  +  file = lp+"cache_status"; +  status->last_log = last_log; +  status->first_log = first_log; +  status->cache_size = cache_size; +  status->num_files = num_files; +  status->garbage_time = garbage_time; +  status->removed = removed; +  status->removed_files = removed_files; +  status->lastgc = lastgc; +  +  if(!write_file(file+"+", encode_value(status))) +  perror("write_cache_status: "+file+"+"+" could not be written\n"); +  if(!mv(file+"+", file)) +  perror("write_cache_status: "+file+" could not be written\n"); + } +    void write_log()   { -  +  mapping status = ([]); // This doesn't seem to be used    string file;    last_log++;    mkdir(lp);    file = lp+"cachelog"+_order(last_log);    rm(file); -  write_file(file, encode_value(log)); +  if (!write_file(file, encode_value(log))) +  perror("Could not write cachelog file ("+file+")\n");    log = ([]); -  +  write_cache_status();   }      void update(string file, int tim, int|void score)   {   //perror(file+" "+(time(1)-tim)+" seconds old, "+score+" \"bonus\" seconds.\n"); -  +  if((search(file, ".done")!=-1)&&log[file-".done"]) { +  m_delete(log, file-".done"); +  num_files--; +  } +     log[file] = ({ tim, score });    if(sizeof(log) > MAX_LOG_SIZE)    write_log();   }    - void accessed(string filename, int extra) + int check(int); +  + void accessed(string filename, int size)   { -  update(filename, time(), extra); +  update(filename, time(), size); +  if(size!=0) +  check(size);   }    - int do_collect(int amnt, function cb, mapping log) + mapping stat_cache = ([]); +  + int collect_log(int amnt, function cb, mapping log)   {    array a, b;       a = values(log);    b = indices(log); -  sort(map_array(a,lambda(array a){`-(@a);}), b); +     -  +  /* Sort logfile by accesstime +  * .head and .done files should be processed together +  * process until amnt is removed or greater garbage_time +  */ +  +  //sort(map_array(a,lambda(array a){`-(@a);}), b); +  sort(column(a, 0), a, b); +  +  garbage_time = a[0][0]; +     int i; -  for(i=0; i<sizeof(b); i++) +  for(i=0; (amnt>0)&&(i<sizeof(b)); i++)    { -  m_delete(log, b[i]); +     amnt -= cb(b[i], a[i][0]); -  if(amnt <= 0) throw("Done"); +  m_delete(log, b[i]);    } -  +  +  if(sizeof(stat_cache) > MAX_STAT_CACHE_SIZE) +  stat_cache = ([]); +     return amnt;   }    - #define BLOCK_SIZE 2048 -  - #define FILE_SIZE_TO_BLOCK(X) (((X)+(BLOCK_SIZE-1))/BLOCK_SIZE) - #define BLOCK_TO_KB(X) (((X)*BLOCK_SIZE)/1024) -  - int max_cache_size; - int cache_normal_garb; - int cache_size; - int num_files; // Only used for informative output -  +    void find_all_files_in(string dir, function|void cb)   {    string path;
155:    {    cache_size += FILE_SIZE_TO_BLOCK(st[1]);    num_files++; -  update(dir+path, st[2], st[1]/20); +  update(dir+path, st[2], st[1]);    } else    cb(dir+path);    }
168:    array dirs = get_dir(".");    string dir;    -  perror("Rechecking cache ... "); +  LOGGER("Rechecking cache at "+ctime(time()));       num_files = cache_size = 0;    rm("size");
180:    if(file_size(dir)<-1 && dir!="logs")    find_all_files_in(dir+"/");    -  perror(sprintf("Found %d files, in total %.2fMb data\n", -  num_files, (float)BLOCK_TO_KB(cache_size)/1024.0)); +  last_log++; +  if((file_size(lp+"cachelog"+_order(1))>0) && +  !mv(lp+"cachelog"+_order(1), lp+"cachelog"+_order(last_log))) +  perror("find_all_files_and_log_it - mv failed\n"); +  +  write_cache_status(); +  current_cache_message(); +     remove_call_out(find_all_files_and_log_it);    call_out(find_all_files_and_log_it, (BLOCK_TO_KB(cache_size)/5)+19200);   }       - void collect(int amnt, function callback, int|void norec) + void collect(int amnt, function callback)   { -  int i, t_last_log = last_log+(last_log-first_log); +  int logsize;    mixed r; -  +     write_log(); -  +    // perror("Collect. first_log="+first_log+"; last_log="+last_log+"\n");    r = catch { -  for(i=first_log; i<=t_last_log; i++) +  while((amnt>0)&&(first_log <= last_log))    {    mapping rl;   // perror("Collecting log "+i+"\n");   // perror("Collect. first_log="+first_log+"; last_log="+last_log+"\n"); -  if(rl = parse_log(i)) +  if(rl = parse_log(first_log))    { -  rm_log(i); -  if(i != last_log) -  first_log = i+1; -  amnt = do_collect(amnt, callback, rl); +  logsize = sizeof(rl); +  amnt = collect_log(amnt, callback, rl); +  if(logsize != sizeof(rl)) +  unparse_log(rl, first_log);    }    }    }; -  if(!r) +  if(r)    { - #ifdef DEBUG -  perror("All files removed?\n"); - #endif -  if(norec) -  { -  perror("All files removed, but still data to collect.\n"); +  perror("Error while garbagecollecting: "+r[0]+"\n" +  +describe_backtrace(r[1]));    return;    } -  +  +  if(amnt <= 0) +  return; +     find_all_files_and_log_it(); -  if(amnt >= 0) -  return collect(amnt, callback, 1); +    } -  if(r && (r!= "Done")) -  perror("Error while garbagecollecting: "+r[0]+"\n" -  +describe_backtrace(r[1])); - } +     -  +    void gc(int);      // All sizes are in BLOCK_SIZE b blocks, except for
236:    howmuch = FILE_SIZE_TO_BLOCK(howmuch);    cache_size += howmuch;    +  if(howmuch >= 0) +  num_files++; +  else +  num_files--; +     // len is in units of BLOCK_SIZE bytes.    if(((int)((float)cache_size)) > max_cache_size)    gc(cache_size); -  +  else if((max_num_files>0) && (num_files > max_num_files)) +  gc(cache_normal_garb);    - #ifdef DEBUG -  perror(sprintf("data in cache: %d Kb\n", -  (int)((float)BLOCK_TO_KB(cache_size)))); - #endif +     return cache_size;   }   
302:    _cache=cmd;   }    - int removed, lastgc; -  +    #define MAX(x,y) ((x)<(y)?(y):(x))    - mapping stat_cache = ([]); -  +    int remove_one_file(string fname, int last_access)   {    array s;
323:       if(s[1] != -1)    { -  int i; -  -  if((search(fname, ".done")!=-1) && (s[2]-10 > last_access)) -  { - #ifdef DEBUG - // perror("Nope.\n"); - #endif -  update(fname, s[2], s[1]/20); -  return 0; /* File has been accessed since the cache checked */ +  if(s[2]-10 > last_access) { +  update(fname, s[2], 0); +  return 0; /* See you next time */    } -  +  +  int i;    i=FILE_SIZE_TO_BLOCK(s[1]); - #ifdef DEBUG - // perror("Yep. "+(int)BLOCK_TO_KB(i)+"Kb removed\n"); - #endif -  s[1]=-1; -  removed += i; +     cache_size-=i; -  rm( fname ); +  num_files--; +  removed += i; +  removed_files++; +  s[1]=-1; +  rm(fname);    return i; /* Ok, removed */    } - #ifdef DEBUG - // perror("Already.\n"); - #endif +     return 0; /* No such file */   }   
358:    int amnt;       stat_cache = ([]); -  removed = 0; +  removed = removed_files = 0;    lastgc = time();    amnt = MAX(cache_normal_garb, cs-max_cache_size);       catch {   #ifdef DEBUG -  perror("really_gc ("+(int)BLOCK_TO_KB(amnt)+" Kb)\n"); +  // perror("really_gc ("+(int)BLOCK_TO_KB(amnt)+" Kb)\n");   #endif    collect(amnt, remove_one_file); -  +  write_cache_status(); +  current_cache_message();   #ifdef DEBUG -  perror("--------- ("+(int)BLOCK_TO_KB(removed)+" Kb really removed)\n"); +  // perror("--------- ("+(int)BLOCK_TO_KB(removed)+" Kb really removed)\n");   #endif    };    stat_cache = ([]);
381:    if(!removed)    last_garb="";    else -  last_garb=sprintf("%2.2f Mb was removed in the last garbage collection " -  "%d minutes ago", +  last_garb=sprintf("GC(%s): %2.2f Mb (%d files) removed\n" +  "GC: last run was %d minutes ago\n" +  "GC: removed files were last accessed %s\n", +  ctime(lastgc)-"\n",    (float)removed/(1048576.0/BLOCK_SIZE), -  (time()-lastgc)/60); +  removed_files, +  (time()-lastgc)/60, +  ctime(garbage_time)-"\n"); +     rm("statistics");    write_file("statistics", -  sprintf("%2.2f Mb data in the cache\n%s", +  sprintf("Cache(%s): %1d files%s, %1.3f MB (%1.2f%%)\n%s\n%s", +  ctime(time())-"\n", +  num_files, +  max_num_files>0? +  sprintf(" (%1.2f%%)", +  (float)cache_size*100/max_cache_size):"",    ((float)BLOCK_TO_KB(cache_size))/(1024.0), -  last_garb)); +  (float)cache_size*100/max_cache_size, +  last_garb, disk_info()));   }    -  +    private string lf;      void do_write_log()
401:    exit(0);   }    - void create(string cdir, string logfiles, int cng, int mcs) + void init_log_file(string lf)   { -  +  if(!lf || !strlen(lf)) +  return; +  +  remove_call_out(init_log_file); +  +  if(gc_log) +  destruct(gc_log); +  +  gc_log = files.file(); +  if(!gc_log->open(lf, "rwac")) { +  perror("init_log_file("+lf+"): open failed\n"); +  destruct(gc_log); +  return; +  } +  +  call_out(init_log_file, 300, lf); + } +  + void init_disk_check(int minfree) + { +  if(minfree<=0) +  return; +  +  remove_call_out(init_disk_check); +  +  string res; +  string comm = "/usr/ucb/df"; +  string rf = "df_output"; +  +  if(mixed err = catch { +  spawn(comm + " . > "+rf+" 2>&1;"+comm+" -i . >> "+rf+" 2>&1"); +  res = read_bytes(rf); +  } ) { +  LOGGER("Command ("+comm+") failed:" + err[0]+"\n"); +  LOGGER("Minimum free disk check disabled\n"); +  return; +  } +  +  if(!stringp(res)|| !strlen(res)) { +  call_out(init_disk_check, 60, minfree); +  return; +  } +  +  int no; +  if((no = sscanf(res, +  "%*s%*[\n]%*s%*[ \t]%d%*[ \t]%d%*[ \t]%d%*[ \t]%d%*[%]%*[ \t]%s%*[\n]" + +  "%*s%*[\n]%*s%*[ \t]%d%*[ \t]%d%*[ \t]%d%*[%]%*s", +  disk_max, disk_used, disk_avail, disk_capacity, disk_name, +  disk_i_used, disk_i_avail, disk_i_capacity)) < 12) { +  LOGGER("Minimum free disk check disabled\n"); +  return; +  } +  disk_time = time(); +  + #ifdef DEBUGX +  if(no < 24) +  LOGGER("Minimum free inodes check disabled - no no of inode info available\n"); +  LOGGER("init_disk_check - disk_max="+disk_max+ +  ", disk_used="+disk_used+ +  ", disk_avail="+disk_avail+ +  ", disk_capacity="+disk_capacity+ +  ", disk_name="+disk_name+"\n"); +  LOGGER("init_disk_check - disk_i_used="+disk_i_used+ +  ", disk_i_avail="+disk_i_avail+ +  ", disk_i_capacity="+disk_i_capacity+"\n"); + #endif +  +  if(((disk_used > 0) && ((100 - disk_capacity) < minfree)) || +  ((disk_i_used > 0) && ((100 - disk_i_capacity) < minfree))) +  gc(cache_normal_garb); +  +  call_out(init_disk_check, 600, minfree); + } +  + void create(string cdir, string logfiles, int cng, int mcs, +  int mnf, int minfree, string gc_lf) + {    int i;    for(i = 1; i < 3; i++)    signal(i,do_write_log);
410:       if(cdir)    { +  init_log_file(gc_lf); +    #ifdef DEBUG    perror("Initalizing cache, cache-dir is "+cdir+"\n");   #endif    cd(cdir);    cache_normal_garb = cng*(1048576/BLOCK_SIZE);    max_cache_size = mcs*(1048576/BLOCK_SIZE); -  +  if(mnf>0) +  max_num_files = mnf;    if(lf != logfiles) // This function might be called more than once.    {    lf = logfiles;    create_cache(logfiles); -  +  +  if(last_log < 10) +  find_all_files_and_log_it(); +  +  if(file_size(lp+"cachelog"+_order(1))>=0) { +  LOGGER("Found rechecking unfinished ...\n"); +  find_all_files_and_log_it(); +  } +     call_out(find_all_files_and_log_it, (BLOCK_TO_KB(cache_size)/5)+3600);    } -  +  +  init_disk_check(minfree); +  +  LOGGER("Garbage collector ("+version+") on-line, waiting for commands.\n"); +     check(0); // Open the 'size' file and, perhaps, do a garbage collect. -  perror("Garbage collector ("+version+") on-line, waiting for commands.\n"); -  perror("Current cache size: " -  +((float)BLOCK_TO_KB(cache_size)/(1024.0))+" MB\n"); +     }   }