Branch: Tag:

1998-07-12

1998-07-12 00:22:44 by David Hedbor <david@hedbor.org>

Added Francesco Chemolli's ABS (Anti-Block-System), which restarts the
server if it stops executing call_outs. In threaded mode this means a
backend lockup; in non-threaded mode it can be anything from an
eternal loop or a backend lockup to heavy computation (generating
gtext, for example). The timeout before restart is configurable.

Rev: server/base_server/roxen.pike:1.218

1: - constant cvs_version = "$Id: roxen.pike,v 1.217 1998/07/07 19:04:19 grubba Exp $"; + constant cvs_version = "$Id: roxen.pike,v 1.218 1998/07/12 00:22:44 neotron Exp $"; +  + // ABS and suicide systems contributed freely by Francesco Chemolli +    #define IN_ROXEN   #include <roxen.h>   #include <config.h>
77:   object euid_egid_lock = Thread.Mutex();   #endif /* THREADS */    +  + //these mixed's are used by the ABS and suicide options, + //to determine whether a call_out is active. + static mixed abs_call_out=0, suicide_call_out=0; +    int privs_level;   int die_die_die;   
957:   #endif /* !NO_COMPAT */      int config_ports_changed = 0; - // Called from the configuration interface. - string check_variable(string name, string value) - { -  switch(name) -  { -  case "ConfigPorts": -  config_ports_changed = 1; -  break; -  case "cachedir": -  if(!sscanf(value, "%*s/roxen_cache")) -  { -  object node; -  node = (configuration_interface()->root->descend("Globals", 1)-> -  descend("Proxy disk cache: Base Cache Dir", 1)); -  if(node && !node->changed) node->change(1); -  mkdirhier(value+"roxen_cache/foo"); -  call_out(set, 0, "cachedir", value+"roxen_cache/"); -  } -  break; +     -  case "ConfigurationURL": -  case "MyWorldLocation": -  if(strlen(value)<7 || value[-1] != '/' || -  !(sscanf(value,"%*s://%*s/")==2)) -  return "The URL should follow this format: protocol://computer[:port]/"; -  } - } -  +    void stop_all_modules()   {    foreach(configurations, object conf)
1184:    return 0;   }    + void restart_if_stuck () { +  abs_call_out=0; +  if (!QUERY(ABS_engage)) +  return; +  abs_call_out=call_out (restart_if_stuck,10); +  signal(signum("SIGALRM"),lambda( int n ) { +  perror ((ctime(time())-"\n")+ +  "**** ABS engaged! Restarting. ***\n"); +  fork_or_quit(); +  }); +  alarm (60*QUERY(ABS_timeout)+10); + } +  + void post_create () { +  if (QUERY(ABS_engage)) +  abs_call_out=call_out (restart_if_stuck,10); +  if (QUERY(suicide_engage)) +  suicide_call_out=call_out (restart,60*60*24*QUERY(suicide_timeout)); + } +    void create()   {    catch
1197:    (object)"color.pike";    (object)"fonts.pike";    Configuration = (program)"configuration"; +  call_out(post_create,1); //we just want to delay some things a little   }      
1870:    perror("Unknown variable: "+c+"\n");    }    docurl=QUERY(docurl2); +  +  globvar("ABS_engage", 0, "Anti-Block-System: Enable", TYPE_FLAG|VAR_MORE, +  "If set, it will enable the anti-block-system. " +  "This will restart the server after a configurable number of minutes if it " +  "locks up. If you are running in a single threaded environment heavy calculations " +  "will also halt the server. In multi-threaded mode bugs as eternal loops will not " +  "cause the server to reboot, since only one thread is blocked. In general there is " +  "no harm in having this option enabled. "); +  +  globvar("ABS_timeout", 5, "Anti-Block-System: Timeout", TYPE_INT_LIST, +  "If the server is unable to accept connection for this many " +  "minutes, it will be restarted. You need to find a balance: " +  "if set too low, the server will be restarted even if it's doing " +  "legal things (like generating many images), if set too high you will " +  "have long downtimes.", +  ({1,2,3,4,5,10,15}), +  lambda() {return !QUERY(ABS_engage);} +  ); +  +  globvar ("suicide_engage", +  0, +  "Automatic Restart: Enable", +  TYPE_FLAG|VAR_MORE, +  "If set, Roxen will automatically restart after a configurable number " +  "of days. Since Roxen uses a monolith, non-forking server " +  "model the process tends to grow in size over time. This is mainly due to " +  "heap fragmentation but also because of memory leaks." +  ); +  +  globvar("suicide_timeout", +  7, +  "Automatic Restart: Timeout", +  TYPE_INT_LIST, +  "Automatically restart the server after this many days.", +  ({1,2,3,4,5,6,7,14,30}), +  lambda(){return !QUERY(suicide_engage);} +  );   }      
2479:   {      } +  + // Called from the configuration interface. + string check_variable(string name, string value) + { +  switch(name) +  { +  case "ConfigPorts": +  config_ports_changed = 1; +  break; +  case "cachedir": +  if(!sscanf(value, "%*s/roxen_cache")) +  { +  object node; +  node = (configuration_interface()->root->descend("Globals", 1)-> +  descend("Proxy disk cache: Base Cache Dir", 1)); +  if(node && !node->changed) node->change(1); +  mkdirhier(value+"roxen_cache/foo"); +  call_out(set, 0, "cachedir", value+"roxen_cache/"); +  } +  break; +  +  case "ConfigurationURL": +  case "MyWorldLocation": +  if(strlen(value)<7 || value[-1] != '/' || +  !(sscanf(value,"%*s://%*s/")==2)) +  return "The URL should follow this format: protocol://computer[:port]/"; +  break; +  +  case "ABS_engage": +  if (value) { +  if (!abs_call_out) +  restart_if_stuck(); +  } else { +  if (abs_call_out) { +  remove_call_out(abs_call_out); +  abs_call_out=0; +  } +  } +  break; +  +  case "suicide_engage": +  if (value) { +  if (!suicide_call_out) +  suicide_call_out=call_out(restart,60*60*24*QUERY(suicide_timeout)); +  } else { +  if (suicide_call_out) { +  remove_call_out(suicide_call_out); +  suicide_call_out=0; +  } +  } +  break; +  } + }