pike.git / src / threads.c


pike.git/src/threads.c:347:
Inside #if undefined(CONFIGURE_TEST) and #if defined(HAVE_CLOCK) && \
  (defined (RDTSC) || \
   (!defined(HAVE_GETHRTIME) && \
    !(defined(HAVE_MACH_TASK_INFO_H) && defined(TASK_THREAD_TIMES_INFO))))

 static clock_t thread_start_clock = 0;
 static THREAD_T last_clocked_thread = 0;
 #define USE_CLOCK_FOR_SLICES
 #ifdef RDTSC
 static int use_tsc_for_slices;
 #define TSC_START_INTERVAL 100000
 static INT64 prev_tsc;
+static clock_t prev_clock;  /* clock() at the beg of the last tsc interval */
 #endif
 #endif
 
 #define THIS_THREAD ((struct thread_state *)CURRENT_STORAGE)
 
 static struct callback *threads_evaluator_callback=0;
 
 PMOD_EXPORT int num_threads = 1;
 PMOD_EXPORT int threads_disabled = 0;

pike.git/src/threads.c:605:

   ts->swapped=0;
   Pike_interpreter=ts->state;
 
 #ifdef USE_CLOCK_FOR_SLICES
   if (last_clocked_thread != ts->id) {
     thread_start_clock = clock();
     last_clocked_thread = ts->id;
 #ifdef RDTSC
     RDTSC (prev_tsc);
+    prev_clock = thread_start_clock;
 #endif
   }
 #endif
 }
 
 PMOD_EXPORT void pike_swap_in_current_thread (struct thread_state *ts
                                               COMMA_DLOC_DECL)
 {
 #ifdef PIKE_DEBUG
   THREAD_T self = th_self();
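
The RDTSC(v) macro used in these hunks reads the CPU's 64-bit time stamp counter into v; its definition is not part of this diff. Purely as orientation, here is a minimal standalone sketch of such a read on gcc/x86 (the RDTSC_SKETCH name and the sketch_int64 typedef are invented for this example and are not the real Pike definitions):

/* Not from the commit: sketch of what an RDTSC-style macro typically
 * does on gcc/clang for x86 - read the 64-bit time stamp counter. */

#include <stdio.h>

typedef long long sketch_int64;   /* stand-in for Pike's INT64 */

#define RDTSC_SKETCH(v) do {                                            \
    unsigned int lo_, hi_;                                              \
    __asm__ __volatile__ ("rdtsc" : "=a" (lo_), "=d" (hi_));            \
    (v) = ((sketch_int64) hi_ << 32) | lo_;                             \
  } while (0)

int main (void)
{
  sketch_int64 t1, t2;
  RDTSC_SKETCH (t1);
  RDTSC_SKETCH (t2);
  printf ("tsc delta over two reads: %lld cycles\n", t2 - t1);
  return 0;
}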
pike.git/src/threads.c:1257:

 PMOD_EXPORT int count_pike_threads(void)
 {
   return num_pike_threads;
 }
 
 /* #define PROFILE_CHECK_THREADS */
 
 static void check_threads(struct callback *cb, void *arg, void * arg2)
 {
-#if defined(RDTSC) && defined(USE_CLOCK_FOR_SLICES)
-  static INT64 tsc_mincycles = TSC_START_INTERVAL;
-#endif
-
+
 #ifdef PROFILE_CHECK_THREADS
-  static unsigned long calls = 0, clock_checks = 0;
-  static unsigned long slice_int_n = 0;
+  static unsigned long calls = 0, clock_checks = 0, no_clock_advs = 0;
+  static unsigned long slice_int_n = 0;  /* Slice interval length. */
   static double slice_int_mean = 0.0, slice_int_m2 = 0.0;
-  static unsigned long tsc_int_n = 0;
+  static unsigned long tsc_int_n = 0;    /* Actual tsc interval length. */
   static double tsc_int_mean = 0.0, tsc_int_m2 = 0.0;
-
+  static unsigned long tsc_tgt_n = 0;    /* Target tsc interval length. */
+  static double tsc_tgt_mean = 0.0, tsc_tgt_m2 = 0.0;
+  static unsigned long tps = 0, tps_int_n = 0; /* TSC intervals per slice. */
+  static double tps_int_mean = 0.0, tps_int_m2 = 0.0;
   calls++;
 #endif
 
 #if defined (USE_CLOCK_FOR_SLICES) && defined (PIKE_DEBUG)
   if (last_clocked_thread != th_self())
     Pike_fatal ("Stale thread %08lx in last_clocked_thread (self is %08lx)\n",
                 (unsigned long) last_clocked_thread, (unsigned long) th_self());
 #endif
 
 #if defined(RDTSC) && defined(USE_CLOCK_FOR_SLICES)
   /* We can get here as often as 30+ thousand times per second;
      let's try to avoid doing as many clock(3)/times(2) syscalls
      by using the TSC. We'll skip any further checks until the
      number of cycles passed comes close to what it was the last
      time when we decided to yield. */
 
   if (use_tsc_for_slices) {
-    static INT64 target_int;
-    INT64 now, tsc_elapsed;
-    clock_t elapsed;
+    static INT64 target_int = TSC_START_INTERVAL;
+    INT64 tsc_now, tsc_elapsed;
 
     /* prev_tsc is normally always valid here, but it can be zero
      * after a tsc jump reset and just after a thread has been
      * "pike-ified" with INIT_THREAD_STATE (in which case
      * thread_start_clock is zero too). */
 
-    RDTSC(now);
-    tsc_elapsed = now - prev_tsc;
+    RDTSC(tsc_now);
+    tsc_elapsed = tsc_now - prev_tsc;
 
-    if ((target_int - tsc_elapsed)>0) {
+    if (tsc_elapsed < target_int) {
       if (tsc_elapsed < 0) {
        /* The counter jumped back in time, so reset and continue. In
         * the worst case this keeps happening all the time, and then
         * the only effect is that we always fall back to
         * clock(3). */
 #ifdef PROFILE_CHECK_THREADS
-       fprintf (stderr, "TSC jump detected (now: %"PRINTINT64"d, "
-                "target_int: %"PRINTINT64"d, tsc_mincycles: %"PRINTINT64"d) - "
-                "resetting\n", now, target_int, tsc_mincycles);
+       fprintf (stderr, "TSC backward jump detected (now: %"PRINTINT64"d, "
+                "prev: %"PRINTINT64"d, target_int: %"PRINTINT64"d) - "
+                "resetting\n", tsc_now, prev_tsc, target_int);
 #endif
-       tsc_mincycles = TSC_START_INTERVAL;
+       target_int = TSC_START_INTERVAL;
        prev_tsc = 0;
       }
       else
        return;
     }
 
 #ifdef PROFILE_CHECK_THREADS
     if (prev_tsc) {
       double delta = tsc_elapsed - tsc_int_mean;
       tsc_int_n++;
       tsc_int_mean += delta / tsc_int_n;
       tsc_int_m2 += delta * (tsc_elapsed - tsc_int_mean);
     }
     clock_checks++;
-
+    tps++;
 #endif
 
-    elapsed = clock() - thread_start_clock;
+    {
+      clock_t clock_now = clock();
 
-    if (thread_start_clock && prev_tsc) {
-      if (elapsed < (clock_t) (CLOCKS_PER_SEC/30)) {
-       tsc_mincycles |= 0xffff;
-       if ((tsc_elapsed - target_int)<=(tsc_mincycles<<4))
-         tsc_mincycles += (tsc_mincycles>>1);
-       target_int = (tsc_mincycles>>1);
-       return;
+      /* Aim tsc intervals at 1/20th of a thread time slice interval,
+       * i.e. at 1/400 sec. That means the time is checked with
+       * clock(3) approx 20 times per slice. That still cuts the vast
+       * majority of the clock() calls and we're still fairly safe
+       * against tsc inconsistencies of different sorts, like cpu clock
+       * rate changes (on older cpu's with variable tsc),
+       * unsynchronized tsc's between cores, OS tsc resets, etc -
+       * individual intervals can be off by more than an order of
+       * magnitude in either direction without affecting the final time
+       * slice length appreciably. */
+
+      if (prev_tsc) {
+       clock_t tsc_interval_time = clock_now - prev_clock;
+       if (tsc_interval_time > 0) {
+         /* Estimate the next interval just by extrapolating the
+          * tsc/clock ratio of the last one. This adapts very
+          * quickly but is also very "jumpy". That shouldn't matter
+          * due to the approach with dividing the time slice into
+          * ~20 tsc intervals. */
+         INT64 new_target_int =
+           (tsc_elapsed * (CLOCKS_PER_SEC / 400)) / tsc_interval_time;
+         if (new_target_int < target_int << 1)
+           target_int = new_target_int;
+         else {
+#ifdef PROFILE_CHECK_THREADS
+           fprintf (stderr, "TSC suspect forward jump detected "
+                    "(prev int: %"PRINTINT64"d, "
+                    "calculated int: %"PRINTINT64"d) - "
+                    "capping\n", target_int, new_target_int);
+#endif
+           /* The + 1 is paranoia just in case it has become zero somehow. */
+           target_int = (target_int << 1) + 1;
         }
-      if (elapsed > (clock_t) (CLOCKS_PER_SEC/18)) {
-       tsc_mincycles -= tsc_mincycles>>2;
-       if (elapsed > (clock_t) (CLOCKS_PER_SEC/10))
-         tsc_mincycles >>= 2;
+         prev_tsc = tsc_now;
+         prev_clock = clock_now;
+#ifdef PROFILE_CHECK_THREADS
+         {
+           double delta = target_int - tsc_tgt_mean;
+           tsc_tgt_n++;
+           tsc_tgt_mean += delta / tsc_tgt_n;
+           tsc_tgt_m2 += delta * (target_int - tsc_tgt_mean);
         }
-
+#endif
       }
-
+       else {
+         /* clock(3) can have pretty low resolution and might not
+          * have advanced during the tsc interval. Just do another
+          * round on the old estimate, keeping prev_tsc and
+          * prev_clock fixed to get a longer interval for the next
+          * measurement. */
+         if (tsc_interval_time < 0) {
+           /* clock() wraps around fairly often as well. We still
+            * keep the old interval but update the baselines in this
+            * case. */
+           prev_tsc = tsc_now;
+           prev_clock = clock_now;
+         }
+         target_int += tsc_elapsed;
+#ifdef PROFILE_CHECK_THREADS
+         no_clock_advs++;
+#endif
+       }
+      }
+      else {
+#ifdef PROFILE_CHECK_THREADS
+       fprintf (stderr, "Warning: Encountered zero prev_tsc.\n");
+#endif
+       prev_tsc = tsc_now;
+      }
 
-    target_int = tsc_mincycles;
-    prev_tsc = now;
+      if (clock_now - thread_start_clock < 0)
+       /* clock counter has wrapped since the start of the time
+        * slice. Let's reset and yield. */
+       thread_start_clock = 0;
+      else if (clock_now - thread_start_clock <
+              (clock_t) (CLOCKS_PER_SEC / 20))
+       return;
+    }
 
-
+#ifdef PROFILE_CHECK_THREADS
+    {
+      double delta = tps - tps_int_mean;
+      tps_int_n++;
+      tps_int_mean += delta / tps_int_n;
+      tps_int_m2 += delta * (tps - tps_int_mean);
+      tps = 0;
+    }
+#endif
+
     goto do_yield;
   }
 #endif /* RDTSC && USE_CLOCK_FOR_SLICES */
 
 #ifdef HAVE_GETHRTIME
   {
     static hrtime_t last_ = 0;
     hrtime_t now = gethrtime();
     if( now-last_ < 50000000 ) /* 0.05s slice */
       return;
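
The rewritten logic above drops the old tsc_mincycles heuristic in favour of a plain ratio extrapolation: the cycle count of the previous TSC interval is scaled by (CLOCKS_PER_SEC / 400) / tsc_interval_time so that the next interval lands near 1/400 s, and the slice itself ends once clock() shows about 1/20 s of CPU time. Below is a stripped-down standalone sketch of that control flow, assuming gcc/clang on x86 with __rdtsc() from x86intrin.h standing in for the RDTSC macro; all names are invented for the example, and the real code additionally caps suspicious forward jumps and handles clock() wrap-around.

#include <stdio.h>
#include <stdint.h>
#include <time.h>
#include <x86intrin.h>

#define TSC_START_INTERVAL 100000

static int64_t target_int = TSC_START_INTERVAL;
static int64_t prev_tsc;
static clock_t prev_clock, slice_start_clock;

/* Returns nonzero when roughly 1/20 s of CPU time has passed since
 * slice_start_clock, calling clock() only about 20 times per slice. */
static int time_to_yield (void)
{
  int64_t tsc_now = (int64_t) __rdtsc ();
  int64_t tsc_elapsed = tsc_now - prev_tsc;

  if (tsc_elapsed >= 0 && tsc_elapsed < target_int)
    return 0;                   /* Cheap path: no syscall at all. */

  {
    clock_t clock_now = clock ();
    clock_t interval_time = clock_now - prev_clock;

    if (interval_time > 0)
      /* Extrapolate the tsc/clock ratio of the last interval so that
       * the next interval lands near 1/400 s. */
      target_int = tsc_elapsed * (CLOCKS_PER_SEC / 400) / interval_time;
    else
      target_int += tsc_elapsed; /* clock() hasn't advanced; measure longer. */

    prev_tsc = tsc_now;
    prev_clock = clock_now;

    return clock_now - slice_start_clock >= (clock_t) (CLOCKS_PER_SEC / 20);
  }
}

int main (void)
{
  unsigned long checks = 0;
  prev_tsc = (int64_t) __rdtsc ();
  prev_clock = slice_start_clock = clock ();

  while (!time_to_yield ())
    checks++;

  printf ("yielded after %lu checks\n", checks);
  return 0;
}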
pike.git/src/threads.c:1442:
Inside #if defined(PROFILE_CHECK_THREADS) and #if defined(USE_CLOCK_FOR_SLICES)

        (double) (clock() - thread_start_clock) / CLOCKS_PER_SEC;
       double delta = slice_time - slice_int_mean;
       slice_int_n++;
       slice_int_mean += delta / slice_int_n;
       slice_int_m2 += delta * (slice_time - slice_int_mean);
     }
 #endif
 
     gettimeofday (&now, NULL);
     if (now.tv_sec > last_time) {
-      fprintf (stderr, "check_threads: %lu calls, %lu clocks, "
-              "slice [avg %.3f, d %.1e], "
-              "tsc [mincyc %"PRINTINT64"d, avg %.2e, d %.1e]\n",
-              calls, clock_checks,
+      fprintf (stderr, "check_threads: %lu calls, %lu clocks, %lu no advs, "
+              "slice %.3f:%.1e, tsc int %.2e:%.1e, tsc tgt %.2e:%.1e, tps %g:%.1e\n",
+              calls, clock_checks, no_clock_advs,
               slice_int_mean,
               slice_int_n > 1 ? sqrt (slice_int_m2 / (slice_int_n - 1)) : 0.0,
-#if defined(RDTSC) && defined(USE_CLOCK_FOR_SLICES)
-              (INT64) (use_tsc_for_slices ? tsc_mincycles : -1),
-#else
-              (INT64) -2,
-#endif
+
               tsc_int_mean,
-              tsc_int_n > 1 ? sqrt (tsc_int_m2 / (tsc_int_n - 1)) : 0.0);
+              tsc_int_n > 1 ? sqrt (tsc_int_m2 / (tsc_int_n - 1)) : 0.0,
+              tsc_tgt_mean,
+              tsc_tgt_n > 1 ? sqrt (tsc_tgt_m2 / (tsc_tgt_n - 1)) : 0.0,
+              tps_int_mean,
+              tps_int_n > 1 ? sqrt (tps_int_m2 / (tps_int_n - 1)) : 0.0);
       last_time = (unsigned long) now.tv_sec;
-      calls = clock_checks = 0;
+      calls = clock_checks = no_clock_advs = 0;
     }
-
-    /* Cannot use this with the first tsc reading after the yield. */
-    prev_tsc = 0;
+
   }
 #endif
 
   pike_thread_yield();
 }
 
 PMOD_EXPORT void pike_thread_yield(void)
 {
   DEBUG_CHECK_THREAD();
 
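
Each *_n / *_mean / *_m2 triplet printed above is maintained with the running update seen in the 1257 hunk: Welford's online mean/variance algorithm, with the sample standard deviation recovered as sqrt(m2 / (n - 1)) at print time. A self-contained sketch of that pattern follows; the struct and helper names are invented for the illustration.

#include <math.h>
#include <stdio.h>

struct running_stats {
  unsigned long n;
  double mean, m2;
};

static void stats_add (struct running_stats *s, double x)
{
  double delta = x - s->mean;
  s->n++;
  s->mean += delta / s->n;
  s->m2 += delta * (x - s->mean);   /* Uses the already updated mean. */
}

static double stats_stddev (const struct running_stats *s)
{
  /* Sample standard deviation, as printed by the fprintf above. */
  return s->n > 1 ? sqrt (s->m2 / (s->n - 1)) : 0.0;
}

int main (void)
{
  struct running_stats slice = { 0, 0.0, 0.0 };
  double samples[] = { 0.049, 0.051, 0.050, 0.062, 0.048 };
  unsigned i;
  for (i = 0; i < sizeof samples / sizeof samples[0]; i++)
    stats_add (&slice, samples[i]);
  printf ("slice %.3f:%.1e\n", slice.mean, stats_stddev (&slice));
  return 0;
}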
pike.git/src/threads.c:1488:

   th_yield();
   THREADS_DISALLOW();
 
 #ifdef USE_CLOCK_FOR_SLICES
   /* If we didn't yield then give ourselves a new time slice. If we
    * did yield then thread_start_clock is the current clock anyway
    * after the thread swap in. */
   thread_start_clock = clock();
 #ifdef RDTSC
   RDTSC (prev_tsc);
+  prev_clock = thread_start_clock;
 #endif
 #ifdef PIKE_DEBUG
   if (last_clocked_thread != th_self())
     Pike_fatal ("Stale thread %08lx in last_clocked_thread (self is %08lx)\n",
                 (unsigned long) last_clocked_thread, (unsigned long) th_self());
 #endif
 #endif
 
   DEBUG_CHECK_THREAD();
 }