/* A very basic threads package. Based on the excellent work of
 * Wolfram Gloger (Gloger@lrz.uni-Muenchen.de), Linus Torvalds, and
 * Alan Cox.
 *
 * I didn't do much, just strung these things together.
 * Christopher Neufeld  (neufeld@physics.utoronto.ca)
 */
   


/* Important: the macro __SMP__ must be defined if this is to run on
 * SMP Linux. */

/* #define __SMP__ */


#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <linux/signal.h>
#include <linux/unistd.h>
#include <linux/types.h>
#include <values.h>

/* Flag bits for the clone() system call.  bb_threads_newthread() ORs a
 * selection of them together and hands the result to sprocsp().
 * NOTE(review): the values are presumably copies of the kernel's own
 * CLONE_* definitions -- confirm they match the kernel headers in use. */
#define CLONE_VM        0x00000100
#define CLONE_FS        0x00000200
#define CLONE_FILES     0x00000400
#define CLONE_SIGHAND   0x00000800
/* All four flags at once.  Not referenced by the code visible in this file. */
#define PR_SALL         (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND)

/* Start a thread that runs fn(data, len) on a caller-supplied stack.
 *
 *   fn    - entry point of the new thread.
 *   flags - clone() flag bits; SIGCHLD is OR-ed in before the system call.
 *   data  - first argument handed to fn.
 *   sp    - the _top_ (highest address) of the memory set aside for the
 *           new stack; it is rounded down to a multiple of sizeof(void *).
 *   len   - second argument handed to fn.
 *
 * Returns what clone() returns to the parent (the pid of the child), or -1
 * with errno set when the system call fails.  The child never returns from
 * this function: it calls fn and then issues the exit system call.
 */


static int
sprocsp(void (*fn)(void *, size_t), unsigned int flags,
        void *data, caddr_t sp, size_t len)
{
        long retval;
        void **newstack = (void **)((size_t)sp & ~(sizeof(void *)-1));

        /*
         * Pre-load the child's stack with fn's two arguments: len is
         * pushed first and data second, so data ends up at the lowest
         * address.  The pointer is stepped explicitly instead of
         * decrementing a cast expression, which is not valid C and is
         * rejected by current compilers.
         */
        newstack = (void **)((size_t *)newstack - 1);
        *(size_t *)newstack = len;
        *(--newstack) = data;
        /*
         * Do clone() system call. We need to do the low-level stuff
         * entirely in assembly as we're returning with a different
         * stack in the child process and we couldn't otherwise guarantee
         * that the program doesn't use the old stack incorrectly.
         *
         * Parameters to clone() system call:
         *      %eax - __NR_clone, clone system call number
         *      %ebx - clone_flags, bitmap of cloned data
         *      %ecx - new stack pointer for cloned child
         *
         * The clone() system call returns (in %eax) the pid of the newly
         * cloned process to the parent, and 0 to the cloned process. If
         * an error occurs, the return value will be the negative errno.
         *
         * In the child process, we will do a "call" to the requested
         * function and then do an "exit()" system call which will
         * terminate the child.  %ebx is not reloaded before that second
         * system call, so the exit status is whatever fn leaves in %ebx.
         *
         * The "memory" clobber tells the compiler that the assembly
         * depends on memory contents, so the two stores to the new stack
         * above must be completed before the system call is issued.
         */
        __asm__ __volatile__(
                "int $0x80\n\t"         /* Linux/i386 system call */
                "testl %0,%0\n\t"       /* check return value */
                "jne 1f\n\t"            /* jump if parent */
                "call *%3\n\t"          /* start subthread function */
                "movl %2,%0\n\t"
                "int $0x80\n"           /* exit system call: exit subthread */
                "1:\t"
                :"=a" (retval)
                :"0" (__NR_clone),"i" (__NR_exit),
                 "r" (fn),
                 "b" (flags | SIGCHLD),
                 "c" (newstack)
                :"memory");
        /* The kernel reports failure as -errno; convert to the usual
         * "-1 and errno" convention. */
        if (retval < 0) {
                errno = -retval;
                retval = -1;
        }
        return retval;
}



/* The following code is taken directly from <asm/bitops.h>, with the
 * 'extern' keywords stripped. */

/*
 * Copyright 1992, Linus Torvalds.
 */

/*
 * These have to be done with inline assembly: that way the bit-setting
 * is guaranteed to be atomic. All bit operations return 0 if the bit
 * was cleared before the operation and != 0 if it was not.
 *
 * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
 */

/* LOCK_PREFIX is pasted in front of the bit instructions below, so that
 * they carry the x86 "lock" prefix when __SMP__ is defined.  SMPVOL adds a
 * volatile qualifier to the shared words in the same configuration. */
#ifdef __SMP__
#define LOCK_PREFIX "lock ; "
#define SMPVOL volatile
#else
#define LOCK_PREFIX ""
#define SMPVOL
#endif

/*
 * Some hacks to defeat gcc over-optimizations: the target of a bit
 * operation is presented to gcc as one large object, so the memory operand
 * covers every word that nr can reach rather than only the first one.
 */
struct __dummy { unsigned long a[100]; };
#define ADDR (*(struct __dummy *) addr)
#define CONST_ADDR (*(const struct __dummy *) addr)

/*
 * Atomically set bit nr of the bit array at addr.
 * Returns 0 if the bit was clear before the call and non-zero if it was
 * already set.
 *
 * Operand notes:
 *   "+m"     - the instruction reads as well as writes the word, so the
 *              operand must be read-write; an output-only "=m" lets the
 *              compiler discard earlier stores to the array.
 *   "Ir"     - only constants 0..31 may be encoded as an immediate, because
 *              the immediate form takes the bit offset modulo 32; larger
 *              constants and variables go through a register.
 *   "memory" - makes the operation a compiler barrier, which is required
 *              when it is used to take a lock.
 */
__inline__ static int set_bit(int nr, SMPVOL void * addr)
{
	int oldbit;

	__asm__ __volatile__(LOCK_PREFIX
		"btsl %2,%1\n\tsbbl %0,%0"
		:"=r" (oldbit),"+m" (ADDR)
		:"Ir" (nr)
		:"memory");
	return oldbit;
}

/*
 * Atomically clear bit nr of the bit array at addr.
 * Returns 0 if the bit was clear before the call and non-zero if it was
 * set.  The operand constraints follow the same reasoning as in set_bit().
 */
__inline__ static int clear_bit(int nr, SMPVOL void * addr)
{
	int oldbit;

	__asm__ __volatile__(LOCK_PREFIX
		"btrl %2,%1\n\tsbbl %0,%0"
		:"=r" (oldbit),"+m" (ADDR)
		:"Ir" (nr)
		:"memory");
	return oldbit;
}


/*
 * Report whether bit nr of the bit array at addr is set (1) or clear (0).
 * This routine doesn't need to be atomic, but the word is always read
 * through a volatile pointer: callers spin on this function, and without
 * the qualifier the compiler may read the word once and loop forever.
 */
__inline__ static int test_bit(int nr, const SMPVOL void * addr)
{
	const volatile unsigned int *words = (const volatile unsigned int *) addr;

	return ((1UL << (nr & 31)) & words[nr >> 5]) != 0;
}


/* End of other people's code - CJN */


/* BLOCKSIZE: granularity of thread stacks.  bb_threads_stacksize() only
 * accepts multiples of it, and it is the size of the region that
 * bb_threads_newthread() protects with mprotect().
 * MAXNUMTHREADS: number of slots in the threadids[] and stacks[] tables. */
#define BLOCKSIZE 4096
#define MAXNUMTHREADS 256

#ifndef FALSE
#define FALSE 0
#endif

#ifndef TRUE
#define TRUE 1
#endif


/* Stack size for new threads, set by bb_threads_stacksize().  Zero means
 * "not set yet"; bb_threads_newthread() refuses to run in that state. */
static size_t stacksize;

/* When TRUE, bb_threads_newthread() adds CLONE_SIGHAND to the clone flags.
 * Changed through bb_threads_shared_sighandlers(). */
static int shared_signal_handlers = TRUE;

/* Parallel tables: slot i holds the pid of a thread and the malloc()ed
 * block that contains its stack.  Only the first numthreads slots are in
 * use. */
static pid_t threadids[MAXNUMTHREADS];
static void *stacks[MAXNUMTHREADS];
static pid_t numthreads;   /* Static storage, so it starts out as 0 */

/* Bit 0 is the lock bit taken by bb_threads_private_lock() and released by
 * bb_threads_private_unlock(). */
static int privatemutex;

/* Prototypes for the two helpers that take and release privatemutex. */
static __inline__ int bb_threads_private_lock(void);
static __inline__ int bb_threads_private_unlock(void);


/* Take the package's private lock (bit 0 of privatemutex), spinning until
 * it becomes available.  Always returns 0. */
static __inline__ int
bb_threads_private_lock(void)
{
  for (;;) {
    /* set_bit() reports the previous state: 0 means we now own the lock. */
    if (!set_bit(0, &privatemutex))
      return 0;

    /* Somebody else holds it: poll until it looks free, then try again. */
    while (test_bit(0, &privatemutex))
      ;
  }
}


/* Release the package's private lock.  Returns the previous state of the
 * lock bit as reported by clear_bit(). */
static __inline__ int
bb_threads_private_unlock(void)
{
  int previous = clear_bit(0, &privatemutex);

  return previous;
}




/* Choose the stack size, in bytes, for threads created from now on.
 * The size must be a whole number of BLOCKSIZE units; any other value is
 * reported on stderr and ignored. */
void
bb_threads_stacksize(size_t n)
{
  if (n % BLOCKSIZE == 0) {
    bb_threads_private_lock();
    stacksize = n;
    bb_threads_private_unlock();
  } else {
    fprintf(stderr, "Please select a stack size which is an integer multiple of %d\n", BLOCKSIZE);
  }
}


/* Select whether threads created from now on share signal handling with
 * their parent, i.e. whether a signal action installed by the child also
 * replaces the parent's action.  Any non-zero value of "shared" turns
 * sharing on, zero turns it off.  Sharing is the default. */
void
bb_threads_shared_sighandlers(int shared) {
  bb_threads_private_lock();
  shared_signal_handlers = shared ? TRUE : FALSE;
  bb_threads_private_unlock();
}




/* Create a new thread. It should be passed "fcn", a function which
 * takes two arguments, (the second one is a dummy, always 4). The
 * first argument is passed in "arg". Returns the PID of the new
 * thread */
pid_t
bb_threads_newthread(void (*fcn)(void *, size_t), void *arg)
{
  pid_t newpid; char *barrier;
  int flags;

  if (stacksize == 0) {
    fprintf(stderr, "Error. bb_threads_newthread() called before bb_threads_stacksize()\n");
    return -1;
  }

  bb_threads_private_lock();

  stacks[numthreads] = malloc(stacksize + 1 * BLOCKSIZE);
  if (stacks[numthreads] == NULL) {
    fprintf(stderr, "Memory allocation failure in bb_threads_newthread()\n");
    bb_threads_private_unlock();
    return -1;
  }

  barrier = (char *)(((int)(stacks[numthreads] + BLOCKSIZE) / BLOCKSIZE) * BLOCKSIZE);

  if (mprotect(barrier, BLOCKSIZE, PROT_NONE)) {
    fprintf(stderr, "Warning: Unable to guard the stack in bb_threads_newthread() for the new\n");
    fprintf(stderr, "thread. Stack overruns will not immediately SIGSEGV.\n");
  }


  flags = CLONE_VM | CLONE_FS | CLONE_FILES;
  if (shared_signal_handlers)
    flags |= CLONE_SIGHAND;


  newpid = sprocsp(fcn, flags, arg, stacks[numthreads] + stacksize + BLOCKSIZE, sizeof(arg));
  
  if (newpid == -1) {
    fprintf(stderr, "Unknown error spawning new thread.\n");
    free(stacks[numthreads]);
    bb_threads_private_unlock();
    return -1;
  }

  threadids[numthreads] = newpid;
  numthreads++;
  bb_threads_private_unlock();

  return newpid;
}


/* Clean up after a thread has finished. Should be passed the PID of
 * the thread which has exited, and it will free the memory used by
 * the thread's stack. */
pid_t
bb_threads_cleanup(pid_t tnum)
{
  int i, j;
  char *barrier;

  bb_threads_private_lock();
  for (i = 0; i < numthreads; i++)
    if (threadids[i] == tnum)
      break;

  if (i == numthreads) {
#ifndef SILENT_FAIL_CLEANUP
    fprintf(stderr, "Warning: tried to clean up a thread which wasn't there.\n");
#endif  /* !SILENT_FAIL_CLEANUP */
    bb_threads_private_unlock();
    return -1;
  }


  barrier = (char *)(((int)(stacks[i] + BLOCKSIZE) / BLOCKSIZE) * BLOCKSIZE);

  if (mprotect(barrier, BLOCKSIZE, PROT_READ | PROT_WRITE)) {
    fprintf(stderr, "Warning: Unable to remove the mprotect on the stack in bb_threads_newthread().\n");
    fprintf(stderr, "Your program is likely to SEGV soon.\n");
  }


  free(stacks[i]);

  for (j = i+1; j < numthreads; j++) {
    stacks[j-1] = stacks[j];
    threadids[j-1] = threadids[j];
  }

  numthreads--;
  bb_threads_private_unlock();

  return tnum;

}




/* Storage for the user-visible mutexes: one bit per mutex ID, addressed
 * with set_bit()/clear_bit()/test_bit().  Valid IDs are 0 .. MAX_MUTEXES-1.
 * The array has MAX_MUTEXES / (bits per int) elements plus one spare.
 * NOTE(review): BITSPERBYTE is presumably supplied by <values.h> --
 * confirm on the target system. */
#define MAX_MUTEXES 1024
static SMPVOL int mutexes[MAX_MUTEXES / (BITSPERBYTE * sizeof(int)) + 1];


/* Initialize mutex number n by putting it in the unlocked state.
 * Returns 0 on success, or -1 (with a message on stderr) if n is not a
 * valid mutex ID. */
int
bb_threads_mutexcreate(int n)
{
  if (n >= 0 && n < MAX_MUTEXES) {
    clear_bit(n, &mutexes[0]);
    return 0;
  }

  fprintf(stderr, "Mutex ID number out of range.\n");
  return -1;
}



/* Lock a mutex. If already locked, busy-wait for it to be freed.
 * Returns 0 once the mutex is held, or -1 (with a message on stderr) if n
 * is not a valid mutex ID.
 *
 * This is part of the public interface, so it is deliberately not declared
 * inline: under C99 and later rules a non-static inline definition does
 * not provide an external definition, and callers in other files would
 * fail to link. */
int
bb_threads_lock(int n)
{
  if (n >= MAX_MUTEXES || n < 0) {
    fprintf(stderr, "Mutex ID number out of range.\n");
    return -1;
  }

  /* set_bit() returns the previous state; non-zero means somebody else
   * holds the mutex, so poll until it looks free and then try again. */
  while(set_bit(n, &mutexes[0]))
    while(test_bit(n, &mutexes[0]));

  return 0;
}


/* Free a mutex so that somebody else can have it. Returns the previous
 * value of the mutex (0 if it was unlocked, non-zero if it was locked), or
 * -1 with a message on stderr if n is not a valid mutex ID. It is legal to
 * unlock an unlocked mutex.
 *
 * This is part of the public interface, so it is deliberately not declared
 * inline: under C99 and later rules a non-static inline definition does
 * not provide an external definition, and callers in other files would
 * fail to link. */
int
bb_threads_unlock(int n)
{
  if (n >= MAX_MUTEXES || n < 0) {
    fprintf(stderr, "Mutex ID number out of range.\n");
    return -1;
  }

  return clear_bit(n, &mutexes[0]);
}


/* Try to lock a mutex, but rather than busy waiting on it, sleep for
 * 'usecs' microseconds between attempts.
 * Returns 0 once the mutex is held, or -1 (with a message on stderr) if n
 * is not a valid mutex ID.
 *
 * This is part of the public interface, so it is deliberately not declared
 * inline: under C99 and later rules a non-static inline definition does
 * not provide an external definition, and callers in other files would
 * fail to link. */
int
bb_threads_sleepy_lock(int n, int usecs)
{
  if (n >= MAX_MUTEXES || n < 0) {
    fprintf(stderr, "Mutex ID number out of range.\n");
    return -1;
  }

  /* Same acquire loop as bb_threads_lock(), with a sleep between polls. */
  while(set_bit(n, &mutexes[0]))
    while(test_bit(n, &mutexes[0]))
      usleep(usecs);
  return 0;
}



/* Write the RCS identification line of this package to stream. */
void
bb_threads_id_fcn(FILE *stream)
{
  static const char ident[] =
    "$RCSfile: bb_threads.c,v $ $Revision: 0.4 $ $Date: 1996/08/29 21:02:06 $ (UTC)\n";

  fputs(ident, stream);
}

