Subj : experimental fast-pathed rw-mutex algorithm...
To   : comp.programming.threads
From : Chris Thomasson
Date : Fri May 27 2005 05:17 pm

Here is a "somewhat" simple, experimental, writer starvation-free and 
fast-pathed rw-mutex algorithm that uses DWCAS on 32-bit systems and normal 
CAS on 64-bit systems. It does not need to use a internal mutex to protect 
the internal state of the lock. All of the bookkeeping can be modified 
atomically using a single instruction. This means it has the potential to 
outperform virtually all of the mutex based non-distributed rw-mutex 
implementations out there...




< scribbling down some pseudo-code >


// assume 64-bit size
struct rw_mutex_t
{
  short reads;
  short writes;
  short r_waits;
  short w_waits;
};




static rw_mutex_t mtx = { 0, 0, 0, 0 };
static sem_t r_wset;
static sem_t w_wset;




// updates c with d and returns non-zero on failure
extern dwcas( void *d, void *c, const void *x );




// write lock
rw_mutex_t cmp, xchg;

for ( ;; )
{
  cmp = mtx;

  do
  {
    xchg = cmp;

    if ( ! cmp.reads && ! cmp.writes )
    {
      ++xchg.writes;
    }

    else
    {
      ++xchg.w_waits;
    }

  } while ( dwcas( &mtx, &cmp, &xchg ) );

  if ( ! cmp.reads && ! cmp.writes )
  {
    break;
  }

  sem_wait( &w_wset );
}




// read lock
rw_mutex_t cmp, xchg;

for ( ;; )
{
  cmp = mtx;

  do
  {
    xchg = cmp;

    if ( ! cmp.writes && ! cmp.w_waits )
    {
      ++xchg.reads;
    }
    else
    {
      ++xchg.r_waits;
    }

  } while ( dwcas( &mtx, &cmp, &xchg ) );

  if ( ! cmp.writes && ! cmp.w_waits )
  {
    break;
  }

  sem_wait( &r_wset );
}




// write unlock
rw_mutex_t cmp, xchg;

do
{
  xchg = cmp;

  --xchg.writes;

  if ( cmp.w_waits )
  {
    --xchg.w_waits;
  }
  else if ( cmp.r_waits )
  {
    xchg.r_waits = 0;
  }

} while ( dwcas( &mtx, &cmp, &xchg ) );

if ( cmp.w_waits )
{
  sem_post( &w_wset );
}

else if ( cmp.r_waits )
{
  sem_post_multiple( &r_wset, cmp.r_waits );
}




// read unlock
rw_mutex_t cmp, xchg;

do
{
  xchg = cmp;

  --xchg.reads;

  if ( ! xchg.reads && cmp.w_waits )
  {
    --xchg.w_waits;
  }
  else if ( cmp.r_waits )
  {
    xchg.r_waits = 0;
  }

} while ( dwcas( &mtx, &cmp, &xchg ) );

if ( ! xchg.reads && cmp.w_waits )
{
  sem_post( &w_wset );
}

else if ( cmp.r_waits )
{
  sem_post_multiple( &r_wset, cmp.r_waits );
}




Does anybody notice any subtitle/major race-conditions? I just scribbled 
this simple algorithm down. I can't really seem to find any just yet, but 
just in case:

*** Do not use the code for anything else but experimentation! ***

:)


Thank you.




-- 
http://appcore.home.comcast.net/
(portable lock-free data-structures)

.