/* ----------------------------------------------------------------------------- * * (c) The GHC Team, 2009 * * Work-stealing Deque data structure * * The implementation uses Double-Ended Queues with lock-free access * (thereby often called "deque") as described in * * D.Chase and Y.Lev, Dynamic Circular Work-Stealing Deque. * SPAA'05, July 2005, Las Vegas, USA. * ACM 1-58113-986-1/05/0007 * * Author: Jost Berthold MSRC 07-09/2008 * * The DeQue is held as a circular array with known length. Positions * of top (read-end) and bottom (write-end) always increase, and the * array is accessed with indices modulo array-size. While this bears * the risk of overflow, we assume that (with 64 bit indices), a * program must run very long to reach that point. * * The write end of the queue (position bottom) can only be used with * mutual exclusion, i.e. by exactly one caller at a time. At this * end, new items can be enqueued using pushBottom()/newSpark(), and * removed using popBottom()/reclaimSpark() (the latter implying a cas * synchronisation with potential concurrent readers for the case of * just one element). * * Multiple readers can steal from the read end (position top), and * are synchronised without a lock, based on a cas of the top * position. One reader wins, the others return NULL for a failure. * * Both popWSDeque and stealWSDeque also return NULL when the queue is empty. * * Testing: see testsuite/tests/ghc-regress/rts/testwsdeque.c. If * there's anything wrong with the deque implementation, this test * will probably catch it. * * ---------------------------------------------------------------------------*/ #include "PosixSource.h" #include "Rts.h" #include "RtsUtils.h" #include "WSDeque.h" #define CASTOP(addr,old,new) ((old) == cas(((StgPtr)addr),(old),(new))) /* ----------------------------------------------------------------------------- * newWSDeque * -------------------------------------------------------------------------- */ /* internal helpers ... */ static StgWord roundUp2(StgWord val) { StgWord rounded = 1; /* StgWord is unsigned anyway, only catch 0 */ if (val == 0) { barf("DeQue,roundUp2: invalid size 0 requested"); } /* at least 1 bit set, shift up to its place */ do { rounded = rounded << 1; } while (0 != (val = val>>1)); return rounded; } WSDeque * newWSDeque (nat size) { StgWord realsize; WSDeque *q; realsize = roundUp2(size); /* to compute modulo as a bitwise & */ q = (WSDeque*) stgMallocBytes(sizeof(WSDeque), /* admin fields */ "newWSDeque"); q->elements = stgMallocBytes(realsize * sizeof(StgClosurePtr), /* dataspace */ "newWSDeque:data space"); q->top=0; q->bottom=0; q->topBound=0; /* read by writer, updated each time top is read */ q->size = realsize; /* power of 2 */ q->moduloSize = realsize - 1; /* n % size == n & moduloSize */ ASSERT_WSDEQUE_INVARIANTS(q); return q; } /* ----------------------------------------------------------------------------- * freeWSDeque * -------------------------------------------------------------------------- */ void freeWSDeque (WSDeque *q) { stgFree(q->elements); stgFree(q); } /* ----------------------------------------------------------------------------- * * popWSDeque: remove an element from the write end of the queue. * Returns the removed spark, and NULL if a race is lost or the pool * empty. * * If only one spark is left in the pool, we synchronise with * concurrently stealing threads by using cas to modify the top field. * This routine should NEVER be called by a task which does not own * this deque. * * -------------------------------------------------------------------------- */ void * popWSDeque (WSDeque *q) { /* also a bit tricky, has to avoid concurrent steal() calls by accessing top with cas, when there is only one element left */ StgWord t, b; long currSize; void * removed; ASSERT_WSDEQUE_INVARIANTS(q); b = q->bottom; // "decrement b as a test, see what happens" b--; q->bottom = b; // very important that the following read of q->top does not occur // before the earlier write to q->bottom. store_load_barrier(); t = q->top; /* using topBound would give an *upper* bound, we need a lower bound. We use the real top here, but can update the topBound value */ q->topBound = t; currSize = (long)b - (long)t; if (currSize < 0) { /* was empty before decrementing b, set b consistently and abort */ q->bottom = t; return NULL; } // read the element at b removed = q->elements[b & q->moduloSize]; if (currSize > 0) { /* no danger, still elements in buffer after b-- */ // debugBelch("popWSDeque: t=%ld b=%ld = %ld\n", t, b, removed); return removed; } /* otherwise, has someone meanwhile stolen the same (last) element? Check and increment top value to know */ if ( !(CASTOP(&(q->top),t,t+1)) ) { removed = NULL; /* no success, but continue adjusting bottom */ } q->bottom = t+1; /* anyway, empty now. Adjust bottom consistently. */ q->topBound = t+1; /* ...and cached top value as well */ ASSERT_WSDEQUE_INVARIANTS(q); ASSERT(q->bottom >= q->top); // debugBelch("popWSDeque: t=%ld b=%ld = %ld\n", t, b, removed); return removed; } /* ----------------------------------------------------------------------------- * stealWSDeque * -------------------------------------------------------------------------- */ void * stealWSDeque_ (WSDeque *q) { void * stolen; StgWord b,t; // Can't do this on someone else's spark pool: // ASSERT_WSDEQUE_INVARIANTS(q); // NB. these loads must be ordered, otherwise there is a race // between steal and pop. t = q->top; load_load_barrier(); b = q->bottom; // NB. b and t are unsigned; we need a signed value for the test // below, because it is possible that t > b during a // concurrent popWSQueue() operation. if ((long)b - (long)t <= 0 ) { return NULL; /* already looks empty, abort */ } /* now access array, see pushBottom() */ stolen = q->elements[t & q->moduloSize]; /* now decide whether we have won */ if ( !(CASTOP(&(q->top),t,t+1)) ) { /* lost the race, someon else has changed top in the meantime */ return NULL; } /* else: OK, top has been incremented by the cas call */ // debugBelch("stealWSDeque_: t=%d b=%d\n", t, b); // Can't do this on someone else's spark pool: // ASSERT_WSDEQUE_INVARIANTS(q); return stolen; } void * stealWSDeque (WSDeque *q) { void *stolen; do { stolen = stealWSDeque_(q); } while (stolen == NULL && !looksEmptyWSDeque(q)); return stolen; } /* ----------------------------------------------------------------------------- * pushWSQueue * -------------------------------------------------------------------------- */ #define DISCARD_NEW /* enqueue an element. Should always succeed by resizing the array (not implemented yet, silently fails in that case). */ rtsBool pushWSDeque (WSDeque* q, void * elem) { StgWord t; StgWord b; StgWord sz = q->moduloSize; ASSERT_WSDEQUE_INVARIANTS(q); /* we try to avoid reading q->top (accessed by all) and use q->topBound (accessed only by writer) instead. This is why we do not just call empty(q) here. */ b = q->bottom; t = q->topBound; if ( (StgInt)b - (StgInt)t >= (StgInt)sz ) { /* NB. 1. sz == q->size - 1, thus ">=" 2. signed comparison, it is possible that t > b */ /* could be full, check the real top value in this case */ t = q->top; q->topBound = t; if (b - t >= sz) { /* really no space left :-( */ /* reallocate the array, copying the values. Concurrent steal()s will in the meantime use the old one and modify only top. This means: we cannot safely free the old space! Can keep it on a free list internally here... Potential bug in combination with steal(): if array is replaced, it is unclear which one concurrent steal operations use. Must read the array base address in advance in steal(). */ #if defined(DISCARD_NEW) ASSERT_WSDEQUE_INVARIANTS(q); return rtsFalse; // we didn't push anything #else /* could make room by incrementing the top position here. In * this case, should use CASTOP. If this fails, someone else has * removed something, and new room will be available. */ ASSERT_WSDEQUE_INVARIANTS(q); #endif } } q->elements[b & sz] = elem; q->bottom = b + 1; ASSERT_WSDEQUE_INVARIANTS(q); return rtsTrue; }