trunk/src/osd/windows/winsync.c
| r32095 | r32096 | |
| 21 | 21 | //============================================================ |
| 22 | 22 | |
| 23 | 23 | #define DEBUG_SLOW_LOCKS 0 |
| | 24 | #define USE_SCALABLE_LOCKS (0) |
| 24 | 25 | |
| 25 | 26 | |
| 26 | 27 | |
| r32095 | r32096 | |
| 35 | 36 | CRITICAL_SECTION critsect; |
| 36 | 37 | }; |
| 37 | 38 | |
| | 39 | struct osd_scalable_lock |
| | 40 | { |
| | 41 | #if USE_SCALABLE_LOCKS |
| | 42 | struct |
| | 43 | { |
| | 44 | volatile INT32 haslock; // do we have the lock? |
| | 45 | INT32 filler[64/4-1]; // assumes a 64-byte cache line |
| | 46 | } slot[WORK_MAX_THREADS]; // one slot per thread |
| | 47 | volatile INT32 nextindex; // index of next slot to use |
| | 48 | #else |
| | 49 | CRITICAL_SECTION section; |
| | 50 | #endif |
| | 51 | }; |
| 38 | 52 | |
| 39 | 53 | |
| 40 | 54 | //============================================================ |
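The new `osd_scalable_lock` keeps one spin flag per worker thread, with each flag padded out to its own (assumed 64-byte) cache line so that threads polling different slots never contend for the same line. A minimal standalone sketch of that layout, using `<stdint.h>` types in place of MAME's `INT32` and a C11 `_Static_assert` purely for illustration:

```c
#include <stdint.h>

#define CACHE_LINE 64                        /* the line size the original comment assumes */

struct padded_slot
{
    volatile int32_t haslock;                /* the flag a waiting thread spins on          */
    int32_t filler[CACHE_LINE / 4 - 1];      /* 15 more int32s of padding -> 64 bytes total */
};

/* compile-time check of the layout assumption (illustrative only) */
_Static_assert(sizeof(struct padded_slot) == CACHE_LINE, "expected one cache line per slot");
```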
| r32095 | r32096 | |
| 169 | 183 | { |
| 170 | 184 | return InterlockedExchangeAdd((LONG *) ptr, delta) + delta; |
| 171 | 185 | } |
| | 186 | |
| | 187 | //============================================================ |
| | 188 | // Scalable Locks |
| | 189 | //============================================================ |
| | 190 | |
| | 191 | osd_scalable_lock *osd_scalable_lock_alloc(void) |
| | 192 | { |
| | 193 | osd_scalable_lock *lock; |
| | 194 | |
| | 195 | lock = (osd_scalable_lock *)calloc(1, sizeof(*lock)); |
| | 196 | |
| | 197 | memset(lock, 0, sizeof(*lock)); |
| | 198 | #if USE_SCALABLE_LOCKS |
| | 199 | lock->slot[0].haslock = TRUE; |
| | 200 | #else |
| | 201 | InitializeCriticalSection(&lock->section); |
| | 202 | #endif |
| | 203 | return lock; |
| | 204 | } |
| | 205 | |
| | 206 | |
| | 207 | INT32 osd_scalable_lock_acquire(osd_scalable_lock *lock) |
| | 208 | { |
| | 209 | #if USE_SCALABLE_LOCKS |
| | 210 | INT32 myslot = (atomic_increment32(&lock->nextindex) - 1) & (WORK_MAX_THREADS - 1); |
| | 211 | INT32 backoff = 1; |
| | 212 | |
| | 213 | while (!lock->slot[myslot].haslock) |
| | 214 | { |
| | 215 | INT32 backcount; |
| | 216 | for (backcount = 0; backcount < backoff; backcount++) |
| | 217 | osd_yield_processor(); |
| | 218 | backoff <<= 1; |
| | 219 | } |
| | 220 | lock->slot[myslot].haslock = FALSE; |
| | 221 | return myslot; |
| | 222 | #else |
| | 223 | EnterCriticalSection(&lock->section); |
| | 224 | return 0; |
| | 225 | #endif |
| | 226 | } |
| | 227 | |
| | 228 | |
| | 229 | void osd_scalable_lock_release(osd_scalable_lock *lock, INT32 myslot) |
| | 230 | { |
| | 231 | #if USE_SCALABLE_LOCKS |
| | 232 | atomic_exchange32(&lock->slot[(myslot + 1) & (WORK_MAX_THREADS - 1)].haslock, TRUE); |
| | 233 | #else |
| | 234 | LeaveCriticalSection(&lock->section); |
| | 235 | #endif |
| | 236 | } |
| | 237 | |
| | 238 | |
| | 239 | void osd_scalable_lock_free(osd_scalable_lock *lock) |
| | 240 | { |
| | 241 | #if USE_SCALABLE_LOCKS |
| | 242 | #else |
| | 243 | DeleteCriticalSection(&lock->section); |
| | 244 | #endif |
| | 245 | free(lock); |
| | 246 | } |
| | No newline at end of file |
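With the lock moved into winsync.c, callers see only the four `osd_scalable_lock_*` functions added above. A hypothetical caller (not part of this change) would use them as sketched below; note that the slot index returned by the acquire call must be passed back to the release call, because the `USE_SCALABLE_LOCKS` build hands the lock to the next slot in FIFO order:

```c
#include "winsync.h"

static osd_scalable_lock *example_lock;      /* assume osd_scalable_lock_alloc() succeeded earlier */

static void touch_shared_state(void)
{
    INT32 slot = osd_scalable_lock_acquire(example_lock);
    /* ... modify the structure the lock protects ... */
    osd_scalable_lock_release(example_lock, slot);
}
```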
trunk/src/osd/windows/winwork.c
| r32095 | r32096 | |
| 19 | 19 | |
| 20 | 20 | // MAME headers |
| 21 | 21 | #include "osdcore.h" |
| | 22 | |
| | 23 | #include "winsync.h" |
| | 24 | |
| 22 | 25 | #include "eminline.h" |
| 23 | 26 | |
| 24 | 27 | |
| r32095 | r32096 | |
| 27 | 30 | //============================================================ |
| 28 | 31 | |
| 29 | 32 | #define KEEP_STATISTICS (0) |
| 30 | | #define USE_SCALABLE_LOCKS (0) |
| 31 | 33 | |
| 32 | | |
| 33 | | |
| 34 | 34 | //============================================================ |
| 35 | 35 | // PARAMETERS |
| 36 | 36 | //============================================================ |
| r32095 | r32096 | |
| 79 | 79 | // TYPE DEFINITIONS |
| 80 | 80 | //============================================================ |
| 81 | 81 | |
| 82 | | struct scalable_lock |
| 83 | | { |
| 84 | | #if USE_SCALABLE_LOCKS |
| 85 | | struct |
| 86 | | { |
| 87 | | volatile INT32 haslock; // do we have the lock? |
| 88 | | INT32 filler[64/4-1]; // assumes a 64-byte cache line |
| 89 | | } slot[WORK_MAX_THREADS]; // one slot per thread |
| 90 | | volatile INT32 nextindex; // index of next slot to use |
| 91 | | #else |
| 92 | | CRITICAL_SECTION section; |
| 93 | | #endif |
| 94 | | }; |
| 95 | | |
| 96 | | |
| 97 | 82 | struct work_thread_info |
| 98 | 83 | { |
| 99 | 84 | osd_work_queue * queue; // pointer back to the queue |
| r32095 | r32096 | |
| 113 | 98 | |
| 114 | 99 | struct osd_work_queue |
| 115 | 100 | { |
| 116 | | scalable_lock lock; // lock for protecting the queue |
| | 101 | osd_scalable_lock * lock; // lock for protecting the queue |
| 117 | 102 | osd_work_item * volatile list; // list of items in the queue |
| 118 | 103 | osd_work_item ** volatile tailptr; // pointer to the tail pointer of work items in the queue |
| 119 | 104 | osd_work_item * volatile free; // free list of work items |
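The queue now holds the lock through an opaque pointer rather than embedding it, which is what lets the struct definition (and the `USE_SCALABLE_LOCKS` machinery) disappear from winwork.c entirely. The diff does not show winsync.h, so the declarations below are only an assumed sketch of the handle-style interface winwork.c compiles against, reconstructed from the calls it makes:

```c
/* assumed winsync.h declarations (not shown in this diff) */
struct osd_scalable_lock;
typedef struct osd_scalable_lock osd_scalable_lock;

osd_scalable_lock *osd_scalable_lock_alloc(void);
INT32 osd_scalable_lock_acquire(osd_scalable_lock *lock);
void osd_scalable_lock_release(osd_scalable_lock *lock, INT32 myslot);
void osd_scalable_lock_free(osd_scalable_lock *lock);
```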
| r32095 | r32096 | |
| 162 | 147 | static void worker_thread_process(osd_work_queue *queue, work_thread_info *thread); |
| 163 | 148 | |
| 164 | 149 | |
| 165 | | |
| 166 | 150 | //============================================================ |
| 167 | | // Scalable Locks |
| 168 | | //============================================================ |
| 169 | | |
| 170 | | INLINE void scalable_lock_init(scalable_lock *lock) |
| 171 | | { |
| 172 | | memset(lock, 0, sizeof(*lock)); |
| 173 | | #if USE_SCALABLE_LOCKS |
| 174 | | lock->slot[0].haslock = TRUE; |
| 175 | | #else |
| 176 | | InitializeCriticalSection(&lock->section); |
| 177 | | #endif |
| 178 | | } |
| 179 | | |
| 180 | | |
| 181 | | INLINE INT32 scalable_lock_acquire(scalable_lock *lock) |
| 182 | | { |
| 183 | | #if USE_SCALABLE_LOCKS |
| 184 | | INT32 myslot = (atomic_increment32(&lock->nextindex) - 1) & (WORK_MAX_THREADS - 1); |
| 185 | | INT32 backoff = 1; |
| 186 | | |
| 187 | | while (!lock->slot[myslot].haslock) |
| 188 | | { |
| 189 | | INT32 backcount; |
| 190 | | for (backcount = 0; backcount < backoff; backcount++) |
| 191 | | osd_yield_processor(); |
| 192 | | backoff <<= 1; |
| 193 | | } |
| 194 | | lock->slot[myslot].haslock = FALSE; |
| 195 | | return myslot; |
| 196 | | #else |
| 197 | | EnterCriticalSection(&lock->section); |
| 198 | | return 0; |
| 199 | | #endif |
| 200 | | } |
| 201 | | |
| 202 | | |
| 203 | | INLINE void scalable_lock_release(scalable_lock *lock, INT32 myslot) |
| 204 | | { |
| 205 | | #if USE_SCALABLE_LOCKS |
| 206 | | atomic_exchange32(&lock->slot[(myslot + 1) & (WORK_MAX_THREADS - 1)].haslock, TRUE); |
| 207 | | #else |
| 208 | | LeaveCriticalSection(&lock->section); |
| 209 | | #endif |
| 210 | | } |
| 211 | | |
| 212 | | |
| 213 | | INLINE void scalable_lock_delete(scalable_lock *lock) |
| 214 | | { |
| 215 | | #if USE_SCALABLE_LOCKS |
| 216 | | #else |
| 217 | | DeleteCriticalSection(&lock->section); |
| 218 | | #endif |
| 219 | | } |
| 220 | | |
| 221 | | |
| 222 | | //============================================================ |
| 223 | 151 | // osd_work_queue_alloc |
| 224 | 152 | //============================================================ |
| 225 | 153 | |
| r32095 | r32096 | |
| 246 | 174 | goto error; |
| 247 | 175 | |
| 248 | 176 | // initialize the critical section |
| 249 | | scalable_lock_init(&queue->lock); |
| | 177 | queue->lock = osd_scalable_lock_alloc(); |
| | 178 | if (queue->lock == NULL) |
| | 179 | goto error; |
| 250 | 180 | |
| 251 | 181 | // determine how many threads to create... |
| 252 | 182 | // on a single-CPU system, create 1 thread for I/O queues, and 0 threads for everything else |
| r32095 | r32096 | |
| 341 | 271 | if (queue->flags & WORK_QUEUE_FLAG_MULTI) |
| 342 | 272 | { |
| 343 | 273 | work_thread_info *thread = &queue->thread[queue->threads]; |
| 344 | | osd_ticks_t stopspin = osd_ticks() + timeout; |
| 345 | 274 | |
| 346 | 275 | end_timing(thread->waittime); |
| 347 | 276 | |
| r32095 | r32096 | |
| 351 | 280 | // if we're a high frequency queue, spin until done |
| 352 | 281 | if (queue->flags & WORK_QUEUE_FLAG_HIGH_FREQ) |
| 353 | 282 | { |
| | 283 | osd_ticks_t stopspin = osd_ticks() + timeout; |
| | 284 | |
| 354 | 285 | // spin until we're done |
| 355 | 286 | begin_timing(thread->spintime); |
| 356 | 287 | while (queue->items != 0 && osd_ticks() < stopspin) |
| r32095 | r32096 | |
| 435 | 366 | if (queue->thread != NULL) |
| 436 | 367 | free(queue->thread); |
| 437 | 368 | |
| 438 | | scalable_lock_delete(&queue->lock); |
| 439 | | |
| 440 | 369 | // free all the events |
| 441 | 370 | if (queue->doneevent != NULL) |
| 442 | 371 | CloseHandle(queue->doneevent); |
| r32095 | r32096 | |
| 468 | 397 | printf("Spin loops = %9d\n", queue->spinloops); |
| 469 | 398 | #endif |
| 470 | 399 | |
| | 400 | osd_scalable_lock_free(queue->lock); |
| 471 | 401 | // free the queue itself |
| 472 | 402 | free(queue); |
| 473 | 403 | } |
| r32095 | r32096 | |
| 522 | 452 | } |
| 523 | 453 | |
| 524 | 454 | // enqueue the whole thing within the critical section |
| 525 | | lockslot = scalable_lock_acquire(&queue->lock); |
| | 455 | lockslot = osd_scalable_lock_acquire(queue->lock); |
| 526 | 456 | *queue->tailptr = itemlist; |
| 527 | 457 | queue->tailptr = item_tailptr; |
| 528 | | scalable_lock_release(&queue->lock, lockslot); |
| | 458 | osd_scalable_lock_release(queue->lock, lockslot); |
| 529 | 459 | |
| 530 | 460 | // increment the number of items in the queue |
| 531 | 461 | atomic_add32(&queue->items, numitems); |
| r32095 | r32096 | |
| 732 | 662 | INT32 lockslot; |
| 733 | 663 | |
| 734 | 664 | // use a critical section to synchronize the removal of items |
| 735 | | lockslot = scalable_lock_acquire(&queue->lock); |
| | 665 | lockslot = osd_scalable_lock_acquire(queue->lock); |
| 736 | 666 | { |
| 737 | 667 | // pull the item from the queue |
| 738 | 668 | item = (osd_work_item *)queue->list; |
| r32095 | r32096 | |
| 743 | 673 | queue->tailptr = (osd_work_item **)&queue->list; |
| 744 | 674 | } |
| 745 | 675 | } |
| 746 | | scalable_lock_release(&queue->lock, lockslot); |
| | 676 | osd_scalable_lock_release(queue->lock, lockslot); |
| 747 | 677 | |
| 748 | 678 | // process non-NULL items |
| 749 | 679 | if (item != NULL) |