Previous 199869 Revisions Next

r28721 Wednesday 19th March, 2014 at 13:41:17 UTC by Oliver Stöneberg
renamed poly.{c|h} to polylgcy.{c|h} and polynew.h to poly.h
[src/emu/video]poly.c {poly.h} polylgcy.c* polylgcy.h* polynew.h video.mak voodoo.c
[src/mame/drivers]atarisy4.c chihiro.c cobra.c namcos23.c taitotz.c
[src/mame/includes]gaelco3d.h galastrm.h midvunit.h model2.h model3.h namcos22.h taitojc.h
[src/mame/video]galastrm.c gticlub.c k001005.c k001005.h midzeus.c midzeus2.c model2.c model3.c n64.h namcos22.c taitojc.c

trunk/src/emu/video/polynew.h
r28720r28721
1/***************************************************************************
2
3    poly.h
4
5    New polygon helper routines.
6
7****************************************************************************
8
9    Pixel model:
10
11    (0.0,0.0)       (1.0,0.0)       (2.0,0.0)       (3.0,0.0)
12        +---------------+---------------+---------------+
13        |               |               |               |
14        |               |               |               |
15        |   (0.5,0.5)   |   (1.5,0.5)   |   (2.5,0.5)   |
16        |       *       |       *       |       *       |
17        |               |               |               |
18        |               |               |               |
19    (0.0,1.0)       (1.0,1.0)       (2.0,1.0)       (3.0,1.0)
20        +---------------+---------------+---------------+
21        |               |               |               |
22        |               |               |               |
23        |   (0.5,1.5)   |   (1.5,1.5)   |   (2.5,1.5)   |
24        |       *       |       *       |       *       |
25        |               |               |               |
26        |               |               |               |
27        |               |               |               |
28        +---------------+---------------+---------------+
29    (0.0,2.0)       (1.0,2.0)       (2.0,2.0)       (3.0,2.0)
30
31***************************************************************************/
32
33#pragma once
34
35#ifndef __POLYNEW_H__
36#define __POLYNEW_H__
37
38
39//**************************************************************************
40//  DEBUGGING
41//**************************************************************************
42
43// keep statistics
44#define KEEP_STATISTICS                 0
45
46// turn this on to log the reasons for any long waits
47#define LOG_WAITS                       0
48
49// number of profiling ticks before we consider a wait "long"
50#define LOG_WAIT_THRESHOLD              1000
51
52
53
54/***************************************************************************
55    CONSTANTS
56***************************************************************************/
57
// flags passed to the poly_manager constructor (stored in m_flags)
// INCLUDE_BOTTOM_EDGE: the render routines extend the bottom Y bound by one scanline
58#define POLYFLAG_INCLUDE_BOTTOM_EDGE        0x01
// INCLUDE_RIGHT_EDGE: the render routines extend each span's stop X by one pixel
59#define POLYFLAG_INCLUDE_RIGHT_EDGE         0x02
// NO_WORK_QUEUE: the constructor does not allocate an OSD work queue; wait() then
// runs all pending work units itself
60#define POLYFLAG_NO_WORK_QUEUE              0x04
61
// number of scanline extents stored in a single work unit
62#define SCANLINES_PER_BUCKET                8
// pool items are rounded up to a multiple of this size
63#define CACHE_LINE_SIZE                     64          // this is a general guess
// number of entries in the per-manager bucket table; scanlines map to buckets
// modulo this count
64#define TOTAL_BUCKETS                       (512 / SCANLINES_PER_BUCKET)
// work units reserved per polygon when sizing the unit pool
65#define UNITS_PER_POLY                      (100 / SCANLINES_PER_BUCKET)
66
67
68
69//**************************************************************************
70//  TYPE DEFINITIONS
71//**************************************************************************
72
73//-------------------------------------------------
74//  global helpers for float base types
75//-------------------------------------------------
76
// largest integral value not greater than x (single precision)
inline float poly_floor(float x)
{
   return floorf(x);
}

// magnitude of x (single precision)
inline float poly_abs(float x)
{
   return fabsf(x);
}

// reciprocal of x (single precision); the caller must handle x == 0
inline float poly_recip(float x)
{
   return 1.0f / x;
}
80
81
82//-------------------------------------------------
83//  global helpers for double base types
84//-------------------------------------------------
85
// largest integral value not greater than x (double precision)
inline double poly_floor(double x)
{
   return floor(x);
}

// magnitude of x (double precision)
inline double poly_abs(double x)
{
   return fabs(x);
}

// reciprocal of x (double precision); the caller must handle x == 0
inline double poly_recip(double x)
{
   return 1.0 / x;
}
89
90
91// poly_manager is a template class
// poly_manager rasterizes tiles, triangles and polygons into per-scanline
// extents, groups them into work units and hands them to a scanline callback,
// either through an OSD work queue or synchronously from wait().
//
// Template parameters:
//   _BaseType   - numeric type of coordinates and interpolated parameters
//   _ObjectData - user data type handed (by const reference) to the callback
//   _MaxParams  - size of the per-vertex / per-extent parameter arrays
//   _MaxPolys   - number of entries in the polygon pool; also sizes the
//                 object-data and work-unit pools
92template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
93class poly_manager
94{
95public:
96   // each vertex has an X/Y coordinate and a set of parameters
97   struct vertex_t
98   {
         // the default constructor leaves all members uninitialized
99      vertex_t() { }
         // this constructor sets x and y only; p[] is left uninitialized
100      vertex_t(_BaseType _x, _BaseType _y) { x = _x; y = _y; }
101
102      _BaseType x, y;                         // X, Y coordinates
103      _BaseType p[_MaxParams];                // interpolated parameters
104   };
105
106   // a single extent describes a span and a list of parameter extents
107   struct extent_t
108   {
109      INT16 startx, stopx;                    // starting (inclusive)/ending (exclusive) endpoints
110      struct
111      {
112         _BaseType start;                    // parameter value at start
113         _BaseType dpdx;                     // dp/dx relative to start
114      } param[_MaxParams];
115      void *userdata;                         // custom per-span data
116   };
117
118   // delegate type for scanline callbacks
      // arguments: scanline number, extent for that scanline, object data, thread id
119   typedef delegate<void (INT32, const extent_t &, const _ObjectData &, int)> render_delegate;
120
121   // construction/destruction
122   poly_manager(running_machine &machine, UINT8 flags = 0);
123   poly_manager(screen_device &screen, UINT8 flags = 0);
124   virtual ~poly_manager();
125
126   // getters
127   running_machine &machine() const { return m_machine; }
      // asserts unless the manager was constructed from a screen_device
128   screen_device &screen() const { assert(m_screen != NULL); return *m_screen; }
129
130   // synchronization
131   void wait(const char *debug_reason = "general");
132
133   // object data allocators
134   _ObjectData &object_data_alloc();
135   _ObjectData &object_data_last() const { return m_object.last(); }
136
137   // tiles
138   UINT32 render_tile(const rectangle &cliprect, render_delegate callback, int paramcount, const vertex_t &v1, const vertex_t &v2);
139
140   // triangles
141   UINT32 render_triangle(const rectangle &cliprect, render_delegate callback, int paramcount, const vertex_t &v1, const vertex_t &v2, const vertex_t &v3);
142   UINT32 render_triangle_fan(const rectangle &cliprect, render_delegate callback, int paramcount, int numverts, const vertex_t *v);
143   UINT32 render_triangle_strip(const rectangle &cliprect, render_delegate callback, int paramcount, int numverts, const vertex_t *v);
144   UINT32 render_triangle_custom(const rectangle &cliprect, render_delegate callback, int startscanline, int numscanlines, const extent_t *extents);
145
146   // polygons
147   template<int _NumVerts>
148   UINT32 render_polygon(const rectangle &cliprect, render_delegate callback, int paramcount, const vertex_t *v);
149
150   // public helpers
151   int zclip_if_less(int numverts, const vertex_t *v, vertex_t *outv, int paramcount, _BaseType clipval);
152
153private:
154   // polygon_info describes a single polygon, which includes the poly_params
155   struct polygon_info
156   {
157      poly_manager *      m_owner;                // pointer back to the poly manager
158      _ObjectData *       m_object;               // object data pointer
159      render_delegate     m_callback;             // callback to handle a scanline's worth of work
160   };
161
162   // internal unit of work
163   struct work_unit
164   {
         // count_next packs two values: the low 16 bits hold the scanline count,
         // the high 16 bits hold the index of a follow-on unit (see work_item_callback)
165      volatile UINT32     count_next;             // number of scanlines and index of next item to process
166      polygon_info *      polygon;                // pointer to polygon
167      INT16               scanline;               // starting scanline
         // previtem is 0xffff when there is no previous item
168      UINT16              previtem;               // index of previous item in the same bucket
169   #ifndef PTR64
170      UINT32              dummy;                  // pad to 16 bytes
171   #endif
172      extent_t            extent[SCANLINES_PER_BUCKET]; // array of scanline extents
173   };
174
175   // class for managing an array of items
      // items are carved out of a single byte buffer of _Count slots, each slot
      // being k_itemsize bytes
176   template<class _Type, int _Count>
177   class poly_array
178   {
179      // size of an item, rounded up to the cache line size
180      static const int k_itemsize = ((sizeof(_Type) + CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE) * CACHE_LINE_SIZE;
181
182   public:
183      // construction
184      poly_array(running_machine &machine, poly_manager &manager)
185         : m_manager(manager),
186            m_base(auto_alloc_array_clear(machine, UINT8, k_itemsize * _Count)),
187            m_next(0),
188            m_max(0),
189            m_waits(0) { }
190
191      // destruction
192      ~poly_array() { auto_free(m_manager.machine(), m_base); }
193
194      // operators
195      _Type &operator[](int index) const { assert(index >= 0 && index < _Count); return *reinterpret_cast<_Type *>(m_base + index * k_itemsize); }
196
197      // getters
198      int count() const { return m_next; }
199      int max() const { return m_max; }
200      int waits() const { return m_waits; }
201      int itemsize() const { return k_itemsize; }
202      int allocated() const { return _Count; }
203      int indexof(_Type &item) const { int result = (reinterpret_cast<UINT8 *>(&item) - m_base) / k_itemsize; assert(result >= 0 && result < _Count); return result; }
204
205      // operations
         // reset() only rewinds the allocation index; slots are not cleared or destroyed
206      void reset() { m_next = 0; }
         // next() records the high-water mark, then placement-news a _Type in the next slot
207      _Type &next() { if (m_next > m_max) m_max = m_next; assert(m_next < _Count); return *new(m_base + m_next++ * k_itemsize) _Type; }
208      _Type &last() const { return (*this)[m_next - 1]; }
         // wait_for_space() calls the manager's wait() (which resets the arrays)
         // until m_next + count is below _Count
209      void wait_for_space(int count = 1) { while ((m_next + count) >= _Count) { m_waits++; m_manager.wait(""); }  }
210
211   private:
212      // internal state
213      poly_manager &      m_manager;
214      UINT8 *             m_base;
215      int                 m_next;
216      int                 m_max;
217      int                 m_waits;
218   };
219
220   // internal array types
221   typedef poly_array<polygon_info, _MaxPolys> polygon_array;
222   typedef poly_array<_ObjectData, _MaxPolys + 1> objectdata_array;
223   typedef poly_array<work_unit, MIN(_MaxPolys * UNITS_PER_POLY, 65535)> unit_array;
224
225   // round in a cross-platform consistent manner
      // values whose fractional part is exactly 0.5 round down
226   inline INT32 round_coordinate(_BaseType value)
227   {
228      INT32 result = poly_floor(value);
229      return result + (value - _BaseType(result) > _BaseType(0.5));
230   }
231
232   // internal helpers
      // minx and maxx are accepted but not used by this implementation
233   polygon_info &polygon_alloc(int minx, int maxx, int miny, int maxy, render_delegate callback)
234   {
235      // wait for space in the polygon and unit arrays
236      m_polygon.wait_for_space();
237      m_unit.wait_for_space((maxy - miny) / SCANLINES_PER_BUCKET + 2);
238
239      // return and initialize the next one
240      polygon_info &polygon = m_polygon.next();
241      polygon.m_owner = this;
242      polygon.m_object = &object_data_last();
243      polygon.m_callback = callback;
244      return polygon;
245   }
246
247   static void *work_item_callback(void *param, int threadid);
248   void presave() { wait("pre-save"); }
249
250   // queue management
251   running_machine &   m_machine;
252   screen_device *     m_screen;
253   osd_work_queue *    m_queue;                    // work queue
254
255   // arrays
256   polygon_array       m_polygon;                  // array of polygons
257   objectdata_array    m_object;                   // array of object data
258   unit_array          m_unit;                     // array of work units
259
260   // misc data
261   UINT8               m_flags;                    // flags
262
263   // buckets
      // each entry holds the index of the last unit queued for that bucket
264   UINT16              m_unit_bucket[TOTAL_BUCKETS]; // buckets for tracking unit usage
265
266   // statistics
267   UINT32              m_tiles;                    // number of tiles queued
268   UINT32              m_triangles;                // number of triangles queued
269   UINT32              m_quads;                    // number of quads queued
270   UINT64              m_pixels;                   // number of pixels rendered
271#if KEEP_STATISTICS
272   UINT32              m_conflicts[WORK_MAX_THREADS]; // number of conflicts found, per thread
273   UINT32              m_resolved[WORK_MAX_THREADS];   // number of conflicts resolved, per thread
274#endif
275};
276
277
278//-------------------------------------------------
279//  poly_manager - constructor
280//-------------------------------------------------
281
282template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
283poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::poly_manager(running_machine &machine, UINT8 flags)
284   : m_machine(machine),
285      m_screen(NULL),
286      m_queue(NULL),
287      m_polygon(machine, *this),
288      m_object(machine, *this),
289      m_unit(machine, *this),
290      m_flags(flags),
291      m_triangles(0),
292      m_quads(0),
293      m_pixels(0)
294{
295#if KEEP_STATISTICS
296   memset(m_conflicts, 0, sizeof(m_conflicts));
297   memset(m_resolved, 0, sizeof(m_resolved));
298#endif
299
300   // create the work queue
301   if (!(flags & POLYFLAG_NO_WORK_QUEUE))
302      m_queue = osd_work_queue_alloc(WORK_QUEUE_FLAG_MULTI | WORK_QUEUE_FLAG_HIGH_FREQ);
303
304   // request a pre-save callback for synchronization
305   machine.save().register_presave(save_prepost_delegate(FUNC(poly_manager::presave), this));
306}
307
308
309template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
310poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::poly_manager(screen_device &screen, UINT8 flags)
311   : m_machine(screen.machine()),
312      m_screen(&screen),
313      m_queue(NULL),
314      m_polygon(screen.machine(), *this),
315      m_object(screen.machine(), *this),
316      m_unit(screen.machine(), *this),
317      m_flags(flags),
318      m_triangles(0),
319      m_quads(0),
320      m_pixels(0)
321{
322#if KEEP_STATISTICS
323   memset(m_conflicts, 0, sizeof(m_conflicts));
324   memset(m_resolved, 0, sizeof(m_resolved));
325#endif
326
327   // create the work queue
328   if (!(flags & POLYFLAG_NO_WORK_QUEUE))
329      m_queue = osd_work_queue_alloc(WORK_QUEUE_FLAG_MULTI | WORK_QUEUE_FLAG_HIGH_FREQ);
330
331   // request a pre-save callback for synchronization
332   machine().save().register_presave(save_prepost_delegate(FUNC(poly_manager::presave), this));
333}
334
335
336//-------------------------------------------------
337//  ~poly_manager - destructor
338//-------------------------------------------------
339
340template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
341poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::~poly_manager()
342{
343#if KEEP_STATISTICS
344{
345   // accumulate stats over the entire collection
346   int conflicts = 0, resolved = 0;
347   for (int i = 0; i < ARRAY_LENGTH(m_conflicts); i++)
348   {
349      conflicts += m_conflicts[i];
350      resolved += m_resolved[i];
351   }
352
353   // output global stats
354   printf("Total triangles = %d\n", m_triangles);
355   printf("Total quads = %d\n", m_quads);
356   if (m_pixels > 1000000000)
357      printf("Total pixels   = %d%09d\n", (UINT32)(m_pixels / 1000000000), (UINT32)(m_pixels % 1000000000));
358   else
359      printf("Total pixels   = %d\n", (UINT32)m_pixels);
360
361   printf("Conflicts:   %d resolved, %d total\n", resolved, conflicts);
362   printf("Units:       %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", m_unit.max(), m_unit.allocated(), m_unit.waits(), m_unit.itemsize(), m_unit.allocated() * m_unit.itemsize());
363   printf("Polygons:    %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", m_polygon.max(), m_polygon.allocated(), m_polygon.waits(), m_polygon.itemsize(), m_polygon.allocated() * m_polygon.itemsize());
364   printf("Object data: %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", m_object.max(), m_object.allocated(), m_object.waits(), m_object.itemsize(), m_object.allocated() * m_object.itemsize());
365}
366#endif
367
368   // free the work queue
369   if (m_queue != NULL)
370      osd_work_queue_free(m_queue);
371}
372
373
374//-------------------------------------------------
375//  work_item_callback - process a work item
376//-------------------------------------------------
377
// Process one work unit, then any follow-on units chained onto it.
//   param    - pointer to the work_unit to process
//   threadid - forwarded to the scanline callback (and used to index the
//              statistics arrays when KEEP_STATISTICS is enabled)
// Always returns NULL.
//
// count_next packs two fields: the low 16 bits are the number of scanlines in
// the unit, the high 16 bits are the index of a unit that deferred itself
// onto this one. A count_next of 0 means the unit has finished.
378template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
379void *poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::work_item_callback(void *param, int threadid)
380{
381   while (1)
382   {
383      work_unit &unit = *(work_unit *)param;
384      polygon_info &polygon = *unit.polygon;
385      int count = unit.count_next & 0xffff;
386      UINT32 orig_count_next;
387
388      // if our previous item isn't done yet, enqueue this item to the end and proceed
         // (a previtem of 0xffff means there is no previous item to wait for)
389      if (unit.previtem != 0xffff)
390      {
391         work_unit &prevunit = polygon.m_owner->m_unit[unit.previtem];
392         if (prevunit.count_next != 0)
393         {
394            UINT32 unitnum = polygon.m_owner->m_unit.indexof(unit);
395            UINT32 new_count_next;
396
397            // attempt to atomically swap in this new value
               // (our own index is stored in the previous unit's high 16 bits)
398            do
399            {
400               orig_count_next = prevunit.count_next;
401               new_count_next = orig_count_next | (unitnum << 16);
402            } while (compare_exchange32((volatile INT32 *)&prevunit.count_next, orig_count_next, new_count_next) != orig_count_next);
403
404#if KEEP_STATISTICS
405            // track resolved conflicts
406            polygon.m_owner->m_conflicts[threadid]++;
407            if (orig_count_next != 0)
408               polygon.m_owner->m_resolved[threadid]++;
409#endif
410            // if we succeeded, skip out early so we can do other work
               // (orig_count_next == 0 means the previous unit finished before the
               // swap, so nobody will pick this unit up and we process it below)
411            if (orig_count_next != 0)
412               break;
413         }
414      }
415
416      // iterate over extents
417      for (int curscan = 0; curscan < count; curscan++)
418         polygon.m_callback(unit.scanline + curscan, unit.extent[curscan], *polygon.m_object, threadid);
419
420      // set our count to 0 and re-fetch the original count value
421      do
422      {
423         orig_count_next = unit.count_next;
424      } while (compare_exchange32((volatile INT32 *)&unit.count_next, orig_count_next, 0) != orig_count_next);
425
426      // if we have no more work to do, do nothing
         // (otherwise the high 16 bits name the deferred unit to process next)
427      orig_count_next >>= 16;
428      if (orig_count_next == 0)
429         break;
430      param = &polygon.m_owner->m_unit[orig_count_next];
431   }
432   return NULL;
433}
434
435
436//-------------------------------------------------
437//  wait - stall until all work is complete
438//-------------------------------------------------
439
440template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
441void poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::wait(const char *debug_reason)
442{
443   osd_ticks_t time;
444
445   // remember the start time if we're logging
446   if (LOG_WAITS)
447      time = get_profile_ticks();
448
449   // wait for all pending work items to complete
450   if (m_queue != NULL)
451      osd_work_queue_wait(m_queue, osd_ticks_per_second() * 100);
452
453   // if we don't have a queue, just run the whole list now
454   else
455      for (int unitnum = 0; unitnum < m_unit.count(); unitnum++)
456         work_item_callback(&m_unit[unitnum], 0);
457
458   // log any long waits
459   if (LOG_WAITS)
460   {
461      time = get_profile_ticks() - time;
462      if (time > LOG_WAIT_THRESHOLD)
463         logerror("Poly:Waited %d cycles for %s\n", (int)time, debug_reason);
464   }
465
466   // reset the state
467   m_polygon.reset();
468   m_unit.reset();
469   memset(m_unit_bucket, 0xff, sizeof(m_unit_bucket));
470
471   // we need to preserve the last object data that was supplied
472   if (m_object.count() > 0)
473   {
474      _ObjectData temp = object_data_last();
475      m_object.reset();
476      m_object.next() = temp;
477   }
478   else
479      m_object.reset();
480}
481
482
483//-------------------------------------------------
484//  object_data_alloc - allocate a new _ObjectData
485//-------------------------------------------------
486
487template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
488_ObjectData &poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::object_data_alloc()
489{
490   // wait for a work item if we have to, then return the next item
491   m_object.wait_for_space();
492   return m_object.next();
493}
494
495
496//-------------------------------------------------
497//  render_tile - render a tile
498//-------------------------------------------------
499
500template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
501UINT32 poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::render_tile(const rectangle &cliprect, render_delegate callback, int paramcount, const vertex_t &_v1, const vertex_t &_v2)
502{
503   const vertex_t *v1 = &_v1;
504   const vertex_t *v2 = &_v2;
505
506   // first sort by Y
507   if (v2->y < v1->y)
508   {
509      const vertex_t *tv = v1;
510      v1 = v2;
511      v2 = tv;
512   }
513
514   // compute some integral X/Y vertex values
515   INT32 v1y = round_coordinate(v1->y);
516   INT32 v2y = round_coordinate(v2->y);
517
518   // clip coordinates
519   INT32 v1yclip = v1y;
520   INT32 v2yclip = v2y + ((m_flags & POLYFLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
521   v1yclip = MAX(v1yclip, cliprect.min_y);
522   v2yclip = MIN(v2yclip, cliprect.max_y + 1);
523   if (v2yclip - v1yclip <= 0)
524      return 0;
525
526   // determine total X extents
527   _BaseType minx = v1->x;
528   _BaseType maxx = v2->x;
529   if (minx > maxx)
530      return 0;
531
532   // allocate and populate a new polygon
533   polygon_info &polygon = polygon_alloc(round_coordinate(minx), round_coordinate(maxx), v1yclip, v2yclip, callback);
534
535   // compute parameter deltas
536   _BaseType param_dpdx[_MaxParams];
537   _BaseType param_dpdy[_MaxParams];
538   if (paramcount > 0)
539   {
540      _BaseType oox = poly_recip(v2->x - v1->x);
541      _BaseType ooy = poly_recip(v2->y - v1->y);
542      for (int paramnum = 0; paramnum < paramcount; paramnum++)
543      {
544         param_dpdx[paramnum]  = oox * (v2->p[paramnum] - v1->p[paramnum]);
545         param_dpdy[paramnum]  = ooy * (v2->p[paramnum] - v1->p[paramnum]);
546      }
547   }
548
549   // clamp to full pixels
550   INT32 istartx = round_coordinate(v1->x);
551   INT32 istopx = round_coordinate(v2->x);
552
553   // force start < stop
554   if (istartx > istopx)
555   {
556      INT32 temp = istartx;
557      istartx = istopx;
558      istopx = temp;
559   }
560
561   // include the right edge if requested
562   if (m_flags & POLYFLAG_INCLUDE_RIGHT_EDGE)
563      istopx++;
564
565   // apply left/right clipping
566   if (istartx < cliprect.min_x)
567      istartx = cliprect.min_x;
568   if (istopx > cliprect.max_x)
569      istopx = cliprect.max_x + 1;
570   if (istartx >= istopx)
571      return 0;
572
573   // compute the X extents for each scanline
574   INT32 pixels = 0;
575   UINT32 startunit = m_unit.count();
576   INT32 scaninc = 1;
577   for (INT32 curscan = v1yclip; curscan < v2yclip; curscan += scaninc)
578   {
579      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
580      UINT32 unit_index = m_unit.count();
581      work_unit &unit = m_unit.next();
582
583      // determine how much to advance to hit the next bucket
584      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
585
586      // fill in the work unit basics
587      unit.polygon = &polygon;
588      unit.count_next = MIN(v2yclip - curscan, scaninc);
589      unit.scanline = curscan;
590      unit.previtem = m_unit_bucket[bucketnum];
591      m_unit_bucket[bucketnum] = unit_index;
592
593      // iterate over extents
594      for (int extnum = 0; extnum < unit.count_next; extnum++)
595      {
596         // compute the ending X based on which part of the triangle we're in
597         _BaseType fully = _BaseType(curscan + extnum) + _BaseType(0.5);
598
599         // set the extent and update the total pixel count
600         extent_t &extent = unit.extent[extnum];
601         extent.startx = istartx;
602         extent.stopx = istopx;
603         extent.userdata = NULL;
604         pixels += istopx - istartx;
605
606         // fill in the parameters for the extent
607         _BaseType fullstartx = _BaseType(istartx) + _BaseType(0.5);
608         for (int paramnum = 0; paramnum < paramcount; paramnum++)
609         {
610            extent.param[paramnum].start = v1->p[paramnum] + fullstartx * param_dpdx[paramnum] + fully * param_dpdy[paramnum];
611            extent.param[paramnum].dpdx = param_dpdx[paramnum];
612         }
613      }
614   }
615
616   // enqueue the work items
617   if (m_queue != NULL)
618      osd_work_item_queue_multiple(m_queue, work_item_callback, m_unit.count() - startunit, &m_unit[startunit], m_unit.itemsize(), WORK_ITEM_FLAG_AUTO_RELEASE);
619
620   // return the total number of pixels in the triangle
621   m_tiles++;
622   m_pixels += pixels;
623   return pixels;
624}
625
626
627//-------------------------------------------------
628//  render_triangle - render a single triangle
629//  given 3 vertexes
630//-------------------------------------------------
631
// Rasterize one triangle into per-scanline extents and queue them as work units.
//   cliprect      - clipping rectangle (inclusive bounds)
//   callback      - delegate invoked for each scanline
//   paramcount    - number of entries of vertex_t::p to interpolate
//   _v1, _v2, _v3 - the vertices, in any order (they are sorted by Y here)
// Returns the number of pixels covered after clipping; 0 if the triangle lies
// entirely outside the vertical clip range.
632template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
633UINT32 poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::render_triangle(const rectangle &cliprect, render_delegate callback, int paramcount, const vertex_t &_v1, const vertex_t &_v2, const vertex_t &_v3)
634{
635   const vertex_t *v1 = &_v1;
636   const vertex_t *v2 = &_v2;
637   const vertex_t *v3 = &_v3;
638
639   // first sort by Y
      // (afterwards v1->y <= v2->y <= v3->y)
640   if (v2->y < v1->y)
641   {
642      const vertex_t *tv = v1;
643      v1 = v2;
644      v2 = tv;
645   }
646   if (v3->y < v2->y)
647   {
648      const vertex_t *tv = v2;
649      v2 = v3;
650      v3 = tv;
651      if (v2->y < v1->y)
652      {
653         const vertex_t *tv = v1;
654         v1 = v2;
655         v2 = tv;
656      }
657   }
658
659   // compute some integral X/Y vertex values
660   INT32 v1y = round_coordinate(v1->y);
661   INT32 v3y = round_coordinate(v3->y);
662
663   // clip coordinates
664   INT32 v1yclip = v1y;
665   INT32 v3yclip = v3y + ((m_flags & POLYFLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
666   v1yclip = MAX(v1yclip, cliprect.min_y);
667   v3yclip = MIN(v3yclip, cliprect.max_y + 1);
668   if (v3yclip - v1yclip <= 0)
669      return 0;
670
671   // determine total X extents
672   _BaseType minx = v1->x;
673   _BaseType maxx = v1->x;
674   if (v2->x < minx) minx = v2->x;
675   else if (v2->x > maxx) maxx = v2->x;
676   if (v3->x < minx) minx = v3->x;
677   else if (v3->x > maxx) maxx = v3->x;
678
679   // allocate and populate a new polygon
680   polygon_info &polygon = polygon_alloc(round_coordinate(minx), round_coordinate(maxx), v1yclip, v3yclip, callback);
681
682   // compute the slopes for each portion of the triangle
      // (a horizontal edge gets a slope of 0 rather than a division by zero)
683   _BaseType dxdy_v1v2 = (v2->y == v1->y) ? _BaseType(0.0) : (v2->x - v1->x) / (v2->y - v1->y);
684   _BaseType dxdy_v1v3 = (v3->y == v1->y) ? _BaseType(0.0) : (v3->x - v1->x) / (v3->y - v1->y);
685   _BaseType dxdy_v2v3 = (v3->y == v2->y) ? _BaseType(0.0) : (v3->x - v2->x) / (v3->y - v2->y);
686
687   // compute parameter starting points and deltas
      // each parameter is modelled as start + x * dpdx + y * dpdy
688   _BaseType param_start[_MaxParams];
689   _BaseType param_dpdx[_MaxParams];
690   _BaseType param_dpdy[_MaxParams];
691   if (paramcount > 0)
692   {
693      _BaseType a00 = v2->y - v3->y;
694      _BaseType a01 = v3->x - v2->x;
695      _BaseType a02 = v2->x*v3->y - v3->x*v2->y;
696      _BaseType a10 = v3->y - v1->y;
697      _BaseType a11 = v1->x - v3->x;
698      _BaseType a12 = v3->x*v1->y - v1->x*v3->y;
699      _BaseType a20 = v1->y - v2->y;
700      _BaseType a21 = v2->x - v1->x;
701      _BaseType a22 = v1->x*v2->y - v2->x*v1->y;
702      _BaseType det = a02 + a12 + a22;
703
         // a near-zero determinant is treated as degenerate: every parameter
         // is held constant at v1's value
704      if (poly_abs(det) < _BaseType(0.00001))
705      {
706         for (int paramnum = 0; paramnum < paramcount; paramnum++)
707         {
708            param_dpdx[paramnum] = _BaseType(0.0);
709            param_dpdy[paramnum] = _BaseType(0.0);
710            param_start[paramnum] = v1->p[paramnum];
711         }
712      }
713      else
714      {
715         _BaseType idet = poly_recip(det);
716         for (int paramnum = 0; paramnum < paramcount; paramnum++)
717         {
718            param_dpdx[paramnum]  = idet * (v1->p[paramnum]*a00 + v2->p[paramnum]*a10 + v3->p[paramnum]*a20);
719            param_dpdy[paramnum]  = idet * (v1->p[paramnum]*a01 + v2->p[paramnum]*a11 + v3->p[paramnum]*a21);
720            param_start[paramnum] = idet * (v1->p[paramnum]*a02 + v2->p[paramnum]*a12 + v3->p[paramnum]*a22);
721         }
722      }
723   }
724   else    // GCC 4.7.0 incorrectly claims these are uninitialized; humor it by initializing in the (hopefully rare) zero parameter case
725   {
726      param_start[0] = _BaseType(0.0);
727      param_dpdx[0] = _BaseType(0.0);
728      param_dpdy[0] = _BaseType(0.0);
729   }
730
731   // compute the X extents for each scanline
      // (one work unit is created per run of scanlines up to the next bucket boundary)
732   INT32 pixels = 0;
733   UINT32 startunit = m_unit.count();
734   INT32 scaninc = 1;
735   for (INT32 curscan = v1yclip; curscan < v3yclip; curscan += scaninc)
736   {
737      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
738      UINT32 unit_index = m_unit.count();
739      work_unit &unit = m_unit.next();
740
741      // determine how much to advance to hit the next bucket
742      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
743
744      // fill in the work unit basics
         // (previtem links to the last unit queued for the same bucket)
745      unit.polygon = &polygon;
746      unit.count_next = MIN(v3yclip - curscan, scaninc);
747      unit.scanline = curscan;
748      unit.previtem = m_unit_bucket[bucketnum];
749      m_unit_bucket[bucketnum] = unit_index;
750
751      // iterate over extents
752      for (int extnum = 0; extnum < unit.count_next; extnum++)
753      {
754         // compute the ending X based on which part of the triangle we're in
            // (startx follows the v1-v3 edge; stopx follows v1-v2 above v2 and v2-v3 below)
755         _BaseType fully = _BaseType(curscan + extnum) + _BaseType(0.5);
756         _BaseType startx = v1->x + (fully - v1->y) * dxdy_v1v3;
757         _BaseType stopx;
758         if (fully < v2->y)
759            stopx = v1->x + (fully - v1->y) * dxdy_v1v2;
760         else
761            stopx = v2->x + (fully - v2->y) * dxdy_v2v3;
762
763         // clamp to full pixels
764         INT32 istartx = round_coordinate(startx);
765         INT32 istopx = round_coordinate(stopx);
766
767         // force start < stop
768         if (istartx > istopx)
769         {
770            INT32 temp = istartx;
771            istartx = istopx;
772            istopx = temp;
773         }
774
775         // include the right edge if requested
776         if (m_flags & POLYFLAG_INCLUDE_RIGHT_EDGE)
777            istopx++;
778
779         // apply left/right clipping
780         if (istartx < cliprect.min_x)
781            istartx = cliprect.min_x;
782         if (istopx > cliprect.max_x)
783            istopx = cliprect.max_x + 1;
784
785         // set the extent and update the total pixel count
            // (an empty span is stored as 0..0)
786         if (istartx >= istopx)
787            istartx = istopx = 0;
788         extent_t &extent = unit.extent[extnum];
789         extent.startx = istartx;
790         extent.stopx = istopx;
791         extent.userdata = NULL;
792         pixels += istopx - istartx;
793
794         // fill in the parameters for the extent
            // (evaluated at the centre of the span's first pixel)
795         _BaseType fullstartx = _BaseType(istartx) + _BaseType(0.5);
796         for (int paramnum = 0; paramnum < paramcount; paramnum++)
797         {
798            extent.param[paramnum].start = param_start[paramnum] + fullstartx * param_dpdx[paramnum] + fully * param_dpdy[paramnum];
799            extent.param[paramnum].dpdx = param_dpdx[paramnum];
800         }
801      }
802   }
803
804   // enqueue the work items
      // (without a queue the units stay pending until wait() runs them)
805   if (m_queue != NULL)
806      osd_work_item_queue_multiple(m_queue, work_item_callback, m_unit.count() - startunit, &m_unit[startunit], m_unit.itemsize(), WORK_ITEM_FLAG_AUTO_RELEASE);
807
808   // return the total number of pixels in the triangle
809   m_triangles++;
810   m_pixels += pixels;
811   return pixels;
812}
813
814
815//-------------------------------------------------
816//  render_triangle_fan - render a set of
817//  triangles in a fan
818//-------------------------------------------------
819
820template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
821UINT32 poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::render_triangle_fan(const rectangle &cliprect, render_delegate callback, int paramcount, int numverts, const vertex_t *v)
822{
823   // iterate over vertices
824   UINT32 pixels = 0;
825   for (int vertnum = 2; vertnum < numverts; vertnum++)
826      pixels += render_triangle(cliprect, callback, paramcount, v[0], v[vertnum - 1], v[vertnum]);
827   return pixels;
828}
829
830
831//-------------------------------------------------
832//  render_triangle_strip - render a set of
833//  triangles in a strip
834//-------------------------------------------------
835
836template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
837UINT32 poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::render_triangle_strip(const rectangle &cliprect, render_delegate callback, int paramcount, int numverts, const vertex_t *v)
838{
839   // iterate over vertices
840   UINT32 pixels = 0;
841   for (int vertnum = 2; vertnum < numverts; vertnum++)
842      pixels += render_triangle(cliprect, callback, paramcount, v[vertnum - 2], v[vertnum - 1], v[vertnum]);
843   return pixels;
844}
845
846
847//-------------------------------------------------
848//  render_triangle_custom - perform a custom
849//  render of an object, given specific extents
850//-------------------------------------------------
851
852template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
853UINT32 poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::render_triangle_custom(const rectangle &cliprect, render_delegate callback, int startscanline, int numscanlines, const extent_t *extents)
854{
855   // clip coordinates
856   INT32 v1yclip = MAX(startscanline, cliprect.min_y);
857   INT32 v3yclip = MIN(startscanline + numscanlines, cliprect.max_y + 1);
858   if (v3yclip - v1yclip <= 0)
859      return 0;
860
861   // allocate and populate a new polygon
862   polygon_info &polygon = polygon_alloc(0, 0, v1yclip, v3yclip, callback);
863
864   // compute the X extents for each scanline
865   INT32 pixels = 0;
866   UINT32 startunit = m_unit.count();
867   INT32 scaninc = 1;
868   for (INT32 curscan = v1yclip; curscan < v3yclip; curscan += scaninc)
869   {
870      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
871      UINT32 unit_index = m_unit.count();
872      work_unit &unit = m_unit.next();
873
874      // determine how much to advance to hit the next bucket
875      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
876
877      // fill in the work unit basics
878      unit.polygon = &polygon;
879      unit.count_next = MIN(v3yclip - curscan, scaninc);
880      unit.scanline = curscan;
881      unit.previtem = m_unit_bucket[bucketnum];
882      m_unit_bucket[bucketnum] = unit_index;
883
884      // iterate over extents
885      for (int extnum = 0; extnum < unit.count_next; extnum++)
886      {
887         const extent_t &srcextent = extents[(curscan + extnum) - startscanline];
888         INT32 istartx = srcextent.startx, istopx = srcextent.stopx;
889
890         // apply left/right clipping
891         if (istartx < cliprect.min_x)
892            istartx = cliprect.min_x;
893         if (istartx > cliprect.max_x)
894            istartx = cliprect.max_x + 1;
895         if (istopx < cliprect.min_x)
896            istopx = cliprect.min_x;
897         if (istopx > cliprect.max_x)
898            istopx = cliprect.max_x + 1;
899
900         // set the extent and update the total pixel count
901         extent_t &extent = unit.extent[extnum];
902         extent.startx = istartx;
903         extent.stopx = istopx;
904
905         // fill in the parameters for the extent
906         for (int paramnum = 0; paramnum < _MaxParams; paramnum++)
907         {
908            extent.param[paramnum].start = srcextent.param[paramnum].start;
909            extent.param[paramnum].dpdx = srcextent.param[paramnum].dpdx;
910         }
911
912         extent.userdata = srcextent.userdata;
913         if (istartx < istopx)
914            pixels += istopx - istartx;
915         else if(istopx < istartx)
916            pixels += istartx - istopx;
917      }
918   }
919
920   // enqueue the work items
921   if (m_queue != NULL)
922      osd_work_item_queue_multiple(m_queue, work_item_callback, m_unit.count() - startunit, &m_unit[startunit], m_unit.itemsize(), WORK_ITEM_FLAG_AUTO_RELEASE);
923
924   // return the total number of pixels in the object
925   m_triangles++;
926   m_pixels += pixels;
927   return pixels;
928}
929
930
931//-------------------------------------------------
932//  render_polygon - render a single polygon up
933//  to 32 vertices
934//-------------------------------------------------
935
936template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
937template<int _NumVerts>
938UINT32 poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::render_polygon(const rectangle &cliprect, render_delegate callback, int paramcount, const vertex_t *v)
939{
940   // determine min/max Y vertices
941   _BaseType minx = v[0].x;
942   _BaseType maxx = v[0].x;
943   int minv = 0;
944   int maxv = 0;
945   for (int vertnum = 1; vertnum < _NumVerts; vertnum++)
946   {
947      if (v[vertnum].y < v[minv].y)
948         minv = vertnum;
949      else if (v[vertnum].y > v[maxv].y)
950         maxv = vertnum;
951      if (v[vertnum].x < minx)
952         minx = v[vertnum].x;
953      else if (v[vertnum].x > maxx)
954         maxx = v[vertnum].x;
955   }
956
957   // determine start/end scanlines
958   INT32 miny = round_coordinate(v[minv].y);
959   INT32 maxy = round_coordinate(v[maxv].y);
960
961   // clip coordinates
962   INT32 minyclip = miny;
963   INT32 maxyclip = maxy + ((m_flags & POLYFLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
964   minyclip = MAX(minyclip, cliprect.min_y);
965   maxyclip = MIN(maxyclip, cliprect.max_y + 1);
966   if (maxyclip - minyclip <= 0)
967      return 0;
968
969   // allocate a new polygon
970   polygon_info &polygon = polygon_alloc(round_coordinate(minx), round_coordinate(maxx), minyclip, maxyclip, callback);
971
972   // walk forward to build up the forward edge list
973   struct poly_edge
974   {
975      poly_edge *         next;                   // next edge in sequence
976      int                 index;                  // index of this edge
977      const vertex_t *    v1;                     // pointer to first vertex
978      const vertex_t *    v2;                     // pointer to second vertex
979      _BaseType           dxdy;                   // dx/dy along the edge
980      _BaseType           dpdy[_MaxParams];       // per-parameter dp/dy values
981   };
982   poly_edge fedgelist[_NumVerts - 1];
983   poly_edge *edgeptr = &fedgelist[0];
984   for (int curv = minv; curv != maxv; curv = (curv == _NumVerts - 1) ? 0 : (curv + 1))
985   {
986      // set the two vertices
987      edgeptr->v1 = &v[curv];
988      edgeptr->v2 = &v[(curv == _NumVerts - 1) ? 0 : (curv + 1)];
989
990      // if horizontal, skip altogether
991      if (edgeptr->v1->y == edgeptr->v2->y)
992         continue;
993
994      // need dx/dy always, and parameter deltas as necessary
995      _BaseType ooy = poly_recip(edgeptr->v2->y - edgeptr->v1->y);
996      edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
997      for (int paramnum = 0; paramnum < paramcount; paramnum++)
998         edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
999      edgeptr++;
1000   }
1001
1002   // walk backward to build up the backward edge list
1003   poly_edge bedgelist[_NumVerts - 1];
1004   edgeptr = &bedgelist[0];
1005   for (int curv = minv; curv != maxv; curv = (curv == 0) ? (_NumVerts - 1) : (curv - 1))
1006   {
1007      // set the two vertices
1008      edgeptr->v1 = &v[curv];
1009      edgeptr->v2 = &v[(curv == 0) ? (_NumVerts - 1) : (curv - 1)];
1010
1011      // if horizontal, skip altogether
1012      if (edgeptr->v1->y == edgeptr->v2->y)
1013         continue;
1014
1015      // need dx/dy always, and parameter deltas as necessary
1016      _BaseType ooy = poly_recip(edgeptr->v2->y - edgeptr->v1->y);
1017      edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
1018      for (int paramnum = 0; paramnum < paramcount; paramnum++)
1019         edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
1020      edgeptr++;
1021   }
1022
1023   // determine which list is left/right:
1024   // if the first vertex is shared, compare the slopes
1025   // if the first vertex is not shared, compare the X coordinates
1026   const poly_edge *ledge, *redge;
1027   if ((fedgelist[0].v1 == bedgelist[0].v1 && fedgelist[0].dxdy < bedgelist[0].dxdy) ||
1028      (fedgelist[0].v1 != bedgelist[0].v1 && fedgelist[0].v1->x < bedgelist[0].v1->x))
1029   {
1030      ledge = fedgelist;
1031      redge = bedgelist;
1032   }
1033   else
1034   {
1035      ledge = bedgelist;
1036      redge = fedgelist;
1037   }
1038
1039   // compute the X extents for each scanline
1040   INT32 pixels = 0;
1041   UINT32 startunit = m_unit.count();
1042   INT32 scaninc = 1;
1043   for (INT32 curscan = minyclip; curscan < maxyclip; curscan += scaninc)
1044   {
1045      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
1046      UINT32 unit_index = m_unit.count();
1047      work_unit &unit = m_unit.next();
1048
1049      // determine how much to advance to hit the next bucket
1050      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
1051
1052      // fill in the work unit basics
1053      unit.polygon = &polygon;
1054      unit.count_next = MIN(maxyclip - curscan, scaninc);
1055      unit.scanline = curscan;
1056      unit.previtem = m_unit_bucket[bucketnum];
1057      m_unit_bucket[bucketnum] = unit_index;
1058
1059      // iterate over extents
1060      for (int extnum = 0; extnum < unit.count_next; extnum++)
1061      {
1062         // compute the ending X based on which part of the triangle we're in
1063         _BaseType fully = _BaseType(curscan + extnum) + _BaseType(0.5);
1064         while (fully > ledge->v2->y && fully < v[maxv].y)
1065            ledge++;
1066         while (fully > redge->v2->y && fully < v[maxv].y)
1067            redge++;
1068         _BaseType startx = ledge->v1->x + (fully - ledge->v1->y) * ledge->dxdy;
1069         _BaseType stopx = redge->v1->x + (fully - redge->v1->y) * redge->dxdy;
1070
1071         // clamp to full pixels
1072         INT32 istartx = round_coordinate(startx);
1073         INT32 istopx = round_coordinate(stopx);
1074
1075         // compute parameter starting points and deltas
1076         extent_t &extent = unit.extent[extnum];
1077         if (paramcount > 0)
1078         {
1079            _BaseType ldy = fully - ledge->v1->y;
1080            _BaseType rdy = fully - redge->v1->y;
1081            _BaseType oox = poly_recip(stopx - startx);
1082
1083            // iterate over parameters
1084            for (int paramnum = 0; paramnum < paramcount; paramnum++)
1085            {
1086               _BaseType lparam = ledge->v1->p[paramnum] + ldy * ledge->dpdy[paramnum];
1087               _BaseType rparam = redge->v1->p[paramnum] + rdy * redge->dpdy[paramnum];
1088               _BaseType dpdx = (rparam - lparam) * oox;
1089
1090               extent.param[paramnum].start = lparam;// - (_BaseType(istartx) + 0.5f) * dpdx;
1091               extent.param[paramnum].dpdx = dpdx;
1092            }
1093         }
1094
1095         // include the right edge if requested
1096         if (m_flags & POLYFLAG_INCLUDE_RIGHT_EDGE)
1097            istopx++;
1098
1099         // apply left/right clipping
1100         if (istartx < cliprect.min_x)
1101         {
1102            for (int paramnum = 0; paramnum < paramcount; paramnum++)
1103               extent.param[paramnum].start += (cliprect.min_x - istartx) * extent.param[paramnum].dpdx;
1104            istartx = cliprect.min_x;
1105         }
1106         if (istopx > cliprect.max_x)
1107            istopx = cliprect.max_x + 1;
1108
1109         // set the extent and update the total pixel count
1110         if (istartx >= istopx)
1111            istartx = istopx = 0;
1112         extent.startx = istartx;
1113         extent.stopx = istopx;
1114         extent.userdata = NULL;
1115         pixels += istopx - istartx;
1116      }
1117   }
1118
1119   // enqueue the work items
1120   if (m_queue != NULL)
1121      osd_work_item_queue_multiple(m_queue, work_item_callback, m_unit.count() - startunit, &m_unit[startunit], m_unit.itemsize(), WORK_ITEM_FLAG_AUTO_RELEASE);
1122
1123   // return the total number of pixels in the triangle
1124   m_quads++;
1125   m_pixels += pixels;
1126   return pixels;
1127}
1128
1129
1130//-------------------------------------------------
1131//  zclip_if_less - clip a polygon using p[0] as
1132//  a z coordinate
1133//-------------------------------------------------
1134
1135template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
1136int poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::zclip_if_less(int numverts, const vertex_t *v, vertex_t *outv, int paramcount, _BaseType clipval)
1137{
1138   bool prevclipped = (v[numverts - 1].p[0] < clipval);
1139   vertex_t *nextout = outv;
1140
1141   // iterate over vertices
1142   for (int vertnum = 0; vertnum < numverts; vertnum++)
1143   {
1144      bool thisclipped = (v[vertnum].p[0] < clipval);
1145
1146      // if we switched from clipped to non-clipped, interpolate a vertex
1147      if (thisclipped != prevclipped)
1148      {
1149         const vertex_t &v1 = v[(vertnum == 0) ? (numverts - 1) : (vertnum - 1)];
1150         const vertex_t &v2 = v[vertnum];
1151         _BaseType frac = (clipval - v1.p[0]) / (v2.p[0] - v1.p[0]);
1152         nextout->x = v1.x + frac * (v2.x - v1.x);
1153         nextout->y = v1.y + frac * (v2.y - v1.y);
1154         for (int paramnum = 0; paramnum < paramcount; paramnum++)
1155            nextout->p[paramnum] = v1.p[paramnum] + frac * (v2.p[paramnum] - v1.p[paramnum]);
1156         nextout++;
1157      }
1158
1159      // if this vertex is not clipped, copy it in
1160      if (!thisclipped)
1161         *nextout++ = v[vertnum];
1162
1163      // remember the last state
1164      prevclipped = thisclipped;
1165   }
1166   return nextout - outv;
1167}
1168
1169#endif  // __POLYNEW_H__
trunk/src/emu/video/poly.c
r28720r28721
1/***************************************************************************
2
3    poly.c
4
5    Helper routines for polygon rendering.
6
7***************************************************************************/
8
9#include "emu.h"
10#include "poly.h"
11
12
13/***************************************************************************
14    DEBUGGING
15***************************************************************************/
16
17/* keep statistics: when nonzero, extra wait/usage/conflict counters are
   compiled into legacy_poly_manager and printed by poly_free */
18#define KEEP_STATISTICS                 0
19
20/* turn this on to log the reasons for any long waits (measured in poly_wait) */
21#define LOG_WAITS                       0
22
23/* number of profiling ticks before we consider a wait "long" */
24#define LOG_WAIT_THRESHOLD              1000
25
26
27
28/***************************************************************************
29    CONSTANTS
30***************************************************************************/
31
/* number of scanline extents held by a single work unit */
32#define SCANLINES_PER_BUCKET            8
33#define CACHE_LINE_SIZE                 64          /* this is a general guess */
/* number of entries in the manager's unit_bucket array */
34#define TOTAL_BUCKETS                   (512 / SCANLINES_PER_BUCKET)
/* work units allocated per polygon by poly_alloc (integer division) */
35#define UNITS_PER_POLY                  (100 / SCANLINES_PER_BUCKET)
36
37
38
39/***************************************************************************
40    TYPE DEFINITIONS
41***************************************************************************/
42
43/* forward definitions */
44struct polygon_info;
45
46
47/* tri_extent describes start/end points for a scanline; it carries no
   per-parameter data, which convert_tri_extent_to_poly_extent derives from
   the owning polygon_info when a full poly_extent is needed */
48struct tri_extent
49{
50   INT16       startx;                     /* starting X coordinate (inclusive) */
51   INT16       stopx;                      /* ending X coordinate (exclusive) */
52};
53
54
55/* single set of polygon per-parameter data; convert_tri_extent_to_poly_extent
   evaluates a parameter as start + x * dpdx + y * dpdy */
56struct poly_param
57{
58   float       start;                      /* parameter value at starting X,Y */
59   float       dpdx;                       /* dp/dx relative to starting X */
60   float       dpdy;                       /* dp/dy relative to starting Y */
61};
62
63
64/* poly edge is used internally for quad rendering; it describes one edge
   running from vertex v1 to vertex v2, with its slopes expressed per unit of Y */
65struct poly_edge
66{
67   poly_edge *         next;                   /* next edge in sequence */
68   int                 index;                  /* index of this edge */
69   const poly_vertex * v1;                     /* pointer to first vertex */
70   const poly_vertex * v2;                     /* pointer to second vertex */
71   float               dxdy;                   /* dx/dy along the edge */
72   float               dpdy[MAX_VERTEX_PARAMS];/* per-parameter dp/dy values */
73};
74
75
76/* poly section is used internally for quad rendering; it pairs a left edge
   and a right edge with the Y coordinate at which the section ends */
77struct poly_section
78{
79   const poly_edge *   ledge;                  /* pointer to left edge */
80   const poly_edge *   redge;                  /* pointer to right edge */
81   float               ybottom;                /* bottom of this section */
82};
83
84
85/* work_unit_shared is a common set of data shared between tris and quads;
   it is the first member of both tri_work_unit and quad_work_unit.
   previtem is a 16-bit index; note that poly_alloc limits the number of
   work units to 65535 */
86struct work_unit_shared
87{
88   polygon_info *      polygon;                /* pointer to polygon */
89   volatile UINT32     count_next;             /* number of scanlines and index of next item to process */
90   INT16               scanline;               /* starting scanline and count */
91   UINT16              previtem;               /* index of previous item in the same bucket */
92#ifndef PTR64
93   UINT32              dummy;                  /* pad to 16 bytes */
94#endif
95};
96
97
98/* tri_work_unit is a triangle-specific work-unit: the shared header followed
   by one compact tri_extent per scanline in the bucket */
99struct tri_work_unit
100{
101   work_unit_shared    shared;                 /* shared data */
102   tri_extent          extent[SCANLINES_PER_BUCKET]; /* array of scanline extents */
103};
104
105
106/* quad_work_unit is a quad-specific work-unit: the shared header followed
   by one full poly_extent per scanline in the bucket */
107struct quad_work_unit
108{
109   work_unit_shared    shared;                 /* shared data */
110   poly_extent         extent[SCANLINES_PER_BUCKET]; /* array of scanline extents */
111};
112
113
114/* work_unit is a union of the two types; both begin with a work_unit_shared,
   which is also exposed directly as the 'shared' member */
115union work_unit
116{
117   work_unit_shared    shared;                 /* shared data */
118   tri_work_unit       tri;                    /* triangle work unit */
119   quad_work_unit      quad;                   /* quad work unit */
120};
121
122
123/* polygon_info describes a single polygon, which includes the poly_params;
   instances are handed out by allocate_polygon from the manager's polygon array */
124struct polygon_info
125{
126   legacy_poly_manager *      poly;                   /* pointer back to the poly manager */
127   void *              dest;                   /* pointer to the destination we are rendering to */
128   void *              extra;                  /* extra data pointer */
129   UINT8               numparams;              /* number of parameters for this polygon  */
130   UINT8               numverts;               /* number of vertices in this polygon */
131   poly_draw_scanline_func     callback;               /* callback to handle a scanline's worth of work */
132   INT32               xorigin;                /* X origin for all parameters */
133   INT32               yorigin;                /* Y origin for all parameters */
134   poly_param          param[MAX_VERTEX_PARAMS];/* array of parameter data */
135};
136
137
138/* full poly manager description; created by poly_alloc. The *_next indices
   are reset by poly_wait once all pending work has completed, and the
   counters inside KEEP_STATISTICS only exist when that macro is nonzero */
139struct legacy_poly_manager
140{
141   /* queue management */
142   osd_work_queue *    queue;                  /* work queue */
143
144   /* triangle work units */
145   work_unit **        unit;                   /* array of work unit pointers */
146   UINT32              unit_next;              /* index of next unit to allocate */
147   UINT32              unit_count;             /* number of work units available */
148   size_t              unit_size;              /* size of each work unit, in bytes */
149
150   /* quad work units */
151   UINT32              quadunit_next;          /* index of next unit to allocate */
152   UINT32              quadunit_count;         /* number of work units available */
153   size_t              quadunit_size;          /* size of each work unit, in bytes */
154
155   /* poly data */
156   polygon_info **     polygon;                /* array of polygon pointers */
157   UINT32              polygon_next;           /* index of next polygon to allocate */
158   UINT32              polygon_count;          /* number of polygon items available */
159   size_t              polygon_size;           /* size of each polygon, in bytes */
160
161   /* extra data (slot 0 holds the most recently supplied data across a poly_wait) */
162   void **             extra;                  /* array of extra data pointers */
163   UINT32              extra_next;             /* index of next extra data to allocate */
164   UINT32              extra_count;            /* number of extra data items available */
165   size_t              extra_size;             /* size of each extra data, in bytes */
166
167   /* misc data */
168   UINT8               flags;                  /* flags */
169
170   /* buckets (poly_wait fills this array with 0xff bytes) */
171   UINT16              unit_bucket[TOTAL_BUCKETS]; /* buckets for tracking unit usage */
172
173   /* statistics */
174   UINT32              triangles;              /* number of triangles queued */
175   UINT32              quads;                  /* number of quads queued */
176   UINT64              pixels;                 /* number of pixels rendered */
177#if KEEP_STATISTICS
178   UINT32              unit_waits;             /* number of times we waited for a unit */
179   UINT32              unit_max;               /* maximum units used */
180   UINT32              polygon_waits;          /* number of times we waited for a polygon */
181   UINT32              polygon_max;            /* maximum polygons used */
182   UINT32              extra_waits;            /* number of times we waited for an extra data */
183   UINT32              extra_max;              /* maximum extra data used */
184   UINT32              conflicts[WORK_MAX_THREADS]; /* number of conflicts found, per thread */
185   UINT32              resolved[WORK_MAX_THREADS]; /* number of conflicts resolved, per thread */
186#endif
187};
188
189
190
191/***************************************************************************
192    FUNCTION PROTOTYPES
193***************************************************************************/
194
195static void **allocate_array(running_machine &machine, size_t *itemsize, UINT32 itemcount);
196static void *poly_item_callback(void *param, int threadid);
197static void poly_state_presave(legacy_poly_manager *poly);
198
199
200
201/***************************************************************************
202    INLINE FUNCTIONS
203***************************************************************************/
204
205/*-------------------------------------------------
206    round_coordinate - round a coordinate to
207    an integer, following rules that 0.5 rounds
208    down
209-------------------------------------------------*/
210
211INLINE INT32 round_coordinate(float value)
212{
213   INT32 result = floor(value);
214   return result + (value - (float)result > 0.5f);
215}
216
217
218/*-------------------------------------------------
219    convert_tri_extent_to_poly_extent - convert
220    a simple tri_extent to a full poly_extent
221-------------------------------------------------*/
222
223INLINE void convert_tri_extent_to_poly_extent(poly_extent *dstextent, const tri_extent *srcextent, const polygon_info *polygon, INT32 y)
224{
225   /* copy start/stop always */
226   dstextent->startx = srcextent->startx;
227   dstextent->stopx = srcextent->stopx;
228
229   /* if we have parameters, process them as well */
230   for (int paramnum = 0; paramnum < polygon->numparams; paramnum++)
231   {
232      dstextent->param[paramnum].start = polygon->param[paramnum].start + srcextent->startx * polygon->param[paramnum].dpdx + y * polygon->param[paramnum].dpdy;
233      dstextent->param[paramnum].dpdx = polygon->param[paramnum].dpdx;
234   }
235}
236
237
238/*-------------------------------------------------
239    interpolate_vertex - interpolate values in
240    a vertex based on p[0] crossing the clipval
241-------------------------------------------------*/
242
243INLINE void interpolate_vertex(poly_vertex *outv, const poly_vertex *v1, const poly_vertex *v2, int paramcount, float clipval)
244{
245   float frac = (clipval - v1->p[0]) / (v2->p[0] - v1->p[0]);
246   int paramnum;
247
248   /* create a new one at the intersection point */
249   outv->x = v1->x + frac * (v2->x - v1->x);
250   outv->y = v1->y + frac * (v2->y - v1->y);
251   for (paramnum = 0; paramnum < paramcount; paramnum++)
252      outv->p[paramnum] = v1->p[paramnum] + frac * (v2->p[paramnum] - v1->p[paramnum]);
253}
254
255
256/*-------------------------------------------------
257    copy_vertex - copy vertex data from one to
258    another
259-------------------------------------------------*/
260
261INLINE void copy_vertex(poly_vertex *outv, const poly_vertex *v, int paramcount)
262{
263   int paramnum;
264
265   outv->x = v->x;
266   outv->y = v->y;
267   for (paramnum = 0; paramnum < paramcount; paramnum++)
268      outv->p[paramnum] = v->p[paramnum];
269}
270
271
272/*-------------------------------------------------
273    allocate_polygon - allocate a new polygon
274    object, blocking if we run out
275-------------------------------------------------*/
276
277INLINE polygon_info *allocate_polygon(legacy_poly_manager *poly, int miny, int maxy)
278{
279   /* wait for a work item if we have to */
280   if (poly->polygon_next + 1 > poly->polygon_count)
281   {
282      poly_wait(poly, "Out of polygons");
283#if KEEP_STATISTICS
284      poly->polygon_waits++;
285#endif
286   }
287   else if (poly->unit_next + (maxy - miny) / SCANLINES_PER_BUCKET + 2 > poly->unit_count)
288   {
289      poly_wait(poly, "Out of work units");
290#if KEEP_STATISTICS
291      poly->unit_waits++;
292#endif
293   }
294#if KEEP_STATISTICS
295   poly->polygon_max = MAX(poly->polygon_max, poly->polygon_next + 1);
296#endif
297   return poly->polygon[poly->polygon_next++];
298}
299
300
301
302/***************************************************************************
303    INITIALIZATION/TEARDOWN
304***************************************************************************/
305
306/*-------------------------------------------------
307    poly_alloc - initialize a new polygon
308    manager
309-------------------------------------------------*/
310
311legacy_poly_manager *poly_alloc(running_machine &machine, int max_polys, size_t extra_data_size, UINT8 flags)
312{
313   legacy_poly_manager *poly;
314
315   /* allocate the manager itself */
316   poly = auto_alloc_clear(machine, legacy_poly_manager);
317   poly->flags = flags;
318
319   /* allocate polygons */
320   poly->polygon_size = sizeof(polygon_info);
321   poly->polygon_count = MAX(max_polys, 1);
322   poly->polygon_next = 0;
323   poly->polygon = (polygon_info **)allocate_array(machine, &poly->polygon_size, poly->polygon_count);
324
325   /* allocate extra data */
326   poly->extra_size = extra_data_size;
327   poly->extra_count = poly->polygon_count;
328   poly->extra_next = 1;
329   poly->extra = allocate_array(machine, &poly->extra_size, poly->extra_count);
330
331   /* allocate triangle work units */
332   poly->unit_size = (flags & POLYFLAG_ALLOW_QUADS) ? sizeof(quad_work_unit) : sizeof(tri_work_unit);
333   poly->unit_count = MIN(poly->polygon_count * UNITS_PER_POLY, 65535);
334   poly->unit_next = 0;
335   poly->unit = (work_unit **)allocate_array(machine, &poly->unit_size, poly->unit_count);
336
337   /* create the work queue */
338   if (!(flags & POLYFLAG_NO_WORK_QUEUE))
339      poly->queue = osd_work_queue_alloc(WORK_QUEUE_FLAG_MULTI | WORK_QUEUE_FLAG_HIGH_FREQ);
340
341   /* request a pre-save callback for synchronization */
342   machine.save().register_presave(save_prepost_delegate(FUNC(poly_state_presave), poly));
343   return poly;
344}
345
346
347/*-------------------------------------------------
348    poly_free - free a polygon manager
349-------------------------------------------------*/
350
351void poly_free(legacy_poly_manager *poly)
352{
353#if KEEP_STATISTICS
354{
355   int i, conflicts = 0, resolved = 0;
356   for (i = 0; i < ARRAY_LENGTH(poly->conflicts); i++)
357   {
358      conflicts += poly->conflicts[i];
359      resolved += poly->resolved[i];
360   }
361   printf("Total triangles = %d\n", poly->triangles);
362   printf("Total quads = %d\n", poly->quads);
363   if (poly->pixels > 1000000000)
364      printf("Total pixels   = %d%09d\n", (UINT32)(poly->pixels / 1000000000), (UINT32)(poly->pixels % 1000000000));
365   else
366      printf("Total pixels   = %d\n", (UINT32)poly->pixels);
367   printf("Conflicts:  %d resolved, %d total\n", resolved, conflicts);
368   printf("Units:      %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", poly->unit_max, poly->unit_count, poly->unit_waits, poly->unit_size, poly->unit_count * poly->unit_size);
369   printf("Polygons:   %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", poly->polygon_max, poly->polygon_count, poly->polygon_waits, poly->polygon_size, poly->polygon_count * poly->polygon_size);
370   printf("Extra data: %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", poly->extra_max, poly->extra_count, poly->extra_waits, poly->extra_size, poly->extra_count * poly->extra_size);
371}
372#endif
373
374   /* free the work queue */
375   if (poly->queue != NULL)
376      osd_work_queue_free(poly->queue);
377}
378
379
380
381/***************************************************************************
382    COMMON FUNCTIONS
383***************************************************************************/
384
385/*-------------------------------------------------
386    poly_wait - wait for all pending rendering
387    to complete
388-------------------------------------------------*/
389
390void poly_wait(legacy_poly_manager *poly, const char *debug_reason)
391{
392   osd_ticks_t time;
393
394   /* remember the start time if we're logging */
395   if (LOG_WAITS)
396      time = get_profile_ticks();
397
398   /* wait for all pending work items to complete */
399   if (poly->queue != NULL)
400      osd_work_queue_wait(poly->queue, osd_ticks_per_second() * 100);
401
402   /* if we don't have a queue, just run the whole list now */
403   else
404   {
405      int unitnum;
406      for (unitnum = 0; unitnum < poly->unit_next; unitnum++)
407         poly_item_callback(poly->unit[unitnum], 0);
408   }
409
410   /* log any long waits */
411   if (LOG_WAITS)
412   {
413      time = get_profile_ticks() - time;
414      if (time > LOG_WAIT_THRESHOLD)
415         logerror("Poly:Waited %d cycles for %s\n", (int)time, debug_reason);
416   }
417
418   /* reset the state */
419   poly->polygon_next = poly->unit_next = 0;
420   memset(poly->unit_bucket, 0xff, sizeof(poly->unit_bucket));
421
422   /* we need to preserve the last extra data that was supplied */
423   if (poly->extra_next > 1)
424      memcpy(poly->extra[0], poly->extra[poly->extra_next - 1], poly->extra_size);
425   poly->extra_next = 1;
426}
427
428
429/*-------------------------------------------------
430    poly_get_extra_data - get a pointer to the
431    extra data for the next polygon
432-------------------------------------------------*/
433
434void *poly_get_extra_data(legacy_poly_manager *poly)
435{
436   /* wait for a work item if we have to */
437   if (poly->extra_next + 1 > poly->extra_count)
438   {
439      poly_wait(poly, "Out of extra data");
440#if KEEP_STATISTICS
441      poly->extra_waits++;
442#endif
443   }
444
445   /* return a pointer to the extra data for the next item */
446#if KEEP_STATISTICS
447   poly->extra_max = MAX(poly->extra_max, poly->extra_next + 1);
448#endif
449   return poly->extra[poly->extra_next++];
450}
451
452
453
454/***************************************************************************
455    CORE TRIANGLE RENDERING
456***************************************************************************/
457
458/*-------------------------------------------------
459    poly_render_triangle - render a single
460    triangle given 3 vertexes
461-------------------------------------------------*/
462
463UINT32 poly_render_triangle(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, const poly_vertex *v1, const poly_vertex *v2, const poly_vertex *v3)
464{
465   float dxdy_v1v2, dxdy_v1v3, dxdy_v2v3;
466   const poly_vertex *tv;
467   INT32 curscan, scaninc;
468   polygon_info *polygon;
469   INT32 v1yclip, v3yclip;
470   INT32 v1y, v3y, v1x;
471   INT32 pixels = 0;
472   UINT32 startunit;
473
474   /* first sort by Y */
475   if (v2->y < v1->y)
476   {
477      tv = v1;
478      v1 = v2;
479      v2 = tv;
480   }
481   if (v3->y < v2->y)
482   {
483      tv = v2;
484      v2 = v3;
485      v3 = tv;
486      if (v2->y < v1->y)
487      {
488         tv = v1;
489         v1 = v2;
490         v2 = tv;
491      }
492   }
493
494   /* compute some integral X/Y vertex values */
495   v1x = round_coordinate(v1->x);
496   v1y = round_coordinate(v1->y);
497   v3y = round_coordinate(v3->y);
498
499   /* clip coordinates */
500   v1yclip = v1y;
501   v3yclip = v3y + ((poly->flags & POLYFLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
502   v1yclip = MAX(v1yclip, cliprect.min_y);
503   v3yclip = MIN(v3yclip, cliprect.max_y + 1);
504   if (v3yclip - v1yclip <= 0)
505      return 0;
506
507   /* allocate a new polygon */
508   polygon = allocate_polygon(poly, v1yclip, v3yclip);
509
510   /* fill in the polygon information */
511   polygon->poly = poly;
512   polygon->dest = dest;
513   polygon->callback = callback;
514   polygon->extra = poly->extra[poly->extra_next - 1];
515   polygon->numparams = paramcount;
516   polygon->numverts = 3;
517
518   /* set the start X/Y coordinates */
519   polygon->xorigin = v1x;
520   polygon->yorigin = v1y;
521
522   /* compute the slopes for each portion of the triangle */
523   dxdy_v1v2 = (v2->y == v1->y) ? 0.0f : (v2->x - v1->x) / (v2->y - v1->y);
524   dxdy_v1v3 = (v3->y == v1->y) ? 0.0f : (v3->x - v1->x) / (v3->y - v1->y);
525   dxdy_v2v3 = (v3->y == v2->y) ? 0.0f : (v3->x - v2->x) / (v3->y - v2->y);
526
527   /* compute the X extents for each scanline */
528   startunit = poly->unit_next;
529   for (curscan = v1yclip; curscan < v3yclip; curscan += scaninc)
530   {
531      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
532      UINT32 unit_index = poly->unit_next++;
533      tri_work_unit *unit = &poly->unit[unit_index]->tri;
534      int extnum;
535
536      /* determine how much to advance to hit the next bucket */
537      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
538
539      /* fill in the work unit basics */
540      unit->shared.polygon = polygon;
541      unit->shared.count_next = MIN(v3yclip - curscan, scaninc);
542      unit->shared.scanline = curscan;
543      unit->shared.previtem = poly->unit_bucket[bucketnum];
544      poly->unit_bucket[bucketnum] = unit_index;
545
546      /* iterate over extents */
547      for (extnum = 0; extnum < unit->shared.count_next; extnum++)
548      {
549         float fully = (float)(curscan + extnum) + 0.5f;
550         float startx = v1->x + (fully - v1->y) * dxdy_v1v3;
551         float stopx;
552         INT32 istartx, istopx;
553
554         /* compute the ending X based on which part of the triangle we're in */
555         if (fully < v2->y)
556            stopx = v1->x + (fully - v1->y) * dxdy_v1v2;
557         else
558            stopx = v2->x + (fully - v2->y) * dxdy_v2v3;
559
560         /* clamp to full pixels */
561         istartx = round_coordinate(startx);
562         istopx = round_coordinate(stopx);
563
564         /* force start < stop */
565         if (istartx > istopx)
566         {
567            INT32 temp = istartx;
568            istartx = istopx;
569            istopx = temp;
570         }
571
572         /* include the right edge if requested */
573         if (poly->flags & POLYFLAG_INCLUDE_RIGHT_EDGE)
574            istopx++;
575
576         /* apply left/right clipping */
577         if (istartx < cliprect.min_x)
578            istartx = cliprect.min_x;
579         if (istopx > cliprect.max_x)
580            istopx = cliprect.max_x + 1;
581
582         /* set the extent and update the total pixel count */
583         if (istartx >= istopx)
584            istartx = istopx = 0;
585         unit->extent[extnum].startx = istartx;
586         unit->extent[extnum].stopx = istopx;
587         pixels += istopx - istartx;
588      }
589   }
590#if KEEP_STATISTICS
591   poly->unit_max = MAX(poly->unit_max, poly->unit_next);
592#endif
593
594   /* compute parameter starting points and deltas */
595   if (paramcount > 0)
596   {
597      float a00 = v2->y - v3->y;
598      float a01 = v3->x - v2->x;
599      float a02 = v2->x*v3->y - v3->x*v2->y;
600      float a10 = v3->y - v1->y;
601      float a11 = v1->x - v3->x;
602      float a12 = v3->x*v1->y - v1->x*v3->y;
603      float a20 = v1->y - v2->y;
604      float a21 = v2->x - v1->x;
605      float a22 = v1->x*v2->y - v2->x*v1->y;
606      float det = a02 + a12 + a22;
607
608      if(fabsf(det) < 0.001) {
609         for (int paramnum = 0; paramnum < paramcount; paramnum++)
610         {
611            poly_param *params = &polygon->param[paramnum];
612            params->dpdx = 0;
613            params->dpdy = 0;
614            params->start = v1->p[paramnum];
615         }
616      }
617      else
618      {
619         float idet = 1/det;
620         for (int paramnum = 0; paramnum < paramcount; paramnum++)
621         {
622            poly_param *params = &polygon->param[paramnum];
623            params->dpdx  = idet*(v1->p[paramnum]*a00 + v2->p[paramnum]*a10 + v3->p[paramnum]*a20);
624            params->dpdy  = idet*(v1->p[paramnum]*a01 + v2->p[paramnum]*a11 + v3->p[paramnum]*a21);
625            params->start = idet*(v1->p[paramnum]*a02 + v2->p[paramnum]*a12 + v3->p[paramnum]*a22);
626         }
627      }
628   }
629
630   /* enqueue the work items */
631   if (poly->queue != NULL)
632      osd_work_item_queue_multiple(poly->queue, poly_item_callback, poly->unit_next - startunit, poly->unit[startunit], poly->unit_size, WORK_ITEM_FLAG_AUTO_RELEASE);
633
634   /* return the total number of pixels in the triangle */
635   poly->triangles++;
636   poly->pixels += pixels;
637   return pixels;
638}
639
640
641/*-------------------------------------------------
642    poly_render_triangle_fan - render a set of
643    triangles in a fan
644-------------------------------------------------*/
645
646UINT32 poly_render_triangle_fan(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v)
647{
648   UINT32 pixels = 0;
649   int vertnum;
650
651   /* iterate over vertices */
652   for (vertnum = 2; vertnum < numverts; vertnum++)
653      pixels += poly_render_triangle(poly, dest, cliprect, callback, paramcount, &v[0], &v[vertnum - 1], &v[vertnum]);
654   return pixels;
655}
656
657
658/*-------------------------------------------------
659    poly_render_triangle_custom - perform a custom
660    render of an object, given specific extents
661-------------------------------------------------*/
662
663UINT32 poly_render_triangle_custom(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int startscanline, int numscanlines, const poly_extent *extents)
664{
665   INT32 curscan, scaninc;
666   polygon_info *polygon;
667   INT32 v1yclip, v3yclip;
668   INT32 pixels = 0;
669   UINT32 startunit;
670
671   /* clip coordinates */
672   v1yclip = MAX(startscanline, cliprect.min_y);
673   v3yclip = MIN(startscanline + numscanlines, cliprect.max_y + 1);
674   if (v3yclip - v1yclip <= 0)
675      return 0;
676
677   /* allocate a new polygon */
678   polygon = allocate_polygon(poly, v1yclip, v3yclip);
679
680   /* fill in the polygon information */
681   polygon->poly = poly;
682   polygon->dest = dest;
683   polygon->callback = callback;
684   polygon->extra = poly->extra[poly->extra_next - 1];
685   polygon->numparams = 0;
686   polygon->numverts = 3;
687
688   /* compute the X extents for each scanline */
689   startunit = poly->unit_next;
690   for (curscan = v1yclip; curscan < v3yclip; curscan += scaninc)
691   {
692      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
693      UINT32 unit_index = poly->unit_next++;
694      tri_work_unit *unit = &poly->unit[unit_index]->tri;
695      int extnum;
696
697      /* determine how much to advance to hit the next bucket */
698      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
699
700      /* fill in the work unit basics */
701      unit->shared.polygon = polygon;
702      unit->shared.count_next = MIN(v3yclip - curscan, scaninc);
703      unit->shared.scanline = curscan;
704      unit->shared.previtem = poly->unit_bucket[bucketnum];
705      poly->unit_bucket[bucketnum] = unit_index;
706
707      /* iterate over extents */
708      for (extnum = 0; extnum < unit->shared.count_next; extnum++)
709      {
710         const poly_extent *extent = &extents[(curscan + extnum) - startscanline];
711         INT32 istartx = extent->startx, istopx = extent->stopx;
712
713         /* force start < stop */
714         if (istartx > istopx)
715         {
716            INT32 temp = istartx;
717            istartx = istopx;
718            istopx = temp;
719         }
720
721         /* apply left/right clipping */
722         if (istartx < cliprect.min_x)
723            istartx = cliprect.min_x;
724         if (istopx > cliprect.max_x)
725            istopx = cliprect.max_x + 1;
726
727         /* set the extent and update the total pixel count */
728         unit->extent[extnum].startx = istartx;
729         unit->extent[extnum].stopx = istopx;
730         if (istartx < istopx)
731            pixels += istopx - istartx;
732      }
733   }
734#if KEEP_STATISTICS
735   poly->unit_max = MAX(poly->unit_max, poly->unit_next);
736#endif
737
738   /* enqueue the work items */
739   if (poly->queue != NULL)
740      osd_work_item_queue_multiple(poly->queue, poly_item_callback, poly->unit_next - startunit, poly->unit[startunit], poly->unit_size, WORK_ITEM_FLAG_AUTO_RELEASE);
741
742   /* return the total number of pixels in the object */
743   poly->triangles++;
744   poly->pixels += pixels;
745   return pixels;
746}
747
748
749
750/***************************************************************************
751    CORE QUAD RENDERING
752***************************************************************************/
753
754/*-------------------------------------------------
755    poly_render_quad - render a single quad
756    given 4 vertexes
757-------------------------------------------------*/
758
759UINT32 poly_render_quad(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, const poly_vertex *v1, const poly_vertex *v2, const poly_vertex *v3, const poly_vertex *v4)
760{
761   poly_edge fedgelist[3], bedgelist[3];
762   const poly_edge *ledge, *redge;
763   const poly_vertex *v[4];
764   poly_edge *edgeptr;
765   int minv, maxv, curv;
766   INT32 minyclip, maxyclip;
767   INT32 miny, maxy;
768   INT32 curscan, scaninc;
769   polygon_info *polygon;
770   INT32 pixels = 0;
771   UINT32 startunit;
772
773   assert(poly->flags & POLYFLAG_ALLOW_QUADS);
774
775   /* arrays make things easier */
776   v[0] = v1;
777   v[1] = v2;
778   v[2] = v3;
779   v[3] = v4;
780
781   /* determine min/max Y vertices */
782   if (v[1]->y < v[0]->y)
783      minv = 1, maxv = 0;
784   else
785      minv = 0, maxv = 1;
786   if (v[2]->y < v[minv]->y)
787      minv = 2;
788   else if (v[2]->y > v[maxv]->y)
789      maxv = 2;
790   if (v[3]->y < v[minv]->y)
791      minv = 3;
792   else if (v[3]->y > v[maxv]->y)
793      maxv = 3;
794
795   /* determine start/end scanlines */
796   miny = round_coordinate(v[minv]->y);
797   maxy = round_coordinate(v[maxv]->y);
798
799   /* clip coordinates */
800   minyclip = miny;
801   maxyclip = maxy + ((poly->flags & POLYFLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
802   minyclip = MAX(minyclip, cliprect.min_y);
803   maxyclip = MIN(maxyclip, cliprect.max_y + 1);
804   if (maxyclip - minyclip <= 0)
805      return 0;
806
807   /* allocate a new polygon */
808   polygon = allocate_polygon(poly, minyclip, maxyclip);
809
810   /* fill in the polygon information */
811   polygon->poly = poly;
812   polygon->dest = dest;
813   polygon->callback = callback;
814   polygon->extra = poly->extra[poly->extra_next - 1];
815   polygon->numparams = paramcount;
816   polygon->numverts = 4;
817
818   /* walk forward to build up the forward edge list */
819   edgeptr = &fedgelist[0];
820   for (curv = minv; curv != maxv; curv = (curv + 1) & 3)
821   {
822      int paramnum;
823      float ooy;
824
825      /* set the two vertices */
826      edgeptr->v1 = v[curv];
827      edgeptr->v2 = v[(curv + 1) & 3];
828
829      /* if horizontal, skip altogether */
830      if (edgeptr->v1->y == edgeptr->v2->y)
831         continue;
832
833      /* need dx/dy always, and parameter deltas as necessary */
834      ooy = 1.0f / (edgeptr->v2->y - edgeptr->v1->y);
835      edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
836      for (paramnum = 0; paramnum < paramcount; paramnum++)
837         edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
838      edgeptr++;
839   }
840
841   /* walk backward to build up the backward edge list */
842   edgeptr = &bedgelist[0];
843   for (curv = minv; curv != maxv; curv = (curv - 1) & 3)
844   {
845      int paramnum;
846      float ooy;
847
848      /* set the two vertices */
849      edgeptr->v1 = v[curv];
850      edgeptr->v2 = v[(curv - 1) & 3];
851
852      /* if horizontal, skip altogether */
853      if (edgeptr->v1->y == edgeptr->v2->y)
854         continue;
855
856      /* need dx/dy always, and parameter deltas as necessary */
857      ooy = 1.0f / (edgeptr->v2->y - edgeptr->v1->y);
858      edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
859      for (paramnum = 0; paramnum < paramcount; paramnum++)
860         edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
861      edgeptr++;
862   }
863
864   /* determine which list is left/right: */
865   /* if the first vertex is shared, compare the slopes */
866   /* if the first vertex is not shared, compare the X coordinates */
867   if ((fedgelist[0].v1 == bedgelist[0].v1 && fedgelist[0].dxdy < bedgelist[0].dxdy) ||
868      (fedgelist[0].v1 != bedgelist[0].v1 && fedgelist[0].v1->x < bedgelist[0].v1->x))
869   {
870      ledge = fedgelist;
871      redge = bedgelist;
872   }
873   else
874   {
875      ledge = bedgelist;
876      redge = fedgelist;
877   }
878
879   /* compute the X extents for each scanline */
880   startunit = poly->unit_next;
881   for (curscan = minyclip; curscan < maxyclip; curscan += scaninc)
882   {
883      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
884      UINT32 unit_index = poly->unit_next++;
885      quad_work_unit *unit = &poly->unit[unit_index]->quad;
886      int extnum;
887
888      /* determine how much to advance to hit the next bucket */
889      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
890
891      /* fill in the work unit basics */
892      unit->shared.polygon = polygon;
893      unit->shared.count_next = MIN(maxyclip - curscan, scaninc);
894      unit->shared.scanline = curscan;
895      unit->shared.previtem = poly->unit_bucket[bucketnum];
896      poly->unit_bucket[bucketnum] = unit_index;
897
898      /* iterate over extents */
899      for (extnum = 0; extnum < unit->shared.count_next; extnum++)
900      {
901         float fully = (float)(curscan + extnum) + 0.5f;
902         float startx, stopx;
903         INT32 istartx, istopx;
904         int paramnum;
905
906         /* compute the ending X based on which part of the triangle we're in */
907         while (fully > ledge->v2->y && fully < v[maxv]->y)
908            ledge++;
909         while (fully > redge->v2->y && fully < v[maxv]->y)
910            redge++;
911         startx = ledge->v1->x + (fully - ledge->v1->y) * ledge->dxdy;
912         stopx = redge->v1->x + (fully - redge->v1->y) * redge->dxdy;
913
914         /* clamp to full pixels */
915         istartx = round_coordinate(startx);
916         istopx = round_coordinate(stopx);
917
918         /* compute parameter starting points and deltas */
919         if (paramcount > 0)
920         {
921            float ldy = fully - ledge->v1->y;
922            float rdy = fully - redge->v1->y;
923            float oox = 1.0f / (stopx - startx);
924
925            /* iterate over parameters */
926            for (paramnum = 0; paramnum < paramcount; paramnum++)
927            {
928               float lparam = ledge->v1->p[paramnum] + ldy * ledge->dpdy[paramnum];
929               float rparam = redge->v1->p[paramnum] + rdy * redge->dpdy[paramnum];
930               float dpdx = (rparam - lparam) * oox;
931
932               unit->extent[extnum].param[paramnum].start = lparam;// - ((float)istartx + 0.5f) * dpdx;
933               unit->extent[extnum].param[paramnum].dpdx = dpdx;
934            }
935         }
936
937         /* include the right edge if requested */
938         if (poly->flags & POLYFLAG_INCLUDE_RIGHT_EDGE)
939            istopx++;
940
941         /* apply left/right clipping */
942         if (istartx < cliprect.min_x)
943         {
944            for (paramnum = 0; paramnum < paramcount; paramnum++)
945               unit->extent[extnum].param[paramnum].start += (cliprect.min_x - istartx) * unit->extent[extnum].param[paramnum].dpdx;
946            istartx = cliprect.min_x;
947         }
948         if (istopx > cliprect.max_x)
949            istopx = cliprect.max_x + 1;
950
951         /* set the extent and update the total pixel count */
952         if (istartx >= istopx)
953            istartx = istopx = 0;
954         unit->extent[extnum].startx = istartx;
955         unit->extent[extnum].stopx = istopx;
956         pixels += istopx - istartx;
957      }
958   }
959#if KEEP_STATISTICS
960   poly->unit_max = MAX(poly->unit_max, poly->unit_next);
961#endif
962
963   /* enqueue the work items */
964   if (poly->queue != NULL)
965      osd_work_item_queue_multiple(poly->queue, poly_item_callback, poly->unit_next - startunit, poly->unit[startunit], poly->unit_size, WORK_ITEM_FLAG_AUTO_RELEASE);
966
967   /* return the total number of pixels in the triangle */
968   poly->quads++;
969   poly->pixels += pixels;
970   return pixels;
971}
972
973
974/*-------------------------------------------------
975    poly_render_quad_fan - render a set of
976    quads in a fan
977-------------------------------------------------*/
978
979UINT32 poly_render_quad_fan(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v)
980{
981   UINT32 pixels = 0;
982   int vertnum;
983
984   /* iterate over vertices */
985   for (vertnum = 2; vertnum < numverts; vertnum += 2)
986      pixels += poly_render_quad(poly, dest, cliprect, callback, paramcount, &v[0], &v[vertnum - 1], &v[vertnum], &v[MIN(vertnum + 1, numverts - 1)]);
987   return pixels;
988}
989
990
991
992/***************************************************************************
993    CORE POLYGON RENDERING
994***************************************************************************/
995
996/*-------------------------------------------------
997    poly_render_polygon - render a single polygon up
998    to 32 vertices
999-------------------------------------------------*/
1000
1001UINT32 poly_render_polygon(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v)
1002{
1003   poly_edge fedgelist[MAX_POLYGON_VERTS - 1], bedgelist[MAX_POLYGON_VERTS - 1];
1004   const poly_edge *ledge, *redge;
1005   poly_edge *edgeptr;
1006   int minv, maxv, curv;
1007   INT32 minyclip, maxyclip;
1008   INT32 miny, maxy;
1009   INT32 curscan, scaninc;
1010   polygon_info *polygon;
1011   INT32 pixels = 0;
1012   UINT32 startunit;
1013   int vertnum;
1014
1015   assert(poly->flags & POLYFLAG_ALLOW_QUADS);
1016
1017   /* determine min/max Y vertices */
1018   minv = maxv = 0;
1019   for (vertnum = 1; vertnum < numverts; vertnum++)
1020   {
1021      if (v[vertnum].y < v[minv].y)
1022         minv = vertnum;
1023      else if (v[vertnum].y > v[maxv].y)
1024         maxv = vertnum;
1025   }
1026
1027   /* determine start/end scanlines */
1028   miny = round_coordinate(v[minv].y);
1029   maxy = round_coordinate(v[maxv].y);
1030
1031   /* clip coordinates */
1032   minyclip = miny;
1033   maxyclip = maxy + ((poly->flags & POLYFLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
1034   minyclip = MAX(minyclip, cliprect.min_y);
1035   maxyclip = MIN(maxyclip, cliprect.max_y + 1);
1036   if (maxyclip - minyclip <= 0)
1037      return 0;
1038
1039   /* allocate a new polygon */
1040   polygon = allocate_polygon(poly, minyclip, maxyclip);
1041
1042   /* fill in the polygon information */
1043   polygon->poly = poly;
1044   polygon->dest = dest;
1045   polygon->callback = callback;
1046   polygon->extra = poly->extra[poly->extra_next - 1];
1047   polygon->numparams = paramcount;
1048   polygon->numverts = numverts;
1049
1050   /* walk forward to build up the forward edge list */
1051   edgeptr = &fedgelist[0];
1052   for (curv = minv; curv != maxv; curv = (curv == numverts - 1) ? 0 : (curv + 1))
1053   {
1054      int paramnum;
1055      float ooy;
1056
1057      /* set the two vertices */
1058      edgeptr->v1 = &v[curv];
1059      edgeptr->v2 = &v[(curv == numverts - 1) ? 0 : (curv + 1)];
1060
1061      /* if horizontal, skip altogether */
1062      if (edgeptr->v1->y == edgeptr->v2->y)
1063         continue;
1064
1065      /* need dx/dy always, and parameter deltas as necessary */
1066      ooy = 1.0f / (edgeptr->v2->y - edgeptr->v1->y);
1067      edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
1068      for (paramnum = 0; paramnum < paramcount; paramnum++)
1069         edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
1070      edgeptr++;
1071   }
1072
1073   /* walk backward to build up the backward edge list */
1074   edgeptr = &bedgelist[0];
1075   for (curv = minv; curv != maxv; curv = (curv == 0) ? (numverts - 1) : (curv - 1))
1076   {
1077      int paramnum;
1078      float ooy;
1079
1080      /* set the two vertices */
1081      edgeptr->v1 = &v[curv];
1082      edgeptr->v2 = &v[(curv == 0) ? (numverts - 1) : (curv - 1)];
1083
1084      /* if horizontal, skip altogether */
1085      if (edgeptr->v1->y == edgeptr->v2->y)
1086         continue;
1087
1088      /* need dx/dy always, and parameter deltas as necessary */
1089      ooy = 1.0f / (edgeptr->v2->y - edgeptr->v1->y);
1090      edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
1091      for (paramnum = 0; paramnum < paramcount; paramnum++)
1092         edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
1093      edgeptr++;
1094   }
1095
1096   /* determine which list is left/right: */
1097   /* if the first vertex is shared, compare the slopes */
1098   /* if the first vertex is not shared, compare the X coordinates */
1099   if ((fedgelist[0].v1 == bedgelist[0].v1 && fedgelist[0].dxdy < bedgelist[0].dxdy) ||
1100      (fedgelist[0].v1 != bedgelist[0].v1 && fedgelist[0].v1->x < bedgelist[0].v1->x))
1101   {
1102      ledge = fedgelist;
1103      redge = bedgelist;
1104   }
1105   else
1106   {
1107      ledge = bedgelist;
1108      redge = fedgelist;
1109   }
1110
1111   /* compute the X extents for each scanline */
1112   startunit = poly->unit_next;
1113   for (curscan = minyclip; curscan < maxyclip; curscan += scaninc)
1114   {
1115      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
1116      UINT32 unit_index = poly->unit_next++;
1117      quad_work_unit *unit = &poly->unit[unit_index]->quad;
1118      int extnum;
1119
1120      /* determine how much to advance to hit the next bucket */
1121      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
1122
1123      /* fill in the work unit basics */
1124      unit->shared.polygon = polygon;
1125      unit->shared.count_next = MIN(maxyclip - curscan, scaninc);
1126      unit->shared.scanline = curscan;
1127      unit->shared.previtem = poly->unit_bucket[bucketnum];
1128      poly->unit_bucket[bucketnum] = unit_index;
1129
1130      /* iterate over extents */
1131      for (extnum = 0; extnum < unit->shared.count_next; extnum++)
1132      {
1133         float fully = (float)(curscan + extnum) + 0.5f;
1134         float startx, stopx;
1135         INT32 istartx, istopx;
1136         int paramnum;
1137
1138         /* compute the ending X based on which part of the triangle we're in */
1139         while (fully > ledge->v2->y && fully < v[maxv].y)
1140            ledge++;
1141         while (fully > redge->v2->y && fully < v[maxv].y)
1142            redge++;
1143         startx = ledge->v1->x + (fully - ledge->v1->y) * ledge->dxdy;
1144         stopx = redge->v1->x + (fully - redge->v1->y) * redge->dxdy;
1145
1146         /* clamp to full pixels */
1147         istartx = round_coordinate(startx);
1148         istopx = round_coordinate(stopx);
1149
1150         /* compute parameter starting points and deltas */
1151         if (paramcount > 0)
1152         {
1153            float ldy = fully - ledge->v1->y;
1154            float rdy = fully - redge->v1->y;
1155            float oox = 1.0f / (stopx - startx);
1156
1157            /* iterate over parameters */
1158            for (paramnum = 0; paramnum < paramcount; paramnum++)
1159            {
1160               float lparam = ledge->v1->p[paramnum] + ldy * ledge->dpdy[paramnum];
1161               float rparam = redge->v1->p[paramnum] + rdy * redge->dpdy[paramnum];
1162               float dpdx = (rparam - lparam) * oox;
1163
1164               unit->extent[extnum].param[paramnum].start = lparam;// - ((float)istartx + 0.5f) * dpdx;
1165               unit->extent[extnum].param[paramnum].dpdx = dpdx;
1166            }
1167         }
1168
1169         /* include the right edge if requested */
1170         if (poly->flags & POLYFLAG_INCLUDE_RIGHT_EDGE)
1171            istopx++;
1172
1173         /* apply left/right clipping */
1174         if (istartx < cliprect.min_x)
1175         {
1176            for (paramnum = 0; paramnum < paramcount; paramnum++)
1177               unit->extent[extnum].param[paramnum].start += (cliprect.min_x - istartx) * unit->extent[extnum].param[paramnum].dpdx;
1178            istartx = cliprect.min_x;
1179         }
1180         if (istopx > cliprect.max_x)
1181            istopx = cliprect.max_x + 1;
1182
1183         /* set the extent and update the total pixel count */
1184         if (istartx >= istopx)
1185            istartx = istopx = 0;
1186         unit->extent[extnum].startx = istartx;
1187         unit->extent[extnum].stopx = istopx;
1188         pixels += istopx - istartx;
1189      }
1190   }
1191#if KEEP_STATISTICS
1192   poly->unit_max = MAX(poly->unit_max, poly->unit_next);
1193#endif
1194
1195   /* enqueue the work items */
1196   if (poly->queue != NULL)
1197      osd_work_item_queue_multiple(poly->queue, poly_item_callback, poly->unit_next - startunit, poly->unit[startunit], poly->unit_size, WORK_ITEM_FLAG_AUTO_RELEASE);
1198
1199   /* return the total number of pixels in the triangle */
1200   poly->quads++;
1201   poly->pixels += pixels;
1202   return pixels;
1203}
1204
1205
1206
1207/***************************************************************************
1208    CLIPPING
1209***************************************************************************/
1210
1211/*-------------------------------------------------
1212    poly_zclip_if_less - z clip a polygon against
1213    the given value, returning a set of clipped
1214    vertices
1215-------------------------------------------------*/
1216
1217int poly_zclip_if_less(int numverts, const poly_vertex *v, poly_vertex *outv, int paramcount, float clipval)
1218{
1219   int prevclipped = (v[numverts - 1].p[0] < clipval);
1220   poly_vertex *nextout = outv;
1221   int vertnum;
1222
1223   /* iterate over vertices */
1224   for (vertnum = 0; vertnum < numverts; vertnum++)
1225   {
1226      int thisclipped = (v[vertnum].p[0] < clipval);
1227
1228      /* if we switched from clipped to non-clipped, interpolate a vertex */
1229      if (thisclipped != prevclipped)
1230         interpolate_vertex(nextout++, &v[(vertnum == 0) ? (numverts - 1) : (vertnum - 1)], &v[vertnum], paramcount, clipval);
1231
1232      /* if this vertex is not clipped, copy it in */
1233      if (!thisclipped)
1234         copy_vertex(nextout++, &v[vertnum], paramcount);
1235
1236      /* remember the last state */
1237      prevclipped = thisclipped;
1238   }
1239   return nextout - outv;
1240}
1241
1242
1243
1244/***************************************************************************
1245    INTERNAL FUNCTIONS
1246***************************************************************************/
1247
1248/*-------------------------------------------------
1249    allocate_array - allocate an array of pointers
1250-------------------------------------------------*/
1251
1252static void **allocate_array(running_machine &machine, size_t *itemsize, UINT32 itemcount)
1253{
1254   void **ptrarray;
1255   int itemnum;
1256
1257   /* fail if 0 */
1258   if (itemcount == 0)
1259      return NULL;
1260
1261   /* round to a cache line boundary */
1262   *itemsize = ((*itemsize + CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE) * CACHE_LINE_SIZE;
1263
1264   /* allocate the array */
1265   ptrarray = auto_alloc_array_clear(machine, void *, itemcount);
1266
1267   /* allocate the actual items */
1268   ptrarray[0] = auto_alloc_array_clear(machine, UINT8, *itemsize * itemcount);
1269
1270   /* initialize the pointer array */
1271   for (itemnum = 1; itemnum < itemcount; itemnum++)
1272      ptrarray[itemnum] = (UINT8 *)ptrarray[0] + *itemsize * itemnum;
1273   return ptrarray;
1274}
1275
1276
1277/*-------------------------------------------------
1278    poly_item_callback - callback for each poly
1279    item
1280-------------------------------------------------*/
1281
/*
    'param' is treated as a work_unit pointer; 'threadid' is forwarded
    unchanged to the polygon's scanline callback (and indexes the
    statistics arrays when KEEP_STATISTICS is set). Always returns NULL.

    unit->shared.count_next packs two fields:
      - low 16 bits:  number of scanlines held by the unit
      - high 16 bits: index of a unit that has been chained behind it
    The whole word is set to 0 once the unit's scanlines have been drawn.
*/
1282static void *poly_item_callback(void *param, int threadid)
1283{
1284   while (1)
1285   {
1286      work_unit *unit = (work_unit *)param;
1287      polygon_info *polygon = unit->shared.polygon;
1288      int count = unit->shared.count_next & 0xffff;
1289      UINT32 orig_count_next;
1290      int curscan;
1291
1292      /* if our previous item isn't done yet, enqueue this item to the end and proceed */
      /* (a previtem of 0xffff means there is no previous item to check) */
1293      if (unit->shared.previtem != 0xffff)
1294      {
1295         work_unit *prevunit = polygon->poly->unit[unit->shared.previtem];
1296         if (prevunit->shared.count_next != 0)
1297         {
            /* our own index, derived from the byte offset from unit[0] */
1298            UINT32 unitnum = ((UINT8 *)unit - (UINT8 *)polygon->poly->unit[0]) / polygon->poly->unit_size;
1299            UINT32 new_count_next;
1300
1301            /* attempt to atomically swap in this new value */
            /* (our index is OR'd into the upper 16 bits of the previous unit's word) */
1302            do
1303            {
1304               orig_count_next = prevunit->shared.count_next;
1305               new_count_next = orig_count_next | (unitnum << 16);
1306            } while (compare_exchange32((volatile INT32 *)&prevunit->shared.count_next, orig_count_next, new_count_next) != orig_count_next);
1307
1308#if KEEP_STATISTICS
1309            /* track resolved conflicts */
1310            polygon->poly->conflicts[threadid]++;
1311            if (orig_count_next != 0)
1312               polygon->poly->resolved[threadid]++;
1313#endif
1314            /* if we succeeded, skip out early so we can do other work */
            /* (a zero original value means the previous unit had already been
               cleared by the time of the swap, so we render this unit below) */
1315            if (orig_count_next != 0)
1316               break;
1317         }
1318      }
1319
1320      /* iterate over extents */
      /* triangles keep tri extents that are converted to a temporary poly_extent
         per scanline; other vertex counts pass the stored quad extent directly */
1321      for (curscan = 0; curscan < count; curscan++)
1322      {
1323         if (polygon->numverts == 3)
1324         {
1325            poly_extent tmpextent;
1326            convert_tri_extent_to_poly_extent(&tmpextent, &unit->tri.extent[curscan], polygon, unit->shared.scanline + curscan);
1327            (*polygon->callback)(polygon->dest, unit->shared.scanline + curscan, &tmpextent, polygon->extra, threadid);
1328         }
1329         else
1330            (*polygon->callback)(polygon->dest, unit->shared.scanline + curscan, &unit->quad.extent[curscan], polygon->extra, threadid);
1331      }
1332
1333      /* set our count to 0 and re-fetch the original count value */
1334      do
1335      {
1336         orig_count_next = unit->shared.count_next;
1337      } while (compare_exchange32((volatile INT32 *)&unit->shared.count_next, orig_count_next, 0) != orig_count_next);
1338
1339      /* if we have no more work to do, do nothing */
      /* otherwise the upper 16 bits name the unit chained behind us; loop to run it */
1340      orig_count_next >>= 16;
1341      if (orig_count_next == 0)
1342         break;
1343      param = polygon->poly->unit[orig_count_next];
1344   }
1345   return NULL;
1346}
1347
1348
1349/*-------------------------------------------------
1350    poly_state_presave - pre-save callback to
1351    ensure everything is synced before saving
1352-------------------------------------------------*/
1353
1354static void poly_state_presave(legacy_poly_manager *poly)
1355{
   /* forward to poly_wait(), passing "pre-save" as the debug reason string */
1356   poly_wait(poly, "pre-save");
1357}
trunk/src/emu/video/video.mak
r28720r28721
384384
385385#-------------------------------------------------
386386#
387#@src/emu/video/poly.h,VIDEOS += POLY
387#@src/emu/video/polylgcy.h,VIDEOS += POLY
388388#-------------------------------------------------
389389
390390ifneq ($(filter POLY,$(VIDEOS)),)
391VIDEOOBJS+= $(VIDEOOBJ)/poly.o
391VIDEOOBJS+= $(VIDEOOBJ)/polylgcy.o
392392endif
393393
394394#-------------------------------------------------
trunk/src/emu/video/voodoo.c
r28720r28721
145145#define EXPAND_RASTERIZERS
146146
147147#include "emu.h"
148#include "video/poly.h"
148#include "video/polylgcy.h"
149149#include "video/rgbutil.h"
150150#include "voodoo.h"
151151#include "vooddefs.h"
trunk/src/emu/video/poly.h
r28720r28721
22
33    poly.h
44
5    New polygon helper routines.
5    Polygon helper routines.
66
77****************************************************************************
88
r28720r28721
3636#define __POLYNEW_H__
3737
3838
39//**************************************************************************
40//  DEBUGGING
41//**************************************************************************
42
43// keep statistics
44#define KEEP_STATISTICS                 0
45
46// turn this on to log the reasons for any long waits
47#define LOG_WAITS                       0
48
49// number of profiling ticks before we consider a wait "long"
50#define LOG_WAIT_THRESHOLD              1000
51
52
53
3954/***************************************************************************
4055    CONSTANTS
4156***************************************************************************/
4257
43#define MAX_VERTEX_PARAMS                   6
44#define MAX_POLYGON_VERTS                   32
45
4658#define POLYFLAG_INCLUDE_BOTTOM_EDGE        0x01
4759#define POLYFLAG_INCLUDE_RIGHT_EDGE         0x02
4860#define POLYFLAG_NO_WORK_QUEUE              0x04
49#define POLYFLAG_ALLOW_QUADS                0x08
5061
62#define SCANLINES_PER_BUCKET                8
63#define CACHE_LINE_SIZE                     64          // this is a general guess
64#define TOTAL_BUCKETS                       (512 / SCANLINES_PER_BUCKET)
65#define UNITS_PER_POLY                      (100 / SCANLINES_PER_BUCKET)
5166
5267
53/***************************************************************************
54    TYPE DEFINITIONS
55***************************************************************************/
5668
57/* opaque reference to the poly manager */
58struct legacy_poly_manager;
69//**************************************************************************
70//  TYPE DEFINITIONS
71//**************************************************************************
5972
73//-------------------------------------------------
74//  global helpers for float base types
75//-------------------------------------------------
6076
61/* input vertex data */
62struct poly_vertex
// single-precision floor: wraps floorf()
inline float poly_floor(float x)
{
   return floorf(x);
}

// single-precision absolute value: wraps fabsf()
inline float poly_abs(float x)
{
   return fabsf(x);
}

// single-precision reciprocal, computed as 1.0f / x (no zero check)
inline float poly_recip(float x)
{
   return 1.0f / x;
}
80
81
//-------------------------------------------------
//  global helpers for double base types
//-------------------------------------------------

// double-precision floor: wraps floor()
inline double poly_floor(double x)
{
   return floor(x);
}

// double-precision absolute value: wraps fabs()
inline double poly_abs(double x)
{
   return fabs(x);
}

// double-precision reciprocal, computed as 1.0 / x (no zero check)
inline double poly_recip(double x)
{
   return 1.0 / x;
}
89
90
91// poly_manager is a template class
//
// Template parameters, as used by the members below:
//   _BaseType   - numeric type of vertex coordinates and interpolated parameters
//   _ObjectData - per-object data; handed to the scanline callback by const reference
//   _MaxParams  - size of the per-vertex p[] and per-extent param[] arrays
//   _MaxPolys   - sizes the internal polygon, object-data and work-unit arrays
92template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
93class poly_manager
6394{
// NOTE(review): the three 'float' member lines below look like removed-side lines of
// the legacy poly_vertex struct left in by the diff rendering, not members of this
// class -- confirm against the real header.
64   float       x;                          /* X coordinate */
65   float       y;                          /* Y coordinate */
66   float       p[MAX_VERTEX_PARAMS];       /* interpolated parameter values */
95public:
96   // each vertex has an X/Y coordinate and a set of parameters
97   struct vertex_t
98   {
99      vertex_t() { }
100      vertex_t(_BaseType _x, _BaseType _y) { x = _x; y = _y; }
101
102      _BaseType x, y;                         // X, Y coordinates
103      _BaseType p[_MaxParams];                // interpolated parameters
104   };
105
106   // a single extent describes a span and a list of parameter extents
107   struct extent_t
108   {
109      INT16 startx, stopx;                    // starting (inclusive)/ending (exclusive) endpoints
110      struct
111      {
112         _BaseType start;                    // parameter value at start
113         _BaseType dpdx;                     // dp/dx relative to start
114      } param[_MaxParams];
115      void *userdata;                         // custom per-span data
116   };
117
118   // delegate type for scanline callbacks
   // (arguments: scanline number, extent for that scanline, object data, thread id)
119   typedef delegate<void (INT32, const extent_t &, const _ObjectData &, int)> render_delegate;
120
121   // construction/destruction
122   poly_manager(running_machine &machine, UINT8 flags = 0);
123   poly_manager(screen_device &screen, UINT8 flags = 0);
124   virtual ~poly_manager();
125
126   // getters
   // (screen() asserts that a screen was supplied at construction)
127   running_machine &machine() const { return m_machine; }
128   screen_device &screen() const { assert(m_screen != NULL); return *m_screen; }
129
130   // synchronization
131   void wait(const char *debug_reason = "general");
132
133   // object data allocators
134   _ObjectData &object_data_alloc();
135   _ObjectData &object_data_last() const { return m_object.last(); }
136
137   // tiles
138   UINT32 render_tile(const rectangle &cliprect, render_delegate callback, int paramcount, const vertex_t &v1, const vertex_t &v2);
139
140   // triangles
141   UINT32 render_triangle(const rectangle &cliprect, render_delegate callback, int paramcount, const vertex_t &v1, const vertex_t &v2, const vertex_t &v3);
142   UINT32 render_triangle_fan(const rectangle &cliprect, render_delegate callback, int paramcount, int numverts, const vertex_t *v);
143   UINT32 render_triangle_strip(const rectangle &cliprect, render_delegate callback, int paramcount, int numverts, const vertex_t *v);
144   UINT32 render_triangle_custom(const rectangle &cliprect, render_delegate callback, int startscanline, int numscanlines, const extent_t *extents);
145
146   // polygons
147   template<int _NumVerts>
148   UINT32 render_polygon(const rectangle &cliprect, render_delegate callback, int paramcount, const vertex_t *v);
149
150   // public helpers
151   int zclip_if_less(int numverts, const vertex_t *v, vertex_t *outv, int paramcount, _BaseType clipval);
152
153private:
154   // polygon_info describes a single polygon, which includes the poly_params
155   struct polygon_info
156   {
157      poly_manager *      m_owner;                // pointer back to the poly manager
158      _ObjectData *       m_object;               // object data pointer
159      render_delegate     m_callback;             // callback to handle a scanline's worth of work
160   };
161
162   // internal unit of work
   // (count_next: low 16 bits = scanline count, high 16 bits = index of a chained unit;
   //  previtem: 0xffff is tested for as "no previous item" by work_item_callback)
163   struct work_unit
164   {
165      volatile UINT32     count_next;             // number of scanlines and index of next item to process
166      polygon_info *      polygon;                // pointer to polygon
167      INT16               scanline;               // starting scanline
168      UINT16              previtem;               // index of previous item in the same bucket
169   #ifndef PTR64
170      UINT32              dummy;                  // pad to 16 bytes
171   #endif
172      extent_t            extent[SCANLINES_PER_BUCKET]; // array of scanline extents
173   };
174
175   // class for managing an array of items
   // Items live in one byte buffer of _Count slots, each k_itemsize bytes wide, and
   // are constructed on demand with placement new by next().
176   template<class _Type, int _Count>
177   class poly_array
178   {
179      // size of an item, rounded up to the cache line size
180      static const int k_itemsize = ((sizeof(_Type) + CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE) * CACHE_LINE_SIZE;
181
182   public:
183      // construction
184      poly_array(running_machine &machine, poly_manager &manager)
185         : m_manager(manager),
186            m_base(auto_alloc_array_clear(machine, UINT8, k_itemsize * _Count)),
187            m_next(0),
188            m_max(0),
189            m_waits(0) { }
190
191      // destruction
      // NOTE(review): only the raw buffer is released; items created by next() via
      // placement new are never destroyed here -- confirm that is acceptable for
      // every _Type used.
192      ~poly_array() { auto_free(m_manager.machine(), m_base); }
193
194      // operators
195      _Type &operator[](int index) const { assert(index >= 0 && index < _Count); return *reinterpret_cast<_Type *>(m_base + index * k_itemsize); }
196
197      // getters
198      int count() const { return m_next; }
199      int max() const { return m_max; }
200      int waits() const { return m_waits; }
201      int itemsize() const { return k_itemsize; }
202      int allocated() const { return _Count; }
203      int indexof(_Type &item) const { int result = (reinterpret_cast<UINT8 *>(&item) - m_base) / k_itemsize; assert(result >= 0 && result < _Count); return result; }
204
205      // operations
      // reset():          rewinds the allocation cursor; the buffer contents are left as-is
      // next():           constructs a fresh _Type in the next slot and returns it
      // last():           most recently allocated item (indexes m_next - 1)
      // wait_for_space(): calls the manager's wait() while fewer than 'count' free slots remain
      // NOTE(review): next() samples m_max before m_next is incremented, so max()
      // reports the highest pre-allocation count, one less than the peak number of
      // items in use -- confirm whether the statistics output expects that.
206      void reset() { m_next = 0; }
207      _Type &next() { if (m_next > m_max) m_max = m_next; assert(m_next < _Count); return *new(m_base + m_next++ * k_itemsize) _Type; }
208      _Type &last() const { return (*this)[m_next - 1]; }
209      void wait_for_space(int count = 1) { while ((m_next + count) >= _Count) { m_waits++; m_manager.wait(""); }  }
210
211   private:
212      // internal state
213      poly_manager &      m_manager;
214      UINT8 *             m_base;
215      int                 m_next;
216      int                 m_max;
217      int                 m_waits;
218   };
219
220   // internal array types
221   typedef poly_array<polygon_info, _MaxPolys> polygon_array;
222   typedef poly_array<_ObjectData, _MaxPolys + 1> objectdata_array;
223   typedef poly_array<work_unit, MIN(_MaxPolys * UNITS_PER_POLY, 65535)> unit_array;
224
225   // round in a cross-platform consistent manner
   // (floor, then add one only when the fractional part is strictly greater than 0.5,
   //  so an exact .5 rounds down)
226   inline INT32 round_coordinate(_BaseType value)
227   {
228      INT32 result = poly_floor(value);
229      return result + (value - _BaseType(result) > _BaseType(0.5));
230   }
231
232   // internal helpers
   // polygon_alloc: reserves a polygon slot bound to the most recent object data.
   // NOTE(review): minx and maxx are accepted but not used in the body below.
233   polygon_info &polygon_alloc(int minx, int maxx, int miny, int maxy, render_delegate callback)
234   {
235      // wait for space in the polygon and unit arrays
236      m_polygon.wait_for_space();
237      m_unit.wait_for_space((maxy - miny) / SCANLINES_PER_BUCKET + 2);
238
239      // return and initialize the next one
240      polygon_info &polygon = m_polygon.next();
241      polygon.m_owner = this;
242      polygon.m_object = &object_data_last();
243      polygon.m_callback = callback;
244      return polygon;
245   }
246
247   static void *work_item_callback(void *param, int threadid);
248   void presave() { wait("pre-save"); }
249
250   // queue management
251   running_machine &   m_machine;
252   screen_device *     m_screen;
253   osd_work_queue *    m_queue;                    // work queue
254
255   // arrays
256   polygon_array       m_polygon;                  // array of polygons
257   objectdata_array    m_object;                   // array of object data
258   unit_array          m_unit;                     // array of work units
259
260   // misc data
261   UINT8               m_flags;                    // flags
262
263   // buckets
264   UINT16              m_unit_bucket[TOTAL_BUCKETS]; // buckets for tracking unit usage
265
266   // statistics
267   UINT32              m_tiles;                    // number of tiles queued
268   UINT32              m_triangles;                // number of triangles queued
269   UINT32              m_quads;                    // number of quads queued
270   UINT64              m_pixels;                   // number of pixels rendered
271#if KEEP_STATISTICS
272   UINT32              m_conflicts[WORK_MAX_THREADS]; // number of conflicts found, per thread
273   UINT32              m_resolved[WORK_MAX_THREADS];   // number of conflicts resolved, per thread
274#endif
67275};
68276
69277
70/* poly_param_extent describes information for a single parameter in an extent */
71struct poly_param_extent
278//-------------------------------------------------
279//  poly_manager - constructor
280//-------------------------------------------------
281
282template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
283poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::poly_manager(running_machine &machine, UINT8 flags)
284   : m_machine(machine),
285      m_screen(NULL),
286      m_queue(NULL),
287      m_polygon(machine, *this),
288      m_object(machine, *this),
289      m_unit(machine, *this),
290      m_flags(flags),
291      m_triangles(0),
292      m_quads(0),
293      m_pixels(0)
72294{
73   float       start;                      /* parameter value at starting X,Y */
74   float       dpdx;                       /* dp/dx relative to starting X */
75};
295#if KEEP_STATISTICS
296   memset(m_conflicts, 0, sizeof(m_conflicts));
297   memset(m_resolved, 0, sizeof(m_resolved));
298#endif
76299
300   // create the work queue
301   if (!(flags & POLYFLAG_NO_WORK_QUEUE))
302      m_queue = osd_work_queue_alloc(WORK_QUEUE_FLAG_MULTI | WORK_QUEUE_FLAG_HIGH_FREQ);
77303
78/* poly_extent describes start/end points for a scanline, along with per-scanline parameters */
79struct poly_extent
304   // request a pre-save callback for synchronization
305   machine.save().register_presave(save_prepost_delegate(FUNC(poly_manager::presave), this));
306}
307
308
309template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
310poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::poly_manager(screen_device &screen, UINT8 flags)
311   : m_machine(screen.machine()),
312      m_screen(&screen),
313      m_queue(NULL),
314      m_polygon(screen.machine(), *this),
315      m_object(screen.machine(), *this),
316      m_unit(screen.machine(), *this),
317      m_flags(flags),
318      m_triangles(0),
319      m_quads(0),
320      m_pixels(0)
80321{
81   INT16       startx;                     /* starting X coordinate (inclusive) */
82   INT16       stopx;                      /* ending X coordinate (exclusive) */
83   poly_param_extent param[MAX_VERTEX_PARAMS]; /* starting and dx values for each parameter */
84};
322#if KEEP_STATISTICS
323   memset(m_conflicts, 0, sizeof(m_conflicts));
324   memset(m_resolved, 0, sizeof(m_resolved));
325#endif
85326
327   // create the work queue
328   if (!(flags & POLYFLAG_NO_WORK_QUEUE))
329      m_queue = osd_work_queue_alloc(WORK_QUEUE_FLAG_MULTI | WORK_QUEUE_FLAG_HIGH_FREQ);
86330
87/* callback routine to process a batch of scanlines in a triangle */
88typedef void (*poly_draw_scanline_func)(void *dest, INT32 scanline, const poly_extent *extent, const void *extradata, int threadid);
331   // request a pre-save callback for synchronization
332   machine().save().register_presave(save_prepost_delegate(FUNC(poly_manager::presave), this));
333}
89334
90335
336//-------------------------------------------------
337//  ~poly_manager - destructor
338//-------------------------------------------------
//
// When KEEP_STATISTICS is enabled, prints accumulated counters to stdout; in all
// builds, frees the work queue if one was created.
// NOTE(review): the unindented legacy comment/declaration lines inside this span
// look like removed-side lines of the rendered diff rather than part of the
// destructor -- confirm against the real header.
91339
92/***************************************************************************
93    TYPE DEFINITIONS
94***************************************************************************/
340template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
341poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::~poly_manager()
342{
343#if KEEP_STATISTICS
344{
345   // accumulate stats over the entire collection
346   int conflicts = 0, resolved = 0;
347   for (int i = 0; i < ARRAY_LENGTH(m_conflicts); i++)
348   {
349      conflicts += m_conflicts[i];
350      resolved += m_resolved[i];
351   }
95352
353   // output global stats
   // NOTE(review): m_tiles is counted by render_tile but is not reported here, and
   // the UINT32 counters are printed with %d -- confirm both are intended.
354   printf("Total triangles = %d\n", m_triangles);
355   printf("Total quads = %d\n", m_quads);
   // totals above 10^9 are printed as two pieces: the quotient, then the
   // remainder zero-padded to nine digits
356   if (m_pixels > 1000000000)
357      printf("Total pixels   = %d%09d\n", (UINT32)(m_pixels / 1000000000), (UINT32)(m_pixels % 1000000000));
358   else
359      printf("Total pixels   = %d\n", (UINT32)m_pixels);
96360
97/* ----- initialization/teardown ----- */
361   printf("Conflicts:   %d resolved, %d total\n", resolved, conflicts);
362   printf("Units:       %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", m_unit.max(), m_unit.allocated(), m_unit.waits(), m_unit.itemsize(), m_unit.allocated() * m_unit.itemsize());
363   printf("Polygons:    %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", m_polygon.max(), m_polygon.allocated(), m_polygon.waits(), m_polygon.itemsize(), m_polygon.allocated() * m_polygon.itemsize());
364   printf("Object data: %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", m_object.max(), m_object.allocated(), m_object.waits(), m_object.itemsize(), m_object.allocated() * m_object.itemsize());
365}
366#endif
98367
99/* allocate a new poly manager that can render triangles */
100legacy_poly_manager *poly_alloc(running_machine &machine, int max_polys, size_t extra_data_size, UINT8 flags);
368   // free the work queue
369   if (m_queue != NULL)
370      osd_work_queue_free(m_queue);
371}
101372
102/* free a poly manager */
103void poly_free(legacy_poly_manager *poly);
104373
374//-------------------------------------------------
375//  work_item_callback - process a work item
376//-------------------------------------------------
//
// 'param' is treated as a work_unit pointer; 'threadid' is forwarded to the
// polygon's render delegate (and indexes the statistics arrays when
// KEEP_STATISTICS is set). Always returns NULL.
//
// unit.count_next packs the scanline count in its low 16 bits and, in its high
// 16 bits, the index of a unit that has been chained behind this one. The word
// is cleared to 0 once the unit's scanlines have been rendered.
//
// NOTE(review): the unindented legacy comment/declaration lines inside this span
// look like removed-side lines of the rendered diff rather than part of this
// function -- confirm against the real header.
105377
378template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
379void *poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::work_item_callback(void *param, int threadid)
380{
381   while (1)
382   {
383      work_unit &unit = *(work_unit *)param;
384      polygon_info &polygon = *unit.polygon;
385      int count = unit.count_next & 0xffff;
386      UINT32 orig_count_next;
106387
107/* ----- common functions ----- */
388      // if our previous item isn't done yet, enqueue this item to the end and proceed
      // (a previtem of 0xffff means there is no previous item to check)
389      if (unit.previtem != 0xffff)
390      {
391         work_unit &prevunit = polygon.m_owner->m_unit[unit.previtem];
392         if (prevunit.count_next != 0)
393         {
394            UINT32 unitnum = polygon.m_owner->m_unit.indexof(unit);
395            UINT32 new_count_next;
108396
109/* wait until all polygons in the queue have been rendered */
110void poly_wait(legacy_poly_manager *poly, const char *debug_reason);
397            // attempt to atomically swap in this new value
            // (our index is OR'd into the upper 16 bits of the previous unit's word)
398            do
399            {
400               orig_count_next = prevunit.count_next;
401               new_count_next = orig_count_next | (unitnum << 16);
402            } while (compare_exchange32((volatile INT32 *)&prevunit.count_next, orig_count_next, new_count_next) != orig_count_next);
111403
112/* get a pointer to the extra data for the next polygon */
113void *poly_get_extra_data(legacy_poly_manager *poly);
404#if KEEP_STATISTICS
405            // track resolved conflicts
406            polygon.m_owner->m_conflicts[threadid]++;
407            if (orig_count_next != 0)
408               polygon.m_owner->m_resolved[threadid]++;
409#endif
410            // if we succeeded, skip out early so we can do other work
            // (a zero original value means the previous unit had already been
            //  cleared by the time of the swap, so this unit is rendered below)
411            if (orig_count_next != 0)
412               break;
413         }
414      }
114415
416      // iterate over extents
417      for (int curscan = 0; curscan < count; curscan++)
418         polygon.m_callback(unit.scanline + curscan, unit.extent[curscan], *polygon.m_object, threadid);
115419
420      // set our count to 0 and re-fetch the original count value
421      do
422      {
423         orig_count_next = unit.count_next;
424      } while (compare_exchange32((volatile INT32 *)&unit.count_next, orig_count_next, 0) != orig_count_next);
116425
117/* ----- core triangle rendering ----- */
426      // if we have no more work to do, do nothing
      // otherwise the upper 16 bits name the unit chained behind us; loop to run it
427      orig_count_next >>= 16;
428      if (orig_count_next == 0)
429         break;
430      param = &polygon.m_owner->m_unit[orig_count_next];
431   }
432   return NULL;
433}
118434
119/* render a single triangle given 3 vertexes */
120UINT32 poly_render_triangle(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, const poly_vertex *v1, const poly_vertex *v2, const poly_vertex *v3);
121435
122/* render a set of triangles in a fan */
123UINT32 poly_render_triangle_fan(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v);
436//-------------------------------------------------
437//  wait - stall until all work is complete
438//-------------------------------------------------
//
// Finishes all outstanding work units, then rewinds the polygon, unit and
// object-data arrays and empties the buckets. 'debug_reason' is used only in
// the long-wait log message.
//
// NOTE(review): the unindented legacy comment/declaration lines inside this span
// look like removed-side lines of the rendered diff rather than part of this
// function -- confirm against the real header.
124439
125/* perform a custom render of an object, given specific extents */
126UINT32 poly_render_triangle_custom(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int startscanline, int numscanlines, const poly_extent *extents);
440template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
441void poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::wait(const char *debug_reason)
442{
   // only assigned (and only read) when LOG_WAITS is non-zero
443   osd_ticks_t time;
127444
445   // remember the start time if we're logging
446   if (LOG_WAITS)
447      time = get_profile_ticks();
128448
449   // wait for all pending work items to complete
   // NOTE(review): the result of osd_work_queue_wait is not examined; the state
   // below is reset regardless -- confirm a timeout cannot leave units in flight.
450   if (m_queue != NULL)
451      osd_work_queue_wait(m_queue, osd_ticks_per_second() * 100);
129452
130/* ----- core quad rendering ----- */
453   // if we don't have a queue, just run the whole list now
   // (every allocated unit is executed on the calling thread with threadid 0)
454   else
455      for (int unitnum = 0; unitnum < m_unit.count(); unitnum++)
456         work_item_callback(&m_unit[unitnum], 0);
131457
132/* render a single quad given 4 vertexes */
133UINT32 poly_render_quad(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, const poly_vertex *v1, const poly_vertex *v2, const poly_vertex *v3, const poly_vertex *v4);
458   // log any long waits
459   if (LOG_WAITS)
460   {
461      time = get_profile_ticks() - time;
462      if (time > LOG_WAIT_THRESHOLD)
463         logerror("Poly:Waited %d cycles for %s\n", (int)time, debug_reason);
464   }
134465
135/* render a set of quads in a fan */
136UINT32 poly_render_quad_fan(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v);
466   // reset the state
   // (0xff fill gives every UINT16 bucket the value 0xffff, the "no previous item" marker)
467   m_polygon.reset();
468   m_unit.reset();
469   memset(m_unit_bucket, 0xff, sizeof(m_unit_bucket));
137470
471   // we need to preserve the last object data that was supplied
   // (copied out before the reset, then assigned into the first slot afterwards)
472   if (m_object.count() > 0)
473   {
474      _ObjectData temp = object_data_last();
475      m_object.reset();
476      m_object.next() = temp;
477   }
478   else
479      m_object.reset();
480}
138481
139482
140/* ----- core polygon rendering ----- */
483//-------------------------------------------------
484//  object_data_alloc - allocate a new _ObjectData
485//-------------------------------------------------
141486
142/* render a single polygon up to 32 vertices */
143UINT32 poly_render_polygon(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v);
487template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
488_ObjectData &poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::object_data_alloc()
489{
490   // wait for a work item if we have to, then return the next item
491   m_object.wait_for_space();
492   return m_object.next();
493}
144494
145495
496//-------------------------------------------------
497//  render_tile - render a tile
498//-------------------------------------------------
//
// Renders the axis-aligned rectangle spanned by the two corner vertices, clipped
// to 'cliprect'. Every scanline gets the same startx/stopx; the first
// 'paramcount' parameters are interpolated linearly between the two corners.
// Scanlines are grouped into work units that never cross a
// SCANLINES_PER_BUCKET boundary. Returns the number of pixels covered, or 0
// when the tile is rejected.
//
// NOTE(review): the unindented legacy comment/declaration lines inside this span
// look like removed-side lines of the rendered diff rather than part of this
// function -- confirm against the real header.
146499
147/* ----- clipping ----- */
500template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
501UINT32 poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::render_tile(const rectangle &cliprect, render_delegate callback, int paramcount, const vertex_t &_v1, const vertex_t &_v2)
502{
503   const vertex_t *v1 = &_v1;
504   const vertex_t *v2 = &_v2;
148505
149/* zclip (assumes p[0] == z) a polygon */
150int poly_zclip_if_less(int numverts, const poly_vertex *v, poly_vertex *outv, int paramcount, float clipval);
506   // first sort by Y
   // (only the pointers are swapped, so v1 is the vertex with the smaller Y)
507   if (v2->y < v1->y)
508   {
509      const vertex_t *tv = v1;
510      v1 = v2;
511      v2 = tv;
512   }
151513
514   // compute some integral X/Y vertex values
515   INT32 v1y = round_coordinate(v1->y);
516   INT32 v2y = round_coordinate(v2->y);
152517
153#endif  /* __POLY_H__ */
518   // clip coordinates
   // (v2yclip is exclusive; the bottom edge is added only when the flag asks for it)
519   INT32 v1yclip = v1y;
520   INT32 v2yclip = v2y + ((m_flags & POLYFLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
521   v1yclip = MAX(v1yclip, cliprect.min_y);
522   v2yclip = MIN(v2yclip, cliprect.max_y + 1);
523   if (v2yclip - v1yclip <= 0)
524      return 0;
525
526   // determine total X extents
   // (after the Y sort, a tile whose upper vertex lies to the right is rejected)
527   _BaseType minx = v1->x;
528   _BaseType maxx = v2->x;
529   if (minx > maxx)
530      return 0;
531
532   // allocate and populate a new polygon
   // NOTE(review): the polygon is allocated before the left/right clip test further
   // down, so a slot is consumed even when that test returns 0 -- confirm intended.
533   polygon_info &polygon = polygon_alloc(round_coordinate(minx), round_coordinate(maxx), v1yclip, v2yclip, callback);
534
535   // compute parameter deltas
   // NOTE(review): poly_recip divides by (v2 - v1) with no zero check; a zero-width
   // or zero-height tile with paramcount > 0 divides by zero -- confirm callers
   // never pass one.
536   _BaseType param_dpdx[_MaxParams];
537   _BaseType param_dpdy[_MaxParams];
538   if (paramcount > 0)
539   {
540      _BaseType oox = poly_recip(v2->x - v1->x);
541      _BaseType ooy = poly_recip(v2->y - v1->y);
542      for (int paramnum = 0; paramnum < paramcount; paramnum++)
543      {
544         param_dpdx[paramnum]  = oox * (v2->p[paramnum] - v1->p[paramnum]);
545         param_dpdy[paramnum]  = ooy * (v2->p[paramnum] - v1->p[paramnum]);
546      }
547   }
548
549   // clamp to full pixels
550   INT32 istartx = round_coordinate(v1->x);
551   INT32 istopx = round_coordinate(v2->x);
552
553   // force start < stop
   // NOTE(review): v1->x <= v2->x is already guaranteed by the early return above,
   // so this swap looks unreachable unless rounding can reorder the values -- confirm.
554   if (istartx > istopx)
555   {
556      INT32 temp = istartx;
557      istartx = istopx;
558      istopx = temp;
559   }
560
561   // include the right edge if requested
562   if (m_flags & POLYFLAG_INCLUDE_RIGHT_EDGE)
563      istopx++;
564
565   // apply left/right clipping
566   if (istartx < cliprect.min_x)
567      istartx = cliprect.min_x;
568   if (istopx > cliprect.max_x)
569      istopx = cliprect.max_x + 1;
570   if (istartx >= istopx)
571      return 0;
572
573   // compute the X extents for each scanline
574   INT32 pixels = 0;
575   UINT32 startunit = m_unit.count();
576   INT32 scaninc = 1;
577   for (INT32 curscan = v1yclip; curscan < v2yclip; curscan += scaninc)
578   {
579      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
580      UINT32 unit_index = m_unit.count();
581      work_unit &unit = m_unit.next();
582
583      // determine how much to advance to hit the next bucket
584      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
585
586      // fill in the work unit basics
      // (the unit is linked behind whichever unit last used this bucket, and then
      //  becomes the bucket's most recent unit itself)
587      unit.polygon = &polygon;
588      unit.count_next = MIN(v2yclip - curscan, scaninc);
589      unit.scanline = curscan;
590      unit.previtem = m_unit_bucket[bucketnum];
591      m_unit_bucket[bucketnum] = unit_index;
592
593      // iterate over extents
594      for (int extnum = 0; extnum < unit.count_next; extnum++)
595      {
596         // compute the ending X based on which part of the triangle we're in
         // (here: the Y of this scanline's pixel center)
597         _BaseType fully = _BaseType(curscan + extnum) + _BaseType(0.5);
598
599         // set the extent and update the total pixel count
600         extent_t &extent = unit.extent[extnum];
601         extent.startx = istartx;
602         extent.stopx = istopx;
603         extent.userdata = NULL;
604         pixels += istopx - istartx;
605
606         // fill in the parameters for the extent
         // NOTE(review): the start value adds dpdx/dpdy scaled by the absolute
         // pixel-center coordinates rather than by the offset from v1 -- confirm
         // this matches what callers expect.
607         _BaseType fullstartx = _BaseType(istartx) + _BaseType(0.5);
608         for (int paramnum = 0; paramnum < paramcount; paramnum++)
609         {
610            extent.param[paramnum].start = v1->p[paramnum] + fullstartx * param_dpdx[paramnum] + fully * param_dpdy[paramnum];
611            extent.param[paramnum].dpdx = param_dpdx[paramnum];
612         }
613      }
614   }
615
616   // enqueue the work items
   // (with no queue, the units stay in m_unit until wait() executes them)
617   if (m_queue != NULL)
618      osd_work_item_queue_multiple(m_queue, work_item_callback, m_unit.count() - startunit, &m_unit[startunit], m_unit.itemsize(), WORK_ITEM_FLAG_AUTO_RELEASE);
619
620   // return the total number of pixels in the triangle
621   m_tiles++;
622   m_pixels += pixels;
623   return pixels;
624}
625
626
627//-------------------------------------------------
628//  render_triangle - render a single triangle
629//  given 3 vertexes
630//-------------------------------------------------
631
632template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
633UINT32 poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::render_triangle(const rectangle &cliprect, render_delegate callback, int paramcount, const vertex_t &_v1, const vertex_t &_v2, const vertex_t &_v3)
634{
635   const vertex_t *v1 = &_v1;
636   const vertex_t *v2 = &_v2;
637   const vertex_t *v3 = &_v3;
638
639   // first sort by Y
640   if (v2->y < v1->y)
641   {
642      const vertex_t *tv = v1;
643      v1 = v2;
644      v2 = tv;
645   }
646   if (v3->y < v2->y)
647   {
648      const vertex_t *tv = v2;
649      v2 = v3;
650      v3 = tv;
651      if (v2->y < v1->y)
652      {
653         const vertex_t *tv = v1;
654         v1 = v2;
655         v2 = tv;
656      }
657   }
658
659   // compute some integral X/Y vertex values
660   INT32 v1y = round_coordinate(v1->y);
661   INT32 v3y = round_coordinate(v3->y);
662
663   // clip coordinates
664   INT32 v1yclip = v1y;
665   INT32 v3yclip = v3y + ((m_flags & POLYFLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
666   v1yclip = MAX(v1yclip, cliprect.min_y);
667   v3yclip = MIN(v3yclip, cliprect.max_y + 1);
668   if (v3yclip - v1yclip <= 0)
669      return 0;
670
671   // determine total X extents
672   _BaseType minx = v1->x;
673   _BaseType maxx = v1->x;
674   if (v2->x < minx) minx = v2->x;
675   else if (v2->x > maxx) maxx = v2->x;
676   if (v3->x < minx) minx = v3->x;
677   else if (v3->x > maxx) maxx = v3->x;
678
679   // allocate and populate a new polygon
680   polygon_info &polygon = polygon_alloc(round_coordinate(minx), round_coordinate(maxx), v1yclip, v3yclip, callback);
681
682   // compute the slopes for each portion of the triangle
683   _BaseType dxdy_v1v2 = (v2->y == v1->y) ? _BaseType(0.0) : (v2->x - v1->x) / (v2->y - v1->y);
684   _BaseType dxdy_v1v3 = (v3->y == v1->y) ? _BaseType(0.0) : (v3->x - v1->x) / (v3->y - v1->y);
685   _BaseType dxdy_v2v3 = (v3->y == v2->y) ? _BaseType(0.0) : (v3->x - v2->x) / (v3->y - v2->y);
686
687   // compute parameter starting points and deltas
688   _BaseType param_start[_MaxParams];
689   _BaseType param_dpdx[_MaxParams];
690   _BaseType param_dpdy[_MaxParams];
691   if (paramcount > 0)
692   {
693      _BaseType a00 = v2->y - v3->y;
694      _BaseType a01 = v3->x - v2->x;
695      _BaseType a02 = v2->x*v3->y - v3->x*v2->y;
696      _BaseType a10 = v3->y - v1->y;
697      _BaseType a11 = v1->x - v3->x;
698      _BaseType a12 = v3->x*v1->y - v1->x*v3->y;
699      _BaseType a20 = v1->y - v2->y;
700      _BaseType a21 = v2->x - v1->x;
701      _BaseType a22 = v1->x*v2->y - v2->x*v1->y;
702      _BaseType det = a02 + a12 + a22;
703
704      if (poly_abs(det) < _BaseType(0.00001))
705      {
706         for (int paramnum = 0; paramnum < paramcount; paramnum++)
707         {
708            param_dpdx[paramnum] = _BaseType(0.0);
709            param_dpdy[paramnum] = _BaseType(0.0);
710            param_start[paramnum] = v1->p[paramnum];
711         }
712      }
713      else
714      {
715         _BaseType idet = poly_recip(det);
716         for (int paramnum = 0; paramnum < paramcount; paramnum++)
717         {
718            param_dpdx[paramnum]  = idet * (v1->p[paramnum]*a00 + v2->p[paramnum]*a10 + v3->p[paramnum]*a20);
719            param_dpdy[paramnum]  = idet * (v1->p[paramnum]*a01 + v2->p[paramnum]*a11 + v3->p[paramnum]*a21);
720            param_start[paramnum] = idet * (v1->p[paramnum]*a02 + v2->p[paramnum]*a12 + v3->p[paramnum]*a22);
721         }
722      }
723   }
724   else    // GCC 4.7.0 incorrectly claims these are uninitialized; humor it by initializing in the (hopefully rare) zero parameter case
725   {
726      param_start[0] = _BaseType(0.0);
727      param_dpdx[0] = _BaseType(0.0);
728      param_dpdy[0] = _BaseType(0.0);
729   }
730
731   // compute the X extents for each scanline
732   INT32 pixels = 0;
733   UINT32 startunit = m_unit.count();
734   INT32 scaninc = 1;
735   for (INT32 curscan = v1yclip; curscan < v3yclip; curscan += scaninc)
736   {
737      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
738      UINT32 unit_index = m_unit.count();
739      work_unit &unit = m_unit.next();
740
741      // determine how much to advance to hit the next bucket
742      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
743
744      // fill in the work unit basics
745      unit.polygon = &polygon;
746      unit.count_next = MIN(v3yclip - curscan, scaninc);
747      unit.scanline = curscan;
748      unit.previtem = m_unit_bucket[bucketnum];
749      m_unit_bucket[bucketnum] = unit_index;
750
751      // iterate over extents
752      for (int extnum = 0; extnum < unit.count_next; extnum++)
753      {
754         // compute the ending X based on which part of the triangle we're in
755         _BaseType fully = _BaseType(curscan + extnum) + _BaseType(0.5);
756         _BaseType startx = v1->x + (fully - v1->y) * dxdy_v1v3;
757         _BaseType stopx;
758         if (fully < v2->y)
759            stopx = v1->x + (fully - v1->y) * dxdy_v1v2;
760         else
761            stopx = v2->x + (fully - v2->y) * dxdy_v2v3;
762
763         // clamp to full pixels
764         INT32 istartx = round_coordinate(startx);
765         INT32 istopx = round_coordinate(stopx);
766
767         // force start < stop
768         if (istartx > istopx)
769         {
770            INT32 temp = istartx;
771            istartx = istopx;
772            istopx = temp;
773         }
774
775         // include the right edge if requested
776         if (m_flags & POLYFLAG_INCLUDE_RIGHT_EDGE)
777            istopx++;
778
779         // apply left/right clipping
780         if (istartx < cliprect.min_x)
781            istartx = cliprect.min_x;
782         if (istopx > cliprect.max_x)
783            istopx = cliprect.max_x + 1;
784
785         // set the extent and update the total pixel count
786         if (istartx >= istopx)
787            istartx = istopx = 0;
788         extent_t &extent = unit.extent[extnum];
789         extent.startx = istartx;
790         extent.stopx = istopx;
791         extent.userdata = NULL;
792         pixels += istopx - istartx;
793
794         // fill in the parameters for the extent
795         _BaseType fullstartx = _BaseType(istartx) + _BaseType(0.5);
796         for (int paramnum = 0; paramnum < paramcount; paramnum++)
797         {
798            extent.param[paramnum].start = param_start[paramnum] + fullstartx * param_dpdx[paramnum] + fully * param_dpdy[paramnum];
799            extent.param[paramnum].dpdx = param_dpdx[paramnum];
800         }
801      }
802   }
803
804   // enqueue the work items
805   if (m_queue != NULL)
806      osd_work_item_queue_multiple(m_queue, work_item_callback, m_unit.count() - startunit, &m_unit[startunit], m_unit.itemsize(), WORK_ITEM_FLAG_AUTO_RELEASE);
807
808   // return the total number of pixels in the triangle
809   m_triangles++;
810   m_pixels += pixels;
811   return pixels;
812}
813
814
815//-------------------------------------------------
816//  render_triangle_fan - render a set of
817//  triangles in a fan
818//-------------------------------------------------
819
820template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
821UINT32 poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::render_triangle_fan(const rectangle &cliprect, render_delegate callback, int paramcount, int numverts, const vertex_t *v)
822{
823   // iterate over vertices
824   UINT32 pixels = 0;
825   for (int vertnum = 2; vertnum < numverts; vertnum++)
826      pixels += render_triangle(cliprect, callback, paramcount, v[0], v[vertnum - 1], v[vertnum]);
827   return pixels;
828}
829
830
831//-------------------------------------------------
832//  render_triangle_strip - render a set of
833//  triangles in a strip
834//-------------------------------------------------
835
836template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
837UINT32 poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::render_triangle_strip(const rectangle &cliprect, render_delegate callback, int paramcount, int numverts, const vertex_t *v)
838{
839   // iterate over vertices
840   UINT32 pixels = 0;
841   for (int vertnum = 2; vertnum < numverts; vertnum++)
842      pixels += render_triangle(cliprect, callback, paramcount, v[vertnum - 2], v[vertnum - 1], v[vertnum]);
843   return pixels;
844}
845
846
847//-------------------------------------------------
848//  render_triangle_custom - perform a custom
849//  render of an object, given specific extents
850//-------------------------------------------------
851
852template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
853UINT32 poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::render_triangle_custom(const rectangle &cliprect, render_delegate callback, int startscanline, int numscanlines, const extent_t *extents)
854{
855   // clip coordinates
856   INT32 v1yclip = MAX(startscanline, cliprect.min_y);
857   INT32 v3yclip = MIN(startscanline + numscanlines, cliprect.max_y + 1);
858   if (v3yclip - v1yclip <= 0)
859      return 0;
860
861   // allocate and populate a new polygon
862   polygon_info &polygon = polygon_alloc(0, 0, v1yclip, v3yclip, callback);
863
864   // compute the X extents for each scanline
865   INT32 pixels = 0;
866   UINT32 startunit = m_unit.count();
867   INT32 scaninc = 1;
868   for (INT32 curscan = v1yclip; curscan < v3yclip; curscan += scaninc)
869   {
870      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
871      UINT32 unit_index = m_unit.count();
872      work_unit &unit = m_unit.next();
873
874      // determine how much to advance to hit the next bucket
875      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
876
877      // fill in the work unit basics
878      unit.polygon = &polygon;
879      unit.count_next = MIN(v3yclip - curscan, scaninc);
880      unit.scanline = curscan;
881      unit.previtem = m_unit_bucket[bucketnum];
882      m_unit_bucket[bucketnum] = unit_index;
883
884      // iterate over extents
885      for (int extnum = 0; extnum < unit.count_next; extnum++)
886      {
887         const extent_t &srcextent = extents[(curscan + extnum) - startscanline];
888         INT32 istartx = srcextent.startx, istopx = srcextent.stopx;
889
890         // apply left/right clipping
891         if (istartx < cliprect.min_x)
892            istartx = cliprect.min_x;
893         if (istartx > cliprect.max_x)
894            istartx = cliprect.max_x + 1;
895         if (istopx < cliprect.min_x)
896            istopx = cliprect.min_x;
897         if (istopx > cliprect.max_x)
898            istopx = cliprect.max_x + 1;
899
900         // set the extent and update the total pixel count
901         extent_t &extent = unit.extent[extnum];
902         extent.startx = istartx;
903         extent.stopx = istopx;
904
905         // fill in the parameters for the extent
906         for (int paramnum = 0; paramnum < _MaxParams; paramnum++)
907         {
908            extent.param[paramnum].start = srcextent.param[paramnum].start;
909            extent.param[paramnum].dpdx = srcextent.param[paramnum].dpdx;
910         }
911
912         extent.userdata = srcextent.userdata;
913         if (istartx < istopx)
914            pixels += istopx - istartx;
915         else if(istopx < istartx)
916            pixels += istartx - istopx;
917      }
918   }
919
920   // enqueue the work items
921   if (m_queue != NULL)
922      osd_work_item_queue_multiple(m_queue, work_item_callback, m_unit.count() - startunit, &m_unit[startunit], m_unit.itemsize(), WORK_ITEM_FLAG_AUTO_RELEASE);
923
924   // return the total number of pixels in the object
925   m_triangles++;
926   m_pixels += pixels;
927   return pixels;
928}
929
930
931//-------------------------------------------------
932//  render_polygon - render a single polygon up
933//  to 32 vertices
934//-------------------------------------------------
935
936template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
937template<int _NumVerts>
938UINT32 poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::render_polygon(const rectangle &cliprect, render_delegate callback, int paramcount, const vertex_t *v)
939{
940   // determine min/max Y vertices
941   _BaseType minx = v[0].x;
942   _BaseType maxx = v[0].x;
943   int minv = 0;
944   int maxv = 0;
945   for (int vertnum = 1; vertnum < _NumVerts; vertnum++)
946   {
947      if (v[vertnum].y < v[minv].y)
948         minv = vertnum;
949      else if (v[vertnum].y > v[maxv].y)
950         maxv = vertnum;
951      if (v[vertnum].x < minx)
952         minx = v[vertnum].x;
953      else if (v[vertnum].x > maxx)
954         maxx = v[vertnum].x;
955   }
956
957   // determine start/end scanlines
958   INT32 miny = round_coordinate(v[minv].y);
959   INT32 maxy = round_coordinate(v[maxv].y);
960
961   // clip coordinates
962   INT32 minyclip = miny;
963   INT32 maxyclip = maxy + ((m_flags & POLYFLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
964   minyclip = MAX(minyclip, cliprect.min_y);
965   maxyclip = MIN(maxyclip, cliprect.max_y + 1);
966   if (maxyclip - minyclip <= 0)
967      return 0;
968
969   // allocate a new polygon
970   polygon_info &polygon = polygon_alloc(round_coordinate(minx), round_coordinate(maxx), minyclip, maxyclip, callback);
971
972   // walk forward to build up the forward edge list
973   struct poly_edge
974   {
975      poly_edge *         next;                   // next edge in sequence
976      int                 index;                  // index of this edge
977      const vertex_t *    v1;                     // pointer to first vertex
978      const vertex_t *    v2;                     // pointer to second vertex
979      _BaseType           dxdy;                   // dx/dy along the edge
980      _BaseType           dpdy[_MaxParams];       // per-parameter dp/dy values
981   };
982   poly_edge fedgelist[_NumVerts - 1];
983   poly_edge *edgeptr = &fedgelist[0];
984   for (int curv = minv; curv != maxv; curv = (curv == _NumVerts - 1) ? 0 : (curv + 1))
985   {
986      // set the two vertices
987      edgeptr->v1 = &v[curv];
988      edgeptr->v2 = &v[(curv == _NumVerts - 1) ? 0 : (curv + 1)];
989
990      // if horizontal, skip altogether
991      if (edgeptr->v1->y == edgeptr->v2->y)
992         continue;
993
994      // need dx/dy always, and parameter deltas as necessary
995      _BaseType ooy = poly_recip(edgeptr->v2->y - edgeptr->v1->y);
996      edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
997      for (int paramnum = 0; paramnum < paramcount; paramnum++)
998         edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
999      edgeptr++;
1000   }
1001
1002   // walk backward to build up the backward edge list
1003   poly_edge bedgelist[_NumVerts - 1];
1004   edgeptr = &bedgelist[0];
1005   for (int curv = minv; curv != maxv; curv = (curv == 0) ? (_NumVerts - 1) : (curv - 1))
1006   {
1007      // set the two vertices
1008      edgeptr->v1 = &v[curv];
1009      edgeptr->v2 = &v[(curv == 0) ? (_NumVerts - 1) : (curv - 1)];
1010
1011      // if horizontal, skip altogether
1012      if (edgeptr->v1->y == edgeptr->v2->y)
1013         continue;
1014
1015      // need dx/dy always, and parameter deltas as necessary
1016      _BaseType ooy = poly_recip(edgeptr->v2->y - edgeptr->v1->y);
1017      edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
1018      for (int paramnum = 0; paramnum < paramcount; paramnum++)
1019         edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
1020      edgeptr++;
1021   }
1022
1023   // determine which list is left/right:
1024   // if the first vertex is shared, compare the slopes
1025   // if the first vertex is not shared, compare the X coordinates
1026   const poly_edge *ledge, *redge;
1027   if ((fedgelist[0].v1 == bedgelist[0].v1 && fedgelist[0].dxdy < bedgelist[0].dxdy) ||
1028      (fedgelist[0].v1 != bedgelist[0].v1 && fedgelist[0].v1->x < bedgelist[0].v1->x))
1029   {
1030      ledge = fedgelist;
1031      redge = bedgelist;
1032   }
1033   else
1034   {
1035      ledge = bedgelist;
1036      redge = fedgelist;
1037   }
1038
1039   // compute the X extents for each scanline
1040   INT32 pixels = 0;
1041   UINT32 startunit = m_unit.count();
1042   INT32 scaninc = 1;
1043   for (INT32 curscan = minyclip; curscan < maxyclip; curscan += scaninc)
1044   {
1045      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
1046      UINT32 unit_index = m_unit.count();
1047      work_unit &unit = m_unit.next();
1048
1049      // determine how much to advance to hit the next bucket
1050      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
1051
1052      // fill in the work unit basics
1053      unit.polygon = &polygon;
1054      unit.count_next = MIN(maxyclip - curscan, scaninc);
1055      unit.scanline = curscan;
1056      unit.previtem = m_unit_bucket[bucketnum];
1057      m_unit_bucket[bucketnum] = unit_index;
1058
1059      // iterate over extents
1060      for (int extnum = 0; extnum < unit.count_next; extnum++)
1061      {
1062         // compute the ending X based on which part of the triangle we're in
1063         _BaseType fully = _BaseType(curscan + extnum) + _BaseType(0.5);
1064         while (fully > ledge->v2->y && fully < v[maxv].y)
1065            ledge++;
1066         while (fully > redge->v2->y && fully < v[maxv].y)
1067            redge++;
1068         _BaseType startx = ledge->v1->x + (fully - ledge->v1->y) * ledge->dxdy;
1069         _BaseType stopx = redge->v1->x + (fully - redge->v1->y) * redge->dxdy;
1070
1071         // clamp to full pixels
1072         INT32 istartx = round_coordinate(startx);
1073         INT32 istopx = round_coordinate(stopx);
1074
1075         // compute parameter starting points and deltas
1076         extent_t &extent = unit.extent[extnum];
1077         if (paramcount > 0)
1078         {
1079            _BaseType ldy = fully - ledge->v1->y;
1080            _BaseType rdy = fully - redge->v1->y;
1081            _BaseType oox = poly_recip(stopx - startx);
1082
1083            // iterate over parameters
1084            for (int paramnum = 0; paramnum < paramcount; paramnum++)
1085            {
1086               _BaseType lparam = ledge->v1->p[paramnum] + ldy * ledge->dpdy[paramnum];
1087               _BaseType rparam = redge->v1->p[paramnum] + rdy * redge->dpdy[paramnum];
1088               _BaseType dpdx = (rparam - lparam) * oox;
1089
1090               extent.param[paramnum].start = lparam;// - (_BaseType(istartx) + 0.5f) * dpdx;
1091               extent.param[paramnum].dpdx = dpdx;
1092            }
1093         }
1094
1095         // include the right edge if requested
1096         if (m_flags & POLYFLAG_INCLUDE_RIGHT_EDGE)
1097            istopx++;
1098
1099         // apply left/right clipping
1100         if (istartx < cliprect.min_x)
1101         {
1102            for (int paramnum = 0; paramnum < paramcount; paramnum++)
1103               extent.param[paramnum].start += (cliprect.min_x - istartx) * extent.param[paramnum].dpdx;
1104            istartx = cliprect.min_x;
1105         }
1106         if (istopx > cliprect.max_x)
1107            istopx = cliprect.max_x + 1;
1108
1109         // set the extent and update the total pixel count
1110         if (istartx >= istopx)
1111            istartx = istopx = 0;
1112         extent.startx = istartx;
1113         extent.stopx = istopx;
1114         extent.userdata = NULL;
1115         pixels += istopx - istartx;
1116      }
1117   }
1118
1119   // enqueue the work items
1120   if (m_queue != NULL)
1121      osd_work_item_queue_multiple(m_queue, work_item_callback, m_unit.count() - startunit, &m_unit[startunit], m_unit.itemsize(), WORK_ITEM_FLAG_AUTO_RELEASE);
1122
1123   // return the total number of pixels in the triangle
1124   m_quads++;
1125   m_pixels += pixels;
1126   return pixels;
1127}
1128
1129
1130//-------------------------------------------------
1131//  zclip_if_less - clip a polygon using p[0] as
1132//  a z coordinate
1133//-------------------------------------------------
1134
1135template<typename _BaseType, class _ObjectData, int _MaxParams, int _MaxPolys>
1136int poly_manager<_BaseType, _ObjectData, _MaxParams, _MaxPolys>::zclip_if_less(int numverts, const vertex_t *v, vertex_t *outv, int paramcount, _BaseType clipval)
1137{
1138   bool prevclipped = (v[numverts - 1].p[0] < clipval);
1139   vertex_t *nextout = outv;
1140
1141   // iterate over vertices
1142   for (int vertnum = 0; vertnum < numverts; vertnum++)
1143   {
1144      bool thisclipped = (v[vertnum].p[0] < clipval);
1145
1146      // if we switched from clipped to non-clipped, interpolate a vertex
1147      if (thisclipped != prevclipped)
1148      {
1149         const vertex_t &v1 = v[(vertnum == 0) ? (numverts - 1) : (vertnum - 1)];
1150         const vertex_t &v2 = v[vertnum];
1151         _BaseType frac = (clipval - v1.p[0]) / (v2.p[0] - v1.p[0]);
1152         nextout->x = v1.x + frac * (v2.x - v1.x);
1153         nextout->y = v1.y + frac * (v2.y - v1.y);
1154         for (int paramnum = 0; paramnum < paramcount; paramnum++)
1155            nextout->p[paramnum] = v1.p[paramnum] + frac * (v2.p[paramnum] - v1.p[paramnum]);
1156         nextout++;
1157      }
1158
1159      // if this vertex is not clipped, copy it in
1160      if (!thisclipped)
1161         *nextout++ = v[vertnum];
1162
1163      // remember the last state
1164      prevclipped = thisclipped;
1165   }
1166   return nextout - outv;
1167}
1168
1169#endif  // __POLYNEW_H__
trunk/src/emu/video/polylgcy.c
r0r28721
1/***************************************************************************
2
3    polylgcy.c
4
5    Legacy helper routines for polygon rendering.
6
7***************************************************************************/
8
9#include "emu.h"
10#include "polylgcy.h"
11
12
13/***************************************************************************
14    DEBUGGING
15***************************************************************************/
16
/* set to 1 to compile in the extra statistics fields and counters */
#define KEEP_STATISTICS                 0

/* set to 1 to log the reasons for any long waits */
#define LOG_WAITS                       0

/* a wait counts as "long" once it exceeds this many profiling ticks */
#define LOG_WAIT_THRESHOLD              1000
25
26
27
28/***************************************************************************
29    CONSTANTS
30***************************************************************************/
31
#define SCANLINES_PER_BUCKET            8           /* also the number of extents held by each work unit */
#define CACHE_LINE_SIZE                 64          /* this is a general guess */
#define TOTAL_BUCKETS                   (512 / SCANLINES_PER_BUCKET)    /* size of the unit_bucket array */
#define UNITS_PER_POLY                  (100 / SCANLINES_PER_BUCKET)
36
37
38
39/***************************************************************************
40    TYPE DEFINITIONS
41***************************************************************************/
42
43/* forward definitions */
44struct polygon_info;
45
46
47/* tri_extent describes start/end points for a scanline */
48struct tri_extent
49{
50   INT16       startx;                     /* starting X coordinate (inclusive) */
51   INT16       stopx;                      /* ending X coordinate (exclusive) */
52};
53
54
/* poly_param holds the per-polygon data for a single parameter */
struct poly_param
{
   float       start;                      /* value of the parameter at the starting X,Y */
   float       dpdx;                       /* change in the parameter per unit X, relative to starting X */
   float       dpdy;                       /* change in the parameter per unit Y, relative to starting Y */
};
62
63
64/* poly edge is used internally for quad rendering */
65struct poly_edge
66{
67   poly_edge *         next;                   /* next edge in sequence */
68   int                 index;                  /* index of this edge */
69   const poly_vertex * v1;                     /* pointer to first vertex */
70   const poly_vertex * v2;                     /* pointer to second vertex */
71   float               dxdy;                   /* dx/dy along the edge */
72   float               dpdy[MAX_VERTEX_PARAMS];/* per-parameter dp/dy values */
73};
74
75
76/* poly section is used internally for quad rendering */
77struct poly_section
78{
79   const poly_edge *   ledge;                  /* pointer to left edge */
80   const poly_edge *   redge;                  /* pointer to right edge */
81   float               ybottom;                /* bottom of this section */
82};
83
84
85/* work_unit_shared is a common set of data shared between tris and quads */
86struct work_unit_shared
87{
88   polygon_info *      polygon;                /* pointer to polygon */
89   volatile UINT32     count_next;             /* number of scanlines and index of next item to process */
90   INT16               scanline;               /* starting scanline and count */
91   UINT16              previtem;               /* index of previous item in the same bucket */
92#ifndef PTR64
93   UINT32              dummy;                  /* pad to 16 bytes */
94#endif
95};
96
97
98/* tri_work_unit is a triangle-specific work-unit */
99struct tri_work_unit
100{
101   work_unit_shared    shared;                 /* shared data */
102   tri_extent          extent[SCANLINES_PER_BUCKET]; /* array of scanline extents */
103};
104
105
106/* quad_work_unit is a quad-specific work-unit */
107struct quad_work_unit
108{
109   work_unit_shared    shared;                 /* shared data */
110   poly_extent         extent[SCANLINES_PER_BUCKET]; /* array of scanline extents */
111};
112
113
114/* work_unit is a union of the two types */
115union work_unit
116{
117   work_unit_shared    shared;                 /* shared data */
118   tri_work_unit       tri;                    /* triangle work unit */
119   quad_work_unit      quad;                   /* quad work unit */
120};
121
122
123/* polygon_info describes a single polygon, which includes the poly_params */
124struct polygon_info
125{
126   legacy_poly_manager *      poly;                   /* pointer back to the poly manager */
127   void *              dest;                   /* pointer to the destination we are rendering to */
128   void *              extra;                  /* extra data pointer */
129   UINT8               numparams;              /* number of parameters for this polygon  */
130   UINT8               numverts;               /* number of vertices in this polygon */
131   poly_draw_scanline_func     callback;               /* callback to handle a scanline's worth of work */
132   INT32               xorigin;                /* X origin for all parameters */
133   INT32               yorigin;                /* Y origin for all parameters */
134   poly_param          param[MAX_VERTEX_PARAMS];/* array of parameter data */
135};
136
137
138/* full poly manager description */
139struct legacy_poly_manager
140{
141   /* queue management */
142   osd_work_queue *    queue;                  /* work queue */
143
144   /* triangle work units */
145   work_unit **        unit;                   /* array of work unit pointers */
146   UINT32              unit_next;              /* index of next unit to allocate */
147   UINT32              unit_count;             /* number of work units available */
148   size_t              unit_size;              /* size of each work unit, in bytes */
149
150   /* quad work units */
151   UINT32              quadunit_next;          /* index of next unit to allocate */
152   UINT32              quadunit_count;         /* number of work units available */
153   size_t              quadunit_size;          /* size of each work unit, in bytes */
154
155   /* poly data */
156   polygon_info **     polygon;                /* array of polygon pointers */
157   UINT32              polygon_next;           /* index of next polygon to allocate */
158   UINT32              polygon_count;          /* number of polygon items available */
159   size_t              polygon_size;           /* size of each polygon, in bytes */
160
161   /* extra data */
162   void **             extra;                  /* array of extra data pointers */
163   UINT32              extra_next;             /* index of next extra data to allocate */
164   UINT32              extra_count;            /* number of extra data items available */
165   size_t              extra_size;             /* size of each extra data, in bytes */
166
167   /* misc data */
168   UINT8               flags;                  /* flags */
169
170   /* buckets */
171   UINT16              unit_bucket[TOTAL_BUCKETS]; /* buckets for tracking unit usage */
172
173   /* statistics */
174   UINT32              triangles;              /* number of triangles queued */
175   UINT32              quads;                  /* number of quads queued */
176   UINT64              pixels;                 /* number of pixels rendered */
177#if KEEP_STATISTICS
178   UINT32              unit_waits;             /* number of times we waited for a unit */
179   UINT32              unit_max;               /* maximum units used */
180   UINT32              polygon_waits;          /* number of times we waited for a polygon */
181   UINT32              polygon_max;            /* maximum polygons used */
182   UINT32              extra_waits;            /* number of times we waited for an extra data */
183   UINT32              extra_max;              /* maximum extra data used */
184   UINT32              conflicts[WORK_MAX_THREADS]; /* number of conflicts found, per thread */
185   UINT32              resolved[WORK_MAX_THREADS]; /* number of conflicts resolved, per thread */
186#endif
187};
188
189
190
191/***************************************************************************
192    FUNCTION PROTOTYPES
193***************************************************************************/
194
195static void **allocate_array(running_machine &machine, size_t *itemsize, UINT32 itemcount);
196static void *poly_item_callback(void *param, int threadid);
197static void poly_state_presave(legacy_poly_manager *poly);
198
199
200
201/***************************************************************************
202    INLINE FUNCTIONS
203***************************************************************************/
204
205/*-------------------------------------------------
206    round_coordinate - round a coordinate to
207    an integer, following rules that 0.5 rounds
208    down
209-------------------------------------------------*/
210
211INLINE INT32 round_coordinate(float value)
212{
213   INT32 result = floor(value);
214   return result + (value - (float)result > 0.5f);
215}
216
217
218/*-------------------------------------------------
219    convert_tri_extent_to_poly_extent - convert
220    a simple tri_extent to a full poly_extent
221-------------------------------------------------*/
222
223INLINE void convert_tri_extent_to_poly_extent(poly_extent *dstextent, const tri_extent *srcextent, const polygon_info *polygon, INT32 y)
224{
225   /* copy start/stop always */
226   dstextent->startx = srcextent->startx;
227   dstextent->stopx = srcextent->stopx;
228
229   /* if we have parameters, process them as well */
230   for (int paramnum = 0; paramnum < polygon->numparams; paramnum++)
231   {
232      dstextent->param[paramnum].start = polygon->param[paramnum].start + srcextent->startx * polygon->param[paramnum].dpdx + y * polygon->param[paramnum].dpdy;
233      dstextent->param[paramnum].dpdx = polygon->param[paramnum].dpdx;
234   }
235}
236
237
238/*-------------------------------------------------
239    interpolate_vertex - interpolate values in
240    a vertex based on p[0] crossing the clipval
241-------------------------------------------------*/
242
243INLINE void interpolate_vertex(poly_vertex *outv, const poly_vertex *v1, const poly_vertex *v2, int paramcount, float clipval)
244{
245   float frac = (clipval - v1->p[0]) / (v2->p[0] - v1->p[0]);
246   int paramnum;
247
248   /* create a new one at the intersection point */
249   outv->x = v1->x + frac * (v2->x - v1->x);
250   outv->y = v1->y + frac * (v2->y - v1->y);
251   for (paramnum = 0; paramnum < paramcount; paramnum++)
252      outv->p[paramnum] = v1->p[paramnum] + frac * (v2->p[paramnum] - v1->p[paramnum]);
253}
254
255
256/*-------------------------------------------------
257    copy_vertex - copy vertex data from one to
258    another
259-------------------------------------------------*/
260
261INLINE void copy_vertex(poly_vertex *outv, const poly_vertex *v, int paramcount)
262{
263   int paramnum;
264
265   outv->x = v->x;
266   outv->y = v->y;
267   for (paramnum = 0; paramnum < paramcount; paramnum++)
268      outv->p[paramnum] = v->p[paramnum];
269}
270
271
272/*-------------------------------------------------
273    allocate_polygon - allocate a new polygon
274    object, blocking if we run out
275-------------------------------------------------*/
276
277INLINE polygon_info *allocate_polygon(legacy_poly_manager *poly, int miny, int maxy)
278{
279   /* wait for a work item if we have to */
280   if (poly->polygon_next + 1 > poly->polygon_count)
281   {
282      poly_wait(poly, "Out of polygons");
283#if KEEP_STATISTICS
284      poly->polygon_waits++;
285#endif
286   }
287   else if (poly->unit_next + (maxy - miny) / SCANLINES_PER_BUCKET + 2 > poly->unit_count)
288   {
289      poly_wait(poly, "Out of work units");
290#if KEEP_STATISTICS
291      poly->unit_waits++;
292#endif
293   }
294#if KEEP_STATISTICS
295   poly->polygon_max = MAX(poly->polygon_max, poly->polygon_next + 1);
296#endif
297   return poly->polygon[poly->polygon_next++];
298}
299
300
301
302/***************************************************************************
303    INITIALIZATION/TEARDOWN
304***************************************************************************/
305
306/*-------------------------------------------------
307    poly_alloc - initialize a new polygon
308    manager
309-------------------------------------------------*/
310
311legacy_poly_manager *poly_alloc(running_machine &machine, int max_polys, size_t extra_data_size, UINT8 flags)
312{
313   legacy_poly_manager *poly;
314
315   /* allocate the manager itself */
316   poly = auto_alloc_clear(machine, legacy_poly_manager);
317   poly->flags = flags;
318
319   /* allocate polygons */
320   poly->polygon_size = sizeof(polygon_info);
321   poly->polygon_count = MAX(max_polys, 1);
322   poly->polygon_next = 0;
323   poly->polygon = (polygon_info **)allocate_array(machine, &poly->polygon_size, poly->polygon_count);
324
325   /* allocate extra data */
326   poly->extra_size = extra_data_size;
327   poly->extra_count = poly->polygon_count;
328   poly->extra_next = 1;
329   poly->extra = allocate_array(machine, &poly->extra_size, poly->extra_count);
330
331   /* allocate triangle work units */
332   poly->unit_size = (flags & POLYFLAG_ALLOW_QUADS) ? sizeof(quad_work_unit) : sizeof(tri_work_unit);
333   poly->unit_count = MIN(poly->polygon_count * UNITS_PER_POLY, 65535);
334   poly->unit_next = 0;
335   poly->unit = (work_unit **)allocate_array(machine, &poly->unit_size, poly->unit_count);
336
337   /* create the work queue */
338   if (!(flags & POLYFLAG_NO_WORK_QUEUE))
339      poly->queue = osd_work_queue_alloc(WORK_QUEUE_FLAG_MULTI | WORK_QUEUE_FLAG_HIGH_FREQ);
340
341   /* request a pre-save callback for synchronization */
342   machine.save().register_presave(save_prepost_delegate(FUNC(poly_state_presave), poly));
343   return poly;
344}
345
346
347/*-------------------------------------------------
348    poly_free - free a polygon manager
349-------------------------------------------------*/
350
351void poly_free(legacy_poly_manager *poly)
352{
353#if KEEP_STATISTICS
354{
355   int i, conflicts = 0, resolved = 0;
356   for (i = 0; i < ARRAY_LENGTH(poly->conflicts); i++)
357   {
358      conflicts += poly->conflicts[i];
359      resolved += poly->resolved[i];
360   }
361   printf("Total triangles = %d\n", poly->triangles);
362   printf("Total quads = %d\n", poly->quads);
363   if (poly->pixels > 1000000000)
364      printf("Total pixels   = %d%09d\n", (UINT32)(poly->pixels / 1000000000), (UINT32)(poly->pixels % 1000000000));
365   else
366      printf("Total pixels   = %d\n", (UINT32)poly->pixels);
367   printf("Conflicts:  %d resolved, %d total\n", resolved, conflicts);
368   printf("Units:      %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", poly->unit_max, poly->unit_count, poly->unit_waits, poly->unit_size, poly->unit_count * poly->unit_size);
369   printf("Polygons:   %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", poly->polygon_max, poly->polygon_count, poly->polygon_waits, poly->polygon_size, poly->polygon_count * poly->polygon_size);
370   printf("Extra data: %5d used, %5d allocated, %5d waits, %4d bytes each, %7d total\n", poly->extra_max, poly->extra_count, poly->extra_waits, poly->extra_size, poly->extra_count * poly->extra_size);
371}
372#endif
373
374   /* free the work queue */
375   if (poly->queue != NULL)
376      osd_work_queue_free(poly->queue);
377}
378
379
380
381/***************************************************************************
382    COMMON FUNCTIONS
383***************************************************************************/
384
385/*-------------------------------------------------
386    poly_wait - wait for all pending rendering
387    to complete
388-------------------------------------------------*/
389
390void poly_wait(legacy_poly_manager *poly, const char *debug_reason)
391{
392   osd_ticks_t time;
393
394   /* remember the start time if we're logging */
395   if (LOG_WAITS)
396      time = get_profile_ticks();
397
398   /* wait for all pending work items to complete */
399   if (poly->queue != NULL)
400      osd_work_queue_wait(poly->queue, osd_ticks_per_second() * 100);
401
402   /* if we don't have a queue, just run the whole list now */
403   else
404   {
405      int unitnum;
406      for (unitnum = 0; unitnum < poly->unit_next; unitnum++)
407         poly_item_callback(poly->unit[unitnum], 0);
408   }
409
410   /* log any long waits */
411   if (LOG_WAITS)
412   {
413      time = get_profile_ticks() - time;
414      if (time > LOG_WAIT_THRESHOLD)
415         logerror("Poly:Waited %d cycles for %s\n", (int)time, debug_reason);
416   }
417
418   /* reset the state */
419   poly->polygon_next = poly->unit_next = 0;
420   memset(poly->unit_bucket, 0xff, sizeof(poly->unit_bucket));
421
422   /* we need to preserve the last extra data that was supplied */
423   if (poly->extra_next > 1)
424      memcpy(poly->extra[0], poly->extra[poly->extra_next - 1], poly->extra_size);
425   poly->extra_next = 1;
426}
427
428
429/*-------------------------------------------------
430    poly_get_extra_data - get a pointer to the
431    extra data for the next polygon
432-------------------------------------------------*/
433
434void *poly_get_extra_data(legacy_poly_manager *poly)
435{
436   /* wait for a work item if we have to */
437   if (poly->extra_next + 1 > poly->extra_count)
438   {
439      poly_wait(poly, "Out of extra data");
440#if KEEP_STATISTICS
441      poly->extra_waits++;
442#endif
443   }
444
445   /* return a pointer to the extra data for the next item */
446#if KEEP_STATISTICS
447   poly->extra_max = MAX(poly->extra_max, poly->extra_next + 1);
448#endif
449   return poly->extra[poly->extra_next++];
450}
451
452
453
454/***************************************************************************
455    CORE TRIANGLE RENDERING
456***************************************************************************/
457
458/*-------------------------------------------------
459    poly_render_triangle - render a single
460    triangle given 3 vertexes
461-------------------------------------------------*/
462
463UINT32 poly_render_triangle(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, const poly_vertex *v1, const poly_vertex *v2, const poly_vertex *v3)
464{
465   float dxdy_v1v2, dxdy_v1v3, dxdy_v2v3;
466   const poly_vertex *tv;
467   INT32 curscan, scaninc;
468   polygon_info *polygon;
469   INT32 v1yclip, v3yclip;
470   INT32 v1y, v3y, v1x;
471   INT32 pixels = 0;
472   UINT32 startunit;
473
474   /* first sort by Y */
475   if (v2->y < v1->y)
476   {
477      tv = v1;
478      v1 = v2;
479      v2 = tv;
480   }
481   if (v3->y < v2->y)
482   {
483      tv = v2;
484      v2 = v3;
485      v3 = tv;
486      if (v2->y < v1->y)
487      {
488         tv = v1;
489         v1 = v2;
490         v2 = tv;
491      }
492   }
493
494   /* compute some integral X/Y vertex values */
495   v1x = round_coordinate(v1->x);
496   v1y = round_coordinate(v1->y);
497   v3y = round_coordinate(v3->y);
498
499   /* clip coordinates */
500   v1yclip = v1y;
501   v3yclip = v3y + ((poly->flags & POLYFLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
502   v1yclip = MAX(v1yclip, cliprect.min_y);
503   v3yclip = MIN(v3yclip, cliprect.max_y + 1);
504   if (v3yclip - v1yclip <= 0)
505      return 0;
506
507   /* allocate a new polygon */
508   polygon = allocate_polygon(poly, v1yclip, v3yclip);
509
510   /* fill in the polygon information */
511   polygon->poly = poly;
512   polygon->dest = dest;
513   polygon->callback = callback;
514   polygon->extra = poly->extra[poly->extra_next - 1];
515   polygon->numparams = paramcount;
516   polygon->numverts = 3;
517
518   /* set the start X/Y coordinates */
519   polygon->xorigin = v1x;
520   polygon->yorigin = v1y;
521
522   /* compute the slopes for each portion of the triangle */
523   dxdy_v1v2 = (v2->y == v1->y) ? 0.0f : (v2->x - v1->x) / (v2->y - v1->y);
524   dxdy_v1v3 = (v3->y == v1->y) ? 0.0f : (v3->x - v1->x) / (v3->y - v1->y);
525   dxdy_v2v3 = (v3->y == v2->y) ? 0.0f : (v3->x - v2->x) / (v3->y - v2->y);
526
527   /* compute the X extents for each scanline */
528   startunit = poly->unit_next;
529   for (curscan = v1yclip; curscan < v3yclip; curscan += scaninc)
530   {
531      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
532      UINT32 unit_index = poly->unit_next++;
533      tri_work_unit *unit = &poly->unit[unit_index]->tri;
534      int extnum;
535
536      /* determine how much to advance to hit the next bucket */
537      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
538
539      /* fill in the work unit basics */
540      unit->shared.polygon = polygon;
541      unit->shared.count_next = MIN(v3yclip - curscan, scaninc);
542      unit->shared.scanline = curscan;
543      unit->shared.previtem = poly->unit_bucket[bucketnum];
544      poly->unit_bucket[bucketnum] = unit_index;
545
546      /* iterate over extents */
547      for (extnum = 0; extnum < unit->shared.count_next; extnum++)
548      {
549         float fully = (float)(curscan + extnum) + 0.5f;
550         float startx = v1->x + (fully - v1->y) * dxdy_v1v3;
551         float stopx;
552         INT32 istartx, istopx;
553
554         /* compute the ending X based on which part of the triangle we're in */
555         if (fully < v2->y)
556            stopx = v1->x + (fully - v1->y) * dxdy_v1v2;
557         else
558            stopx = v2->x + (fully - v2->y) * dxdy_v2v3;
559
560         /* clamp to full pixels */
561         istartx = round_coordinate(startx);
562         istopx = round_coordinate(stopx);
563
564         /* force start < stop */
565         if (istartx > istopx)
566         {
567            INT32 temp = istartx;
568            istartx = istopx;
569            istopx = temp;
570         }
571
572         /* include the right edge if requested */
573         if (poly->flags & POLYFLAG_INCLUDE_RIGHT_EDGE)
574            istopx++;
575
576         /* apply left/right clipping */
577         if (istartx < cliprect.min_x)
578            istartx = cliprect.min_x;
579         if (istopx > cliprect.max_x)
580            istopx = cliprect.max_x + 1;
581
582         /* set the extent and update the total pixel count */
583         if (istartx >= istopx)
584            istartx = istopx = 0;
585         unit->extent[extnum].startx = istartx;
586         unit->extent[extnum].stopx = istopx;
587         pixels += istopx - istartx;
588      }
589   }
590#if KEEP_STATISTICS
591   poly->unit_max = MAX(poly->unit_max, poly->unit_next);
592#endif
593
594   /* compute parameter starting points and deltas */
595   if (paramcount > 0)
596   {
597      float a00 = v2->y - v3->y;
598      float a01 = v3->x - v2->x;
599      float a02 = v2->x*v3->y - v3->x*v2->y;
600      float a10 = v3->y - v1->y;
601      float a11 = v1->x - v3->x;
602      float a12 = v3->x*v1->y - v1->x*v3->y;
603      float a20 = v1->y - v2->y;
604      float a21 = v2->x - v1->x;
605      float a22 = v1->x*v2->y - v2->x*v1->y;
606      float det = a02 + a12 + a22;
607
608      if(fabsf(det) < 0.001) {
609         for (int paramnum = 0; paramnum < paramcount; paramnum++)
610         {
611            poly_param *params = &polygon->param[paramnum];
612            params->dpdx = 0;
613            params->dpdy = 0;
614            params->start = v1->p[paramnum];
615         }
616      }
617      else
618      {
619         float idet = 1/det;
620         for (int paramnum = 0; paramnum < paramcount; paramnum++)
621         {
622            poly_param *params = &polygon->param[paramnum];
623            params->dpdx  = idet*(v1->p[paramnum]*a00 + v2->p[paramnum]*a10 + v3->p[paramnum]*a20);
624            params->dpdy  = idet*(v1->p[paramnum]*a01 + v2->p[paramnum]*a11 + v3->p[paramnum]*a21);
625            params->start = idet*(v1->p[paramnum]*a02 + v2->p[paramnum]*a12 + v3->p[paramnum]*a22);
626         }
627      }
628   }
629
630   /* enqueue the work items */
631   if (poly->queue != NULL)
632      osd_work_item_queue_multiple(poly->queue, poly_item_callback, poly->unit_next - startunit, poly->unit[startunit], poly->unit_size, WORK_ITEM_FLAG_AUTO_RELEASE);
633
634   /* return the total number of pixels in the triangle */
635   poly->triangles++;
636   poly->pixels += pixels;
637   return pixels;
638}
639
640
641/*-------------------------------------------------
642    poly_render_triangle_fan - render a set of
643    triangles in a fan
644-------------------------------------------------*/
645
646UINT32 poly_render_triangle_fan(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v)
647{
648   UINT32 pixels = 0;
649   int vertnum;
650
651   /* iterate over vertices */
652   for (vertnum = 2; vertnum < numverts; vertnum++)
653      pixels += poly_render_triangle(poly, dest, cliprect, callback, paramcount, &v[0], &v[vertnum - 1], &v[vertnum]);
654   return pixels;
655}
656
657
658/*-------------------------------------------------
659    poly_render_triangle_custom - perform a custom
660    render of an object, given specific extents
661-------------------------------------------------*/
662
663UINT32 poly_render_triangle_custom(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int startscanline, int numscanlines, const poly_extent *extents)
664{
665   INT32 curscan, scaninc;
666   polygon_info *polygon;
667   INT32 v1yclip, v3yclip;
668   INT32 pixels = 0;
669   UINT32 startunit;
670
671   /* clip coordinates */
672   v1yclip = MAX(startscanline, cliprect.min_y);
673   v3yclip = MIN(startscanline + numscanlines, cliprect.max_y + 1);
674   if (v3yclip - v1yclip <= 0)
675      return 0;
676
677   /* allocate a new polygon */
678   polygon = allocate_polygon(poly, v1yclip, v3yclip);
679
680   /* fill in the polygon information */
681   polygon->poly = poly;
682   polygon->dest = dest;
683   polygon->callback = callback;
684   polygon->extra = poly->extra[poly->extra_next - 1];
685   polygon->numparams = 0;
686   polygon->numverts = 3;
687
688   /* compute the X extents for each scanline */
689   startunit = poly->unit_next;
690   for (curscan = v1yclip; curscan < v3yclip; curscan += scaninc)
691   {
692      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
693      UINT32 unit_index = poly->unit_next++;
694      tri_work_unit *unit = &poly->unit[unit_index]->tri;
695      int extnum;
696
697      /* determine how much to advance to hit the next bucket */
698      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
699
700      /* fill in the work unit basics */
701      unit->shared.polygon = polygon;
702      unit->shared.count_next = MIN(v3yclip - curscan, scaninc);
703      unit->shared.scanline = curscan;
704      unit->shared.previtem = poly->unit_bucket[bucketnum];
705      poly->unit_bucket[bucketnum] = unit_index;
706
707      /* iterate over extents */
708      for (extnum = 0; extnum < unit->shared.count_next; extnum++)
709      {
710         const poly_extent *extent = &extents[(curscan + extnum) - startscanline];
711         INT32 istartx = extent->startx, istopx = extent->stopx;
712
713         /* force start < stop */
714         if (istartx > istopx)
715         {
716            INT32 temp = istartx;
717            istartx = istopx;
718            istopx = temp;
719         }
720
721         /* apply left/right clipping */
722         if (istartx < cliprect.min_x)
723            istartx = cliprect.min_x;
724         if (istopx > cliprect.max_x)
725            istopx = cliprect.max_x + 1;
726
727         /* set the extent and update the total pixel count */
728         unit->extent[extnum].startx = istartx;
729         unit->extent[extnum].stopx = istopx;
730         if (istartx < istopx)
731            pixels += istopx - istartx;
732      }
733   }
734#if KEEP_STATISTICS
735   poly->unit_max = MAX(poly->unit_max, poly->unit_next);
736#endif
737
738   /* enqueue the work items */
739   if (poly->queue != NULL)
740      osd_work_item_queue_multiple(poly->queue, poly_item_callback, poly->unit_next - startunit, poly->unit[startunit], poly->unit_size, WORK_ITEM_FLAG_AUTO_RELEASE);
741
742   /* return the total number of pixels in the object */
743   poly->triangles++;
744   poly->pixels += pixels;
745   return pixels;
746}
747
748
749
750/***************************************************************************
751    CORE QUAD RENDERING
752***************************************************************************/
753
754/*-------------------------------------------------
755    poly_render_quad - render a single quad
756    given 4 vertexes
757-------------------------------------------------*/
758
759UINT32 poly_render_quad(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, const poly_vertex *v1, const poly_vertex *v2, const poly_vertex *v3, const poly_vertex *v4)
760{
761   poly_edge fedgelist[3], bedgelist[3];
762   const poly_edge *ledge, *redge;
763   const poly_vertex *v[4];
764   poly_edge *edgeptr;
765   int minv, maxv, curv;
766   INT32 minyclip, maxyclip;
767   INT32 miny, maxy;
768   INT32 curscan, scaninc;
769   polygon_info *polygon;
770   INT32 pixels = 0;
771   UINT32 startunit;
772
773   assert(poly->flags & POLYFLAG_ALLOW_QUADS);
774
775   /* arrays make things easier */
776   v[0] = v1;
777   v[1] = v2;
778   v[2] = v3;
779   v[3] = v4;
780
781   /* determine min/max Y vertices */
782   if (v[1]->y < v[0]->y)
783      minv = 1, maxv = 0;
784   else
785      minv = 0, maxv = 1;
786   if (v[2]->y < v[minv]->y)
787      minv = 2;
788   else if (v[2]->y > v[maxv]->y)
789      maxv = 2;
790   if (v[3]->y < v[minv]->y)
791      minv = 3;
792   else if (v[3]->y > v[maxv]->y)
793      maxv = 3;
794
795   /* determine start/end scanlines */
796   miny = round_coordinate(v[minv]->y);
797   maxy = round_coordinate(v[maxv]->y);
798
799   /* clip coordinates */
800   minyclip = miny;
801   maxyclip = maxy + ((poly->flags & POLYFLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
802   minyclip = MAX(minyclip, cliprect.min_y);
803   maxyclip = MIN(maxyclip, cliprect.max_y + 1);
804   if (maxyclip - minyclip <= 0)
805      return 0;
806
807   /* allocate a new polygon */
808   polygon = allocate_polygon(poly, minyclip, maxyclip);
809
810   /* fill in the polygon information */
811   polygon->poly = poly;
812   polygon->dest = dest;
813   polygon->callback = callback;
814   polygon->extra = poly->extra[poly->extra_next - 1];
815   polygon->numparams = paramcount;
816   polygon->numverts = 4;
817
818   /* walk forward to build up the forward edge list */
819   edgeptr = &fedgelist[0];
820   for (curv = minv; curv != maxv; curv = (curv + 1) & 3)
821   {
822      int paramnum;
823      float ooy;
824
825      /* set the two vertices */
826      edgeptr->v1 = v[curv];
827      edgeptr->v2 = v[(curv + 1) & 3];
828
829      /* if horizontal, skip altogether */
830      if (edgeptr->v1->y == edgeptr->v2->y)
831         continue;
832
833      /* need dx/dy always, and parameter deltas as necessary */
834      ooy = 1.0f / (edgeptr->v2->y - edgeptr->v1->y);
835      edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
836      for (paramnum = 0; paramnum < paramcount; paramnum++)
837         edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
838      edgeptr++;
839   }
840
841   /* walk backward to build up the backward edge list */
842   edgeptr = &bedgelist[0];
843   for (curv = minv; curv != maxv; curv = (curv - 1) & 3)
844   {
845      int paramnum;
846      float ooy;
847
848      /* set the two vertices */
849      edgeptr->v1 = v[curv];
850      edgeptr->v2 = v[(curv - 1) & 3];
851
852      /* if horizontal, skip altogether */
853      if (edgeptr->v1->y == edgeptr->v2->y)
854         continue;
855
856      /* need dx/dy always, and parameter deltas as necessary */
857      ooy = 1.0f / (edgeptr->v2->y - edgeptr->v1->y);
858      edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
859      for (paramnum = 0; paramnum < paramcount; paramnum++)
860         edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
861      edgeptr++;
862   }
863
864   /* determine which list is left/right: */
865   /* if the first vertex is shared, compare the slopes */
866   /* if the first vertex is not shared, compare the X coordinates */
867   if ((fedgelist[0].v1 == bedgelist[0].v1 && fedgelist[0].dxdy < bedgelist[0].dxdy) ||
868      (fedgelist[0].v1 != bedgelist[0].v1 && fedgelist[0].v1->x < bedgelist[0].v1->x))
869   {
870      ledge = fedgelist;
871      redge = bedgelist;
872   }
873   else
874   {
875      ledge = bedgelist;
876      redge = fedgelist;
877   }
878
879   /* compute the X extents for each scanline */
880   startunit = poly->unit_next;
881   for (curscan = minyclip; curscan < maxyclip; curscan += scaninc)
882   {
883      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
884      UINT32 unit_index = poly->unit_next++;
885      quad_work_unit *unit = &poly->unit[unit_index]->quad;
886      int extnum;
887
888      /* determine how much to advance to hit the next bucket */
889      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
890
891      /* fill in the work unit basics */
892      unit->shared.polygon = polygon;
893      unit->shared.count_next = MIN(maxyclip - curscan, scaninc);
894      unit->shared.scanline = curscan;
895      unit->shared.previtem = poly->unit_bucket[bucketnum];
896      poly->unit_bucket[bucketnum] = unit_index;
897
898      /* iterate over extents */
899      for (extnum = 0; extnum < unit->shared.count_next; extnum++)
900      {
901         float fully = (float)(curscan + extnum) + 0.5f;
902         float startx, stopx;
903         INT32 istartx, istopx;
904         int paramnum;
905
906         /* compute the ending X based on which part of the triangle we're in */
907         while (fully > ledge->v2->y && fully < v[maxv]->y)
908            ledge++;
909         while (fully > redge->v2->y && fully < v[maxv]->y)
910            redge++;
911         startx = ledge->v1->x + (fully - ledge->v1->y) * ledge->dxdy;
912         stopx = redge->v1->x + (fully - redge->v1->y) * redge->dxdy;
913
914         /* clamp to full pixels */
915         istartx = round_coordinate(startx);
916         istopx = round_coordinate(stopx);
917
918         /* compute parameter starting points and deltas */
919         if (paramcount > 0)
920         {
921            float ldy = fully - ledge->v1->y;
922            float rdy = fully - redge->v1->y;
923            float oox = 1.0f / (stopx - startx);
924
925            /* iterate over parameters */
926            for (paramnum = 0; paramnum < paramcount; paramnum++)
927            {
928               float lparam = ledge->v1->p[paramnum] + ldy * ledge->dpdy[paramnum];
929               float rparam = redge->v1->p[paramnum] + rdy * redge->dpdy[paramnum];
930               float dpdx = (rparam - lparam) * oox;
931
932               unit->extent[extnum].param[paramnum].start = lparam;// - ((float)istartx + 0.5f) * dpdx;
933               unit->extent[extnum].param[paramnum].dpdx = dpdx;
934            }
935         }
936
937         /* include the right edge if requested */
938         if (poly->flags & POLYFLAG_INCLUDE_RIGHT_EDGE)
939            istopx++;
940
941         /* apply left/right clipping */
942         if (istartx < cliprect.min_x)
943         {
944            for (paramnum = 0; paramnum < paramcount; paramnum++)
945               unit->extent[extnum].param[paramnum].start += (cliprect.min_x - istartx) * unit->extent[extnum].param[paramnum].dpdx;
946            istartx = cliprect.min_x;
947         }
948         if (istopx > cliprect.max_x)
949            istopx = cliprect.max_x + 1;
950
951         /* set the extent and update the total pixel count */
952         if (istartx >= istopx)
953            istartx = istopx = 0;
954         unit->extent[extnum].startx = istartx;
955         unit->extent[extnum].stopx = istopx;
956         pixels += istopx - istartx;
957      }
958   }
959#if KEEP_STATISTICS
960   poly->unit_max = MAX(poly->unit_max, poly->unit_next);
961#endif
962
963   /* enqueue the work items */
964   if (poly->queue != NULL)
965      osd_work_item_queue_multiple(poly->queue, poly_item_callback, poly->unit_next - startunit, poly->unit[startunit], poly->unit_size, WORK_ITEM_FLAG_AUTO_RELEASE);
966
967   /* return the total number of pixels in the triangle */
968   poly->quads++;
969   poly->pixels += pixels;
970   return pixels;
971}
972
973
974/*-------------------------------------------------
975    poly_render_quad_fan - render a set of
976    quads in a fan
977-------------------------------------------------*/
978
979UINT32 poly_render_quad_fan(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v)
980{
981   UINT32 pixels = 0;
982   int vertnum;
983
984   /* iterate over vertices */
985   for (vertnum = 2; vertnum < numverts; vertnum += 2)
986      pixels += poly_render_quad(poly, dest, cliprect, callback, paramcount, &v[0], &v[vertnum - 1], &v[vertnum], &v[MIN(vertnum + 1, numverts - 1)]);
987   return pixels;
988}
989
990
991
992/***************************************************************************
993    CORE POLYGON RENDERING
994***************************************************************************/
995
996/*-------------------------------------------------
997    poly_render_polygon - render a single polygon up
998    to 32 vertices
999-------------------------------------------------*/
1000
1001UINT32 poly_render_polygon(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v)
1002{
1003   poly_edge fedgelist[MAX_POLYGON_VERTS - 1], bedgelist[MAX_POLYGON_VERTS - 1];
1004   const poly_edge *ledge, *redge;
1005   poly_edge *edgeptr;
1006   int minv, maxv, curv;
1007   INT32 minyclip, maxyclip;
1008   INT32 miny, maxy;
1009   INT32 curscan, scaninc;
1010   polygon_info *polygon;
1011   INT32 pixels = 0;
1012   UINT32 startunit;
1013   int vertnum;
1014
1015   assert(poly->flags & POLYFLAG_ALLOW_QUADS);
1016
1017   /* determine min/max Y vertices */
1018   minv = maxv = 0;
1019   for (vertnum = 1; vertnum < numverts; vertnum++)
1020   {
1021      if (v[vertnum].y < v[minv].y)
1022         minv = vertnum;
1023      else if (v[vertnum].y > v[maxv].y)
1024         maxv = vertnum;
1025   }
1026
1027   /* determine start/end scanlines */
1028   miny = round_coordinate(v[minv].y);
1029   maxy = round_coordinate(v[maxv].y);
1030
1031   /* clip coordinates */
1032   minyclip = miny;
1033   maxyclip = maxy + ((poly->flags & POLYFLAG_INCLUDE_BOTTOM_EDGE) ? 1 : 0);
1034   minyclip = MAX(minyclip, cliprect.min_y);
1035   maxyclip = MIN(maxyclip, cliprect.max_y + 1);
1036   if (maxyclip - minyclip <= 0)
1037      return 0;
1038
1039   /* allocate a new polygon */
1040   polygon = allocate_polygon(poly, minyclip, maxyclip);
1041
1042   /* fill in the polygon information */
1043   polygon->poly = poly;
1044   polygon->dest = dest;
1045   polygon->callback = callback;
1046   polygon->extra = poly->extra[poly->extra_next - 1];
1047   polygon->numparams = paramcount;
1048   polygon->numverts = numverts;
1049
1050   /* walk forward to build up the forward edge list */
1051   edgeptr = &fedgelist[0];
1052   for (curv = minv; curv != maxv; curv = (curv == numverts - 1) ? 0 : (curv + 1))
1053   {
1054      int paramnum;
1055      float ooy;
1056
1057      /* set the two vertices */
1058      edgeptr->v1 = &v[curv];
1059      edgeptr->v2 = &v[(curv == numverts - 1) ? 0 : (curv + 1)];
1060
1061      /* if horizontal, skip altogether */
1062      if (edgeptr->v1->y == edgeptr->v2->y)
1063         continue;
1064
1065      /* need dx/dy always, and parameter deltas as necessary */
1066      ooy = 1.0f / (edgeptr->v2->y - edgeptr->v1->y);
1067      edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
1068      for (paramnum = 0; paramnum < paramcount; paramnum++)
1069         edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
1070      edgeptr++;
1071   }
1072
1073   /* walk backward to build up the backward edge list */
1074   edgeptr = &bedgelist[0];
1075   for (curv = minv; curv != maxv; curv = (curv == 0) ? (numverts - 1) : (curv - 1))
1076   {
1077      int paramnum;
1078      float ooy;
1079
1080      /* set the two vertices */
1081      edgeptr->v1 = &v[curv];
1082      edgeptr->v2 = &v[(curv == 0) ? (numverts - 1) : (curv - 1)];
1083
1084      /* if horizontal, skip altogether */
1085      if (edgeptr->v1->y == edgeptr->v2->y)
1086         continue;
1087
1088      /* need dx/dy always, and parameter deltas as necessary */
1089      ooy = 1.0f / (edgeptr->v2->y - edgeptr->v1->y);
1090      edgeptr->dxdy = (edgeptr->v2->x - edgeptr->v1->x) * ooy;
1091      for (paramnum = 0; paramnum < paramcount; paramnum++)
1092         edgeptr->dpdy[paramnum] = (edgeptr->v2->p[paramnum] - edgeptr->v1->p[paramnum]) * ooy;
1093      edgeptr++;
1094   }
1095
1096   /* determine which list is left/right: */
1097   /* if the first vertex is shared, compare the slopes */
1098   /* if the first vertex is not shared, compare the X coordinates */
1099   if ((fedgelist[0].v1 == bedgelist[0].v1 && fedgelist[0].dxdy < bedgelist[0].dxdy) ||
1100      (fedgelist[0].v1 != bedgelist[0].v1 && fedgelist[0].v1->x < bedgelist[0].v1->x))
1101   {
1102      ledge = fedgelist;
1103      redge = bedgelist;
1104   }
1105   else
1106   {
1107      ledge = bedgelist;
1108      redge = fedgelist;
1109   }
1110
1111   /* compute the X extents for each scanline */
1112   startunit = poly->unit_next;
1113   for (curscan = minyclip; curscan < maxyclip; curscan += scaninc)
1114   {
1115      UINT32 bucketnum = ((UINT32)curscan / SCANLINES_PER_BUCKET) % TOTAL_BUCKETS;
1116      UINT32 unit_index = poly->unit_next++;
1117      quad_work_unit *unit = &poly->unit[unit_index]->quad;
1118      int extnum;
1119
1120      /* determine how much to advance to hit the next bucket */
1121      scaninc = SCANLINES_PER_BUCKET - (UINT32)curscan % SCANLINES_PER_BUCKET;
1122
1123      /* fill in the work unit basics */
1124      unit->shared.polygon = polygon;
1125      unit->shared.count_next = MIN(maxyclip - curscan, scaninc);
1126      unit->shared.scanline = curscan;
1127      unit->shared.previtem = poly->unit_bucket[bucketnum];
1128      poly->unit_bucket[bucketnum] = unit_index;
1129
1130      /* iterate over extents */
1131      for (extnum = 0; extnum < unit->shared.count_next; extnum++)
1132      {
1133         float fully = (float)(curscan + extnum) + 0.5f;
1134         float startx, stopx;
1135         INT32 istartx, istopx;
1136         int paramnum;
1137
1138         /* compute the ending X based on which part of the triangle we're in */
1139         while (fully > ledge->v2->y && fully < v[maxv].y)
1140            ledge++;
1141         while (fully > redge->v2->y && fully < v[maxv].y)
1142            redge++;
1143         startx = ledge->v1->x + (fully - ledge->v1->y) * ledge->dxdy;
1144         stopx = redge->v1->x + (fully - redge->v1->y) * redge->dxdy;
1145
1146         /* clamp to full pixels */
1147         istartx = round_coordinate(startx);
1148         istopx = round_coordinate(stopx);
1149
1150         /* compute parameter starting points and deltas */
1151         if (paramcount > 0)
1152         {
1153            float ldy = fully - ledge->v1->y;
1154            float rdy = fully - redge->v1->y;
1155            float oox = 1.0f / (stopx - startx);
1156
1157            /* iterate over parameters */
1158            for (paramnum = 0; paramnum < paramcount; paramnum++)
1159            {
1160               float lparam = ledge->v1->p[paramnum] + ldy * ledge->dpdy[paramnum];
1161               float rparam = redge->v1->p[paramnum] + rdy * redge->dpdy[paramnum];
1162               float dpdx = (rparam - lparam) * oox;
1163
1164               unit->extent[extnum].param[paramnum].start = lparam;// - ((float)istartx + 0.5f) * dpdx;
1165               unit->extent[extnum].param[paramnum].dpdx = dpdx;
1166            }
1167         }
1168
1169         /* include the right edge if requested */
1170         if (poly->flags & POLYFLAG_INCLUDE_RIGHT_EDGE)
1171            istopx++;
1172
1173         /* apply left/right clipping */
1174         if (istartx < cliprect.min_x)
1175         {
1176            for (paramnum = 0; paramnum < paramcount; paramnum++)
1177               unit->extent[extnum].param[paramnum].start += (cliprect.min_x - istartx) * unit->extent[extnum].param[paramnum].dpdx;
1178            istartx = cliprect.min_x;
1179         }
1180         if (istopx > cliprect.max_x)
1181            istopx = cliprect.max_x + 1;
1182
1183         /* set the extent and update the total pixel count */
1184         if (istartx >= istopx)
1185            istartx = istopx = 0;
1186         unit->extent[extnum].startx = istartx;
1187         unit->extent[extnum].stopx = istopx;
1188         pixels += istopx - istartx;
1189      }
1190   }
1191#if KEEP_STATISTICS
1192   poly->unit_max = MAX(poly->unit_max, poly->unit_next);
1193#endif
1194
1195   /* enqueue the work items */
1196   if (poly->queue != NULL)
1197      osd_work_item_queue_multiple(poly->queue, poly_item_callback, poly->unit_next - startunit, poly->unit[startunit], poly->unit_size, WORK_ITEM_FLAG_AUTO_RELEASE);
1198
1199   /* return the total number of pixels in the triangle */
1200   poly->quads++;
1201   poly->pixels += pixels;
1202   return pixels;
1203}
1204
1205
1206
1207/***************************************************************************
1208    CLIPPING
1209***************************************************************************/
1210
1211/*-------------------------------------------------
1212    poly_zclip_if_less - clip a polygon against clipval using p[0] of each vertex:
1213    vertices with p[0] < clipval are dropped, and a vertex is interpolated on every edge that crosses the threshold;
1214    results are written to outv and the number of vertices written is returned
1215-------------------------------------------------*/
1216
1217int poly_zclip_if_less(int numverts, const poly_vertex *v, poly_vertex *outv, int paramcount, float clipval)
1218{
1219   int prevclipped = (v[numverts - 1].p[0] < clipval); /* seed with the last vertex so the closing edge is handled on the first iteration; NOTE(review): reads v[numverts - 1] unconditionally, so numverts is assumed >= 1 */
1220   poly_vertex *nextout = outv; /* write cursor into outv */
1221   int vertnum;
1222
1223   /* iterate over vertices, treating (previous vertex, this vertex) as the current edge */
1224   for (vertnum = 0; vertnum < numverts; vertnum++)
1225   {
1226      int thisclipped = (v[vertnum].p[0] < clipval);
1227
1228      /* if the clipped state changed in either direction along this edge, interpolate a vertex */
1229      if (thisclipped != prevclipped)
1230         interpolate_vertex(nextout++, &v[(vertnum == 0) ? (numverts - 1) : (vertnum - 1)], &v[vertnum], paramcount, clipval);
1231
1232      /* if this vertex is not clipped, copy it in (after any interpolated vertex for the same edge) */
1233      if (!thisclipped)
1234         copy_vertex(nextout++, &v[vertnum], paramcount);
1235
1236      /* remember the last state */
1237      prevclipped = thisclipped;
1238   }
1239   return nextout - outv; /* count of vertices emitted */
1240}
1241
1242
1243
1244/***************************************************************************
1245    INTERNAL FUNCTIONS
1246***************************************************************************/
1247
1248/*-------------------------------------------------
1249    allocate_array - allocate an array of itemcount pointers, each pointing at one item inside a single zero-filled block; *itemsize is rounded up to a multiple of CACHE_LINE_SIZE and updated in place; returns NULL if itemcount is 0
1250-------------------------------------------------*/
1251
1252static void **allocate_array(running_machine &machine, size_t *itemsize, UINT32 itemcount)
1253{
1254   void **ptrarray;
1255   int itemnum;
1256
1257   /* nothing to allocate for 0 items; *itemsize is left untouched in this case */
1258   if (itemcount == 0)
1259      return NULL;
1260
1261   /* round the item size up to a multiple of the cache line size (the caller sees the rounded value) */
1262   *itemsize = ((*itemsize + CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE) * CACHE_LINE_SIZE;
1263
1264   /* allocate the array of pointers */
1265   ptrarray = auto_alloc_array_clear(machine, void *, itemcount);
1266
1267   /* allocate all of the items as one contiguous byte block; entry 0 points at its start */
1268   ptrarray[0] = auto_alloc_array_clear(machine, UINT8, *itemsize * itemcount);
1269
1270   /* point each remaining entry at its slot inside the block; NOTE(review): int itemnum is compared against UINT32 itemcount */
1271   for (itemnum = 1; itemnum < itemcount; itemnum++)
1272      ptrarray[itemnum] = (UINT8 *)ptrarray[0] + *itemsize * itemnum;
1273   return ptrarray;
1274}
1275
1276
1277/*-------------------------------------------------
1278    poly_item_callback - work callback for one poly work unit: renders each scanline of the unit through the polygon's callback,
1279    then continues with any unit that chained itself behind this one; shared.count_next holds the scanline count in its low 16 bits and the index of the chained unit in its high 16 bits; always returns NULL
1280-------------------------------------------------*/
1281
1282static void *poly_item_callback(void *param, int threadid)
1283{
1284   while (1)
1285   {
1286      work_unit *unit = (work_unit *)param;
1287      polygon_info *polygon = unit->shared.polygon;
1288      int count = unit->shared.count_next & 0xffff; /* low 16 bits: number of scanlines in this unit */
1289      UINT32 orig_count_next;
1290      int curscan;
1291
1292      /* if our previous item (previtem; 0xffff means none) isn't done yet, record our index in the upper 16 bits of its count_next so that it continues with us when it finishes */
1293      if (unit->shared.previtem != 0xffff)
1294      {
1295         work_unit *prevunit = polygon->poly->unit[unit->shared.previtem];
1296         if (prevunit->shared.count_next != 0)
1297         {
1298            UINT32 unitnum = ((UINT8 *)unit - (UINT8 *)polygon->poly->unit[0]) / polygon->poly->unit_size; /* our own index, derived from the byte offset from unit[0] */
1299            UINT32 new_count_next;
1300
1301            /* attempt to atomically swap in this new value, retrying if count_next changed underneath us */
1302            do
1303            {
1304               orig_count_next = prevunit->shared.count_next;
1305               new_count_next = orig_count_next | (unitnum << 16);
1306            } while (compare_exchange32((volatile INT32 *)&prevunit->shared.count_next, orig_count_next, new_count_next) != orig_count_next);
1307
1308#if KEEP_STATISTICS
1309            /* track conflicts, and those resolved by chaining onto the previous unit */
1310            polygon->poly->conflicts[threadid]++;
1311            if (orig_count_next != 0)
1312               polygon->poly->resolved[threadid]++;
1313#endif
1314            /* if the previous unit was still pending when the swap landed, it will run us later, so skip out early to do other work; if it had already reached 0 we fall through and render ourselves */
1315            if (orig_count_next != 0)
1316               break;
1317         }
1318      }
1319
1320      /* iterate over extents, one callback invocation per scanline; triangle units (numverts == 3) are converted to a temporary poly_extent first */
1321      for (curscan = 0; curscan < count; curscan++)
1322      {
1323         if (polygon->numverts == 3)
1324         {
1325            poly_extent tmpextent;
1326            convert_tri_extent_to_poly_extent(&tmpextent, &unit->tri.extent[curscan], polygon, unit->shared.scanline + curscan);
1327            (*polygon->callback)(polygon->dest, unit->shared.scanline + curscan, &tmpextent, polygon->extra, threadid);
1328         }
1329         else
1330            (*polygon->callback)(polygon->dest, unit->shared.scanline + curscan, &unit->quad.extent[curscan], polygon->extra, threadid);
1331      }
1332
1333      /* atomically set our count_next to 0 (a zero value is what later units test for above) and re-fetch the original value */
1334      do
1335      {
1336         orig_count_next = unit->shared.count_next;
1337      } while (compare_exchange32((volatile INT32 *)&unit->shared.count_next, orig_count_next, 0) != orig_count_next);
1338
1339      /* the upper 16 bits hold the index of a unit that chained itself behind us; if 0, we have no more work to do */
1340      orig_count_next >>= 16;
1341      if (orig_count_next == 0)
1342         break;
1343      param = polygon->poly->unit[orig_count_next]; /* loop around and process the chained unit on this thread */
1344   }
1345   return NULL;
1346}
1347
1348
1349/*-------------------------------------------------
1350    poly_state_presave - pre-save callback; calls poly_wait on the
1351    given manager so everything is synced before saving
1352-------------------------------------------------*/
1353
1354static void poly_state_presave(legacy_poly_manager *poly)
1355{
1356   poly_wait(poly, "pre-save"); /* "pre-save" is passed as poly_wait's debug_reason string */
1357}
Property changes on: trunk/src/emu/video/polylgcy.c
Added: svn:mime-type
   + text/plain
Added: svn:eol-style
   + native
trunk/src/emu/video/polylgcy.h
r0r28721
1/***************************************************************************
2
3    polylgcy.h
4
5    Legacy polygon helper routines.
6
7****************************************************************************
8
9    Pixel model:
10
11    (0.0,0.0)       (1.0,0.0)       (2.0,0.0)       (3.0,0.0)
12        +---------------+---------------+---------------+
13        |               |               |               |
14        |               |               |               |
15        |   (0.5,0.5)   |   (1.5,0.5)   |   (2.5,0.5)   |
16        |       *       |       *       |       *       |
17        |               |               |               |
18        |               |               |               |
19    (0.0,1.0)       (1.0,1.0)       (2.0,1.0)       (3.0,1.0)
20        +---------------+---------------+---------------+
21        |               |               |               |
22        |               |               |               |
23        |   (0.5,1.5)   |   (1.5,1.5)   |   (2.5,1.5)   |
24        |       *       |       *       |       *       |
25        |               |               |               |
26        |               |               |               |
27        |               |               |               |
28        +---------------+---------------+---------------+
29    (0.0,2.0)       (1.0,2.0)       (2.0,2.0)       (3.0,2.0)
30
31***************************************************************************/
32
33#pragma once
34
35#ifndef __POLYNEW_H__
36#define __POLYNEW_H__
37
38
39/***************************************************************************
40    CONSTANTS
41***************************************************************************/
42
43#define MAX_VERTEX_PARAMS                   6
44#define MAX_POLYGON_VERTS                   32
45
46#define POLYFLAG_INCLUDE_BOTTOM_EDGE        0x01
47#define POLYFLAG_INCLUDE_RIGHT_EDGE         0x02
48#define POLYFLAG_NO_WORK_QUEUE              0x04
49#define POLYFLAG_ALLOW_QUADS                0x08
50
51
52
53/***************************************************************************
54    TYPE DEFINITIONS
55***************************************************************************/
56
57/* opaque reference to the poly manager */
58struct legacy_poly_manager;
59
60
61/* input vertex data */
62struct poly_vertex
63{
64   float       x;                          /* X coordinate */
65   float       y;                          /* Y coordinate */
66   float       p[MAX_VERTEX_PARAMS];       /* interpolated parameter values */
67};
68
69
70/* poly_param_extent describes information for a single parameter in an extent */
71struct poly_param_extent
72{
73   float       start;                      /* parameter value at starting X,Y */
74   float       dpdx;                       /* dp/dx relative to starting X */
75};
76
77
78/* poly_extent describes start/end points for a scanline, along with per-scanline parameters */
79struct poly_extent
80{
81   INT16       startx;                     /* starting X coordinate (inclusive) */
82   INT16       stopx;                      /* ending X coordinate (exclusive) */
83   poly_param_extent param[MAX_VERTEX_PARAMS]; /* starting and dx values for each parameter */
84};
85
86
87/* callback routine to render a single scanline extent of a polygon */
88typedef void (*poly_draw_scanline_func)(void *dest, INT32 scanline, const poly_extent *extent, const void *extradata, int threadid);
89
90
91
92/***************************************************************************
93    FUNCTION PROTOTYPES
94***************************************************************************/
95
96
97/* ----- initialization/teardown ----- */
98
99/* allocate a new poly manager that can render triangles */
100legacy_poly_manager *poly_alloc(running_machine &machine, int max_polys, size_t extra_data_size, UINT8 flags);
101
102/* free a poly manager */
103void poly_free(legacy_poly_manager *poly);
104
105
106
107/* ----- common functions ----- */
108
109/* wait until all polygons in the queue have been rendered */
110void poly_wait(legacy_poly_manager *poly, const char *debug_reason);
111
112/* get a pointer to the extra data for the next polygon */
113void *poly_get_extra_data(legacy_poly_manager *poly);
114
115
116
117/* ----- core triangle rendering ----- */
118
119/* render a single triangle given 3 vertexes */
120UINT32 poly_render_triangle(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, const poly_vertex *v1, const poly_vertex *v2, const poly_vertex *v3);
121
122/* render a set of triangles in a fan */
123UINT32 poly_render_triangle_fan(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v);
124
125/* perform a custom render of an object, given specific extents */
126UINT32 poly_render_triangle_custom(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int startscanline, int numscanlines, const poly_extent *extents);
127
128
129
130/* ----- core quad rendering ----- */
131
132/* render a single quad given 4 vertexes */
133UINT32 poly_render_quad(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, const poly_vertex *v1, const poly_vertex *v2, const poly_vertex *v3, const poly_vertex *v4);
134
135/* render a set of quads in a fan */
136UINT32 poly_render_quad_fan(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v);
137
138
139
140/* ----- core polygon rendering ----- */
141
142/* render a single polygon of up to MAX_POLYGON_VERTS (32) vertices */
143UINT32 poly_render_polygon(legacy_poly_manager *poly, void *dest, const rectangle &cliprect, poly_draw_scanline_func callback, int paramcount, int numverts, const poly_vertex *v);
144
145
146
147/* ----- clipping ----- */
148
149/* zclip (assumes p[0] == z) a polygon */
150int poly_zclip_if_less(int numverts, const poly_vertex *v, poly_vertex *outv, int paramcount, float clipval);
151
152
153#endif  /* __POLYNEW_H__ */
Property changes on: trunk/src/emu/video/polylgcy.h
Added: svn:mime-type
   + text/plain
Added: svn:eol-style
   + native
trunk/src/mame/includes/midvunit.h
r28720r28721
66
77**************************************************************************/
88
9#include "video/polynew.h"
9#include "video/poly.h"
1010
1111#define MIDVUNIT_VIDEO_CLOCK    33000000
1212
trunk/src/mame/includes/model3.h
r28720r28721
1#include "video/poly.h"
1#include "video/polylgcy.h"
22#include "machine/scsibus.h"
33#include "machine/53c810.h"
44#include "audio/dsbz80.h"
trunk/src/mame/includes/gaelco3d.h
r28720r28721
99**************************************************************************/
1010
1111#include "sound/dmadac.h"
12#include "video/polynew.h"
12#include "video/poly.h"
1313#include "machine/eepromser.h"
1414#include "machine/gaelco3d.h"
1515
trunk/src/mame/includes/taitojc.h
r28720r28721
1#include "video/polynew.h"
1#include "video/poly.h"
22#include "machine/taitoio.h"
33
44#define TAITOJC_POLYGON_FIFO_SIZE       0x20000
trunk/src/mame/includes/galastrm.h
r28720r28721
11#include "machine/eepromser.h"
2#include "video/poly.h"
2#include "video/polylgcy.h"
33#include "video/tc0100scn.h"
44#include "video/tc0480scp.h"
55
trunk/src/mame/includes/model2.h
r28720r28721
1#include "video/poly.h"
1#include "video/polylgcy.h"
22#include "audio/dsbz80.h"
33#include "audio/segam1audio.h"
44#include "machine/eepromser.h"
trunk/src/mame/includes/namcos22.h
r28720r28721
66
77#include "machine/eeprompar.h"
88#include "video/rgbutil.h"
9#include "video/polynew.h"
9#include "video/poly.h"
1010
1111enum
1212{
trunk/src/mame/video/midzeus2.c
r28720r28721
99#include "emu.h"
1010#include "cpu/tms32031/tms32031.h"
1111#include "includes/midzeus.h"
12#include "video/poly.h"
12#include "video/polylgcy.h"
1313#include "video/rgbutil.h"
1414
1515
trunk/src/mame/video/galastrm.c
r28720r28721
11#include "emu.h"
2#include "video/poly.h"
2#include "video/polylgcy.h"
33#include "includes/galastrm.h"
44
55#define X_OFFSET 96
trunk/src/mame/video/namcos22.c
r28720r28721
55 *
66 * - emulate slave dsp!
77 * - texture u/v mapping is often 1 pixel off, resulting in many glitch lines/gaps between textures. The glitch may be in MAME core:
8 *       it used to be much worse with the old poly.h
8 *       it used to be much worse with the legacy_poly_manager
99 * - tokyowar tanks are not shootable, same for timecris helicopter, there's still a very small hitbox but almost impossible to hit.
1010 *       airco22b may have a similar problem. (is this related to dsp? or cpu?)
1111 * - find out how/where vics num_sprites is determined exactly, currently a workaround is needed for airco22b and dirtdash
trunk/src/mame/video/midzeus.c
r28720r28721
88
99#include "emu.h"
1010#include "includes/midzeus.h"
11#include "video/poly.h"
11#include "video/polylgcy.h"
1212#include "video/rgbutil.h"
1313
1414
trunk/src/mame/video/gticlub.c
r28720r28721
33#include "cpu/sharc/sharc.h"
44#include "machine/konppc.h"
55#include "video/voodoo.h"
6#include "video/poly.h"
6#include "video/polylgcy.h"
77#include "video/k001604.h"
88#include "video/gticlub.h"
99
trunk/src/mame/video/taitojc.c
r28720r28721
77*************************************************************************/
88
99#include "emu.h"
10#include "video/polynew.h"
10#include "video/poly.h"
1111#include "includes/taitojc.h"
1212
1313static const gfx_layout taitojc_char_layout =
trunk/src/mame/video/n64.h
r28720r28721
33
44#include "emu.h"
55#include "includes/n64.h"
6#include "video/polynew.h"
6#include "video/poly.h"
77#include "video/rdpblend.h"
88#include "video/rdptpipe.h"
99
trunk/src/mame/video/model2.c
r28720r28721
8787*********************************************************************************************************************************/
8888#include "emu.h"
8989#include "video/segaic24.h"
90#include "video/poly.h"
90#include "video/polylgcy.h"
9191#include "includes/model2.h"
9292
9393#define MODEL2_VIDEO_DEBUG 0
trunk/src/mame/video/k001005.c
r28720r28721
409409
410410}
411411
412/* emu/video/poly.c cannot handle atm callbacks passing a device parameter */
412/* legacy_poly_manager cannot handle atm callbacks passing a device parameter */
413413
414414#if POLY_DEVICE
415415void k001005_device::draw_scanline( void *dest, INT32 scanline, const poly_extent *extent, const void *extradata, int threadid )
trunk/src/mame/video/k001005.h
r28720r28721
22#ifndef __K001005_H__
33#define __K001005_H__
44
5#include "video/poly.h"
5#include "video/polylgcy.h"
66#include "cpu/sharc/sharc.h"
77
88#define POLY_DEVICE 0
trunk/src/mame/video/model3.c
r28720r28721
11#include "emu.h"
2#include "video/poly.h"
2#include "video/polylgcy.h"
33#include "video/rgbutil.h"
44#include "includes/model3.h"
55
trunk/src/mame/drivers/chihiro.c
r28720r28721
365365#include "machine/idectrl.h"
366366#include "machine/idehd.h"
367367#include "machine/naomigd.h"
368#include "video/polynew.h"
368#include "video/poly.h"
369369#include "bitmap.h"
370370#include "debug/debugcon.h"
371371#include "debug/debugcmd.h"
trunk/src/mame/drivers/cobra.c
r28720r28721
320320#include "machine/jvsdev.h"
321321#include "machine/timekpr.h"
322322#include "video/k001604.h"
323#include "video/polynew.h"
323#include "video/poly.h"
324324#include "video/rgbgen.h"
325325#include "sound/rf5c400.h"
326326#include "sound/dmadac.h"
trunk/src/mame/drivers/namcos23.c
r28720r28721
12251225
12261226#include "emu.h"
12271227#include <float.h>
1228#include "video/poly.h"
1228#include "video/polylgcy.h"
12291229#include "cpu/mips/mips3.h"
12301230#include "cpu/h83002/h8.h"
12311231#include "cpu/sh2/sh2.h"
trunk/src/mame/drivers/taitotz.c
r28720r28721
177177#include "machine/ataintf.h"
178178#include "machine/idehd.h"
179179#include "machine/nvram.h"
180#include "video/polynew.h"
180#include "video/poly.h"
181181
182182/*
183183    Interesting mem areas
trunk/src/mame/drivers/atarisy4.c
r28720r28721
1717#include "emu.h"
1818#include "cpu/m68000/m68000.h"
1919#include "cpu/tms32010/tms32010.h"
20#include "video/poly.h"
20#include "video/polylgcy.h"
2121
2222
2323class atarisy4_state : public driver_device

Previous 199869 Revisions Next


© 1997-2024 The MAME Team