1 /**
2   Blitter for painting radial gradients.
3 
4   Copyright Chris Jones 2020.
5   Distributed under the Boost Software License, Version 1.0.
6   See accompanying file Licence.txt or copy at...
7   https://www.boost.org/LICENSE_1_0.txt
8 */
9 
10 module dg2d.radialblit;
11 
12 import dg2d.rasterizer;
13 import dg2d.gradient;
14 import dg2d.misc;
15 import dg2d.blitex;
16 
17 /**
18    Radial gradient blitter struct.
19 
20    You set up the properties and pass the BlitFunc to the rasterizer.
21 
22    ---
   auto rblit = RadialBlit(m_pixels,m_stride,m_height);
24    rblit.setPaint(grad, wr, RepeatMode.Pad);
25    rblit.setCoords(x0,y0,x1,y1,x2,y2);
26    m_rasterizer.rasterize(rblit.getBlitFunc);
27    ---
28 */
29 
30 struct RadialBlit
31 {
32     /** Construct a Radial blitter.
33     pixels - pointer to a 32 bpp pixel buffer
34     stride - buffer width in pixels
35     height - buffer heigth in pixels
36 
37     note: buffer must be 16 byte aligned, stride must be multiple of 4
38     */
39 
40     this(uint* pixels, int stride, int height)
41     {
42         assert(((cast(uint)pixels) & 15) == 0); // must be 16 byte aligned
43         assert((stride & 3) == 0);              // stride must be 16 byte aligned
44         assert(height > 0);
45         this.pixels = pixels;
46         this.stride = stride;
47         this.height = height;
48     }
49 
50     /** set the gradient, winding rule and repeat mode. "numRepeats" sets how many times
51     the gradient repeats in 360 degrees.
52     */
53 
54     void setPaint(Gradient grad, WindingRule wrule, RepeatMode rmode)
55     {
56         assert(grad !is null);
57         assert(isPow2(grad.lookupLength));
58         gradient = grad;
59         windingRule = wrule;
60         repeatMode = rmode;
61     }
62   
63     /** Specifiy the orientation in terms of an elipse, for that we need 3 points...
64     (x0,y0) is the center of the elipse
65     (x1,y1) is radius at 0 degrees
66     (x2,y2) is radius at 90 degrees
67     The radii dont need to be at right angles, so it can handle elipse that has been
68     though any affine transform.
69     */
70 
71     void setCoords(float x0, float y0, float x1, float y1, float x2, float y2)
72     {
73         xctr = x0;
74         yctr = y0;
75         float w0 = x1-x0;
76         float h0 = y1-y0;
77         float w1 = x2-x0;
78         float h1 = y2-y0;
79         float q = w1*h0 - w0*h1;
80         if (abs(q) < 0.1) q = (q < 0) ? -0.1 : 0.1;
81         xstep0 = gradient.lookupLength * -h1 / q;
82         ystep0 = gradient.lookupLength * w1 / q;
83         xstep1 = gradient.lookupLength * h0 / q;
84         ystep1 = gradient.lookupLength * -w0 / q;
85 
86     }
87 
88     /** Specifiy the orientation in terms of an circle, for that we need two points,
89     (x0,y0) is the center of the circle
90     (x1,y1) is radius at 0 degrees
91     */
92 
93     void setCoords(float x0, float y0, float x1, float y1)
94     {
95         setCoords(x0,y0,x1,y1,x0-y1+y0,y0+x1-x0);
96     }
97 
98     /** returns a BlitFunc for use by the rasterizer */
99 
100     BlitFunc getBlitFunc() return
101     {
102         if (windingRule == WindingRule.NonZero)
103         {
104             switch(repeatMode)
105             {
106                 case RepeatMode.Pad: return &radial_blit!(WindingRule.NonZero,RepeatMode.Pad);
107                 case RepeatMode.Repeat: return &radial_blit!(WindingRule.NonZero,RepeatMode.Repeat);
108                 case RepeatMode.Mirror: return &radial_blit!(WindingRule.NonZero,RepeatMode.Mirror);
109                 default: assert(0);
110             }
111         }
112         else
113         {
114             switch(repeatMode)
115             {
116                 case RepeatMode.Pad: return &radial_blit!(WindingRule.EvenOdd,RepeatMode.Pad);
117                 case RepeatMode.Repeat: return &radial_blit!(WindingRule.EvenOdd,RepeatMode.Repeat);
118                 case RepeatMode.Mirror: return &radial_blit!(WindingRule.EvenOdd,RepeatMode.Mirror);
119                 default: assert(0);
120             }
121         }
122     }
123 
124 private:
125 
126     void radial_blit(WindingRule wr, RepeatMode mode)(int* delta, DMWord* mask, int x0, int x1, int y)
127     {
128         assert(x0 >= 0);
129         assert(x1 <= stride);
130         assert(y >= 0);
131         assert(y < height);
132         assert((x0 & 3) == 0);
133         assert((x1 & 3) == 0);
134 
135         // main blit variables
136 
137         int bpos = x0 / 4;
138         int endbit = x1 / 4;
139         uint* dest = &pixels[y*stride];
140         __m128i xmWinding = 0;
141         uint* lut = gradient.getLookup.ptr;
142         __m128i lutmsk = gradient.lookupLength - 1;
143         __m128i lutmsk2 = gradient.lookupLength*2 - 1;
144 
145         // XMM constants
146 
147         immutable __m128i XMZERO = 0;
148 
149         // paint variables
150 
151         float t0 = (bpos*4-xctr)*xstep0 + (y-yctr)*ystep0;
152         __m128 xmT0 = _mm_mul_ps(_mm_set1_ps(xstep0), _mm_setr_ps(0.0f,1.0f,2.0f,3.0f));
153         xmT0 = _mm_add_ps(xmT0, _mm_set1_ps(t0));
154         __m128 xmStep0 = _mm_set1_ps(xstep0*4);
155 
156         float t1 = (bpos*4-xctr)*xstep1 + (y-yctr)*ystep1;
157         __m128 xmT1 = _mm_mul_ps(_mm_set1_ps(xstep1), _mm_setr_ps(0.0f,1.0f,2.0f,3.0f));
158         xmT1 = _mm_add_ps(xmT1, _mm_set1_ps(t1));
159         __m128 xmStep1 = _mm_set1_ps(xstep1*4);
160 
161         // main loop 
162 
163         while (bpos < endbit)
164         {
165             int nsb = nextSetBit(mask, bpos, endbit);
166 
167             // do we have a span of unchanging coverage?
168 
169             if (bpos < nsb)
170             {
171                 // Calc coverage of first pixel
172 
173                 int cover = calcCoverage!wr(xmWinding[3]+delta[bpos*4]);
174 
175                 // We can skip the span
176 
177                 if (cover < 0x100)
178                 {
179                     __m128 xskip = _mm_set1_ps(nsb-bpos);
180                     xmT0 = _mm_add_ps(xmT0, _mm_mul_ps(xskip,xmStep0));
181                     xmT1 = _mm_add_ps(xmT1, _mm_mul_ps(xskip,xmStep1));
182                     bpos = nsb;
183                 }
184 
185                 // Or fill span with soid color
186 
187                 else if (gradient.isOpaque && (cover > 0xFF00))
188                 {
189                     uint* ptr = &dest[bpos*4];
190                     uint* end = ptr + ((nsb-bpos)*4);
191 
192                     while (ptr < end)
193                     {
194                         __m128 xmRad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
195                         xmRad = _mm_sqrt_ps(xmRad);
196                         xmT0 = xmT0 + xmStep0;
197                         xmT1 = xmT1 + xmStep1;
198                         __m128i ipos = _mm_cvtps_epi32 (xmRad);
199                         ipos = calcRepeatModeIDX!mode(ipos, lutmsk, lutmsk2);
200 
201                         ptr[0] = lut[ipos.array[0]];
202                         ptr[1] = lut[ipos.array[1]];
203                         ptr[2] = lut[ipos.array[2]];
204                         ptr[3] = lut[ipos.array[3]];
205 
206                         ptr+=4;                        
207                     }
208 
209                     bpos = nsb;
210                 }
211 
212                 // Or fill span with transparent color
213 
214                 else
215                 {
216                     __m128i xmcover = _mm_set1_epi16 (cast(ushort) cover);
217 
218                     uint* ptr = &dest[bpos*4];
219                     uint* end = &dest[nsb*4];
220 
221                     while (ptr < end)
222                     {
223                         __m128 xmRad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
224                         xmT0 = xmT0 + xmStep0;
225                         xmT1 = xmT1 + xmStep1;
226                         xmRad = _mm_sqrt_ps(xmRad);
227 
228                         // load destination pixels
229 
230                         __m128i d0 = _mm_load_si128(cast(__m128i*)ptr);
231                         __m128i d1 = _mm_unpackhi_epi8(d0,d0);
232                         d0 = _mm_unpacklo_epi8(d0,d0);
233 
234                         __m128i ipos = _mm_cvtps_epi32 (xmRad);
235                         ipos = calcRepeatModeIDX!mode(ipos, lutmsk, lutmsk2);
236 
237                         // load grad colours and alpha
238 
239                         __m128i c0 = _mm_loadu_si32 (&lut[ipos.array[0]]);
240                         __m128i tmpc0 = _mm_loadu_si32 (&lut[ipos.array[1]]);
241                         c0 = _mm_unpacklo_epi32 (c0, tmpc0);
242                         c0 = _mm_unpacklo_epi8 (c0, c0);
243 
244                         __m128i a0 = _mm_mulhi_epu16(c0,xmcover);
245                        
246                         __m128i c1 = _mm_loadu_si32 (&lut[ipos.array[2]]);
247                         __m128i tmpc1 = _mm_loadu_si32 (&lut[ipos.array[3]]);
248                         c1 = _mm_unpacklo_epi32 (c1, tmpc1);
249                         c1 = _mm_unpacklo_epi8 (c1, c1);
250 
251                         __m128i a1 = _mm_mulhi_epu16(c1,xmcover);
252 
253                         // unpack alpha
254 
255                         a0 = _mm_shufflelo_epi16!255(a0);
256                         a0 = _mm_shufflehi_epi16!255(a0);
257                         a1 = _mm_shufflelo_epi16!255(a1);
258                         a1 = _mm_shufflehi_epi16!255(a1);
259 
260                        // alpha*source + dest - alpha*dest
261 
262                         c0 = _mm_mulhi_epu16 (c0,a0);
263                         c1 = _mm_mulhi_epu16 (c1,a1);
264                         c0 = _mm_add_epi16 (c0,d0);
265                         c1 = _mm_add_epi16 (c1,d1);
266                         d0 = _mm_mulhi_epu16 (d0,a0);
267                         d1 = _mm_mulhi_epu16 (d1,a1);
268                         c0 =  _mm_sub_epi16 (c0,d0);
269                         c1 =  _mm_sub_epi16 (c1,d1);
270                         c0 = _mm_srli_epi16 (c0,8);
271                         c1 = _mm_srli_epi16 (c1,8);
272 
273                         d0 = _mm_packus_epi16 (c0,c1);
274 
275                         _mm_store_si128 (cast(__m128i*)ptr,d0);
276                         
277                         ptr+=4;
278                     }
279 
280                     bpos = nsb;
281                 }
282             }
283 
284             // At this point we need to integrate scandelta
285 
286             uint* ptr = &dest[bpos*4];
287             uint* end = &dest[endbit*4];
288             int* dlptr = &delta[bpos*4];
289 
290             while (bpos < endbit)
291             {
292                 __m128 xmRad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
293                 xmRad = _mm_sqrt_ps(xmRad);
294 
295                 // Integrate delta values
296 
297                 __m128i idv = _mm_load_si128(cast(__m128i*)dlptr);
298                 idv = _mm_add_epi32(idv, _mm_slli_si128!4(idv)); 
299                 idv = _mm_add_epi32(idv, _mm_slli_si128!8(idv)); 
300                 idv = _mm_add_epi32(idv, xmWinding); 
301                 xmWinding = _mm_shuffle_epi32!255(idv);  
302                 _mm_store_si128(cast(__m128i*)dlptr,XMZERO);
303 
304                 // convert grad pos to integer
305 
306                 __m128i ipos = _mm_cvtps_epi32 (xmRad);
307                 xmT0 = xmT0 + xmStep0;
308                 xmT1 = xmT1 + xmStep1;
309 
310                 ipos = calcRepeatModeIDX!mode(ipos, lutmsk, lutmsk2);
311 
312                 // calculate coverage from winding
313 
314                 __m128i xmcover = calcCoverage32!wr(idv);
315 
316                 // Load destination pixels
317 
318                 __m128i d0 = _mm_load_si128(cast(__m128i*)ptr);
319                 __m128i d1 = _mm_unpackhi_epi8(d0,d0);
320                 d0 = _mm_unpacklo_epi8(d0,d0);
321 
322                 // load grad colors
323 
324                 __m128i c0 = _mm_loadu_si32 (&lut[ipos.array[0]]);
325                 __m128i tmpc0 = _mm_loadu_si32 (&lut[ipos.array[1]]);
326                 c0 = _mm_unpacklo_epi32 (c0, tmpc0);
327                 c0 = _mm_unpacklo_epi8 (c0, c0);
328 
329                 __m128i a0 = _mm_unpacklo_epi32(xmcover,xmcover);
330                 a0 = _mm_mulhi_epu16(a0, c0);
331 
332                 __m128i c1 = _mm_loadu_si32 (&lut[ipos.array[2]]);
333                 __m128i tmpc1 = _mm_loadu_si32 (&lut[ipos.array[3]]);
334                 c1 = _mm_unpacklo_epi32 (c1, tmpc1);
335                 c1 = _mm_unpacklo_epi8 (c1, c1);
336 
337                 __m128i a1 = _mm_unpackhi_epi32(xmcover,xmcover);
338                 a1 = _mm_mulhi_epu16(a1, c1);
339 
340                 // unpack alpha
341 
342                 a0 = _mm_shufflelo_epi16!255(a0);
343                 a0 = _mm_shufflehi_epi16!255(a0);
344                 a1 = _mm_shufflelo_epi16!255(a1);
345                 a1 = _mm_shufflehi_epi16!255(a1);
346 
347                 // alpha*source + dest - alpha*dest
348 
349                 c0 = _mm_mulhi_epu16 (c0,a0);
350                 c1 = _mm_mulhi_epu16 (c1,a1);
351                 c0 = _mm_add_epi16 (c0,d0);
352                 c1 = _mm_add_epi16 (c1,d1);
353                 d0 = _mm_mulhi_epu16 (d0,a0);
354                 d1 = _mm_mulhi_epu16 (d1,a1);
355                 c0 =  _mm_sub_epi16 (c0, d0);
356                 c1 =  _mm_sub_epi16 (c1, d1);
357                 c0 = _mm_srli_epi16 (c0,8);
358                 c1 = _mm_srli_epi16 (c1,8);
359 
360                 d0 = _mm_packus_epi16 (c0,c1);
361 
362                 _mm_store_si128 (cast(__m128i*)ptr,d0);
363                 
364                 bpos++;
365                 ptr+=4;
366                 dlptr+=4;
367 
368                 if (((cast(ulong*)dlptr)[0] | (cast(ulong*)dlptr)[1]) == 0)  break;
369             }
370         }
371     }
372 
373 private:
374 
375     uint* pixels;
376     int stride;
377     int height;
378     float xctr,yctr;
379     float xstep0,ystep0;
380     float xstep1,ystep1; 
381     Gradient gradient;
382     WindingRule windingRule;
383     RepeatMode repeatMode;
384 }
385