1 /**
2   Blitter for painting solid color.
3 
4   Copyright Chris Jones 2020.
5   Distributed under the Boost Software License, Version 1.0.
6   See accompanying file Licence.txt or copy at...
7   https://www.boost.org/LICENSE_1_0.txt
8 */
9 
10 
11 module dg2d.colorblit;
12 
13 import dg2d.rasterizer;
14 import dg2d.misc;
15 import dg2d.blitex;
16 
/**
   Color blitter struct

   You set up the properties and pass the BlitFunc to the rasterizer.

   ---
   auto cblit = ColorBlit(m_pixels,m_stride,m_height);
   cblit.setColor(color);
   m_rasterizer.rasterize(cblit.getBlitFunc(WindingRule.NonZero));
   ---
*/
28 
struct ColorBlit
{
    /** Construct a color blitter.

    Params:
        pixels = pointer to a 32 bpp pixel buffer
        stride = buffer width in pixels
        height = buffer height in pixels

    Note: the buffer must be 16 byte aligned and stride must be a multiple
    of 4 pixels, because the blitter reads and writes the destination with
    aligned 128 bit loads and stores.
    */

    this(uint* pixels, int stride, int height)
    {
        // size_t rather than uint: casting a pointer to uint truncates it on
        // 64 bit targets. Only the low four bits matter for the check itself.
        assert(((cast(size_t)pixels) & 15) == 0); // must be 16 byte aligned
        assert((stride & 3) == 0);                // 4 pixels == 16 bytes
        assert(height > 0);

        this.pixels = pixels;
        this.stride = stride;
        this.height = height;
    }

    /** Set the colour to blit.

    The top byte of color is used as the source alpha (see color_blit).
    */

    void setColor(uint color)
    {
        this.color = color;
    }

    /** Returns a BlitFunc for use by the rasterizer.

    The returned delegate is bound to this struct and is specialised at
    compile time for the requested winding rule: NonZero selects the
    NonZero instantiation, any other value selects EvenOdd.
    */

    BlitFunc getBlitFunc(WindingRule rule) return
    {
        if (rule == WindingRule.NonZero)
        {
            return &color_blit!(WindingRule.NonZero);
        }
        else
        {
            return &color_blit!(WindingRule.EvenOdd);
        }
    }

private:

    /** Blit one scanline of the solid color.

    Params:
        delta = per pixel winding deltas for the scanline. Entries are
                integrated here and zeroed as they are consumed.
        mask  = passed to nextSetBit to find the next block of 4 pixels that
                needs delta integration (presumably one bit per 4 pixel
                block - confirm against the rasterizer).
        x0    = first pixel to process, must be a multiple of 4
        x1    = one past the last pixel to process, must be a multiple of 4
        y     = scanline index into the pixel buffer

    The scanline is processed in blocks of 4 pixels. Runs of blocks up to
    the next mask bit are treated as having constant coverage and are either
    skipped, filled, or blended with a single precomputed alpha. Other
    blocks have their deltas integrated into a winding value, converted to
    coverage and blended per pixel.
    */

    void color_blit(WindingRule rule)(int* delta, DMWord* mask, int x0, int x1, int y)
    {
        assert(x0 >= 0);
        assert(x1 <= stride);
        assert(y >= 0);
        assert(y < height);
        assert((x0 & 3) == 0);
        assert((x1 & 3) == 0);

        // main blit variables, bpos and endbit count blocks of 4 pixels

        int bpos = x0 / 4;
        int endbit = x1 / 4;
        uint* dest = &pixels[y*stride];
        __m128i xmWinding = 0;      // running winding value, kept in all lanes
        bool isopaque = (color >> 24) == 0xFF;

        // XMM constants

        immutable __m128i XMZERO = 0;
        immutable __m128i XMFFFF = 0xFFFFFFFF;

        // paint variables
        //
        // xmColor: each byte of color duplicated into a 16 bit lane (c*257),
        // and that 4 channel pattern repeated so it covers two pixels.
        // xmAlpha: source alpha scaled to 16 bits (a*257) in every lane.

        __m128i xmColor = _mm_loadu_si32 (&color);
        xmColor = _mm_unpacklo_epi8 (xmColor, xmColor);
        xmColor = _mm_unpacklo_epi64 (xmColor, xmColor);
        __m128i xmAlpha = _mm_set1_epi16 (cast(ushort) ((color >> 24) * 257));

        // main loop

        while (bpos < endbit)
        {
            int nsb = nextSetBit(mask, bpos, endbit);

            // do we have a span of unchanging coverage?

            if (bpos < nsb)
            {
                // Calc coverage of first pixel, it is used for the whole span

                int cover = calcCoverage!rule(xmWinding[3]+delta[bpos*4]);

                // We can skip the span

                if (cover < 0x100)
                {
                    bpos = nsb;
                }

                // Or fill span with solid color

                else if (isopaque && (cover > 0xFF00))
                {
                    __m128i sscol = _mm_set1_epi32(color);

                    uint* ptr = &dest[bpos*4];
                    uint* end = &dest[nsb*4];

                    while (ptr < end)
                    {
                        _mm_store_si128(cast(__m128i*)ptr, sscol);
                        ptr+=4;
                    }

                    bpos = nsb;
                }

                // Or fill the span with transparent color

                else
                {
                    // alpha and premultiplied color are constant over the
                    // span so they are computed once outside the pixel loop

                    __m128i tsalpha = _mm_set1_epi16(cast(ushort) cover);
                    tsalpha = _mm_mulhi_epu16(xmAlpha,tsalpha);
                    __m128i tscolor = _mm_mulhi_epu16(xmColor,tsalpha);
                    tsalpha  = tsalpha ^ XMFFFF;               // 1-alpha

                    uint* ptr = &dest[bpos*4];
                    uint* end = &dest[nsb*4];

                    while (ptr < end)
                    {
                        // r = dest*(1-alpha) + color*alpha, 4 pixels at a time

                        __m128i d0 = _mm_load_si128(cast(__m128i*)ptr);
                        __m128i d1 = _mm_unpackhi_epi8(d0,d0);
                        d0 = _mm_unpacklo_epi8(d0,d0);
                        d0 = _mm_mulhi_epu16(d0,tsalpha);
                        d1 = _mm_mulhi_epu16(d1,tsalpha);
                        d0 = _mm_add_epi16(d0,tscolor);
                        d0 = _mm_srli_epi16(d0,8);
                        d1 = _mm_add_epi16(d1,tscolor);
                        d1 = _mm_srli_epi16(d1,8);
                        d0 = _mm_packus_epi16(d0,d1);
                        _mm_store_si128(cast(__m128i*)ptr,d0);
                        ptr+=4;
                    }

                    bpos = nsb;
                }
            }

            // At this point we need to integrate scandelta

            uint* ptr = &dest[bpos*4];
            uint* end = &dest[endbit*4];
            int* dlptr = &delta[bpos*4];

            while (ptr < end)
            {
                // Integrate delta values: prefix sum of the 4 deltas plus the
                // winding carried in from the previous block. The last lane
                // is broadcast as the carry for the next block and the
                // consumed deltas are cleared.

                __m128i idv = _mm_load_si128(cast(__m128i*)dlptr);
                idv = _mm_add_epi32(idv, _mm_slli_si128!4(idv));
                idv = _mm_add_epi32(idv, _mm_slli_si128!8(idv));
                idv = _mm_add_epi32(idv, xmWinding);
                xmWinding = _mm_shuffle_epi32!255(idv);
                _mm_store_si128(cast(__m128i*)dlptr,XMZERO);

                // calculate coverage from winding

                __m128i xmcover = calcCoverage16!rule(idv);

                // Load destination pixels and widen each byte to 16 bits,
                // d0 holds pixels 0-1 and d1 holds pixels 2-3

                __m128i d0 = _mm_load_si128(cast(__m128i*)ptr);
                __m128i d1 = _mm_unpackhi_epi8(d0,d0);
                d0 = _mm_unpacklo_epi8(d0,d0);

                // multiply source alpha & coverage, then replicate the low
                // four 16 bit results across the 4 channels of their pixel

                __m128i a0 = _mm_mulhi_epu16(xmcover,xmAlpha);
                a0 = _mm_unpacklo_epi16(a0,a0);
                __m128i a1 = _mm_unpackhi_epi32(a0,a0);
                a0 = _mm_unpacklo_epi32(a0,a0);

                // r = alpha*color + dest - alpha*dest

                __m128i r0 = _mm_mulhi_epu16(xmColor,a0);
                r0 = _mm_add_epi16(r0, d0);
                d0 = _mm_mulhi_epu16(d0,a0);
                r0 = _mm_sub_epi16(r0, d0);
                r0 = _mm_srli_epi16(r0,8);

                __m128i r1 = _mm_mulhi_epu16(xmColor,a1);
                r1 = _mm_add_epi16(r1, d1);
                d1 = _mm_mulhi_epu16(d1,a1);
                r1 = _mm_sub_epi16(r1, d1);
                r1 = _mm_srli_epi16(r1,8);

                __m128i r01 = _mm_packus_epi16(r0,r1);

                _mm_store_si128(cast(__m128i*)ptr,r01);

                bpos++;
                ptr+=4;
                dlptr+=4;

                // Stop before peeking at the next block if we have reached
                // x1. The loop would end here anyway, and nothing in this
                // function guarantees that delta extends beyond x1.

                if (ptr >= end) break;

                // If the next 4 deltas are all zero the winding is constant
                // from here, so go back to the span handling in the outer loop

                if (((cast(ulong*)dlptr)[0] | (cast(ulong*)dlptr)[1]) == 0) break;
            }
        }
    }

    uint* pixels;   // destination buffer, 32 bpp, 16 byte aligned
    int stride;     // buffer width in pixels, multiple of 4
    int height;     // buffer height in pixels
    uint color;     // color to blit, alpha in the top byte
}