/**
  Blitter for painting linear gradients.

  Copyright Chris Jones 2020.
  Distributed under the Boost Software License, Version 1.0.
  See accompanying file Licence.txt or copy at...
  https://www.boost.org/LICENSE_1_0.txt
*/

module dg2d.linearblit;

import dg2d.rasterizer;
import dg2d.gradient;
import dg2d.misc;
import dg2d.blitex;

/**
  Linear gradient blitter struct.

  You set up the properties and pass the BlitFunc to the rasterizer.
  setPaint must be called before setCoords, because setCoords scales the
  gradient axis by the lookup table length of the current gradient.

  ---
  auto lblit = LinearBlit(m_pixels,m_stride,m_height);
  lblit.setPaint(grad, wr, RepeatMode.Mirror);
  lblit.setCoords(x0,y0,x1,y1);
  m_rasterizer.rasterize(lblit.getBlitFunc);
  ---
*/

struct LinearBlit
{
    /** Construct a linear gradient blitter.

    pixels - pointer to a 32 bpp pixel buffer
    stride - buffer width in pixels
    height - buffer height in pixels

    note: buffer must be 16 byte aligned, stride must be multiple of 4
    */

    this(uint* pixels, int stride, int height)
    {
        // size_t keeps the full pointer width on 64 bit targets and matches
        // the alignment check on the delta buffer in linear_blit.
        assert(((cast(size_t)pixels) & 15) == 0); // must be 16 byte aligned
        assert((stride & 3) == 0);                // 4 pixels * 4 bytes = 16 byte aligned rows
        assert(height > 0);
        this.pixels = pixels;
        this.stride = stride;
        this.height = height;
    }

    /** Set the gradient, winding rule and repeat mode.

    grad  - gradient to paint with, must not be null and its lookup length
            must be a power of two (both are asserted)
    wrule - winding rule used to turn winding values into coverage
    rmode - how the gradient behaves outside of its start and end points
    */

    void setPaint(Gradient grad, WindingRule wrule, RepeatMode rmode)
    {
        assert(grad !is null);
        assert(isPow2(grad.lookupLength));
        gradient = grad;
        windingRule = wrule;
        repeatMode = rmode;
    }

    /** Set the coordinates for the start and end point of the linear gradient.

    x0,y0 - start point of the gradient
    x1,y1 - end point of the gradient

    The per pixel steps are scaled so that moving from the start point to
    the end point advances the lookup position by gradient.lookupLength.

    note: setPaint must have been called first, the gradient is read here.
    */

    void setCoords(float x0, float y0, float x1, float y1)
    {
        assert(gradient !is null, "setPaint must be called before setCoords");
        xctr = x0;
        yctr = y0;
        float w = x1-x0;
        float h = y1-y0;
        float hsq = w*w + h*h;
        if (hsq < 0.1) hsq = 0.1; // avoid div by zero
        xstep = gradient.lookupLength * w / hsq;
        ystep = gradient.lookupLength * h / hsq;
    }

    /** Returns a BlitFunc for use by the rasterizer.

    Selects the linear_blit instantiation that matches the winding rule and
    repeat mode given to setPaint. Any winding rule other than NonZero is
    handled as EvenOdd. An unknown repeat mode hits assert(0).
    */

    BlitFunc getBlitFunc() return
    {
        if (windingRule == WindingRule.NonZero)
        {
            switch(repeatMode)
            {
                case RepeatMode.Pad:    return &linear_blit!(WindingRule.NonZero,RepeatMode.Pad);
                case RepeatMode.Repeat: return &linear_blit!(WindingRule.NonZero,RepeatMode.Repeat);
                case RepeatMode.Mirror: return &linear_blit!(WindingRule.NonZero,RepeatMode.Mirror);
                default: assert(0);
            }
        }
        else
        {
            switch(repeatMode)
            {
                case RepeatMode.Pad:    return &linear_blit!(WindingRule.EvenOdd,RepeatMode.Pad);
                case RepeatMode.Repeat: return &linear_blit!(WindingRule.EvenOdd,RepeatMode.Repeat);
                case RepeatMode.Mirror: return &linear_blit!(WindingRule.EvenOdd,RepeatMode.Mirror);
                default: assert(0);
            }
        }
    }

private:

    /*
    Blits one scanline of the gradient.

    delta - per pixel winding deltas for the scanline, 16 byte aligned. The
            deltas that get integrated are reset to zero as they are read.
    mask  - bit mask scanned with nextSetBit, one bit per block of 4 pixels
    x0,x1 - pixel range to process, both must be multiples of 4
    y     - scanline index, 0 <= y < height

    Everything is processed in blocks of 4 pixels; bpos and endbit are block
    indices, so the pixel index is always bpos*4.
    */

    void linear_blit(WindingRule wr, RepeatMode mode)(int* delta, DMWord* mask, int x0, int x1, int y)
    {
        assert( ( cast(size_t)delta & 15 ) == 0 );
        assert(x0 >= 0);
        assert(x1 <= stride);
        assert(y >= 0);
        assert(y < height);
        assert((x0 & 3) == 0);
        assert((x1 & 3) == 0);

        // main blit variables

        int bpos = x0 / 4;
        int endbit = x1 / 4;
        uint* dest = &pixels[y*stride];
        __m128i xmWinding = 0;
        uint* lut = gradient.getLookup.ptr;
        __m128i lutmsk = gradient.lookupLength - 1;
        __m128i lutmsk2 = gradient.lookupLength*2 - 1;

        // XMM constants

        immutable __m128i XMZERO = 0;

        // paint variables
        //
        // t0 is the lookup position of the first pixel of the first block,
        // xmT0 holds the positions of 4 consecutive pixels, and xmStep0
        // advances all of them by one block (4 pixels).

        float t0 = (bpos*4-xctr)*xstep + (y-yctr)*ystep;
        __m128 xmT0 = _mm_mul_ps(_mm_set1_ps(xstep), _mm_setr_ps(0.0f,1.0f,2.0f,3.0f));
        xmT0 = _mm_add_ps(xmT0, _mm_set1_ps(t0));
        __m128 xmStep0 = _mm_set1_ps(xstep*4);

        // main loop

        while (bpos < endbit)
        {
            int nsb = nextSetBit(mask, bpos, endbit);

            // do we have a span of unchanging coverage?

            if (bpos < nsb)
            {
                // Calc coverage of first pixel, it is used for the whole span.
                // NOTE(review): the thresholds below and the 16 bit broadcast
                // suggest coverage is a 16 bit value - confirm in calcCoverage.

                int cover = calcCoverage!wr(xmWinding[3]+delta[bpos*4]);

                // We can skip the span, but the gradient position still has
                // to be advanced past it

                if (cover < 0x100)
                {
                    __m128 xskip = _mm_set1_ps(nsb-bpos);
                    xmT0 = _mm_add_ps(xmT0, _mm_mul_ps(xskip,xmStep0));
                    bpos = nsb;
                }

                // Or fill span with solid color, no blending with the
                // destination is done in this case

                else if (gradient.isOpaque && (cover > 0xFF00))
                {
                    uint* ptr = &dest[bpos*4];
                    uint* end = ptr + ((nsb-bpos)*4);

                    while (ptr < end)
                    {
                        __m128i ipos = _mm_cvtps_epi32 (xmT0);
                        xmT0 = xmT0 + xmStep0;

                        ipos = calcRepeatModeIDX!mode(ipos, lutmsk, lutmsk2);

                        ptr[0] = lut[ipos.array[0]];
                        ptr[1] = lut[ipos.array[1]];
                        ptr[2] = lut[ipos.array[2]];
                        ptr[3] = lut[ipos.array[3]];

                        ptr+=4;
                    }

                    bpos = nsb;
                }

                // Or fill span with semi-transparent color, the same coverage
                // is applied to every pixel in the span

                else
                {
                    __m128i xmcover = _mm_set1_epi16 (cast(ushort) cover);

                    uint* ptr = &dest[bpos*4];
                    uint* end = &dest[nsb*4];

                    while (ptr < end)
                    {
                        __m128i ipos = _mm_cvtps_epi32 (xmT0);
                        xmT0 = xmT0 + xmStep0;

                        ipos = calcRepeatModeIDX!mode(ipos, lutmsk, lutmsk2);

                        // load destination pixels, each byte is widened to
                        // 16 bits by duplicating it

                        __m128i d0 = _mm_load_si128(cast(__m128i*)ptr);
                        __m128i d1 = _mm_unpackhi_epi8(d0,d0);
                        d0 = _mm_unpacklo_epi8(d0,d0);

                        // load grad colors and alpha

                        __m128i c0 = _mm_loadu_si32 (&lut[ipos.array[0]]);
                        __m128i tmpc0 = _mm_loadu_si32 (&lut[ipos.array[1]]);
                        c0 = _mm_unpacklo_epi32 (c0, tmpc0);
                        c0 = _mm_unpacklo_epi8 (c0, c0);

                        __m128i a0 = _mm_mulhi_epu16(c0,xmcover);

                        __m128i c1 = _mm_loadu_si32 (&lut[ipos.array[2]]);
                        __m128i tmpc1 = _mm_loadu_si32 (&lut[ipos.array[3]]);
                        c1 = _mm_unpacklo_epi32 (c1, tmpc1);
                        c1 = _mm_unpacklo_epi8 (c1, c1);

                        __m128i a1 = _mm_mulhi_epu16(c1,xmcover);

                        // unpack alpha, broadcast the top channel of each
                        // pixel to all four of its channels

                        a0 = _mm_shufflelo_epi16!255(a0);
                        a0 = _mm_shufflehi_epi16!255(a0);
                        a1 = _mm_shufflelo_epi16!255(a1);
                        a1 = _mm_shufflehi_epi16!255(a1);

                        // alpha*source + dest - alpha*dest

                        c0 = _mm_mulhi_epu16 (c0,a0);
                        c1 = _mm_mulhi_epu16 (c1,a1);
                        c0 = _mm_add_epi16 (c0,d0);
                        c1 = _mm_add_epi16 (c1,d1);
                        d0 = _mm_mulhi_epu16 (d0,a0);
                        d1 = _mm_mulhi_epu16 (d1,a1);
                        c0 = _mm_sub_epi16 (c0,d0);
                        c1 = _mm_sub_epi16 (c1,d1);
                        c0 = _mm_srli_epi16 (c0,8);
                        c1 = _mm_srli_epi16 (c1,8);

                        d0 = _mm_packus_epi16 (c0,c1);

                        _mm_store_si128 (cast(__m128i*)ptr,d0);

                        ptr+=4;
                    }

                    bpos = nsb;
                }
            }

            // At this point we need to integrate scandelta

            uint* ptr = &dest[bpos*4];
            uint* end = &dest[endbit*4];
            int* dlptr = &delta[bpos*4];

            while (bpos < endbit)
            {
                // Integrate delta values, prefix sum of the 4 deltas plus the
                // winding carried over from the previous block. The deltas
                // are zeroed once they have been read.

                __m128i idv = _mm_load_si128(cast(__m128i*)dlptr);
                idv = _mm_add_epi32(idv, _mm_slli_si128!4(idv));
                idv = _mm_add_epi32(idv, _mm_slli_si128!8(idv));
                idv = _mm_add_epi32(idv, xmWinding);
                xmWinding = _mm_shuffle_epi32!255(idv);
                _mm_store_si128(cast(__m128i*)dlptr,XMZERO);

                // calculate coverage from winding

                __m128i xmcover = calcCoverage32!wr(idv);

                // convert grad pos to integer

                __m128i ipos = _mm_cvtps_epi32 (xmT0);
                xmT0 = xmT0 + xmStep0;

                ipos = calcRepeatModeIDX!mode(ipos, lutmsk, lutmsk2);

                // Load destination pixels

                __m128i d0 = _mm_load_si128(cast(__m128i*)ptr);
                __m128i d1 = _mm_unpackhi_epi8(d0,d0);
                d0 = _mm_unpacklo_epi8(d0,d0);

                // load grad colors and alpha, here every pixel has its own
                // coverage value

                __m128i c0 = _mm_loadu_si32 (&lut[ipos.array[0]]);
                __m128i tmpc0 = _mm_loadu_si32 (&lut[ipos.array[1]]);
                c0 = _mm_unpacklo_epi32 (c0, tmpc0);
                c0 = _mm_unpacklo_epi8 (c0, c0);

                __m128i a0 = _mm_unpacklo_epi32(xmcover,xmcover);
                a0 = _mm_mulhi_epu16(a0, c0);

                __m128i c1 = _mm_loadu_si32 (&lut[ipos.array[2]]);
                __m128i tmpc1 = _mm_loadu_si32 (&lut[ipos.array[3]]);
                c1 = _mm_unpacklo_epi32 (c1, tmpc1);
                c1 = _mm_unpacklo_epi8 (c1, c1);

                __m128i a1 = _mm_unpackhi_epi32(xmcover,xmcover);
                a1 = _mm_mulhi_epu16(a1, c1);

                // unpack alpha

                a0 = _mm_shufflelo_epi16!255(a0);
                a0 = _mm_shufflehi_epi16!255(a0);
                a1 = _mm_shufflelo_epi16!255(a1);
                a1 = _mm_shufflehi_epi16!255(a1);

                // alpha*source + dest - alpha*dest

                c0 = _mm_mulhi_epu16 (c0,a0);
                c1 = _mm_mulhi_epu16 (c1,a1);
                c0 = _mm_add_epi16 (c0,d0);
                c1 = _mm_add_epi16 (c1,d1);
                d0 = _mm_mulhi_epu16 (d0,a0);
                d1 = _mm_mulhi_epu16 (d1,a1);
                c0 = _mm_sub_epi16 (c0, d0);
                c1 = _mm_sub_epi16 (c1, d1);
                c0 = _mm_srli_epi16 (c0,8);
                c1 = _mm_srli_epi16 (c1,8);

                d0 = _mm_packus_epi16 (c0,c1);

                _mm_store_si128 (cast(__m128i*)ptr,d0);

                bpos++;
                ptr+=4;
                dlptr+=4;

                // If the next 4 deltas are all zero the coverage is constant
                // again, so go back to the span code in the outer loop.
                // NOTE(review): this read also happens when bpos == endbit,
                // so the delta buffer presumably has at least 4 ints of
                // padding past x1 - confirm against the rasterizer.

                if (((cast(ulong*)dlptr)[0] | (cast(ulong*)dlptr)[1]) == 0)  break;
            }
        }
    }

    // Member variables

    uint* pixels;                   // destination buffer, 32 bpp
    int stride;                     // buffer width in pixels
    int height;                     // buffer height in pixels
    float xctr,yctr,xstep,ystep;    // gradient start point and per pixel lookup steps
    Gradient gradient;
    WindingRule windingRule;
    RepeatMode repeatMode;
}