/**
  Blitter for painting radial gradients.

  Copyright Chris Jones 2020.
  Distributed under the Boost Software License, Version 1.0.
  See accompanying file Licence.txt or copy at...
  https://www.boost.org/LICENSE_1_0.txt
*/

module dg2d.radialblit;

import dg2d.rasterizer;
import dg2d.gradient;
import dg2d.misc;
import dg2d.blitex;

/**
  Radial gradient blitter struct.

  You set up the properties and pass the BlitFunc to the rasterizer.
  setPaint must be called before setCoords, because setCoords scales its
  results by the gradient's lookup length.

  ---
  auto rblit = RadialBlit(m_pixels,m_stride,m_height);
  rblit.setPaint(grad, wr, RepeatMode.Pad);
  rblit.setCoords(x0,y0,x1,y1,x2,y2);
  m_rasterizer.rasterize(rblit.getBlitFunc);
  ---
*/

struct RadialBlit
{
    /** Construct a Radial blitter.
    pixels - pointer to a 32 bpp pixel buffer
    stride - buffer width in pixels
    height - buffer height in pixels

    note: buffer must be 16 byte aligned, stride must be multiple of 4
    */

    this(uint* pixels, int stride, int height)
    {
        // size_t keeps the full pointer width; only the low 4 bits matter
        // here, but casting to uint would truncate on 64 bit targets.
        assert(((cast(size_t)pixels) & 15) == 0); // must be 16 byte aligned
        assert((stride & 3) == 0);                // stride must be a multiple of 4 pixels (16 bytes)
        assert(height > 0);
        this.pixels = pixels;
        this.stride = stride;
        this.height = height;
    }

    /** Set the gradient, winding rule and repeat mode.

    grad  - gradient to paint with, must not be null and its lookup length
            must be a power of two
    wrule - winding rule used to turn winding values into coverage
    rmode - how the gradient is extended beyond its lookup range
    */

    void setPaint(Gradient grad, WindingRule wrule, RepeatMode rmode)
    {
        assert(grad !is null);
        assert(isPow2(grad.lookupLength));
        gradient = grad;
        windingRule = wrule;
        repeatMode = rmode;
    }

    /** Specify the orientation in terms of an ellipse, for that we need 3 points...
    (x0,y0) is the center of the ellipse
    (x1,y1) is radius at 0 degrees
    (x2,y2) is radius at 90 degrees
    The radii don't need to be at right angles, so it can handle an ellipse that has
    been through any affine transform.

    The step values computed here are the inverse of the 2x2 matrix formed by the
    two radius vectors, scaled by the gradient lookup length, so that a pixel lying
    on the ellipse maps to a distance of lookupLength from the origin.

    note: setPaint must have been called first.
    */

    void setCoords(float x0, float y0, float x1, float y1, float x2, float y2)
    {
        assert(gradient !is null, "setPaint must be called before setCoords");

        xctr = x0;
        yctr = y0;
        float w0 = x1-x0;
        float h0 = y1-y0;
        float w1 = x2-x0;
        float h1 = y2-y0;
        float q = w1*h0 - w0*h1;

        // Clamp the (negated) determinant away from zero so a degenerate
        // ellipse does not cause a division by zero below.
        if (abs(q) < 0.1) q = (q < 0) ? -0.1 : 0.1;

        xstep0 = gradient.lookupLength * -h1 / q;
        ystep0 = gradient.lookupLength * w1 / q;
        xstep1 = gradient.lookupLength * h0 / q;
        ystep1 = gradient.lookupLength * -w0 / q;
    }

    /** Specify the orientation in terms of a circle, for that we need two points,
    (x0,y0) is the center of the circle
    (x1,y1) is radius at 0 degrees

    The second radius is derived by rotating the first one by 90 degrees.
    */

    void setCoords(float x0, float y0, float x1, float y1)
    {
        setCoords(x0,y0,x1,y1,x0-y1+y0,y0+x1-x0);
    }

    /** Returns a BlitFunc for use by the rasterizer. The returned delegate is
    the radial_blit instantiation matching the winding rule and repeat mode
    given to setPaint. */

    BlitFunc getBlitFunc() return
    {
        if (windingRule == WindingRule.NonZero)
        {
            switch(repeatMode)
            {
                case RepeatMode.Pad:    return &radial_blit!(WindingRule.NonZero,RepeatMode.Pad);
                case RepeatMode.Repeat: return &radial_blit!(WindingRule.NonZero,RepeatMode.Repeat);
                case RepeatMode.Mirror: return &radial_blit!(WindingRule.NonZero,RepeatMode.Mirror);
                default: assert(0);
            }
        }
        else
        {
            switch(repeatMode)
            {
                case RepeatMode.Pad:    return &radial_blit!(WindingRule.EvenOdd,RepeatMode.Pad);
                case RepeatMode.Repeat: return &radial_blit!(WindingRule.EvenOdd,RepeatMode.Repeat);
                case RepeatMode.Mirror: return &radial_blit!(WindingRule.EvenOdd,RepeatMode.Mirror);
                default: assert(0);
            }
        }
    }

private:

    /* Blits one scanline, processing pixels in groups of four.

    delta - per pixel winding deltas for the scanline, zeroed again as they
            are consumed
    mask  - passed to nextSetBit to locate the next group of four pixels
            that needs delta integration (presumably one bit per group;
            confirm against the rasterizer)
    x0,x1 - pixel range to paint, both must be multiples of 4
    y     - scanline index
    */

    void radial_blit(WindingRule wr, RepeatMode mode)(int* delta, DMWord* mask, int x0, int x1, int y)
    {
        assert(x0 >= 0);
        assert(x1 <= stride);
        assert(y >= 0);
        assert(y < height);
        assert((x0 & 3) == 0);
        assert((x1 & 3) == 0);

        // main blit variables

        int bpos = x0 / 4;
        int endbit = x1 / 4;
        uint* dest = &pixels[y*stride];
        __m128i xmWinding = 0;
        uint* lut = gradient.getLookup.ptr;
        __m128i lutmsk = gradient.lookupLength - 1;
        __m128i lutmsk2 = gradient.lookupLength*2 - 1;

        // XMM constants

        immutable __m128i XMZERO = 0;

        // paint variables, xmT0 and xmT1 hold the two gradient space
        // coordinates for the current four pixels, the gradient position
        // is the length of that vector

        float t0 = (bpos*4-xctr)*xstep0 + (y-yctr)*ystep0;
        __m128 xmT0 = _mm_mul_ps(_mm_set1_ps(xstep0), _mm_setr_ps(0.0f,1.0f,2.0f,3.0f));
        xmT0 = _mm_add_ps(xmT0, _mm_set1_ps(t0));
        __m128 xmStep0 = _mm_set1_ps(xstep0*4);

        float t1 = (bpos*4-xctr)*xstep1 + (y-yctr)*ystep1;
        __m128 xmT1 = _mm_mul_ps(_mm_set1_ps(xstep1), _mm_setr_ps(0.0f,1.0f,2.0f,3.0f));
        xmT1 = _mm_add_ps(xmT1, _mm_set1_ps(t1));
        __m128 xmStep1 = _mm_set1_ps(xstep1*4);

        // main loop

        while (bpos < endbit)
        {
            int nsb = nextSetBit(mask, bpos, endbit);

            // do we have a span of unchanging coverage?

            if (bpos < nsb)
            {
                // Calc coverage of first pixel

                int cover = calcCoverage!wr(xmWinding[3]+delta[bpos*4]);

                // We can skip the span, but the gradient coordinates
                // still have to be advanced past it

                if (cover < 0x100)
                {
                    __m128 xskip = _mm_set1_ps(nsb-bpos);
                    xmT0 = _mm_add_ps(xmT0, _mm_mul_ps(xskip,xmStep0));
                    xmT1 = _mm_add_ps(xmT1, _mm_mul_ps(xskip,xmStep1));
                    bpos = nsb;
                }

                // Or fill span with solid color, no blending needed

                else if (gradient.isOpaque && (cover > 0xFF00))
                {
                    uint* ptr = &dest[bpos*4];
                    uint* end = ptr + ((nsb-bpos)*4);

                    while (ptr < end)
                    {
                        __m128 xmRad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
                        xmRad = _mm_sqrt_ps(xmRad);
                        xmT0 = xmT0 + xmStep0;
                        xmT1 = xmT1 + xmStep1;
                        __m128i ipos = _mm_cvtps_epi32 (xmRad);
                        ipos = calcRepeatModeIDX!mode(ipos, lutmsk, lutmsk2);

                        ptr[0] = lut[ipos.array[0]];
                        ptr[1] = lut[ipos.array[1]];
                        ptr[2] = lut[ipos.array[2]];
                        ptr[3] = lut[ipos.array[3]];

                        ptr+=4;
                    }

                    bpos = nsb;
                }

                // Or blend the span using the same coverage for every pixel

                else
                {
                    __m128i xmcover = _mm_set1_epi16 (cast(ushort) cover);

                    uint* ptr = &dest[bpos*4];
                    uint* end = &dest[nsb*4];

                    while (ptr < end)
                    {
                        __m128 xmRad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
                        xmT0 = xmT0 + xmStep0;
                        xmT1 = xmT1 + xmStep1;
                        xmRad = _mm_sqrt_ps(xmRad);

                        // load destination pixels

                        __m128i d0 = _mm_load_si128(cast(__m128i*)ptr);
                        __m128i d1 = _mm_unpackhi_epi8(d0,d0);
                        d0 = _mm_unpacklo_epi8(d0,d0);

                        __m128i ipos = _mm_cvtps_epi32 (xmRad);
                        ipos = calcRepeatModeIDX!mode(ipos, lutmsk, lutmsk2);

                        // load grad colours and alpha

                        __m128i c0 = _mm_loadu_si32 (&lut[ipos.array[0]]);
                        __m128i tmpc0 = _mm_loadu_si32 (&lut[ipos.array[1]]);
                        c0 = _mm_unpacklo_epi32 (c0, tmpc0);
                        c0 = _mm_unpacklo_epi8 (c0, c0);

                        __m128i a0 = _mm_mulhi_epu16(c0,xmcover);

                        __m128i c1 = _mm_loadu_si32 (&lut[ipos.array[2]]);
                        __m128i tmpc1 = _mm_loadu_si32 (&lut[ipos.array[3]]);
                        c1 = _mm_unpacklo_epi32 (c1, tmpc1);
                        c1 = _mm_unpacklo_epi8 (c1, c1);

                        __m128i a1 = _mm_mulhi_epu16(c1,xmcover);

                        // unpack alpha

                        a0 = _mm_shufflelo_epi16!255(a0);
                        a0 = _mm_shufflehi_epi16!255(a0);
                        a1 = _mm_shufflelo_epi16!255(a1);
                        a1 = _mm_shufflehi_epi16!255(a1);

                        // alpha*source + dest - alpha*dest

                        c0 = _mm_mulhi_epu16 (c0,a0);
                        c1 = _mm_mulhi_epu16 (c1,a1);
                        c0 = _mm_add_epi16 (c0,d0);
                        c1 = _mm_add_epi16 (c1,d1);
                        d0 = _mm_mulhi_epu16 (d0,a0);
                        d1 = _mm_mulhi_epu16 (d1,a1);
                        c0 = _mm_sub_epi16 (c0,d0);
                        c1 = _mm_sub_epi16 (c1,d1);
                        c0 = _mm_srli_epi16 (c0,8);
                        c1 = _mm_srli_epi16 (c1,8);

                        d0 = _mm_packus_epi16 (c0,c1);

                        _mm_store_si128 (cast(__m128i*)ptr,d0);

                        ptr+=4;
                    }

                    bpos = nsb;
                }
            }

            // At this point we need to integrate scandelta

            uint* ptr = &dest[bpos*4];
            int* dlptr = &delta[bpos*4];

            while (bpos < endbit)
            {
                __m128 xmRad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
                xmRad = _mm_sqrt_ps(xmRad);

                // Integrate delta values, prefix sum of the four deltas plus
                // the winding carried over from the previous group. The
                // deltas are cleared once they have been read.

                __m128i idv = _mm_load_si128(cast(__m128i*)dlptr);
                idv = _mm_add_epi32(idv, _mm_slli_si128!4(idv));
                idv = _mm_add_epi32(idv, _mm_slli_si128!8(idv));
                idv = _mm_add_epi32(idv, xmWinding);
                xmWinding = _mm_shuffle_epi32!255(idv);
                _mm_store_si128(cast(__m128i*)dlptr,XMZERO);

                // convert grad pos to integer

                __m128i ipos = _mm_cvtps_epi32 (xmRad);
                xmT0 = xmT0 + xmStep0;
                xmT1 = xmT1 + xmStep1;

                ipos = calcRepeatModeIDX!mode(ipos, lutmsk, lutmsk2);

                // calculate coverage from winding

                __m128i xmcover = calcCoverage32!wr(idv);

                // Load destination pixels

                __m128i d0 = _mm_load_si128(cast(__m128i*)ptr);
                __m128i d1 = _mm_unpackhi_epi8(d0,d0);
                d0 = _mm_unpacklo_epi8(d0,d0);

                // load grad colors

                __m128i c0 = _mm_loadu_si32 (&lut[ipos.array[0]]);
                __m128i tmpc0 = _mm_loadu_si32 (&lut[ipos.array[1]]);
                c0 = _mm_unpacklo_epi32 (c0, tmpc0);
                c0 = _mm_unpacklo_epi8 (c0, c0);

                __m128i a0 = _mm_unpacklo_epi32(xmcover,xmcover);
                a0 = _mm_mulhi_epu16(a0, c0);

                __m128i c1 = _mm_loadu_si32 (&lut[ipos.array[2]]);
                __m128i tmpc1 = _mm_loadu_si32 (&lut[ipos.array[3]]);
                c1 = _mm_unpacklo_epi32 (c1, tmpc1);
                c1 = _mm_unpacklo_epi8 (c1, c1);

                __m128i a1 = _mm_unpackhi_epi32(xmcover,xmcover);
                a1 = _mm_mulhi_epu16(a1, c1);

                // unpack alpha

                a0 = _mm_shufflelo_epi16!255(a0);
                a0 = _mm_shufflehi_epi16!255(a0);
                a1 = _mm_shufflelo_epi16!255(a1);
                a1 = _mm_shufflehi_epi16!255(a1);

                // alpha*source + dest - alpha*dest

                c0 = _mm_mulhi_epu16 (c0,a0);
                c1 = _mm_mulhi_epu16 (c1,a1);
                c0 = _mm_add_epi16 (c0,d0);
                c1 = _mm_add_epi16 (c1,d1);
                d0 = _mm_mulhi_epu16 (d0,a0);
                d1 = _mm_mulhi_epu16 (d1,a1);
                c0 = _mm_sub_epi16 (c0, d0);
                c1 = _mm_sub_epi16 (c1, d1);
                c0 = _mm_srli_epi16 (c0,8);
                c1 = _mm_srli_epi16 (c1,8);

                d0 = _mm_packus_epi16 (c0,c1);

                _mm_store_si128 (cast(__m128i*)ptr,d0);

                bpos++;
                ptr+=4;
                dlptr+=4;

                // If the next four deltas are all zero, go back to the outer
                // loop so the span paths can take over.
                // NOTE(review): this reads the delta buffer at the advanced
                // position even when bpos == endbit, so it appears to rely on
                // the buffer having at least four readable ints past x1;
                // confirm against the rasterizer.

                if (((cast(ulong*)dlptr)[0] | (cast(ulong*)dlptr)[1]) == 0) break;
            }
        }
    }

private:

    uint* pixels;             // destination buffer, 32 bpp
    int stride;               // buffer width in pixels
    int height;               // buffer height in pixels
    float xctr,yctr;          // center of the gradient
    float xstep0,ystep0;      // per pixel steps for first gradient space axis
    float xstep1,ystep1;      // per pixel steps for second gradient space axis
    Gradient gradient;
    WindingRule windingRule;
    RepeatMode repeatMode;
}