/**
  Blitter for painting biradial gradients.

  Copyright Chris Jones 2020.
  Distributed under the Boost Software License, Version 1.0.
  See accompanying file Licence.txt or copy at...
  https://www.boost.org/LICENSE_1_0.txt
*/

module dg2d.biradialblit;

import dg2d.rasterizer;
import dg2d.gradient;
import dg2d.misc;
import dg2d.blitex;

/**
   Biradial gradient blitter struct.

   The gradient is defined by two circles: a focus circle (x0,y0,r0) and a
   main circle (x1,y1,r1). For every pixel the blitter solves for the
   parameter t of the interpolated circle

       centre = focus + t*(main - focus),  radius = r0 + t*(r1 - r0)

   that passes through the pixel, and uses t to index the gradient lookup
   table.

   ---
   auto blit = BiradialBlit(m_pixels,m_stride,m_height);
   blit.setPaint(grad, wr, RepeatMode.Pad);
   blit.setCoords(x0,y0,r0,x1,y1,r1);
   m_rasterizer.rasterize(blit.getBlitFunc);
   ---
*/

struct BiradialBlit
{
    /** Construct a Biradial blitter.
    pixels - pointer to a 32 bpp pixel buffer
    stride - buffer width in pixels
    height - buffer height in pixels

    note: buffer must be 16 byte aligned, stride must be multiple of 4
    */

    this(uint* pixels, int stride, int height)
    {
        // size_t rather than uint so the pointer is not truncated on 64 bit targets
        assert(((cast(size_t)pixels) & 15) == 0); // must be 16 byte aligned
        assert((stride & 3) == 0);                // stride must be a multiple of 4 pixels
        assert(height > 0);
        this.pixels = pixels;
        this.stride = stride;
        this.height = height;
    }

    /**
      Set the paint options.
      Params:
        gradient = colour gradient to use, must not be null and its lookup
                   length must be a power of two
        wrule    = winding rule
        rmode    = repeat mode, Pad, Repeat or Mirror modes are supported.

      Notes: If the focus circle is not fully enclosed by the main circle there will be
      areas that are undefined in terms of where they map to on the gradient
      axis. These areas are filled with end colour from the gradient.
    */

    void setPaint(Gradient gradient, WindingRule wrule, RepeatMode rmode)
    {
        assert(gradient !is null);
        assert(isPow2(gradient.lookupLength));
        this.gradient = gradient;
        this.windingRule = wrule;
        this.repeatMode = rmode;
    }

    /**
      Set the focus and main circles.
      Params:
        x0 = focus circle centre x
        y0 = focus circle centre y
        r0 = focus circle radius
        x1 = main circle centre x
        y1 = main circle centre y
        r1 = main circle radius

      Notes: the second circle is taken in the same x, y, radius order as the
      first. Also records whether the distance between the two centres is
      smaller than the difference of the radii (isEnclosed), which selects
      the blit variant that needs no handling of undefined areas.
    */

    void setCoords(float x0, float y0, float r0, float x1, float y1, float r1)
    {
        dx = x1-x0;
        dy = y1-y0;
        dr = r1-r0;
        fx = x0; // note use fx,fy as focus x,y to avoid clashing with x0 in blit method
        fy = y0;
        fr = r0;
        isEnclosed = (sqr(dx)+sqr(dy)) < sqr(dr);
    }

    /** Get the BlitFunc for use by the rasterizer

    Picks the biradial_blit instantiation matching the current isEnclosed
    flag, winding rule and repeat mode. Any repeat mode other than Pad,
    Repeat or Mirror hits assert(0).

    The result is taken from a member function of this struct (hence the
    `return` attribute), so setPaint and setCoords should be called first
    and the struct should stay alive while the BlitFunc is in use.
    */

    BlitFunc getBlitFunc() return
    {
        if (isEnclosed)
        {
            if (windingRule == WindingRule.NonZero)
            {
                switch(repeatMode)
                {
                    case RepeatMode.Pad: return &biradial_blit!(WindingRule.NonZero,RepeatMode.Pad,true);
                    case RepeatMode.Repeat: return &biradial_blit!(WindingRule.NonZero,RepeatMode.Repeat,true);
                    case RepeatMode.Mirror: return &biradial_blit!(WindingRule.NonZero,RepeatMode.Mirror,true);
                    default: assert(0);
                }
            }
            else
            {
                switch(repeatMode)
                {
                    case RepeatMode.Pad: return &biradial_blit!(WindingRule.EvenOdd,RepeatMode.Pad,true);
                    case RepeatMode.Repeat: return &biradial_blit!(WindingRule.EvenOdd,RepeatMode.Repeat,true);
                    case RepeatMode.Mirror: return &biradial_blit!(WindingRule.EvenOdd,RepeatMode.Mirror,true);
                    default: assert(0);
                }
            }
        }
        else
        {
            if (windingRule == WindingRule.NonZero)
            {
                switch(repeatMode)
                {
                    case RepeatMode.Pad: return &biradial_blit!(WindingRule.NonZero,RepeatMode.Pad,false);
                    case RepeatMode.Repeat: return &biradial_blit!(WindingRule.NonZero,RepeatMode.Repeat,false);
                    case RepeatMode.Mirror: return &biradial_blit!(WindingRule.NonZero,RepeatMode.Mirror,false);
                    default: assert(0);
                }
            }
            else
            {
                switch(repeatMode)
                {
                    case RepeatMode.Pad: return &biradial_blit!(WindingRule.EvenOdd,RepeatMode.Pad,false);
                    case RepeatMode.Repeat: return &biradial_blit!(WindingRule.EvenOdd,RepeatMode.Repeat,false);
                    case RepeatMode.Mirror: return &biradial_blit!(WindingRule.EvenOdd,RepeatMode.Mirror,false);
                    default: assert(0);
                }
            }
        }
    }

private:

    /*
      Blit one scanline, four pixels per step.

      Template params:
        wr         = winding rule used to turn winding values into coverage
        mode       = repeat mode applied to the gradient index
        isEnclosed = compile time copy of the isEnclosed member (it shadows
                     the member inside this function); when false the extra
                     pad mask code for undefined areas is compiled in

      Params:
        delta = per pixel winding deltas for this scanline, zeroed as they
                are consumed
        mask  = bit mask passed to nextSetBit to find the next block of
                four pixels that has delta values to integrate
        x0,x1 = start and end x of the span, both must be multiples of 4
        y     = scanline index, 0 <= y < height
    */

    void biradial_blit(WindingRule wr, RepeatMode mode, bool isEnclosed)
                      (int* delta, DMWord* mask, int x0, int x1, int y)
    {
        assert(x0 >= 0);
        assert(x1 <= stride);
        assert(y >= 0);
        assert(y < height);
        assert((x0 & 3) == 0);
        assert((x1 & 3) == 0);

        // main blit variables

        int bpos = x0 / 4;       // current position, in blocks of 4 pixels
        int endbit = x1 / 4;     // end position, in blocks of 4 pixels
        uint* dest = &pixels[y*stride];
        __m128i xmWinding = 0;
        uint* lut = gradient.getLookup.ptr;
        __m128i lutmsk = gradient.lookupLength - 1;
        __m128i lutmsk2 = gradient.lookupLength*2 - 1;
        __m128 xmgradlen = _mm_set1_ps(gradient.lookupLength);

        // XMM constants

        immutable __m128i XMZERO = 0;

        // paint variables

        // note that variable names have changed from doc in the notes folder...
        // fx,fy,fr are the focus circle instead of (x0,y0,r0).
        // (x0,y) is the point of interest instead of (x,y)
        //
        // For a pixel (x,y) we solve A*t^2 + B*t + C = 0 where
        //   A = dx^2 + dy^2 - dr^2
        //   B = -2*(dx*(x-fx) + dy*(y-fy) + fr*dr)
        //   C = (x-fx)^2 + (y-fy)^2 - fr^2
        // A is constant, B is linear in x so it is stepped incrementally,
        // and C is rebuilt each step from the x position plus the part that
        // does not depend on x (coefC).

        float coefA = sqr(dx) + sqr(dy) - sqr(dr);
        float coefB = 2*fx*dx - 2*dx*x0 + 2*dy*fy - 2*dy*y - 2*fr*dr;
        float coefC = sqr(y) + sqr(fx) + sqr(fy) - 2*fy*y - sqr(fr);

        __m128 xmstepB = _mm_set1_ps(-2*dx*4);   // change in B per 4 pixels
        __m128 xmseqx = _mm_setr_ps(0.0f,1.0f,2.0f,3.0f);
        __m128 xmposx = _mm_set1_ps(x0) + xmseqx;
        __m128 xmstepx = _mm_set1_ps(4.0f);
        __m128 xm2x0 = _mm_set1_ps(2*fx);
        __m128 xmcoefB = _mm_set1_ps(coefB) + xmseqx * _mm_set1_ps(-2*dx);
        __m128 xmcoefC = _mm_set1_ps(coefC);

        // t = (-B - sqrt(B^2 - 4*A*C)) / (2*A) = xmq0 * (sqrt(B^2 - xmq1*C) + B)
        // NOTE(review): coefA is zero when the centre distance equals |dr|,
        // which makes xmq0 infinite; that case is not special cased here.

        __m128 xmq0 = _mm_set1_ps(-0.5/coefA);
        __m128 xmq1 = _mm_set1_ps(4*coefA);

        // main loop

        while (bpos < endbit)
        {
            int nsb = nextSetBit(mask, bpos, endbit);

            // do we have a span of unchanging coverage?

            if (bpos < nsb)
            {
                // Calc coverage of first pixel

                int cover = calcCoverage!wr(xmWinding[3]+delta[bpos*4]);

                // We can skip the span

                if (cover < 0x100)
                {
                    // advance the incremental state past the skipped blocks
                    __m128 xskip = _mm_set1_ps(nsb-bpos);
                    xmcoefB = xmcoefB + _mm_mul_ps(xskip,xmstepB);
                    xmposx = xmposx + _mm_mul_ps(xskip,xmstepx);
                    bpos = nsb;
                }

                // Or fill span with solid color

                else if (gradient.isOpaque && (cover > 0xFF00))
                {
                    uint* ptr = &dest[bpos*4];
                    uint* end = ptr + ((nsb-bpos)*4);

                    while (ptr < end)
                    {
                        __m128 xmc = xmposx*(xmposx-xm2x0) + xmcoefC;
                        __m128 xmdiscr = xmcoefB*xmcoefB - xmq1*xmc;
                        __m128 xmsqrtd = _mm_sqrt_ps(xmdiscr);
                        __m128 xmt = xmq0 * (xmsqrtd + xmcoefB);

                        // Generate pad mask if needed, used to control the colour in
                        // undefined /out of bounds areas. A lane becomes all ones
                        // when the sign bit of either xmdiscr or xmt2 is set.

                        static if (isEnclosed == false)
                        {
                            __m128 xmt2 = (xmsqrtd - xmcoefB);
                            __m128i padMask = _mm_or_si128(cast(__m128i) xmdiscr, cast(__m128i) xmt2);
                            padMask = _mm_srai_epi32(padMask,31);
                        }

                        xmcoefB += xmstepB;
                        xmposx += xmstepx;

                        __m128i ipos = _mm_cvtps_epi32 (xmt * xmgradlen);
                        ipos = calcRepeatModeIDX!mode(ipos, lutmsk, lutmsk2);

                        // set ipos to max for undefined areas

                        static if (isEnclosed == false)
                        {
                            ipos = _mm_or_si128(ipos, padMask & lutmsk);
                        }

                        // gather the four gradient colours and store them directly

                        __m128i tmp;
                        tmp[0] = lut[ipos.array[0]];
                        tmp[1] = lut[ipos.array[1]];
                        tmp[2] = lut[ipos.array[2]];
                        tmp[3] = lut[ipos.array[3]];

                        _mm_store_si128 (cast(__m128i*)ptr,tmp);

                        ptr+=4;
                    }

                    bpos = nsb;
                }

                // Or fill span with transparent color

                else
                {
                    __m128i xmcover = _mm_set1_epi16 (cast(ushort) cover);

                    uint* ptr = &dest[bpos*4];
                    uint* end = &dest[nsb*4];

                    while (ptr < end)
                    {
                        __m128 xmc = xmposx*(xmposx-xm2x0) + xmcoefC;
                        __m128 xmdiscr = xmcoefB*xmcoefB - xmq1*xmc;
                        __m128 xmsqrtd = _mm_sqrt_ps(xmdiscr);
                        __m128 xmt = xmq0 * (xmsqrtd + xmcoefB);

                        // Generate pad mask if needed, used to control the colour in
                        // any undefined areas

                        static if (isEnclosed == false)
                        {
                            __m128 xmt2 = (xmsqrtd - xmcoefB);
                            __m128i padMask = _mm_or_si128(cast(__m128i) xmdiscr, cast(__m128i) xmt2);
                            padMask = _mm_srai_epi32(padMask,31);
                        }

                        xmcoefB += xmstepB;
                        xmposx += xmstepx;

                        __m128i ipos = _mm_cvtps_epi32 (xmt * xmgradlen);
                        ipos = calcRepeatModeIDX!mode(ipos, lutmsk, lutmsk2);

                        // set ipos to max for undefined areas

                        static if (isEnclosed == false)
                        {
                            ipos = _mm_or_si128(ipos, padMask & lutmsk);
                        }

                        // load destination pixels

                        __m128i d0 = _mm_load_si128(cast(__m128i*)ptr);
                        __m128i d1 = _mm_unpackhi_epi8(d0,d0);
                        d0 = _mm_unpacklo_epi8(d0,d0);

                        // load grad colours and alpha

                        __m128i c0 = _mm_loadu_si32 (&lut[ipos.array[0]]);
                        __m128i tmpc0 = _mm_loadu_si32 (&lut[ipos.array[1]]);
                        c0 = _mm_unpacklo_epi32 (c0, tmpc0);
                        c0 = _mm_unpacklo_epi8 (c0, c0);

                        __m128i a0 = _mm_mulhi_epu16(c0,xmcover);

                        __m128i c1 = _mm_loadu_si32 (&lut[ipos.array[2]]);
                        __m128i tmpc1 = _mm_loadu_si32 (&lut[ipos.array[3]]);
                        c1 = _mm_unpacklo_epi32 (c1, tmpc1);
                        c1 = _mm_unpacklo_epi8 (c1, c1);

                        __m128i a1 = _mm_mulhi_epu16(c1,xmcover);

                        // unpack alpha

                        a0 = _mm_shufflelo_epi16!255(a0);
                        a0 = _mm_shufflehi_epi16!255(a0);
                        a1 = _mm_shufflelo_epi16!255(a1);
                        a1 = _mm_shufflehi_epi16!255(a1);

                        // alpha*source + dest - alpha*dest

                        c0 = _mm_mulhi_epu16 (c0,a0);
                        c1 = _mm_mulhi_epu16 (c1,a1);
                        c0 = _mm_add_epi16 (c0,d0);
                        c1 = _mm_add_epi16 (c1,d1);
                        d0 = _mm_mulhi_epu16 (d0,a0);
                        d1 = _mm_mulhi_epu16 (d1,a1);
                        c0 = _mm_sub_epi16 (c0,d0);
                        c1 = _mm_sub_epi16 (c1,d1);
                        c0 = _mm_srli_epi16 (c0,8);
                        c1 = _mm_srli_epi16 (c1,8);

                        d0 = _mm_packus_epi16 (c0,c1);

                        _mm_store_si128 (cast(__m128i*)ptr,d0);

                        ptr+=4;
                    }

                    bpos = nsb;
                }
            }

            // At this point we need to integrate scandelta

            uint* ptr = &dest[bpos*4];
            int* dlptr = &delta[bpos*4];

            while (bpos < endbit)
            {
                __m128 xmc = xmposx*(xmposx-xm2x0) + xmcoefC;
                __m128 xmdiscr = xmcoefB*xmcoefB - xmq1*xmc;
                __m128 xmsqrtd = _mm_sqrt_ps(xmdiscr);
                __m128 xmt = xmq0 * (xmsqrtd + xmcoefB);

                // Generate pad mask if needed, used to control the colour in
                // any undefined areas

                static if (isEnclosed == false)
                {
                    __m128 xmt2 = (xmsqrtd - xmcoefB);
                    __m128i padMask = _mm_or_si128(cast(__m128i) xmdiscr, cast(__m128i) xmt2);
                    padMask = _mm_srai_epi32(padMask,31);
                }

                xmcoefB += xmstepB;
                xmposx += xmstepx;

                __m128i ipos = _mm_cvtps_epi32 (xmt * xmgradlen);
                ipos = calcRepeatModeIDX!mode(ipos, lutmsk, lutmsk2);

                // set ipos to max for undefined areas

                static if (isEnclosed == false)
                {
                    ipos = _mm_or_si128(ipos, padMask & lutmsk);
                }

                // Integrate delta values: prefix sum of the four deltas plus the
                // winding carried over from the previous block, then clear the
                // deltas that were consumed

                __m128i idv = _mm_load_si128(cast(__m128i*)dlptr);
                idv = _mm_add_epi32(idv, _mm_slli_si128!4(idv));
                idv = _mm_add_epi32(idv, _mm_slli_si128!8(idv));
                idv = _mm_add_epi32(idv, xmWinding);
                xmWinding = _mm_shuffle_epi32!255(idv);
                _mm_store_si128(cast(__m128i*)dlptr,XMZERO);

                // calculate coverage from winding

                __m128i xmcover = calcCoverage32!wr(idv);

                // Load destination pixels

                __m128i d0 = _mm_load_si128(cast(__m128i*)ptr);
                __m128i d1 = _mm_unpackhi_epi8(d0,d0);
                d0 = _mm_unpacklo_epi8(d0,d0);

                // load grad colors

                __m128i c0 = _mm_loadu_si32 (&lut[ipos.array[0]]);
                __m128i tmpc0 = _mm_loadu_si32 (&lut[ipos.array[1]]);
                c0 = _mm_unpacklo_epi32 (c0, tmpc0);
                c0 = _mm_unpacklo_epi8 (c0, c0);

                __m128i a0 = _mm_unpacklo_epi32(xmcover,xmcover);
                a0 = _mm_mulhi_epu16(a0, c0);

                __m128i c1 = _mm_loadu_si32 (&lut[ipos.array[2]]);
                __m128i tmpc1 = _mm_loadu_si32 (&lut[ipos.array[3]]);
                c1 = _mm_unpacklo_epi32 (c1, tmpc1);
                c1 = _mm_unpacklo_epi8 (c1, c1);

                __m128i a1 = _mm_unpackhi_epi32(xmcover,xmcover);
                a1 = _mm_mulhi_epu16(a1, c1);

                // unpack alpha

                a0 = _mm_shufflelo_epi16!255(a0);
                a0 = _mm_shufflehi_epi16!255(a0);
                a1 = _mm_shufflelo_epi16!255(a1);
                a1 = _mm_shufflehi_epi16!255(a1);

                // alpha*source + dest - alpha*dest

                c0 = _mm_mulhi_epu16 (c0,a0);
                c1 = _mm_mulhi_epu16 (c1,a1);
                c0 = _mm_add_epi16 (c0,d0);
                c1 = _mm_add_epi16 (c1,d1);
                d0 = _mm_mulhi_epu16 (d0,a0);
                d1 = _mm_mulhi_epu16 (d1,a1);
                c0 = _mm_sub_epi16 (c0, d0);
                c1 = _mm_sub_epi16 (c1, d1);
                c0 = _mm_srli_epi16 (c0,8);
                c1 = _mm_srli_epi16 (c1,8);

                d0 = _mm_packus_epi16 (c0,c1);

                _mm_store_si128 (cast(__m128i*)ptr,d0);

                bpos++;
                ptr+=4;
                dlptr+=4;

                // stop integrating once the next four deltas are all zero and
                // go back to the span handling in the outer loop

                if (((cast(ulong*)dlptr)[0] | (cast(ulong*)dlptr)[1]) == 0) break;
            }
        }
    }

private:

    uint* pixels;            // destination buffer, 32 bpp
    int stride;              // buffer width in pixels
    int height;              // buffer height in pixels
    float fx,fy,fr;          // focus circle centre and radius
    float dx,dy,dr;          // main circle minus focus circle (centre and radius)
    Gradient gradient;
    WindingRule windingRule;
    RepeatMode repeatMode;
    bool isEnclosed;         // set by setCoords: sqr(dx)+sqr(dy) < sqr(dr)
}