Flutter Engine
The Flutter Engine
Loading...
Searching...
No Matches
Classes | Macros | Typedefs | Functions | Variables
Transform_inl.h File Reference

Go to the source code of this file.

Classes

struct  NoCtx
 
struct  Ctx
 

Macros

#define SI   static inline
 
#define STAGE_PARAMS(MAYBE_REF)
 
#define DECLARE_STAGE(name, arg)
 
#define STAGE(name, arg)   DECLARE_STAGE(name, arg)
 
#define FINAL_STAGE(name, arg)   DECLARE_STAGE(name, arg)
 
#define M(name)   case Op::name: Exec_##name(*contexts++, src, dst, r, g, b, a, i); break;
 
#define M(name)   case Op::name: Exec_##name(*contexts++, src, dst, r, g, b, a, i); return;
 

Typedefs

using F = V< float >
 
using I32 = V< int32_t >
 
using U64 = V< uint64_t >
 
using U32 = V< uint32_t >
 
using U16 = V< uint16_t >
 
using U8 = V< uint8_t >
 

Functions

template<typename T , typename P >
SI T load (const P *ptr)
 
template<typename T , typename P >
SI void store (P *ptr, const T &val)
 
template<typename D , typename S >
SI D cast (const S &v)
 
template<typename D , typename S >
SI D bit_pun (const S &v)
 
SI U32 to_fixed (F f)
 
template<typename C , typename T >
SI T if_then_else (C cond, T t, T e)
 
SI F F_from_Half (U16 half)
 
SI U16 Half_from_F (F f)
 
SI U64 swap_endian_16x4 (const U64 &rgba)
 
SI F min_ (F x, F y)
 
SI F max_ (F x, F y)
 
SI F floor_ (F x)
 
SI F approx_log2 (F x)
 
SI F approx_log (F x)
 
SI F approx_exp2 (F x)
 
SI F approx_pow (F x, float y)
 
SI F approx_exp (F x)
 
SI F strip_sign (F x, U32 *sign)
 
SI F apply_sign (F x, U32 sign)
 
SI F apply_tf (const skcms_TransferFunction *tf, F x)
 
SI F apply_gamma (const skcms_TransferFunction *tf, F x)
 
SI F apply_pq (const skcms_TransferFunction *tf, F x)
 
SI F apply_hlg (const skcms_TransferFunction *tf, F x)
 
SI F apply_hlginv (const skcms_TransferFunction *tf, F x)
 
template<typename T , typename P >
SI T load_3 (const P *p)
 
template<typename T , typename P >
SI T load_4 (const P *p)
 
template<typename T , typename P >
SI void store_3 (P *p, const T &v)
 
template<typename T , typename P >
SI void store_4 (P *p, const T &v)
 
SI U8 gather_8 (const uint8_t *p, I32 ix)
 
SI U16 gather_16 (const uint8_t *p, I32 ix)
 
SI U32 gather_32 (const uint8_t *p, I32 ix)
 
SI U32 gather_24 (const uint8_t *p, I32 ix)
 
SI void gather_48 (const uint8_t *p, I32 ix, U64 *v)
 
SI F F_from_U8 (U8 v)
 
SI F F_from_U16_BE (U16 v)
 
SI U16 U16_from_F (F v)
 
SI F minus_1_ulp (F v)
 
SI F table (const skcms_Curve *curve, F v)
 
SI void sample_clut_8 (const uint8_t *grid_8, I32 ix, F *r, F *g, F *b)
 
SI void sample_clut_8 (const uint8_t *grid_8, I32 ix, F *r, F *g, F *b, F *a)
 
SI void sample_clut_16 (const uint8_t *grid_16, I32 ix, F *r, F *g, F *b)
 
SI void sample_clut_16 (const uint8_t *grid_16, I32 ix, F *r, F *g, F *b, F *a)
 
static void clut (uint32_t input_channels, uint32_t output_channels, const uint8_t grid_points[4], const uint8_t *grid_8, const uint8_t *grid_16, F *r, F *g, F *b, F *a)
 
static void clut (const skcms_A2B *a2b, F *r, F *g, F *b, F a)
 
static void clut (const skcms_B2A *b2a, F *r, F *g, F *b, F *a)
 
 STAGE (load_a8, NoCtx)
 
 STAGE (load_g8, NoCtx)
 
 STAGE (load_4444, NoCtx)
 
 STAGE (load_565, NoCtx)
 
 STAGE (load_888, NoCtx)
 
 STAGE (load_8888, NoCtx)
 
 STAGE (load_1010102, NoCtx)
 
 STAGE (load_101010x_XR, NoCtx)
 
 STAGE (load_161616LE, NoCtx)
 
 STAGE (load_16161616LE, NoCtx)
 
 STAGE (load_161616BE, NoCtx)
 
 STAGE (load_16161616BE, NoCtx)
 
 STAGE (load_hhh, NoCtx)
 
 STAGE (load_hhhh, NoCtx)
 
 STAGE (load_fff, NoCtx)
 
 STAGE (load_ffff, NoCtx)
 
 STAGE (swap_rb, NoCtx)
 
 STAGE (clamp, NoCtx)
 
 STAGE (invert, NoCtx)
 
 STAGE (force_opaque, NoCtx)
 
 STAGE (premul, NoCtx)
 
 STAGE (unpremul, NoCtx)
 
 STAGE (matrix_3x3, const skcms_Matrix3x3 *matrix)
 
 STAGE (matrix_3x4, const skcms_Matrix3x4 *matrix)
 
 STAGE (lab_to_xyz, NoCtx)
 
 STAGE (xyz_to_lab, NoCtx)
 
 STAGE (gamma_r, const skcms_TransferFunction *tf)
 
 STAGE (gamma_g, const skcms_TransferFunction *tf)
 
 STAGE (gamma_b, const skcms_TransferFunction *tf)
 
 STAGE (gamma_a, const skcms_TransferFunction *tf)
 
 STAGE (gamma_rgb, const skcms_TransferFunction *tf)
 
 STAGE (tf_r, const skcms_TransferFunction *tf)
 
 STAGE (tf_g, const skcms_TransferFunction *tf)
 
 STAGE (tf_b, const skcms_TransferFunction *tf)
 
 STAGE (tf_a, const skcms_TransferFunction *tf)
 
 STAGE (tf_rgb, const skcms_TransferFunction *tf)
 
 STAGE (pq_r, const skcms_TransferFunction *tf)
 
 STAGE (pq_g, const skcms_TransferFunction *tf)
 
 STAGE (pq_b, const skcms_TransferFunction *tf)
 
 STAGE (pq_a, const skcms_TransferFunction *tf)
 
 STAGE (pq_rgb, const skcms_TransferFunction *tf)
 
 STAGE (hlg_r, const skcms_TransferFunction *tf)
 
 STAGE (hlg_g, const skcms_TransferFunction *tf)
 
 STAGE (hlg_b, const skcms_TransferFunction *tf)
 
 STAGE (hlg_a, const skcms_TransferFunction *tf)
 
 STAGE (hlg_rgb, const skcms_TransferFunction *tf)
 
 STAGE (hlginv_r, const skcms_TransferFunction *tf)
 
 STAGE (hlginv_g, const skcms_TransferFunction *tf)
 
 STAGE (hlginv_b, const skcms_TransferFunction *tf)
 
 STAGE (hlginv_a, const skcms_TransferFunction *tf)
 
 STAGE (hlginv_rgb, const skcms_TransferFunction *tf)
 
 STAGE (table_r, const skcms_Curve *curve)
 
 STAGE (table_g, const skcms_Curve *curve)
 
 STAGE (table_b, const skcms_Curve *curve)
 
 STAGE (table_a, const skcms_Curve *curve)
 
 STAGE (clut_A2B, const skcms_A2B *a2b)
 
 STAGE (clut_B2A, const skcms_B2A *b2a)
 
 FINAL_STAGE (store_a8, NoCtx)
 
 FINAL_STAGE (store_g8, NoCtx)
 
 FINAL_STAGE (store_4444, NoCtx)
 
 FINAL_STAGE (store_565, NoCtx)
 
 FINAL_STAGE (store_888, NoCtx)
 
 FINAL_STAGE (store_8888, NoCtx)
 
 FINAL_STAGE (store_101010x_XR, NoCtx)
 
 FINAL_STAGE (store_1010102, NoCtx)
 
 FINAL_STAGE (store_161616LE, NoCtx)
 
 FINAL_STAGE (store_16161616LE, NoCtx)
 
 FINAL_STAGE (store_161616BE, NoCtx)
 
 FINAL_STAGE (store_16161616BE, NoCtx)
 
 FINAL_STAGE (store_hhh, NoCtx)
 
 FINAL_STAGE (store_hhhh, NoCtx)
 
 FINAL_STAGE (store_fff, NoCtx)
 
 FINAL_STAGE (store_ffff, NoCtx)
 
static void exec_stages (const Op *ops, const void **contexts, const char *src, char *dst, int i)
 
void run_program (const Op *program, const void **contexts, SKCMS_MAYBE_UNUSED ptrdiff_t programSize, const char *src, char *dst, int n, const size_t src_bpp, const size_t dst_bpp)
 

Variables

static constexpr F F0 = 0.0f
 
static constexpr F F1 = 1.0f
 
static constexpr F FInfBits = 0x7f800000
 

Macro Definition Documentation

◆ DECLARE_STAGE

#define DECLARE_STAGE (   name,
  arg 
)
Value:
SI void Exec_##name##_k(arg, STAGE_PARAMS(&)); \
\
SI void Exec_##name(const void* ctx, STAGE_PARAMS(&)) { \
Exec_##name##_k(Ctx{ctx}, src, dst, r, g, b, a, i); \
} \
\
SI void Exec_##name##_k(arg, STAGE_PARAMS(&))
#define STAGE_PARAMS(MAYBE_REF)
#define SI
static bool b
struct MyStruct a[10]
const char * name
Definition fuchsia.cc:50

Definition at line 813 of file Transform_inl.h.

816 { \
817 Exec_##name##_k(Ctx{ctx}, src, dst, r, g, b, a, i); \
818 } \
819 \
820 SI void Exec_##name##_k(arg, STAGE_PARAMS(&))
dst
Definition cp.py:12

◆ FINAL_STAGE

#define FINAL_STAGE (   name,
  arg 
)    DECLARE_STAGE(name, arg)

Definition at line 823 of file Transform_inl.h.

◆ M [1/2]

#define M (   name)    case Op::name: Exec_##name(*contexts++, src, dst, r, g, b, a, i); break;

◆ M [2/2]

#define M (   name)    case Op::name: Exec_##name(*contexts++, src, dst, r, g, b, a, i); return;

◆ SI

#define SI   static inline

Definition at line 94 of file Transform_inl.h.

◆ STAGE

#define STAGE (   name,
  arg 
)    DECLARE_STAGE(name, arg)

Definition at line 822 of file Transform_inl.h.

◆ STAGE_PARAMS

#define STAGE_PARAMS (   MAYBE_REF)
Value:
SKCMS_MAYBE_UNUSED const char* src, \
SKCMS_MAYBE_UNUSED char* dst, \
SKCMS_MAYBE_UNUSED F MAYBE_REF r, \
SKCMS_MAYBE_UNUSED F MAYBE_REF g, \
SKCMS_MAYBE_UNUSED F MAYBE_REF b, \
SKCMS_MAYBE_UNUSED F MAYBE_REF a, \
SKCMS_MAYBE_UNUSED int i
#define SKCMS_MAYBE_UNUSED
Definition SkMD5.cpp:120

Definition at line 772 of file Transform_inl.h.

Typedef Documentation

◆ F

using F = V<float>

Definition at line 14 of file Transform_inl.h.

◆ I32

using I32 = V<int32_t>

Definition at line 15 of file Transform_inl.h.

◆ U16

using U16 = V<uint16_t>

Definition at line 18 of file Transform_inl.h.

◆ U32

using U32 = V<uint32_t>

Definition at line 17 of file Transform_inl.h.

◆ U64

using U64 = V<uint64_t>

Definition at line 16 of file Transform_inl.h.

◆ U8

using U8 = V<uint8_t>

Definition at line 19 of file Transform_inl.h.

Function Documentation

◆ apply_gamma()

SI F apply_gamma ( const skcms_TransferFunction *  tf,
F  x 
)

Definition at line 310 of file Transform_inl.h.

310 {
311 U32 sign;
312 x = strip_sign(x, &sign);
313 return apply_sign(approx_pow(x, tf->g), sign);
314}
static int sign(SkScalar x)
Definition SkPath.cpp:2141
SI F approx_pow(F x, float y)
SI F apply_sign(F x, U32 sign)
SI F strip_sign(F x, U32 *sign)
V< uint32_t > U32
double x

◆ apply_hlg()

SI F apply_hlg ( const skcms_TransferFunction *  tf,
F  x 
)

Definition at line 328 of file Transform_inl.h.

328 {
329 const float R = tf->a, G = tf->b,
330 a = tf->c, b = tf->d, c = tf->e,
331 K = tf->f + 1;
332 U32 bits = bit_pun<U32>(x),
333 sign = bits & 0x80000000;
334 x = bit_pun<F>(bits ^ sign);
335
336 F v = if_then_else(x*R <= 1, approx_pow(x*R, G)
337 , approx_exp((x-c)*a) + b);
338
339 return K*bit_pun<F>(sign | bit_pun<U32>(v));
340}
SI F approx_exp(F x)
SI T if_then_else(C cond, T t, T e)
static const int K
Definition daa.cpp:21
#define R(r)
Definition SkMD5.cpp:125

◆ apply_hlginv()

SI F apply_hlginv ( const skcms_TransferFunction *  tf,
F  x 
)

Definition at line 342 of file Transform_inl.h.

342 {
343 const float R = tf->a, G = tf->b,
344 a = tf->c, b = tf->d, c = tf->e,
345 K = tf->f + 1;
346 U32 bits = bit_pun<U32>(x),
347 sign = bits & 0x80000000;
348 x = bit_pun<F>(bits ^ sign);
349 x /= K;
350
351 F v = if_then_else(x <= 1, R * approx_pow(x, G)
352 , a * approx_log(x - b) + c);
353
354 return bit_pun<F>(sign | bit_pun<U32>(v));
355}
SI F approx_log(F x)

◆ apply_pq()

SI F apply_pq ( const skcms_TransferFunction *  tf,
F  x 
)

Definition at line 316 of file Transform_inl.h.

316 {
317 U32 bits = bit_pun<U32>(x),
318 sign = bits & 0x80000000;
319 x = bit_pun<F>(bits ^ sign);
320
321 F v = approx_pow(max_(tf->a + tf->b * approx_pow(x, tf->c), F0)
322 / (tf->d + tf->e * approx_pow(x, tf->c)),
323 tf->f);
324
325 return bit_pun<F>(sign | bit_pun<U32>(v));
326}
SI F max_(F x, F y)
static constexpr F F0

◆ apply_sign()

SI F apply_sign ( F  x,
U32  sign 
)

Definition at line 291 of file Transform_inl.h.

291 {
292 return bit_pun<F>(sign | bit_pun<U32>(x));
293}

◆ apply_tf()

SI F apply_tf ( const skcms_TransferFunction *  tf,
F  x 
)

Definition at line 296 of file Transform_inl.h.

296 {
297 // Peel off the sign bit and set x = |x|.
298 U32 sign;
299 x = strip_sign(x, &sign);
300
301 // The transfer function has a linear part up to d, exponential at d and after.
302 F v = if_then_else(x < tf->d, tf->c*x + tf->f
303 , approx_pow(tf->a*x + tf->b, tf->g) + tf->e);
304
305 // Tack the sign bit back on.
306 return apply_sign(v, sign);
307}
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE auto & d
Definition main.cc:19

◆ approx_exp()

SI F approx_exp ( F  x)

Definition at line 280 of file Transform_inl.h.

280 {
281 const float log2_e = 1.4426950408889634074f;
282 return approx_exp2(log2_e * x);
283}
SI F approx_exp2(F x)

◆ approx_exp2()

SI F approx_exp2 ( F  x)

Definition at line 264 of file Transform_inl.h.

264 {
265 F fract = x - floor_(x);
266
267 F fbits = (1.0f * (1<<23)) * (x + 121.274057500f
268 - 1.490129070f*fract
269 + 27.728023300f/(4.84252568f - fract));
270 I32 bits = cast<I32>(min_(max_(fbits, F0), FInfBits));
271
272 return bit_pun<F>(bits);
273}
SI F min_(F x, F y)
static constexpr F FInfBits
SI F floor_(F x)
V< int32_t > I32
SIN Vec< N, float > fract(const Vec< N, float > &x)
Definition SkVx.h:744

◆ approx_log()

SI F approx_log ( F  x)

Definition at line 259 of file Transform_inl.h.

259 {
260 const float ln2 = 0.69314718f;
261 return ln2 * approx_log2(x);
262}
SI F approx_log2(F x)

◆ approx_log2()

SI F approx_log2 ( F  x)

Definition at line 245 of file Transform_inl.h.

245 {
246 // The first approximation of log2(x) is its exponent 'e', minus 127.
247 I32 bits = bit_pun<I32>(x);
248
249 F e = cast<F>(bits) * (1.0f / (1<<23));
250
251 // If we use the mantissa too we can refine the error significantly.
252 F m = bit_pun<F>( (bits & 0x007fffff) | 0x3f000000 );
253
254 return e - 124.225514990f
255 - 1.498030302f*m
256 - 1.725879990f/(0.3520887068f + m);
257}

◆ approx_pow()

SI F approx_pow ( F  x,
float  y 
)

Definition at line 275 of file Transform_inl.h.

275 {
276 return if_then_else((x == F0) | (x == F1), x
277 , approx_exp2(approx_log2(x) * y));
278}
static constexpr F F1
double y

◆ bit_pun()

template<typename D , typename S >
SI D bit_pun ( const S &  v)

Definition at line 126 of file Transform_inl.h.

126 {
127 static_assert(sizeof(D) == sizeof(v), "");
128 return load<D>(&v);
129}

◆ cast()

template<typename D , typename S >
SI D cast ( const S &  v)

Definition at line 111 of file Transform_inl.h.

111 {
112#if N == 1
113 return (D)v;
114#elif defined(__clang__)
115 return __builtin_convertvector(v, D);
116#else
117 D d;
118 for (int i = 0; i < N; i++) {
119 d[i] = v[i];
120 }
121 return d;
122#endif
123}
#define N
Definition beziers.cpp:19

◆ clut() [1/3]

static void clut ( const skcms_A2B *  a2b,
F *  r,
F *  g,
F *  b,
F  a 
)
static

Definition at line 753 of file Transform_inl.h.

753 {
754 clut(a2b->input_channels, a2b->output_channels,
755 a2b->grid_points, a2b->grid_8, a2b->grid_16,
756 r,g,b,&a);
757}
static void clut(uint32_t input_channels, uint32_t output_channels, const uint8_t grid_points[4], const uint8_t *grid_8, const uint8_t *grid_16, F *r, F *g, F *b, F *a)
const uint8_t * grid_8
uint32_t output_channels
const uint8_t * grid_16
uint32_t input_channels
uint8_t grid_points[4]

◆ clut() [2/3]

static void clut ( const skcms_B2A *  b2a,
F *  r,
F *  g,
F *  b,
F *  a 
)
static

Definition at line 758 of file Transform_inl.h.

758 {
759 clut(b2a->input_channels, b2a->output_channels,
760 b2a->grid_points, b2a->grid_8, b2a->grid_16,
761 r,g,b,a);
762}
const uint8_t * grid_16
const uint8_t * grid_8
uint8_t grid_points[4]
uint32_t input_channels
uint32_t output_channels

◆ clut() [3/3]

static void clut ( uint32_t  input_channels,
uint32_t  output_channels,
const uint8_t  grid_points[4],
const uint8_t *  grid_8,
const uint8_t *  grid_16,
F *  r,
F *  g,
F *  b,
F *  a 
)
static

Definition at line 672 of file Transform_inl.h.

674 {
675
676 const int dim = (int)input_channels;
677 assert (0 < dim && dim <= 4);
678 assert (output_channels == 3 ||
679 output_channels == 4);
680
681 // For each of these arrays, think foo[2*dim], but we use foo[8] since we know dim <= 4.
682 I32 index [8]; // Index contribution by dimension, first low from 0, then high from 4.
683 F weight[8]; // Weight for each contribution, again first low, then high.
684
685 // O(dim) work first: calculate index,weight from r,g,b,a.
686 const F inputs[] = { *r,*g,*b,*a };
687 for (int i = dim-1, stride = 1; i >= 0; i--) {
688 // x is where we logically want to sample the grid in the i-th dimension.
689 F x = inputs[i] * (float)(grid_points[i] - 1);
690
691 // But we can't index at floats. lo and hi are the two integer grid points surrounding x.
692 I32 lo = cast<I32>( x ), // i.e. trunc(x) == floor(x) here.
693 hi = cast<I32>(minus_1_ulp(x+1.0f));
694 // Notice how we fold in the accumulated stride across previous dimensions here.
695 index[i+0] = lo * stride;
696 index[i+4] = hi * stride;
697 stride *= grid_points[i];
698
699 // We'll interpolate between those two integer grid points by t.
700 F t = x - cast<F>(lo); // i.e. fract(x)
701 weight[i+0] = 1-t;
702 weight[i+4] = t;
703 }
704
705 *r = *g = *b = F0;
706 if (output_channels == 4) {
707 *a = F0;
708 }
709
710 // We'll sample 2^dim == 1<<dim table entries per pixel,
711 // in all combinations of low and high in each dimension.
712 for (int combo = 0; combo < (1<<dim); combo++) { // This loop can be done in any order.
713
714 // Each of these upcoming (combo&N)*K expressions here evaluates to 0 or 4,
715 // where 0 selects the low index contribution and its weight 1-t,
716 // or 4 the high index contribution and its weight t.
717
718 // Since 0<dim≤4, we can always just start off with the 0-th channel,
719 // then handle the others conditionally.
720 I32 ix = index [0 + (combo&1)*4];
721 F w = weight[0 + (combo&1)*4];
722
723 switch ((dim-1)&3) { // This lets the compiler know there are no other cases to handle.
724 case 3: ix += index [3 + (combo&8)/2];
725 w *= weight[3 + (combo&8)/2];
726 SKCMS_FALLTHROUGH;
727 // fall through
728
729 case 2: ix += index [2 + (combo&4)*1];
730 w *= weight[2 + (combo&4)*1];
731 SKCMS_FALLTHROUGH;
732 // fall through
733
734 case 1: ix += index [1 + (combo&2)*2];
735 w *= weight[1 + (combo&2)*2];
736 }
737
738 F R,G,B,A=F0;
739 if (output_channels == 3) {
740 if (grid_8) { sample_clut_8 (grid_8 ,ix, &R,&G,&B); }
741 else { sample_clut_16(grid_16,ix, &R,&G,&B); }
742 } else {
743 if (grid_8) { sample_clut_8 (grid_8 ,ix, &R,&G,&B,&A); }
744 else { sample_clut_16(grid_16,ix, &R,&G,&B,&A); }
745 }
746 *r += w*R;
747 *g += w*G;
748 *b += w*B;
749 *a += w*A;
750 }
751}
SI void sample_clut_8(const uint8_t *grid_8, I32 ix, F *r, F *g, F *b)
SI F minus_1_ulp(F v)
SI void sample_clut_16(const uint8_t *grid_16, I32 ix, F *r, F *g, F *b)
Type::kYUV Type::kRGBA() int(0.7 *637)
#define B
SkScalar w
#define SKCMS_FALLTHROUGH

◆ exec_stages()

static void exec_stages ( const Op *  ops,
const void **  contexts,
const char *  src,
char *  dst,
int  i 
)
static

Definition at line 1476 of file Transform_inl.h.

1477 {
1478 F r = F0, g = F0, b = F0, a = F1;
1479 while (true) {
1480 switch (*ops++) {
1481#define M(name) case Op::name: Exec_##name(*contexts++, src, dst, r, g, b, a, i); break;
1482 SKCMS_WORK_OPS(M)
1483#undef M
1484#define M(name) case Op::name: Exec_##name(*contexts++, src, dst, r, g, b, a, i); return;
1485 SKCMS_STORE_OPS(M)
1486#undef M
1487 }
1488 }
1489 }
SkPathOp ops[]
#define M(name)
#define SKCMS_STORE_OPS(M)
#define SKCMS_WORK_OPS(M)

◆ F_from_Half()

SI F F_from_Half ( U16  half)

Definition at line 153 of file Transform_inl.h.

153 {
154#if defined(USING_NEON_F16C)
155 return vcvt_f32_f16((float16x4_t)half);
156#elif defined(USING_AVX512F)
157 return (F)_mm512_cvtph_ps((__m256i)half);
158#elif defined(USING_AVX_F16C)
159 typedef int16_t __attribute__((vector_size(16))) I16;
160 return __builtin_ia32_vcvtph2ps256((I16)half);
161#else
162 U32 wide = cast<U32>(half);
163 // A half is 1-5-10 sign-exponent-mantissa, with 15 exponent bias.
164 U32 s = wide & 0x8000,
165 em = wide ^ s;
166
167 // Constructing the float is easy if the half is not denormalized.
168 F norm = bit_pun<F>( (s<<16) + (em<<13) + ((127-15)<<23) );
169
170 // Simply flush all denorm half floats to zero.
171 return if_then_else(em < 0x0400, F0, norm);
172#endif
173}
V< 8, int16_t > I16
Definition QMath.h:30
struct MyStruct s
__attribute__((visibility("default"))) int RunBenchmarks(int argc

◆ F_from_U16_BE()

SI F F_from_U16_BE ( U16  v)

Definition at line 587 of file Transform_inl.h.

587 {
588 // All 16-bit ICC values are big-endian, so we byte swap before converting to float.
589 // MSVC catches the "loss" of data here in the portable path, so we also make sure to mask.
590 U16 lo = (v >> 8),
591 hi = (v << 8) & 0xffff;
592 return cast<F>(lo|hi) * (1/65535.0f);
593}
V< 8, uint16_t > U16
Definition QMath.h:31

◆ F_from_U8()

SI F F_from_U8 ( U8  v)

Definition at line 583 of file Transform_inl.h.

583 {
584 return cast<F>(v) * (1/255.0f);
585}

◆ FINAL_STAGE() [1/16]

FINAL_STAGE ( store_1010102  ,
NoCtx   
)

Definition at line 1293 of file Transform_inl.h.

1293 {
1294 store(dst + 4*i, cast<U32>(to_fixed(r * 1023)) << 0
1295 | cast<U32>(to_fixed(g * 1023)) << 10
1296 | cast<U32>(to_fixed(b * 1023)) << 20
1297 | cast<U32>(to_fixed(a * 3)) << 30);
1298}
SI void store(P *ptr, const T &val)
SI U32 to_fixed(F f)

◆ FINAL_STAGE() [2/16]

FINAL_STAGE ( store_101010x_XR  ,
NoCtx   
)

Definition at line 1284 of file Transform_inl.h.

1284 {
1285 static constexpr float min = -0.752941f;
1286 static constexpr float max = 1.25098f;
1287 static constexpr float range = max - min;
1288 store(dst + 4*i, cast<U32>(to_fixed(((r - min) / range) * 1023)) << 0
1289 | cast<U32>(to_fixed(((g - min) / range) * 1023)) << 10
1290 | cast<U32>(to_fixed(((b - min) / range) * 1023)) << 20);
1291}
static float max(float r, float g, float b)
Definition hsl.cpp:49
static float min(float r, float g, float b)
Definition hsl.cpp:48

◆ FINAL_STAGE() [3/16]

FINAL_STAGE ( store_16161616BE  ,
NoCtx   
)

Definition at line 1362 of file Transform_inl.h.

1362 {
1363 uintptr_t ptr = (uintptr_t)(dst + 8*i);
1364 assert( (ptr & 1) == 0 ); // The dst pointer must be 2-byte aligned
1365 uint16_t* rgba = (uint16_t*)ptr; // for this cast to uint16_t* to be safe.
1366#if defined(USING_NEON)
1367 uint16x4x4_t v = {{
1368 (uint16x4_t)swap_endian_16(cast<U16>(U16_from_F(r))),
1369 (uint16x4_t)swap_endian_16(cast<U16>(U16_from_F(g))),
1370 (uint16x4_t)swap_endian_16(cast<U16>(U16_from_F(b))),
1371 (uint16x4_t)swap_endian_16(cast<U16>(U16_from_F(a))),
1372 }};
1373 vst4_u16(rgba, v);
1374#else
1375 U64 px = cast<U64>(to_fixed(r * 65535)) << 0
1376 | cast<U64>(to_fixed(g * 65535)) << 16
1377 | cast<U64>(to_fixed(b * 65535)) << 32
1378 | cast<U64>(to_fixed(a * 65535)) << 48;
1379 store(rgba, swap_endian_16x4(px));
1380#endif
1381}
static const uint32_t rgba[kNumPixels]
V< uint64_t > U64
SI U16 U16_from_F(F v)
SI U64 swap_endian_16x4(const U64 &rgba)

◆ FINAL_STAGE() [4/16]

FINAL_STAGE ( store_16161616LE  ,
NoCtx   
)

Definition at line 1319 of file Transform_inl.h.

1319 {
1320 uintptr_t ptr = (uintptr_t)(dst + 8*i);
1321 assert( (ptr & 1) == 0 ); // The dst pointer must be 2-byte aligned
1322 uint16_t* rgba = (uint16_t*)ptr; // for this cast to uint16_t* to be safe.
1323#if defined(USING_NEON)
1324 uint16x4x4_t v = {{
1325 (uint16x4_t)U16_from_F(r),
1326 (uint16x4_t)U16_from_F(g),
1327 (uint16x4_t)U16_from_F(b),
1328 (uint16x4_t)U16_from_F(a),
1329 }};
1330 vst4_u16(rgba, v);
1331#else
1332 U64 px = cast<U64>(to_fixed(r * 65535)) << 0
1333 | cast<U64>(to_fixed(g * 65535)) << 16
1334 | cast<U64>(to_fixed(b * 65535)) << 32
1335 | cast<U64>(to_fixed(a * 65535)) << 48;
1336 store(rgba, px);
1337#endif
1338}

◆ FINAL_STAGE() [5/16]

FINAL_STAGE ( store_161616BE  ,
NoCtx   
)

Definition at line 1340 of file Transform_inl.h.

1340 {
1341 uintptr_t ptr = (uintptr_t)(dst + 6*i);
1342 assert( (ptr & 1) == 0 ); // The dst pointer must be 2-byte aligned
1343 uint16_t* rgb = (uint16_t*)ptr; // for this cast to uint16_t* to be safe.
1344#if defined(USING_NEON)
1345 uint16x4x3_t v = {{
1346 (uint16x4_t)swap_endian_16(cast<U16>(U16_from_F(r))),
1347 (uint16x4_t)swap_endian_16(cast<U16>(U16_from_F(g))),
1348 (uint16x4_t)swap_endian_16(cast<U16>(U16_from_F(b))),
1349 }};
1350 vst3_u16(rgb, v);
1351#else
1352 U32 R = to_fixed(r * 65535),
1353 G = to_fixed(g * 65535),
1354 B = to_fixed(b * 65535);
1355 store_3(rgb+0, cast<U16>((R & 0x00ff) << 8 | (R & 0xff00) >> 8) );
1356 store_3(rgb+1, cast<U16>((G & 0x00ff) << 8 | (G & 0xff00) >> 8) );
1357 store_3(rgb+2, cast<U16>((B & 0x00ff) << 8 | (B & 0xff00) >> 8) );
1358#endif
1359
1360}
SI void store_3(P *p, const T &v)

◆ FINAL_STAGE() [6/16]

FINAL_STAGE ( store_161616LE  ,
NoCtx   
)

Definition at line 1300 of file Transform_inl.h.

1300 {
1301 uintptr_t ptr = (uintptr_t)(dst + 6*i);
1302 assert( (ptr & 1) == 0 ); // The dst pointer must be 2-byte aligned
1303 uint16_t* rgb = (uint16_t*)ptr; // for this cast to uint16_t* to be safe.
1304#if defined(USING_NEON)
1305 uint16x4x3_t v = {{
1306 (uint16x4_t)U16_from_F(r),
1307 (uint16x4_t)U16_from_F(g),
1308 (uint16x4_t)U16_from_F(b),
1309 }};
1310 vst3_u16(rgb, v);
1311#else
1312 store_3(rgb+0, U16_from_F(r));
1313 store_3(rgb+1, U16_from_F(g));
1314 store_3(rgb+2, U16_from_F(b));
1315#endif
1316
1317}

◆ FINAL_STAGE() [7/16]

FINAL_STAGE ( store_4444  ,
NoCtx   
)

Definition at line 1242 of file Transform_inl.h.

1242 {
1243 store<U16>(dst + 2*i, cast<U16>(to_fixed(r * 15) << 12)
1244 | cast<U16>(to_fixed(g * 15) << 8)
1245 | cast<U16>(to_fixed(b * 15) << 4)
1246 | cast<U16>(to_fixed(a * 15) << 0));
1247}

◆ FINAL_STAGE() [8/16]

FINAL_STAGE ( store_565  ,
NoCtx   
)

Definition at line 1249 of file Transform_inl.h.

1249 {
1250 store<U16>(dst + 2*i, cast<U16>(to_fixed(r * 31) << 0 )
1251 | cast<U16>(to_fixed(g * 63) << 5 )
1252 | cast<U16>(to_fixed(b * 31) << 11 ));
1253}

◆ FINAL_STAGE() [9/16]

FINAL_STAGE ( store_888  ,
NoCtx   
)

Definition at line 1255 of file Transform_inl.h.

1255 {
1256 uint8_t* rgb = (uint8_t*)dst + 3*i;
1257#if defined(USING_NEON)
1258 // Same deal as load_888 but in reverse... we'll store using uint8x8x3_t, but
1259 // get there via U16 to save some instructions converting to float. And just
1260 // like load_888, we'd prefer to go via U32 but for ARMv7 support.
1261 U16 R = cast<U16>(to_fixed(r * 255)),
1262 G = cast<U16>(to_fixed(g * 255)),
1263 B = cast<U16>(to_fixed(b * 255));
1264
1265 uint8x8x3_t v = {{ (uint8x8_t)R, (uint8x8_t)G, (uint8x8_t)B }};
1266 vst3_lane_u8(rgb+0, v, 0);
1267 vst3_lane_u8(rgb+3, v, 2);
1268 vst3_lane_u8(rgb+6, v, 4);
1269 vst3_lane_u8(rgb+9, v, 6);
1270#else
1271 store_3(rgb+0, cast<U8>(to_fixed(r * 255)) );
1272 store_3(rgb+1, cast<U8>(to_fixed(g * 255)) );
1273 store_3(rgb+2, cast<U8>(to_fixed(b * 255)) );
1274#endif
1275}

◆ FINAL_STAGE() [10/16]

FINAL_STAGE ( store_8888  ,
NoCtx   
)

Definition at line 1277 of file Transform_inl.h.

1277 {
1278 store(dst + 4*i, cast<U32>(to_fixed(r * 255)) << 0
1279 | cast<U32>(to_fixed(g * 255)) << 8
1280 | cast<U32>(to_fixed(b * 255)) << 16
1281 | cast<U32>(to_fixed(a * 255)) << 24);
1282}

◆ FINAL_STAGE() [11/16]

FINAL_STAGE ( store_a8  ,
NoCtx   
)

Definition at line 1233 of file Transform_inl.h.

1233 {
1234 store(dst + 1*i, cast<U8>(to_fixed(a * 255)));
1235}

◆ FINAL_STAGE() [12/16]

FINAL_STAGE ( store_fff  ,
NoCtx   
)

Definition at line 1430 of file Transform_inl.h.

1430 {
1431 uintptr_t ptr = (uintptr_t)(dst + 12*i);
1432 assert( (ptr & 3) == 0 ); // The dst pointer must be 4-byte aligned
1433 float* rgb = (float*)ptr; // for this cast to float* to be safe.
1434#if defined(USING_NEON)
1435 float32x4x3_t v = {{
1436 (float32x4_t)r,
1437 (float32x4_t)g,
1438 (float32x4_t)b,
1439 }};
1440 vst3q_f32(rgb, v);
1441#else
1442 store_3(rgb+0, r);
1443 store_3(rgb+1, g);
1444 store_3(rgb+2, b);
1445#endif
1446}

◆ FINAL_STAGE() [13/16]

FINAL_STAGE ( store_ffff  ,
NoCtx   
)

Definition at line 1448 of file Transform_inl.h.

1448 {
1449 uintptr_t ptr = (uintptr_t)(dst + 16*i);
1450 assert( (ptr & 3) == 0 ); // The dst pointer must be 4-byte aligned
1451 float* rgba = (float*)ptr; // for this cast to float* to be safe.
1452#if defined(USING_NEON)
1453 float32x4x4_t v = {{
1454 (float32x4_t)r,
1455 (float32x4_t)g,
1456 (float32x4_t)b,
1457 (float32x4_t)a,
1458 }};
1459 vst4q_f32(rgba, v);
1460#else
1461 store_4(rgba+0, r);
1462 store_4(rgba+1, g);
1463 store_4(rgba+2, b);
1464 store_4(rgba+3, a);
1465#endif
1466}
SI void store_4(P *p, const T &v)

◆ FINAL_STAGE() [14/16]

FINAL_STAGE ( store_g8  ,
NoCtx   
)

Definition at line 1237 of file Transform_inl.h.

1237 {
1238 // g should be holding luminance (Y) (r,g,b ~~~> X,Y,Z)
1239 store(dst + 1*i, cast<U8>(to_fixed(g * 255)));
1240}

◆ FINAL_STAGE() [15/16]

FINAL_STAGE ( store_hhh  ,
NoCtx   
)

Definition at line 1383 of file Transform_inl.h.

1383 {
1384 uintptr_t ptr = (uintptr_t)(dst + 6*i);
1385 assert( (ptr & 1) == 0 ); // The dst pointer must be 2-byte aligned
1386 uint16_t* rgb = (uint16_t*)ptr; // for this cast to uint16_t* to be safe.
1387
1388 U16 R = Half_from_F(r),
1389 G = Half_from_F(g),
1390 B = Half_from_F(b);
1391#if defined(USING_NEON)
1392 uint16x4x3_t v = {{
1393 (uint16x4_t)R,
1394 (uint16x4_t)G,
1395 (uint16x4_t)B,
1396 }};
1397 vst3_u16(rgb, v);
1398#else
1399 store_3(rgb+0, R);
1400 store_3(rgb+1, G);
1401 store_3(rgb+2, B);
1402#endif
1403}
SI U16 Half_from_F(F f)

◆ FINAL_STAGE() [16/16]

FINAL_STAGE ( store_hhhh  ,
NoCtx   
)

Definition at line 1405 of file Transform_inl.h.

1405 {
1406 uintptr_t ptr = (uintptr_t)(dst + 8*i);
1407 assert( (ptr & 1) == 0 ); // The dst pointer must be 2-byte aligned
1408 uint16_t* rgba = (uint16_t*)ptr; // for this cast to uint16_t* to be safe.
1409
1410 U16 R = Half_from_F(r),
1411 G = Half_from_F(g),
1412 B = Half_from_F(b),
1413 A = Half_from_F(a);
1414#if defined(USING_NEON)
1415 uint16x4x4_t v = {{
1416 (uint16x4_t)R,
1417 (uint16x4_t)G,
1418 (uint16x4_t)B,
1419 (uint16x4_t)A,
1420 }};
1421 vst4_u16(rgba, v);
1422#else
1423 store(rgba, cast<U64>(R) << 0
1424 | cast<U64>(G) << 16
1425 | cast<U64>(B) << 32
1426 | cast<U64>(A) << 48);
1427#endif
1428}

◆ floor_()

SI F floor_ ( F  x)

Definition at line 219 of file Transform_inl.h.

219 {
220#if N == 1
221 return floorf_(x);
222#elif defined(__aarch64__)
223 return vrndmq_f32(x);
224#elif defined(USING_AVX512F)
225 // Clang's _mm512_floor_ps() passes its mask as -1, not (__mmask16)-1,
226 // and integer sanitizer catches that this implicit cast changes the
227 // value from -1 to 65535. We'll cast manually to work around it.
228 // Read this as `return _mm512_floor_ps(x)`.
229 return _mm512_mask_floor_ps(x, (__mmask16)-1, x);
230#elif defined(USING_AVX)
231 return __builtin_ia32_roundps256(x, 0x01/*_MM_FROUND_FLOOR*/);
232#elif defined(__SSE4_1__)
233 return _mm_floor_ps(x);
234#else
235 // Round trip through integers with a truncating cast.
236 F roundtrip = cast<F>(cast<I32>(x));
237 // If x is negative, truncating gives the ceiling instead of the floor.
238 return roundtrip - if_then_else(roundtrip > x, F1, F0);
239
240 // This implementation fails for values of x that are outside
241 // the range an integer can represent. We expect most x to be small.
242#endif
243}
static float floorf_(float x)

◆ gather_16()

SI U16 gather_16 ( const uint8_t *  p,
I32  ix 
)

Definition at line 439 of file Transform_inl.h.

439 {
440 // Load the i'th 16-bit value from p.
441 auto load_16 = [p](int i) {
442 return load<uint16_t>(p + 2*i);
443 };
444#if N == 1
445 U16 v = load_16(ix);
446#elif N == 4
447 U16 v = { load_16(ix[0]), load_16(ix[1]), load_16(ix[2]), load_16(ix[3]) };
448#elif N == 8
449 U16 v = { load_16(ix[0]), load_16(ix[1]), load_16(ix[2]), load_16(ix[3]),
450 load_16(ix[4]), load_16(ix[5]), load_16(ix[6]), load_16(ix[7]) };
451#elif N == 16
452 U16 v = { load_16(ix[ 0]), load_16(ix[ 1]), load_16(ix[ 2]), load_16(ix[ 3]),
453 load_16(ix[ 4]), load_16(ix[ 5]), load_16(ix[ 6]), load_16(ix[ 7]),
454 load_16(ix[ 8]), load_16(ix[ 9]), load_16(ix[10]), load_16(ix[11]),
455 load_16(ix[12]), load_16(ix[13]), load_16(ix[14]), load_16(ix[15]) };
456#endif
457 return v;
458}

◆ gather_24()

SI U32 gather_24 ( const uint8_t *  p,
I32  ix 
)

Definition at line 482 of file Transform_inl.h.

482 {
483 // First, back up a byte. Any place we're gathering from has a safe junk byte to read
484 // in front of it, either a previous table value, or some tag metadata.
485 p -= 1;
486
487 // Load the i'th 24-bit value from p, and 1 extra byte.
488 auto load_24_32 = [p](int i) {
489 return load<uint32_t>(p + 3*i);
490 };
491
492 // Now load multiples of 4 bytes (a junk byte, then r,g,b).
493#if N == 1
494 U32 v = load_24_32(ix);
495#elif N == 4
496 U32 v = { load_24_32(ix[0]), load_24_32(ix[1]), load_24_32(ix[2]), load_24_32(ix[3]) };
497#elif N == 8 && !defined(USING_AVX2)
498 U32 v = { load_24_32(ix[0]), load_24_32(ix[1]), load_24_32(ix[2]), load_24_32(ix[3]),
499 load_24_32(ix[4]), load_24_32(ix[5]), load_24_32(ix[6]), load_24_32(ix[7]) };
500#elif N == 8
501 (void)load_24_32;
502 // The gather instruction here doesn't need any particular alignment,
503 // but the intrinsic takes a const int*.
504 const int* p4 = bit_pun<const int*>(p);
505 I32 zero = { 0, 0, 0, 0, 0, 0, 0, 0},
506 mask = {-1,-1,-1,-1, -1,-1,-1,-1};
507 #if defined(__clang__)
508 U32 v = (U32)__builtin_ia32_gatherd_d256(zero, p4, 3*ix, mask, 1);
509 #elif defined(__GNUC__)
510 U32 v = (U32)__builtin_ia32_gathersiv8si(zero, p4, 3*ix, mask, 1);
511 #endif
512#elif N == 16
513 (void)load_24_32;
514 // The intrinsic is supposed to take const void* now, but it takes const int*, just like AVX2.
515 // And AVX-512 swapped the order of arguments. :/
516 const int* p4 = bit_pun<const int*>(p);
517 U32 v = (U32)_mm512_i32gather_epi32((__m512i)(3*ix), p4, 1);
518#endif
519
520 // Shift off the junk byte, leaving r,g,b in low 24 bits (and zero in the top 8).
521 return v >> 8;
522}

◆ gather_32()

SI U32 gather_32 ( const uint8_t *  p,
I32  ix 
)

Definition at line 460 of file Transform_inl.h.

460 {
461 // Load the i'th 32-bit value from p.
462 auto load_32 = [p](int i) {
463 return load<uint32_t>(p + 4*i);
464 };
465#if N == 1
466 U32 v = load_32(ix);
467#elif N == 4
468 U32 v = { load_32(ix[0]), load_32(ix[1]), load_32(ix[2]), load_32(ix[3]) };
469#elif N == 8
470 U32 v = { load_32(ix[0]), load_32(ix[1]), load_32(ix[2]), load_32(ix[3]),
471 load_32(ix[4]), load_32(ix[5]), load_32(ix[6]), load_32(ix[7]) };
472#elif N == 16
473 U32 v = { load_32(ix[ 0]), load_32(ix[ 1]), load_32(ix[ 2]), load_32(ix[ 3]),
474 load_32(ix[ 4]), load_32(ix[ 5]), load_32(ix[ 6]), load_32(ix[ 7]),
475 load_32(ix[ 8]), load_32(ix[ 9]), load_32(ix[10]), load_32(ix[11]),
476 load_32(ix[12]), load_32(ix[13]), load_32(ix[14]), load_32(ix[15]) };
477#endif
478 // TODO: AVX2 and AVX-512 gathers (c.f. gather_24).
479 return v;
480}

◆ gather_48()

SI void gather_48 ( const uint8_t *  p,
I32  ix,
U64 *  v 
)

Definition at line 525 of file Transform_inl.h.

525 {
526 // As in gather_24(), with everything doubled.
527 p -= 2;
528
529 // Load the i'th 48-bit value from p, and 2 extra bytes.
530 auto load_48_64 = [p](int i) {
531 return load<uint64_t>(p + 6*i);
532 };
533
534 #if N == 1
535 *v = load_48_64(ix);
536 #elif N == 4
537 *v = U64{
538 load_48_64(ix[0]), load_48_64(ix[1]), load_48_64(ix[2]), load_48_64(ix[3]),
539 };
540 #elif N == 8 && !defined(USING_AVX2)
541 *v = U64{
542 load_48_64(ix[0]), load_48_64(ix[1]), load_48_64(ix[2]), load_48_64(ix[3]),
543 load_48_64(ix[4]), load_48_64(ix[5]), load_48_64(ix[6]), load_48_64(ix[7]),
544 };
545 #elif N == 8
546 (void)load_48_64;
547 typedef int32_t __attribute__((vector_size(16))) Half_I32;
548 typedef long long __attribute__((vector_size(32))) Half_I64;
549
550 // The gather instruction here doesn't need any particular alignment,
551 // but the intrinsic takes a const long long*.
552 const long long int* p8 = bit_pun<const long long int*>(p);
553
554 Half_I64 zero = { 0, 0, 0, 0},
555 mask = {-1,-1,-1,-1};
556
557 ix *= 6;
558 Half_I32 ix_lo = { ix[0], ix[1], ix[2], ix[3] },
559 ix_hi = { ix[4], ix[5], ix[6], ix[7] };
560
561 #if defined(__clang__)
562 Half_I64 lo = (Half_I64)__builtin_ia32_gatherd_q256(zero, p8, ix_lo, mask, 1),
563 hi = (Half_I64)__builtin_ia32_gatherd_q256(zero, p8, ix_hi, mask, 1);
564 #elif defined(__GNUC__)
565 Half_I64 lo = (Half_I64)__builtin_ia32_gathersiv4di(zero, p8, ix_lo, mask, 1),
566 hi = (Half_I64)__builtin_ia32_gathersiv4di(zero, p8, ix_hi, mask, 1);
567 #endif
568 store((char*)v + 0, lo);
569 store((char*)v + 32, hi);
570 #elif N == 16
571 (void)load_48_64;
572 const long long int* p8 = bit_pun<const long long int*>(p);
573 __m512i lo = _mm512_i32gather_epi64(_mm512_extracti32x8_epi32((__m512i)(6*ix), 0), p8, 1),
574 hi = _mm512_i32gather_epi64(_mm512_extracti32x8_epi32((__m512i)(6*ix), 1), p8, 1);
575 store((char*)v + 0, lo);
576 store((char*)v + 64, hi);
577 #endif
578
579 *v >>= 16;
580 }
SI D bit_pun(const S &v)

◆ gather_8()

SI U8 gather_8 ( const uint8_t *  p,
I32  ix 
)

Definition at line 422 of file Transform_inl.h.

422 {
423#if N == 1
424 U8 v = p[ix];
425#elif N == 4
426 U8 v = { p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]] };
427#elif N == 8
428 U8 v = { p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]],
429 p[ix[4]], p[ix[5]], p[ix[6]], p[ix[7]] };
430#elif N == 16
431 U8 v = { p[ix[ 0]], p[ix[ 1]], p[ix[ 2]], p[ix[ 3]],
432 p[ix[ 4]], p[ix[ 5]], p[ix[ 6]], p[ix[ 7]],
433 p[ix[ 8]], p[ix[ 9]], p[ix[10]], p[ix[11]],
434 p[ix[12]], p[ix[13]], p[ix[14]], p[ix[15]] };
435#endif
436 return v;
437}
V< uint8_t > U8

◆ Half_from_F()

SI U16 Half_from_F ( F  f)

Definition at line 180 of file Transform_inl.h.

180 {
181#if defined(USING_NEON_F16C)
182 return (U16)vcvt_f16_f32(f);
183#elif defined(USING_AVX512F)
184 return (U16)_mm512_cvtps_ph((__m512 )f, _MM_FROUND_CUR_DIRECTION );
185#elif defined(USING_AVX_F16C)
186 return (U16)__builtin_ia32_vcvtps2ph256(f, 0x04/*_MM_FROUND_CUR_DIRECTION*/);
187#else
188 // A float is 1-8-23 sign-exponent-mantissa, with 127 exponent bias.
189 U32 sem = bit_pun<U32>(f),
190 s = sem & 0x80000000,
191 em = sem ^ s;
192
193 // For simplicity we flush denorm half floats (including all denorm floats) to zero.
194 return cast<U16>(if_then_else(em < 0x38800000, (U32)F0
195 , (s>>16) + (em>>13) - ((127-15)<<10)));
196#endif
197}

◆ if_then_else()

template<typename C , typename T >
SI T if_then_else ( C  cond,
T  t,
T  e 
)

Definition at line 146 of file Transform_inl.h.

146 {
147 return bit_pun<T>( ( cond & bit_pun<C>(t)) |
148 (~cond & bit_pun<C>(e)) );
149 }

◆ load()

template<typename T , typename P >
SI T load ( const P *  ptr)

Definition at line 98 of file Transform_inl.h.

98 {
99 T val;
100 memcpy(&val, ptr, sizeof(val));
101 return val;
102}
#define T

◆ load_3()

template<typename T , typename P >
SI T load_3 ( const P *  p)

Definition at line 360 of file Transform_inl.h.

360 {
361#if N == 1
362 return (T)p[0];
363#elif N == 4
364 return T{p[ 0],p[ 3],p[ 6],p[ 9]};
365#elif N == 8
366 return T{p[ 0],p[ 3],p[ 6],p[ 9], p[12],p[15],p[18],p[21]};
367#elif N == 16
368 return T{p[ 0],p[ 3],p[ 6],p[ 9], p[12],p[15],p[18],p[21],
369 p[24],p[27],p[30],p[33], p[36],p[39],p[42],p[45]};
370#endif
371}

◆ load_4()

template<typename T , typename P >
SI T load_4 ( const P *  p)

Definition at line 374 of file Transform_inl.h.

374 {
375#if N == 1
376 return (T)p[0];
377#elif N == 4
378 return T{p[ 0],p[ 4],p[ 8],p[12]};
379#elif N == 8
380 return T{p[ 0],p[ 4],p[ 8],p[12], p[16],p[20],p[24],p[28]};
381#elif N == 16
382 return T{p[ 0],p[ 4],p[ 8],p[12], p[16],p[20],p[24],p[28],
383 p[32],p[36],p[40],p[44], p[48],p[52],p[56],p[60]};
384#endif
385}

◆ max_()

SI F max_ ( F  x,
F  y 
)

Definition at line 216 of file Transform_inl.h.

216{ return if_then_else(x < y, y, x); }

◆ min_()

SI F min_ ( F  x,
F  y 
)

Definition at line 215 of file Transform_inl.h.

215{ return if_then_else(x > y, y, x); }

◆ minus_1_ulp()

SI F minus_1_ulp ( F  v)

Definition at line 600 of file Transform_inl.h.

600 {
601 return bit_pun<F>( bit_pun<U32>(v) - 1 );
602}

◆ run_program()

void run_program ( const Op *  program,
const void **  contexts,
SKCMS_MAYBE_UNUSED ptrdiff_t  programSize,
const char *  src,
char *  dst,
int  n,
const size_t  src_bpp,
const size_t  dst_bpp 
)

Definition at line 1494 of file Transform_inl.h.

1496 {
1497#if SKCMS_HAS_MUSTTAIL
1498 // Convert the program into an array of tailcall stages.
1499 StageFn stages[32];
1500 assert(programSize <= ARRAY_COUNT(stages));
1501
1502 static constexpr StageFn kStageFns[] = {
1503#define M(name) &Exec_##name,
1506#undef M
1507 };
1508
1509 for (ptrdiff_t index = 0; index < programSize; ++index) {
1510 stages[index] = kStageFns[(int)program[index]];
1511 }
1512#else
1513 // Use the op array as-is.
1514 const Op* stages = program;
1515#endif
1516
1517 int i = 0;
1518 while (n >= N) {
1519 exec_stages(stages, contexts, src, dst, i);
1520 i += N;
1521 n -= N;
1522 }
1523 if (n > 0) {
1524 char tmp[4*4*N] = {0};
1525
1526 memcpy(tmp, (const char*)src + (size_t)i*src_bpp, (size_t)n*src_bpp);
1527 exec_stages(stages, contexts, tmp, tmp, 0);
1528 memcpy((char*)dst + (size_t)i*dst_bpp, tmp, (size_t)n*dst_bpp);
1529 }
1530}
static void exec_stages(const Op *ops, const void **contexts, const char *src, char *dst, int i)
void(*)(void) StageFn
Definition SkOpts.h:72
#define ARRAY_COUNT(arr)

◆ sample_clut_16() [1/2]

SI void sample_clut_16 ( const uint8_t *  grid_16,
I32  ix,
F *  r,
F *  g,
F *  b 
)

Definition at line 646 of file Transform_inl.h.

646 {
647#if defined(__arm__)
648 // This is up to 2x faster on 32-bit ARM than the #else-case fast path.
649 *r = F_from_U16_BE(gather_16(grid_16, 3*ix+0));
650 *g = F_from_U16_BE(gather_16(grid_16, 3*ix+1));
651 *b = F_from_U16_BE(gather_16(grid_16, 3*ix+2));
652#else
653 // This strategy is much faster for 64-bit builds, and fine for 32-bit x86 too.
654 U64 rgb;
655 gather_48(grid_16, ix, &rgb);
656 rgb = swap_endian_16x4(rgb);
657
658 *r = cast<F>((rgb >> 0) & 0xffff) * (1/65535.0f);
659 *g = cast<F>((rgb >> 16) & 0xffff) * (1/65535.0f);
660 *b = cast<F>((rgb >> 32) & 0xffff) * (1/65535.0f);
661#endif
662}
SI void gather_48(const uint8_t *p, I32 ix, U64 *v)
SI F F_from_U16_BE(U16 v)
SI U16 gather_16(const uint8_t *p, I32 ix)

◆ sample_clut_16() [2/2]

SI void sample_clut_16 ( const uint8_t *  grid_16,
I32  ix,
F *  r,
F *  g,
F *  b,
F *  a 
)

Definition at line 664 of file Transform_inl.h.

664 {
665 // TODO: gather_64()-based fast path?
666 *r = F_from_U16_BE(gather_16(grid_16, 4*ix+0));
667 *g = F_from_U16_BE(gather_16(grid_16, 4*ix+1));
668 *b = F_from_U16_BE(gather_16(grid_16, 4*ix+2));
669 *a = F_from_U16_BE(gather_16(grid_16, 4*ix+3));
670}

◆ sample_clut_8() [1/2]

SI void sample_clut_8 ( const uint8_t *  grid_8,
I32  ix,
F *  r,
F *  g,
F *  b 
)

Definition at line 628 of file Transform_inl.h.

628 {
629 U32 rgb = gather_24(grid_8, ix);
630
631 *r = cast<F>((rgb >> 0) & 0xff) * (1/255.0f);
632 *g = cast<F>((rgb >> 8) & 0xff) * (1/255.0f);
633 *b = cast<F>((rgb >> 16) & 0xff) * (1/255.0f);
634}
SI U32 gather_24(const uint8_t *p, I32 ix)

◆ sample_clut_8() [2/2]

SI void sample_clut_8 ( const uint8_t *  grid_8,
I32  ix,
F *  r,
F *  g,
F *  b,
F *  a 
)

Definition at line 636 of file Transform_inl.h.

636 {
637 // TODO: don't forget to optimize gather_32().
638 U32 rgba = gather_32(grid_8, ix);
639
640 *r = cast<F>((rgba >> 0) & 0xff) * (1/255.0f);
641 *g = cast<F>((rgba >> 8) & 0xff) * (1/255.0f);
642 *b = cast<F>((rgba >> 16) & 0xff) * (1/255.0f);
643 *a = cast<F>((rgba >> 24) & 0xff) * (1/255.0f);
644}
SI U32 gather_32(const uint8_t *p, I32 ix)

◆ STAGE() [1/57]

STAGE ( clamp  ,
NoCtx   
)

Definition at line 1063 of file Transform_inl.h.

1063 {
1064 r = max_(F0, min_(r, F1));
1065 g = max_(F0, min_(g, F1));
1066 b = max_(F0, min_(b, F1));
1067 a = max_(F0, min_(a, F1));
1068}

◆ STAGE() [2/57]

STAGE ( clut_A2B  ,
const skcms_A2B *  a2b 
)

Definition at line 1218 of file Transform_inl.h.

1218 {
1219 clut(a2b, &r,&g,&b,a);
1220
1221 if (a2b->input_channels == 4) {
1222 // CMYK is opaque.
1223 a = F1;
1224 }
1225}

◆ STAGE() [3/57]

STAGE ( clut_B2A  ,
const skcms_B2A *  b2a 
)

Definition at line 1227 of file Transform_inl.h.

1227 {
1228 clut(b2a, &r,&g,&b,&a);
1229}

◆ STAGE() [4/57]

STAGE ( force_opaque  ,
NoCtx   
)

Definition at line 1077 of file Transform_inl.h.

1077 {
1078 a = F1;
1079}

◆ STAGE() [5/57]

STAGE ( gamma_a  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1161 of file Transform_inl.h.

1161{ a = apply_gamma(tf, a); }
SI F apply_gamma(const skcms_TransferFunction *tf, F x)

◆ STAGE() [6/57]

STAGE ( gamma_b  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1160 of file Transform_inl.h.

1160{ b = apply_gamma(tf, b); }

◆ STAGE() [7/57]

STAGE ( gamma_g  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1159 of file Transform_inl.h.

1159{ g = apply_gamma(tf, g); }

◆ STAGE() [8/57]

STAGE ( gamma_r  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1158 of file Transform_inl.h.

1158{ r = apply_gamma(tf, r); }

◆ STAGE() [9/57]

STAGE ( gamma_rgb  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1163 of file Transform_inl.h.

1163 {
1164 r = apply_gamma(tf, r);
1165 g = apply_gamma(tf, g);
1166 b = apply_gamma(tf, b);
1167}

◆ STAGE() [10/57]

STAGE ( hlg_a  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1194 of file Transform_inl.h.

1194{ a = apply_hlg(tf, a); }
SI F apply_hlg(const skcms_TransferFunction *tf, F x)

◆ STAGE() [11/57]

STAGE ( hlg_b  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1193 of file Transform_inl.h.

1193{ b = apply_hlg(tf, b); }

◆ STAGE() [12/57]

STAGE ( hlg_g  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1192 of file Transform_inl.h.

1192{ g = apply_hlg(tf, g); }

◆ STAGE() [13/57]

STAGE ( hlg_r  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1191 of file Transform_inl.h.

1191{ r = apply_hlg(tf, r); }

◆ STAGE() [14/57]

STAGE ( hlg_rgb  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1196 of file Transform_inl.h.

1196 {
1197 r = apply_hlg(tf, r);
1198 g = apply_hlg(tf, g);
1199 b = apply_hlg(tf, b);
1200}

◆ STAGE() [15/57]

STAGE ( hlginv_a  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1205 of file Transform_inl.h.

1205{ a = apply_hlginv(tf, a); }
SI F apply_hlginv(const skcms_TransferFunction *tf, F x)

◆ STAGE() [16/57]

STAGE ( hlginv_b  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1204 of file Transform_inl.h.

1204{ b = apply_hlginv(tf, b); }

◆ STAGE() [17/57]

STAGE ( hlginv_g  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1203 of file Transform_inl.h.

1203{ g = apply_hlginv(tf, g); }

◆ STAGE() [18/57]

STAGE ( hlginv_r  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1202 of file Transform_inl.h.

1202{ r = apply_hlginv(tf, r); }

◆ STAGE() [19/57]

STAGE ( hlginv_rgb  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1207 of file Transform_inl.h.

1207 {
1208 r = apply_hlginv(tf, r);
1209 g = apply_hlginv(tf, g);
1210 b = apply_hlginv(tf, b);
1211}

◆ STAGE() [20/57]

STAGE ( invert  ,
NoCtx   
)

Definition at line 1070 of file Transform_inl.h.

1070 {
1071 r = F1 - r;
1072 g = F1 - g;
1073 b = F1 - b;
1074 a = F1 - a;
1075}

◆ STAGE() [21/57]

STAGE ( lab_to_xyz  ,
NoCtx   
)

Definition at line 1118 of file Transform_inl.h.

1118 {
1119 // The L*a*b values are in r,g,b, but normalized to [0,1]. Reconstruct them:
1120 F L = r * 100.0f,
1121 A = g * 255.0f - 128.0f,
1122 B = b * 255.0f - 128.0f;
1123
1124 // Convert to CIE XYZ.
1125 F Y = (L + 16.0f) * (1/116.0f),
1126 X = Y + A*(1/500.0f),
1127 Z = Y - B*(1/200.0f);
1128
1129 X = if_then_else(X*X*X > 0.008856f, X*X*X, (X - (16/116.0f)) * (1/7.787f));
1130 Y = if_then_else(Y*Y*Y > 0.008856f, Y*Y*Y, (Y - (16/116.0f)) * (1/7.787f));
1131 Z = if_then_else(Z*Z*Z > 0.008856f, Z*Z*Z, (Z - (16/116.0f)) * (1/7.787f));
1132
1133 // Adjust to XYZD50 illuminant, and stuff back into r,g,b for the next op.
1134 r = X * 0.9642f;
1135 g = Y ;
1136 b = Z * 0.8249f;
1137}
static const SkScalar Y
static const SkScalar X
#define Z

◆ STAGE() [22/57]

STAGE ( load_1010102  ,
NoCtx   
)

Definition at line 886 of file Transform_inl.h.

886 {
887 U32 rgba = load<U32>(src + 4*i);
888
889 r = cast<F>((rgba >> 0) & 0x3ff) * (1/1023.0f);
890 g = cast<F>((rgba >> 10) & 0x3ff) * (1/1023.0f);
891 b = cast<F>((rgba >> 20) & 0x3ff) * (1/1023.0f);
892 a = cast<F>((rgba >> 30) & 0x3 ) * (1/ 3.0f);
893}

◆ STAGE() [23/57]

STAGE ( load_101010x_XR  ,
NoCtx   
)

Definition at line 895 of file Transform_inl.h.

895 {
896 static constexpr float min = -0.752941f;
897 static constexpr float max = 1.25098f;
898 static constexpr float range = max - min;
899 U32 rgba = load<U32>(src + 4*i);
900 r = cast<F>((rgba >> 0) & 0x3ff) * (1/1023.0f) * range + min;
901 g = cast<F>((rgba >> 10) & 0x3ff) * (1/1023.0f) * range + min;
902 b = cast<F>((rgba >> 20) & 0x3ff) * (1/1023.0f) * range + min;
903}

◆ STAGE() [24/57]

STAGE ( load_16161616BE  ,
NoCtx   
)

Definition at line 961 of file Transform_inl.h.

961 {
962 uintptr_t ptr = (uintptr_t)(src + 8*i);
963 assert( (ptr & 1) == 0 ); // src must be 2-byte aligned for this
964 const uint16_t* rgba = (const uint16_t*)ptr; // cast to const uint16_t* to be safe.
965#if defined(USING_NEON)
966 uint16x4x4_t v = vld4_u16(rgba);
967 r = cast<F>(swap_endian_16((U16)v.val[0])) * (1/65535.0f);
968 g = cast<F>(swap_endian_16((U16)v.val[1])) * (1/65535.0f);
969 b = cast<F>(swap_endian_16((U16)v.val[2])) * (1/65535.0f);
970 a = cast<F>(swap_endian_16((U16)v.val[3])) * (1/65535.0f);
971#else
972 U64 px = swap_endian_16x4(load<U64>(rgba));
973
974 r = cast<F>((px >> 0) & 0xffff) * (1/65535.0f);
975 g = cast<F>((px >> 16) & 0xffff) * (1/65535.0f);
976 b = cast<F>((px >> 32) & 0xffff) * (1/65535.0f);
977 a = cast<F>((px >> 48) & 0xffff) * (1/65535.0f);
978#endif
979}

◆ STAGE() [25/57]

STAGE ( load_16161616LE  ,
NoCtx   
)

Definition at line 921 of file Transform_inl.h.

921 {
922 uintptr_t ptr = (uintptr_t)(src + 8*i);
923 assert( (ptr & 1) == 0 ); // src must be 2-byte aligned for this
924 const uint16_t* rgba = (const uint16_t*)ptr; // cast to const uint16_t* to be safe.
925#if defined(USING_NEON)
926 uint16x4x4_t v = vld4_u16(rgba);
927 r = cast<F>((U16)v.val[0]) * (1/65535.0f);
928 g = cast<F>((U16)v.val[1]) * (1/65535.0f);
929 b = cast<F>((U16)v.val[2]) * (1/65535.0f);
930 a = cast<F>((U16)v.val[3]) * (1/65535.0f);
931#else
932 U64 px = load<U64>(rgba);
933
934 r = cast<F>((px >> 0) & 0xffff) * (1/65535.0f);
935 g = cast<F>((px >> 16) & 0xffff) * (1/65535.0f);
936 b = cast<F>((px >> 32) & 0xffff) * (1/65535.0f);
937 a = cast<F>((px >> 48) & 0xffff) * (1/65535.0f);
938#endif
939}

◆ STAGE() [26/57]

STAGE ( load_161616BE  ,
NoCtx   
)

Definition at line 941 of file Transform_inl.h.

941 {
942 uintptr_t ptr = (uintptr_t)(src + 6*i);
943 assert( (ptr & 1) == 0 ); // src must be 2-byte aligned for this
944 const uint16_t* rgb = (const uint16_t*)ptr; // cast to const uint16_t* to be safe.
945#if defined(USING_NEON)
946 uint16x4x3_t v = vld3_u16(rgb);
947 r = cast<F>(swap_endian_16((U16)v.val[0])) * (1/65535.0f);
948 g = cast<F>(swap_endian_16((U16)v.val[1])) * (1/65535.0f);
949 b = cast<F>(swap_endian_16((U16)v.val[2])) * (1/65535.0f);
950#else
951 U32 R = load_3<U32>(rgb+0),
952 G = load_3<U32>(rgb+1),
953 B = load_3<U32>(rgb+2);
954 // R,G,B are big-endian 16-bit, so byte swap them before converting to float.
955 r = cast<F>((R & 0x00ff)<<8 | (R & 0xff00)>>8) * (1/65535.0f);
956 g = cast<F>((G & 0x00ff)<<8 | (G & 0xff00)>>8) * (1/65535.0f);
957 b = cast<F>((B & 0x00ff)<<8 | (B & 0xff00)>>8) * (1/65535.0f);
958#endif
959}

◆ STAGE() [27/57]

STAGE ( load_161616LE  ,
NoCtx   
)

Definition at line 905 of file Transform_inl.h.

905 {
906 uintptr_t ptr = (uintptr_t)(src + 6*i);
907 assert( (ptr & 1) == 0 ); // src must be 2-byte aligned for this
908 const uint16_t* rgb = (const uint16_t*)ptr; // cast to const uint16_t* to be safe.
909#if defined(USING_NEON)
910 uint16x4x3_t v = vld3_u16(rgb);
911 r = cast<F>((U16)v.val[0]) * (1/65535.0f);
912 g = cast<F>((U16)v.val[1]) * (1/65535.0f);
913 b = cast<F>((U16)v.val[2]) * (1/65535.0f);
914#else
915 r = cast<F>(load_3<U32>(rgb+0)) * (1/65535.0f);
916 g = cast<F>(load_3<U32>(rgb+1)) * (1/65535.0f);
917 b = cast<F>(load_3<U32>(rgb+2)) * (1/65535.0f);
918#endif
919}

◆ STAGE() [28/57]

STAGE ( load_4444  ,
NoCtx   
)

Definition at line 835 of file Transform_inl.h.

835 {
836 U16 abgr = load<U16>(src + 2*i);
837
838 r = cast<F>((abgr >> 12) & 0xf) * (1/15.0f);
839 g = cast<F>((abgr >> 8) & 0xf) * (1/15.0f);
840 b = cast<F>((abgr >> 4) & 0xf) * (1/15.0f);
841 a = cast<F>((abgr >> 0) & 0xf) * (1/15.0f);
842}

◆ STAGE() [29/57]

STAGE ( load_565  ,
NoCtx   
)

Definition at line 844 of file Transform_inl.h.

844 {
845 U16 rgb = load<U16>(src + 2*i);
846
847 r = cast<F>(rgb & (uint16_t)(31<< 0)) * (1.0f / (31<< 0));
848 g = cast<F>(rgb & (uint16_t)(63<< 5)) * (1.0f / (63<< 5));
849 b = cast<F>(rgb & (uint16_t)(31<<11)) * (1.0f / (31<<11));
850}

◆ STAGE() [30/57]

STAGE ( load_888  ,
NoCtx   
)

Definition at line 852 of file Transform_inl.h.

852 {
853 const uint8_t* rgb = (const uint8_t*)(src + 3*i);
854#if defined(USING_NEON)
855 // There's no uint8x4x3_t or vld3 load for it, so we'll load each rgb pixel one at
856 // a time. Since we're doing that, we might as well load them into 16-bit lanes.
857 // (We'd even load into 32-bit lanes, but that's not possible on ARMv7.)
858 uint8x8x3_t v = {{ vdup_n_u8(0), vdup_n_u8(0), vdup_n_u8(0) }};
859 v = vld3_lane_u8(rgb+0, v, 0);
860 v = vld3_lane_u8(rgb+3, v, 2);
861 v = vld3_lane_u8(rgb+6, v, 4);
862 v = vld3_lane_u8(rgb+9, v, 6);
863
864 // Now if we squint, those 3 uint8x8_t we constructed are really U16s, easy to
865 // convert to F. (Again, U32 would be even better here if we drop ARMv7 or split
866 // ARMv7 and ARMv8 impls.)
867 r = cast<F>((U16)v.val[0]) * (1/255.0f);
868 g = cast<F>((U16)v.val[1]) * (1/255.0f);
869 b = cast<F>((U16)v.val[2]) * (1/255.0f);
870#else
871 r = cast<F>(load_3<U32>(rgb+0) ) * (1/255.0f);
872 g = cast<F>(load_3<U32>(rgb+1) ) * (1/255.0f);
873 b = cast<F>(load_3<U32>(rgb+2) ) * (1/255.0f);
874#endif
875}

◆ STAGE() [31/57]

STAGE ( load_8888  ,
NoCtx   
)

Definition at line 877 of file Transform_inl.h.

877 {
878 U32 rgba = load<U32>(src + 4*i);
879
880 r = cast<F>((rgba >> 0) & 0xff) * (1/255.0f);
881 g = cast<F>((rgba >> 8) & 0xff) * (1/255.0f);
882 b = cast<F>((rgba >> 16) & 0xff) * (1/255.0f);
883 a = cast<F>((rgba >> 24) & 0xff) * (1/255.0f);
884}

◆ STAGE() [32/57]

STAGE ( load_a8  ,
NoCtx   
)

Definition at line 827 of file Transform_inl.h.

827 {
828 a = F_from_U8(load<U8>(src + 1*i));
829}
SI F F_from_U8(U8 v)

◆ STAGE() [33/57]

STAGE ( load_fff  ,
NoCtx   
)

Definition at line 1023 of file Transform_inl.h.

1023 {
1024 uintptr_t ptr = (uintptr_t)(src + 12*i);
1025 assert( (ptr & 3) == 0 ); // src must be 4-byte aligned for this
1026 const float* rgb = (const float*)ptr; // cast to const float* to be safe.
1027#if defined(USING_NEON)
1028 float32x4x3_t v = vld3q_f32(rgb);
1029 r = (F)v.val[0];
1030 g = (F)v.val[1];
1031 b = (F)v.val[2];
1032#else
1033 r = load_3<F>(rgb+0);
1034 g = load_3<F>(rgb+1);
1035 b = load_3<F>(rgb+2);
1036#endif
1037}
V< float > F

◆ STAGE() [34/57]

STAGE ( load_ffff  ,
NoCtx   
)

Definition at line 1039 of file Transform_inl.h.

1039 {
1040 uintptr_t ptr = (uintptr_t)(src + 16*i);
1041 assert( (ptr & 3) == 0 ); // src must be 4-byte aligned for this
1042 const float* rgba = (const float*)ptr; // cast to const float* to be safe.
1043#if defined(USING_NEON)
1044 float32x4x4_t v = vld4q_f32(rgba);
1045 r = (F)v.val[0];
1046 g = (F)v.val[1];
1047 b = (F)v.val[2];
1048 a = (F)v.val[3];
1049#else
1050 r = load_4<F>(rgba+0);
1051 g = load_4<F>(rgba+1);
1052 b = load_4<F>(rgba+2);
1053 a = load_4<F>(rgba+3);
1054#endif
1055}

◆ STAGE() [35/57]

STAGE ( load_g8  ,
NoCtx   
)

Definition at line 831 of file Transform_inl.h.

831 {
832 r = g = b = F_from_U8(load<U8>(src + 1*i));
833}

◆ STAGE() [36/57]

STAGE ( load_hhh  ,
NoCtx   
)

Definition at line 981 of file Transform_inl.h.

981 {
982 uintptr_t ptr = (uintptr_t)(src + 6*i);
983 assert( (ptr & 1) == 0 ); // src must be 2-byte aligned for this
984 const uint16_t* rgb = (const uint16_t*)ptr; // cast to const uint16_t* to be safe.
985#if defined(USING_NEON)
986 uint16x4x3_t v = vld3_u16(rgb);
987 U16 R = (U16)v.val[0],
988 G = (U16)v.val[1],
989 B = (U16)v.val[2];
990#else
991 U16 R = load_3<U16>(rgb+0),
992 G = load_3<U16>(rgb+1),
993 B = load_3<U16>(rgb+2);
994#endif
995 r = F_from_Half(R);
996 g = F_from_Half(G);
997 b = F_from_Half(B);
998}
V< uint16_t > U16
SI F F_from_Half(U16 half)

◆ STAGE() [37/57]

STAGE ( load_hhhh  ,
NoCtx   
)

Definition at line 1000 of file Transform_inl.h.

1000 {
1001 uintptr_t ptr = (uintptr_t)(src + 8*i);
1002 assert( (ptr & 1) == 0 ); // src must be 2-byte aligned for this
1003 const uint16_t* rgba = (const uint16_t*)ptr; // cast to const uint16_t* to be safe.
1004#if defined(USING_NEON)
1005 uint16x4x4_t v = vld4_u16(rgba);
1006 U16 R = (U16)v.val[0],
1007 G = (U16)v.val[1],
1008 B = (U16)v.val[2],
1009 A = (U16)v.val[3];
1010#else
1011 U64 px = load<U64>(rgba);
1012 U16 R = cast<U16>((px >> 0) & 0xffff),
1013 G = cast<U16>((px >> 16) & 0xffff),
1014 B = cast<U16>((px >> 32) & 0xffff),
1015 A = cast<U16>((px >> 48) & 0xffff);
1016#endif
1017 r = F_from_Half(R);
1018 g = F_from_Half(G);
1019 b = F_from_Half(B);
1020 a = F_from_Half(A);
1021}

◆ STAGE() [38/57]

STAGE ( matrix_3x3  ,
const skcms_Matrix3x3 *  matrix 
)

Definition at line 1094 of file Transform_inl.h.

1094 {
1095 const float* m = &matrix->vals[0][0];
1096
1097 F R = m[0]*r + m[1]*g + m[2]*b,
1098 G = m[3]*r + m[4]*g + m[5]*b,
1099 B = m[6]*r + m[7]*g + m[8]*b;
1100
1101 r = R;
1102 g = G;
1103 b = B;
1104}
unsigned useCenter Optional< SkMatrix > matrix
Definition SkRecords.h:258

◆ STAGE() [39/57]

STAGE ( matrix_3x4  ,
const skcms_Matrix3x4 *  matrix 
)

Definition at line 1106 of file Transform_inl.h.

1106 {
1107 const float* m = &matrix->vals[0][0];
1108
1109 F R = m[0]*r + m[1]*g + m[ 2]*b + m[ 3],
1110 G = m[4]*r + m[5]*g + m[ 6]*b + m[ 7],
1111 B = m[8]*r + m[9]*g + m[10]*b + m[11];
1112
1113 r = R;
1114 g = G;
1115 b = B;
1116}

◆ STAGE() [40/57]

STAGE ( pq_a  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1183 of file Transform_inl.h.

1183{ a = apply_pq(tf, a); }
SI F apply_pq(const skcms_TransferFunction *tf, F x)

◆ STAGE() [41/57]

STAGE ( pq_b  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1182 of file Transform_inl.h.

1182{ b = apply_pq(tf, b); }

◆ STAGE() [42/57]

STAGE ( pq_g  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1181 of file Transform_inl.h.

1181{ g = apply_pq(tf, g); }

◆ STAGE() [43/57]

STAGE ( pq_r  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1180 of file Transform_inl.h.

1180{ r = apply_pq(tf, r); }

◆ STAGE() [44/57]

STAGE ( pq_rgb  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1185 of file Transform_inl.h.

1185 {
1186 r = apply_pq(tf, r);
1187 g = apply_pq(tf, g);
1188 b = apply_pq(tf, b);
1189}

◆ STAGE() [45/57]

STAGE ( premul  ,
NoCtx   
)

Definition at line 1081 of file Transform_inl.h.

1081 {
1082 r *= a;
1083 g *= a;
1084 b *= a;
1085}

◆ STAGE() [46/57]

STAGE ( swap_rb  ,
NoCtx   
)

Definition at line 1057 of file Transform_inl.h.

1057 {
1058 F t = r;
1059 r = b;
1060 b = t;
1061}

◆ STAGE() [47/57]

STAGE ( table_a  ,
const skcms_Curve *  curve 
)

Definition at line 1216 of file Transform_inl.h.

1216{ a = table(curve, a); }
SI F table(const skcms_Curve *curve, F v)

◆ STAGE() [48/57]

STAGE ( table_b  ,
const skcms_Curve *  curve 
)

Definition at line 1215 of file Transform_inl.h.

1215{ b = table(curve, b); }

◆ STAGE() [49/57]

STAGE ( table_g  ,
const skcms_Curve *  curve 
)

Definition at line 1214 of file Transform_inl.h.

1214{ g = table(curve, g); }

◆ STAGE() [50/57]

STAGE ( table_r  ,
const skcms_Curve *  curve 
)

Definition at line 1213 of file Transform_inl.h.

1213{ r = table(curve, r); }

◆ STAGE() [51/57]

STAGE ( tf_a  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1172 of file Transform_inl.h.

1172{ a = apply_tf(tf, a); }
SI F apply_tf(const skcms_TransferFunction *tf, F x)

◆ STAGE() [52/57]

STAGE ( tf_b  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1171 of file Transform_inl.h.

1171{ b = apply_tf(tf, b); }

◆ STAGE() [53/57]

STAGE ( tf_g  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1170 of file Transform_inl.h.

1170{ g = apply_tf(tf, g); }

◆ STAGE() [54/57]

STAGE ( tf_r  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1169 of file Transform_inl.h.

1169{ r = apply_tf(tf, r); }

◆ STAGE() [55/57]

STAGE ( tf_rgb  ,
const skcms_TransferFunction *  tf 
)

Definition at line 1174 of file Transform_inl.h.

1174 {
1175 r = apply_tf(tf, r);
1176 g = apply_tf(tf, g);
1177 b = apply_tf(tf, b);
1178}

◆ STAGE() [56/57]

STAGE ( unpremul  ,
NoCtx   
)

Definition at line 1087 of file Transform_inl.h.

1087 {
1088 F scale = if_then_else(F1 / a < INFINITY_, F1 / a, F0);
1089 r *= scale;
1090 g *= scale;
1091 b *= scale;
1092}
#define INFINITY_
const Scalar scale

◆ STAGE() [57/57]

STAGE ( xyz_to_lab  ,
NoCtx   
)

Definition at line 1140 of file Transform_inl.h.

1140 {
1141 F X = r * (1/0.9642f),
1142 Y = g,
1143 Z = b * (1/0.8249f);
1144
1145 X = if_then_else(X > 0.008856f, approx_pow(X, 1/3.0f), X*7.787f + (16/116.0f));
1146 Y = if_then_else(Y > 0.008856f, approx_pow(Y, 1/3.0f), Y*7.787f + (16/116.0f));
1147 Z = if_then_else(Z > 0.008856f, approx_pow(Z, 1/3.0f), Z*7.787f + (16/116.0f));
1148
1149 F L = Y*116.0f - 16.0f,
1150 A = (X-Y)*500.0f,
1151 B = (Y-Z)*200.0f;
1152
1153 r = L * (1/100.f);
1154 g = (A + 128.0f) * (1/255.0f);
1155 b = (B + 128.0f) * (1/255.0f);
1156}

◆ store()

template<typename T , typename P >
SI void store ( P *  ptr,
const T &  val 
)

Definition at line 104 of file Transform_inl.h.

104 {
105 memcpy(ptr, &val, sizeof(val));
106}

◆ store_3()

template<typename T , typename P >
SI void store_3 ( P *  p,
const T &  v 
)

Definition at line 388 of file Transform_inl.h.

388 {
389#if N == 1
390 p[0] = v;
391#elif N == 4
392 p[ 0] = v[ 0]; p[ 3] = v[ 1]; p[ 6] = v[ 2]; p[ 9] = v[ 3];
393#elif N == 8
394 p[ 0] = v[ 0]; p[ 3] = v[ 1]; p[ 6] = v[ 2]; p[ 9] = v[ 3];
395 p[12] = v[ 4]; p[15] = v[ 5]; p[18] = v[ 6]; p[21] = v[ 7];
396#elif N == 16
397 p[ 0] = v[ 0]; p[ 3] = v[ 1]; p[ 6] = v[ 2]; p[ 9] = v[ 3];
398 p[12] = v[ 4]; p[15] = v[ 5]; p[18] = v[ 6]; p[21] = v[ 7];
399 p[24] = v[ 8]; p[27] = v[ 9]; p[30] = v[10]; p[33] = v[11];
400 p[36] = v[12]; p[39] = v[13]; p[42] = v[14]; p[45] = v[15];
401#endif
402}

◆ store_4()

template<typename T , typename P >
SI void store_4 ( P *  p,
const T &  v 
)

Definition at line 405 of file Transform_inl.h.

405 {
406#if N == 1
407 p[0] = v;
408#elif N == 4
409 p[ 0] = v[ 0]; p[ 4] = v[ 1]; p[ 8] = v[ 2]; p[12] = v[ 3];
410#elif N == 8
411 p[ 0] = v[ 0]; p[ 4] = v[ 1]; p[ 8] = v[ 2]; p[12] = v[ 3];
412 p[16] = v[ 4]; p[20] = v[ 5]; p[24] = v[ 6]; p[28] = v[ 7];
413#elif N == 16
414 p[ 0] = v[ 0]; p[ 4] = v[ 1]; p[ 8] = v[ 2]; p[12] = v[ 3];
415 p[16] = v[ 4]; p[20] = v[ 5]; p[24] = v[ 6]; p[28] = v[ 7];
416 p[32] = v[ 8]; p[36] = v[ 9]; p[40] = v[10]; p[44] = v[11];
417 p[48] = v[12]; p[52] = v[13]; p[56] = v[14]; p[60] = v[15];
418#endif
419}

◆ strip_sign()

SI F strip_sign ( F  x,
U32 *  sign 
)

Definition at line 285 of file Transform_inl.h.

285 {
286 U32 bits = bit_pun<U32>(x);
287 *sign = bits & 0x80000000;
288 return bit_pun<F>(bits ^ *sign);
289}

◆ swap_endian_16x4()

SI U64 swap_endian_16x4 ( const U64 & rgba)

Definition at line 206 of file Transform_inl.h.

206 {
207 return (rgba & 0x00ff00ff00ff00ff) << 8
208 | (rgba & 0xff00ff00ff00ff00) >> 8;
209}

◆ table()

SI F table ( const skcms_Curve *  curve,
F  v 
)

Definition at line 604 of file Transform_inl.h.

604 {
605 // Clamp the input to [0,1], then scale to a table index.
606 F ix = max_(F0, min_(v, F1)) * (float)(curve->table_entries - 1);
607
608 // We'll look up (equal or adjacent) entries at lo and hi, then lerp by t between the two.
609 I32 lo = cast<I32>( ix ),
610 hi = cast<I32>(minus_1_ulp(ix+1.0f));
611 F t = ix - cast<F>(lo); // i.e. the fractional part of ix.
612
613 // TODO: can we load l and h simultaneously? Each entry in 'h' is either
614 // the same as in 'l' or adjacent. We have a rough idea that it'd always be safe
615 // to read adjacent entries and perhaps underflow the table by a byte or two
616 // (it'd be junk, but always safe to read). Not sure how to lerp yet.
617 F l,h;
618 if (curve->table_8) {
619 l = F_from_U8(gather_8(curve->table_8, lo));
620 h = F_from_U8(gather_8(curve->table_8, hi));
621 } else {
622 l = F_from_U16_BE(gather_16(curve->table_16, lo));
623 h = F_from_U16_BE(gather_16(curve->table_16, hi));
624 }
625 return l + (h-l)*t;
626}
SI U8 gather_8(const uint8_t *p, I32 ix)
SkScalar h
const uint8_t * table_16
const uint8_t * table_8
uint32_t table_entries

◆ to_fixed()

SI U32 to_fixed ( F  f)

Definition at line 134 of file Transform_inl.h.

134{ return (U32)cast<I32>(f + 0.5f); }

◆ U16_from_F()

SI U16 U16_from_F ( F  v)

Definition at line 595 of file Transform_inl.h.

595 {
596 // 65535 == inf in FP16, so promote to FP32 before converting.
597 return cast<U16>(cast<V<float>>(v) * 65535 + 0.5f);
598}
SI D cast(const S &v)
T __attribute__((ext_vector_type(N))) V

Variable Documentation

◆ F0

constexpr F F0 = 0.0f
static constexpr

Definition at line 27 of file Transform_inl.h.

◆ F1

constexpr F F1 = 1.0f
static

Definition at line 28 of file Transform_inl.h.

◆ FInfBits

constexpr F FInfBits = 0x7f800000
static

Definition at line 29 of file Transform_inl.h.