Flutter Engine
The Flutter Engine
Loading...
Searching...
No Matches
SkMemset_opts_erms.cpp
Go to the documentation of this file.
1/*
2 * Copyright 2020 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "src/base/SkMSAN.h"
9#include "src/core/SkMemset.h"
10#include <cstddef>
11#include <cstdint>
12
13// memset16 and memset32 could work on 32-bit x86 too, but for simplicity just use this on x64
14#if (defined(__x86_64__) || defined(_M_X64)) && !defined(SK_ENABLE_OPTIMIZE_SIZE)
15
// Reason string handed to sk_msan_mark_initialized() by every repsto() overload.
// Both the characters and the pointer are const, so the global cannot be re-pointed.
static const char* const note = "MSAN can't see that repsto initializes memory.";
17
18#if defined(_MSC_VER)
19#include <intrin.h>
20static inline void repsto(uint16_t* dst, uint16_t v, size_t n) {
21 sk_msan_mark_initialized(dst, dst + n, note);
22 __stosw(dst, v, n);
23}
24static inline void repsto(uint32_t* dst, uint32_t v, size_t n) {
25 sk_msan_mark_initialized(dst, dst + n, note);
26 static_assert(sizeof(uint32_t) == sizeof(unsigned long));
27 __stosd(reinterpret_cast<unsigned long*>(dst), v, n);
28}
29static inline void repsto(uint64_t* dst, uint64_t v, size_t n) {
30 sk_msan_mark_initialized(dst, dst + n, note);
31 __stosq(dst, v, n);
32}
33#else
34static inline void repsto(uint16_t* dst, uint16_t v, size_t n) {
35 sk_msan_mark_initialized(dst, dst + n, note);
36 asm volatile("rep stosw" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
37}
38static inline void repsto(uint32_t* dst, uint32_t v, size_t n) {
39 sk_msan_mark_initialized(dst, dst + n, note);
40 asm volatile("rep stosl" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
41}
42static inline void repsto(uint64_t* dst, uint64_t v, size_t n) {
43 sk_msan_mark_initialized(dst, dst + n, note);
44 asm volatile("rep stosq" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
45}
46#endif
47
// ERMS is ideal for large inputs but has a relatively high setup cost, so small inputs
// keep using the previous best routine. FSRM would make this moot.
// These pointers hold those previous routines.

// Linear fills: (dst, value, element count).
static void (*g_memset16_prev)(uint16_t*, uint16_t, int) = nullptr;
static void (*g_memset32_prev)(uint32_t*, uint32_t, int) = nullptr;
static void (*g_memset64_prev)(uint64_t*, uint64_t, int) = nullptr;

// Rectangular fills: (dst, value, elements per row, rowBytes, height).
static void (*g_rect_memset16_prev)(uint16_t*, uint16_t, int, size_t, int) = nullptr;
static void (*g_rect_memset32_prev)(uint32_t*, uint32_t, int, size_t, int) = nullptr;
static void (*g_rect_memset64_prev)(uint64_t*, uint64_t, int, size_t, int) = nullptr;
56
// Reports whether a fill of `bytes` bytes counts as a small input.
// The cutoff was empirically determined with `nanobench -m memset`.
static bool small(size_t bytes) {
    constexpr size_t kSmallLimitBytes = 1024;
    return bytes < kSmallLimitBytes;
}
59
60namespace erms {
61
62static inline void memset16(uint16_t* dst, uint16_t v, int n) {
63 return small(sizeof(v) * n) ? g_memset16_prev(dst, v, n) : repsto(dst, v, n);
64}
65static inline void memset32(uint32_t* dst, uint32_t v, int n) {
66 return small(sizeof(v) * n) ? g_memset32_prev(dst, v, n) : repsto(dst, v, n);
67}
68static inline void memset64(uint64_t* dst, uint64_t v, int n) {
69 return small(sizeof(v) * n) ? g_memset64_prev(dst, v, n) : repsto(dst, v, n);
70}
71
72static inline void rect_memset16(uint16_t* dst, uint16_t v, int n, size_t rowBytes, int height) {
73 if (small(sizeof(v) * n)) {
74 return g_rect_memset16_prev(dst, v, n, rowBytes, height);
75 }
76 for (int stride = rowBytes / sizeof(v); height-- > 0; dst += stride) {
77 repsto(dst, v, n);
78 }
79}
80static inline void rect_memset32(uint32_t* dst, uint32_t v, int n, size_t rowBytes, int height) {
81 if (small(sizeof(v) * n)) {
82 return g_rect_memset32_prev(dst, v, n, rowBytes, height);
83 }
84 for (int stride = rowBytes / sizeof(v); height-- > 0; dst += stride) {
85 repsto(dst, v, n);
86 }
87}
88static inline void rect_memset64(uint64_t* dst, uint64_t v, int n, size_t rowBytes, int height) {
89 if (small(sizeof(v) * n)) {
90 return g_rect_memset64_prev(dst, v, n, rowBytes, height);
91 }
92 for (int stride = rowBytes / sizeof(v); height-- > 0; dst += stride) {
93 repsto(dst, v, n);
94 }
95}
96
97} // namespace erms
98
99#endif // X86_64 && !SK_ENABLE_OPTIMIZE_SIZE
100
101namespace SkOpts {
103 #if (defined(__x86_64__) || defined(_M_X64)) && !defined(SK_ENABLE_OPTIMIZE_SIZE)
104 g_memset16_prev = memset16;
105 g_memset32_prev = memset32;
106 g_memset64_prev = memset64;
107 g_rect_memset16_prev = rect_memset16;
108 g_rect_memset32_prev = rect_memset32;
109 g_rect_memset64_prev = rect_memset64;
110
111 memset16 = erms::memset16;
112 memset32 = erms::memset32;
113 memset64 = erms::memset64;
114 rect_memset16 = erms::rect_memset16;
115 rect_memset32 = erms::rect_memset32;
116 rect_memset64 = erms::rect_memset64;
117 #endif // X86_64 && !SK_ENABLE_OPTIMIZE_SIZE
118 }
119} // namespace SkOpts
static void sk_msan_mark_initialized(const void *begin, const void *end, const char *skbug)
Definition SkMSAN.h:34
Type::kYUV Type::kRGBA() int(0.7 *637)
void(* rect_memset32)(uint32_t[], uint32_t, int, size_t, int)
void(* rect_memset16)(uint16_t[], uint16_t, int, size_t, int)
void(* memset64)(uint64_t[], uint64_t, int)
void(* memset16)(uint16_t[], uint16_t, int)
void Init_Memset_erms()
void(* memset32)(uint32_t[], uint32_t, int)
void(* rect_memset64)(uint64_t[], uint64_t, int, size_t, int)
dst
Definition cp.py:12
int32_t height