Functions
SK_SPI int	CountUTF8 (const char *utf8, size_t byteLength)

SK_SPI int	CountUTF16 (const uint16_t *utf16, size_t byteLength)

SK_SPI int	CountUTF32 (const int32_t *utf32, size_t byteLength)

SK_SPI SkUnichar	NextUTF8 (const char **ptr, const char *end)

SK_SPI SkUnichar	NextUTF8WithReplacement (const char **ptr, const char *end)

SK_SPI SkUnichar	NextUTF16 (const uint16_t **ptr, const uint16_t *end)

SK_SPI SkUnichar	NextUTF32 (const int32_t **ptr, const int32_t *end)

SK_SPI size_t	ToUTF8 (SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence]=nullptr)

SK_SPI size_t	ToUTF16 (SkUnichar uni, uint16_t utf16[2]=nullptr)

SK_SPI int	UTF8ToUTF16 (uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength)

SK_SPI int	UTF16ToUTF8 (char dst[], int dstCapacity, const uint16_t src[], size_t srcLength)

static bool	IsLeadingSurrogateUTF16 (uint16_t c)

static bool	IsTrailingSurrogateUTF16 (uint16_t c)

Variables
constexpr unsigned	kMaxBytesInUTF8Sequence = 4

Function Documentation

◆ CountUTF16()

int SkUTF::CountUTF16	(	const uint16_t *	utf16,
		size_t	byteLength
	)

Given a sequence of aligned UTF-16 characters in machine-endian form, return the number of unicode codepoints. If the sequence is invalid UTF-16, return -1.

Definition at line 70 of file SkUTF.cpp.

                                                              {
    if (!utf16 || !is_align2(intptr_t(utf16)) || !is_align2(byteLength)) {
        return -1;
    }
    const uint16_t* src = (const uint16_t*)utf16;
    const uint16_t* stop = src + (byteLength >> 1);
    int count = 0;
    while (src < stop) {
        unsigned c = *src++;
        if (utf16_is_low_surrogate(c)) {
            return -1;
        }
        if (utf16_is_high_surrogate(c)) {
            if (src >= stop) {
                return -1;
            }
            c = *src++;
            if (!utf16_is_low_surrogate(c)) {
                return -1;
            }
        }
        count += 1;
    }
    return count;
}

◆ CountUTF32()

int SkUTF::CountUTF32	(	const int32_t *	utf32,
		size_t	byteLength
	)

Given a sequence of aligned UTF-32 characters in machine-endian form, return the number of unicode codepoints. If the sequence is invalid UTF-32, return -1.

Definition at line 96 of file SkUTF.cpp.

                                                             {
    if (!is_align4(intptr_t(utf32)) || !is_align4(byteLength) || !SkTFitsIn<int>(byteLength >> 2)) {
        return -1;
    }
    const uint32_t kInvalidUnicharMask = 0xFF000000;    // unichar fits in 24 bits
    const uint32_t* ptr = (const uint32_t*)utf32;
    const uint32_t* stop = ptr + (byteLength >> 2);
    while (ptr < stop) {
        if (*ptr & kInvalidUnicharMask) {
            return -1;
        }
        ptr += 1;
    }
    return (int)(byteLength >> 2);
}

◆ CountUTF8()

int SkUTF::CountUTF8	(	const char *	utf8,
		size_t	byteLength
	)

Given a sequence of UTF-8 bytes, return the number of unicode codepoints. If the sequence is invalid UTF-8, return -1.

Definition at line 47 of file SkUTF.cpp.

                                                        {
    if (!utf8 && byteLength) {
        return -1;
    }
    int count = 0;
    const char* stop = utf8 + byteLength;
    while (utf8 < stop) {
        int type = utf8_byte_type(*(const uint8_t*)utf8);
        if (!utf8_type_is_valid_leading_byte(type) || utf8 + type > stop) {
            return -1;  // Sequence extends beyond end.
        }
        while(type-- > 1) {
            ++utf8;
            if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) {
                return -1;
            }
        }
        ++utf8;
        ++count;
    }
    return count;
}

◆ IsLeadingSurrogateUTF16()

static bool SkUTF::IsLeadingSurrogateUTF16 ( uint16_t c )

inline static

Given a UTF-16 code point, returns true iff it is a leading surrogate. https://unicode.org/faq/utf_bom.html#utf16-2

Definition at line 91 of file SkUTF.h.

// Returns true iff `c` is a leading (high) surrogate, i.e. in [0xD800, 0xDBFF]:
// masking with 0xFC00 keeps the top six bits, which are 110110 for that range.
static inline bool IsLeadingSurrogateUTF16(uint16_t c) { return ((c) & 0xFC00) == 0xD800; }

◆ IsTrailingSurrogateUTF16()

static bool SkUTF::IsTrailingSurrogateUTF16 ( uint16_t c )

inline static

Given a UTF-16 code point, returns true iff it is a trailing surrogate. https://unicode.org/faq/utf_bom.html#utf16-2

Definition at line 97 of file SkUTF.h.

// Returns true iff `c` is a trailing (low) surrogate, i.e. in [0xDC00, 0xDFFF]:
// masking with 0xFC00 keeps the top six bits, which are 110111 for that range.
static inline bool IsTrailingSurrogateUTF16(uint16_t c) { return ((c) & 0xFC00) == 0xDC00; }

◆ NextUTF16()

SkUnichar SkUTF::NextUTF16	(	const uint16_t **	ptr,
		const uint16_t *	end
	)

Given a sequence of aligned UTF-16 characters in machine-endian form, return the first unicode codepoint. The pointer will be incremented to point at the next codepoint's start. If invalid UTF-16 is encountered, set *ptr to end and return -1.

Definition at line 159 of file SkUTF.cpp.

                                                                    {
    if (!ptr || !end ) {
        return -1;
    }
    const uint16_t* src = *ptr;
    if (!src || src + 1 > end || !is_align2(intptr_t(src))) {
        return next_fail(ptr, end);
    }
    uint16_t c = *src++;
    SkUnichar result = c;
    if (utf16_is_low_surrogate(c)) {
        return next_fail(ptr, end);  // srcPtr should never point at low surrogate.
    }
    if (utf16_is_high_surrogate(c)) {
        if (src + 1 > end) {
            return next_fail(ptr, end);  // Truncated string.
        }
        uint16_t low = *src++;
        if (!utf16_is_low_surrogate(low)) {
            return next_fail(ptr, end);
        }
        /*
        [paraphrased from wikipedia]
        Take the high surrogate and subtract 0xD800, then multiply by 0x400.
        Take the low surrogate and subtract 0xDC00.  Add these two results
        together, and finally add 0x10000 to get the final decoded codepoint.
 
        unicode = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
        unicode = (high * 0x400) - (0xD800 * 0x400) + low - 0xDC00 + 0x10000
        unicode = (high << 10) - (0xD800 << 10) + low - 0xDC00 + 0x10000
        unicode = (high << 10) + low - ((0xD800 << 10) + 0xDC00 - 0x10000)
        */
        result = (result << 10) + (SkUnichar)low - ((0xD800 << 10) + 0xDC00 - 0x10000);
    }
    *ptr = src;
    return result;
}

◆ NextUTF32()

SkUnichar SkUTF::NextUTF32	(	const int32_t **	ptr,
		const int32_t *	end
	)

Given a sequence of aligned UTF-32 characters in machine-endian form, return the first unicode codepoint. The pointer will be incremented to point at the next codepoint's start. If invalid UTF-32 is encountered, set *ptr to end and return -1.

Definition at line 197 of file SkUTF.cpp.

                                                                  {
    if (!ptr || !end ) {
        return -1;
    }
    const int32_t* s = *ptr;
    if (!s || s + 1 > end || !is_align4(intptr_t(s))) {
        return next_fail(ptr, end);
    }
    int32_t value = *s;
    const uint32_t kInvalidUnicharMask = 0xFF000000;    // unichar fits in 24 bits
    if (value & kInvalidUnicharMask) {
        return next_fail(ptr, end);
    }
    *ptr = s + 1;
    return value;
}

◆ NextUTF8()

SkUnichar SkUTF::NextUTF8	(	const char **	ptr,
		const char *	end
	)

Given a sequence of UTF-8 bytes, return the first unicode codepoint. The pointer will be incremented to point at the next codepoint's start. If invalid UTF-8 is encountered, set *ptr to end and return -1.

Definition at line 118 of file SkUTF.cpp.

                                                             {
    // Decodes one codepoint from the UTF-8 bytes in [*ptr, end) and advances
    // *ptr past it. With a null `ptr` or `end` nothing is touched and -1 is
    // returned; every other failure goes through next_fail(ptr, end), which
    // per this function's documentation sets *ptr to end and returns -1.
    if (!ptr || !end ) {
        return -1;
    }
    const uint8_t*  p = (const uint8_t*)*ptr;
    if (!p || p >= (const uint8_t*)end) {
        return next_fail(ptr, end);
    }
    // c accumulates the decoded value. hic holds the leading byte in the top
    // 8 bits of an int, so its sign bit mirrors the byte's current high bit.
    int             c = *p;
    int             hic = c << 24;
 
    if (!utf8_type_is_valid_leading_byte(utf8_byte_type(c))) {
        return next_fail(ptr, end);
    }
    // hic < 0 means the leading byte has its high bit set: a multi-byte sequence.
    if (hic < 0) {
        // mask tracks the bits of c above the payload; it starts just above the
        // 6 payload bits of one continuation byte and widens on every iteration.
        uint32_t mask = (uint32_t)~0x3F;
        // NOTE(review): left_shift is presumably a helper that shifts a signed
        // value left without undefined behavior -- confirm against its definition.
        hic = left_shift(hic, 1);
        do {
            ++p;
            if (p >= (const uint8_t*)end) {
                return next_fail(ptr, end);
            }
            // check before reading off end of array.
            uint8_t nextByte = *p;
            if (!utf8_byte_is_continuation(nextByte)) {
                return next_fail(ptr, end);
            }
            // Append the continuation byte's low 6 bits to the accumulated value.
            c = (c << 6) | (nextByte & 0x3F);
            mask <<= 5;
            // Keep going while the next length-marker bit of the leading byte is set.
        } while ((hic = left_shift(hic, 1)) < 0);
        // Clear the leading byte's length-marker bits, leaving only payload bits.
        c &= ~mask;
    }
    // p points at the last byte consumed; resume just after it.
    *ptr = (const char*)p + 1;
    return c;
}

◆ NextUTF8WithReplacement()

SkUnichar SkUTF::NextUTF8WithReplacement	(	const char **	ptr,
		const char *	end
	)

Given a sequence of UTF-8 bytes, return the first unicode codepoint. The pointer will be incremented to point at the next codepoint's start. If invalid UTF-8 is encountered, set *ptr to end and return the replacement character (0xFFFD).

Definition at line 154 of file SkUTF.cpp.

                                                                          {
    SkUnichar val = SkUTF::NextUTF8(ptr, end);
    return val < 0 ? 0xFFFD : val;
}

◆ ToUTF16()

size_t SkUTF::ToUTF16	(	SkUnichar	uni,
		uint16_t	utf16[2] = nullptr
	)

Convert the unicode codepoint into UTF-16. If utf16 is non-null, place the result in that array. Return the number of UTF-16 code units in the result (1 or 2). If utf16 is null, simply return the number of code units that would be used. For invalid unicode codepoints, return 0.

Definition at line 243 of file SkUTF.cpp.

                                                      {
    if ((uint32_t)uni > 0x10FFFF) {
        return 0;
    }
    int extra = (uni > 0xFFFF);
    if (utf16) {
        if (extra) {
            utf16[0] = (uint16_t)((0xD800 - 64) + (uni >> 10));
            utf16[1] = (uint16_t)(0xDC00 | (uni & 0x3FF));
        } else {
            utf16[0] = (uint16_t)uni;
        }
    }
    return 1 + extra;
}

◆ ToUTF8()

SK_SPI size_t SkUTF::ToUTF8	(	SkUnichar	uni,
		char	utf8[kMaxBytesInUTF8Sequence] = nullptr
	)

Convert the unicode codepoint into UTF-8. If utf8 is non-null, place the result in that array. Return the number of bytes in the result. If utf8 is null, simply return the number of bytes that would be used. For invalid unicode codepoints, return 0.

◆ UTF16ToUTF8()

int SkUTF::UTF16ToUTF8	(	char	dst[],
		int	dstCapacity,
		const uint16_t	src[],
		size_t	srcLength
	)

Returns the number of resulting UTF8 values needed to convert the src utf16 sequence. If dst is not null, it is filled with the corresponding values up to its capacity. If there is an error, -1 is returned and the dst[] buffer is undefined.

Definition at line 291 of file SkUTF.cpp.

                                                                                          {
    if (!dst) {
        dstCapacity = 0;
    }
 
    int dstLength = 0;
    const char* endDst = dst + dstCapacity;
    const uint16_t* endSrc = src + srcLength;
    while (src < endSrc) {
        SkUnichar uni = NextUTF16(&src, endSrc);
        if (uni < 0) {
            return -1;
        }
 
        char utf8[SkUTF::kMaxBytesInUTF8Sequence];
        size_t count = ToUTF8(uni, utf8);
        if (count == 0) {
            return -1;
        }
        dstLength += count;
 
        if (dst) {
            const char* elems = utf8;
            while (dst < endDst && count > 0) {
                *dst++ = *elems++;
                count -= 1;
            }
        }
    }
    return dstLength;
}

◆ UTF8ToUTF16()

int SkUTF::UTF8ToUTF16	(	uint16_t	dst[],
		int	dstCapacity,
		const char	src[],
		size_t	srcByteLength
	)

Returns the number of resulting UTF16 values needed to convert the src utf8 sequence. If dst is not null, it is filled with the corresponding values up to its capacity. If there is an error, -1 is returned and the dst[] buffer is undefined.

Definition at line 259 of file SkUTF.cpp.

                                                                                              {
    if (!dst) {
        dstCapacity = 0;
    }
 
    int dstLength = 0;
    uint16_t* endDst = dst + dstCapacity;
    const char* endSrc = src + srcByteLength;
    while (src < endSrc) {
        SkUnichar uni = NextUTF8(&src, endSrc);
        if (uni < 0) {
            return -1;
        }
 
        uint16_t utf16[2];
        size_t count = ToUTF16(uni, utf16);
        if (count == 0) {
            return -1;
        }
        dstLength += count;
 
        if (dst) {
            uint16_t* elems = utf16;
            while (dst < endDst && count > 0) {
                *dst++ = *elems++;
                count -= 1;
            }
        }
    }
    return dstLength;
}

Variable Documentation

◆ kMaxBytesInUTF8Sequence

constexpr unsigned SkUTF::kMaxBytesInUTF8Sequence = 4

constexpr

Definition at line 59 of file SkUTF.h.

Functions

Variables