Use 0b instead of 0x and check 1-byte prefix first in Utf8CharSizeAt

1-byte symbols are more common than 4-byte symbols,
so checking the 1-byte prefix first is more efficient.
fix-squashed-planets
Evgeny Pestov 2022-02-14 18:09:08 +07:00
parent ec8a09e4a9
commit 4bce63e38d
1 changed files with 8 additions and 6 deletions

View File

@ -175,12 +175,14 @@ int StrUtils::Utf8CharSizeAt(const std::string &str, unsigned int pos)
return 0;
const char c = str[pos];
if((c & 0xF8) == 0xF0)
return 4;
if((c & 0xF0) == 0xE0)
return 3;
if((c & 0xE0) == 0xC0)
if((c & 0b1000'0000) == 0b0000'0000)
return 1;
if((c & 0b1110'0000) == 0b1100'0000)
return 2;
if((c & 0b1111'0000) == 0b1110'0000)
return 3;
if((c & 0b1111'1000) == 0b1111'0000)
return 4;
return 1;
}
@ -199,5 +201,5 @@ std::size_t StrUtils::Utf8StringLength(const std::string &str)
// Reports whether `c` is a UTF-8 continuation byte: the top two bits are
// isolated with the mask 1100'0000 and compared against the pattern 1000'0000,
// so the result is true exactly when the byte has the form 10xxxxxx.
bool StrUtils::isUtf8ContinuationByte(char c)
{
// NOTE(review): the two return statements below use the same mask (0xC0) and
// the same expected value (0x80); they differ only in where the digit
// separators are placed. They appear to be the before/after lines of this
// diff hunk rather than two live statements -- if compiled as shown, only the
// first one would ever execute.
return (c & 0b11'000000) == 0b10'000000;
return (c & 0b1100'0000) == 0b1000'0000;
}