UTF16 <-> UTF32

Liste des Groupes | Revenir à cl c++
Sujet : UTF16 <-> UTF32
De : Bonita.Montero (at) *nospam* gmail.com (Bonita Montero)
Groupes : comp.lang.c++
Date : 31. Aug 2024, 22:01:43
Autres entêtes
Organisation : A noiseless patient Spider
Message-ID : <vavsr7$14n11$1@raubtier-asyl.eternal-september.org>
User-Agent : Mozilla Thunderbird
Today I needed conversion functions from UTF-32 to UTF-16 and in the
opposite direction. I wanted to allow reuse of the result string, so I
decided to pass the result string as a reference parameter. This lets
the result string keep its capacity across calls.
I think there's no way to implement this code faster.
// Converts the UTF-16 sequence `str` to UTF-32 and stores it in `u32Str`.
//
// The input is walked twice: a validating pass that only counts the resulting
// code points, then an unchecked pass that writes them. Because the exact
// length is known before anything is written, the destination is sized once.
//
// Returns false if `str` contains an unpaired or out-of-order surrogate; in
// that case `u32Str` is left untouched. Returns true otherwise, with the
// previous contents of `u32Str` replaced by the converted text.
bool u16ToU32( std::u16string_view str, std::u32string &u32Str )
{
	// Decodes `str`, passing each code point to `fn`. `checked` is a
	// std::bool_constant: with true_type every surrogate pair is validated and
	// the walk aborts with false on the first malformed one; with false_type
	// the validation is compiled out (only safe on already-validated input).
	auto iterate = [&]( auto checked, auto fn ) -> bool
	{
		constexpr bool Err = decltype(checked)::value;
		constexpr char16_t
			SURR_HDR_MSK = 0xF800,   // selects the bits shared by all surrogates
			HIGH_SURR    = 0xD800,
			SURR_HDR     = HIGH_SURR,
			LOW_SURR     = 0xDC00,
			SURR_MASK    = 0xFC00,   // distinguishes high from low surrogates
			PAYLOAD_MASK = 0x03FF;   // the 10 data bits carried by a surrogate
		for( auto it = str.begin(), end = str.end(); it != end; )
		{
			if( (*it & SURR_HDR_MSK) != SURR_HDR ) [[likely]]
			{
				// Plain BMP code unit: maps 1:1 to a code point.
				fn( static_cast<char32_t>( *it++ ) );
				continue;
			}
			if constexpr( Err )
			{
				// A pair must start with a high surrogate ...
				if( (*it & SURR_MASK) != HIGH_SURR ) [[unlikely]]
					return false;
				// ... must not be cut off by the end of the input ...
				if( it + 1 == end ) [[unlikely]]
					return false;
				// ... and must be completed by a low surrogate.
				if( (it[1] & SURR_MASK) != LOW_SURR ) [[unlikely]]
					return false;
			}
			const char32_t hi = static_cast<char32_t>( it[0] & PAYLOAD_MASK );
			const char32_t lo = static_cast<char32_t>( it[1] & PAYLOAD_MASK );
			fn( 0x10000 + (hi << 10 | lo) );
			it += 2;
		}
		return true;
	};

	// Pass 1: validate and count the code points.
	std::size_t n = 0;
	if( !iterate( std::true_type(), [&]( char32_t ) { ++n; } ) ) [[unlikely]]
		return false;

	// Pass 2: write the code points into a buffer of exactly `len` elements.
	auto fill = [&]( char32_t *p, std::size_t len ) -> std::size_t
	{
		iterate( std::false_type(), [&]( char32_t c ) { *p++ = c; } );
		return len;
	};
#if defined(__cpp_lib_string_resize_and_overwrite)
	// C++23: skips the value-initialisation that resize() would perform.
	u32Str.resize_and_overwrite( n, fill );
#else
	// Older standard libraries: same result, at the cost of a zero-fill.
	u32Str.resize( n );
	fill( u32Str.data(), n );
#endif
	return true;
}
pair<bool, u32string> u16ToU32( u16string_view str )
{
u32string u32Str;
if( !u16ToU32( str, u32Str ) ) [[unlikely]]
return { false, {} };
return { true, move( u32Str ) };
}
// Converts the UTF-32 sequence `str` to UTF-16 and stores it in `u16Str`.
//
// The input is walked twice: a validating pass that only counts the resulting
// code units, then an unchecked pass that writes them. Because the exact
// length is known before anything is written, the destination is sized once.
//
// Returns false if `str` contains a value above 0x10FFFF or any surrogate
// code point (0xD800..0xDFFF); in that case `u16Str` is left untouched.
// Returns true otherwise, with the previous contents of `u16Str` replaced by
// the converted text.
bool u32ToU16( std::u32string_view str, std::u16string &u16Str )
{
	// Encodes `str`, passing each UTF-16 code unit to `fn`. `checked` is a
	// std::bool_constant: with true_type every code point is validated and the
	// walk aborts with false on the first invalid one; with false_type the
	// validation is compiled out (only safe on already-validated input).
	auto iterate = [&]( auto checked, auto fn ) -> bool
	{
		constexpr bool Err = decltype(checked)::value;
		constexpr char32_t
			UNICODE_MAX        = 0x10FFFF,
			BMP_MAX            = 0xFFFF,
			SUPPLEMENTARY_BASE = 0x10000,
			HIGH_SURR          = 0xD800,
			LOW_SURR           = 0xDC00,
			END_SURR           = 0xDFFF;
		for( char32_t c : str )
		{
			if constexpr( Err )
			{
				// Surrogates are not code points that may appear in UTF-32.
				// The whole 0xD800..0xDFFF range has to be rejected, not only
				// the low half, or a lone high surrogate would be copied
				// verbatim into the output and make it ill-formed UTF-16.
				if( c > UNICODE_MAX || (c >= HIGH_SURR && c <= END_SURR) ) [[unlikely]]
					return false;
			}
			if( c <= BMP_MAX ) [[likely]]
				fn( static_cast<char16_t>( c ) );
			else
			{
				// Supplementary plane: split the 20-bit offset into two
				// 10-bit halves carried by a high and a low surrogate.
				c -= SUPPLEMENTARY_BASE;
				fn( static_cast<char16_t>( HIGH_SURR | (c >> 10) ) );
				fn( static_cast<char16_t>( LOW_SURR | (c & 0x3FF) ) );
			}
		}
		return true;
	};

	// Pass 1: validate and count the code units.
	std::size_t n = 0;
	if( !iterate( std::true_type(), [&]( char16_t ) { ++n; } ) ) [[unlikely]]
		return false;

	// Pass 2: write the code units into a buffer of exactly `len` elements.
	auto fill = [&]( char16_t *p, std::size_t len ) -> std::size_t
	{
		iterate( std::false_type(), [&]( char16_t c ) { *p++ = c; } );
		return len;
	};
#if defined(__cpp_lib_string_resize_and_overwrite)
	// C++23: skips the value-initialisation that resize() would perform.
	u16Str.resize_and_overwrite( n, fill );
#else
	// Older standard libraries: same result, at the cost of a zero-fill.
	u16Str.resize( n );
	fill( u16Str.data(), n );
#endif
	return true;
}
pair<bool, u16string> u32ToU16( u32string_view str )
{
u16string u16Str;
if( !u32ToU16( str, u16Str ) ) [[unlikely]]
return { false, {} };
return { true, move( u16Str ) };
}

Date | Sujet | # | Auteur
31 Aug 24 | * UTF16 <-> UTF32 | 3 | Bonita Montero
1 Sep 24 | `* Re: UTF16 <-> UTF32 | 2 | Chris Ahlstrom
6 Sep 24 |  `- Re: UTF16 <-> UTF32 | 1 | Bonita Montero

Haut de la page

Les messages affichés proviennent d'usenet.

NewsPortal