1
0
mirror of https://github.com/Swordfish90/cool-retro-term.git synced 2025-02-23 05:18:44 +00:00
cool-retro-term/yat/backend/utf8_decoder.h

110 lines
3.3 KiB
C++

/*******************************************************************************
* Copyright (c) 2013 Jørgen Lind
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/
#ifndef UTF8_DECODER
#define UTF8_DECODER
#include "controll_chars.h"
/**
 * Incremental UTF-8 decoder.
 *
 * Bytes are pushed in one at a time through addChar(); the decoder keeps the
 * bits gathered so far together with two counters describing how far into a
 * multi-byte sequence it currently is. The query functions inspect that
 * state.
 */
class Utf8Decoder
{
public:
    /// Creates a decoder with no sequence in progress.
    inline Utf8Decoder();

    /// Feeds the next byte of the stream into the decoder.
    inline void addChar(uchar character);

    /// Resets both counters and the accumulated value to zero.
    inline void clear();

    /// Tests the current state against the decoder's "Latin" criteria
    /// (see the definition for the exact conditions).
    inline bool isLatin() const;

    /// Tests whether the current state describes a value inside the
    /// C1_8bit range (see the definition for the exact conditions).
    inline bool isC1() const;

private:
    /// Number of continuation bytes announced by the lead byte of the
    /// sequence being decoded; 0 when no sequence is in progress.
    short m_expected_length;

    /// Number of continuation bytes consumed so far for that sequence.
    short m_length;

    /// Payload bits accumulated from the lead byte and continuation bytes.
    uint32_t m_unicode;
};
/**
 * Builds a decoder in its idle state: no sequence in progress and no
 * accumulated value (every member starts at zero).
 */
Utf8Decoder::Utf8Decoder()
    : m_expected_length(0)
    , m_length(0)
    , m_unicode(0)
{
}
/**
 * Feeds one byte of a UTF-8 stream into the decoder.
 *
 * - A sequence completed by the previous call is discarded first, so its
 *   value is only observable until the next byte arrives.
 * - Bytes below 0x80 (ASCII) are not accumulated; the decoder is simply left
 *   idle.
 * - A lead byte starts a new sequence: m_expected_length receives the number
 *   of continuation bytes that must follow (not the total sequence length)
 *   and m_unicode receives the payload bits of the lead byte.
 * - A continuation byte (10xxxxxx) received while a sequence is in progress
 *   contributes its low six bits to m_unicode and increments m_length.
 * - Any other byte received while a sequence is in progress means the
 *   sequence was truncated: a warning is emitted, the partial state is
 *   dropped and the byte is then handled as if the decoder had been idle.
 * - A continuation byte received while idle is reported as invalid.
 *
 * @param character the next raw byte of the stream
 */
void Utf8Decoder::addChar(uchar character)
{
    // The previous call finished a sequence; forget it before looking at new input.
    if (m_length && m_length == m_expected_length) {
        clear();
    }

    const bool midSequence = m_expected_length != 0;
    const bool isContinuation = (character & 0xc0) == 0x80;

    if (midSequence && isContinuation) {
        m_unicode = (m_unicode << 6) | (character & 0x3f);
        m_length++;
        return;
    }

    if (midSequence) {
        // ASCII or a fresh lead byte interrupted the sequence. Drop the
        // partial value so it cannot leak into the next character, then
        // resynchronise on the current byte below.
        qWarning("Utf8Decoder: truncated multi-byte sequence");
        clear();
    }

    if (character < 0x80)
        return;

    // Lead byte: the masks are tested from the longest form to the shortest,
    // so every test only needs to check that its high bits are all set.
    // The legacy 5- and 6-byte forms are still accepted here.
    if ((character & 0xfc) == 0xfc) {
        m_expected_length = 5;
        m_unicode = character & 0x01;
    } else if ((character & 0xf8) == 0xf8) {
        m_expected_length = 4;
        m_unicode = character & 0x03;
    } else if ((character & 0xf0) == 0xf0) {
        m_expected_length = 3;
        m_unicode = character & 0x07;
    } else if ((character & 0xe0) == 0xe0) {
        m_expected_length = 2;
        m_unicode = character & 0x0f;
    } else if ((character & 0xc0) == 0xc0) {
        m_expected_length = 1;
        m_unicode = character & 0x1f;
    } else {
        // Continuation byte with no sequence in progress.
        m_expected_length = 0;
        m_unicode = 0;
        qWarning("Utf8Decoder: invalid decoder character");
    }
}
/**
 * Reports whether the current state looks like a "Latin" character: the
 * sequence announces fewer than two continuation bytes and the accumulated
 * value is below 0xff.
 *
 * NOTE(review): the comparison is strict, so the value 0xff itself is
 * rejected - confirm whether that is intended.
 * NOTE(review): m_length is not consulted, so this can also return true
 * while a sequence is still incomplete - confirm callers only ask after a
 * full character has been fed.
 *
 * @return true when both conditions above hold
 */
bool Utf8Decoder::isLatin() const
{
    if (m_expected_length >= 2)
        return false;

    return m_unicode < 0xff;
}
/**
 * Reports whether the decoder holds a completed two-byte sequence whose
 * value lies in [C1_8bit_Start, C1_8bit_Stop].
 *
 * m_expected_length counts the continuation bytes that follow the lead byte
 * (addChar() stores 1 for a 110xxxxx lead byte), so a two-byte sequence is
 * identified by the value 1. The previous test against 2 could only match
 * three-byte sequences.
 *
 * NOTE(review): this assumes the C1_8bit range is 0x80..0x9f, i.e. values
 * that UTF-8 encodes in two bytes - confirm against controll_chars.h.
 *
 * @return true when a complete two-byte sequence decoded to a C1_8bit value
 */
bool Utf8Decoder::isC1() const
{
    const bool completeTwoByteSequence =
        m_expected_length == 1 && m_length == m_expected_length;

    return completeTwoByteSequence &&
        (m_unicode >= C1_8bit::C1_8bit_Start && m_unicode <= C1_8bit::C1_8bit_Stop);
}
/**
 * Returns the decoder to its idle state: the accumulated value and both
 * sequence counters are set back to zero.
 */
void Utf8Decoder::clear()
{
    m_unicode = 0;
    m_length = 0;
    m_expected_length = 0;
}
#endif