// Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
// This source file is part of the Cangjie project, licensed under Apache-2.0
// with Runtime Library Exception.
//
// See https://cangjie-lang.cn/pages/LICENSE for license information.

#include <iostream>

#include "cangjie/Utils/Unicode.h"
#include "UnicodeTables/WidthData.generated.inc"

namespace Cangjie::Unicode {
/// Looks up the column width of a single code point in the generated tables.
///
/// The lookup walks three tables, consuming the code point's bits from the top down:
///   - bits 13..20 index `TABLES_0` and select a 128-entry block of `TABLES_1`;
///   - bits 6..12 index into that block and select a 16-entry block of `TABLES_2`;
///   - bits 2..5 index into that block and yield four packed 2-bit widths;
///   - bits 0..1 pick one of those four widths.
/// A stored width of 3 is context dependent: it is reported as 2 when `isCJK` is set and as 1
/// otherwise. Any other stored width is returned unchanged.
///
/// Maintenance note:
/// The tables are autogenerated, whereas this function is written by hand. Re-running
/// `WidthData.py` (for example, when updating Unicode) needs no change here. If the *structure*
/// of the lookup tables changes (for instance by editing the `TABLE_CFGS` global in
/// `WidthData.py`), this code must be updated to match.
int LookupWidth(UTF32 cp, bool isCJK)
{
    const auto rootSlot = (cp >> 13) & 0xFF;
    const auto midBlock = TABLES_0[rootSlot];
    const auto leafBlock = TABLES_1[128 * midBlock + ((cp >> 6) & 0x7F)];
    const auto packed = TABLES_2[16 * leafBlock + ((cp >> 2) & 0xF)];
    // Each packed entry holds four 2-bit widths; the low two bits of cp select one of them.
    const auto width = (packed >> (2 * (cp & 0b11))) & 0b11;
    if (width != 3) {
        return width;
    }
    return isCJK ? 2 : 1;
}

namespace {
/// State carried between successive calls to `WidthInStr`.
/// Strings are scanned from back to front, so the value returned while handling one character
/// describes the character that comes *next* in the string from the point of view of the
/// character handled afterwards.
enum class NextCharInfo {
    /// The following character needs no special treatment.
    DEFAULT,
    /// The following character is '\n'; a '\r' directly before it is given width 0.
    LF,
    /// The following character is U+0338.
    /// For preserving canonical equivalence with CJK: in CJK mode a preceding '<', '=' or '>'
    /// is given width 2.
    COMBINING_LONG_SOLIDUS_OVERLAY,
    /// The following character is in U+A4FC..U+A4FD.
    /// Unicode ch18. A preceding character in U+A4F8..U+A4FB is given width 0.
    TRAILING_LISU_TONE_LETTER,
    /// The following character is U+FE0E.
    VS15,
    /// The following character is U+FE0F.
    VS16
};

/// Computes the width of `c` given what is known about the character that follows it.
///
/// \param c        code point being measured
/// \param isCJK    whether widths are computed for an East Asian (CJK) context
/// \param nextInfo state returned by the previous call, i.e. for the character after `c`
/// \return the width contributed by `c`, paired with the state to pass to the call for the
///         character that precedes `c`
std::pair<size_t, NextCharInfo> WidthInStr(UTF32 c, bool isCJK, NextCharInfo nextInfo)
{
    // Sequences whose base character takes two columns.
    const bool isRelationSign = c == '<' || c == '=' || c == '>';
    if (isCJK && nextInfo == NextCharInfo::COMBINING_LONG_SOLIDUS_OVERLAY && isRelationSign) {
        return {2, NextCharInfo::DEFAULT};
    }
    if (nextInfo == NextCharInfo::VS16 && StartsEmojiPresentationSeq(c)) {
        return {2, NextCharInfo::DEFAULT};
    }

    // Code points up to U+00A0 are handled without consulting the tables.
    if (c <= 0xa0) {
        if (c == '\n') {
            return {1, NextCharInfo::LF};
        }
        // A CR immediately followed by LF adds nothing; the LF already counted.
        const bool isCrBeforeLf = c == '\r' && nextInfo == NextCharInfo::LF;
        if (isCrBeforeLf) {
            return {0, NextCharInfo::DEFAULT};
        }
        return {1, NextCharInfo::DEFAULT};
    }

    // Individual code points that either have a fixed width or update the carried state.
    switch (c) {
        case 0xa4f8:
        case 0xa4f9:
        case 0xa4fa:
        case 0xa4fb:
            if (nextInfo == NextCharInfo::TRAILING_LISU_TONE_LETTER) {
                return {0, NextCharInfo::DEFAULT};
            }
            break;
        case 0x0338:
            return {0, NextCharInfo::COMBINING_LONG_SOLIDUS_OVERLAY};
        case 0xa4fc:
        case 0xa4fd:
            return {1, NextCharInfo::TRAILING_LISU_TONE_LETTER};
        case 0xfe0e:
            return {0, NextCharInfo::VS15};
        case 0xfe0f:
            return {0, NextCharInfo::VS16};
        default:
            break;
    }

    // Outside CJK mode, a text presentation sequence (base + VS15) has a one-column base.
    if (nextInfo == NextCharInfo::VS15 && !isCJK && StartsNonIdeographicTextPresentationSeq(c)) {
        return {1, NextCharInfo::DEFAULT};
    }
    return {LookupWidth(c, isCJK), NextCharInfo::DEFAULT};
}
}

/// Returns the total width of `s`, taking multi-character sequences into account.
///
/// The string is converted to UTF-32 and scanned from its last code point to its first, so that
/// each code point is measured with knowledge of the one that follows it.
///
/// \param s     string to measure
/// \param isCJK whether widths are computed for an East Asian (CJK) context
/// \return the sum of the widths reported by `WidthInStr` for every code point
int StrWidth(StringRef s, bool isCJK)
{
    const auto codepoints = s.ToUTF32();
    int total = 0;
    auto following = NextCharInfo::DEFAULT;
    auto cursor = codepoints.crbegin();
    const auto stop = codepoints.crend();
    while (cursor != stop) {
        const UTF32 current = *cursor;
        const auto [width, info] = WidthInStr(current, isCJK, following);
        total += static_cast<int>(width);
        following = info;
        ++cursor;
    }
    return total;
}

/// Returns the width of a single code point considered on its own.
///
/// Every code point below U+00A0 is reported as one column wide: that covers the control codes
/// U+0000..U+001F, the ASCII range U+0020..U+007E, and the control codes U+007F..U+009F.
/// Code points from U+00A0 upwards are resolved through `LookupWidth`.
///
/// \param codepoint code point to measure
/// \param isCJK     whether widths are computed for an East Asian (CJK) context
int SingleCharWidth(UTF32 codepoint, bool isCJK)
{
    if (codepoint < 0xa0) {
        return 1;
    }
    return LookupWidth(codepoint, isCJK);
}

/// Code point of the horizontal tab character (U+0009).
constexpr static UTF32 TAB = 0x9;
/// Number of columns a tab character contributes to a display width.
constexpr static int TAB_LENGTH{4};
/// Number of columns taken by a control character shown in the escaped form '\u{00xx}'.
constexpr static int UNICODE_ESCAPE_WIDTH{8};

/// Returns the number of columns needed to display `s`.
///
/// This differs from `StrWidth` in two ways:
///   - a tab counts as `TAB_LENGTH` columns;
///   - the control characters U+0000..U+0008, U+000B..U+001F and U+007F count as
///     `UNICODE_ESCAPE_WIDTH` columns, because they are output in the form '\u{00xx}'.
/// All other code points are measured by `WidthInStr`. The string is scanned from its last code
/// point to its first so that each one is measured knowing what follows it.
///
/// \param s     string to measure
/// \param isCJK whether widths are computed for an East Asian (CJK) context
/// \return total display width in columns
int DisplayWidth(StringRef s, bool isCJK)
{
    auto str = s.ToUTF32();
    auto res{0};
    NextCharInfo st{NextCharInfo::DEFAULT};
    for (auto it = str.crbegin(); it != str.crend(); ++it) {
        UTF32 c = *it;
        if (c == TAB) { // this may be incorrect if there is a remaining sequence awaiting in the cache
            // Use the shared constant so this overload stays in sync with DisplayWidth(UTF32, bool).
            res += TAB_LENGTH;
            st = NextCharInfo::DEFAULT;
            continue;
        }
        // these chars are output in form '\u{00xx}', which has a length of 8
        if (c <= 0x8 || (0xb <= c && c <= 0x1f) || c == 0x7f) {
            res += UNICODE_ESCAPE_WIDTH;
            // An escaped character cannot take part in a sequence, so drop any carried state.
            st = NextCharInfo::DEFAULT;
            continue;
        }
        auto r = WidthInStr(c, isCJK, st);
        res += static_cast<int>(r.first);
        st = r.second;
    }
    return res;
}

/// Returns the number of columns needed to display a single code point.
///
/// A tab counts as `TAB_LENGTH` columns. The control characters U+0000..U+0008,
/// U+000B..U+001F and U+007F are output in the form '\u{00xx}' and therefore count as
/// `UNICODE_ESCAPE_WIDTH` columns. Every other code point is measured by `SingleCharWidth`.
///
/// \param cp    code point to measure
/// \param isCJK whether widths are computed for an East Asian (CJK) context
int DisplayWidth(UTF32 cp, bool isCJK)
{
    if (cp == TAB) {
        return TAB_LENGTH;
    }
    const bool isEscapedControl = cp <= 0x8 || (0xb <= cp && cp <= 0x1f) || cp == 0x7f;
    return isEscapedControl ? UNICODE_ESCAPE_WIDTH : SingleCharWidth(cp, isCJK);
}
}
