/*  -*- C++ -*-
    SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>

    SPDX-License-Identifier: MIT
*/

#include "nsCodingStateMachine.h"

/*
Modification from Frank Tang's original work:
. 0x00 is allowed as a legal character, since some web pages contain this
  char in their text stream.
*/

// BIG5

namespace kencodingprober
{
// Byte-class table for Big5: one class number per byte value 0x00-0xff,
// packed eight entries per word with PCK4BITS (the trailing comment on each
// row gives the byte range it covers).
//
// Classes as assigned below:
//   0 - 0x0e, 0x0f, 0x1b and 0xff
//   1 - the remaining bytes in 0x00-0x3f, plus 0x7f
//   2 - 0x40-0x7e
//   3 - 0xa1-0xfe
//   4 - 0x80-0xa0
static const unsigned int BIG5_cls[256 / 8] = {
    // Former first row, which put 0x00 in class 0:
    // PCK4BITS(0,1,1,1,1,1,1,1),  // 00 - 07
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07    //allow 0x00 as legal value
    PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
    PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 40 - 47
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 48 - 4f
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 50 - 57
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 58 - 5f
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 60 - 67
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 68 - 6f
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 70 - 77
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 1), // 78 - 7f
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 80 - 87
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 88 - 8f
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 90 - 97
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 98 - 9f
    PCK4BITS(4, 3, 3, 3, 3, 3, 3, 3), // a0 - a7
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // a8 - af
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // b0 - b7
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // b8 - bf
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // c0 - c7
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // c8 - cf
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // d0 - d7
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // d8 - df
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // e0 - e7
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // e8 - ef
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 0) // f8 - ff
};

// State-transition table for Big5, packed eight entries per word. The
// trailing comments give the flat entry indices held by each row.
// NOTE(review): presumably indexed as state * 5 + byte class (5 being the
// class count in Big5SMModel), the same row-major layout the UTF8_st comments
// in this file spell out - confirm against the state machine implementation.
static const unsigned int BIG5_st[3] = {
    PCK4BITS(eError, eStart, eStart, 3, eError, eError, eError, eError), // 00-07
    PCK4BITS(eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError), // 08-0f
    PCK4BITS(eError, eStart, eStart, eStart, eStart, eStart, eStart, eStart) // 10-17
};

// Character length recorded for each Big5 byte class (entry i belongs to
// class i of BIG5_cls).
static const unsigned int Big5CharLenTable[] = {
    0, // class 0: 0x0e, 0x0f, 0x1b, 0xff
    1, // class 1: 0x00-0x3f (other than class 0), 0x7f
    1, // class 2: 0x40-0x7e
    2, // class 3: 0xa1-0xfe
    0, // class 4: 0x80-0xa0
};

// Big5 state-machine model: bundles the tables defined above with the
// charset name string.
const SMModel Big5SMModel = {
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_cls}, // packed byte-class table
    5, // matches the number of classes (0-4) used by BIG5_cls
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_st}, // packed state-transition table
    Big5CharLenTable, // per-class character lengths
    "Big5",
};

// Byte-class table for EUC-JP: one class number per byte value 0x00-0xff,
// packed eight entries per word with PCK4BITS.
//
// Classes as assigned below:
//   0 - 0xe0-0xfe
//   1 - 0x8e
//   2 - 0xa1-0xdf
//   3 - 0x8f
//   4 - 0x00-0x7f except 0x0e, 0x0f and 0x1b
//   5 - 0x0e, 0x0f, 0x1b, 0x80-0x8d, 0x90-0xa0 and 0xff
static const unsigned int EUCJP_cls[256 / 8] = {
    // Former first row, which put 0x00 in class 5:
    // PCK4BITS(5,4,4,4,4,4,4,4),  // 00 - 07
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 00 - 07
    PCK4BITS(4, 4, 4, 4, 4, 4, 5, 5), // 08 - 0f
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 10 - 17
    PCK4BITS(4, 4, 4, 5, 4, 4, 4, 4), // 18 - 1f
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 20 - 27
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 28 - 2f
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 30 - 37
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 38 - 3f
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 40 - 47
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 48 - 4f
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 50 - 57
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 58 - 5f
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 60 - 67
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 68 - 6f
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 70 - 77
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 78 - 7f
    PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 80 - 87
    PCK4BITS(5, 5, 5, 5, 5, 5, 1, 3), // 88 - 8f
    PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 90 - 97
    PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 98 - 9f
    PCK4BITS(5, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e0 - e7
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e8 - ef
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // f0 - f7
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 5) // f8 - ff
};

// State-transition table for EUC-JP, packed eight entries per word. The
// trailing comments give the flat entry indices held by each row.
// NOTE(review): presumably indexed as state * 6 + byte class (6 being the
// class count in EUCJPSMModel) - confirm against the state machine
// implementation.
static const unsigned int EUCJP_st[5] = {
    PCK4BITS(3, 4, 3, 5, eStart, eError, eError, eError), // 00-07
    PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), // 08-0f
    PCK4BITS(eItsMe, eItsMe, eStart, eError, eStart, eError, eError, eError), // 10-17
    PCK4BITS(eError, eError, eStart, eError, eError, eError, 3, eError), // 18-1f
    PCK4BITS(3, eError, eError, eError, eStart, eStart, eStart, eStart) // 20-27
};

// Character length recorded for each EUC-JP byte class (entry i belongs to
// class i of EUCJP_cls).
static const unsigned int EUCJPCharLenTable[] = {
    2, // class 0: 0xe0-0xfe
    2, // class 1: 0x8e
    2, // class 2: 0xa1-0xdf
    3, // class 3: 0x8f
    1, // class 4: 0x00-0x7f except 0x0e, 0x0f, 0x1b
    0, // class 5: 0x0e, 0x0f, 0x1b, 0x80-0x8d, 0x90-0xa0, 0xff
};

// EUC-JP state-machine model: bundles the tables defined above with the
// charset name string.
const SMModel EUCJPSMModel = {
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls}, // packed byte-class table
    6, // matches the number of classes (0-5) used by EUCJP_cls
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st}, // packed state-transition table
    EUCJPCharLenTable, // per-class character lengths
    "EUC-JP",
};

// Byte-class table for EUC-KR: one class number per byte value 0x00-0xff,
// packed eight entries per word with PCK4BITS.
//
// Classes as assigned below:
//   0 - 0x0e, 0x0f, 0x1b, 0x80-0xa0 and 0xff
//   1 - the remaining bytes in 0x00-0x7f
//   2 - 0xa1-0xfe except the class 3 bytes
//   3 - 0xad, 0xae, 0xaf and 0xc9
static const unsigned int EUCKR_cls[256 / 8] = {
    // Former first row, which put 0x00 in class 0:
    // PCK4BITS(0,1,1,1,1,1,1,1),  // 00 - 07
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07
    PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
    PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 40 - 47
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 48 - 4f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 50 - 57
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 58 - 5f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 60 - 67
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 68 - 6f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 70 - 77
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 78 - 7f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 80 - 87
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 88 - 8f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 90 - 97
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 98 - 9f
    PCK4BITS(0, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
    PCK4BITS(2, 2, 2, 2, 2, 3, 3, 3), // a8 - af
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
    PCK4BITS(2, 3, 2, 2, 2, 2, 2, 2), // c8 - cf
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 0) // f8 - ff
};

// State-transition table for EUC-KR, packed eight entries per word. The
// trailing comments give the flat entry indices held by each row.
// NOTE(review): presumably indexed as state * 4 + byte class (4 being the
// class count in EUCKRSMModel) - confirm against the state machine
// implementation.
static const unsigned int EUCKR_st[2] = {
    PCK4BITS(eError, eStart, 3, eError, eError, eError, eError, eError), // 00-07
    PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, eStart, eStart) // 08-0f
};

// Character length recorded for each EUC-KR byte class (entry i belongs to
// class i of EUCKR_cls).
static const unsigned int EUCKRCharLenTable[] = {
    0, // class 0: 0x0e, 0x0f, 0x1b, 0x80-0xa0, 0xff
    1, // class 1: remaining bytes in 0x00-0x7f
    2, // class 2: 0xa1-0xfe except class 3
    0, // class 3: 0xad, 0xae, 0xaf, 0xc9
};

// EUC-KR state-machine model: bundles the tables defined above with the
// charset name string.
const SMModel EUCKRSMModel = {
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_cls}, // packed byte-class table
    4, // matches the number of classes (0-3) used by EUCKR_cls
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_st}, // packed state-transition table
    EUCKRCharLenTable, // per-class character lengths
    "EUC-KR",
};

/* GB2312 is obsoleted by GB18030; the old tables are kept here for reference only
static unsigned int GB2312_cls [ 256 / 8 ] = {
//PCK4BITS(0,1,1,1,1,1,1,1),  // 00 - 07
PCK4BITS(1,1,1,1,1,1,1,1),  // 00 - 07
PCK4BITS(1,1,1,1,1,1,0,0),  // 08 - 0f
PCK4BITS(1,1,1,1,1,1,1,1),  // 10 - 17
PCK4BITS(1,1,1,0,1,1,1,1),  // 18 - 1f
PCK4BITS(1,1,1,1,1,1,1,1),  // 20 - 27
PCK4BITS(1,1,1,1,1,1,1,1),  // 28 - 2f
PCK4BITS(1,1,1,1,1,1,1,1),  // 30 - 37
PCK4BITS(1,1,1,1,1,1,1,1),  // 38 - 3f
PCK4BITS(1,1,1,1,1,1,1,1),  // 40 - 47
PCK4BITS(1,1,1,1,1,1,1,1),  // 48 - 4f
PCK4BITS(1,1,1,1,1,1,1,1),  // 50 - 57
PCK4BITS(1,1,1,1,1,1,1,1),  // 58 - 5f
PCK4BITS(1,1,1,1,1,1,1,1),  // 60 - 67
PCK4BITS(1,1,1,1,1,1,1,1),  // 68 - 6f
PCK4BITS(1,1,1,1,1,1,1,1),  // 70 - 77
PCK4BITS(1,1,1,1,1,1,1,1),  // 78 - 7f
PCK4BITS(1,0,0,0,0,0,0,0),  // 80 - 87
PCK4BITS(0,0,0,0,0,0,0,0),  // 88 - 8f
PCK4BITS(0,0,0,0,0,0,0,0),  // 90 - 97
PCK4BITS(0,0,0,0,0,0,0,0),  // 98 - 9f
PCK4BITS(0,2,2,2,2,2,2,2),  // a0 - a7
PCK4BITS(2,2,3,3,3,3,3,3),  // a8 - af
PCK4BITS(2,2,2,2,2,2,2,2),  // b0 - b7
PCK4BITS(2,2,2,2,2,2,2,2),  // b8 - bf
PCK4BITS(2,2,2,2,2,2,2,2),  // c0 - c7
PCK4BITS(2,2,2,2,2,2,2,2),  // c8 - cf
PCK4BITS(2,2,2,2,2,2,2,2),  // d0 - d7
PCK4BITS(2,2,2,2,2,2,2,2),  // d8 - df
PCK4BITS(2,2,2,2,2,2,2,2),  // e0 - e7
PCK4BITS(2,2,2,2,2,2,2,2),  // e8 - ef
PCK4BITS(2,2,2,2,2,2,2,2),  // f0 - f7
PCK4BITS(2,2,2,2,2,2,2,0)   // f8 - ff
};

static unsigned int GB2312_st [ 2] = {
PCK4BITS(eError,eStart,     3,eError,eError,eError,eError,eError),//00-07
PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f
};

static const unsigned int GB2312CharLenTable[] = {0, 1, 2, 0};

SMModel GB2312SMModel = {
  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_cls },
   4,
  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_st },
  GB2312CharLenTable,
  "GB2312",
};
*/

// The following state machine data was created by a perl script in
// intl/chardet/tools. It should be the same as in the PSM detector.
//
// Byte-class table for GB18030: one class number per byte value 0x00-0xff,
// packed eight entries per word with PCK4BITS.
//
// Classes as assigned below:
//   0 - 0x0e, 0x0f, 0x1b and 0xff
//   1 - the remaining bytes in 0x00-0x2f, plus 0x3a-0x3f
//   2 - 0x40-0x7e
//   3 - 0x30-0x39
//   4 - 0x7f
//   5 - 0x80
//   6 - 0x81-0xfe
static const unsigned int GB18030_cls[256 / 8] = {
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07
    PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
    PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 30 - 37
    PCK4BITS(3, 3, 1, 1, 1, 1, 1, 1), // 38 - 3f
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 40 - 47
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 48 - 4f
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 50 - 57
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 58 - 5f
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 60 - 67
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 68 - 6f
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 70 - 77
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 4), // 78 - 7f
    PCK4BITS(5, 6, 6, 6, 6, 6, 6, 6), // 80 - 87
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // 88 - 8f
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // 90 - 97
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // 98 - 9f
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // a0 - a7
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // a8 - af
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // b0 - b7
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // b8 - bf
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // c0 - c7
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // c8 - cf
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // d0 - d7
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // d8 - df
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // e0 - e7
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // e8 - ef
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // f0 - f7
    PCK4BITS(6, 6, 6, 6, 6, 6, 6, 0) // f8 - ff
};

// State-transition table for GB18030, packed eight entries per word. The
// trailing comments give the flat entry indices held by each row.
// NOTE(review): presumably indexed as state * 7 + byte class (7 being the
// class count in GB18030SMModel) - confirm against the state machine
// implementation.
static const unsigned int GB18030_st[6] = {
    PCK4BITS(eError, eStart, eStart, eStart, eStart, eStart, 3, eError), // 00-07
    PCK4BITS(eError, eError, eError, eError, eError, eError, eItsMe, eItsMe), // 08-0f
    PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, eStart), // 10-17
    PCK4BITS(4, eError, eStart, eStart, eError, eError, eError, eError), // 18-1f
    PCK4BITS(eError, eError, 5, eError, eError, eError, eItsMe, eError), // 20-27
    PCK4BITS(eError, eError, eStart, eStart, eStart, eStart, eStart, eStart) // 28-2f
};

// Character length recorded for each GB18030 byte class (entry i belongs to
// class i of GB18030_cls).
//
// Strictly speaking, a class 6 byte can start either a 2-byte or a 4-byte
// sequence. Telling the two apart is not needed here: the length is used for
// frequency analysis only, and each code range is validated there as well,
// so recording 2 for class 6 is safe.
static const unsigned int GB18030CharLenTable[] = {
    0, // class 0: 0x0e, 0x0f, 0x1b, 0xff
    1, // class 1: remaining bytes in 0x00-0x2f, 0x3a-0x3f
    1, // class 2: 0x40-0x7e
    1, // class 3: 0x30-0x39
    1, // class 4: 0x7f
    1, // class 5: 0x80
    2, // class 6: 0x81-0xfe
};

// GB18030 state-machine model: bundles the tables defined above with the
// charset name string.
const SMModel GB18030SMModel = {
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_cls}, // packed byte-class table
    7, // matches the number of classes (0-6) used by GB18030_cls
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_st}, // packed state-transition table
    GB18030CharLenTable, // per-class character lengths
    "GB18030",
};

// sjis

// Byte-class table for Shift_JIS: one class number per byte value 0x00-0xff,
// packed eight entries per word with PCK4BITS.
//
// Classes as assigned below:
//   0 - 0x0e, 0x0f, 0x1b and 0xfd-0xff
//   1 - the remaining bytes in 0x00-0x3f, plus 0x7f
//   2 - 0x40-0x7e and 0xa0-0xdf
//   3 - 0x80-0x9f and 0xe0-0xec
//   4 - 0xed-0xfc
static const unsigned int SJIS_cls[256 / 8] = {
    // Former first row, which put 0x00 in class 0:
    // PCK4BITS(0,1,1,1,1,1,1,1),  // 00 - 07
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07
    PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
    PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 40 - 47
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 48 - 4f
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 50 - 57
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 58 - 5f
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 60 - 67
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 68 - 6f
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 70 - 77
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 1), // 78 - 7f
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 80 - 87
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 88 - 8f
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 90 - 97
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 98 - 9f
    // 0xa0 is illegal in the sjis encoding, but some pages do contain
    // this byte, so we need to be more forgiving of errors here.
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // e0 - e7
    PCK4BITS(3, 3, 3, 3, 3, 4, 4, 4), // e8 - ef
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // f0 - f7
    PCK4BITS(4, 4, 4, 4, 4, 0, 0, 0) // f8 - ff
};

// State-transition table for Shift_JIS, packed eight entries per word. The
// trailing comments give the flat entry indices held by each row.
// NOTE(review): presumably indexed as state * 6 + byte class (6 being the
// class count in SJISSMModel) - confirm against the state machine
// implementation.
static const unsigned int SJIS_st[3] = {
    PCK4BITS(eError, eStart, eStart, 3, eError, eError, eError, eError), // 00-07
    PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), // 08-0f
    PCK4BITS(eItsMe, eItsMe, eError, eError, eStart, eStart, eStart, eStart) // 10-17
};

// Character length recorded for each Shift_JIS byte class (entry i belongs
// to class i of SJIS_cls). The table has six entries although SJIS_cls only
// assigns classes 0-4.
static const unsigned int SJISCharLenTable[] = {
    0, // class 0: 0x0e, 0x0f, 0x1b, 0xfd-0xff
    1, // class 1: remaining bytes in 0x00-0x3f, 0x7f
    1, // class 2: 0x40-0x7e, 0xa0-0xdf
    2, // class 3: 0x80-0x9f, 0xe0-0xec
    0, // class 4: 0xed-0xfc
    0, // class 5: not assigned by SJIS_cls
};

// Shift_JIS state-machine model: bundles the tables defined above with the
// charset name string.
const SMModel SJISSMModel = {
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls}, // packed byte-class table
    6, // equals the length of SJISCharLenTable (SJIS_cls itself only uses classes 0-4)
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st}, // packed state-transition table
    SJISCharLenTable, // per-class character lengths
    "Shift_JIS",
};

// Byte-class table for UTF-16BE: one class number per byte value 0x00-0xff,
// packed eight entries per word with PCK4BITS.
//
// Classes as assigned below (class 3 is not assigned to any byte):
//   0 - every byte not listed under another class
//   1 - 0x0a
//   2 - 0x0d
//   4 - 0xfe
//   5 - 0xff
//   6 - 0xd8-0xdb
//   7 - 0xdc-0xdf
static const unsigned int UCS2BE_cls[256 / 8] = {
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
    PCK4BITS(0, 0, 1, 0, 0, 2, 0, 0), // 08 - 0f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 18 - 1f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 20 - 27
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 40 - 47
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 80 - 87
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 88 - 8f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 90 - 97
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 98 - 9f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // a0 - a7
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // a8 - af
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // b0 - b7
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // b8 - bf
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // c0 - c7
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // c8 - cf
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // d0 - d7
    PCK4BITS(6, 6, 6, 6, 7, 7, 7, 7), // d8 - df
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e0 - e7
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e8 - ef
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // f0 - f7
    PCK4BITS(0, 0, 0, 0, 0, 0, 4, 5) // f8 - ff
};

// eStart and "6" are MSB states, "5" and "7" are LSB
// 9 is High Surrogate low byte
// 10 is Low Surrogate high byte
static const unsigned int UCS2BE_st[11] = {
    // clang-format off
    PCK4BITS(     5,      7,      7, eError,      4,      3,      9, eError), // 0
    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), // 1
    PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe), // 2
    PCK4BITS(     6,      6,      6,      6, eError, eError,      9,      6), // 3
    PCK4BITS(     6,      6,      6,      6,      6, eItsMe,      6,      6), // 4
    PCK4BITS(     6,      6,      6,      6,      6,      6,      6,      6), // 5
    PCK4BITS(     5,      7,      7, eError,      5,      8,      9, eError), // 6
    PCK4BITS(     6,      6, eError,      6,      6,      6,      9,      6), // 7
    PCK4BITS(     6,      6,      6,      6, eError, eError,      9,      6), // 8
    PCK4BITS(    10,     10,     10,     10,     10,     10,     10,     10), // 9
    PCK4BITS(eError, eError, eError, eError, eError, eError, eError,      6), // 10
    // clang-format on
};

// Character length recorded for each UTF-16BE byte class (entry i belongs to
// class i of UCS2BE_cls).
static const unsigned int UCS2BECharLenTable[] = {
    2, // class 0: bytes without a dedicated class
    2, // class 1: 0x0a
    2, // class 2: 0x0d
    0, // class 3: not assigned by UCS2BE_cls
    2, // class 4: 0xfe
    2, // class 5: 0xff
    4, // class 6: 0xd8-0xdb
    4, // class 7: 0xdc-0xdf
};

// UTF-16BE state-machine model: bundles the tables defined above with the
// charset name string.
const SMModel UCS2BESMModel = {
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2BE_cls}, // packed byte-class table
    8, // one column per class 0-7 in each UCS2BE_st row
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2BE_st}, // packed state-transition table
    UCS2BECharLenTable, // per-class character lengths
    "UTF-16BE",
};

// Byte-class table for UTF-16LE: one class number per byte value 0x00-0xff,
// packed eight entries per word with PCK4BITS.
//
// Classes as assigned below (class 3 is not assigned to any byte):
//   0 - every byte not listed under another class
//   1 - 0x0a
//   2 - 0x0d
//   4 - 0xfe
//   5 - 0xff
//   6 - 0xd8-0xdb
//   7 - 0xdc-0xdf
static const unsigned int UCS2LE_cls[256 / 8] = {
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
    PCK4BITS(0, 0, 1, 0, 0, 2, 0, 0), // 08 - 0f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 18 - 1f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 20 - 27
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 40 - 47
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 80 - 87
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 88 - 8f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 90 - 97
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 98 - 9f
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // a0 - a7
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // a8 - af
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // b0 - b7
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // b8 - bf
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // c0 - c7
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // c8 - cf
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // d0 - d7
    PCK4BITS(6, 6, 6, 6, 7, 7, 7, 7), // d8 - df
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e0 - e7
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e8 - ef
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // f0 - f7
    PCK4BITS(0, 0, 0, 0, 0, 0, 4, 5) // f8 - ff
};

// State-transition table for UTF-16LE: one PCK4BITS row per state (state
// number in the trailing comment), one column per byte class of UCS2LE_cls.
//
// eStart and "5" are LSB states, "3", "4", "6", "7" and "8" are MSB states.
// An MSB in 0xd8-0xdb (class 6) starts a surrogate pair and leads to state 9;
// an MSB in 0xdc-0xdf (class 7) without a preceding high surrogate is an error.
// 9 is Low Surrogate LSB (any byte is accepted)
// 10 is Low Surrogate MSB (must be class 7, then back to LSB state 5)
static const unsigned int UCS2LE_st[11] = {
    // clang-format off
    // class:     0       1       2       3       4       5       6       7
    PCK4BITS(     6,      6,      7,      6,      4,      3,      6,      6), // 0
    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), // 1
    PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe), // 2
    PCK4BITS(     5,      5,      5, eError, eItsMe, eError,      9, eError), // 3
    PCK4BITS(     5,      5,      5, eError,      5, eError,      9, eError), // 4
    PCK4BITS(     6,      6,      7,      6,      8,      8,      6,      6), // 5
    PCK4BITS(     5,      5,      5, eError,      5,      5,      9, eError), // 6
    PCK4BITS(     5, eError, eError, eError,      5,      5,      9, eError), // 7
    PCK4BITS(     5,      5,      5, eError,      5, eError,      9, eError), // 8
    PCK4BITS(    10,     10,     10,     10,     10,     10,     10,     10), // 9
    PCK4BITS(eError, eError, eError, eError, eError, eError, eError,      5), // 10
    // clang-format on
};

// Character length recorded for each UTF-16LE byte class (entry i belongs to
// class i of UCS2LE_cls).
static const unsigned int UCS2LECharLenTable[] = {
    2, // class 0: bytes without a dedicated class
    2, // class 1: 0x0a
    2, // class 2: 0x0d
    2, // class 3: not assigned by UCS2LE_cls
    2, // class 4: 0xfe
    2, // class 5: 0xff
    4, // class 6: 0xd8-0xdb
    4, // class 7: 0xdc-0xdf
};

// UTF-16LE state-machine model: bundles the tables defined above with the
// charset name string.
const SMModel UCS2LESMModel = {
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2LE_cls}, // packed byte-class table
    8, // one column per class 0-7 in each UCS2LE_st row
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2LE_st}, // packed state-transition table
    UCS2LECharLenTable, // per-class character lengths
    "UTF-16LE",
};

// Byte-class table for UTF-8: one class number per byte value 0x00-0xff,
// packed eight entries per word with PCK4BITS.
//
// Classes as assigned below:
//    0 - 0x0e, 0x0f, 0x1b, 0xc0, 0xc1 and 0xf5-0xff
//    1 - the remaining bytes in 0x00-0x7f
//    2 - 0x80-0x8f
//    3 - 0x90-0x9f
//    4 - 0xa0-0xbf
//    5 - 0xc2-0xdf
//    6 - 0xe0
//    7 - 0xe1-0xec, 0xee and 0xef
//    8 - 0xed
//    9 - 0xf0
//   10 - 0xf1-0xf3
//   11 - 0xf4
static const unsigned int UTF8_cls[256 / 8] = {
    // Former first row, which put 0x00 in class 0:
    // PCK4BITS(0,1,1,1,1,1,1,1),  // 00 - 07
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07  //allow 0x00 as a legal value
    PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
    PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 40 - 47
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 48 - 4f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 50 - 57
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 58 - 5f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 60 - 67
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 68 - 6f
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 70 - 77
    PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 78 - 7f
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
    PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 90 - 97
    PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 98 - 9f
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // a0 - a7
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // a8 - af
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // b0 - b7
    PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // b8 - bf
    PCK4BITS(0, 0, 5, 5, 5, 5, 5, 5), // c0 - c7
    PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // c8 - cf
    PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // d0 - d7
    PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // d8 - df
    PCK4BITS(6, 7, 7, 7, 7, 7, 7, 7), // e0 - e7
    PCK4BITS(7, 7, 7, 7, 7, 8, 7, 7), // e8 - ef
    PCK4BITS(9, 10, 10, 10, 11, 0, 0, 0), // f0 - f7
    PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0) // f8 - ff
};

// State-transition table for UTF-8: 10 states x 12 byte classes, packed
// eight entries per word, so every group of three rows below holds two
// states (12 entries each). The trailing comment names the state(s) a row
// belongs to; the header below gives the byte class of each column.
//
// Reading the rows against UTF8_cls:
//   eStart - class 1 stays in eStart; lead-byte classes 5..11 go to states
//            3..9 respectively; every other class is an error
//   3 - accepts classes 2, 3, 4 (0x80-0xbf), then returns to eStart
//   4 - entered on class 6 (0xe0); accepts only class 4 (0xa0-0xbf), then 3
//   5 - entered on class 7; accepts classes 2, 3, 4, then 3
//   6 - entered on class 8 (0xed); accepts only classes 2, 3 (0x80-0x9f), then 3
//   7 - entered on class 9 (0xf0); accepts only classes 3, 4 (0x90-0xbf), then 5
//   8 - entered on class 10 (0xf1-0xf3); accepts classes 2, 3, 4, then 5
//   9 - entered on class 11 (0xf4); accepts only class 2 (0x80-0x8f), then 5
static const unsigned int UTF8_st[10 * 12 / 8] = {
    // clang-format off
    // byteclass: 0       1       2       3       4       5       6       7   // State
    //            8       9      10      11 |     0       1       2       3
    //            4       5       6       7       8       9      10      11
    PCK4BITS(eError, eStart, eError, eError, eError,      3,      4,      5), // eStart
    PCK4BITS(     6,      7,      8,      9, eError, eError, eError, eError), // eStart | eError
    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), // eError

    PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe), // eItsMe
    PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, eStart, eStart), // eItsMe | 3
    PCK4BITS(eStart, eError, eError, eError, eError, eError, eError, eError), // 3

    PCK4BITS(eError, eError, eError, eError,      3, eError, eError, eError), // 4
    PCK4BITS(eError, eError, eError, eError, eError, eError,      3,      3), // 4 | 5
    PCK4BITS(     3, eError, eError, eError, eError, eError, eError, eError), // 5

    PCK4BITS(eError, eError,      3,      3, eError, eError, eError, eError), // 6
    PCK4BITS(eError, eError, eError, eError, eError, eError, eError,      5), // 6 | 7
    PCK4BITS(     5, eError, eError, eError, eError, eError, eError, eError), // 7

    PCK4BITS(eError, eError,      5,      5,      5, eError, eError, eError), // 8
    PCK4BITS(eError, eError, eError, eError, eError, eError,      5, eError), // 8 | 9
    PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), // 9
    // clang-format on
};

// Character length recorded for each UTF-8 byte class (entry i belongs to
// class i of UTF8_cls). The lead-byte classes carry the full sequence
// length: class 5 starts a 2-byte sequence, classes 6-8 a 3-byte sequence
// and classes 9-11 a 4-byte sequence, matching the number of continuation
// bytes UTF8_st requires after each of them.
static const unsigned int UTF8CharLenTable[] = {
    0, // class 0:  0x0e, 0x0f, 0x1b, 0xc0, 0xc1, 0xf5-0xff
    1, // class 1:  remaining bytes in 0x00-0x7f
    1, // class 2:  0x80-0x8f
    1, // class 3:  0x90-0x9f
    1, // class 4:  0xa0-0xbf
    2, // class 5:  0xc2-0xdf
    3, // class 6:  0xe0
    3, // class 7:  0xe1-0xec, 0xee, 0xef
    3, // class 8:  0xed
    4, // class 9:  0xf0
    4, // class 10: 0xf1-0xf3
    4, // class 11: 0xf4
};

// UTF-8 state-machine model: bundles the tables defined above with the
// charset name string.
const SMModel UTF8SMModel = {
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls}, // packed byte-class table
    12, // matches the 12 byte classes (0-11) per state in UTF8_st
    {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st}, // packed state-transition table
    UTF8CharLenTable, // per-class character lengths
    "UTF-8",
};
}
