"""
All of the Enums that are used throughout the chardet package.

:author: Dan Blanchard (dan.blanchard@gmail.com)
"""

from enum import Flag, IntEnum, auto


class InputState(IntEnum):
    """
    The states a universal detector can be in.

    Because this is an ``IntEnum``, every member is also a plain ``int`` and
    compares equal to its numeric value.
    """

    # The integer values are part of the interface; keep them stable.
    PURE_ASCII = 0
    ESC_ASCII = 1
    HIGH_BYTE = 2


class LanguageFilter(Flag):
    """
    The language filters that can be applied to a ``UniversalDetector``.

    Members are bit flags, so they can be combined with ``|`` and tested for
    containment with ``in``.
    """

    # Single-bit members: ``auto()`` hands out successive powers of two
    # (1, 2, 4, 8, 16) in definition order.
    CHINESE_SIMPLIFIED = auto()
    CHINESE_TRADITIONAL = auto()
    JAPANESE = auto()
    KOREAN = auto()
    NON_CJK = auto()

    # Composite masks built from the single-bit members above.
    CHINESE = CHINESE_TRADITIONAL | CHINESE_SIMPLIFIED
    CJK = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL | JAPANESE | KOREAN
    ALL = CJK | NON_CJK


class ProbingState(IntEnum):
    """
    The states a prober can be in.

    Because this is an ``IntEnum``, every member is also a plain ``int`` and
    compares equal to its numeric value.
    """

    # The integer values are part of the interface; keep them stable.
    DETECTING = 0
    FOUND_IT = 1
    NOT_ME = 2


class MachineState(IntEnum):
    """
    The states a state machine can be in.

    Because this is an ``IntEnum``, every member is also a plain ``int`` and
    compares equal to its numeric value.
    """

    # The integer values are part of the interface; keep them stable.
    START = 0
    ERROR = 1
    ITS_ME = 2


class SequenceLikelihood(IntEnum):
    """
    How likely a character is to follow the previous one.

    Members are ordered numerically, from ``NEGATIVE`` (0) up to
    ``POSITIVE`` (3), and being ``IntEnum`` members they can be compared
    with ``<`` / ``>`` directly.
    """

    NEGATIVE = 0
    UNLIKELY = 1
    LIKELY = 2
    POSITIVE = 3


class CharacterCategory(IntEnum):
    """
    The categories that language models for ``SingleByteCharsetProber``
    sort characters into.

    Any value below ``DIGIT`` is considered a letter.
    """

    # The non-letter categories occupy the top five values of the byte
    # range (0xFB-0xFF), counting down from 0xFF.
    UNDEFINED = 0xFF
    CONTROL = 0xFE
    SYMBOL = 0xFD
    LINE_BREAK = 0xFC
    DIGIT = 0xFB


class EncodingEra(Flag):
    """
    Eras of character encodings, used to filter which encodings are
    considered during detection.

    The numeric values double as preference tiers for tie-breaking when
    confidence scores are very close: a lower value means a more
    preferred/modern era. The values come from ``auto()`` in definition
    order, so the order of the members below is significant.
    """

    # UTF-8/16/32, Windows-125x, modern multibyte (widely used)
    MODERN_WEB = auto()
    # ISO-8859-x (legacy but common)
    LEGACY_ISO = auto()
    # Mac encodings (less common)
    LEGACY_MAC = auto()
    # CP437, CP850, CP852, etc. (very legacy)
    DOS = auto()
    # EBCDIC variants (CP037, CP500, etc.)
    MAINFRAME = auto()

    # Union of every era defined above.
    ALL = (
        MODERN_WEB
        | LEGACY_ISO
        | LEGACY_MAC
        | DOS
        | MAINFRAME
    )
