/*
 * Decompiled with CFR 0.152.
 */
package net.java.sen.tokenizers.ja;

import net.java.sen.dictionary.CToken;
import net.java.sen.dictionary.Dictionary;
import net.java.sen.dictionary.Morpheme;
import net.java.sen.dictionary.Node;
import net.java.sen.dictionary.SentenceIterator;
import net.java.sen.dictionary.Tokenizer;
import net.java.sen.trie.CharIterator;

/**
 * A {@link Tokenizer} for Japanese text.
 *
 * <p>Candidate nodes are produced from a common-prefix search of the
 * dictionary. When the dictionary yields nothing, or the current character is
 * neither hiragana nor kanji, an additional "unknown" node is produced whose
 * length is derived from the character classes of the input.
 */
public class JapaneseTokenizer extends Tokenizer {
    /*
     * Character classes returned by getCharClass() for whitespace and for
     * characters above U+007F. Other ASCII characters are classified with
     * Character.getType() instead.
     * NOTE(review): the values presumably start at 128 so that they cannot
     * collide with the Character.getType() category codes - confirm.
     */

    /** Character class: anything not covered by the other classes. */
    static final int OTHER = 128;
    /** Character class: ASCII space, tab, carriage return or line feed. */
    static final int SPACE = 129;
    /** Character class: U+4E00..U+9FFF. */
    static final int KANJI = 130;
    /** Character class: U+30A0..U+30FF, excluding U+30FB. */
    static final int KATAKANA = 131;
    /** Character class: U+3040..U+309F. */
    static final int HIRAGANA = 132;
    /** Character class: U+FF00..U+FFEF. */
    static final int HALF_WIDTH = 133;

    /**
     * Classifies a single character.
     *
     * @param c the character to classify
     * @return {@link #SPACE} for ASCII whitespace; the
     *         {@link Character#getType(char)} value of the lower-cased
     *         character for any other character up to U+007F; otherwise one of
     *         {@link #HIRAGANA}, {@link #KATAKANA}, {@link #KANJI},
     *         {@link #HALF_WIDTH} or {@link #OTHER} according to the code
     *         point range
     */
    private int getCharClass(char c) {
        if (c <= '\u007f') {
            if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
                return SPACE;
            }
            // Lower-casing first makes upper and lower case letters share a class.
            return Character.getType(Character.toLowerCase(c));
        }
        if (c >= '\u3040' && c <= '\u309f') {
            return HIRAGANA;
        }
        // U+30FB (katakana middle dot) is deliberately excluded and falls through to OTHER.
        if (c >= '\u30a0' && c <= '\u30ff' && c != '\u30fb') {
            return KATAKANA;
        }
        if (c >= '\u4e00' && c <= '\u9fff') {
            return KANJI;
        }
        if (c >= '\uff00' && c <= '\uffef') {
            return HALF_WIDTH;
        }
        return OTHER;
    }

    /**
     * Determines the length of an unknown token starting at the iterator's
     * next character.
     *
     * <p>A leading {@link #OTHER}, {@link #KANJI} or {@link #HIRAGANA}
     * character always forms a token of length one. For any other class the
     * token extends over the run of following characters that share the class
     * of the first one. Note that the scan calls {@code next()} on the first
     * character that does not match, so the iterator is advanced past it.
     *
     * @param iterator the source of characters to examine
     * @return the length of the unknown token in characters, or {@code 0} if
     *         the iterator has no further characters
     */
    private int findUnknownToken(CharIterator iterator) {
        int length = 0;
        if (iterator.hasNext()) {
            int charClass = this.getCharClass(iterator.next());
            switch (charClass) {
                case OTHER:
                case KANJI:
                case HIRAGANA: {
                    length = 1;
                    break;
                }
                default: {
                    length = 1;
                    while (iterator.hasNext() && this.getCharClass(iterator.next()) == charClass) {
                        ++length;
                    }
                    break;
                }
            }
        }
        return length;
    }

    /**
     * Builds the chain of candidate nodes for the iterator's current position.
     *
     * <p>Every non-terminator entry returned by the dictionary's common-prefix
     * search becomes a node; nodes are linked through {@code rnext}, with the
     * most recently created node at the head. If at least one dictionary node
     * exists and the current character is hiragana or kanji, that chain is
     * returned as is. Otherwise the iterator is rewound to its origin and an
     * unknown node is placed in front of the chain.
     *
     * @param iterator the sentence iterator positioned at the text to look up
     * @param surface the character array handed to {@code getUnknownNode}
     *        when an unknown node is created
     * @return the head of the {@code rnext}-linked chain of candidate nodes
     */
    @Override
    public Node lookup(SentenceIterator iterator, char[] surface) {
        Node resultNode = null;
        int charClass = this.getCharClass(iterator.current());
        int skipped = iterator.skippedCharCount();
        CToken[] tokens = this.getDictionary().commonPrefixSearch(iterator);

        // The result array is closed by an entry flagged as terminator.
        for (int i = 0; !tokens[i].terminator; ++i) {
            CToken token = tokens[i];
            Node newNode = new Node();
            newNode.setCToken(token);
            newNode.length = token.length;
            newNode.start = iterator.origin();
            // The span additionally covers the characters skipped before the token.
            newNode.span = token.length + skipped;
            newNode.rnext = resultNode;
            newNode.morpheme = new Morpheme(this.getDictionary(), token.partOfSpeechIndex);
            resultNode = newNode;
        }

        // Dictionary matches starting with hiragana or kanji get no unknown-node alternative.
        if (resultNode != null && (charClass == HIRAGANA || charClass == KANJI)) {
            return resultNode;
        }

        iterator.rewindToOrigin();
        int unknownTokenLength = this.findUnknownToken(iterator);
        Node unknownNode = this.getUnknownNode(surface, iterator.origin(), unknownTokenLength, skipped + unknownTokenLength);
        unknownNode.rnext = resultNode;
        return unknownNode;
    }

    /**
     * Creates a tokenizer; both arguments are passed unchanged to the
     * {@link Tokenizer} constructor.
     *
     * @param dictionary the dictionary passed to the superclass
     * @param unknownPartOfSpeechDescription the unknown part-of-speech
     *        description passed to the superclass
     */
    public JapaneseTokenizer(Dictionary dictionary, String unknownPartOfSpeechDescription) {
        super(dictionary, unknownPartOfSpeechDescription);
    }
}

