/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.nlp.tokenizer.sentence;

import ai.grazie.Direction;
import ai.grazie.nlp.patterns.AggregatedPattern;
import ai.grazie.nlp.patterns.Pattern;
import ai.grazie.nlp.patterns.RegexPattern;
import ai.grazie.nlp.patterns.standard.LikelyPatterns;
import ai.grazie.nlp.patterns.standard.StrictPatterns;
import ai.grazie.nlp.tokenizer.Tokenizer;
import ai.grazie.nlp.tokenizer.rule.PatternSplitTokenizer;
import ai.grazie.nlp.tokenizer.sentence.StandardSentenceTokenizer;
import ai.grazie.text.ExtensionsKt;
import ai.grazie.text.Text;
import ai.grazie.text.TextRange;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.text.MatchResult;
import kotlin.text.Regex;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

@Metadata(mv={1, 7, 0}, k=1, xi=48, d1={"\u0000;\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\b\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\b\u0003\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\r\n\u0000\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002*\u0001\b\u0018\u0000 \u00122\u00020\u0001:\u0001\u0012B\u0011\u0012\n\b\u0002\u0010\u0002\u001a\u0004\u0018\u00010\u0003\u00a2\u0006\u0002\u0010\u0004J\u0015\u0010\u0007\u001a\u00020\b2\u0006\u0010\u0002\u001a\u00020\u0003H\u0002\u00a2\u0006\u0002\u0010\tJ\u0014\u0010\n\u001a\b\u0012\u0004\u0012\u00020\f0\u000b2\u0006\u0010\r\u001a\u00020\u000eJ\u0016\u0010\u000f\u001a\b\u0012\u0004\u0012\u00020\u00100\u000b2\u0006\u0010\r\u001a\u00020\u0011H\u0016R\u0010\u0010\u0005\u001a\u0004\u0018\u00010\u0006X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u0013"}, d2={"Lai/grazie/nlp/tokenizer/sentence/StandardSentenceTokenizer;", "Lai/grazie/nlp/tokenizer/Tokenizer;", "splitByNewLinesNumber", "", "(Ljava/lang/Integer;)V", "newLineTokenizer", "Lai/grazie/nlp/tokenizer/rule/PatternSplitTokenizer;", "newLinesPattern", "ai/grazie/nlp/tokenizer/sentence/StandardSentenceTokenizer$newLinesPattern$1", "(I)Lai/grazie/nlp/tokenizer/sentence/StandardSentenceTokenizer$newLinesPattern$1;", "tokenRanges", "", "Lai/grazie/text/TextRange;", "text", "", "tokenize", "Lai/grazie/nlp/tokenizer/Tokenizer$Token;", "Lai/grazie/text/Text;", "Companion", "nlp-tokenizer"})
@SourceDebugExtension(value={"SMAP\nStandardSentenceTokenizer.kt\nKotlin\n*S Kotlin\n*F\n+ 1 StandardSentenceTokenizer.kt\nai/grazie/nlp/tokenizer/sentence/StandardSentenceTokenizer\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n*L\n1#1,119:1\n1549#2:120\n1620#2,3:121\n1360#2:124\n1446#2,2:125\n1549#2:127\n1620#2,3:128\n1448#2,3:131\n*S KotlinDebug\n*F\n+ 1 StandardSentenceTokenizer.kt\nai/grazie/nlp/tokenizer/sentence/StandardSentenceTokenizer\n*L\n107#1:120\n107#1:121,3\n113#1:124\n113#1:125,2\n114#1:127\n114#1:128,3\n113#1:131,3\n*E\n"})
public final class StandardSentenceTokenizer
implements Tokenizer {
    /** The shared instance of the nested {@code Companion} class. */
    @NotNull
    public static final Companion Companion = new Companion(null);
    /**
     * Tokenizer built by the constructor from the newline-run pattern, or {@code null} when the
     * constructor was given a null {@code splitByNewLinesNumber}.
     */
    @Nullable
    private final PatternSplitTokenizer newLineTokenizer;
    /** Instance created with a newline threshold of 2; exposed through {@code Companion.getDefault()}. */
    @NotNull
    private static final StandardSentenceTokenizer Default = new StandardSentenceTokenizer(2);
    /** Aggregate of the paired-parentheses and paired-dashes patterns; assigned in the static initializer. */
    @NotNull
    private static final Companion.Parenthetical.1 Parenthetical;
    /** Pattern for a standalone ellipsis (three or more dots, or the single ellipsis character); assigned in the static initializer. */
    @NotNull
    private static final Companion.Ellipsis.1 Ellipsis;
    /**
     * Aggregate pattern handed to the punctuation tokenizer as its third constructor argument.
     * NOTE(review): presumably marks spans inside which no split may happen; confirm against PatternSplitTokenizer.
     */
    @NotNull
    private static final AggregatedPattern ignore;
    // The five String constants below are regex fragments. None of them is referenced by name in this
    // decompiled file; the same text appears inlined in the endPunctuation regex in the static initializer.
    /** One or more of '?', '!' or '.', optionally followed by a single closing bracket or quote character. */
    @NotNull
    private static final String normalSentenceEnd = "[?!.]+[)}\\]'\"]?";
    /** A run of Roman-numeral letters followed by ')' that begins at '^' or right after a newline. */
    @NotNull
    private static final String romanBullet = "(^|\n)[ivxlmcdIVXLMCD]+\\)";
    /** One to three digits/letters, optionally followed by further dot-separated groups of one to three digits/letters. */
    @NotNull
    private static final String braceBulletContents = "[\\d\\p{L}]{1,3}(\\.[\\d\\p{L}]{1,3})*";
    /** The brace-bullet contents wrapped in round or square brackets, beginning at '^' or right after a newline. */
    @NotNull
    private static final String braceBullet = "(^|\n)(\\([\\d\\p{L}]{1,3}(\\.[\\d\\p{L}]{1,3})*\\)|\\[[\\d\\p{L}]{1,3}(\\.[\\d\\p{L}]{1,3})*\\])";
    /** One or more newlines, one or more whitespace characters, or '$'. */
    @NotNull
    private static final String spaceOrEnd = "\n+|\\s+|$";
    /** Regex consulted by the endPunctuation filter: a candidate range is dropped when this matches at the range start. */
    @NotNull
    private static final Regex sentenceContinuation;
    /** Sentence-end pattern with the sentenceContinuation filter applied; assigned in the static initializer. */
    @NotNull
    private static final Companion.endPunctuation.1 endPunctuation;
    /** Tokenizer built from endPunctuation, Direction.Right and ignore; used by tokenRanges for every chunk. */
    @NotNull
    private static final PatternSplitTokenizer punctuationTokenizer;

    /**
     * Creates a sentence tokenizer.
     *
     * @param splitByNewLinesNumber minimum length of a newline run used to build the newline
     *                              tokenizer; when {@code null} no newline tokenizer is created
     */
    public StandardSentenceTokenizer(@Nullable Integer splitByNewLinesNumber) {
        if (splitByNewLinesNumber != null) {
            // The Integer is auto-unboxed for newLinesPattern(int).
            this.newLineTokenizer = new PatternSplitTokenizer(this.newLinesPattern(splitByNewLinesNumber), Direction.Right, null);
        } else {
            this.newLineTokenizer = null;
        }
    }

    /**
     * Compiler-generated constructor that implements the default value of the primary
     * constructor's parameter.
     *
     * @param n                        the explicit newline threshold; ignored when bit 0 of {@code n2} is set
     * @param n2                       default-argument bit mask; bit 0 set means "use the default of 2"
     * @param defaultConstructorMarker unused marker argument that distinguishes this overload
     */
    public /* synthetic */ StandardSentenceTokenizer(Integer n, int n2, DefaultConstructorMarker defaultConstructorMarker) {
        // An explicit constructor call has to be the first statement, so the default is folded into
        // the argument. Integer.valueOf keeps both ternary branches boxed, which means a null n is
        // passed through unchanged instead of being unboxed.
        this((n2 & 1) != 0 ? Integer.valueOf(2) : n);
    }

    private final newLinesPattern.1 newLinesPattern(int splitByNewLinesNumber) {
        Regex regex = new Regex("\n{" + splitByNewLinesNumber + ",}");
        return new RegexPattern(regex){

            @NotNull
            public List<TextRange> find(@NotNull CharSequence text2) {
                Intrinsics.checkNotNullParameter((Object)text2, (String)"text");
                return super.find(text2);
            }
        };
    }

    /*
     * WARNING - void declaration
     */
    @Override
    @NotNull
    public List<Tokenizer.Token> tokenize(@NotNull Text text2) {
        void $this$mapTo$iv$iv;
        Intrinsics.checkNotNullParameter((Object)text2, (String)"text");
        Iterable $this$map$iv = this.tokenRanges(text2);
        boolean $i$f$map = false;
        Iterable iterable = $this$map$iv;
        Collection destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
        boolean $i$f$mapTo = false;
        for (Object item$iv$iv : $this$mapTo$iv$iv) {
            void it;
            TextRange textRange = (TextRange)item$iv$iv;
            Collection collection = destination$iv$iv;
            boolean bl = false;
            collection.add(new Tokenizer.Token(text2.substring((TextRange)it), (TextRange)it));
        }
        return (List)destination$iv$iv;
    }

    /*
     * WARNING - void declaration
     */
    @NotNull
    public final List<TextRange> tokenRanges(@NotNull CharSequence text2) {
        void $this$flatMapTo$iv$iv;
        Intrinsics.checkNotNullParameter((Object)text2, (String)"text");
        Object object = this.newLineTokenizer;
        if (object == null || (object = ((PatternSplitTokenizer)object).tokenRanges(text2)) == null) {
            object = CollectionsKt.listOf((Object)new TextRange(0, text2.length()));
        }
        Object hard = object;
        Iterable $this$flatMap$iv = (Iterable)hard;
        boolean $i$f$flatMap = false;
        Iterable iterable = $this$flatMap$iv;
        Collection destination$iv$iv = new ArrayList();
        boolean $i$f$flatMapTo = false;
        for (Object element$iv$iv : $this$flatMapTo$iv$iv) {
            void $this$mapTo$iv$iv;
            TextRange chunk = (TextRange)element$iv$iv;
            boolean bl = false;
            Iterable $this$map$iv = punctuationTokenizer.tokenRanges(text2.subSequence(chunk.getStart(), chunk.getEndExclusive()));
            boolean $i$f$map = false;
            Iterable iterable2 = $this$map$iv;
            Collection destination$iv$iv2 = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            boolean $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                void it;
                TextRange textRange = (TextRange)item$iv$iv;
                Collection collection = destination$iv$iv2;
                boolean bl2 = false;
                collection.add(ExtensionsKt.withOffset((TextRange)it, chunk.getStart()));
            }
            Iterable list$iv$iv = (List)destination$iv$iv2;
            CollectionsKt.addAll((Collection)destination$iv$iv, (Iterable)list$iv$iv);
        }
        return (List)destination$iv$iv;
    }

    /**
     * Creates a tokenizer with the default newline threshold.
     * Delegates to the synthetic three-argument constructor with mask 1, which replaces the
     * first argument with 2.
     */
    public StandardSentenceTokenizer() {
        this(null, 1, null);
    }

    /**
     * Compiler-generated accessor for the private static {@code sentenceContinuation} regex;
     * called from the anonymous endPunctuation pattern created in the static initializer.
     *
     * @return the {@code sentenceContinuation} regex
     */
    public static final /* synthetic */ Regex access$getSentenceContinuation$cp() {
        return sentenceContinuation;
    }

    // Class initialization: builds the shared patterns, the sentence-continuation regex and the
    // punctuation tokenizer. Every regex string below is kept exactly as decompiled.
    static {
        // Spans enclosed in paired parentheses or paired dashes.
        Pattern[] parentheticalParts = new Pattern[]{StandardSentenceTokenizer.Companion.pairedParentheses(), StandardSentenceTokenizer.Companion.pairedDashes()};
        Parenthetical = new AggregatedPattern(parentheticalParts){

            @NotNull
            public List<TextRange> find(@NotNull CharSequence text2) {
                Intrinsics.checkNotNullParameter((Object)text2, (String)"text");
                return super.find(text2);
            }
        };

        // Three or more dots, or the ellipsis character, bounded on each side by the text edge,
        // a double quote or whitespace.
        Regex ellipsisRegex = new Regex("(^|\"|\\s)(\\.{3,}|\u2026)($|\"|\\s)");
        Ellipsis = new RegexPattern(ellipsisRegex){

            @NotNull
            public List<TextRange> find(@NotNull CharSequence text2) {
                Intrinsics.checkNotNullParameter((Object)text2, (String)"text");
                return super.find(text2);
            }
        };

        // Patterns handed to the punctuation tokenizer as its third argument.
        // NOTE(review): presumably spans in which end punctuation must not split; confirm against PatternSplitTokenizer.
        Pattern[] ignoredPatterns = new Pattern[]{Parenthetical, Ellipsis, LikelyPatterns.INSTANCE.getNameInitials(), LikelyPatterns.INSTANCE.getActionNameEllipsis(), LikelyPatterns.INSTANCE.getPunctuationOperator(), LikelyPatterns.INSTANCE.getIsEmail(), LikelyPatterns.INSTANCE.getIsAbbreviation(), LikelyPatterns.INSTANCE.getIsOrdinalNumeral(), LikelyPatterns.INSTANCE.getIsIPv4(), LikelyPatterns.INSTANCE.getIsURL(), LikelyPatterns.INSTANCE.getIsFilePath(), StrictPatterns.INSTANCE.getIsUUID()};
        ignore = new AggregatedPattern(ignoredPatterns);

        // Punctuation that is followed (after whitespace) by a lowercase letter.
        String dashes = "(\\s+[-\u2013\u2014]+)";
        String emotionalPunctuation = "([?!]|([:;]-?[()]+))";
        String directSpeechEnd = "(\\?\"|!(\\.+\"?|\\.*\")|\\.{3,}[\"?]*)";
        sentenceContinuation = new Regex("(" + directSpeechEnd + dashes + "?|" + emotionalPunctuation + dashes + ")\\s+\\p{Ll}");

        // Same text as (normalSentenceEnd | romanBullet | braceBullet)(spaceOrEnd), or an emoticon
        // followed by one or more of (spaceOrEnd | normalSentenceEnd), with the constants expanded in place.
        Regex endPunctuationRegex = new Regex("([?!.]+[)}\\]'\"]?|(^|\n)[ivxlmcdIVXLMCD]+\\)|(^|\n)(\\([\\d\\p{L}]{1,3}(\\.[\\d\\p{L}]{1,3})*\\)|\\[[\\d\\p{L}]{1,3}(\\.[\\d\\p{L}]{1,3})*\\]))(\n+|\\s+|$)|([:;]-?[()]+)(\n+|\\s+|$|[?!.]+[)}\\]'\"]?)+");
        endPunctuation = new RegexPattern(endPunctuationRegex){

            // Keeps only the matches at whose start position sentenceContinuation does not match.
            @NotNull
            public List<TextRange> find(@NotNull CharSequence text2) {
                Intrinsics.checkNotNullParameter((Object)text2, (String)"text");
                List<TextRange> kept = new ArrayList<>();
                for (TextRange range : super.find(text2)) {
                    if (!StandardSentenceTokenizer.access$getSentenceContinuation$cp().matchesAt(text2, range.getStart())) {
                        kept.add(range);
                    }
                }
                return kept;
            }
        };
        punctuationTokenizer = new PatternSplitTokenizer(endPunctuation, Direction.Right, ignore);
    }

    @Metadata(mv={1, 7, 0}, k=1, xi=48, d1={"\u0000Q\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\b\u0003\n\u0002\b\u0003\n\u0002\u0010\u000e\n\u0002\b\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000*\u0003\b\u000b\u0011\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002J\b\u0010\u001c\u001a\u00020\u001dH\u0002J\b\u0010\u001e\u001a\u00020\u001fH\u0002R\u0011\u0010\u0003\u001a\u00020\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0005\u0010\u0006R\u0010\u0010\u0007\u001a\u00020\bX\u0082\u0004\u00a2\u0006\u0004\n\u0002\u0010\tR\u0010\u0010\n\u001a\u00020\u000bX\u0082\u0004\u00a2\u0006\u0004\n\u0002\u0010\fR\u000e\u0010\r\u001a\u00020\u000eX\u0082T\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u000f\u001a\u00020\u000eX\u0082T\u00a2\u0006\u0002\n\u0000R\u0010\u0010\u0010\u001a\u00020\u0011X\u0082\u0004\u00a2\u0006\u0004\n\u0002\u0010\u0012R\u000e\u0010\u0013\u001a\u00020\u0014X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0015\u001a\u00020\u000eX\u0082T\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0016\u001a\u00020\u0017X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0018\u001a\u00020\u000eX\u0082T\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0019\u001a\u00020\u001aX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u001b\u001a\u00020\u000eX\u0082T\u00a2\u0006\u0002\n\u0000\u00a8\u0006 "}, d2={"Lai/grazie/nlp/tokenizer/sentence/StandardSentenceTokenizer$Companion;", "", "()V", "Default", "Lai/grazie/nlp/tokenizer/sentence/StandardSentenceTokenizer;", "getDefault", "()Lai/grazie/nlp/tokenizer/sentence/StandardSentenceTokenizer;", "Ellipsis", "ai/grazie/nlp/tokenizer/sentence/StandardSentenceTokenizer$Companion$Ellipsis$1", "Lai/grazie/nlp/tokenizer/sentence/StandardSentenceTokenizer$Companion$Ellipsis$1;", "Parenthetical", 
"ai/grazie/nlp/tokenizer/sentence/StandardSentenceTokenizer$Companion$Parenthetical$1", "Lai/grazie/nlp/tokenizer/sentence/StandardSentenceTokenizer$Companion$Parenthetical$1;", "braceBullet", "", "braceBulletContents", "endPunctuation", "ai/grazie/nlp/tokenizer/sentence/StandardSentenceTokenizer$Companion$endPunctuation$1", "Lai/grazie/nlp/tokenizer/sentence/StandardSentenceTokenizer$Companion$endPunctuation$1;", "ignore", "Lai/grazie/nlp/patterns/AggregatedPattern;", "normalSentenceEnd", "punctuationTokenizer", "Lai/grazie/nlp/tokenizer/rule/PatternSplitTokenizer;", "romanBullet", "sentenceContinuation", "Lkotlin/text/Regex;", "spaceOrEnd", "pairedDashes", "Lai/grazie/nlp/patterns/Pattern;", "pairedParentheses", "Lai/grazie/nlp/patterns/RegexPattern;", "nlp-tokenizer"})
    public static final class Companion {
        private Companion() {
        }

        @NotNull
        public final StandardSentenceTokenizer getDefault() {
            return Default;
        }

        private final RegexPattern pairedParentheses() {
            String noParentheses = "[^()]";
            String nestedParentheses = "\\(" + noParentheses + "{1,500}\\)" + noParentheses + "{0,500}";
            return new RegexPattern(new Regex("\\(" + noParentheses + "{1,500}(" + nestedParentheses + ")*\\)"));
        }

        private final Pattern pairedDashes() {
            Regex dashRegex = new Regex("[-\u2013\u2014]+([^-\u2013\u2014]{1,100})[-\u2013\u2014]+");
            Regex smileyRegex = new Regex("[:;]-?[()]+");
            return new RegexPattern(dashRegex, smileyRegex){
                final /* synthetic */ Regex $dashRegex;
                final /* synthetic */ Regex $smileyRegex;
                {
                    this.$dashRegex = $dashRegex;
                    this.$smileyRegex = $smileyRegex;
                    super($dashRegex);
                }

                /*
                 * WARNING - void declaration
                 */
                @NotNull
                public List<TextRange> find(@NotNull CharSequence text2) {
                    void $this$filterTo$iv$iv;
                    void $this$filter$iv;
                    Intrinsics.checkNotNullParameter((Object)text2, (String)"text");
                    Iterable iterable = super.find(text2);
                    Regex regex = this.$dashRegex;
                    Regex regex2 = this.$smileyRegex;
                    boolean $i$f$filter = false;
                    void var6_6 = $this$filter$iv;
                    Collection destination$iv$iv = new ArrayList<E>();
                    boolean $i$f$filterTo = false;
                    for (T element$iv$iv : $this$filterTo$iv$iv) {
                        TextRange it = (TextRange)element$iv$iv;
                        boolean bl = false;
                        CharSequence withDashes = text2.subSequence(it.getStart(), it.getEndExclusive());
                        MatchResult matchResult = regex.matchEntire(withDashes);
                        Intrinsics.checkNotNull((Object)matchResult);
                        String withoutDashes = (String)matchResult.getGroupValues().get(1);
                        if (!(Regex.find$default((Regex)regex2, (CharSequence)withoutDashes, (int)0, (int)2, null) == null)) continue;
                        destination$iv$iv.add(element$iv$iv);
                    }
                    return (List)destination$iv$iv;
                }
            }.withWordBoundariesAround();
        }

        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

