/*
 * Decompiled with CFR 0.152.
 */
package de.l3s.boilerpipe.filters.simple;

import de.l3s.boilerpipe.BoilerpipeFilter;
import de.l3s.boilerpipe.BoilerpipeProcessingException;
import de.l3s.boilerpipe.document.TextBlock;
import de.l3s.boilerpipe.document.TextDocument;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Keeps a content block only if its text contains at least one "clause" with a
 * minimum number of words; all other content blocks are marked as non-content.
 *
 * <p>A clause is the text between the start of the block (or the end of the
 * previous delimiter) and the next clause delimiter. A delimiter is a letter or
 * digit followed by one or more of {@code , . : ; ! ?} and then either
 * whitespace (space, CR, LF) or the end of the text.
 *
 * <p>Blocks that are already non-content are never touched.
 */
public final class MinClauseWordsFilter implements BoilerpipeFilter {
    // The patterns are declared before INSTANCE so that they are already
    // initialised should the constructor ever start using them.

    /** Letter/digit, then punctuation, then whitespace or end of input. */
    private static final Pattern PAT_CLAUSE_DELIMITER =
            Pattern.compile("[\\p{L}\\d][\\,\\.\\:\\;\\!\\?]+([ \\n\\r]+|$)");

    /** A run of spaces and/or line breaks; used as the word separator. */
    private static final Pattern PAT_WHITESPACE = Pattern.compile("[ \\n\\r]+");

    /** Shared default filter: at least 5 words, trailing undelimited text ignored. */
    public static final MinClauseWordsFilter INSTANCE = new MinClauseWordsFilter(5, false);

    private final int minWords;
    private final boolean acceptClausesWithoutDelimiter;

    /**
     * Creates a filter that ignores trailing text without a clause delimiter.
     *
     * @param minWords minimum number of words a clause must have
     */
    public MinClauseWordsFilter(int minWords) {
        this(minWords, false);
    }

    /**
     * Creates a filter.
     *
     * @param minWords minimum number of words a clause must have
     * @param acceptClausesWithoutDelimiter if {@code true}, the text after the
     *        last delimiter (or the whole text when there is no delimiter) is
     *        also tested as a clause
     */
    public MinClauseWordsFilter(int minWords, boolean acceptClausesWithoutDelimiter) {
        this.minWords = minWords;
        this.acceptClausesWithoutDelimiter = acceptClausesWithoutDelimiter;
    }

    /**
     * Marks every content block without a sufficiently long clause as non-content.
     *
     * @param textDocument the document whose text blocks are examined
     * @return {@code true} if at least one block was changed to non-content
     * @throws BoilerpipeProcessingException declared by the signature; nothing
     *         in this method throws it directly
     */
    @Override
    public boolean process(TextDocument textDocument) throws BoilerpipeProcessingException {
        boolean changed = false;
        for (TextBlock block : textDocument.getTextBlocks()) {
            if (!block.isContent()) {
                continue;
            }
            if (!containsClause(block.getText())) {
                block.setIsContent(false);
                changed = true;
            }
        }
        return changed;
    }

    /** Returns whether {@code text} holds at least one clause with enough words. */
    private boolean containsClause(String text) {
        Matcher delimiter = PAT_CLAUSE_DELIMITER.matcher(text);
        int clauseStart = 0;
        while (delimiter.find()) {
            // The delimiter pattern starts with the clause's last letter/digit,
            // so that one character still belongs to the clause.
            int clauseEnd = delimiter.start() + 1;
            if (isClause(text.subSequence(clauseStart, clauseEnd))) {
                // One qualifying clause is enough; no need to look further.
                return true;
            }
            // Skip the punctuation and the whitespace consumed by the match.
            clauseStart = delimiter.end();
        }
        // Text after the last delimiter only counts when explicitly allowed.
        return acceptClausesWithoutDelimiter
                && isClause(text.subSequence(clauseStart, text.length()));
    }

    /**
     * Returns whether {@code candidate} has at least {@code minWords} words.
     *
     * <p>The word count is the number of whitespace runs plus one, so an empty
     * sequence counts as one word and leading or trailing whitespace runs each
     * add one to the count.
     */
    private boolean isClause(CharSequence candidate) {
        Matcher whitespace = PAT_WHITESPACE.matcher(candidate);
        int words = 1;
        while (whitespace.find()) {
            words++;
            if (words >= minWords) {
                // Stop early once the threshold is reached.
                return true;
            }
        }
        return words >= minWords;
    }
}

