/*
 * Decompiled with CFR 0.152.
 */
package de.l3s.boilerpipe.sax;

import de.l3s.boilerpipe.document.TextBlock;
import de.l3s.boilerpipe.document.TextDocument;
import de.l3s.boilerpipe.labels.LabelAction;
import de.l3s.boilerpipe.sax.DefaultTagActionMap;
import de.l3s.boilerpipe.sax.TagAction;
import de.l3s.boilerpipe.sax.TagActionMap;
import de.l3s.boilerpipe.util.UnicodeTokenizer;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

/**
 * SAX {@link ContentHandler} that collects the character data of an HTML
 * document into {@link TextBlock}s and exposes the result as a
 * {@link TextDocument}.
 *
 * <p>Per-element behaviour is delegated to the {@link TagAction} registered for
 * the element's local name. Elements without a registered action increase the
 * tag level and force the text collected so far to be flushed into a block.
 *
 * <p>Instances keep mutable parse state in plain fields and perform no
 * synchronization; call {@link #recycle()} before reusing an instance for
 * another document.
 */
public class BoilerpipeHTMLContentHandler implements ContentHandler {
    /** Actions keyed by element local name, as looked up in start/endElement. */
    private final Map<String, TagAction> tagActions;
    /** Last non-empty title seen via {@link #setTitle(String)}, or null. */
    private String title = null;
    /*
     * Marker tokens that flushBlock() recognises in the token buffer to switch
     * anchor-text counting on and off. NOTE(review): presumably written into
     * tokenBuffer by the anchor tag action - that code is not part of this file.
     */
    static final String ANCHOR_TEXT_START = "$\ue00a<";
    static final String ANCHOR_TEXT_END = ">\ue00a$";
    /** Text used for tokenizing/counting; may contain the anchor markers. */
    StringBuilder tokenBuffer = new StringBuilder();
    /** Text that becomes the content of the next {@link TextBlock}. */
    StringBuilder textBuffer = new StringBuilder();
    /*
     * Nesting counters. Only read here (inBody, inIgnorableElement) or reset;
     * they are package-visible, presumably so tag actions can maintain them.
     */
    int inBody = 0;
    int inAnchor = 0;
    int inIgnorableElement = 0;
    /** Current depth of elements that change the tag level. */
    int tagLevel = 0;
    /** Tag level at which the pending block started, or -1 if none started. */
    int blockTagLevel = -1;
    /** True when the last thing appended to the buffers was a space. */
    boolean sbLastWasWhitespace = false;
    /** Running index of characters() callbacks, recorded per block. */
    private int textElementIdx = 0;
    private final List<TextBlock> textBlocks = new ArrayList<TextBlock>();
    private String lastStartTag = null;
    // lastEndTag and lastEvent are only written in this class, never read.
    private String lastEndTag = null;
    private Event lastEvent = null;
    /** Number of blocks emitted so far; passed to each new TextBlock. */
    private int offsetBlocks = 0;
    /** Indices of the characters() calls that contributed to the pending block. */
    private BitSet currentContainedTextElements = new BitSet();
    /** Set when a tag requested that the pending text be flushed. */
    private boolean flush = false;
    /** True while tokens between the anchor markers are being counted. */
    boolean inAnchorText = false;
    /** One entry per open element; an entry is null until a label is added. */
    LinkedList<LinkedList<LabelAction>> labelStacks = new LinkedList<LinkedList<LabelAction>>();
    LinkedList<Integer> fontSizeStack = new LinkedList<Integer>();
    private static final Pattern PAT_VALID_WORD_CHARACTER = Pattern.compile("[\\p{L}\\p{Nd}\\p{Nl}\\p{No}]");
    /** Line width used by flushBlock() to estimate wrapped lines. */
    private static final int MAX_LINE_LENGTH = 80;

    /**
     * Resets all parse state so the instance can be used for another document.
     *
     * <p>This also resets the tag levels, the title and the label/font-size
     * stacks, so nothing from a previous (possibly aborted) parse leaks into
     * the next one.
     */
    public void recycle() {
        tokenBuffer.setLength(0);
        textBuffer.setLength(0);
        inBody = 0;
        inAnchor = 0;
        inIgnorableElement = 0;
        tagLevel = 0;
        blockTagLevel = -1;
        sbLastWasWhitespace = false;
        textElementIdx = 0;
        textBlocks.clear();
        title = null;
        lastStartTag = null;
        lastEndTag = null;
        lastEvent = null;
        offsetBlocks = 0;
        currentContainedTextElements.clear();
        flush = false;
        inAnchorText = false;
        labelStacks.clear();
        fontSizeStack.clear();
    }

    /** Creates a handler that uses {@link DefaultTagActionMap#INSTANCE}. */
    public BoilerpipeHTMLContentHandler() {
        this(DefaultTagActionMap.INSTANCE);
    }

    /**
     * Creates a handler that uses the given tag actions.
     *
     * @param tagActionMap actions looked up by element local name
     */
    public BoilerpipeHTMLContentHandler(TagActionMap tagActionMap) {
        this.tagActions = tagActionMap;
    }

    /** Flushes whatever text is still pending at the end of the document. */
    @Override
    public void endDocument() throws SAXException {
        flushBlock();
    }

    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
        // not needed
    }

    /**
     * Treats any run of ignorable whitespace as a single separator space.
     * The reported characters themselves are not inspected.
     */
    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        appendSeparatorSpace();
    }

    @Override
    public void processingInstruction(String target, String data) throws SAXException {
        // not needed
    }

    @Override
    public void setDocumentLocator(Locator locator) {
        // not needed
    }

    @Override
    public void skippedEntity(String name) throws SAXException {
        // not needed
    }

    @Override
    public void startDocument() throws SAXException {
        // not needed
    }

    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        // not needed
    }

    /**
     * Opens a new (initially empty) label scope and runs the start action
     * registered for {@code localName}, if any.
     *
     * <p>Elements without a registered action always increase the tag level
     * and request a flush.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        labelStacks.add(null);
        TagAction action = tagActions.get(localName);
        if (action == null) {
            ++tagLevel;
            flush = true;
        } else {
            if (action.changesTagLevel()) {
                ++tagLevel;
            }
            // Non-short-circuit OR: the action must run even if a flush is already pending.
            flush = action.start(this, localName, qName, attributes) | flush;
        }
        lastEvent = Event.START_TAG;
        lastStartTag = localName;
    }

    /**
     * Runs the end action registered for {@code localName}, if any, flushes the
     * pending text when requested and closes the element's label scope.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        TagAction action = tagActions.get(localName);
        if (action == null) {
            flush = true;
        } else {
            // Non-short-circuit OR: the action must run even if a flush is already pending.
            flush = action.end(this, localName, qName) | flush;
        }
        if (action == null || action.changesTagLevel()) {
            --tagLevel;
        }
        if (flush) {
            flushBlock();
        }
        lastEvent = Event.END_TAG;
        lastEndTag = localName;
        // pollLast() rather than removeLast(): an unmatched end tag (or a
        // recycle() in the middle of a parse) must not raise NoSuchElementException.
        labelStacks.pollLast();
    }

    /**
     * Appends a chunk of character data to the pending block.
     *
     * <p>Every whitespace character in the reported range is replaced by a
     * plain space (in place, in {@code ch}). Leading and trailing runs are then
     * collapsed to at most one separator space each. Nothing is appended while
     * inside an ignorable element.
     *
     * @param ch     buffer holding the characters; the reported range is modified
     * @param start  index of the first character
     * @param length number of characters
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        ++textElementIdx;
        if (flush) {
            flushBlock();
            flush = false;
        }
        if (inIgnorableElement != 0 || length == 0) {
            return;
        }

        int end = start + length;
        for (int i = start; i < end; ++i) {
            if (Character.isWhitespace(ch[i])) {
                ch[i] = ' ';
            }
        }

        boolean leadingSpace = false;
        while (start < end && ch[start] == ' ') {
            leadingSpace = true;
            ++start;
        }
        boolean trailingSpace = false;
        while (end > start && ch[end - 1] == ' ') {
            trailingSpace = true;
            --end;
        }

        int trimmedLength = end - start;
        if (trimmedLength == 0) {
            // The chunk was non-empty and is now empty, so it was all whitespace.
            appendSeparatorSpace();
            lastEvent = Event.WHITESPACE;
            return;
        }

        if (leadingSpace) {
            appendSeparatorSpace();
        }
        if (blockTagLevel == -1) {
            // First real text of this block: remember the depth it started at.
            blockTagLevel = tagLevel;
        }
        textBuffer.append(ch, start, trimmedLength);
        tokenBuffer.append(ch, start, trimmedLength);
        if (trailingSpace) {
            textBuffer.append(' ');
            tokenBuffer.append(' ');
        }
        sbLastWasWhitespace = trailingSpace;
        lastEvent = Event.CHARACTERS;
        currentContainedTextElements.set(textElementIdx);
    }

    /** Returns the live internal list of blocks collected so far. */
    List<TextBlock> getTextBlocks() {
        return textBlocks;
    }

    /**
     * Turns the pending text into a {@link TextBlock} and clears the buffers.
     *
     * <p>Outside of the body the pending text is discarded, except that it is
     * used as the title when the last start tag was {@code TITLE}. Inside the
     * body the tokens are counted (words, words inside anchor text, and an
     * estimate of wrapped lines at {@value #MAX_LINE_LENGTH} characters) and
     * passed to the new block together with the trimmed text.
     */
    public void flushBlock() {
        if (inBody == 0) {
            if ("TITLE".equalsIgnoreCase(lastStartTag)) {
                setTitle(tokenBuffer.toString().trim());
            }
            textBuffer.setLength(0);
            tokenBuffer.setLength(0);
            return;
        }

        int bufferedLength = tokenBuffer.length();
        if (bufferedLength == 0) {
            return;
        }
        if (bufferedLength == 1 && sbLastWasWhitespace) {
            // Only a single separator space is pending: nothing worth a block.
            textBuffer.setLength(0);
            tokenBuffer.setLength(0);
            return;
        }

        String[] tokens = UnicodeTokenizer.tokenize(tokenBuffer);
        int numWords = 0;
        int numLinkedWords = 0;
        int numWrappedLines = 0;
        int currentLineLength = -1; // cancels the +1 separator added for the first word
        int numTokens = 0;
        int numWordsCurrentLine = 0;
        for (String token : tokens) {
            if (ANCHOR_TEXT_START.equals(token)) {
                inAnchorText = true;
            } else if (ANCHOR_TEXT_END.equals(token)) {
                inAnchorText = false;
            } else if (isWord(token)) {
                ++numTokens;
                ++numWords;
                ++numWordsCurrentLine;
                if (inAnchorText) {
                    ++numLinkedWords;
                }
                int tokenLength = token.length();
                currentLineLength += tokenLength + 1;
                if (currentLineLength > MAX_LINE_LENGTH) {
                    // This word starts a new line.
                    ++numWrappedLines;
                    currentLineLength = tokenLength;
                    numWordsCurrentLine = 1;
                }
            } else {
                ++numTokens;
            }
        }
        if (numTokens == 0) {
            // NOTE(review): the buffers are not cleared on this path, so their
            // content carries over into the next flush - confirm this is intended.
            return;
        }

        int wordsInWrappedLines;
        if (numWrappedLines == 0) {
            wordsInWrappedLines = numWords;
            numWrappedLines = 1;
        } else {
            // Words on the last (unfinished) line are not counted.
            wordsInWrappedLines = numWords - numWordsCurrentLine;
        }

        TextBlock block = new TextBlock(textBuffer.toString().trim(), currentContainedTextElements,
                numWords, numLinkedWords, wordsInWrappedLines, numWrappedLines, offsetBlocks);
        // The block keeps the BitSet it was given, so start a fresh one.
        currentContainedTextElements = new BitSet();
        ++offsetBlocks;
        textBuffer.setLength(0);
        tokenBuffer.setLength(0);
        block.setTagLevel(blockTagLevel);
        addTextBlock(block);
        blockTagLevel = -1;
    }

    /**
     * Labels the block and appends it to the collected blocks.
     *
     * <p>The block gets a {@code font-<size>} label for the first non-null
     * entry of the font-size stack, and every label action of every open
     * label scope is applied to it.
     *
     * @param textBlock the block to label and store
     */
    protected void addTextBlock(TextBlock textBlock) {
        for (Integer fontSize : fontSizeStack) {
            if (fontSize != null) {
                textBlock.addLabel("font-" + fontSize);
                break;
            }
        }
        for (LinkedList<LabelAction> labelStack : labelStacks) {
            if (labelStack == null) {
                continue;
            }
            for (LabelAction labelAction : labelStack) {
                if (labelAction != null) {
                    labelAction.addTo(textBlock);
                }
            }
        }
        textBlocks.add(textBlock);
    }

    /** A token counts as a word if it contains at least one letter or number character. */
    private static boolean isWord(String token) {
        return PAT_VALID_WORD_CHARACTER.matcher(token).find();
    }

    /** Appends one separator space to both buffers unless one was just appended. */
    private void appendSeparatorSpace() {
        if (!sbLastWasWhitespace) {
            textBuffer.append(' ');
            tokenBuffer.append(' ');
            sbLastWasWhitespace = true;
        }
    }

    /** Returns the title set so far, or {@code null} if none was set. */
    public String getTitle() {
        return title;
    }

    /**
     * Sets the document title; {@code null} and empty values are ignored so an
     * already known title is never erased.
     *
     * @param string the new title
     */
    public void setTitle(String string) {
        if (string == null || string.length() == 0) {
            return;
        }
        title = string;
    }

    /**
     * Flushes any pending text and returns the collected content.
     *
     * @return a document built from the current title and a snapshot of the
     *         blocks collected so far
     */
    public TextDocument toTextDocument() {
        flushBlock();
        // Hand out a copy: recycle() clears the internal list, which must not
        // empty a document that was already returned.
        // NOTE(review): assumes TextDocument keeps the list it is given - confirm.
        return new TextDocument(getTitle(), new ArrayList<TextBlock>(getTextBlocks()));
    }

    /** Appends a separator space to both buffers unless the last append was one. */
    public void addWhitespaceIfNecessary() {
        appendSeparatorSpace();
    }

    /**
     * Registers a label action in the scope of the innermost open element; it
     * is applied to every block added while that element is open.
     *
     * @param labelAction the action to register
     * @throws IllegalStateException if no element is currently open
     */
    public void addLabelAction(LabelAction labelAction) throws IllegalStateException {
        if (labelStacks.isEmpty()) {
            throw new IllegalStateException("addLabelAction called while no element is open");
        }
        LinkedList<LabelAction> currentScope = labelStacks.getLast();
        if (currentScope == null) {
            // Scopes are created lazily: swap the null placeholder for a real list.
            currentScope = new LinkedList<LabelAction>();
            labelStacks.removeLast();
            labelStacks.add(currentScope);
        }
        currentScope.add(labelAction);
    }

    /** Kind of the most recent SAX event handled. */
    private enum Event {
        START_TAG,
        END_TAG,
        CHARACTERS,
        WHITESPACE
    }
}

