/*
 * Decompiled with CFR 0.152.
 */
package de.l3s.boilerpipe.sax;

import de.l3s.boilerpipe.BoilerpipeExtractor;
import de.l3s.boilerpipe.BoilerpipeProcessingException;
import de.l3s.boilerpipe.document.TextDocument;
import de.l3s.boilerpipe.sax.BoilerpipeSAXInput;
import de.l3s.boilerpipe.sax.HTMLDocument;
import de.l3s.boilerpipe.sax.HTMLFetcher;
import de.l3s.boilerpipe.sax.HTMLHighlighter;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import net.htmlparser.jericho.Attributes;
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.OutputDocument;
import net.htmlparser.jericho.Segment;
import net.htmlparser.jericho.Source;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Extracts the main content of an HTML page with a {@link BoilerpipeExtractor} and returns it
 * as a cleaned-up HTML fragment.
 *
 * <p>The fragment produced by {@link HTMLHighlighter} is post-processed: attributes are stripped
 * from every element except anchors, non-absolute anchor links are resolved against the document
 * URI, and structural tags listed in {@link #NOT_ALLOWED_HTML_TAGS} are removed.
 */
public class HtmlArticleExtractor {
    /** Shared instance; the same object is returned by {@link #getInstance()}. */
    public static final HtmlArticleExtractor INSTANCE = new HtmlArticleExtractor();

    /** Names of elements whose start and end tags are removed from the output. */
    private static final Set<String> NOT_ALLOWED_HTML_TAGS = new HashSet<String>(Arrays.asList("head", "html", "script", "style", "form", "body", "div", "span"));

    /** Names of not-allowed elements whose content is removed together with their tags. */
    private static final Set<String> CONTENT_REMOVED_HTML_TAGS = new HashSet<String>(Arrays.asList("script", "style", "form"));

    private HtmlArticleExtractor() {
    }

    /**
     * Returns the shared extractor instance.
     *
     * @return {@link #INSTANCE}
     */
    public static HtmlArticleExtractor getInstance() {
        return INSTANCE;
    }

    /**
     * Fetches the page at the given URL and extracts its article as an HTML fragment.
     *
     * @param extractor the boilerpipe extractor used to classify the content
     * @param url the location of the page to fetch; also used as base for relative links
     * @return the cleaned HTML fragment, or {@code null} if extraction failed
     * @throws IOException declared for failures while fetching the page
     * @throws BoilerpipeProcessingException declared for compatibility with callers
     * @throws SAXException declared for compatibility with callers
     * @throws URISyntaxException if {@code url} cannot be converted to a {@link URI}
     */
    public String process(BoilerpipeExtractor extractor, URL url) throws IOException, BoilerpipeProcessingException, SAXException, URISyntaxException {
        HTMLDocument htmlDoc = HTMLFetcher.fetch(url);
        return this.process(htmlDoc, url.toURI(), extractor);
    }

    /**
     * Extracts the article of an already fetched document as an HTML fragment.
     *
     * @param htmlDoc the document to process
     * @param docUri the URI of the document, used as base for relative anchor links; when
     *     {@code null}, links are left as they are
     * @param extractor the boilerpipe extractor used to classify the content
     * @return the cleaned HTML fragment, or {@code null} if parsing, extraction or highlighting
     *     threw an exception
     */
    public String process(HTMLDocument htmlDoc, URI docUri, BoilerpipeExtractor extractor) {
        HTMLHighlighter highlighter = HTMLHighlighter.newExtractingInstance();
        highlighter.setOutputHighlightOnly(true);
        String highlighted;
        try {
            TextDocument textDocument = new BoilerpipeSAXInput(htmlDoc.toInputSource()).getTextDocument();
            extractor.process(textDocument);
            // A fresh InputSource is requested for the second pass over the document.
            InputSource inputSource = htmlDoc.toInputSource();
            highlighted = highlighter.process(textDocument, inputSource);
        } catch (Exception e) {
            // Deliberate best-effort contract: any failure is reported to the caller as null.
            return null;
        }
        return this.removeNotAllowedTags(highlighted, docUri);
    }

    /**
     * Strips attributes and not-allowed tags from the fragment and resolves relative links.
     *
     * @param htmlFragment the HTML produced by the highlighter
     * @param docUri base URI for resolving anchor links; may be {@code null}
     * @return the cleaned fragment with all newline and tab characters removed
     */
    private String removeNotAllowedTags(String htmlFragment, URI docUri) {
        Source source = new Source((CharSequence)htmlFragment);
        OutputDocument outputDocument = new OutputDocument(source);
        List<Element> elements = source.getAllElements();
        for (Element element : elements) {
            String name = element.getName();
            this.cleanAttributes(outputDocument, element, docUri);
            if (!NOT_ALLOWED_HTML_TAGS.contains(name)) {
                continue;
            }
            if (CONTENT_REMOVED_HTML_TAGS.contains(name)) {
                Segment content = element.getContent();
                outputDocument.remove(content);
            }
            outputDocument.remove(element.getStartTag());
            if (element.getStartTag().isSyntacticalEmptyElementTag()) {
                continue;
            }
            // Guard against elements without an explicit end tag (e.g. unclosed markup).
            Segment endTag = element.getEndTag();
            if (endTag != null) {
                outputDocument.remove(endTag);
            }
        }
        String cleaned = outputDocument.toString();
        return cleaned.replace("\n", "").replace("\t", "");
    }

    /**
     * Drops all attributes of non-anchor elements; for anchors, rewrites a non-http(s)
     * {@code href} to its resolution against the document URI. An anchor whose {@code href}
     * is not a syntactically valid URI is removed from the output entirely.
     */
    private void cleanAttributes(OutputDocument outputDocument, Element element, URI docUri) {
        Attributes attributes = element.getAttributes();
        if (attributes == null) {
            // Defensive: nothing to rewrite for elements that expose no attribute list.
            return;
        }
        Map<String, String> attributeUpdates = outputDocument.replace(attributes, true);
        if (!"a".equals(element.getName())) {
            attributeUpdates.clear();
            return;
        }
        String link = attributeUpdates.get("href");
        if (link == null || docUri == null || isHttpUrl(link)) {
            return;
        }
        try {
            URI resolved = docUri.resolve(new URI(link));
            attributeUpdates.put("href", resolved.toString());
        } catch (URISyntaxException e) {
            // The link cannot be made usable, so the whole anchor is dropped.
            outputDocument.remove(element);
        }
    }

    /** Tells whether the link already starts with an {@code http://} or {@code https://} scheme. */
    private static boolean isHttpUrl(String link) {
        String trimmed = link.trim();
        return trimmed.regionMatches(true, 0, "http://", 0, 7)
                || trimmed.regionMatches(true, 0, "https://", 0, 8);
    }
}

