/*
 * Decompiled with CFR 0.152.
 */
package de.l3s.boilerpipe.sax;

import de.l3s.boilerpipe.sax.HTMLDocument;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

/**
 * Static helper that downloads the resource behind a {@link URL} and wraps the raw
 * bytes, together with the charset announced by the server, in an {@link HTMLDocument}.
 *
 * <p>The class is not instantiable; use {@link #fetch(URL)}.
 */
public class HTMLFetcher {
    /**
     * Extracts the value of the {@code charset} parameter of a Content-Type header.
     * The match is case-insensitive, the parameter may appear anywhere in the header,
     * whitespace around {@code =} is tolerated and optional quotes around the value
     * are not captured. Group 1 holds the bare charset name.
     */
    private static final Pattern PAT_CHARSET = Pattern.compile(
            "(?:^|[;\\s])charset\\s*=\\s*[\"']?([^;\"'\\s]+)", Pattern.CASE_INSENSITIVE);

    private HTMLFetcher() {
    }

    /**
     * Fetches the document at the given URL.
     *
     * <p>Only responses whose media type is {@code text/html} are accepted. The charset is
     * taken from the Content-Type header; if it is missing, malformed or unknown to this
     * JVM, {@code Cp1252} is used. A gzip-compressed body ({@code Content-Encoding: gzip}
     * or {@code x-gzip}) is decompressed; any other non-identity encoding is reported on
     * {@code System.err} and the body is passed on undecoded.
     *
     * @param url the location to download
     * @return the downloaded bytes and the charset to decode them with
     * @throws IOException if the content type is absent or not {@code text/html}, or if
     *         connecting to or reading from the URL fails
     */
    public static HTMLDocument fetch(URL url) throws IOException {
        URLConnection connection = url.openConnection();
        String contentType = connection.getContentType();
        if (!isHtml(contentType)) {
            throw new IOException("Unsupported content type: " + contentType);
        }
        Charset charset = resolveCharset(contentType);

        byte[] data;
        // Both resources are closed even when decoding or reading fails part-way;
        // if the GZIP wrapper cannot be created, the raw stream is still released.
        try (InputStream raw = connection.getInputStream();
                InputStream body = decodeBody(raw, connection.getContentEncoding())) {
            data = readFully(body);
        }
        return new HTMLDocument(data, charset);
    }

    /** Returns whether the media type of the Content-Type header (parameters ignored) is text/html. */
    private static boolean isHtml(String contentType) {
        if (contentType == null) {
            return false;
        }
        int semicolon = contentType.indexOf(';');
        String mediaType = semicolon < 0 ? contentType : contentType.substring(0, semicolon);
        // Media types are case-insensitive and may be followed by whitespace before ';'.
        return "text/html".equalsIgnoreCase(mediaType.trim());
    }

    /** Returns the charset named in the Content-Type header, or Cp1252 when none is usable. */
    private static Charset resolveCharset(String contentType) {
        Charset fallback = Charset.forName("Cp1252");
        Matcher matcher = PAT_CHARSET.matcher(contentType);
        if (!matcher.find()) {
            return fallback;
        }
        try {
            return Charset.forName(matcher.group(1));
        } catch (IllegalArgumentException ignored) {
            // Covers both UnsupportedCharsetException and IllegalCharsetNameException:
            // a bogus server-supplied name must not abort the fetch, so keep the fallback.
            return fallback;
        }
    }

    /** Wraps the raw response stream according to the Content-Encoding header, if any. */
    private static InputStream decodeBody(InputStream raw, String contentEncoding) throws IOException {
        if (contentEncoding == null || "identity".equalsIgnoreCase(contentEncoding)) {
            return raw;
        }
        if ("gzip".equalsIgnoreCase(contentEncoding) || "x-gzip".equalsIgnoreCase(contentEncoding)) {
            return new GZIPInputStream(raw);
        }
        // Best effort: hand the bytes on undecoded rather than failing the fetch.
        System.err.println("WARN: unsupported Content-Encoding: " + contentEncoding);
        return raw;
    }

    /** Reads the stream until end-of-file and returns everything that was read. */
    private static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] buffer = new byte[4096];
        int count;
        while ((count = in.read(buffer)) != -1) {
            collected.write(buffer, 0, count);
        }
        return collected.toByteArray();
    }
}

