/*
 * Decompiled with CFR 0.152.
 */
package org.commoncrawl.util.shared;

import java.net.MalformedURLException;
import java.util.Collection;
import java.util.Locale;
import java.util.regex.Pattern;
import org.commoncrawl.protocol.shared.URLFPV2;
import org.commoncrawl.util.shared.FPGenerator;
import org.commoncrawl.util.shared.GoogleURL;
import org.commoncrawl.util.shared.SessionIDURLNormalizer;
import org.commoncrawl.util.shared.TLDNamesCollection;
import org.commoncrawl.util.shared.URLFingerprint;

/**
 * Static helpers for host-name analysis (TLD / root-domain extraction, validation),
 * URL canonicalization and URL fingerprint construction.
 *
 * <p>Host-name rules come from {@link TLDNamesCollection#getSecondaryNames(String)}. The rule
 * strings this class looks for ({@code "*"}, {@code "*.label"}, {@code "!label"}, {@code ""})
 * resemble Public Suffix List conventions; that reading is an assumption, since the rule
 * data itself is not visible from this class.
 */
public class URLUtils {
    /** Upper bound on the total length of a name accepted by {@link #isValidDomainName(String)}. */
    private static final int MAXNAME = 255;
    /** Per-label length limit; declared but not enforced anywhere in this class. */
    private static final int MAXLABEL = 63;
    /** Upper bound on the number of dot-separated labels accepted by {@link #isValidDomainName(String)}. */
    private static final int MAXLABELS = 128;
    /** Finds any character outside {@code 0-9}, {@code a-z}, {@code -}, {@code .} and {@code _}. */
    static Pattern invalidDomainCharactersRegEx = Pattern.compile("[^0-9a-z\\-\\._]");
    /** Matches four dot-separated runs of digits (no range check on each run). */
    static Pattern ipAddressRegEx = Pattern.compile("^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$");
    /** Not referenced inside this class; kept because it is package-visible. */
    static Pattern numericOnly = Pattern.compile("[0-9]*$");
    private static final SessionIDURLNormalizer _sessionIdNormalizer = new SessionIDURLNormalizer();

    /**
     * Returns the suffix of {@code candidateString} that spans {@code parts[rootNameIndex..]}
     * joined by single dots.
     *
     * <p>The suffix is cut from the end of {@code candidateString} by length rather than
     * re-joined, so {@code parts} is expected to be {@code candidateString.split("\\.")}.
     */
    private static String buildRootNameString(String candidateString, String[] parts, int rootNameIndex) {
        // One separating dot between each pair of included labels.
        int rootNameLength = parts.length - rootNameIndex - 1;
        for (int i = rootNameIndex; i < parts.length; ++i) {
            rootNameLength += parts[i].length();
        }
        return candidateString.substring(candidateString.length() - rootNameLength);
    }

    /**
     * Removes one trailing {@code '.'} and then, if the result is longer than one character,
     * one leading {@code '*'}. Only the asterisk itself is removed; a dot following it stays.
     */
    private static String stripTrailingDotAndLeadingWildcard(String hostName) {
        if (hostName.endsWith(".")) {
            hostName = hostName.substring(0, hostName.length() - 1);
        }
        if (hostName.startsWith("*") && hostName.length() > 1) {
            hostName = hostName.substring(1);
        }
        return hostName;
    }

    /** Returns {@code true} when {@code value} is neither {@code null} nor the empty string. */
    private static boolean isPresent(String value) {
        return value != null && !value.isEmpty();
    }

    /**
     * Extracts the registry suffix ("TLD name") of a host name.
     *
     * @param hostName host name to inspect; expected to be lower case already, because
     *     upper-case letters count as invalid characters
     * @return {@code "inaddr-arpa.arpa"} for a dotted-quad numeric host; otherwise the matching
     *     suffix of the (trailing-dot / leading-{@code *} stripped) host; or {@code null} when
     *     {@code hostName} is {@code null}, empty, contains invalid characters, has fewer than
     *     two labels, or no rule applies
     */
    public static String extractTLDName(String hostName) {
        if (hostName == null) {
            return null;
        }
        if (ipAddressRegEx.matcher(hostName).matches()) {
            return "inaddr-arpa.arpa";
        }
        hostName = URLUtils.stripTrailingDotAndLeadingWildcard(hostName);
        if (hostName.length() == 0 || invalidDomainCharactersRegEx.matcher(hostName).find()) {
            return null;
        }
        String[] parts = hostName.split("\\.");
        if (parts.length < 2) {
            return null;
        }
        Collection<String> secondaryNames = TLDNamesCollection.getSecondaryNames(parts[parts.length - 1]);
        if (secondaryNames.size() == 0) {
            return null;
        }

        String secondLevel = parts[parts.length - 2];
        if (secondaryNames.contains(secondLevel) || secondaryNames.contains("*")) {
            // A "!label" entry overrides the match: only the last label is the suffix.
            if (secondaryNames.contains("!" + secondLevel)) {
                return URLUtils.buildRootNameString(hostName, parts, parts.length - 1);
            }
            return URLUtils.buildRootNameString(hostName, parts, parts.length - 2);
        }
        if (secondaryNames.contains("*." + secondLevel)) {
            if (parts.length >= 3) {
                String exclusionRule = "!" + parts[parts.length - 3] + "." + secondLevel;
                if (secondaryNames.contains(exclusionRule)) {
                    return URLUtils.buildRootNameString(hostName, parts, parts.length - 2);
                }
                if (parts.length >= 4) {
                    return URLUtils.buildRootNameString(hostName, parts, parts.length - 3);
                }
            }
            // Wildcard rule present but the host has too few labels for it.
            return null;
        }
        if (secondaryNames.contains("")) {
            return URLUtils.buildRootNameString(hostName, parts, parts.length - 1);
        }
        return null;
    }

    /**
     * Extracts the root (registrable) domain of a host name: the suffix found by the same rule
     * matching as {@link #extractTLDName(String)}, plus one additional label to its left.
     *
     * @param hostName host name to inspect; expected to be lower case already, because
     *     upper-case letters count as invalid characters
     * @return {@code hostName} unchanged for a dotted-quad numeric host; otherwise the root
     *     domain suffix of the (trailing-dot / leading-{@code *} stripped) host; or {@code null}
     *     when {@code hostName} is {@code null}, empty, contains invalid characters, has too few
     *     labels for the matching rule, or no rule applies
     */
    public static String extractRootDomainName(String hostName) {
        if (hostName == null) {
            return null;
        }
        if (ipAddressRegEx.matcher(hostName).matches()) {
            return hostName;
        }
        hostName = URLUtils.stripTrailingDotAndLeadingWildcard(hostName);
        if (hostName.length() == 0 || invalidDomainCharactersRegEx.matcher(hostName).find()) {
            return null;
        }
        String[] parts = hostName.split("\\.");
        if (parts.length < 2) {
            return null;
        }
        Collection<String> secondaryNames = TLDNamesCollection.getSecondaryNames(parts[parts.length - 1]);
        if (secondaryNames.size() == 0) {
            return null;
        }

        String secondLevel = parts[parts.length - 2];
        if (secondaryNames.contains(secondLevel) || secondaryNames.contains("*")) {
            // A "!label" entry overrides the match: the last two labels form the root.
            if (secondaryNames.contains("!" + secondLevel)) {
                return URLUtils.buildRootNameString(hostName, parts, parts.length - 2);
            }
            if (parts.length >= 3) {
                return URLUtils.buildRootNameString(hostName, parts, parts.length - 3);
            }
            // The host is itself a suffix; there is no label left to act as the root.
            return null;
        }
        if (secondaryNames.contains("*." + secondLevel)) {
            if (parts.length >= 3) {
                String exclusionRule = "!" + parts[parts.length - 3] + "." + secondLevel;
                if (secondaryNames.contains(exclusionRule)) {
                    return URLUtils.buildRootNameString(hostName, parts, parts.length - 3);
                }
                if (parts.length >= 4) {
                    return URLUtils.buildRootNameString(hostName, parts, parts.length - 4);
                }
            }
            return null;
        }
        if (secondaryNames.contains("")) {
            return URLUtils.buildRootNameString(hostName, parts, parts.length - 2);
        }
        return null;
    }

    /**
     * Checks whether {@code domainName} is a dotted-quad numeric host or a name for which
     * {@link #extractRootDomainName(String)} finds a root domain.
     *
     * @param domainName name to validate, in any letter case
     * @return {@code false} for {@code null}, for names longer than {@value #MAXNAME} characters
     *     or with more than {@value #MAXLABELS} labels, and for names with no root domain
     */
    public static boolean isValidDomainName(String domainName) {
        if (domainName == null || domainName.length() > MAXNAME) {
            return false;
        }
        // Locale.ROOT keeps ASCII case folding stable; under e.g. a Turkish default locale
        // 'I' would lower-case to dotless 'ı' and the name would be rejected.
        String candidate = domainName.toLowerCase(Locale.ROOT);
        if (ipAddressRegEx.matcher(candidate).matches()) {
            return true;
        }
        // NOTE(review): matches() only fires when the whole candidate is one invalid character.
        // The full character screen runs inside extractRootDomainName, after a leading '*' has
        // been stripped. Switching this to find() would start rejecting "*"-prefixed names.
        if (invalidDomainCharactersRegEx.matcher(candidate).matches()) {
            return false;
        }
        String[] parts = candidate.split("\\.");
        if (parts.length > MAXLABELS) {
            return false;
        }
        return URLUtils.extractRootDomainName(candidate) != null;
    }

    /**
     * Canonicalizes {@code urlString} (stripping a leading {@code www.}) and builds its fingerprint.
     *
     * @return the fingerprint, or {@code null} when the URL is malformed, canonicalizes to
     *     {@code null}, or has no resolvable root domain
     */
    public static URLFPV2 getURLFPV2FromURL(String urlString) {
        try {
            String canonicalURL = URLUtils.canonicalizeURL(urlString, true);
            if (canonicalURL != null) {
                return URLUtils.getURLFPV2FromCanonicalURL(canonicalURL);
            }
        }
        catch (MalformedURLException ignored) {
            // Deliberate: a malformed URL simply has no fingerprint; callers receive null.
        }
        return null;
    }

    /**
     * Canonicalizes {@code urlObject} (stripping a leading {@code www.}) and builds its fingerprint.
     *
     * @return the fingerprint, or {@code null} when canonicalization fails or yields
     *     {@code null}, or the host has no resolvable root domain
     */
    public static URLFPV2 getURLFPV2FromURLObject(GoogleURL urlObject) {
        try {
            String canonicalURL = URLUtils.canonicalizeURL(urlObject, true);
            if (canonicalURL != null) {
                return URLUtils.getURLFPV2FromCanonicalURL(canonicalURL);
            }
        }
        catch (MalformedURLException ignored) {
            // Deliberate: a malformed URL simply has no fingerprint; callers receive null.
        }
        return null;
    }

    /**
     * Builds a fingerprint from an already-canonical URL.
     *
     * <p>The URL hash covers the full URL string. The domain hash covers the host with a leading
     * {@code www.} removed (unless the host is itself the root domain). The root-domain hash
     * covers the result of {@link #extractRootDomainName(String)}.
     *
     * @return the populated fingerprint, or {@code null} when no host or no root domain
     *     can be determined
     */
    public static URLFPV2 getURLFPV2FromCanonicalURL(String canonicalURL) {
        URLFPV2 urlFP = new URLFPV2();
        urlFP.setUrlHash(URLFingerprint.generate64BitURLFPrint(canonicalURL));

        String hostName = URLUtils.fastGetHostFromURL(canonicalURL);
        if (hostName == null) {
            return null;
        }
        String rootDomainName = URLUtils.extractRootDomainName(hostName);
        if (rootDomainName == null) {
            return null;
        }
        if (hostName.startsWith("www.") && !rootDomainName.equals(hostName)) {
            hostName = hostName.substring(4);
        }
        urlFP.setDomainHash(FPGenerator.std64.fp(hostName));
        urlFP.setRootDomainHash(FPGenerator.std64.fp(rootDomainName));
        return urlFP;
    }

    /**
     * Parses {@code incomingURL} and canonicalizes it.
     *
     * @throws MalformedURLException when {@link GoogleURL#isValid()} reports the parsed URL invalid
     * @see #canonicalizeURL(GoogleURL, boolean)
     */
    public static String canonicalizeURL(String incomingURL, boolean stripLeadingWWW) throws MalformedURLException {
        GoogleURL urlObject = new GoogleURL(incomingURL);
        if (!urlObject.isValid()) {
            throw new MalformedURLException("URL:" + incomingURL + " is invalid");
        }
        return URLUtils.canonicalizeURL(urlObject, stripLeadingWWW);
    }

    /**
     * Rebuilds a URL string from the components of {@code urlObject}.
     *
     * <p>The output is {@code scheme://[user[:password]@]host[:port][path][?query]}, where:
     * <ul>
     *   <li>one trailing dot is removed from the host;</li>
     *   <li>with {@code stripLeadingWWW}, a leading {@code www.} is removed unless the host is
     *       exactly its own root domain;</li>
     *   <li>the port is omitted when absent or equal to {@code "80"}, whatever the scheme;</li>
     *   <li>the path is cut at its first {@code ';'};</li>
     *   <li>no other component is appended.</li>
     * </ul>
     * The assembled string is finally passed through {@link SessionIDURLNormalizer}.
     *
     * @param urlObject parsed URL; not validated here
     * @param stripLeadingWWW whether to drop a leading {@code www.} host label
     * @return the normalizer's output for the rebuilt URL
     */
    public static String canonicalizeURL(GoogleURL urlObject, boolean stripLeadingWWW) throws MalformedURLException {
        StringBuilder urlOut = new StringBuilder();
        urlOut.append(urlObject.getScheme());
        urlOut.append("://");

        // Components are tested by content, not by reference identity with "".
        String userName = urlObject.getUserName();
        if (URLUtils.isPresent(userName)) {
            urlOut.append(userName);
            String password = urlObject.getPassword();
            if (URLUtils.isPresent(password)) {
                urlOut.append(":");
                urlOut.append(password);
            }
            urlOut.append("@");
        }

        String host = urlObject.getHost();
        if (host.endsWith(".")) {
            host = host.substring(0, host.length() - 1);
        }
        if (stripLeadingWWW && host.startsWith("www.")) {
            String rootName = URLUtils.extractRootDomainName(host);
            // Keep "www." only when the whole host is itself the root domain.
            if (rootName == null || !rootName.equals(host)) {
                host = host.substring(4);
            }
        }
        urlOut.append(host);

        String port = urlObject.getPort();
        if (URLUtils.isPresent(port) && !port.equals("80")) {
            urlOut.append(":");
            urlOut.append(port);
        }

        String path = urlObject.getPath();
        if (URLUtils.isPresent(path)) {
            int indexOfSemiColon = path.indexOf(';');
            if (indexOfSemiColon != -1) {
                urlOut.append(path.substring(0, indexOfSemiColon));
            } else {
                urlOut.append(path);
            }
        }

        String query = urlObject.getQuery();
        if (URLUtils.isPresent(query)) {
            urlOut.append("?");
            urlOut.append(query);
        }

        return _sessionIdNormalizer.normalize(urlOut.toString(), "");
    }

    /** Characters skipped between the scheme's {@code ':'} and the start of the authority. */
    private static boolean isAuthorityPadding(char c) {
        return c == '/' || c == '\\' || c == '\n' || c == '\r' || c == '\t' || c == ' ';
    }

    /** Characters that end the authority section during the host scan. */
    private static boolean isAuthorityTerminator(char c) {
        return c == '/' || c == '?' || c == ';' || c == '#';
    }

    /**
     * Extracts the host from a URL string without fully parsing it.
     *
     * <p>The authority is located after the first {@code ':'}, user-info up to an {@code '@'}
     * is skipped, and everything from the first {@code ':'} of the remainder onward is dropped.
     * The result is run through {@link GoogleURL} as {@code "http://" + host}.
     *
     * @return the host reported by {@link GoogleURL#getHost()}, or {@code null} when the string
     *     has no {@code ':'}, has nothing after the scheme separator, or the extracted host
     *     does not form a valid URL
     */
    private static String fastGetHostFromURL(String urlString) {
        int schemeColon = urlString.indexOf(":");
        if (schemeColon == -1) {
            return null;
        }
        int urlLength = urlString.length();
        int hostStart = schemeColon + 1;
        while (hostStart < urlLength && URLUtils.isAuthorityPadding(urlString.charAt(hostStart))) {
            ++hostStart;
        }
        if (hostStart >= urlLength) {
            return null;
        }

        // The scan starts one past hostStart: the first authority character is never a terminator.
        int hostEnd = hostStart + 1;
        while (hostEnd < urlLength && !URLUtils.isAuthorityTerminator(urlString.charAt(hostEnd))) {
            ++hostEnd;
        }

        int indexOfAt = urlString.indexOf("@", hostStart);
        if (indexOfAt != -1 && indexOfAt < hostEnd) {
            hostStart = indexOfAt + 1;
        }

        String host = urlString.substring(hostStart, hostEnd);
        int colonEnd = host.indexOf(":");
        if (colonEnd != -1) {
            host = host.substring(0, colonEnd);
        }

        GoogleURL urlObject = new GoogleURL("http://" + host);
        return urlObject.isValid() ? urlObject.getHost() : null;
    }
}

