/*
 * Decompiled with CFR 0.152.
 */
package org.commoncrawl.hadoop.io.deprecated;

import java.io.BufferedReader;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackInputStream;
import java.io.SequenceInputStream;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.LinkedList;
import java.util.StringTokenizer;
import java.util.Vector;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.zip.CRC32;
import java.util.zip.CheckedInputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.record.Buffer;
import org.apache.hadoop.util.StringUtils;
import org.commoncrawl.io.shared.NIODataSink;
import org.commoncrawl.protocol.shared.ArcFileHeaderItem;
import org.commoncrawl.protocol.shared.ArcFileItem;
import org.junit.Assert;
import org.junit.Test;

public final class ArcFileReader
extends InflaterInputStream
implements NIODataSink {
    /** Logger used by the reader and by the nested builder. */
    private static final Log LOG = LogFactory.getLog(ArcFileReader.class);

    /**
     * Placeholder stream handed to the {@link InflaterInputStream} super constructor;
     * the constructor replaces {@code in} with the queue-backed stream right afterwards.
     */
    private static InputStream _dummyStream = new InputStream(){

        @Override
        public int read() throws IOException {
            return 0;
        }
    };

    /** Hand-off queue between the producer ({@code available}/{@code finished}) and the inflating consumer. */
    private LinkedBlockingQueue<BufferItem> _consumerQueue;
    /** Running CRC over gzip header bytes and, later, over the inflated member payload. */
    private CRC32 _crc = new CRC32();
    /** When true, the first gzip member is read as the ARC file header instead of as an item. */
    private boolean _hasHeaderItem = true;
    /** Text of the ARC file header once it has been read; {@code null} until then. */
    private String _arcFileHeader;
    /** Set once the end-of-stream marker (a {@code BufferItem} with a null buffer) is dequeued. */
    private boolean _eosReached;

    public static final int DEFAULT_BLOCK_SIZE = 32768;
    public static final int DEFAULT_BUFFER_QUEUE_SIZE = 1024;
    /** Timeout value meaning "block indefinitely while waiting for a buffer". */
    public static final int DEFAULT_TIMEOUT_VALUE = -1;

    // Process-wide tunables; they are read when a reader is constructed or while it runs.
    private static int _blockSize = DEFAULT_BLOCK_SIZE;
    private static int _bufferQueueSize = DEFAULT_BUFFER_QUEUE_SIZE;
    private static int _ioTimeoutValue = DEFAULT_TIMEOUT_VALUE;

    /** Count of compressed bytes handed out by the queue-backed stream so far. */
    private int _streamPos;

    // GZIP member header constants (magic number and FLG bits).
    private static final int GZIP_MAGIC = 0x8B1F;
    private static final int FHCRC = 2;
    private static final int FEXTRA = 4;
    private static final int FNAME = 8;
    private static final int FCOMMENT = 16;

    /** Scratch space used by {@code skipBytes}. */
    private byte[] tmpbuf = new byte[128];

    /**
     * Creates a reader whose compressed input is supplied through
     * {@link #available(ByteBuffer)} and terminated by {@link #finished()}.
     *
     * <p>The inherited {@code in} stream is replaced by a {@link PushbackInputStream}
     * wrapping an anonymous stream that drains the consumer queue. The queue capacity,
     * block size and I/O timeout are taken from the static settings at construction /
     * read time.
     */
    public ArcFileReader() {
        super(_dummyStream, new Inflater(true), _blockSize);
        this._consumerQueue = new LinkedBlockingQueue<BufferItem>(_bufferQueueSize);
        this.in = new PushbackInputStream(new InputStream(){
            ByteBuffer _activeBuffer = null;
            byte[] oneByteArray = new byte[1];

            @Override
            public int read() throws IOException {
                if (this.read(this.oneByteArray, 0, 1) != -1) {
                    return this.oneByteArray[0] & 0xFF;
                }
                return -1;
            }

            @Override
            public int read(byte[] b, int off, int len) throws IOException {
                // InputStream contract: a zero-length request returns 0 without blocking.
                if (len == 0) {
                    return 0;
                }
                // Loop (rather than test once) so that an empty queued buffer is skipped
                // instead of producing a 0-byte read, which would make the single-byte
                // read() above return a stale value.
                while (this._activeBuffer == null || this._activeBuffer.remaining() == 0) {
                    BufferItem nextItem;
                    try {
                        if (_ioTimeoutValue == -1) {
                            nextItem = ArcFileReader.this._consumerQueue.take();
                        } else {
                            nextItem = ArcFileReader.this._consumerQueue.poll(_ioTimeoutValue, TimeUnit.MILLISECONDS);
                            if (nextItem == null) {
                                throw new IOException("IO Timeout waiting for Buffer");
                            }
                        }
                    }
                    catch (InterruptedException e) {
                        // Restore the interrupt flag and keep the cause for diagnostics.
                        Thread.currentThread().interrupt();
                        throw new IOException("Thread Interrupted waiting for Buffer", e);
                    }
                    // A null buffer is the end-of-stream marker queued by finished().
                    if (nextItem._buffer == null) {
                        ArcFileReader.this._eosReached = true;
                        return -1;
                    }
                    this._activeBuffer = nextItem._buffer;
                }
                int readSize = Math.min(this._activeBuffer.remaining(), len);
                this._activeBuffer.get(b, off, readSize);
                ArcFileReader.this._streamPos += readSize;
                return readSize;
            }
        }, _blockSize);
    }

    /**
     * Sets the static block size used for the inflater buffer, the pushback buffer
     * and the per-read scan buffers of readers.
     *
     * @param blockSize block size in bytes
     */
    public static void setBlockSize(int blockSize) {
        ArcFileReader._blockSize = blockSize;
    }

    /**
     * Sets the static capacity used for the consumer queue of readers constructed afterwards.
     *
     * @param bufferQueueSize maximum number of queued buffers
     */
    public static void setBufferQueueSize(int bufferQueueSize) {
        ArcFileReader._bufferQueueSize = bufferQueueSize;
    }

    /**
     * Sets the static timeout applied while waiting for the next queued buffer.
     *
     * @param timeoutInMilliseconds wait limit in milliseconds; {@code -1} makes the
     *        consumer block until a buffer arrives
     */
    public static void setIOTimeoutValue(int timeoutInMilliseconds) {
        ArcFileReader._ioTimeoutValue = timeoutInMilliseconds;
    }

    /**
     * Controls whether {@link #hasMoreItems()} reads a leading ARC file header
     * member before the first item.
     *
     * @param value {@code true} if the stream starts with an ARC header member
     */
    public void setArcFileHasHeaderItemFlag(boolean value) {
        _hasHeaderItem = value;
    }

    /**
     * Clears parsing state: forgets the ARC header, empties the
     * consumer queue, resets the CRC and the inflater and clears the end-of-stream flag.
     *
     * <p>Note that the stream position counter and any partially consumed buffer held
     * by the underlying stream are not touched here.
     */
    public void resetState() {
        _arcFileHeader = null;
        _eosReached = false;
        _consumerQueue.clear();
        _crc.reset();
        resetInflater();
    }

    /**
     * Positions the stream at the payload of the next gzip member.
     *
     * <p>On the first call (when a header item is expected and none has been read yet)
     * the ARC file header member is consumed first.
     *
     * @return {@code true} if another gzip member header was read, {@code false} at end of stream
     * @throws IOException if the header is malformed or reading fails
     */
    public boolean hasMoreItems() throws IOException {
        boolean arcHeaderPending = this._hasHeaderItem && this._arcFileHeader == null;
        if (arcHeaderPending) {
            this.readARCHeader();
        }
        return this.readHeader();
    }

    /**
     * Inflates the current gzip member and parses it into {@code itemOut}.
     *
     * <p>The item is cleared first (its ARC file name is preserved), then filled by an
     * {@link ArcFileBuilder} fed with the inflated data. The gzip trailer is verified
     * before the builder is finished, and the item's position and size are taken from the
     * compressed stream position.
     *
     * @param itemOut item to populate
     * @throws IOException on read failure, corrupt trailer, or an incomplete record
     */
    public void getNextItem(ArcFileItem itemOut) throws IOException {
        // clear() wipes the name, so carry it across.
        String preservedName = itemOut.getArcFileName();
        itemOut.clear();
        itemOut.setArcFileName(preservedName);

        this._crc.reset();
        this.resetInflater();
        itemOut.setArcFilePos(this.getARCFileStreamPos());

        ArcFileBuilder builder = new ArcFileBuilder(itemOut);
        int inflated;
        do {
            // A fresh array per pass: the builder may keep a reference to the buffer.
            byte[] chunk = new byte[_blockSize];
            inflated = this.read(chunk, 0, chunk.length);
            if (inflated != -1) {
                this._crc.update(chunk, 0, inflated);
                builder.inputData(ByteBuffer.wrap(chunk, 0, inflated));
            }
        } while (inflated != -1);

        this.readTrailer();
        builder.finish();
        itemOut.setArcFileSize(this.getARCFileStreamPos() - itemOut.getArcFilePos());
    }

    /**
     * Signals end of input by queueing a marker item with a {@code null} buffer.
     *
     * <p>If the calling thread is interrupted while waiting for queue space, the marker
     * is not queued; the interrupt flag is restored and the failure is logged so the
     * caller can notice it.
     */
    @Override
    public void finished() {
        try {
            this._consumerQueue.put(new BufferItem(null));
        }
        catch (InterruptedException e) {
            // Cannot throw a checked exception from here; preserve the interrupt instead of hiding it.
            Thread.currentThread().interrupt();
            LOG.error((Object)"Interrupted while queueing end-of-stream marker; consumer will not see EOS");
        }
    }

    /**
     * Queues a buffer of compressed data for the consumer, blocking while the queue is full.
     *
     * <p>If the calling thread is interrupted while waiting, the buffer is not queued;
     * the interrupt flag is restored and the loss is logged.
     *
     * @param availableReadBuffer buffer positioned for reading
     */
    @Override
    public void available(ByteBuffer availableReadBuffer) {
        try {
            this._consumerQueue.put(new BufferItem(availableReadBuffer));
        }
        catch (InterruptedException e) {
            // Cannot throw a checked exception from here; preserve the interrupt instead of hiding it.
            Thread.currentThread().interrupt();
            LOG.error((Object)"Interrupted while queueing data buffer; buffer was dropped");
        }
    }

    /** Resets the inherited inflater so it can decode the next gzip member. */
    private void resetInflater() {
        inf.reset();
    }

    /**
     * Reads the first gzip member as the ARC file header and stores its text
     * (decoded as ISO-8859-1) in {@code _arcFileHeader}.
     *
     * @throws IOException if the header is empty, fills the whole 4096-byte buffer,
     *         or the gzip trailer does not match
     */
    private void readARCHeader() throws IOException {
        this.readHeader();

        byte[] headerBytes = new byte[4096];
        int filled = 0;
        int lastRead;
        for (;;) {
            lastRead = this.read(headerBytes, filled, headerBytes.length - filled);
            if (lastRead <= 0) {
                break;
            }
            filled += lastRead;
            // A header that fills the whole buffer is treated as invalid.
            if (filled == headerBytes.length) {
                throw new IOException("Invalid ARC File Header");
            }
        }
        if (lastRead == 0 || filled == 0) {
            throw new IOException("Invalid ARC File Header");
        }

        // Recompute the CRC over the inflated header bytes so the trailer check can verify it.
        this._crc.reset();
        this._crc.update(headerBytes, 0, filled);
        this.readTrailer();
        this._arcFileHeader = new String(headerBytes, 0, filled, "ISO-8859-1");
    }

    /**
     * Consumes one gzip member header from the underlying stream.
     *
     * @return {@code true} if a complete header was read; {@code false} if end of stream
     *         was already reached or is hit while reading the header
     * @throws IOException if the magic number, compression method or header CRC is wrong
     */
    private boolean readHeader() throws IOException {
        if (this._eosReached) {
            return false;
        }
        // Header bytes are run through the CRC so the optional FHCRC field can be checked.
        CheckedInputStream checked = new CheckedInputStream(this.in, this._crc);
        this._crc.reset();
        try {
            if (ArcFileReader.readUShort(checked) != GZIP_MAGIC) {
                throw new IOException("Not in GZIP format");
            }
            if (ArcFileReader.readUByte(checked) != 8) {
                throw new IOException("Unsupported compression method");
            }
            int flags = ArcFileReader.readUByte(checked);
            // Skip the six fixed header bytes that follow the flag byte.
            this.skipBytes(checked, 6);
            if ((flags & FEXTRA) == FEXTRA) {
                int extraLength = ArcFileReader.readUShort(checked);
                this.skipBytes(checked, extraLength);
            }
            if ((flags & FNAME) == FNAME) {
                // zero-terminated file name
                while (ArcFileReader.readUByte(checked) != 0) {
                }
            }
            if ((flags & FCOMMENT) == FCOMMENT) {
                // zero-terminated comment
                while (ArcFileReader.readUByte(checked) != 0) {
                }
            }
            if ((flags & FHCRC) == FHCRC) {
                int expected = (int)this._crc.getValue() & 0xFFFF;
                if (ArcFileReader.readUShort(checked) != expected) {
                    throw new IOException("Corrupt GZIP header");
                }
            }
            return true;
        }
        catch (EOFException endOfInput) {
            // Running out of input while reading a header is reported as "no more members".
            return false;
        }
    }

    /**
     * Verifies the gzip member trailer (CRC-32 followed by the uncompressed size).
     *
     * <p>Bytes the inflater buffered but did not consume are pushed back first so the
     * trailer, and whatever follows it, are read from the right position.
     *
     * @throws IOException if either trailer field does not match
     */
    private void readTrailer() throws IOException {
        PushbackInputStream pushback = (PushbackInputStream)this.in;
        int unconsumed = this.inf.getRemaining();
        if (unconsumed > 0) {
            pushback.unread(this.buf, this.len - unconsumed, unconsumed);
        }
        // The size field is only read when the CRC field matched.
        if (ArcFileReader.readUInt(pushback) != this._crc.getValue()) {
            throw new IOException("Corrupt GZIP trailer");
        }
        long inflatedSize = this.inf.getBytesWritten() & 0xFFFFFFFFL;
        if (ArcFileReader.readUInt(pushback) != inflatedSize) {
            throw new IOException("Corrupt GZIP trailer");
        }
    }

    /**
     * Reads a little-endian unsigned 32-bit integer.
     *
     * @throws IOException on read failure ({@link EOFException} if the stream ends early)
     */
    private static long readUInt(InputStream in) throws IOException {
        long lowHalf = ArcFileReader.readUShort(in);
        long highHalf = ArcFileReader.readUShort(in);
        return highHalf << 16 | lowHalf;
    }

    /**
     * Reads a little-endian unsigned 16-bit integer.
     *
     * @throws IOException on read failure ({@link EOFException} if the stream ends early)
     */
    private static int readUShort(InputStream in) throws IOException {
        int lowByte = ArcFileReader.readUByte(in);
        int highByte = ArcFileReader.readUByte(in);
        return highByte << 8 | lowByte;
    }

    /**
     * Reads one unsigned byte.
     *
     * @return the byte value in the range 0..255
     * @throws EOFException if the stream is at its end
     * @throws IOException if the stream returns a value outside -1..255
     */
    private static int readUByte(InputStream in) throws IOException {
        final int value = in.read();
        if (value >= 0 && value <= 255) {
            return value;
        }
        if (value == -1) {
            throw new EOFException();
        }
        throw new IOException("read() returned value out of range -1..255: " + value);
    }

    /**
     * Reads and discards exactly {@code n} bytes, using {@code tmpbuf} as scratch space.
     *
     * @throws EOFException if the stream ends before {@code n} bytes were consumed
     * @throws IOException on read failure
     */
    private void skipBytes(InputStream in, int n) throws IOException {
        int remaining = n;
        while (remaining > 0) {
            int request = Math.min(remaining, this.tmpbuf.length);
            int consumed = in.read(this.tmpbuf, 0, request);
            if (consumed == -1) {
                throw new EOFException();
            }
            remaining -= consumed;
        }
    }

    /**
     * Returns the current position in the compressed stream: the number of bytes handed
     * out by the queue-backed stream minus what the pushback stream reports as available.
     *
     * @throws IOException if {@code available()} fails
     */
    private final int getARCFileStreamPos() throws IOException {
        int notYetConsumed = ((PushbackInputStream)this.in).available();
        return this._streamPos - notYetConsumed;
    }

    /**
     * Self-check of the CR LF CR LF detector in {@link ArcFileBuilder}: feeds a fixed
     * sequence of CR (13) and LF (10) bytes and asserts on which bytes the detector
     * reports a terminator.
     *
     * @throws Exception if an assertion fails
     */
    public void checkCRLFStateMachine() throws Exception {
        ArcFileItem item = new ArcFileItem();
        ArcFileBuilder builder = new ArcFileBuilder(item);

        final byte[] input = {
            13, 10, 13, 10,
            13, 10, 13, 13, 10, 13, 10,
            10, 13, 13, 10, 13, 10,
            13, 13, 13, 10, 13, 10
        };
        // true marks the byte that completes a CR LF CR LF run
        final boolean[] terminatorExpected = {
            false, false, false, true,
            false, false, false, false, false, false, true,
            false, false, false, false, false, true,
            false, false, false, false, false, true
        };

        for (int i = 0; i < input.length; ++i) {
            boolean reported = builder.checkForCRLFTerminator(input[i]);
            if (terminatorExpected[i]) {
                Assert.assertTrue(reported);
            } else {
                Assert.assertFalse(reported);
            }
        }
    }

    /**
     * Streams {@code file} through this reader: a consumer thread pulls items and logs
     * them while the calling thread reads the file in 32768-byte buffers and queues them.
     *
     * <p>Runs the CRLF state-machine check first, sets the static I/O timeout to 30000 ms
     * and resets the reader state before starting.
     *
     * @param file ARC file to read
     * @throws Exception if the self-check, file I/O or thread join fails
     */
    @Test
    public void testReader(File file) throws Exception {
        this.checkCRLFStateMachine();
        ArcFileReader.setIOTimeoutValue(30000);
        this.resetState();

        Runnable consumer = new Runnable(){

            @Override
            public void run() {
                try {
                    while (ArcFileReader.this.hasMoreItems()) {
                        ArcFileItem item = new ArcFileItem();
                        ArcFileReader.this.getNextItem(item);
                        LOG.info((Object)("GOT Item URL:" + item.getUri() + " StreamPos:" + item.getArcFilePos() + " Content Length:" + item.getContent().getCount()));
                        // Header items are only walked and queried; nothing is done with the result.
                        for (ArcFileHeaderItem headerItem : item.getHeaderItems()) {
                            headerItem.isFieldDirty(1);
                        }
                    }
                    LOG.info((Object)"NO MORE ITEMS... BYE");
                }
                catch (IOException e) {
                    LOG.error((Object)StringUtils.stringifyException((Throwable)e));
                }
            }
        };
        Thread thread = new Thread(consumer);
        thread.start();

        ReadableByteChannel channel = Channels.newChannel(new FileInputStream(file));
        try {
            int totalBytesRead = 0;
            // Each chunk gets its own buffer because ownership passes to the consumer queue.
            ByteBuffer buffer = ByteBuffer.allocate(32768);
            while (channel.read(buffer) != -1) {
                buffer.flip();
                totalBytesRead += buffer.remaining();
                this.available(buffer);
                buffer = ByteBuffer.allocate(32768);
            }
            this.finished();
        }
        finally {
            channel.close();
        }
        LOG.info((Object)"Done Reading File.... Waiting for ArcFileThread to DIE");
        thread.join();
        LOG.info((Object)"Done Reading File.... ArcFileThread to DIED");
    }

    /**
     * Command-line entry point: runs {@link #testReader(File)} on the file named by the
     * first argument and prints the stack trace of any exception it throws.
     *
     * @param args {@code args[0]} is the path of the ARC file
     */
    public static void main(String[] args) {
        final String path = args[0];
        final File arcFile = new File(path);
        final ArcFileReader reader = new ArcFileReader();
        try {
            reader.testReader(arcFile);
        }
        catch (Exception failure) {
            failure.printStackTrace();
        }
    }

    public static class ArcFileBuilder {
        /** Parser for the 14-digit {@code yyyyMMddHHmmss} timestamp field of the metadata line. */
        SimpleDateFormat TIMESTAMP14 = new SimpleDateFormat("yyyyMMddHHmmss");
        /** Item being populated; cleared by {@code finish()}. */
        ArcFileItem _item;
        /** Accumulates content bytes once the headers have been parsed. */
        Buffer _buffer = new Buffer();
        /** Current parse phase. */
        State _state = State.LookingForMetadata;
        /** Buffers holding the bytes of the section currently being scanned. */
        LinkedList<ByteBuffer> _buffers = new LinkedList<ByteBuffer>();
        /** Buffer currently being scanned by {@code inputData}. */
        ByteBuffer _activeBuffer;
        byte lastMatchChar;
        /** Progress through the CR LF CR LF sequence (0..3). */
        int matchCount;
        boolean eos;
        static Charset UTF8_Charset = Charset.forName("UTF8");
        static Charset ASCII_Charset = Charset.forName("ASCII");

        /**
         * Creates a builder that fills in the given item.
         *
         * @param itemToConstruct item that receives metadata, headers and content
         */
        public ArcFileBuilder(ArcFileItem itemToConstruct) {
            _item = itemToConstruct;
        }

        /**
         * Advances the CR LF CR LF detector by one byte.
         *
         * <p>{@code matchCount} records how much of the sequence has been seen
         * (1 = CR, 2 = CR LF, 3 = CR LF CR). Any byte that does not continue the sequence
         * restarts it; a CR restarts it at 1.
         *
         * @param matchingChar next input byte
         * @return {@code true} only for the LF that completes CR LF CR LF
         */
        private final boolean checkForCRLFTerminator(byte matchingChar) {
            final int progress = this.matchCount;
            if (matchingChar == 13) {
                // CR continues the sequence only right after "CR LF"; otherwise it starts a new one.
                this.matchCount = (progress == 2) ? 3 : 1;
                return false;
            }
            if (matchingChar == 10) {
                if (progress == 3) {
                    this.matchCount = 0;
                    return true;
                }
                // LF continues the sequence only right after a single CR.
                this.matchCount = (progress == 1) ? 2 : 0;
                return false;
            }
            this.matchCount = 0;
            return false;
        }

        /**
         * Tests whether {@code matchingChar} ends the section being scanned: a single LF
         * while looking for the metadata line, CR LF CR LF while looking for the end of
         * the headers.
         *
         * <p>When a terminator is found, the scanned part of the active buffer is appended
         * to {@code _buffers} (rewound, with its limit at the terminator) and the active
         * buffer becomes a slice of the unscanned remainder, or {@code null} if none is left.
         *
         * @param matchingChar byte just read from the active buffer
         * @return {@code true} if the byte completed a terminator
         */
        private final boolean checkForTerminator(byte matchingChar) {
            boolean found;
            if (this._state == State.LookingForMetadata) {
                found = matchingChar == 10;
            } else if (this._state == State.LookingForHeaderTerminator) {
                found = this.checkForCRLFTerminator(matchingChar);
            } else {
                found = false;
            }
            if (!found) {
                return false;
            }

            this.matchCount = 0;
            ByteBuffer scanned = this._activeBuffer;
            if (scanned.remaining() == 0) {
                this._activeBuffer = null;
            } else {
                // Split: the tail becomes the new active buffer, the head is capped at the terminator.
                this._activeBuffer = scanned.slice();
                scanned.limit(scanned.position());
            }
            scanned.rewind();
            this._buffers.addLast(scanned);
            return true;
        }

        /**
         * Wraps a {@link ByteBuffer} as an {@link InputStream} that consumes the buffer's
         * remaining bytes.
         *
         * <p>Follows the {@code InputStream} contract: single-byte reads return values in
         * 0..255, and both read forms return -1 once the buffer is exhausted.
         *
         * @param buf buffer to read from; its position advances as the stream is read
         * @return stream view over {@code buf}
         */
        private static InputStream newInputStream(final ByteBuffer buf) {
            return new InputStream(){

                @Override
                public synchronized int read() throws IOException {
                    if (!buf.hasRemaining()) {
                        return -1;
                    }
                    // Mask to unsigned: a raw byte >= 0x80 would be negative, and 0xFF would look like EOF.
                    return buf.get() & 0xFF;
                }

                @Override
                public synchronized int read(byte[] bytes, int off, int len) throws IOException {
                    if (len == 0) {
                        return 0;
                    }
                    // Report end of stream explicitly instead of returning a 0-byte read.
                    if (!buf.hasRemaining()) {
                        return -1;
                    }
                    int count = Math.min(len, buf.remaining());
                    buf.get(bytes, off, count);
                    return count;
                }
            };
        }

        /**
         * Builds a reader over the concatenation of the given buffers and empties the list.
         *
         * @param buffers buffers to read in order; cleared before returning
         * @param charset charset used to decode the bytes
         * @return reader over all buffered bytes
         * @throws IOException declared for callers; not thrown by the visible code
         */
        private static InputStreamReader readerFromScanBufferList(LinkedList<ByteBuffer> buffers, Charset charset) throws IOException {
            // SequenceInputStream needs an Enumeration, which Vector provides directly.
            Vector<InputStream> parts = new Vector<InputStream>(buffers.size());
            for (ByteBuffer part : buffers) {
                parts.add(ArcFileBuilder.newInputStream(part));
            }
            buffers.clear();
            InputStream joined = new SequenceInputStream(parts.elements());
            return new InputStreamReader(joined, charset);
        }

        /**
         * Decodes the accumulated scan buffers with {@code charset} and returns their first line.
         * The scan buffer list is emptied as a side effect.
         *
         * @return the first line, or {@code null} if there is no data
         * @throws IOException on decode/read failure
         */
        private final String readLine(Charset charset) throws IOException {
            InputStreamReader decoded = ArcFileBuilder.readerFromScanBufferList(this._buffers, charset);
            return new BufferedReader(decoded).readLine();
        }

        /**
         * Parses the space-separated metadata line into the item: URI, host IP, timestamp,
         * MIME type and record length, in that order. Missing trailing fields are left
         * unset and tokens beyond the fifth are ignored.
         *
         * <p>An unparseable timestamp is logged and stored as 0.
         *
         * @param metadata the metadata line
         * @throws IOException declared for callers; not thrown by the visible code
         */
        private final void processMetadataLine(String metadata) throws IOException {
            StringTokenizer fields = new StringTokenizer(metadata, " ");
            for (int fieldNo = 1; fieldNo <= 5 && fields.hasMoreElements(); ++fieldNo) {
                String value = fields.nextToken();
                if (fieldNo == 1) {
                    this._item.setUri(value);
                } else if (fieldNo == 2) {
                    this._item.setHostIP(value);
                } else if (fieldNo == 3) {
                    try {
                        this._item.setTimestamp(this.TIMESTAMP14.parse(value).getTime());
                    }
                    catch (ParseException e) {
                        LOG.error((Object)("Invalid Timestamp Encountered in Item Metdata. URL:" + this._item.getUri() + " Timestamp:" + value + " Metadata:" + metadata));
                        this._item.setTimestamp(0L);
                    }
                } else if (fieldNo == 4) {
                    this._item.setMimeType(value);
                } else {
                    this._item.setRecordLength(Integer.parseInt(value));
                }
            }
        }

        /**
         * Parses the accumulated header bytes (decoded as UTF-8) into header items on the
         * item being built.
         *
         * <p>Each non-empty line becomes one {@link ArcFileHeaderItem}: lines of the form
         * {@code key:value} are split at the first colon, any other line is stored as a
         * value without a key. The {@code x-commoncrawl-ContentTruncated} header is also
         * folded into the item's flags: each comma-separated part that equals the name of
         * flag 1 or flag 2 sets the corresponding flag bit.
         *
         * @throws IOException on decode/read failure
         */
        private final void processHeaders() throws IOException {
            BufferedReader reader = new BufferedReader(ArcFileBuilder.readerFromScanBufferList(this._buffers, UTF8_Charset));
            String line = null;
            this._item.setFieldDirty(6);
            while ((line = reader.readLine()) != null) {
                if (line.length() == 0) continue;
                int colonPos = line.indexOf(':');
                ArcFileHeaderItem item = new ArcFileHeaderItem();
                if (colonPos != -1 && colonPos != line.length() - 1) {
                    item.setItemKey(line.substring(0, colonPos));
                    item.setItemValue(line.substring(colonPos + 1));
                    if (item.getItemKey().equals("x-commoncrawl-ContentTruncated")) {
                        for (String part : item.getItemValue().split(",")) {
                            // Each flag bit must be matched against its own name. Comparing both
                            // against the name of flag 2 would set bit 1 for the wrong value and
                            // leave the second branch unreachable.
                            // NOTE(review): presumably 1/2 are the "truncated in download" /
                            // "truncated in inflate" flags - confirm against ArcFileItem.Flags.
                            if (part.equals(ArcFileItem.Flags.toString(1))) {
                                this._item.setFlags(this._item.getFlags() | 1);
                            } else if (part.equals(ArcFileItem.Flags.toString(2))) {
                                this._item.setFlags(this._item.getFlags() | 2);
                            }
                        }
                    }
                } else {
                    item.setItemValue(line);
                }
                this._item.getHeaderItems().add(item);
            }
        }

        /**
         * Processes the section that was just terminated and moves to the next parse phase:
         * metadata line, then headers, then content. On entering the content phase the
         * content buffer is sized to one block.
         *
         * @throws IOException if the section cannot be decoded
         */
        private final void transitionState() throws IOException {
            if (this._state == State.LookingForMetadata) {
                String metadataLine = this.readLine(ASCII_Charset);
                this.processMetadataLine(metadataLine);
                this._state = State.LookingForHeaderTerminator;
            } else if (this._state == State.LookingForHeaderTerminator) {
                this.processHeaders();
                this._state = State.ReadingContent;
                this._buffer.setCapacity(_blockSize);
            }
        }

        /**
         * Completes the item: hands the accumulated content to it and releases the item
         * reference. An item with no content bytes is logged as an error and left without
         * content.
         *
         * @throws IOException if called before the builder reached the content phase
         */
        public final void finish() throws IOException {
            if (this._state != State.ReadingContent) {
                throw new IOException("ArcBuilder finish calledin Invalid State. State:" + this._state + " ArcFile:" + this._item.getArcFileName() + " Position:" + this._item.getArcFilePos() + " Item URI:" + this._item.getUri());
            }
            this._state = State.Finished;
            if (this._buffer.getCount() != 0) {
                this._item.setContent(this._buffer, false);
                // The item now holds the old buffer, so start with a fresh one.
                this._buffer = new Buffer();
            } else {
                LOG.error((Object)("ArcFileBuilder Encountered Item with Zero Length Content. URI:" + this._item.getUri()));
            }
            this._item = null;
        }

        /**
         * Feeds one buffer of inflated record data into the builder.
         *
         * <p>Before the content phase, bytes are scanned one at a time for the section
         * terminator; when one is found the section is processed and scanning continues
         * with the rest of the buffer. In the content phase the remaining bytes are
         * appended to the content buffer in one step, growing it as needed. Whatever is
         * left unterminated at the end is kept in {@code _buffers} for the next call.
         *
         * @param buffer array-backed buffer positioned at the data to consume
         * @throws IOException if a completed section cannot be parsed
         */
        public final void inputData(ByteBuffer buffer) throws IOException {
            this._activeBuffer = buffer;
            while (this._activeBuffer != null && this._activeBuffer.remaining() != 0) {
                if (this._state == State.ReadingContent) {
                    int pending = this._activeBuffer.remaining();
                    int free = this._buffer.getCapacity() - this._buffer.getCount();
                    if (free < pending) {
                        // Grow by at least two blocks to limit the number of reallocations.
                        int growth = Math.max(pending - free, _blockSize * 2);
                        this._buffer.setCapacity(this._buffer.getCapacity() + growth);
                    }
                    byte[] backing = this._activeBuffer.array();
                    int start = this._activeBuffer.position() + this._activeBuffer.arrayOffset();
                    this._buffer.append(backing, start, pending);
                    this._activeBuffer = null;
                } else {
                    byte next = this._activeBuffer.get();
                    if (next != 13 && next != 10) {
                        this.matchCount = 0;
                    } else if (this.checkForTerminator(next)) {
                        this.transitionState();
                    }
                }
            }
            // Buffer ran out mid-section: keep it so the section can be decoded once terminated.
            if (this._activeBuffer != null) {
                this._activeBuffer.rewind();
                this._buffers.add(this._activeBuffer);
                this._activeBuffer = null;
            }
        }

        /** Parse phases of a record, in the order they are passed through. */
        private static enum State {
            /** Scanning for the LF that ends the metadata line. */
            LookingForMetadata,
            /** Scanning for the CR LF CR LF that ends the header section. */
            LookingForHeaderTerminator,
            /** Appending the remaining bytes to the content buffer. */
            ReadingContent,
            /** {@code finish()} has completed. */
            Finished;

        }
    }

    /**
     * Queue element carrying one buffer from producer to consumer.
     * A {@code null} buffer marks end of stream.
     */
    private static final class BufferItem {
        public ByteBuffer _buffer;

        public BufferItem(ByteBuffer bufferItem) {
            _buffer = bufferItem;
        }
    }
}

