package edu.utexas.its.eis.tools.qwicap.template.xml.util.xcsd;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/utexas/its/eis/tools/qwicap/template/xml/util/xcsd/XMLCharacterSetDetector.class */
/**
 * Determines the character encoding of XML markup by inspecting the first bytes of a
 * mark/reset-capable {@link InputStream}.
 *
 * <p>Detection runs in two stages. First the leading three or four bytes are compared against the
 * known byte-order marks (BOMs) and against the byte patterns that the characters {@code "<?xm"}
 * produce in the common encoding families (see {@link #detect(int, int)}). Then the
 * {@code encoding} attribute of the XML declaration, if one can be read, is used either to confirm
 * the BOM (a disagreement is only logged) or to refine a BOM-less guess.
 *
 * <p>After construction the stream is positioned at the first byte following the BOM when a BOM
 * was found, or back at its original position when none was found, so that {@link #getReader()}
 * decodes the markup from its first character.
 */
public final class XMLCharacterSetDetector {
    /** Number of characters read from the head of the markup when searching for the XML declaration. */
    private static final int kXMLDeclarationWorstCaseCharacterCount = 80;

    static final String UTF_8 = "UTF-8";
    static final String UTF_16BE = "UTF-16BE";
    static final String UTF_16LE = "UTF-16LE";
    static final String UTF_32BE = "UTF-32BE";
    static final String UTF_32LE = "UTF-32LE";
    static final String ISO_10646_UCS_4_2143 = "UCS-4 (2143 order)";
    static final String ISO_10646_UCS_4_3412 = "UCS-4 (3412 order)";
    static final String Four_Byte_Big_Endian = "UCS-4 without BOM, or other 32-bit, big-endian encoding";
    static final String Four_Byte_Little_Endian = "UCS-4 without BOM, or other 32-bit, little-endian encoding";
    static final String Four_Byte_2143 = "UCS-4 without BOM, or other 32-bit, '2143' byte-order encoding";
    static final String Four_Byte_3412 = "UCS-4 without BOM, or other 32-bit, '3412' byte-order encoding";
    static final String Two_Byte_Big_Endian = "UTF-16 without BOM, ISO-10646-UCS-2, or other 16-bit, big-endian encoding";
    static final String Two_Byte_Little_Endian = "UTF-16 without BOM, ISO-10646-UCS-2, or other 16-bit, little-endian encoding";
    static final String EBCDIC = "EBCDIC of undetermined code page";

    /**
     * Matches an XML declaration that carries both a {@code version} and an {@code encoding}
     * attribute, in that order. Group 5 is the declared encoding name.
     */
    static final Pattern XMLDeclarationPattern = Pattern.compile("^(<[?]xml\\s+version=(['\"])(\\S+?)\\2\\s+encoding=(['\"])(\\S+?)\\4.*?(?:[?][>])?).*", Pattern.DOTALL);

    private static final Logger Log = Logger.getLogger(XMLCharacterSetDetector.class.getName());
    private static final boolean SupportsUTF32BE = supportsCharSet(UTF_32BE);
    private static final boolean SupportsUTF32LE = supportsCharSet(UTF_32LE);

    private final String MarkupName;
    private final InputStream In;
    private String CharSet;
    private int BytesPerCharacter;
    private boolean BOMFound;
    private boolean CharSetDetectionIsDefinitive;

    /**
     * Examines the start of {@code inputStream} and records the detected encoding.
     *
     * @param str         name of the markup; used only in log and exception messages
     * @param inputStream stream holding the markup; must support {@code mark}/{@code reset}
     * @throws IllegalArgumentException if the stream does not support {@code mark}/{@code reset}
     * @throws IOException              if reading from or resetting the stream fails
     */
    public XMLCharacterSetDetector(String str, InputStream inputStream) throws IOException {
        if (!inputStream.markSupported()) {
            throw new IllegalArgumentException("The InputStream (\"" + str + "\") passed to XMLCharacterSetDetector must support 'mark' and 'reset' operations.");
        }
        this.MarkupName = str;
        this.In = inputStream;

        // Accumulate up to three leading bytes, most significant first.
        inputStream.mark(4);
        int signature = 0;
        int bytesRead = 0;
        while (bytesRead < 3) {
            int next = inputStream.read();
            if (next < 0) {
                break;
            }
            signature = (signature << 8) | next;
            bytesRead++;
        }

        // Only the UTF-8 BOM is recognizable from three bytes; everything else needs one more.
        if (!detect(signature, bytesRead)) {
            int next = inputStream.read();
            if (next >= 0) {
                detect((signature << 8) | next, bytesRead + 1);
            }
        }

        if (!this.BOMFound) {
            // Nothing to skip: rewind and let the XML declaration refine the guess.
            inputStream.reset();
            tryToResolveCharacterSetAmbiguity();
            return;
        }

        if (UTF_16BE.equals(this.CharSet) || UTF_16LE.equals(this.CharSet)) {
            // Four bytes were consumed but a UTF-16 BOM is only two: rewind and skip just the BOM.
            inputStream.reset();
            inputStream.read();
            inputStream.read();
        }
        this.CharSetDetectionIsDefinitive = true;

        String declaredEncoding = getDeclaredEncoding();
        if (declaredEncoding == null || this.CharSet.equalsIgnoreCase(declaredEncoding)) {
            return;
        }
        if (!isGenericNameForBOMEncoding(this.CharSet, declaredEncoding)) {
            Log.log(Level.SEVERE, "The markup \"{0}\" has an XML byte-order-mark that identifies its encoding as \"{1}\", however its XML declaration identifies the encoding as \"{2}\". One or both of these indicators is wrong, and should be corrected. For now, it will be assumed that the encoding is \"{1}\".", new Object[]{str, this.CharSet, declaredEncoding});
        }
    }

    /**
     * Tells whether {@code declaredEncoding} is a byte-order-neutral name for the encoding family
     * identified by a BOM, e.g. "UTF-16" for a document whose BOM says UTF-16LE. Such a declaration
     * does not contradict the BOM and must not be reported as a conflict.
     */
    private static boolean isGenericNameForBOMEncoding(String bomCharSet, String declaredEncoding) {
        if (UTF_16BE.equals(bomCharSet) || UTF_16LE.equals(bomCharSet)) {
            return "UTF-16".equalsIgnoreCase(declaredEncoding);
        }
        if (UTF_32BE.equals(bomCharSet) || UTF_32LE.equals(bomCharSet)) {
            return "UTF-32".equalsIgnoreCase(declaredEncoding) || "ISO-10646-UCS-4".equalsIgnoreCase(declaredEncoding);
        }
        return false;
    }

    /**
     * Refines a BOM-less guess using the XML declaration's {@code encoding} attribute.
     *
     * <p>The head of the markup is decoded with the guessed encoding. If the declaration names the
     * same encoding, the guess becomes definitive. If it names a different one, the same bytes are
     * decoded again with the declared encoding; when both decodings yield identical text the
     * declared encoding is adopted, otherwise the declaration is logged as incorrect and the guess
     * is kept. Any failure is logged at INFO level and leaves the guess unchanged.
     */
    private void tryToResolveCharacterSetAmbiguity() {
        try {
            int byteCount = kXMLDeclarationWorstCaseCharacterCount * this.BytesPerCharacter;
            ByteArrayInputStream head = new ByteArrayInputStream(readBytesAndReset(this.In, byteCount));
            head.mark(byteCount);
            String guessedText = readString(getReader(head), kXMLDeclarationWorstCaseCharacterCount);
            Matcher matcher = XMLDeclarationPattern.matcher(guessedText);
            if (!matcher.matches()) {
                return;
            }
            String declared = matcher.group(5);
            if (declared == null) {
                return;
            }
            if (declared.equals(this.CharSet)) {
                this.CharSetDetectionIsDefinitive = true;
                return;
            }
            head.reset();
            String declaredText = readString(new InputStreamReader(head, declared), guessedText.length());
            if (guessedText.equals(declaredText)) {
                this.CharSet = declared;
                this.CharSetDetectionIsDefinitive = true;
            } else {
                Log.log(Level.WARNING, "The markup \"{0}\" declares its encoding as \"{1}\", which is incorrect; will assume \"{2}\" encoding, instead.", new Object[]{this.MarkupName, declared, this.CharSet});
            }
        } catch (Exception e) {
            // Best effort only: the guess made from the leading bytes remains in force.
            Log.log(Level.INFO, "Definitive character set detection failed for \"" + this.MarkupName + "\".", e);
        }
    }

    /**
     * Reads the head of the markup with the detected encoding and extracts the XML declaration's
     * {@code encoding} attribute, leaving the stream position unchanged.
     *
     * @return the declared encoding name, or {@code null} if no matching declaration was found or
     *         reading failed (failures are logged at INFO level)
     */
    private String getDeclaredEncoding() {
        try {
            int byteCount = kXMLDeclarationWorstCaseCharacterCount * this.BytesPerCharacter;
            ByteArrayInputStream head = new ByteArrayInputStream(readBytesAndReset(this.In, byteCount));
            head.mark(byteCount);
            String text = readString(getReader(head), kXMLDeclarationWorstCaseCharacterCount);
            Matcher matcher = XMLDeclarationPattern.matcher(text);
            return matcher.matches() ? matcher.group(5) : null;
        } catch (Exception e) {
            Log.log(Level.INFO, "Retrieval of XML declaration's \"encoding\" attribute failed for \"" + this.MarkupName + "\".", e);
            return null;
        }
    }

    /**
     * Reads up to {@code i} bytes from the current position of {@code inputStream} and then
     * restores that position.
     *
     * @return the bytes read; shorter than {@code i} if the stream ended first
     * @throws IOException if reading or resetting fails
     */
    private static byte[] readBytesAndReset(InputStream inputStream, int i) throws IOException {
        inputStream.mark(i);
        try {
            byte[] buffer = new byte[i];
            int filled = 0;
            while (filled < i) {
                int count = inputStream.read(buffer, filled, i - filled);
                if (count == -1) {
                    break;
                }
                filled += count;
            }
            if (filled == i) {
                return buffer;
            }
            byte[] trimmed = new byte[filled];
            System.arraycopy(buffer, 0, trimmed, 0, filled);
            return trimmed;
        } finally {
            // Single point of restoration, reached on both normal return and exception.
            inputStream.reset();
        }
    }

    /**
     * Reads at most {@code i} characters from {@code reader}, stopping early at end of input.
     *
     * @return the characters read, possibly an empty string
     * @throws IOException if the reader fails
     */
    private static String readString(Reader reader, int i) throws IOException {
        char[] chars = new char[i];
        int length = 0;
        while (length < i) {
            int c = reader.read();
            if (c == -1) {
                break;
            }
            chars[length++] = (char) c;
        }
        return new String(chars, 0, length);
    }

    /** Returns the detected character set name, or a fixed message when none was detected. */
    @Override
    public String toString() {
        return getCharSetDetected() ? getCharSetName() : "No character set detected.";
    }

    /** Returns the markup name supplied to the constructor. */
    public String getMarkupName() {
        return this.MarkupName;
    }

    /** Returns {@code true} if any character set, definitive or not, was detected. */
    public boolean getCharSetDetected() {
        return this.CharSet != null;
    }

    /**
     * Returns the detected character set, or {@code null} if none was detected. The value is either
     * one of this class's constants (some of which are descriptions rather than charset names) or
     * the encoding name taken from the XML declaration.
     */
    public String getCharSetName() {
        return this.CharSet;
    }

    /**
     * Returns {@code true} if the detection was backed by a BOM or by an XML declaration that was
     * consistent with the bytes read.
     */
    public boolean getCharSetDetectionIsDefinitive() {
        return this.CharSetDetectionIsDefinitive;
    }

    /** Returns the code-unit width (1, 2 or 4 bytes) associated with the detected encoding family. */
    public int getTypicalBytesPerCharacter() {
        return this.BytesPerCharacter;
    }

    /**
     * Classifies the leading bytes of the markup and stores the result in {@code CharSet},
     * {@code BytesPerCharacter} and {@code BOMFound}. All three fields are overwritten on every
     * call, including when nothing is recognized.
     *
     * <p>With three bytes only the UTF-8 BOM (EF BB BF) is recognized. With four bytes the value is
     * matched against the UTF-32/UCS-4 and UTF-16 BOMs and against the byte patterns of
     * {@code '<'}, {@code "<?"} and {@code "<?xm"} in 32-bit, 16-bit, ASCII-compatible and EBCDIC
     * encodings; anything unrecognized is treated as UTF-8 without a BOM.
     *
     * @param i  the leading bytes packed most-significant-first
     * @param i2 how many bytes {@code i} holds
     * @return {@code true} if a character set was assigned
     */
    private boolean detect(int i, int i2) {
        String charSet = null;
        int bytesPerCharacter = 1;
        boolean bomFound = false;

        if (i2 == 3) {
            if (i == 0xEFBBBF) {
                charSet = UTF_8;
                bomFound = true;
            }
        } else if (i2 == 4) {
            switch (i) {
                // 32-bit byte-order marks.
                case 0x0000FEFF:
                    charSet = UTF_32BE;
                    bytesPerCharacter = 4;
                    bomFound = true;
                    break;
                case 0xFFFE0000:
                    charSet = UTF_32LE;
                    bytesPerCharacter = 4;
                    bomFound = true;
                    break;
                case 0x0000FFFE:
                    charSet = ISO_10646_UCS_4_2143;
                    bytesPerCharacter = 4;
                    bomFound = true;
                    break;
                case 0xFEFF0000:
                    charSet = ISO_10646_UCS_4_3412;
                    bytesPerCharacter = 4;
                    bomFound = true;
                    break;
                // '<' (0x3C) as a single 32-bit code unit in each byte order.
                case 0x0000003C:
                    charSet = Four_Byte_Big_Endian;
                    bytesPerCharacter = 4;
                    break;
                case 0x3C000000:
                    charSet = Four_Byte_Little_Endian;
                    bytesPerCharacter = 4;
                    break;
                case 0x00003C00:
                    charSet = Four_Byte_2143;
                    bytesPerCharacter = 4;
                    break;
                case 0x003C0000:
                    charSet = Four_Byte_3412;
                    bytesPerCharacter = 4;
                    break;
                // "<?" as two 16-bit code units.
                case 0x003C003F:
                    charSet = Two_Byte_Big_Endian;
                    bytesPerCharacter = 2;
                    break;
                case 0x3C003F00:
                    charSet = Two_Byte_Little_Endian;
                    bytesPerCharacter = 2;
                    break;
                // "<?xm" in an ASCII-compatible encoding.
                case 0x3C3F786D:
                    charSet = UTF_8;
                    break;
                // "<?xm" in EBCDIC.
                case 0x4C6FA794:
                    charSet = EBCDIC;
                    break;
                default: {
                    int high = i >>> 16;
                    int low = i & 0xFFFF;
                    // A 16-bit BOM followed by a non-zero code unit; the zero case was handled
                    // above as a 32-bit BOM.
                    if (high == 0xFEFF && low != 0) {
                        charSet = UTF_16BE;
                        bytesPerCharacter = 2;
                        bomFound = true;
                    } else if (high == 0xFFFE && low != 0) {
                        charSet = UTF_16LE;
                        bytesPerCharacter = 2;
                        bomFound = true;
                    } else {
                        charSet = UTF_8;
                    }
                    break;
                }
            }
        }

        this.CharSet = charSet;
        this.BytesPerCharacter = bytesPerCharacter;
        this.BOMFound = bomFound;
        return charSet != null;
    }

    /**
     * Creates a reader that decodes the stream given to the constructor, from its current
     * position, using the detected character set.
     *
     * @throws UnsupportedEncodingException if the detected character set is not supported
     * @throws IllegalStateException        if no character set was detected
     */
    public Reader getReader() throws UnsupportedEncodingException {
        return getReader(this.In);
    }

    /**
     * Creates a reader that decodes {@code inputStream} using the detected character set.
     *
     * <p>Encodings known to this class map to a JDK decoder where one is used by this class, and
     * otherwise to one of the package's fixed-width readers. Any other name (one adopted from the
     * XML declaration) is handed to {@link InputStreamReader} as-is.
     *
     * @param inputStream the stream to decode
     * @throws UnsupportedEncodingException if the character set is not supported; raised as an
     *                                      {@code UnsupportedEncodingForXMLDocumentException}
     * @throws IllegalStateException        if no character set was detected
     */
    public Reader getReader(InputStream inputStream) throws UnsupportedEncodingException {
        try {
            if (charSetIs(UTF_8)) {
                return new InputStreamReader(inputStream, UTF_8);
            }
            if (charSetIs(UTF_16BE)) {
                return new InputStreamReader(inputStream, UTF_16BE);
            }
            if (charSetIs(UTF_16LE)) {
                return new InputStreamReader(inputStream, UTF_16LE);
            }
            if (charSetIs(UTF_32BE)) {
                return SupportsUTF32BE ? new InputStreamReader(inputStream, UTF_32BE) : new ReaderOf4ByteBigEndian(inputStream);
            }
            if (charSetIs(UTF_32LE)) {
                return SupportsUTF32LE ? new InputStreamReader(inputStream, UTF_32LE) : new ReaderOf4ByteLittleEndian(inputStream);
            }
            if (charSetIs(ISO_10646_UCS_4_2143) || charSetIs(Four_Byte_2143)) {
                return new ReaderOf4Byte2143(inputStream);
            }
            if (charSetIs(ISO_10646_UCS_4_3412) || charSetIs(Four_Byte_3412)) {
                return new ReaderOf4Byte3412(inputStream);
            }
            if (charSetIs(Four_Byte_Big_Endian)) {
                return new ReaderOf4ByteBigEndian(inputStream);
            }
            if (charSetIs(Four_Byte_Little_Endian)) {
                return new ReaderOf4ByteLittleEndian(inputStream);
            }
            if (charSetIs(Two_Byte_Big_Endian)) {
                return new ReaderOf2ByteBigEndian(inputStream);
            }
            if (charSetIs(Two_Byte_Little_Endian)) {
                return new ReaderOf2ByteLittleEndian(inputStream);
            }
            if (charSetIs(EBCDIC)) {
                // The code page is undetermined; IBM1047 is the decoder this class uses for it.
                return new InputStreamReader(inputStream, "IBM1047");
            }
            if (this.CharSet != null) {
                return new InputStreamReader(inputStream, this.CharSet);
            }
            throw new IllegalStateException("Unable to create even a vaguely correct reader for character set: " + this.CharSet);
        } catch (UnsupportedEncodingException e) {
            throw new UnsupportedEncodingForXMLDocumentException(this, e);
        }
    }

    /**
     * Null-safe, case-insensitive comparison of the detected character set with {@code name}.
     * Content comparison is required because the detected name may come from the XML declaration
     * rather than from one of this class's constants.
     */
    private boolean charSetIs(String name) {
        return name.equalsIgnoreCase(this.CharSet);
    }

    /** Returns {@code true} if the running JVM provides a charset with the given name. */
    private static boolean supportsCharSet(String str) {
        try {
            return Charset.isSupported(str);
        } catch (IllegalArgumentException e) {
            // Illegal or null charset name: treated the same as an unsupported one.
            return false;
        }
    }
}
