package edu.cmu.minorthird.text;

import cern.colt.matrix.impl.AbstractFormatter;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.util.ProgressCounter;
import edu.cmu.minorthird.util.StringUtil;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/cmu/minorthird/text/TextLabelsLoader.class */
public class TextLabelsLoader {
    /** Closure policy: close every known type on every document of the text base. */
    public static final int CLOSE_ALL_TYPES = 1;

    /** Closure policy: close every known type on each document that contains at least one labeled span. */
    public static final int CLOSE_TYPES_IN_LABELED_DOCS = 2;

    /** Closure policy: never close types automatically. */
    public static final int DONT_CLOSE_TYPES = 3;

    /**
     * Closure policy: nothing is closed automatically; closing is left to the
     * explicit {@code closeType} / {@code closeAllTypes} operations read by
     * {@link #importOps}.
     */
    public static final int CLOSE_BY_OPERATION = 4;

    /** Policy applied at the end of {@link #importOps}; one of the {@code CLOSE_*} constants. */
    private int closurePolicy = CLOSE_BY_OPERATION;

    /** Number of warning conditions seen so far by this loader (shared by all warning sites). */
    private int warnings = 0;

    /** Once this many warning conditions have been seen, further warnings are suppressed. */
    private static final int MAX_WARNINGS = 10;

    private static final Logger log = Logger.getLogger(TextLabelsLoader.class);

    /** Human-readable policy names; the name of policy {@code p} is at index {@code p - 1}. */
    public static final String[] CLOSURE_NAMES = {"CLOSE_ALL_TYPES", "CLOSE_TYPES_IN_LABELED_DOCS", "DONT_CLOSE_TYPES", "CLOSE_BY_OPERATION"};

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/cmu/minorthird/text/TextLabelsLoader$LabelInfo.class */
    /**
     * Plain holder used while generating XML markup: one labeled span, the
     * name of its type, and the first and last token index (inclusive) the
     * span occupies within its document.
     */
    public class LabelInfo {
        /** The labeled span itself. */
        public Span span;
        /** Name of the type the span is labeled with. */
        public String type;
        /** Index of the first token of the span within the document. */
        public int start;
        /** Index of the last token of the span within the document. */
        public int end;

        /**
         * @param span the labeled span
         * @param str  the type name
         * @param i    index of the first token
         * @param i2   index of the last token
         */
        public LabelInfo(Span span, String str, int i, int i2) {
            this.start = i;
            this.end = i2;
            this.type = str;
            this.span = span;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/cmu/minorthird/text/TextLabelsLoader$TagInfo.class */
    /**
     * Plain holder for one XML tag to be written into a document: the token
     * index it is attached to, its literal text, and whether it is an opening
     * or a closing tag.
     */
    public class TagInfo {
        /** Token index the tag is attached to. */
        public int pos;
        /** Literal tag text, e.g. {@code <name>} or {@code </name>}. */
        public String tag;
        /** {@code true} for an opening tag, {@code false} for a closing tag. */
        public boolean isOpenTag;

        /**
         * @param i   token index the tag is attached to
         * @param str literal tag text
         * @param z   whether this is an opening tag
         */
        public TagInfo(int i, String str, boolean z) {
            this.isOpenTag = z;
            this.tag = str;
            this.pos = i;
        }
    }

    /**
     * Selects the closure policy that {@link #importOps} applies after all
     * operations have been read.
     *
     * @param i one of the {@code CLOSE_*} / {@code DONT_CLOSE_TYPES} constants;
     *          the value is stored without validation
     */
    public void setClosurePolicy(int i) {
        closurePolicy = i;
    }

    /**
     * Creates a fresh set of labels over {@code textBase} and fills it with
     * the operations found in {@code file}.
     *
     * @param textBase the text base the new labels refer to
     * @param file     operations file to read
     * @return the newly created, populated labels
     * @throws IOException if the file cannot be read
     */
    public MutableTextLabels loadOps(TextBase textBase, File file) throws IOException, FileNotFoundException {
        MutableTextLabels loaded = new BasicTextLabels(textBase);
        importOps(loaded, textBase, file);
        return loaded;
    }

    /**
     * Reads labeling operations from {@code file}, one per line, and applies
     * them to {@code mutableTextLabels}.
     *
     * <p>Blank lines and lines starting with {@code #} are skipped. The first
     * whitespace-separated token of every other line selects the operation:
     * <ul>
     *   <li>{@code addToType docId lo length type [confidence]}</li>
     *   <li>{@code setSpanProp docId lo length property value}</li>
     *   <li>{@code closeType docId type}</li>
     *   <li>{@code closeAllTypes docId} (matched case-insensitively; applied
     *       after the whole file has been read)</li>
     * </ul>
     * After all lines are processed the configured closure policy is applied.
     * Operations naming an unknown document id are skipped with a (rate
     * limited) warning.
     *
     * @param mutableTextLabels labels to modify; its text base is used to resolve document ids
     * @param textBase          not consulted; the text base attached to the labels is used instead
     * @param file              operations file to read
     * @throws IOException              if the file cannot be read
     * @throws IllegalStateException    if the labels have no text base
     * @throws IllegalArgumentException if a line is malformed or names an unknown operation
     */
    public void importOps(MutableTextLabels mutableTextLabels, TextBase textBase, File file) throws IOException, FileNotFoundException {
        TextBase base = mutableTextLabels.getTextBase();
        if (base == null) {
            throw new IllegalStateException("TextBase attached to labels must not be null");
        }
        LineNumberReader in = new LineNumberReader(new FileReader(file));
        String line = null;
        // Document ids named by closeAllTypes; handled once every label has been loaded.
        ArrayList<String> closeAllDocIds = new ArrayList<String>();
        try {
            while ((line = in.readLine()) != null) {
                if (line.trim().length() == 0 || line.startsWith("#")) {
                    continue;
                }
                log.debug("read line #" + in.getLineNumber() + ": " + line);
                StringTokenizer tokens = new StringTokenizer(line);
                try {
                    String op = advance(tokens, in, file);
                    if ("addToType".equals(op)) {
                        addToType(tokens, in, file, base, mutableTextLabels);
                    } else if ("setSpanProp".equals(op)) {
                        setSpanProp(tokens, in, file, base, mutableTextLabels);
                    } else if ("closeType".equals(op)) {
                        String docId = advance(tokens, in, file);
                        String type = advance(tokens, in, file);
                        Span docSpan = base.documentSpan(docId);
                        if (docSpan != null) {
                            mutableTextLabels.closeTypeInside(type, docSpan);
                            log.debug("closed " + type + " on " + docId);
                        } else {
                            this.warnings++;
                            if (this.warnings < MAX_WARNINGS) {
                                log.warn("unknown id '" + docId + "' in closeType");
                            } else if (this.warnings == MAX_WARNINGS) {
                                log.warn("there will be no more warnings of this sort given");
                            }
                        }
                    } else if ("closeAllTypes".equalsIgnoreCase(op)) {
                        closeAllDocIds.add(advance(tokens, in, file));
                    } else {
                        throw new IllegalArgumentException("error on line " + in.getLineNumber() + " of " + file.getName());
                    }
                } catch (IllegalArgumentException e) {
                    throw getNewException(e, ", failed to find operation.");
                }
            }
            for (String docId : closeAllDocIds) {
                Span docSpan = base.documentSpan(docId);
                if (docSpan == null) {
                    // Same treatment as an unknown id in closeType: warn and skip
                    // instead of handing a null span to closeTypeInside.
                    this.warnings++;
                    if (this.warnings < MAX_WARNINGS) {
                        log.warn("unknown id '" + docId + "' in closeAllTypes");
                    } else if (this.warnings == MAX_WARNINGS) {
                        log.warn("there will be no more warnings of this sort given");
                    }
                    continue;
                }
                closeLabels(mutableTextLabels.getTypes(), mutableTextLabels, docSpan);
            }
            closeLabels(mutableTextLabels, this.closurePolicy);
        } catch (IllegalArgumentException e) {
            throw getNewException(e, " on line: " + line);
        } finally {
            // Release the file handle on every exit path, including parse errors.
            in.close();
        }
    }

    /**
     * Handles one {@code addToType} line. The remaining tokens are, in order:
     * document id, start character offset, length in characters, type name
     * and an optional confidence value.
     *
     * <p>A start of 0 combined with a negative length labels the whole
     * document; a negative length with any other start extends the span to
     * the end of the document text. An unknown document id is skipped with a
     * (rate limited) warning.
     *
     * @throws IllegalArgumentException if a token is missing or a number cannot be parsed
     */
    private void addToType(StringTokenizer stringTokenizer, LineNumberReader lineNumberReader, File file, TextBase textBase, MutableTextLabels mutableTextLabels) {
        String docId = advance(stringTokenizer, lineNumberReader, file);
        String loText = advance(stringTokenizer, lineNumberReader, file);
        String lengthText = advance(stringTokenizer, lineNumberReader, file);
        String typeName = advance(stringTokenizer, lineNumberReader, file);
        String confidenceText = null;
        if (stringTokenizer.hasMoreTokens()) {
            confidenceText = advance(stringTokenizer, lineNumberReader, file);
        }
        try {
            int lo = Integer.parseInt(loText);
            int length = Integer.parseInt(lengthText);
            Span docSpan = textBase.documentSpan(docId);
            if (docSpan == null) {
                this.warnings++;
                if (this.warnings < MAX_WARNINGS) {
                    log.warn("unknown id '" + docId + "' in addToType " + lo + AbstractFormatter.DEFAULT_COLUMN_SEPARATOR + length);
                } else if (this.warnings == MAX_WARNINGS) {
                    log.warn("there will be no more warnings of this sort given");
                }
                return;
            }
            Details details = (confidenceText == null) ? null : new Details(StringUtil.atof(confidenceText));
            if (lo == 0 && length < 0) {
                // "0 -1" style shorthand: the label covers the entire document.
                mutableTextLabels.addToType(docSpan, typeName, details);
                return;
            }
            if (length < 0) {
                length = docSpan.asString().length() - lo;
            }
            mutableTextLabels.addToType(docSpan.charIndexSubSpan(lo, lo + length), typeName, details);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("bad number on line " + lineNumberReader.getLineNumber() + " of " + file.getName());
        }
    }

    /**
     * Handles one {@code setSpanProp} line. The remaining tokens are, in
     * order: document id, start character offset, length in characters,
     * property name and property value.
     *
     * <p>A start of 0 combined with a negative length addresses the whole
     * document; a negative length with any other start extends the span to
     * the end of the document text. An unknown document id is skipped with a
     * (rate limited) warning.
     *
     * @throws IllegalArgumentException if a token is missing or a number cannot be parsed
     */
    private void setSpanProp(StringTokenizer stringTokenizer, LineNumberReader lineNumberReader, File file, TextBase textBase, MutableTextLabels mutableTextLabels) {
        String docId = advance(stringTokenizer, lineNumberReader, file);
        String loText = advance(stringTokenizer, lineNumberReader, file);
        String lengthText = advance(stringTokenizer, lineNumberReader, file);
        String propName = advance(stringTokenizer, lineNumberReader, file);
        String propValue = advance(stringTokenizer, lineNumberReader, file);
        try {
            int lo = Integer.parseInt(loText);
            int length = Integer.parseInt(lengthText);
            Span docSpan = textBase.documentSpan(docId);
            if (docSpan == null) {
                this.warnings++;
                if (this.warnings < MAX_WARNINGS) {
                    log.warn("unknown id '" + docId + "'");
                } else if (this.warnings == MAX_WARNINGS) {
                    log.warn("there will be no more warnings of this sort given");
                }
                return;
            }
            if (lo == 0 && length < 0) {
                // "0 -1" style shorthand: the property is set on the entire document.
                mutableTextLabels.setProperty(docSpan, propName, propValue);
                return;
            }
            if (length < 0) {
                length = docSpan.asString().length() - lo;
            }
            mutableTextLabels.setProperty(docSpan.charIndexSubSpan(lo, lo + length), propName, propValue);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("bad number on line " + lineNumberReader.getLineNumber() + " of " + file.getName());
        }
    }

    /**
     * Builds a copy of {@code illegalArgumentException} whose message has
     * {@code str} appended. The copy carries the stack trace of the original
     * exception rather than that of this call site.
     *
     * @param illegalArgumentException exception to extend
     * @param str                      text appended to the original message
     * @return the new exception, ready to be thrown
     */
    private static IllegalArgumentException getNewException(IllegalArgumentException illegalArgumentException, String str) {
        IllegalArgumentException extended = new IllegalArgumentException(illegalArgumentException.getMessage() + str);
        extended.setStackTrace(illegalArgumentException.getStackTrace());
        return extended;
    }

    /**
     * Returns the next token of the current line.
     *
     * @param stringTokenizer  tokenizer over the current line
     * @param lineNumberReader reader the line came from (used for the line number in the error message)
     * @param file             file being read (used for its name in the error message)
     * @return the next token
     * @throws IllegalArgumentException if the line has no more tokens
     */
    private String advance(StringTokenizer stringTokenizer, LineNumberReader lineNumberReader, File file) {
        if (!stringTokenizer.hasMoreTokens()) {
            throw new IllegalArgumentException("error on line " + lineNumberReader.getLineNumber() + " of " + file.getName() + " failed to find token");
        }
        return stringTokenizer.nextToken();
    }

    /**
     * Applies a closure policy to {@code mutableTextLabels}.
     *
     * <ul>
     *   <li>{@link #CLOSE_ALL_TYPES}: every type is closed on every document of the text base.</li>
     *   <li>{@link #CLOSE_TYPES_IN_LABELED_DOCS}: every type is closed on each
     *       document that holds at least one labeled span.</li>
     *   <li>{@link #DONT_CLOSE_TYPES}, {@link #CLOSE_BY_OPERATION}: nothing is done.</li>
     * </ul>
     * Any other value only produces a warning in the log.
     *
     * @param mutableTextLabels labels to close types on
     * @param i                 the closure policy to apply
     */
    public void closeLabels(MutableTextLabels mutableTextLabels, int i) {
        Set allTypes = mutableTextLabels.getTypes();
        TextBase base = mutableTextLabels.getTextBase();
        switch (i) {
            case CLOSE_ALL_TYPES: {
                for (Span.Looper docs = base.documentSpanIterator(); docs.hasNext();) {
                    closeLabels(allTypes, mutableTextLabels, docs.nextSpan());
                }
                break;
            }
            case CLOSE_TYPES_IN_LABELED_DOCS: {
                // First gather the distinct documents that carry a label, then close them.
                TreeSet labeledDocs = new TreeSet();
                for (Iterator typeIt = allTypes.iterator(); typeIt.hasNext();) {
                    Span.Looper instances = mutableTextLabels.instanceIterator((String) typeIt.next());
                    while (instances.hasNext()) {
                        labeledDocs.add(instances.nextSpan().documentSpan());
                    }
                }
                for (Iterator docIt = labeledDocs.iterator(); docIt.hasNext();) {
                    closeLabels(allTypes, mutableTextLabels, (Span) docIt.next());
                }
                break;
            }
            case DONT_CLOSE_TYPES:
            case CLOSE_BY_OPERATION:
                break;
            default:
                log.warn("closure policy(" + i + ") not recognized");
                break;
        }
    }

    /**
     * Closes every type named in {@code set} inside {@code span}.
     *
     * @param set               type names (strings) to close
     * @param mutableTextLabels labels to modify
     * @param span              span within which the types are closed
     */
    private void closeLabels(Set set, MutableTextLabels mutableTextLabels, Span span) {
        for (Iterator typeIt = set.iterator(); typeIt.hasNext();) {
            String typeName = (String) typeIt.next();
            mutableTextLabels.closeTypeInside(typeName, span);
        }
    }

    /**
     * Reads labels previously written by {@link #saveSerialized} and attaches
     * them to {@code textBase}.
     *
     * <p>NOTE(review): this uses Java-native deserialization; only files from
     * a trusted source should be loaded this way.
     *
     * @param file     file holding the serialized labels
     * @param textBase text base to attach to the loaded labels
     * @return the deserialized labels
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if a class needed for deserialization is missing
     */
    public MutableTextLabels loadSerialized(File file, TextBase textBase) throws IOException, FileNotFoundException {
        FileInputStream fileIn = new FileInputStream(file);
        try {
            ObjectInputStream objectIn = new ObjectInputStream(new BufferedInputStream(fileIn));
            MutableTextLabels labels = (MutableTextLabels) objectIn.readObject();
            labels.setTextBase(textBase);
            return labels;
        } catch (ClassNotFoundException e) {
            // Keep the original exception as the cause so the missing class can be diagnosed.
            throw new IllegalArgumentException("can't read TextLabels from " + file + ": " + e, e);
        } finally {
            // Closing the file stream releases the handle on every exit path.
            fileIn.close();
        }
    }

    /**
     * Writes {@code mutableTextLabels} to {@code file} using Java
     * serialization.
     *
     * @param mutableTextLabels labels to save
     * @param file              destination file (overwritten)
     * @throws IOException if the file cannot be written
     */
    public void saveSerialized(MutableTextLabels mutableTextLabels, File file) throws IOException {
        FileOutputStream fileOut = new FileOutputStream(file);
        try {
            ObjectOutputStream objectOut = new ObjectOutputStream(new BufferedOutputStream(fileOut));
            objectOut.writeObject(mutableTextLabels);
            // Push everything through the buffer before the file is closed below.
            objectOut.flush();
        } finally {
            // Release the file handle even when serialization fails.
            fileOut.close();
        }
    }

    /**
     * Renders {@code textLabels} as a list of operations, one per line, in
     * the format read by {@link #importOps}: an {@code addToType} line for
     * every labeled span (with its confidence when non-default details are
     * attached), a {@code closeType} line for every closed document, and a
     * {@code setSpanProp} line for every span property.
     *
     * <p>Labels and properties on empty spans cannot be expressed as a
     * character range; they are dropped with a (rate limited) warning.
     *
     * @param textLabels labels to render
     * @return the operations text
     * @throws UnsupportedOperationException if a type is closed over a span
     *         smaller than a whole document
     */
    public String printTypesAsOps(TextLabels textLabels) {
        final String sep = AbstractFormatter.DEFAULT_COLUMN_SEPARATOR;
        final String eol = AbstractFormatter.DEFAULT_ROW_SEPARATOR;
        StringBuilder ops = new StringBuilder();

        ProgressCounter typeProgress = new ProgressCounter("saving labels", "type", textLabels.getTypes().size());
        for (String str : textLabels.getTypes()) {
            ProgressCounter spanProgress = new ProgressCounter("saving type " + str, "span");
            Span.Looper instances = textLabels.instanceIterator(str);
            while (instances.hasNext()) {
                Span labeled = instances.nextSpan();
                if (labeled.size() > 0) {
                    int lo = labeled.getTextToken(0).getLo();
                    int hi = labeled.getTextToken(labeled.size() - 1).getHi();
                    Details details = textLabels.getDetails(labeled, str);
                    ops.append("addToType ").append(labeled.getDocumentId())
                            .append(sep).append(lo)
                            .append(sep).append(hi - lo)
                            .append(sep).append(str);
                    if (details != null && details != Details.DEFAULT) {
                        ops.append(sep).append(details.getConfidence());
                    }
                    ops.append(eol);
                } else {
                    this.warnings++;
                    if (this.warnings < MAX_WARNINGS) {
                        log.warn("forgetting label on empty span type " + str + ": " + labeled);
                    } else if (this.warnings == MAX_WARNINGS) {
                        log.warn("there will be no more warnings of this sort given");
                    }
                }
                spanProgress.progress();
            }
            spanProgress.finished();

            Span.Looper closures = textLabels.closureIterator(str);
            while (closures.hasNext()) {
                Span closed = closures.nextSpan();
                // The closeType operation can only name a whole document.
                if (closed.size() != closed.documentSpan().size()) {
                    throw new UnsupportedOperationException("can't save environment with closureSpans!=docSpans");
                }
                ops.append("closeType ").append(closed.getDocumentId())
                        .append(sep).append(str).append(eol);
            }
            typeProgress.progress();
        }
        typeProgress.finished();

        ProgressCounter propProgress = new ProgressCounter("saving labels", "property", textLabels.getSpanProperties().size());
        for (String str2 : textLabels.getSpanProperties()) {
            Span.Looper withProp = textLabels.getSpansWithProperty(str2);
            while (withProp.hasNext()) {
                Span propSpan = withProp.nextSpan();
                if (propSpan.size() <= 0) {
                    // Same guard as for labels above: an empty span has no first
                    // token to take a character offset from.
                    this.warnings++;
                    if (this.warnings < MAX_WARNINGS) {
                        log.warn("forgetting property " + str2 + " on empty span: " + propSpan);
                    } else if (this.warnings == MAX_WARNINGS) {
                        log.warn("there will be no more warnings of this sort given");
                    }
                    continue;
                }
                String value = textLabels.getProperty(propSpan, str2);
                int lo = propSpan.getTextToken(0).getLo();
                int hi = propSpan.getTextToken(propSpan.size() - 1).getHi();
                ops.append("setSpanProp ").append(propSpan.getDocumentId())
                        .append("  ").append(lo)
                        .append(sep).append(hi - lo)
                        .append(sep).append(str2)
                        .append(sep).append(value)
                        .append(eol);
            }
            propProgress.progress();
        }
        propProgress.finished();
        return ops.toString();
    }

    /**
     * Writes the operations text produced by {@link #printTypesAsOps} to
     * {@code file}, followed by a line terminator.
     *
     * @param textLabels labels to save
     * @param file       destination file (overwritten)
     * @throws IOException if the file cannot be opened or written
     */
    public void saveTypesAsOps(TextLabels textLabels, File file) throws IOException {
        // Build the text first so a failure while rendering does not leave a truncated file behind.
        String ops = printTypesAsOps(textLabels);
        PrintStream out = new PrintStream(new FileOutputStream(file));
        try {
            out.println(ops);
            // PrintStream swallows write errors; surface them to the caller.
            if (out.checkError()) {
                throw new IOException("error writing " + file);
            }
        } finally {
            out.close();
        }
    }

    /**
     * Writes one line per labeled span: the type name, optionally followed by
     * {@code :docId:lo:hi}, then a tab and the span text with newlines
     * replaced by spaces.
     *
     * @param textLabels labels to save
     * @param file       destination file (overwritten)
     * @param z          whether to include the document id and character offsets
     * @throws IOException if the file cannot be opened or written
     */
    public void saveTypesAsStrings(TextLabels textLabels, File file, boolean z) throws IOException {
        PrintStream out = new PrintStream(new BufferedOutputStream(new FileOutputStream(file)));
        try {
            for (String str : textLabels.getTypes()) {
                Span.Looper instances = textLabels.instanceIterator(str);
                while (instances.hasNext()) {
                    Span labeled = instances.nextSpan();
                    out.print(str);
                    if (z) {
                        out.print(":" + labeled.getDocumentId() + ":" + labeled.getTextToken(0).getLo() + ":" + labeled.getTextToken(labeled.size() - 1).getHi());
                    }
                    out.println("\t" + labeled.asString().replace('\n', ' '));
                }
            }
            // PrintStream swallows write errors; surface them to the caller.
            if (out.checkError()) {
                throw new IOException("error writing " + file);
            }
        } finally {
            // Release the file handle even when iterating the labels fails.
            out.close();
        }
    }

    /**
     * Creates the directory {@code file} and writes one file per document
     * into it, named after the document id and containing the XML markup
     * produced by {@link #createXMLmarkup}.
     *
     * @param textLabels labels (and text base) to save
     * @param file       directory to create; must not exist yet
     * @throws IOException if the directory cannot be created or a document
     *         file cannot be written
     */
    public void saveDocsWithEmbeddedTypes(TextLabels textLabels, File file) throws IOException {
        Span.Looper docs = textLabels.getTextBase().documentSpanIterator();
        if (!file.mkdir()) {
            throw new IOException("Could not create directory named: " + file);
        }
        while (docs.hasNext()) {
            String docId = docs.nextSpan().getDocumentId();
            // Render before opening the file so a markup failure leaves no empty file behind.
            String markup = createXMLmarkup(docId, textLabels);
            File target = new File(file, docId);
            PrintStream out = new PrintStream(new FileOutputStream(target));
            try {
                out.println(markup);
                // PrintStream swallows write errors; surface them to the caller.
                if (out.checkError()) {
                    throw new IOException("error writing " + target);
                }
            } finally {
                out.close();
            }
        }
    }

    /**
     * Returns the text of document {@code str} wrapped in {@code <root>} with
     * a tag inserted at every boundary of a labeled span. Between two
     * consecutive boundaries the text is enclosed in a tag named after the
     * single type open there, or in {@code <overlap value="a,b,...">} when
     * several types are open at once; text covered by no type is left bare.
     *
     * <p>NOTE(review): neither the document text nor the type names are XML
     * escaped, and boundary marks that share a position are applied in
     * hash-set iteration order.
     *
     * @param str        id of the document to mark up
     * @param textLabels labels (and text base) to read from
     * @return the marked-up document text
     */
    public String markupDocumentSpan(String str, TextLabels textLabels) {
        // boundary span -> set of {"begin" | "end", type name} marks at that boundary
        TreeMap boundaryMarks = new TreeMap();
        for (String typeName : textLabels.getTypes()) {
            for (Span.Looper instances = textLabels.instanceIterator(typeName, str); instances.hasNext();) {
                Span labeled = instances.nextSpan();
                setBoundary(boundaryMarks, "begin", typeName, labeled.getLeftBoundary());
                setBoundary(boundaryMarks, "end", typeName, labeled.getRightBoundary());
            }
        }

        String text = textLabels.getTextBase().documentSpan(str).asString();
        StringBuilder out = new StringBuilder("<root>");
        int copiedUpTo = 0;
        TreeSet openTypes = new TreeSet();
        String pendingTag = null;

        for (Iterator boundaryIt = boundaryMarks.keySet().iterator(); boundaryIt.hasNext();) {
            Span boundary = (Span) boundaryIt.next();

            // Update the set of types that are open from this boundary onwards.
            for (Iterator markIt = ((Set) boundaryMarks.get(boundary)).iterator(); markIt.hasNext();) {
                String[] mark = (String[]) markIt.next();
                if ("begin".equals(mark[0])) {
                    openTypes.add(mark[1]);
                } else {
                    openTypes.remove(mark[1]);
                }
            }

            // Character offset of the boundary: start of the token it precedes,
            // or end of the last token when it lies past the final token.
            Span doc = boundary.documentSpan();
            int tokenIndex = boundary.documentSpanStartIndex();
            int offset;
            if (tokenIndex < doc.size()) {
                offset = doc.subSpan(tokenIndex, 1).getTextToken(0).getLo();
            } else {
                offset = doc.getTextToken(doc.size() - 1).getHi();
            }

            out.append(text.substring(copiedUpTo, offset));
            if (pendingTag != null) {
                out.append("</").append(pendingTag).append(">");
            }

            String nextTag = null;
            int openCount = openTypes.size();
            if (openCount == 1) {
                nextTag = (String) openTypes.iterator().next();
                out.append("<").append(nextTag).append(">");
            } else if (openCount > 1) {
                nextTag = "overlap";
                StringBuilder joined = new StringBuilder();
                for (Iterator openIt = openTypes.iterator(); openIt.hasNext();) {
                    if (joined.length() > 0) {
                        joined.append(",");
                    }
                    joined.append((String) openIt.next());
                }
                out.append("<").append(nextTag).append(" value=\"").append(joined.toString()).append("\">");
            }

            copiedUpTo = offset;
            pendingTag = nextTag;
        }

        out.append(text.substring(copiedUpTo, text.length()));
        out.append("</root>");
        return out.toString();
    }

    /**
     * Returns the contents of document {@code str} wrapped in {@code <root>},
     * with every labeled span enclosed in an element named after its type.
     * Labels may nest but must not cross.
     *
     * <p>The method works in three steps: collect the token range of every
     * label, order the labels so that enclosing labels come first (rejecting
     * crossing ones), then interleave open and close tags with the text.
     *
     * <p>NOTE(review): neither the document text nor the type names are XML
     * escaped.
     *
     * @param str        id of the document to mark up
     * @param textLabels labels (and text base) to read from
     * @return the marked-up document contents
     * @throws IllegalArgumentException if two labels partially overlap
     */
    public String createXMLmarkup(String str, TextLabels textLabels) {
        Span docSpan = textLabels.getTextBase().documentSpan(str);
        String contents = docSpan.getDocumentContents();

        // Step 1: token range (first and last token index, inclusive) of every label.
        ArrayList<LabelInfo> collected = new ArrayList<LabelInfo>();
        for (String typeName : textLabels.getTypes()) {
            Span.Looper instances = textLabels.instanceIterator(typeName, str);
            while (instances.hasNext()) {
                Span labeled = instances.nextSpan();
                int first = labeled.documentSpanStartIndex();
                collected.add(new LabelInfo(labeled, typeName, first, (first + labeled.size()) - 1));
            }
        }

        // Step 2: insertion sort by start (ascending), then end (descending), so
        // that an enclosing label always precedes the labels nested inside it.
        // Every already-placed label is checked for crossing, not only those in
        // front of the insertion point, and sharing a single token counts as
        // crossing because the ranges are inclusive.
        ArrayList<LabelInfo> ordered = new ArrayList<LabelInfo>(collected.size());
        for (LabelInfo candidate : collected) {
            int insertAt = -1;
            for (int j = 0; j < ordered.size(); j++) {
                LabelInfo placed = ordered.get(j);
                boolean crossesIntoPlaced = candidate.start < placed.start
                        && candidate.end >= placed.start
                        && candidate.end < placed.end;
                boolean crossesOutOfPlaced = candidate.start > placed.start
                        && candidate.start <= placed.end
                        && candidate.end > placed.end;
                if (crossesIntoPlaced || crossesOutOfPlaced) {
                    throw new IllegalArgumentException("Labels contain overalpping spans, cannot save as XML format.");
                }
                if (insertAt < 0 && (candidate.start < placed.start
                        || (candidate.start == placed.start && candidate.end >= placed.end))) {
                    insertAt = j;
                }
            }
            if (insertAt > -1) {
                ordered.add(insertAt, candidate);
            } else {
                ordered.add(candidate);
            }
        }

        // Step 3a: open tags, in label order.
        ArrayList<TagInfo> tags = new ArrayList<TagInfo>(ordered.size() * 2);
        for (LabelInfo label : ordered) {
            tags.add(new TagInfo(label.start, "<" + label.type + ">", true));
        }

        // Step 3b: close tags. A close tag goes after every tag attached to an
        // earlier-or-equal token, except close tags on the same token: those
        // belong to enclosing labels (processed earlier), so the inner label
        // must be closed in front of them to keep the elements properly nested.
        for (LabelInfo label : ordered) {
            int insertAt = tags.size();
            for (int j = 0; j < tags.size(); j++) {
                TagInfo existing = tags.get(j);
                if (label.end < existing.pos || (label.end == existing.pos && !existing.isOpenTag)) {
                    insertAt = j;
                    break;
                }
            }
            tags.add(insertAt, new TagInfo(label.end, "</" + label.type + ">", false));
        }

        // Step 3c: copy the text, dropping each tag at its character offset:
        // open tags before their token, close tags after it.
        StringBuilder out = new StringBuilder("<root>");
        int copiedUpTo = 0;
        for (TagInfo tag : tags) {
            int offset;
            if (tag.pos >= docSpan.size()) {
                offset = contents.length();
            } else if (tag.isOpenTag) {
                offset = docSpan.subSpan(tag.pos, 1).getTextToken(0).getLo();
            } else {
                offset = docSpan.subSpan(tag.pos, 1).getTextToken(0).getHi();
            }
            out.append(contents.substring(copiedUpTo, offset));
            out.append(tag.tag);
            copiedUpTo = offset;
        }
        out.append(contents.substring(copiedUpTo, contents.length()));
        out.append("</root>");
        return out.toString();
    }

    /**
     * Records a {@code {str, str2}} mark for the boundary {@code span},
     * creating the set of marks for that boundary on first use.
     *
     * @param treeMap map from boundary span to its set of marks
     * @param str     kind of mark ({@code "begin"} or {@code "end"} at the call sites in this class)
     * @param str2    type name the mark belongs to
     * @param span    the boundary the mark is attached to
     */
    private void setBoundary(TreeMap treeMap, String str, String str2, Span span) {
        Set marks = (Set) treeMap.get(span);
        if (marks == null) {
            marks = new HashSet();
            treeMap.put(span, marks);
        }
        String[] mark = {str, str2};
        marks.add(mark);
    }

    /**
     * Lists every labeled span as an element named after its type, carrying
     * the character offsets of the span as {@code lo} / {@code hi} and the
     * span text as content, all wrapped in {@code <extractions>}.
     *
     * <p>NOTE(review): attribute values are written unquoted and the span
     * text is not escaped, so the result is not guaranteed to be well-formed
     * XML.
     *
     * @param textLabels labels to render
     * @return the extractions listing
     */
    public String saveTypesAsXML(TextLabels textLabels) {
        StringBuilder xml = new StringBuilder("<extractions>\n");
        for (String typeName : textLabels.getTypes()) {
            for (Span.Looper instances = textLabels.instanceIterator(typeName); instances.hasNext();) {
                Span labeled = instances.nextSpan();
                int lo = labeled.getTextToken(0).getLo();
                int hi = labeled.getTextToken(labeled.size() - 1).getHi();
                xml.append("  <").append(typeName)
                        .append(" lo=").append(lo)
                        .append(" hi=").append(hi)
                        .append(">").append(labeled.asString())
                        .append("</").append(typeName).append(">\n");
            }
        }
        xml.append("</extractions>\n");
        return xml.toString();
    }
}
