package edu.cmu.minorthird.text;

import edu.cmu.minorthird.text.Span;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.TreeSet;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/cmu/minorthird/text/SpanTypeTokenizer.class */
/**
 * Tokenizer that re-tokenizes a document so that every span of a given type,
 * as recorded in a {@link TextLabels} set, becomes a single token. Text outside
 * those spans keeps the tokenization already stored for the document in the
 * labels' text base.
 *
 * <p>The parent tokenizer is the tokenizer of the labels' text base; it is used
 * for plain strings and as a fallback for documents the labels do not cover.
 */
public class SpanTypeTokenizer extends CompoundTokenizer {
    private static final Logger log = Logger.getLogger(SpanTypeTokenizer.class);
    private final String spanType;
    private final TextLabels labels;

    /**
     * Creates a tokenizer that merges spans of the given type into single tokens.
     *
     * @param str the span type whose instances are collapsed into one token each
     * @param textLabels the labels holding the spans; its text base also supplies
     *     the parent tokenizer
     */
    public SpanTypeTokenizer(String str, TextLabels textLabels) {
        this.spanType = str;
        this.labels = textLabels;
        this.parentTokenizer = textLabels.getTextBase().getTokenizer();
    }

    /** Returns the span type whose instances are collapsed into single tokens. */
    public String getSpanType() {
        return this.spanType;
    }

    /** Returns the labels set the spans are read from. */
    public TextLabels getTextLabels() {
        return this.labels;
    }

    /**
     * Tokenizes a plain string by delegating to the parent tokenizer; span
     * information is not consulted because a bare string has no document id.
     */
    @Override
    public String[] splitIntoTokens(String str) {
        return this.parentTokenizer.splitIntoTokens(str);
    }

    /**
     * Tokenizes a document, emitting one token per span of the configured type
     * and a copy of the stored token everywhere else.
     *
     * <p>Falls back to the parent tokenizer (after logging a warning) when the
     * labels' text base has no document with the same id, or when that
     * document's text differs from the text of {@code document}.
     *
     * <p>NOTE(review): the merge assumes the span iterator yields spans in
     * document order and that spans do not overlap; confirm against
     * {@code TextLabels.instanceIterator}.
     *
     * @param document the document to tokenize
     * @return the new tokens, all created against {@code document}
     */
    @Override
    public TextToken[] splitIntoTokens(Document document) {
        String documentId = document.getId();
        Document labeledDocument = this.labels.getTextBase().getDocument(documentId);
        if (labeledDocument == null) {
            log.warn("Labels for document with id: " + documentId + " are not available, will tokenize using base tokenizer.");
            return this.parentTokenizer.splitIntoTokens(document);
        }
        if (!labeledDocument.getText().equals(document.getText())) {
            log.warn("Document with id: " + documentId + " differs from the document in the labels set with the same ID.  Will tokenize using base tokenizer.");
            return this.parentTokenizer.splitIntoTokens(document);
        }

        // Walk the stored tokens in their natural (sorted) order.
        TreeSet<TextToken> sortedTokens = new TreeSet<TextToken>();
        for (TextToken token : labeledDocument.getTokens()) {
            sortedTokens.add(token);
        }

        ArrayList<TextToken> result = new ArrayList<TextToken>();
        Iterator<TextToken> remaining = sortedTokens.iterator();
        // null means "no stored tokens left"; also covers a document with no tokens.
        TextToken current = nextOrNull(remaining);

        Span.Looper spans = this.labels.instanceIterator(this.spanType, documentId);
        while (spans.hasNext()) {
            Span span = spans.nextSpan();
            int spanStart = span.getTextToken(0).getLo();

            // Copy every stored token that starts before this span.
            while (current != null && current.getLo() < spanStart) {
                result.add(copyInto(document, current));
                current = nextOrNull(remaining);
            }

            // The whole span becomes one token.
            result.add(new TextToken(document, spanStart, span.asString().length()));

            // Skip the stored tokens the span covers; afterwards current is the
            // first token past the span, or null if the tokens are exhausted.
            for (int i = 0; i < span.size(); i++) {
                current = nextOrNull(remaining);
            }
        }

        // Copy whatever follows the last span.
        if (current != null) {
            result.add(copyInto(document, current));
        }
        while (remaining.hasNext()) {
            result.add(copyInto(document, remaining.next()));
        }
        return result.toArray(new TextToken[0]);
    }

    /** Returns the iterator's next token, or {@code null} when it is exhausted. */
    private static TextToken nextOrNull(Iterator<TextToken> tokens) {
        return tokens.hasNext() ? tokens.next() : null;
    }

    /** Creates a token for {@code document} with the same offset and length as {@code token}. */
    private static TextToken copyInto(Document document, TextToken token) {
        return new TextToken(document, token.getLo(), token.getLength());
    }
}
