package jangada;

import edu.cmu.minorthird.classify.BasicDataset;
import edu.cmu.minorthird.classify.BatchVersion;
import edu.cmu.minorthird.classify.BinaryClassifier;
import edu.cmu.minorthird.classify.ClassLabel;
import edu.cmu.minorthird.classify.DatasetClassifierTeacher;
import edu.cmu.minorthird.classify.Example;
import edu.cmu.minorthird.classify.Feature;
import edu.cmu.minorthird.classify.MutableInstance;
import edu.cmu.minorthird.classify.algorithms.linear.VotedPerceptron;
import edu.cmu.minorthird.text.FancyLoader;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.TextBase;
import edu.cmu.minorthird.text.TextLabels;
import edu.cmu.minorthird.util.IOUtil;
import edu.cmu.minorthird.util.LineProcessingUtil;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;

/* loaded from: input_file:jangada/SigFileDetector.class */
public class SigFileDetector {
    /**
     * Classifier consulted by {@link #hasSig(String)}. It is assigned in the
     * constructor from the serialized model resource and stays {@code null}
     * when that load fails.
     */
    private BinaryClassifier classifier;
    // NOTE(review): the visible class declaration does not implement
    // Serializable, so this constant appears to have no effect here - confirm
    // before removing.
    public static final long serialVersionUID = 1;
    /** Resource name handed to {@code getClass().getResourceAsStream(...)} to locate the serialized model. */
    private String modelname = "models/VPsigDetectionModel";
    // Not referenced anywhere else in the visible source; presumably a format
    // version marker - TODO confirm against callers outside this file.
    public final int CURRENT_VERSION_NUMBER = 1;

    /* loaded from: input_file:jangada/SigFileDetector$SigDetectorByLine.class */
    public class SigDetectorByLine {
        // Index (into the split message) of the first line of the window that
        // processMailFile scans; set by preProcessMailMessage.
        private int firstSearchLine;
        // Index of the line that matched the "From:" pattern in
        // preProcessMailMessage. 0 is treated as "no From line" by
        // processMailFile, which only uses this index when it is > 0.
        private int fromLine;
        // Index of the last line of the scanned window; set by
        // preProcessMailMessage. Note that clear() does not reset it.
        private int lastSearchLine;
        // Size of the trailing line window (10); preProcessMailMessage computes
        // a window of this many lines.
        private final int tail_lines = 10;
        // Feature container filled by processMailFile; clear() replaces it with
        // a fresh instance before each message is processed.
        private MutableInstance instance = new MutableInstance();

        /**
         * Creates an extractor whose search window and From-line index all
         * start at 0. The feature container is created by its field
         * initializer.
         */
        public SigDetectorByLine() {
            // Each index is set exactly once; the decompiled original repeated
            // the first two assignments and left lastSearchLine implicit.
            this.firstSearchLine = 0;
            this.fromLine = 0;
            this.lastSearchLine = 0;
        }

        /**
         * Builds the feature instance for a single message.
         *
         * <p>Resets the per-message state, splits the text into lines while
         * locating the search window, and then extracts the line features
         * from that window.
         *
         * @param str full text of the message
         * @return the instance holding the features found in the window
         * @throws IllegalStateException if the search window ends up empty
         *         (raised by {@code processMailFile})
         */
        // Decompiler note (JADX): access modifier was widened from private.
        public MutableInstance getInstance(String str) {
            clear();
            final String[] messageLines = preProcessMailMessage(str);
            return processMailFile(messageLines);
        }

        /**
         * Resets the state carried over from a previous message: a fresh
         * feature container and zeroed window-start and From-line indexes.
         * {@code lastSearchLine} is left untouched here; it is overwritten by
         * {@code preProcessMailMessage} on every call.
         */
        private void clear() {
            this.instance = new MutableInstance();
            this.fromLine = 0;
            this.firstSearchLine = 0;
        }

        /**
         * Extracts binary features from the lines in the current search
         * window, walking from {@code lastSearchLine} up to
         * {@code firstSearchLine}.
         *
         * <p>Every feature name is suffixed with the line's distance from the
         * bottom of the window (0 for {@code lastSearchLine}, 1 for the line
         * above it, and so on).
         *
         * @param strArr the message split into lines
         * @return the instance that accumulated the features
         * @throws IllegalStateException if the window holds at most one line
         *         ({@code lastSearchLine <= firstSearchLine})
         */
        private MutableInstance processMailFile(String[] strArr) {
            if (this.firstSearchLine >= this.lastSearchLine) {
                throw new IllegalStateException("ERROR parsing message");
            }

            // {feature name, regex} pairs, evaluated in this order for each line.
            final String[][] regexFeatures = {
                {"sigMarker", "^[\\s]*---*[\\s]*$"},
                {"sigBeginMarker", "^[\\s|\\t]?[\\s|\\t]?---?[\\s|\\t]*$"},
                {"otherMarkers", "^[\\s|\\t]*([\\*]|#|[\\+]|[\\^]|-|[\\~]|[\\&]|[////]|[\\$]|_|[\\!]|[\\/]|[\\%]|[\\:]|[\\=]){10,}[\\s]*$"},
                {"specWords", "Dept\\.|University|Corp\\.|Corporations?|College|Ave\\.|Laboratory|[D|d]isclaimer|Division|Professor|Laboratories|Institutes?|Services|Engineering|Director|Sciences?|Address|Manager|Fax|Office|Mobile|Phone|Street|St\\.|Avenue"},
                {"emailA", "[^(\\<|\\>)][\\w|\\+|\\.|\\_|\\-]+\\@[\\w|\\-|\\_|\\.]+\\.[a-zA-z]{2,5}[^(\\<|\\>)]"},
                {"emailB", "[^(\\<|\\>)][(\\w|\\+|\\_|\\-)]+\\@[(\\w|\\-|\\_)]+[\\.][a-zA-z]{2,5}"},
                {"url", "[\\s|\\t](http\\:\\/\\/)*(www|web|w3)*(\\w[\\w|\\-]+)\\.(\\w[\\w|\\-]+)\\.(\\w[\\w|\\-]+)*[\\w]+"},
                {"phone", "(\\-?\\d)*\\d\\d\\s?\\-?\\s?\\d\\d\\d\\d"},
                {"completeName", "[A-Z][a-z]+\\s\\s?[A-Z][\\.]?\\s\\s?[A-Z][a-z]+"},
                {"endQuote", "\"$"},
                {"header", "^[\\w|\\-]+\\:"}
            };

            for (int lineIdx = this.lastSearchLine; lineIdx >= this.firstSearchLine; lineIdx--) {
                final int offset = this.lastSearchLine - lineIdx;
                final String line = strArr[lineIdx];

                for (int k = 0; k < regexFeatures.length; k++) {
                    if (LineProcessingUtil.lineMatcher(regexFeatures[k][1], line)) {
                        addLineFeature(regexFeatures[k][0], offset);
                    }
                }

                // Exactly one of the blank / non-blank features is emitted per line.
                final boolean blank = LineProcessingUtil.lineMatcher("^[\\s|\\t]*$", line);
                addLineFeature(blank ? "BlankL" : "elseBlankL", offset);

                // Indentation bucket: 1, 2, or 3-and-more; nothing otherwise.
                final int indent = LineProcessingUtil.indentNumber(line);
                if (indent >= 3) {
                    addLineFeature("indentTri", offset);
                } else if (indent == 2) {
                    addLineFeature("indentBi", offset);
                } else if (indent == 1) {
                    addLineFeature("indentUni", offset);
                }

                // Punctuation ratio: one base feature, then cumulative thresholds.
                final double punct = LineProcessingUtil.punctuationPercentage(line);
                addLineFeature(punct > 0.2d ? "punctPerc20" : "punctPerc0", offset);
                if (punct > 0.5d) {
                    addLineFeature("punctPerc50", offset);
                }
                if (punct > 0.75d) {
                    addLineFeature("punctPerc75", offset);
                }
                if (punct > 0.9d) {
                    addLineFeature("punctPerc90", offset);
                }

                // Comparison with the previous line is skipped for the first line of the message.
                if (lineIdx > 0
                        && LineProcessingUtil.startWithSameInitialPunctCharacters(line, strArr[lineIdx - 1])) {
                    addLineFeature("prevsicline", offset);
                }

                // A From line index of 0 means "none recorded", so no name check is made.
                if (this.fromLine > 0
                        && SigFilePredictor.detectFromName(strArr[this.fromLine], line)) {
                    addLineFeature("fromL", offset);
                }
            }
            return this.instance;
        }

        /** Adds the binary feature named {@code name} followed by {@code offset} to the current instance. */
        private void addLineFeature(String name, int offset) {
            this.instance.addBinary(new Feature(name + offset));
        }

        /**
         * Splits the message into lines and determines the window of lines
         * that {@code processMailFile} will scan.
         *
         * <p>Sets {@code lastSearchLine} (last non-blank line),
         * {@code fromLine} (index of the line matching the "From:" pattern,
         * or 0) and {@code firstSearchLine} (start of a window of at most
         * {@code tail_lines} lines that does not reach into the header lines
         * following the From line).
         *
         * <p>An empty or all-blank message no longer runs off the front of
         * the array while skipping trailing blank lines; it falls through to
         * the fallback window instead.
         *
         * @param str full text of the message
         * @return the message split on {@code "\n"}
         */
        private String[] preProcessMailMessage(String str) {
            final String[] lines = str.split("\n");
            final int lineCount = lines.length;

            // Skip trailing blank lines. The index guard matters: split() can
            // return an empty array, and every line may be blank.
            int lastNonBlank = lineCount - 1;
            while (lastNonBlank >= 0 && LineProcessingUtil.lineMatcher("^[\\s]*$", lines[lastNonBlank])) {
                lastNonBlank--;
            }
            this.lastSearchLine = lastNonBlank;

            // Locate the From line and the end of the header run that follows it.
            // fromIndex == 0 doubles as "not found yet", so a From line at
            // index 0 does not start the header run (kept as in the original).
            int fromIndex = 0;
            int bodyStart = 0;
            for (int idx = 0; idx <= this.lastSearchLine; idx++) {
                if (LineProcessingUtil.lineMatcher("^\\s?\\s?From\\:", lines[idx])) {
                    fromIndex = idx;
                    bodyStart = idx + 1;
                }
                if (fromIndex > 0) {
                    if (LineProcessingUtil.lineMatcher("^\\s?\\s?[a-zA-Z][a-z|A-Z|\\-|\\_]+\\:", lines[idx])) {
                        bodyStart = idx + 1;
                    } else {
                        // First non-header line after the From line ends the scan.
                        break;
                    }
                }
            }
            this.fromLine = fromIndex;

            // Window of tail_lines lines ending at lastSearchLine, clipped so
            // that it never starts inside the header run.
            this.firstSearchLine = Math.max(bodyStart, (this.lastSearchLine - this.tail_lines) + 1);

            // Fallback: if the window holds at most one line, use the raw tail
            // of the message instead. With the window computed above the
            // difference cannot exceed tail_lines - 1, so the "> 11" clause
            // looks unreachable; it is kept unchanged.
            if (this.lastSearchLine <= this.firstSearchLine
                    || this.lastSearchLine - this.firstSearchLine > 11) {
                this.lastSearchLine = lineCount - 1;
                this.firstSearchLine = Math.max(0, (this.lastSearchLine - this.tail_lines) + 1);
            }
            return lines;
        }
    }

    /**
     * Creates a detector and tries to load the serialized classifier from the
     * resource named by {@code modelname}.
     *
     * <p>Loading is best-effort: if the resource is missing or cannot be
     * read, a message is printed and {@code classifier} stays {@code null}.
     * The constructor must not fail in that case because {@code main}
     * constructs a detector in order to create a model in the first place.
     */
    public SigFileDetector() {
        InputStream modelStream = getClass().getResourceAsStream(this.modelname);
        if (modelStream == null) {
            // getResourceAsStream signals a missing resource with null, not an exception.
            System.out.println("COULD NOT FIND MODEL FILE " + this.modelname);
            return;
        }
        try {
            this.classifier = (BinaryClassifier) IOUtil.loadSerialized(modelStream);
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("COULD NOT FIND MODEL FILE " + this.modelname);
        } finally {
            try {
                modelStream.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing the model stream fails.
            }
        }
    }

    /**
     * Tells whether the classifier scores the given message as containing a
     * signature.
     *
     * @param str full text of the message
     * @return {@code true} when the classifier score for the message's
     *         features is greater than or equal to 0
     * @throws IllegalStateException if the model was not loaded by the
     *         constructor, or if feature extraction cannot establish a
     *         search window for the message
     */
    public boolean hasSig(String str) {
        if (this.classifier == null) {
            // Fail with a clear cause instead of a bare NullPointerException.
            throw new IllegalStateException(
                    "Signature model " + this.modelname + " was not loaded");
        }
        MutableInstance features = new SigDetectorByLine().getInstance(str);
        return this.classifier.score(features) >= 0.0d;
    }

    /**
     * Trains a signature-detection model from labelled documents and saves it
     * to the file {@code mysigDetectionModel} in the working directory.
     *
     * <p>Each document in the text base becomes one example: its features
     * come from {@code SigDetectorByLine.getInstance}, and its label is
     * positive when the document span has type {@code "sig"} and negative
     * otherwise. Progress messages are printed to standard output.
     *
     * @param textLabels labelled documents to train on
     * @throws IOException if the trained model cannot be written
     */
    private void createSigModel(TextLabels textLabels) throws IOException {
        final String outputName = "mysigDetectionModel";
        SigDetectorByLine extractor = new SigDetectorByLine();
        TextBase textBase = textLabels.getTextBase();
        BasicDataset dataset = new BasicDataset();

        for (Span.Looper docs = textBase.documentSpanIterator(); docs.hasNext();) {
            Span doc = docs.nextSpan();
            // Features are extracted before the label lookup, as before.
            MutableInstance features = extractor.getInstance(doc.asString());
            ClassLabel label = textLabels.hasType(doc, "sig")
                    ? ClassLabel.binaryLabel(1.0d)
                    : ClassLabel.binaryLabel(-1.0d);
            dataset.add(new Example(features, label));
        }

        System.out.println("dataset size = " + dataset.size());
        System.out.println("training the Model...");
        Serializable train = new DatasetClassifierTeacher(dataset).train(new BatchVersion(new VotedPerceptron(), 15));
        System.out.println("saving model in file..." + outputName);
        IOUtil.saveSerialized(train, new File(outputName));
    }

    /**
     * Command-line entry point.
     *
     * <p>With a first argument starting with {@code -create} or
     * {@code create}, trains a model from the labels file named by the second
     * argument. Otherwise every argument is treated as a message file and a
     * line is printed saying whether it has a signature. Usage is printed
     * when no arguments are given, or when the create mode is requested
     * without a labels file.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        try {
            if (strArr.length < 1) {
                usage();
                return;
            }
            String mode = strArr[0];
            if (mode.startsWith("-create") || mode.startsWith("create")) {
                if (strArr.length < 2) {
                    // The labels file is mandatory; print usage rather than
                    // failing on the missing argument.
                    usage();
                    return;
                }
                new SigFileDetector().createSigModel(FancyLoader.loadTextLabels(strArr[1]));
                return;
            }
            SigFileDetector detector = new SigFileDetector();
            for (int i = 0; i < strArr.length; i++) {
                String fileName = strArr[i];
                if (detector.hasSig(LineProcessingUtil.readFile(fileName))) {
                    System.out.println(fileName + " has Signature");
                } else {
                    System.out.println(fileName + " has NOT Signature");
                }
            }
        } catch (Exception e) {
            // Top-level boundary: report the failure and exit normally.
            e.printStackTrace();
        }
    }

    /** Prints the two supported command-line forms to standard output. */
    private static void usage() {
        final String[] helpLines = {
            "Usage: SigFileDetector filename1 filename2 ...",
            " OR...",
            "SigFileDetector -create yourfile.bsh"
        };
        for (int i = 0; i < helpLines.length; i++) {
            System.out.println(helpLines[i]);
        }
    }
}
