/*
 * Decompiled with CFR 0.152.
 */
package org.nltk.mallet;

import edu.umass.cs.mallet.base.fst.CRF4;
import edu.umass.cs.mallet.base.fst.SimpleTagger;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.pipe.iterator.LineGroupIterator;
import edu.umass.cs.mallet.base.pipe.iterator.PipeInputIterator;
import edu.umass.cs.mallet.base.types.Alphabet;
import edu.umass.cs.mallet.base.types.FeatureSelection;
import edu.umass.cs.mallet.base.types.FeatureSequence;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.InstanceList;
import edu.umass.cs.mallet.base.util.CommandOption;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectOutputStream;
import java.io.Reader;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import org.nltk.mallet.CRFInfo;

/**
 * Command-line entry point that trains a {@link CRF4} tagger.
 *
 * <p>The program takes two options: {@code --train-file}, the training data, and
 * {@code --model-file}, a zip archive that must already contain a {@code crf-info.xml}
 * entry describing the CRF. After training, the model file is rewritten so that it holds
 * the original {@code crf-info.xml} bytes plus a {@code crf-model.ser} entry with the
 * serialized CRF.
 */
public class TrainCRF {
    /** Name of the zip entry holding the CRF description. */
    private static final String CRF_INFO_ENTRY = "crf-info.xml";

    /** Name of the zip entry that receives the serialized, trained CRF. */
    private static final String CRF_MODEL_ENTRY = "crf-model.ser";

    /** Separator between line groups in the training file: an empty or whitespace-only line. */
    private static final Pattern BLANK_LINE = Pattern.compile("^\\s*$");

    private static final CommandOption.File trainFileOption = new CommandOption.File(TrainCRF.class, "train-file", "FILENAME", true, null, "The filename for the training data.", null);
    private static final CommandOption.File modelFileOption = new CommandOption.File(TrainCRF.class, "model-file", "FILENAME", true, null, "The CRF model file, a zip file containing crf-info.xml. TrainCRF will add crf-model.ser to this file.", null);
    private static final CommandOption.List commandOptions = new CommandOption.List("Train a CRF tagger.", new CommandOption[]{trainFileOption, modelFileOption});

    /**
     * Builds a CRF as described by {@code cRFInfo} and trains it on the contents of {@code file}.
     *
     * <p>The training file is split into line groups at blank lines and fed through a
     * {@code SimpleTaggerSentence2FeatureVectorSequence} pipe. States come from
     * {@code cRFInfo.stateInfoList} when it is non-null; otherwise they are derived from the
     * training data according to {@code cRFInfo.stateStructure}.
     *
     * @param file the training data file (read with the platform default charset)
     * @param cRFInfo the CRF configuration
     * @return the trained CRF
     * @throws FileNotFoundException if {@code file} cannot be opened for reading
     * @throws RuntimeException if no explicit state list is given and
     *         {@code cRFInfo.stateStructure} is not one of the recognized structures
     */
    public static CRF4 createCRF(File file, CRFInfo cRFInfo) throws FileNotFoundException {
        FileReader reader = new FileReader(file);
        try {
            SimpleTagger.SimpleTaggerSentence2FeatureVectorSequence featurePipe = new SimpleTagger.SimpleTaggerSentence2FeatureVectorSequence();
            featurePipe.setTargetProcessing(true);
            // Look the default label up in the target alphabet before any training data is read.
            featurePipe.getTargetAlphabet().lookupIndex((Object)cRFInfo.defaultLabel);

            InstanceList trainingData = new InstanceList((Pipe)featurePipe);
            trainingData.add((PipeInputIterator)new LineGroupIterator((Reader)reader, BLANK_LINE, true));

            CRF4 crf = new CRF4((Pipe)featurePipe, null);
            crf.setGaussianPriorVariance(cRFInfo.gaussianVariance);
            crf.setTransductionType(cRFInfo.transductionType);
            addStates(crf, cRFInfo, trainingData);
            applyWeightGroups(crf, cRFInfo);

            crf.train(trainingData, null, null, null, cRFInfo.maxIterations);
            return crf;
        } finally {
            // The reader is kept open until training is done and is always released afterwards.
            closeQuietly(reader);
        }
    }

    /** Adds states to {@code crf}: the explicit list if present, else a structure derived from the data. */
    private static void addStates(CRF4 crf, CRFInfo cRFInfo, InstanceList trainingData) {
        if (cRFInfo.stateInfoList != null) {
            for (CRFInfo.StateInfo state : cRFInfo.stateInfoList) {
                crf.addState(state.name, state.initialCost, state.finalCost, state.destinationNames, state.labelNames, state.weightNames);
            }
        } else if (cRFInfo.stateStructure == CRFInfo.FULLY_CONNECTED_STRUCTURE) {
            crf.addStatesForLabelsConnectedAsIn(trainingData);
        } else if (cRFInfo.stateStructure == CRFInfo.HALF_CONNECTED_STRUCTURE) {
            crf.addStatesForHalfLabelsConnectedAsIn(trainingData);
        } else if (cRFInfo.stateStructure == CRFInfo.THREE_QUARTERS_CONNECTED_STRUCTURE) {
            crf.addStatesForThreeQuarterLabelsConnectedAsIn(trainingData);
        } else if (cRFInfo.stateStructure == CRFInfo.BILABELS_STRUCTURE) {
            crf.addStatesForBiLabelsConnectedAsIn(trainingData);
        } else {
            throw new RuntimeException("Unexpected state structure " + cRFInfo.stateStructure);
        }
    }

    /** Installs a regex-based feature selection for every weight group listed in {@code cRFInfo}. */
    private static void applyWeightGroups(CRF4 crf, CRFInfo cRFInfo) {
        if (cRFInfo.weightGroupInfoList == null) {
            return;
        }
        for (CRFInfo.WeightGroupInfo group : cRFInfo.weightGroupInfoList) {
            FeatureSelection selection = FeatureSelection.createFromRegex((Alphabet)crf.getInputAlphabet(), (Pattern)Pattern.compile(group.featureSelectionRegex));
            crf.setFeatureSelection(crf.getWeightsIndex(group.name), selection);
        }
    }

    /** Closes {@code reader}, ignoring a failure to close (nothing was written through it). */
    private static void closeQuietly(Reader reader) {
        try {
            reader.close();
        } catch (IOException ignored) {
            // A close failure on a read-only stream cannot lose data; keep the primary outcome.
        }
    }

    /**
     * Computes which label-to-label transitions occur in {@code instanceList}.
     *
     * <p>Entry {@code [a][b]} of the result is {@code true} when the label with alphabet index
     * {@code a} is immediately followed by the label with index {@code b} in some instance's
     * target sequence. If {@code string} is non-null, the whole row for that label is set to
     * {@code true} as well.
     *
     * @param alphabet the alphabet used to map labels to matrix indices
     * @param instanceList instances whose targets are {@link FeatureSequence}s
     * @param string a label whose row is fully set, or {@code null} for none
     * @return a square matrix with one row and column per entry the alphabet had on entry
     * @throws IllegalArgumentException if {@code string}'s index lies outside the matrix
     */
    public boolean[][] labelConnectionsIn(Alphabet alphabet, InstanceList instanceList, String string) {
        int labelCount = alphabet.size();
        boolean[][] connections = new boolean[labelCount][labelCount];
        for (int instanceIndex = 0; instanceIndex < instanceList.size(); ++instanceIndex) {
            Instance instance = instanceList.getInstance(instanceIndex);
            FeatureSequence labels = (FeatureSequence)instance.getTarget();
            for (int position = 1; position < labels.size(); ++position) {
                int from = alphabet.lookupIndex(labels.get(position - 1));
                int to = alphabet.lookupIndex(labels.get(position));
                assert (from >= 0 && to >= 0);
                connections[from][to] = true;
            }
        }
        if (string != null) {
            int defaultIndex = alphabet.lookupIndex((Object)string);
            // Guard the row access: an index outside the matrix would otherwise surface as an
            // unexplained ArrayIndexOutOfBoundsException.
            if (defaultIndex < 0 || defaultIndex >= labelCount) {
                throw new IllegalArgumentException("Label " + string + " has index " + defaultIndex + ", outside the " + labelCount + " labels known when the matrix was sized");
            }
            for (int column = 0; column < labelCount; ++column) {
                connections[defaultIndex][column] = true;
            }
        }
        return connections;
    }

    /**
     * Parses the command line, trains a CRF and stores it in the model file.
     *
     * <p>The {@code crf-info.xml} entry is read completely and the archive is closed before
     * training starts; the model file is only rewritten once training has succeeded.
     *
     * @param stringArray the command-line arguments
     * @throws IllegalArgumentException if an argument is unexpected, a required option is
     *         missing, or the model file has no {@code crf-info.xml} entry
     * @throws Exception on any I/O or training failure
     */
    public static void main(String[] stringArray) throws Exception {
        int firstUnparsed = commandOptions.processOptions(stringArray);
        if (firstUnparsed != stringArray.length) {
            commandOptions.printUsage(true);
            throw new IllegalArgumentException("Unexpected arg " + stringArray[firstUnparsed]);
        }
        if (TrainCRF.trainFileOption.value == null) {
            commandOptions.printUsage(true);
            throw new IllegalArgumentException("Expected --train-file FILE");
        }
        if (TrainCRF.modelFileOption.value == null) {
            commandOptions.printUsage(true);
            throw new IllegalArgumentException("Expected --model-file FILE");
        }
        File modelFile = TrainCRF.modelFileOption.value;

        // Read the description once; the same bytes are parsed and later copied back verbatim.
        byte[] crfInfoBytes = readCrfInfoBytes(modelFile);
        CRFInfo crfInfo = new CRFInfo(new ByteArrayInputStream(crfInfoBytes));

        CRF4 crf = TrainCRF.createCRF(TrainCRF.trainFileOption.value, crfInfo);
        writeModelFile(modelFile, crfInfoBytes, crf);
    }

    /** Returns the complete contents of the {@code crf-info.xml} entry of {@code modelFile}. */
    private static byte[] readCrfInfoBytes(File modelFile) throws IOException {
        ZipFile zipFile = new ZipFile(modelFile);
        try {
            ZipEntry infoEntry = zipFile.getEntry(CRF_INFO_ENTRY);
            if (infoEntry == null) {
                throw new IllegalArgumentException("Model file " + modelFile + " does not contain " + CRF_INFO_ENTRY);
            }
            InputStream in = zipFile.getInputStream(infoEntry);
            try {
                // Loop until end of stream: a single read() may return fewer bytes than asked
                // for, and the entry's recorded size may be unknown (-1).
                ByteArrayOutputStream collected = new ByteArrayOutputStream();
                byte[] chunk = new byte[4096];
                int count;
                while ((count = in.read(chunk)) != -1) {
                    collected.write(chunk, 0, count);
                }
                return collected.toByteArray();
            } finally {
                in.close();
            }
        } finally {
            // Release the archive before the same path is opened for writing.
            zipFile.close();
        }
    }

    /** Rewrites {@code modelFile} as a zip holding the CRF description and the serialized CRF. */
    private static void writeModelFile(File modelFile, byte[] crfInfoBytes, CRF4 crf) throws IOException {
        ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(modelFile));
        try {
            zipOut.putNextEntry(new ZipEntry(CRF_INFO_ENTRY));
            zipOut.write(crfInfoBytes);
            zipOut.closeEntry();

            zipOut.putNextEntry(new ZipEntry(CRF_MODEL_ENTRY));
            ObjectOutputStream objectOut = new ObjectOutputStream(zipOut);
            objectOut.writeObject(crf);
            // Flush rather than close: closing would also close the underlying zip stream
            // before the entry has been finished.
            objectOut.flush();
            zipOut.closeEntry();
        } finally {
            zipOut.close();
        }
    }
}

