package edu.berkeley.nlp.PCFGLA;

import edu.berkeley.nlp.PCFGLA.Corpus;
import edu.berkeley.nlp.classify.Linearizer;
import edu.berkeley.nlp.classify.ParsingObjectiveFunction;
import edu.berkeley.nlp.ling.StateSet;
import edu.berkeley.nlp.ling.Tree;
import edu.berkeley.nlp.util.Numberer;
import edu.berkeley.nlp.util.Option;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:edu/berkeley/nlp/PCFGLA/ConditionalTrainer.class */
public class ConditionalTrainer {
    /** Relative tolerance used by {@link #matches(double, double)} when comparing two doubles. */
    private static final double TOL = 1.0E-5d;

    /* loaded from: input_file:edu/berkeley/nlp/PCFGLA/ConditionalTrainer$Options.class */
    /**
     * Command-line options for {@link ConditionalTrainer}.
     *
     * <p>Every field carries an {@code @Option} annotation giving its flag name and help text;
     * the field initializer is the default used when the flag is absent. Note that the last
     * group of options is declared {@code static}, so those values are shared class-wide.
     */
    public static class Options {

        @Option(name = "-out", usage = "Output File for Grammar")
        public String outFileName;

        @Option(name = "-outDir", usage = "Output Directory for Grammar")
        public String outDir;

        @Option(name = "-path", usage = "Path to Corpus")
        public String path = null;

        @Option(name = "-SMcycles", usage = "The number of split&merge iterations (Default: 6)")
        public int numSplits = 6;

        @Option(name = "-mergingPercentage", usage = "Merging percentage (Default: 0.0)")
        public double mergingPercentage = 0.0d;

        @Option(name = "-baseline", usage = "Just read of the MLE baseline grammar")
        public boolean baseline = false;

        // Help text states the default that the initializer actually uses (WSJ).
        // NOTE(review): assumes the enum constant is spelled CHINESE - confirm against Corpus.TreeBankType.
        @Option(name = "-treebank", usage = "Language:  WSJ, CHINESE, GERMAN, CONLL, SINGLEFILE (Default: WSJ)")
        public Corpus.TreeBankType treebank = Corpus.TreeBankType.WSJ;

        // Help text states the default that the initializer actually uses (100).
        @Option(name = "-splitMaxIt", usage = "Maximum number of EM iterations after splitting (Default: 100)")
        public int splitMaxIterations = 100;

        @Option(name = "-splitMinIt", usage = "Minimum number of EM iterations after splitting (Default: 50)")
        public int splitMinIterations = 50;

        @Option(name = "-mergeMaxIt", usage = "Maximum number of EM iterations after merging (Default: 20)")
        public int mergeMaxIterations = 20;

        @Option(name = "-mergeMinIt", usage = "Minimum number of EM iterations after merging (Default: 20)")
        public int mergeMinIterations = 20;

        @Option(name = "-di", usage = "The number of allowed iterations in which the validation likelihood drops. (Default: 6)")
        public int di = 6;

        @Option(name = "-trfr", usage = "The fraction of the training corpus to keep (Default: 1.0)\n")
        public double trainingFractionToKeep = 1.0d;

        @Option(name = "-filter", usage = "Filter rules with prob below this threshold (Default: 1.0e-30)")
        public double filter = 1.0E-30d;

        @Option(name = "-smooth", usage = "Type of grammar smoothing used.")
        public String smooth = "NoSmoothing";

        @Option(name = "-b", usage = "LEFT/RIGHT Binarization (Default: RIGHT)")
        public Binarization binarization = Binarization.RIGHT;

        @Option(name = "-noSplit", usage = "Don't split - just load and continue training an existing grammar (true/false) (Default:false)")
        public boolean noSplit = false;

        @Option(name = "-initializeZero", usage = "Initialize conditional weights with zero")
        public boolean initializeZero = false;

        @Option(name = "-in", usage = "Input File for Grammar")
        public String inFile = null;

        @Option(name = "-randSeed", usage = "Seed for random number generator")
        public int randSeed = 8;

        @Option(name = "-sep", usage = "Set merging threshold for grammar and lexicon separately (Default: false)")
        public boolean separateMergingThreshold = false;

        @Option(name = "-hor", usage = "Horizontal Markovization (Default: 0)")
        public int horizontalMarkovization = 0;

        @Option(name = "-sub", usage = "Number of substates to split (Default: 1)")
        public int nSubStates = 1;

        @Option(name = "-ver", usage = "Vertical Markovization (Default: 1)")
        public int verticalMarkovization = 1;

        @Option(name = "-v", usage = "Verbose/Quiet (Default: Quiet)\n")
        public boolean verbose = false;

        @Option(name = "-r", usage = "Level of Randomness at init (Default: 1)\n")
        public double randomization = 1.0d;

        @Option(name = "-sm1", usage = "Lexicon smoothing parameter 1")
        public double smoothingParameter1 = 0.5d;

        @Option(name = "-sm2", usage = "Lexicon smoothing parameter 2)")
        public double smoothingParameter2 = 0.1d;

        @Option(name = "-spath", usage = "Whether or not to store the best path info (true/false) (Default: true)")
        public boolean findClosedUnaryPaths = true;

        @Option(name = "-unkT", usage = "Threshold for unknown words (Default: 5)")
        public int unkThresh = 5;

        @Option(name = "-doConditional", usage = "Do conditional training")
        public boolean doConditional = false;

        @Option(name = "-regularize", usage = "Regularize during optimization: 0-no regularization, 1-l1, 2-l2")
        public int regularize = 0;

        @Option(name = "-onlyMerge", usage = "Do only a conditional merge")
        public boolean onlyMerge = false;

        @Option(name = "-sigma", usage = "Regularization coefficient")
        public double sigma = 1.0d;

        @Option(name = "-cons", usage = "File with constraints")
        public String cons = null;

        @Option(name = "-nProcess", usage = "Distribute on that many cores")
        public int nProcess = 1;

        @Option(name = "-doNOTprojectConstraints", usage = "Do NOT project constraints")
        public boolean doNOTprojectConstraints = false;

        @Option(name = "-section", usage = "Which section of the corpus to process.")
        public String section = "train";

        @Option(name = "-outputLog", usage = "Print output to this file rather than STDOUT.")
        public String outputLog = null;

        @Option(name = "-maxL", usage = "Skip sentences which are longer than this.")
        public int maxL = 10000;

        @Option(name = "-nChunks", usage = "Store constraints in that many files.")
        public int nChunks = 1;

        @Option(name = "-logT", usage = "Log threshold for pruning")
        public double logT = -10.0d;

        @Option(name = "-lasso", usage = "Start of by regularizing less and make the regularization stronger with time")
        public boolean lasso = false;

        @Option(name = "-hierarchical", usage = "Use hierarchical rules")
        public boolean hierarchical = false;

        @Option(name = "-keepGoldTreeAlive", usage = "Don't prune the gold train when computing constraints")
        public boolean keepGoldTreeAlive = false;

        @Option(name = "-flattenParameters", usage = "Flatten parameters to reduce overconfidence")
        public double flattenParameters = 1.0d;

        @Option(name = "-usePosteriorTraining", usage = "Adam's new objective function")
        public boolean usePosteriorTraining = false;

        @Option(name = "-dontLoad", usage = "Don't load anything from the pipeline")
        public boolean dontLoad = false;

        @Option(name = "-predefinedMaxSplit", usage = "Use predifined number of subcategories")
        public boolean predefinedMaxSplit = false;

        @Option(name = "-collapseUnaries", usage = "Dont throw away trees with unaries, just collapse the unary chains")
        public boolean collapseUnaries = false;

        @Option(name = "-connectedLexicon", usage = "Score each word with the sum of its score and its signature score")
        public boolean connectedLexicon = false;

        @Option(name = "-adaptive", usage = "Use adpatively refined rules")
        public boolean adaptive = false;

        @Option(name = "-checkDerivative", usage = "Check the derivative of the objective function against an estimate with finite difference")
        public boolean checkDerivative = false;

        @Option(name = "-initRandomness", usage = "Amount of randomness to initialize the grammar with")
        public double initRandomness = 1.0d;

        // The help texts of -markUnaryParents and -filterAllUnaries are paired with the flag
        // each one actually describes (they were attached to the opposite flag).
        @Option(name = "-markUnaryParents", usage = "Mark any unary parent with a ^u")
        public boolean markUnaryParents = false;

        @Option(name = "-filterAllUnaries", usage = "Filter all training trees with any unaries (other than lexical and ROOT productions)")
        public boolean filterAllUnaries = false;

        @Option(name = "-filterStupidFrickinWHNP", usage = "Temp hack!")
        public boolean filterStupidFrickinWHNP = false;

        @Option(name = "-initializeDir", usage = "Temp hack!")
        public String initializeDir = null;

        @Option(name = "-allPosteriorsWeight", usage = "Weight for the all posteriors regularizer")
        public double allPosteriorsWeight = 0.0d;

        @Option(name = "-testAll", usage = "Test grammars after each iteration, proceed by splitting the best")
        public boolean testAll = false;

        @Option(name = "-featurizedLexicon", usage = "Use featurized lexicon (no fixed signature classes")
        public boolean featurizedLexicon = false;

        @Option(name = "-spanFeatures", usage = "Use span features")
        public boolean spanFeatures = false;

        // The remaining options are static: they are read through the class, not an instance.

        @Option(name = "-dontSaveGrammarsAfterEachIteration")
        public static boolean dontSaveGrammarsAfterEachIteration = false;

        @Option(name = "-hierarchicalChart")
        public static boolean hierarchicalChart = false;

        @Option(name = "-lockGrammar", usage = "Lock grammar weights, learn only span feature weights")
        public static boolean lockGrammar = false;

        @Option(name = "-useFirstAndLast", usage = "Use first and last span words as span features")
        public static boolean useFirstAndLast = false;

        @Option(name = "-usePreviousAndNext", usage = "Use previous and next span words as span features")
        public static boolean usePreviousAndNext = false;

        @Option(name = "-useBeginAndEndPairs", usage = "Use begin and end word-pairs as span features")
        public static boolean useBeginAndEndPairs = false;

        @Option(name = "-useSyntheticClass", usage = "Distiguish between real and synthetic constituents")
        public static boolean useSyntheticClass = false;

        @Option(name = "-usePunctuation", usage = "Use punctuation cues")
        public static boolean usePunctuation = false;

        // Help text no longer duplicates the -usePunctuation description.
        // NOTE(review): exact semantics of the threshold are not visible here - confirm at the use site.
        @Option(name = "-minFeatureFrequency", usage = "Minimum feature frequency (Default: 0)")
        public static int minFeatureFrequency = 0;
    }

    /* JADX WARN: Removed duplicated region for block: B:222:0x1032  */
    /* JADX WARN: Removed duplicated region for block: B:231:0x111a  */
    /* JADX WARN: Removed duplicated region for block: B:238:0x1195  */
    /* JADX WARN: Removed duplicated region for block: B:242:0x11a1  */
    /* JADX WARN: Removed duplicated region for block: B:245:0x1126 A[ADDED_TO_REGION, EDGE_INSN: B:245:0x1126->B:236:0x1126 BREAK  A[LOOP:7: B:220:0x102d->B:243:?], SYNTHETIC] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Program entry point.
     *
     * <p>The original body was not recovered by the decompiler (see the JADX notes above), so
     * this version is only a placeholder: it performs no work and unconditionally throws.
     *
     * @param r11 command-line arguments (unused by this placeholder)
     * @throws UnsupportedOperationException always
     */
    public static void main(java.lang.String[] r11) {
        /*
            Method dump skipped, instructions count: 4726
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: edu.berkeley.nlp.PCFGLA.ConditionalTrainer.main(java.lang.String[]):void");
    }

    /**
     * Creates a {@link ParsingObjectiveFunction}, combining the explicit arguments with the
     * {@code cons}, {@code nProcess}, {@code doNOTprojectConstraints} and
     * {@code connectedLexicon} settings read from {@code options}.
     *
     * @return the newly constructed objective function
     */
    private static ParsingObjectiveFunction newParsingObjectiveFunction(Options options, String str, Linearizer linearizer, StateSetTreeList stateSetTreeList, int i, double d) {
        final String constraintsFile = options.cons;
        final int processCount = options.nProcess;
        final boolean skipConstraintProjection = options.doNOTprojectConstraints;
        final boolean useConnectedLexicon = options.connectedLexicon;
        return new ParsingObjectiveFunction(
                linearizer,
                stateSetTreeList,
                d,
                i,
                constraintsFile,
                processCount,
                str,
                skipConstraintProjection,
                useConnectedLexicon);
    }

    /**
     * Runs one pass over {@code stateSetTreeList}: each tree gets inside/outside scores from a
     * parser built on {@code grammar}/{@code lexicon}, and every tree with a finite log score is
     * handed to {@code lexicon2.trainTree} and, unless {@code z} is set, to
     * {@code grammar2.tallyStateSetTree}.
     *
     * @param z when {@code true}, {@code grammar2} is left untouched
     * @return the sum of the per-tree log scores, skipping trees whose score is infinite or NaN
     */
    public static double doOneEStep(Grammar grammar, Lexicon lexicon, Grammar grammar2, Lexicon lexicon2, StateSetTreeList stateSetTreeList, boolean z) {
        final ArrayParser parser = new ArrayParser(grammar, lexicon);
        final double halfway = ((double) stateSetTreeList.size()) / 2.0d;
        double totalLogLikelihood = 0.0d;
        int treesSeen = 0;
        for (Iterator<Tree<StateSet>> trees = stateSetTreeList.iterator(); trees.hasNext(); ) {
            final Tree<StateSet> tree = trees.next();
            // The flag is computed from the zero-based position, before the counter advances.
            final boolean secondHalf = ((double) treesSeen) > halfway;
            treesSeen++;

            parser.doInsideOutsideScores(tree, true, false);
            final double logLikelihood = Math.log(tree.getLabel().getIScore(0)) + (100 * tree.getLabel().getIScale());

            if (Double.isInfinite(logLikelihood) || Double.isNaN(logLikelihood)) {
                // Unusable score: report it when verbose, and leave the accumulators alone.
                if (GrammarTrainer.VERBOSE) {
                    System.out.println("Training sentence " + treesSeen + " is given " + logLikelihood + " log likelihood!");
                    System.out.println("Root iScore " + tree.getLabel().getIScore(0) + " scale " + tree.getLabel().getIScale());
                }
                continue;
            }

            lexicon2.trainTree(tree, -1.0d, lexicon, secondHalf, true);
            if (!z) {
                grammar2.tallyStateSetTree(tree, grammar);
            }
            totalLogLikelihood += logLikelihood;
        }
        return totalLogLikelihood;
    }

    /**
     * Computes inside scores for every tree in {@code stateSetTreeList} with a parser built on
     * {@code grammar}/{@code lexicon} and adds up the resulting per-tree log scores.
     *
     * @return the sum of the finite log scores; trees scoring infinite or NaN contribute nothing
     */
    public static double calculateLogLikelihood(Grammar grammar, Lexicon lexicon, StateSetTreeList stateSetTreeList) {
        final ArrayParser parser = new ArrayParser(grammar, lexicon);
        double totalLogLikelihood = 0.0d;
        for (Iterator<Tree<StateSet>> trees = stateSetTreeList.iterator(); trees.hasNext(); ) {
            final Tree<StateSet> tree = trees.next();
            parser.doInsideScores(tree, false, false, null);
            final double logLikelihood = Math.log(tree.getLabel().getIScore(0)) + (100 * tree.getLabel().getIScale());
            final boolean finite = !Double.isInfinite(logLikelihood) && !Double.isNaN(logLikelihood);
            if (finite) {
                totalLogLikelihood += logLikelihood;
            }
        }
        return totalLogLikelihood;
    }

    /**
     * Prints a diagnosis for a tree with a bad likelihood to standard output.
     *
     * <p>The tree is printed first. Then each pre-terminal is paired with the next element of the
     * tree's yield; if none of the pre-terminal's substates has a finite inside score, the lexicon
     * counters for that state and word are dumped. The final line blames the lexicon if at least
     * one such pre-terminal was found, and the grammar otherwise.
     */
    public static void printBadLLReason(Tree<StateSet> tree, SophisticatedLexicon sophisticatedLexicon) {
        System.out.println(tree.toString());
        boolean lexiconAtFault = false;
        final Iterator<StateSet> terminals = tree.getYield().iterator();
        for (StateSet preTerminal : tree.getPreTerminalYield()) {
            final String word = terminals.next().getWord();

            boolean noFiniteScore = true;
            for (int sub = 0; sub < preTerminal.numSubStates(); sub++) {
                final double score = preTerminal.getIScore(sub);
                if (Double.isInfinite(score) || Double.isNaN(score)) {
                    continue;
                }
                noFiniteScore = false;
            }
            if (!noFiniteScore) {
                continue;
            }

            lexiconAtFault = true;
            System.out.println("LEXICON PROBLEM ON STATE " + ((int) preTerminal.getState()) + " word " + word);
            System.out.println("  word " + sophisticatedLexicon.wordCounter.getCount(preTerminal.getWord()));
            for (int sub = 0; sub < preTerminal.numSubStates(); sub++) {
                System.out.println("  tag " + sophisticatedLexicon.tagCounter[preTerminal.getState()][sub]);
                System.out.println("  word/state/sub " + sophisticatedLexicon.wordToTagCounters[preTerminal.getState()].get(preTerminal.getWord())[sub]);
            }
        }
        System.out.println(lexiconAtFault
                ? "  the likelihood is bad because of the lexicon"
                : "  the likelihood is bad because of the grammar");
    }

    /**
     * Sums the root inside score ({@code getIScore(0)} of each tree's label) over {@code list}.
     * The values are added as returned; no logarithm is applied in this method.
     *
     * @param z when {@code true}, each tree's score is printed
     * @return the sum of the finite scores; an infinite or NaN score is reported and skipped
     */
    public static double logLikelihood(List<Tree<StateSet>> list, boolean z) {
        double total = 0.0d;
        for (Tree<StateSet> tree : list) {
            final double score = tree.getLabel().getIScore(0);
            if (z) {
                System.out.println("LL is " + score + ".");
            }
            final boolean finite = !Double.isInfinite(score) && !Double.isNaN(score);
            if (!finite) {
                System.out.println("LL is not finite.");
                continue;
            }
            total += score;
        }
        return total;
    }

    /**
     * Calls {@code arrayParser.doInsideOutsideScores(tree, false, false)} on every tree in
     * {@code list}, in iteration order.
     */
    public static void updateStateSetTrees(List<Tree<StateSet>> list, ArrayParser arrayParser) {
        for (Tree<StateSet> tree : list) {
            arrayParser.doInsideOutsideScores(tree, false, false);
        }
    }

    /**
     * Builds the per-state substate-count array: one entry per state known to {@code numberer},
     * each set to {@code s}, except entry 0 which is set to 1.
     *
     * <p>Before sizing the array, both tree lists are run through the {@link StateSetTreeList}
     * constructor and {@code initializeTagNumberer}.
     *
     * @return a new array of length {@code (short) numberer.total()}
     */
    public static short[] initializeSubStateArray(List<Tree<String>> list, List<Tree<String>> list2, Numberer numberer, short s) {
        final short[] provisionalCounts = new short[] {1, s};

        // The constructed lists are discarded; presumably the constructor is invoked only for
        // its effect on the numberer - TODO confirm against StateSetTreeList.
        new StateSetTreeList(list, provisionalCounts, true, numberer);
        new StateSetTreeList(list2, provisionalCounts, true, numberer);
        StateSetTreeList.initializeTagNumberer(list, numberer);
        StateSetTreeList.initializeTagNumberer(list2, numberer);

        final int stateCount = (short) numberer.total();
        final short[] subStateCounts = new short[stateCount];
        Arrays.fill(subStateCounts, s);
        subStateCounts[0] = 1;
        return subStateCounts;
    }

    /**
     * Reads a serialized {@code boolean[][][][][]} from the file at {@code str}.
     *
     * <p>The underlying file handle is always released, including when the stream header is
     * invalid or deserialization fails part-way. A failure while closing after a successful
     * read is ignored, since the data has already been obtained.
     *
     * <p>NOTE(review): this uses Java native deserialization; only point it at trusted files.
     *
     * @param str path of the file to read
     * @return the deserialized array, or {@code null} if the file cannot be read or refers to
     *         an unknown class (a message is printed to standard output in both cases)
     */
    public static boolean[][][][][] loadDataNoZip(String str) {
        FileInputStream fileIn = null;
        try {
            fileIn = new FileInputStream(str);
            ObjectInputStream objectIn = new ObjectInputStream(fileIn);
            return (boolean[][][][][]) objectIn.readObject();
        } catch (IOException e) {
            System.out.println("IOException\n" + e);
            return null;
        } catch (ClassNotFoundException e2) {
            System.out.println("Class not found!");
            return null;
        } finally {
            // Closing the file stream releases the only OS resource held by the wrapper.
            if (fileIn != null) {
                try {
                    fileIn.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done about a failed close on a read-only stream.
                }
            }
        }
    }

    /**
     * Serializes {@code zArr} to the file at {@code str}.
     *
     * <p>The file handle is always released, including when writing fails part-way. On the
     * success path the object stream is flushed and closed inside the guarded region, so an
     * error while completing the write is still reported as a failure.
     *
     * @param zArr the array to write
     * @param str  path of the file to create or overwrite
     * @return {@code true} if the array was written and the stream closed without error,
     *         {@code false} otherwise (a message is printed to standard output)
     */
    public static boolean saveDataNoZip(boolean[][][][][] zArr, String str) {
        FileOutputStream fileOut = null;
        try {
            fileOut = new FileOutputStream(str);
            ObjectOutputStream objectOut = new ObjectOutputStream(fileOut);
            objectOut.writeObject(zArr);
            objectOut.flush();
            objectOut.close();
            return true;
        } catch (IOException e) {
            System.out.println("IOException: " + e);
            return false;
        } finally {
            // Releases the handle on the failure path; a repeat close after success has no effect.
            if (fileOut != null) {
                try {
                    fileOut.close();
                } catch (IOException ignored) {
                    // The outcome has already been decided above.
                }
            }
        }
    }

    /**
     * Tests whether two values agree to within the relative tolerance {@code TOL}.
     *
     * <p>The absolute difference is divided by the sum of the two magnitudes plus a small
     * constant ({@code 1.0E-10}), which keeps the denominator non-zero when both are zero.
     *
     * @return {@code true} if the scaled difference is strictly below {@code TOL}
     */
    protected static boolean matches(double d, double d2) {
        final double difference = Math.abs(d - d2);
        final double scale = (Math.abs(d) + Math.abs(d2)) + 1.0E-10d;
        return difference / scale < TOL;
    }
}
