/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.patterns;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.patterns.CandidatePhrase;
import edu.stanford.nlp.patterns.ConstantsAndVariables;
import edu.stanford.nlp.patterns.DataInstance;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.Execution;
import edu.stanford.nlp.util.GoogleNGramsSQLBacked;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.File;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Static holder for corpus-wide frequency statistics.
 *
 * <p>All state lives in public static fields, so this class is effectively a set of
 * process-wide globals. Nothing here is synchronized; callers that mutate these fields
 * from several threads must coordinate themselves.
 */
public class Data {
    /**
     * {@code domainNGramRawFreq.totalCount() / rawFreq.totalCount()}, refreshed at the end of
     * {@link #computeRawFreqIfNull(Map, int)} whenever domain n-grams are loaded.
     */
    public static double ratioDomainNgramFreqWithDataFreq = 1.0;
    /**
     * Count of every 1..n-word phrase seen by {@link #computeRawFreqIfNull(Map, int)}.
     * Starts out {@code null}; it is created lazily on the first computation if the caller
     * has not assigned a counter already.
     */
    public static Counter<CandidatePhrase> rawFreq = null;
    /** Not read or written in this class; presumably the sentence files backing the data set (confirm against callers). */
    public static List<File> sentsFiles = null;
    /** Not read or written in this class; presumably maps a sentence id to the file holding it (confirm against callers). */
    public static Map<String, File> sentId2File = null;
    /** Not read or written in this class; presumably the in-memory sentences keyed by id (confirm against callers). */
    public static Map<String, DataInstance> sents = null;
    /** Not read or written in this class. */
    public static String inMemorySaveFileLocation = "";
    /** Not read or written in this class. */
    public static Counter<CandidatePhrase> processedDataFreq = null;
    /** N-gram counts read from {@link #domainNGramsFile} by {@link #loadDomainNGrams()}. */
    public static Counter<String> domainNGramRawFreq = new ClassicCounter<String>();
    /** Set by {@link #setRatioGoogleNgramFreqWithDataFreq()}. */
    public static double ratioGoogleNgramFreqWithDataFreq = 1.0;
    /** Path of a tab-separated file ({@code ngram<TAB>count} per line) consumed by {@link #loadDomainNGrams()}. */
    @Execution.Option(name="domainNGramsFile")
    public static String domainNGramsFile = null;
    /** When true, {@link #computeRawFreqIfNull(Map, int)} also refreshes {@link #ratioGoogleNgramFreqWithDataFreq}. */
    static boolean usingGoogleNgram = false;
    /** Not read or written in this class. */
    public static Map<String, Map<String, List<Integer>>> matchedTokensForEachPhrase = new ConcurrentHashMap<String, Map<String, List<Integer>>>();

    /**
     * Accumulates raw phrase frequencies over every batch produced by a
     * {@code ConstantsAndVariables.DataSentsIterator}.
     *
     * @param numWordsCompound maximum n-gram length to count
     * @param batchProcess     forwarded unchanged to the iterator's constructor
     */
    public static void computeRawFreqIfNull(int numWordsCompound, boolean batchProcess) {
        ConstantsAndVariables.DataSentsIterator iter = new ConstantsAndVariables.DataSentsIterator(batchProcess);
        while (iter.hasNext()) {
            // The first element of each pair is handed on as the sentence map for that batch.
            Data.computeRawFreqIfNull((Map)((Pair)iter.next()).first(), numWordsCompound);
        }
    }

    /**
     * Adds the count of every 1..{@code numWordsCompound}-word n-gram in {@code sents} to
     * {@link #rawFreq}, then refreshes the frequency ratios that depend on it.
     *
     * <p>Counts are added to whatever {@link #rawFreq} already holds; the counter is not
     * cleared first. If {@link #rawFreq} is {@code null} a new counter is created.
     *
     * @param sents            sentences to scan; only the map's values are used
     * @param numWordsCompound maximum n-gram length to count
     */
    public static void computeRawFreqIfNull(Map<String, DataInstance> sents, int numWordsCompound) {
        Redwood.log(new Object[]{Redwood.DBG, "Computing raw freq for every 1-" + numWordsCompound + " consecutive words"});
        // rawFreq is declared null; without this the first incrementCount below would throw an NPE.
        if (rawFreq == null) {
            rawFreq = new ClassicCounter<CandidatePhrase>();
        }
        for (DataInstance instance : sents.values()) {
            List<List<CoreLabel>> ngrams = CollectionUtils.getNGrams(instance.getTokens(), 1, numWordsCompound);
            for (List<CoreLabel> ngram : ngrams) {
                // Join the token words with single spaces; the leading space is removed by trim().
                StringBuilder joined = new StringBuilder();
                for (CoreLabel token : ngram) {
                    joined.append(' ').append(token.word());
                }
                String phrase = joined.toString().trim();
                if (phrase.isEmpty()) {
                    continue;
                }
                rawFreq.incrementCount(CandidatePhrase.createOrGet(phrase));
            }
        }
        if (usingGoogleNgram) {
            Data.setRatioGoogleNgramFreqWithDataFreq();
        }
        if (domainNGramRawFreq != null && domainNGramRawFreq.size() > 0) {
            // Floating-point division: yields Infinity if rawFreq's total is still zero.
            ratioDomainNgramFreqWithDataFreq = domainNGramRawFreq.totalCount() / rawFreq.totalCount();
        }
    }

    /**
     * Sets {@link #ratioGoogleNgramFreqWithDataFreq} to
     * {@code GoogleNGramsSQLBacked.getTotalCount(1) / rawFreq.totalCount()} and logs the value.
     *
     * <p>{@link #rawFreq} must be non-null when this is called. The division is floating
     * point, so a zero total in {@link #rawFreq} produces Infinity or NaN rather than an exception.
     */
    public static void setRatioGoogleNgramFreqWithDataFreq() {
        ratioGoogleNgramFreqWithDataFreq = (double)GoogleNGramsSQLBacked.getTotalCount(1) / rawFreq.totalCount();
        Redwood.log(ConstantsAndVariables.minimaldebug, "Data", "ratioGoogleNgramFreqWithDataFreq is " + ratioGoogleNgramFreqWithDataFreq);
    }

    /**
     * Reads {@link #domainNGramsFile} into {@link #domainNGramRawFreq} if that counter is
     * {@code null} or empty; otherwise does nothing.
     *
     * <p>Each non-blank line must hold at least two tab-separated columns: the n-gram and its
     * count. Columns after the second are ignored and blank lines are skipped.
     *
     * @throws IllegalArgumentException if a non-blank line has fewer than two columns, or
     *         (as {@link NumberFormatException}) if the count column is not a number
     */
    public static void loadDomainNGrams() {
        assert (domainNGramsFile != null);
        // The counter can be reassigned to null from outside; create it so setCount below cannot NPE.
        if (domainNGramRawFreq == null) {
            domainNGramRawFreq = new ClassicCounter<String>();
        }
        if (domainNGramRawFreq.size() == 0) {
            for (String line : IOUtils.readLines(domainNGramsFile)) {
                if (line.trim().isEmpty()) {
                    // Tolerate blank and trailing empty lines instead of failing on t[1].
                    continue;
                }
                String[] t = line.split("\t");
                if (t.length < 2) {
                    throw new IllegalArgumentException("Malformed line in domain ngram file " + domainNGramsFile + " (expected ngram<TAB>count): " + line);
                }
                domainNGramRawFreq.setCount(t[0], Double.parseDouble(t[1]));
            }
            Redwood.log(ConstantsAndVariables.minimaldebug, "Data", "loading freq from domain ngram file " + domainNGramsFile);
        }
    }
}

