/*
 * Decompiled with CFR 0.152.
 */
package edu.neu.ccs.pyramid.application;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ConcurrentHashMultiset;
import com.google.common.collect.Multiset;
import edu.neu.ccs.pyramid.configuration.Config;
import edu.neu.ccs.pyramid.dataset.Density;
import edu.neu.ccs.pyramid.dataset.IdTranslator;
import edu.neu.ccs.pyramid.dataset.LabelTranslator;
import edu.neu.ccs.pyramid.dataset.MLClfDataSetBuilder;
import edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet;
import edu.neu.ccs.pyramid.dataset.TRECFormat;
import edu.neu.ccs.pyramid.elasticsearch.ESIndex;
import edu.neu.ccs.pyramid.elasticsearch.FeatureLoader;
import edu.neu.ccs.pyramid.elasticsearch.MultiLabelIndex;
import edu.neu.ccs.pyramid.feature.CategoricalFeature;
import edu.neu.ccs.pyramid.feature.CategoricalFeatureExpander;
import edu.neu.ccs.pyramid.feature.CodeDescription;
import edu.neu.ccs.pyramid.feature.Feature;
import edu.neu.ccs.pyramid.feature.FeatureList;
import edu.neu.ccs.pyramid.feature.Ngram;
import edu.neu.ccs.pyramid.feature_extraction.NgramEnumerator;
import edu.neu.ccs.pyramid.feature_extraction.NgramTemplate;
import edu.neu.ccs.pyramid.feature_extraction.StumpSelector;
import edu.neu.ccs.pyramid.util.BoundedBlockPriorityQueue;
import edu.neu.ccs.pyramid.util.Pair;
import edu.neu.ccs.pyramid.util.Serialization;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.logging.FileHandler;
import java.util.logging.Logger;
import java.util.logging.SimpleFormatter;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.FileUtils;
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;

public class App1 {
    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            throw new IllegalArgumentException("Please specify a properties file.");
        }
        Config config = new Config(args[0]);
        App1.main(config);
    }

    public static void main(Config config) throws Exception {
        Throwable throwable;
        MultiLabelIndex index;
        Logger logger = Logger.getAnonymousLogger();
        String logFile = config.getString("output.log");
        FileHandler fileHandler = null;
        if (!logFile.isEmpty()) {
            new File(logFile).getParentFile().mkdirs();
            fileHandler = new FileHandler(logFile, true);
            SimpleFormatter formatter = new SimpleFormatter();
            fileHandler.setFormatter(formatter);
            logger.addHandler(fileHandler);
            logger.setUseParentHandlers(false);
        }
        logger.info(config.toString());
        File output = new File(config.getString("output.folder"));
        output.mkdirs();
        if (config.getBoolean("createTrainSet")) {
            index = App1.loadIndex(config, logger, "train");
            throwable = null;
            try {
                App1.createTrainSet(config, index, logger);
            }
            catch (Throwable throwable2) {
                throwable = throwable2;
                throw throwable2;
            }
            finally {
                if (index != null) {
                    if (throwable != null) {
                        try {
                            index.close();
                        }
                        catch (Throwable throwable3) {
                            throwable.addSuppressed(throwable3);
                        }
                    } else {
                        index.close();
                    }
                }
            }
        }
        if (config.getBoolean("createTestSet")) {
            index = App1.loadIndex(config, logger, "test");
            throwable = null;
            try {
                App1.createTestSet(config, index, logger);
            }
            catch (Throwable throwable4) {
                throwable = throwable4;
                throw throwable4;
            }
            finally {
                if (index != null) {
                    if (throwable != null) {
                        try {
                            index.close();
                        }
                        catch (Throwable throwable5) {
                            throwable.addSuppressed(throwable5);
                        }
                    } else {
                        index.close();
                    }
                }
            }
        }
        if (fileHandler != null) {
            fileHandler.close();
        }
    }

    static MultiLabelIndex loadIndex(Config config, Logger logger, String trainOrTest) throws Exception {
        MultiLabelIndex.Builder builder = new MultiLabelIndex.Builder().setIndexName(config.getString("index.indexName")).setClusterName(config.getString("index.clusterName")).setClientType(config.getString("index.clientType")).setDocumentType(config.getString("index.documentType"));
        if (trainOrTest.endsWith("train")) {
            builder.setExtMultiLabelField(config.getString("train.label.field"));
        }
        if (trainOrTest.endsWith("test")) {
            File metaDataFolder = new File(config.getString("output.folder"), "meta_data");
            Config savedConfig = new Config(new File(metaDataFolder, "saved_config_app1"));
            builder.setExtMultiLabelField(savedConfig.getString("train.label.field"));
        }
        if (config.getString("index.clientType").equals("transport")) {
            String[] hosts = config.getString("index.hosts").split(Pattern.quote(","));
            String[] ports = config.getString("index.ports").split(Pattern.quote(","));
            builder.addHostsAndPorts(hosts, ports);
        }
        MultiLabelIndex index = builder.build();
        logger.info("index loaded");
        logger.info("there are " + index.getNumDocs() + " documents in the index.");
        return index;
    }

    static String[] getDocsForSplitFromQuery(ESIndex index, String query) {
        List<String> docs = index.matchStringQuery(query);
        return docs.toArray(new String[docs.size()]);
    }

    static IdTranslator loadIdTranslator(String[] indexIds) throws Exception {
        IdTranslator idTranslator = new IdTranslator();
        for (int i = 0; i < indexIds.length; ++i) {
            idTranslator.addData(i, "" + indexIds[i]);
        }
        return idTranslator;
    }

    private static boolean matchPrefixes(String name, Set<String> prefixes) {
        for (String prefix : prefixes) {
            if (!name.startsWith(prefix)) continue;
            return true;
        }
        return false;
    }

    static void addInitialFeatures(Config config, ESIndex index, FeatureList featureList, String[] ids, Logger logger) throws Exception {
        String featureFieldPrefix = config.getString("train.feature.featureFieldPrefix");
        Set prefixes = Arrays.stream(featureFieldPrefix.split(",")).map(String::trim).collect(Collectors.toSet());
        Set<String> allFields = index.listAllFields();
        List featureFields = allFields.stream().filter(field -> App1.matchPrefixes(field, prefixes)).collect(Collectors.toList());
        logger.info("all possible initial features:" + featureFields);
        for (String field2 : featureFields) {
            String featureType = index.getFieldType(field2);
            if (featureType.equalsIgnoreCase("string")) {
                CategoricalFeatureExpander expander = new CategoricalFeatureExpander();
                expander.setStart(featureList.size());
                expander.setVariableName(field2);
                expander.putSetting("source", "field");
                Collection<Terms.Bucket> buckets = index.termAggregation(field2, ids);
                Set categories = buckets.stream().map(MultiBucketsAggregation.Bucket::getKeyAsString).collect(Collectors.toSet());
                for (String category : categories) {
                    expander.addCategory(category);
                }
                List<CategoricalFeature> group = expander.expand();
                boolean toAdd = true;
                if (config.getBoolean("train.feature.categFeature.filter")) {
                    double threshold = config.getDouble("train.feature.categFeature.percentThreshold");
                    int numCategories = group.size();
                    if ((double)numCategories > (double)ids.length * threshold) {
                        toAdd = false;
                        logger.info("field " + field2 + " has too many categories (" + numCategories + "), omitted.");
                    }
                }
                if (!toAdd) continue;
                for (Feature feature : group) {
                    featureList.add(feature);
                }
                continue;
            }
            Feature feature = new Feature();
            feature.setName(field2);
            feature.setIndex(featureList.size());
            feature.getSettings().put("source", "field");
            featureList.add(feature);
        }
    }

    static boolean interesting(Multiset<Ngram> allNgrams, Ngram candidate, int count) {
        for (int slop = 0; slop < candidate.getSlop(); ++slop) {
            Ngram toCheck = new Ngram();
            toCheck.setInOrder(candidate.isInOrder());
            toCheck.setField(candidate.getField());
            toCheck.setNgram(candidate.getNgram());
            toCheck.setSlop(slop);
            toCheck.setName(candidate.getName());
            if (allNgrams.count((Object)toCheck) != count) continue;
            return false;
        }
        return true;
    }

    static Set<Ngram> gather(Config config, ESIndex index, String[] ids, Logger logger) throws Exception {
        File metaDataFolder = new File(config.getString("output.folder"), "meta_data");
        metaDataFolder.mkdirs();
        ConcurrentHashMultiset allNgrams = ConcurrentHashMultiset.create();
        List<Integer> ns = config.getIntegers("train.feature.ngram.n");
        double minDf = config.getDouble("train.feature.ngram.minDf");
        int minDFrequency = (int)Math.floor((double)ids.length * minDf);
        List<String> fields = config.getStrings("train.feature.ngram.extractionFields");
        List<Integer> slops = config.getIntegers("train.feature.ngram.slop");
        for (String field : fields) {
            for (int n : ns) {
                for (int slop : slops) {
                    logger.info("gathering " + n + "-grams from field " + field + " with slop " + slop + " and minDf " + minDf + ", (actual frequency threshold = " + minDFrequency + ")");
                    NgramTemplate template = new NgramTemplate(field, n, slop);
                    Multiset<Ngram> ngrams = NgramEnumerator.gatherNgram(index, ids, template, minDFrequency);
                    logger.info("gathered " + ngrams.elementSet().size() + " ngrams");
                    int newCounter = 0;
                    for (Multiset.Entry entry : ngrams.entrySet()) {
                        int count;
                        Ngram ngram = (Ngram)entry.getElement();
                        if (!App1.interesting((Multiset<Ngram>)allNgrams, ngram, count = entry.getCount())) continue;
                        allNgrams.add((Object)ngram, count);
                        ++newCounter;
                    }
                    logger.info(newCounter + " are really new");
                }
            }
        }
        logger.info("there are " + allNgrams.elementSet().size() + " ngrams in total");
        return allNgrams.elementSet();
    }

    private static List<Ngram> addNgramFromFile(Config config, ESIndex index, Logger logger) throws IOException {
        ArrayList<Ngram> ngrams = new ArrayList<Ngram>();
        String externalNgramFile = config.getString("train.feature.externalNgramFile");
        List lines = FileUtils.readLines((File)new File(externalNgramFile));
        List<String> fields = config.getStrings("train.feature.ngram.extractionFields");
        String analyzer = config.getString("train.feature.analyzer");
        for (String field : fields) {
            for (String line : lines) {
                Ngram ngram = index.analyze(line, analyzer);
                ngram.setField(field);
                ngrams.add(ngram);
            }
        }
        logger.info("ngrams collected from file " + externalNgramFile);
        logger.info(((Object)ngrams).toString());
        return ngrams;
    }

    private static void addCodeDescription(Config config, ESIndex index, FeatureList featureList) throws Exception {
        String file = config.getString("train.feature.codeDesc.File");
        List lines = FileUtils.readLines((File)new File(file));
        String analyzer = config.getString("train.feature.codeDesc.analyzer");
        String field = config.getString("train.feature.codeDesc.matchField");
        int percentage = config.getInt("train.feature.codeDesc.minMatchPercentage");
        for (String line : lines) {
            List<String> terms = index.analyzeString(line, analyzer);
            CodeDescription codeDescription = new CodeDescription(terms, percentage, field);
            featureList.add(codeDescription);
        }
    }

    static void addNgramFeatures(FeatureList featureList, Set<Ngram> ngrams) {
        ngrams.stream().forEach(ngram -> {
            ngram.getSettings().put("source", "matching_score");
            featureList.add((Feature)ngram);
        });
    }

    static MultiLabelClfDataSet loadData(Config config, MultiLabelIndex index, FeatureList featureList, IdTranslator idTranslator, int totalDim, LabelTranslator labelTranslator, String docFilter) throws Exception {
        FeatureLoader.MatchScoreType matchScoreType;
        String matchScoreTypeString;
        File metaDataFolder = new File(config.getString("output.folder"), "meta_data");
        Config savedConfig = new Config(new File(metaDataFolder, "saved_config_app1"));
        int numDataPoints = idTranslator.numData();
        int numClasses = labelTranslator.getNumClasses();
        MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numDataPoints(numDataPoints).numFeatures(totalDim).numClasses(numClasses).density(Density.SPARSE_RANDOM).missingValue(savedConfig.getBoolean("train.feature.missingValue")).build();
        for (int i = 0; i < numDataPoints; ++i) {
            String dataIndexId = idTranslator.toExtId(i);
            List<String> extMultiLabel = index.getExtMultiLabel(dataIndexId);
            if (savedConfig.getBoolean("train.label.filter")) {
                String prefix = savedConfig.getString("train.label.filter.prefix");
                extMultiLabel = extMultiLabel.stream().filter(extLabel -> extLabel.startsWith(prefix)).collect(Collectors.toList());
            }
            for (String extLabel2 : extMultiLabel) {
                int intLabel = labelTranslator.toIntLabel(extLabel2);
                dataSet.addLabel(i, intLabel);
            }
        }
        switch (matchScoreTypeString = savedConfig.getString("train.feature.ngram.matchScoreType")) {
            case "es_original": {
                matchScoreType = FeatureLoader.MatchScoreType.ES_ORIGINAL;
                break;
            }
            case "binary": {
                matchScoreType = FeatureLoader.MatchScoreType.BINARY;
                break;
            }
            case "frequency": {
                matchScoreType = FeatureLoader.MatchScoreType.FREQUENCY;
                break;
            }
            case "tfifl": {
                matchScoreType = FeatureLoader.MatchScoreType.TFIFL;
                break;
            }
            default: {
                throw new IllegalArgumentException("unknown ngramMatchScoreType");
            }
        }
        FeatureLoader.loadFeatures(index, dataSet, featureList, idTranslator, matchScoreType, docFilter);
        dataSet.setIdTranslator(idTranslator);
        dataSet.setLabelTranslator(labelTranslator);
        return dataSet;
    }

    static LabelTranslator loadTrainLabelTranslator(Config config, MultiLabelIndex index, String[] trainIndexIds, Logger logger) throws Exception {
        Collection buckets = index.termAggregation(config.getString("train.label.field"), trainIndexIds);
        if (config.getBoolean("train.label.filter")) {
            String prefix = config.getString("train.label.filter.prefix");
            buckets = buckets.stream().filter(bucket -> bucket.getKeyAsString().startsWith(prefix)).collect(Collectors.toList());
        }
        logger.info("there are " + buckets.size() + " classes in the training set.");
        ArrayList<String> labels = new ArrayList<String>();
        logger.info("label distribution in training set:");
        StringBuilder stringBuilder = new StringBuilder();
        for (Terms.Bucket bucket2 : buckets) {
            stringBuilder.append(bucket2.getKey());
            stringBuilder.append(":");
            stringBuilder.append(bucket2.getDocCount());
            stringBuilder.append(", ");
            labels.add(bucket2.getKeyAsString());
        }
        logger.info(stringBuilder.toString());
        String labelOrder = config.getString("train.label.order");
        if (labelOrder.equals("alphabetical")) {
            Collections.sort(labels);
        }
        LabelTranslator labelTranslator = new LabelTranslator(labels);
        return labelTranslator;
    }

    static LabelTranslator loadAugmentedLabelTranslator(Config config, MultiLabelIndex index, String[] testIndexIds, LabelTranslator trainLabelTranslator, Logger logger) {
        File metaDataFolder = new File(config.getString("output.folder"), "meta_data");
        Config savedConfig = new Config(new File(metaDataFolder, "saved_config_app1"));
        ArrayList<String> extLabels = new ArrayList<String>();
        for (int i = 0; i < trainLabelTranslator.getNumClasses(); ++i) {
            extLabels.add(trainLabelTranslator.toExtLabel(i));
        }
        Collection buckets = index.termAggregation(savedConfig.getString("train.label.field"), testIndexIds);
        if (savedConfig.getBoolean("train.label.filter")) {
            String prefix = savedConfig.getString("train.label.filter.prefix");
            buckets = buckets.stream().filter(bucket -> bucket.getKeyAsString().startsWith(prefix)).collect(Collectors.toList());
        }
        ArrayList<String> newLabels = new ArrayList<String>();
        logger.info("label distribution in data set:");
        StringBuilder stringBuilder = new StringBuilder();
        for (Terms.Bucket bucket2 : buckets) {
            stringBuilder.append(bucket2.getKey());
            stringBuilder.append(":");
            stringBuilder.append(bucket2.getDocCount());
            stringBuilder.append(", ");
            if (extLabels.contains(bucket2.getKey())) continue;
            extLabels.add(bucket2.getKeyAsString());
            newLabels.add(bucket2.getKeyAsString());
        }
        logger.info(stringBuilder.toString());
        if (!newLabels.isEmpty()) {
            logger.warning("found new labels in data set: " + newLabels);
        }
        return new LabelTranslator(extLabels);
    }

    static void generateMetaData(Config config, MultiLabelIndex index, Logger logger) throws Exception {
        logger.info("generating meta data");
        File metaDataFolder = new File(config.getString("output.folder"), "meta_data");
        metaDataFolder.mkdirs();
        config.store(new File(metaDataFolder, "saved_config_app1"));
        String[] trainIndexIds = App1.getDocsForSplitFromQuery(index, config.getString("train.splitQuery"));
        LabelTranslator trainLabelTranslator = App1.loadTrainLabelTranslator(config, index, trainIndexIds, logger);
        Serialization.serialize((Object)trainLabelTranslator, new File(metaDataFolder, "label_translator.ser"));
        FileUtils.writeStringToFile((File)new File(metaDataFolder, "label_translator.txt"), (String)trainLabelTranslator.toString());
        FeatureList featureList = new FeatureList();
        if (config.getBoolean("train.feature.useInitialFeatures")) {
            App1.addInitialFeatures(config, index, featureList, trainIndexIds, logger);
        }
        if (config.getBoolean("train.feature.useCodeDescription")) {
            App1.addCodeDescription(config, index, featureList);
        }
        Set<Ngram> ngrams = new HashSet<Ngram>();
        ngrams.addAll(App1.gather(config, index, trainIndexIds, logger));
        if (config.getBoolean("train.feature.filterNgramsByKeyWords")) {
            ngrams = App1.keywordsFilter(config, index, ngrams);
        }
        if (config.getBoolean("train.feature.filterNgramsByRegex")) {
            ngrams = App1.regexFilter(config, ngrams);
        }
        if (config.getBoolean("train.feature.addExternalNgrams")) {
            ngrams.addAll(App1.addNgramFromFile(config, index, logger));
        }
        App1.addNgramFeatures(featureList, ngrams);
        Serialization.serialize((Object)featureList, new File(metaDataFolder, "feature_list.ser"));
        try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(new File(metaDataFolder, "feature_list.txt")));){
            for (Feature feature : featureList.getAll()) {
                bufferedWriter.write(feature.toString());
                bufferedWriter.newLine();
            }
        }
        if (config.getBoolean("train.feature.ngram.selection")) {
            App1.ngramSelection(config, index, config.getString("train.splitQuery"), logger);
        }
        logger.info("meta data generated");
    }

    static void createDataSet(Config config, MultiLabelIndex index, String[] indexIds, String datasetName, String docFilter, Logger logger) throws Exception {
        logger.info("creating data set " + datasetName);
        File metaDataFolder = new File(config.getString("output.folder"), "meta_data");
        IdTranslator idTranslator = App1.loadIdTranslator(indexIds);
        String archive = config.getString("output.folder");
        LabelTranslator trainLabelTranslator = (LabelTranslator)Serialization.deserialize(new File(metaDataFolder, "label_translator.ser"));
        LabelTranslator labelTranslator = App1.loadAugmentedLabelTranslator(config, index, indexIds, trainLabelTranslator, logger);
        FeatureList featureList = (FeatureList)Serialization.deserialize(new File(metaDataFolder, "feature_list.ser"));
        MultiLabelClfDataSet dataSet = App1.loadData(config, index, featureList, idTranslator, featureList.size(), labelTranslator, docFilter);
        dataSet.setFeatureList(featureList);
        File dataFile = new File(new File(archive, "data_sets"), datasetName);
        TRECFormat.save(dataSet, dataFile);
        logger.info("data set " + datasetName + " created");
        ObjectMapper objectMapper = new ObjectMapper();
        objectMapper.writeValue(new File(dataFile, "data_config.json"), (Object)config);
    }

    static void createTrainSet(Config config, MultiLabelIndex index, Logger logger) throws Exception {
        App1.generateMetaData(config, index, logger);
        String[] indexIds = App1.getDocsForSplitFromQuery(index, config.getString("train.splitQuery"));
        App1.createDataSet(config, index, indexIds, config.getString("output.trainFolder"), config.getString("train.splitQuery"), logger);
    }

    static void createTestSet(Config config, MultiLabelIndex index, Logger logger) throws Exception {
        String[] indexIds = App1.getDocsForSplitFromQuery(index, config.getString("test.splitQuery"));
        App1.createDataSet(config, index, indexIds, config.getString("output.testFolder"), config.getString("test.splitQuery"), logger);
    }

    private static Set<Ngram> keywordsFilter(Config config, ESIndex index, Set<Ngram> ngrams) throws IOException {
        String externalKeywordsFile = config.getString("train.feature.filterNgrams.keyWordsFile");
        List lines = FileUtils.readLines((File)new File(externalKeywordsFile));
        String analyzer = config.getString("train.feature.analyzer");
        HashSet<String> keywords = new HashSet<String>();
        for (String line : lines) {
            keywords.add(index.analyze(line, analyzer).getNgram());
        }
        return ((Stream)ngrams.stream().parallel()).filter(ngram -> ngram.getN() == 1 || App1.containsKeyWords(ngram, keywords)).collect(Collectors.toSet());
    }

    private static boolean containsKeyWords(Ngram ngram, Set<String> keywords) {
        String[] terms;
        for (String term : terms = ngram.getTerms()) {
            if (!keywords.contains(term)) continue;
            return true;
        }
        return false;
    }

    private static Set<Ngram> regexFilter(Config config, Set<Ngram> ngrams) {
        String regex = config.getString("train.feature.filterNgrams.regex");
        return ngrams.parallelStream().filter(ngram -> !ngram.getNgram().matches(regex)).collect(Collectors.toSet());
    }

    private static double[][] loadLabels(Config config, MultiLabelIndex index, IdTranslator idTranslator, LabelTranslator labelTranslator) {
        File metaDataFolder = new File(config.getString("output.folder"), "meta_data");
        Config savedConfig = new Config(new File(metaDataFolder, "saved_config_app1"));
        int numDataPoints = idTranslator.numData();
        int numClasses = labelTranslator.getNumClasses();
        double[][] labels = new double[numClasses][numDataPoints];
        for (int i = 0; i < numDataPoints; ++i) {
            String dataIndexId = idTranslator.toExtId(i);
            List<String> extMultiLabel = index.getExtMultiLabel(dataIndexId);
            if (savedConfig.getBoolean("train.label.filter")) {
                String prefix = savedConfig.getString("train.label.filter.prefix");
                extMultiLabel = extMultiLabel.stream().filter(extLabel -> extLabel.startsWith(prefix)).collect(Collectors.toList());
            }
            for (String extLabel2 : extMultiLabel) {
                int intLabel = labelTranslator.toIntLabel(extLabel2);
                labels[intLabel][i] = 1.0;
            }
        }
        return labels;
    }

    private static void ngramSelection(Config config, MultiLabelIndex index, String docFilter, Logger logger) throws Exception {
        FeatureLoader.MatchScoreType matchScoreType;
        logger.info("start ngram selection");
        File metaDataFolder = new File(config.getString("output.folder"), "meta_data");
        String matchScoreTypeString = config.getString("train.feature.ngram.matchScoreType");
        String[] indexIds = App1.getDocsForSplitFromQuery(index, config.getString("train.splitQuery"));
        IdTranslator idTranslator = App1.loadIdTranslator(indexIds);
        LabelTranslator labelTranslator = (LabelTranslator)Serialization.deserialize(new File(metaDataFolder, "label_translator.ser"));
        switch (matchScoreTypeString) {
            case "es_original": {
                matchScoreType = FeatureLoader.MatchScoreType.ES_ORIGINAL;
                break;
            }
            case "binary": {
                matchScoreType = FeatureLoader.MatchScoreType.BINARY;
                break;
            }
            case "frequency": {
                matchScoreType = FeatureLoader.MatchScoreType.FREQUENCY;
                break;
            }
            case "tfifl": {
                matchScoreType = FeatureLoader.MatchScoreType.TFIFL;
                break;
            }
            default: {
                throw new IllegalArgumentException("unknown ngramMatchScoreType");
            }
        }
        double[][] labels = App1.loadLabels(config, index, idTranslator, labelTranslator);
        int numLabels = labels.length;
        int toKeep = config.getInt("train.feature.ngram.selectPerLabel");
        ArrayList<BoundedBlockPriorityQueue<Pair>> queues = new ArrayList<BoundedBlockPriorityQueue<Pair>>();
        Comparator<Pair> comparator = Comparator.comparing(p -> (Double)p.getSecond());
        for (int l = 0; l < numLabels; ++l) {
            queues.add(new BoundedBlockPriorityQueue<Pair>(toKeep, comparator));
        }
        FeatureList featureList = (FeatureList)Serialization.deserialize(new File(metaDataFolder, "feature_list.ser"));
        ((Stream)featureList.getAll().stream().parallel()).filter(feature -> feature instanceof Ngram).map(feature -> (Ngram)feature).filter(ngram -> ngram.getN() > 1).forEach(ngram -> {
            double[] scores = StumpSelector.scores(index, labels, ngram, idTranslator, matchScoreType, docFilter);
            for (int l = 0; l < numLabels; ++l) {
                ((BoundedBlockPriorityQueue)queues.get(l)).add(new Pair<Ngram, Double>((Ngram)ngram, scores[l]));
            }
        });
        HashSet<Ngram> kept = new HashSet<Ngram>();
        StringBuilder stringBuilder = new StringBuilder();
        for (int l = 0; l < numLabels; ++l) {
            stringBuilder.append("-------------------------").append("\n");
            stringBuilder.append(labelTranslator.toExtLabel(l)).append(":").append("\n");
            BoundedBlockPriorityQueue queue = (BoundedBlockPriorityQueue)queues.get(l);
            while (queue.size() > 0) {
                Ngram ngram2 = (Ngram)((Pair)queue.poll()).getFirst();
                kept.add(ngram2);
                stringBuilder.append(ngram2.getNgram()).append(", ");
            }
            stringBuilder.append("\n");
        }
        File selectionFile = new File(metaDataFolder, "selected_ngrams.txt");
        FileUtils.writeStringToFile((File)selectionFile, (String)stringBuilder.toString());
        logger.info("finish ngram selection");
        logger.info("selected ngrams are written to " + selectionFile.getAbsolutePath());
        FeatureList selectedFeatures = new FeatureList();
        for (Feature feature2 : featureList.getAll()) {
            if (!(feature2 instanceof Ngram)) {
                selectedFeatures.add(feature2);
            }
            if (feature2 instanceof Ngram && ((Ngram)feature2).getN() == 1) {
                selectedFeatures.add(feature2);
            }
            if (!(feature2 instanceof Ngram) || ((Ngram)feature2).getN() <= 1 || !kept.contains(feature2)) continue;
            selectedFeatures.add(feature2);
        }
        FileUtils.copyFile((File)new File(metaDataFolder, "feature_list.ser"), (File)new File(metaDataFolder, "feature_list_all.ser"));
        FileUtils.copyFile((File)new File(metaDataFolder, "feature_list.txt"), (File)new File(metaDataFolder, "feature_list_all.txt"));
        Serialization.serialize((Object)selectedFeatures, new File(metaDataFolder, "feature_list.ser"));
        try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(new File(metaDataFolder, "feature_list.txt")));){
            for (Feature feature3 : selectedFeatures.getAll()) {
                bufferedWriter.write(feature3.toString());
                bufferedWriter.newLine();
            }
        }
    }
}

