/*
 * Decompiled with CFR 0.152.
 */
package edu.neu.ccs.pyramid.elasticsearch;

import edu.neu.ccs.pyramid.elasticsearch.ESIndex;
import java.io.Serializable;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

/**
 * Finds documents in an {@link ESIndex} whose term vectors are identical.
 *
 * <p>Only documents whose split field equals {@code "train"} (case-insensitive)
 * are examined. Document ids are taken to be the decimal strings
 * {@code "0" .. numDocs-1}. Within each group of documents sharing an equal
 * term vector, one document is kept and the ids of all others are recorded
 * as duplicates.
 *
 * <p>The index handle is {@code transient}; an instance restored through Java
 * serialization keeps the collected duplicate ids but has no index, so
 * {@link #detect()} cannot be run on it.
 */
public class DuplicateDetector
implements Serializable {
    private static final long serialVersionUID = 1L;
    /** Split-field value selecting the documents that are examined. */
    private static final String TRAIN_SPLIT = "train";
    private transient ESIndex esIndex;
    /** Ids flagged as duplicates; backed by a concurrent map because it is filled from parallel streams. */
    Set<String> allDuplicates;
    private String splitField;

    /**
     * Returns the live set of duplicate ids collected so far (not a copy).
     *
     * @return the internal duplicate-id set
     */
    public Set<String> getAllDuplicates() {
        return this.allDuplicates;
    }

    /**
     * Creates a detector with an empty duplicate set.
     *
     * @param esIndex    index queried for split-field values and term vectors
     * @param splitField name of the field whose value is compared against {@code "train"}
     */
    public DuplicateDetector(ESIndex esIndex, String splitField) {
        this.esIndex = esIndex;
        this.allDuplicates = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
        this.splitField = splitField;
    }

    /**
     * Adds every id in {@code set} to the collected duplicates.
     *
     * @param set ids to record as duplicates
     */
    public void addDuplicates(Set<String> set) {
        this.allDuplicates.addAll(set);
    }

    /**
     * Scans the training documents and records duplicate ids.
     *
     * <p>Phase 1 buckets document ids by the hash code of their term vector.
     * Phase 2 compares the actual term vectors inside every bucket (see
     * {@link #check(Set)}), so hash collisions between different vectors do
     * not produce false positives. Both phases run on parallel streams.
     */
    public void detect() {
        Map<Integer, Set<String>> hashToIds = new ConcurrentHashMap<>();
        int numDocs = this.esIndex.getNumDocs();
        IntStream.range(0, numDocs)
                .parallel()
                .mapToObj(i -> String.valueOf(i))
                // Constant-first comparison: a document without a split value is skipped
                // instead of aborting the whole scan with a NullPointerException.
                .filter(id -> TRAIN_SPLIT.equalsIgnoreCase(this.esIndex.getStringField(id, this.splitField)))
                .forEach(id -> {
                    Map<Integer, String> termVector = this.esIndex.getTermVectorFromIndex(id);
                    // computeIfAbsent is atomic. A separate containsKey/put/get sequence lets two
                    // threads each install a fresh bucket, and the id added to the overwritten
                    // bucket is lost.
                    hashToIds
                            .computeIfAbsent(termVector.hashCode(),
                                    hash -> Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>()))
                            .add(id);
                });
        hashToIds.values().parallelStream().forEach(this::check);
    }

    /**
     * Compares the term vectors of ids that share a hash bucket and records
     * every id whose vector equals one already seen in the bucket.
     *
     * @param candidates ids whose term vectors have the same hash code
     */
    private void check(Set<String> candidates) {
        if (candidates.size() <= 1) {
            // Nothing to compare against.
            return;
        }
        Set<Doc> seen = new HashSet<>();
        Set<String> duplicates = new HashSet<>();
        for (String id : candidates) {
            Doc doc = new Doc(id, this.esIndex.getTermVectorFromIndex(id));
            // Doc equality is defined by the term vector only, so add() returns false
            // exactly when an equal vector was already kept for another id.
            if (!seen.add(doc)) {
                duplicates.add(id);
            }
        }
        this.addDuplicates(duplicates);
    }

    /**
     * Lists the duplicate ids in natural (lexicographic) string order.
     *
     * @return the ids, each followed by a comma (so a non-empty result ends with {@code ","})
     */
    @Override
    public String toString() {
        return this.allDuplicates.stream()
                .sorted()
                .map(id -> id + ",")
                .collect(Collectors.joining());
    }

    /**
     * Pairs a document id with its term vector. Equality and hashing use the
     * term vector only, so two documents with different ids but equal vectors
     * are equal.
     */
    static class Doc {
        private final String id;
        private final Map<Integer, String> termVector;

        Doc(String id, Map<Integer, String> termVector) {
            this.id = id;
            this.termVector = termVector;
        }

        /** @return the document id */
        public String getId() {
            return this.id;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || this.getClass() != o.getClass()) {
                return false;
            }
            Doc other = (Doc) o;
            return this.termVector.equals(other.termVector);
        }

        @Override
        public int hashCode() {
            return this.termVector.hashCode();
        }
    }
}

