/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.spark;

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ArrayPrimitiveWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.KylinVersion;
import org.apache.kylin.common.util.AbstractApplication;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.BytesUtil;
import org.apache.kylin.common.util.Dictionary;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.OptionsHelper;
import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.DimensionRangeInfo;
import org.apache.kylin.cube.cuboid.CuboidUtil;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.cube.model.CubeJoinedFlatTableEnrich;
import org.apache.kylin.cube.util.KeyValueBuilder;
import org.apache.kylin.dict.DictionaryGenerator;
import org.apache.kylin.dict.IDictionaryBuilder;
import org.apache.kylin.engine.EngineFactory;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.SerializableConfiguration;
import org.apache.kylin.engine.mr.common.StatisticsDecisionUtil;
import org.apache.kylin.engine.mr.steps.FactDistinctColumnsMapper;
import org.apache.kylin.engine.mr.steps.FactDistinctColumnsReducerMapping;
import org.apache.kylin.engine.mr.steps.SelfDefineSortableKey;
import org.apache.kylin.engine.spark.KylinSparkJobListener;
import org.apache.kylin.engine.spark.MultipleOutputsRDD;
import org.apache.kylin.engine.spark.SparkUtil;
import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.measure.hllc.RegisterType;
import org.apache.kylin.metadata.datatype.DataType;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.shaded.com.google.common.base.Preconditions;
import org.apache.kylin.shaded.com.google.common.collect.Lists;
import org.apache.kylin.shaded.com.google.common.collect.Maps;
import org.apache.kylin.shaded.com.google.common.collect.Sets;
import org.apache.kylin.shaded.com.google.common.hash.HashFunction;
import org.apache.kylin.shaded.com.google.common.hash.Hasher;
import org.apache.kylin.shaded.com.google.common.hash.Hashing;
import org.apache.kylin.tool.shaded.org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.spark.Partitioner;
import org.apache.spark.SparkConf;
import org.apache.spark.TaskContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.scheduler.SparkListenerInterface;
import org.apache.spark.util.LongAccumulator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import scala.Tuple3;

public class SparkFactDistinct
extends AbstractApplication
implements Serializable {
    /** Class-wide logger; the nested function classes declared in this file log through it as well. */
    protected static final Logger logger = LoggerFactory.getLogger(SparkFactDistinct.class);
    // Command-line options. All of them are created in the static initializer
    // of this class, where each one takes an argument and is marked required.
    /** "cubename": cube name. */
    public static final Option OPTION_CUBE_NAME;
    /** "metaUrl": HDFS metadata url. */
    public static final Option OPTION_META_URL;
    /** "output": cube output path. */
    public static final Option OPTION_OUTPUT_PATH;
    /** "segmentId": cube segment id. */
    public static final Option OPTION_SEGMENT_ID;
    /** "statisticssamplingpercent": statistics sampling percent (parsed as an int in execute). */
    public static final Option OPTION_STATS_SAMPLING_PERCENT;
    /** "hiveTable": Hive intermediate table. */
    public static final Option OPTION_INPUT_TABLE;
    /** "input": path of the Hive intermediate table. */
    public static final Option OPTION_INPUT_PATH;
    /** "counterOutput": counter output path. */
    public static final Option OPTION_COUNTER_PATH;
    /** Option set returned by getOptions(); populated by the constructor. */
    private Options options = new Options();

    /**
     * Creates the application and registers every command-line option it
     * accepts. The registration order is the same as it has always been.
     */
    public SparkFactDistinct() {
        Option[] supported = {
            OPTION_CUBE_NAME,
            OPTION_META_URL,
            OPTION_OUTPUT_PATH,
            OPTION_INPUT_TABLE,
            OPTION_INPUT_PATH,
            OPTION_SEGMENT_ID,
            OPTION_STATS_SAMPLING_PERCENT,
            OPTION_COUNTER_PATH,
        };
        for (Option supportedOption : supported) {
            options.addOption(supportedOption);
        }
    }

    /**
     * Returns the option set that the constructor populated.
     *
     * @return the command-line options accepted by this application
     */
    @Override
    protected Options getOptions() {
        return options;
    }

    /**
     * Runs the fact-distinct Spark job for one cube segment.
     *
     * <p>Steps, in order: read the command-line values, build a Kryo-enabled
     * Spark context, delete any previous output directory, load the Kylin
     * config and cube from the metadata url, then run the pipeline
     * {@code FlatOutputFucntion -> FactDistinctPartitioner (repartition and
     * sort) -> MultiOutputFunction} and save the result through the named
     * outputs "column", "dict", "statistics" and "partition". Finally the
     * input record count and the accumulator value are written to the counter
     * path and the HDFS metadata copy is deleted.
     *
     * @param optionsHelper parsed command-line options
     * @throws Exception if any step of the job fails
     */
    @Override
    protected void execute(OptionsHelper optionsHelper) throws Exception {
        final String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
        final String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
        final String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
        final String hiveTable = optionsHelper.getOptionValue(OPTION_INPUT_TABLE);
        final String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
        final String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
        final String counterPath = optionsHelper.getOptionValue(OPTION_COUNTER_PATH);
        final int samplingPercent = Integer.parseInt(optionsHelper.getOptionValue(OPTION_STATS_SAMPLING_PERCENT));

        // Kryo registration is mandatory below, so these two classes are registered explicitly.
        Class<?>[] kryoClasses = {
            Class.forName("scala.reflect.ClassTag$$anon$1"),
            Class.forName("org.apache.kylin.engine.mr.steps.SelfDefineSortableKey"),
        };

        SparkConf conf = new SparkConf()
                .setAppName("Fact distinct columns for:" + cubeName + " segment " + segmentId)
                .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                .set("spark.kryo.registrator", "org.apache.kylin.engine.spark.KylinKryoRegistrator")
                .set("spark.kryo.registrationRequired", "true")
                .registerKryoClasses(kryoClasses);

        KylinSparkJobListener jobListener = new KylinSparkJobListener();
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            sc.sc().addSparkListener(jobListener);
            // Start from a clean output directory.
            HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(outputPath));

            SerializableConfiguration sConf = new SerializableConfiguration(sc.hadoopConfiguration());
            KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
            CubeInstance cubeInstance = CubeManager.getInstance(envConfig).getCube(cubeName);
            Job job = Job.getInstance(sConf.get());
            FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(cubeInstance);

            logger.info("RDD Output path: {}", outputPath);
            logger.info("getTotalReducerNum: {}", reducerMapping.getTotalReducerNum());
            logger.info("getCuboidRowCounterReducerNum: {}", reducerMapping.getCuboidRowCounterReducerNum());
            logger.info("counter path {}", counterPath);

            boolean isSequenceFile = "SEQUENCEFILE".equalsIgnoreCase(envConfig.getFlatTableStorageFormat());
            LongAccumulator bytesWritten = sc.sc().longAccumulator();

            // Pipeline: flat-table rows -> keyed records -> one partition per reducer id -> named-output records.
            JavaRDD<String[]> recordRDD = SparkUtil.hiveRecordInputRDD(isSequenceFile, sc, inputPath, hiveTable);
            JavaPairRDD<SelfDefineSortableKey, Text> flatOutputRDD = recordRDD.mapPartitionsToPair(
                    new FlatOutputFucntion(cubeName, segmentId, metaUrl, sConf, samplingPercent, bytesWritten));
            JavaPairRDD<SelfDefineSortableKey, Text> aggredRDD = flatOutputRDD.repartitionAndSortWithinPartitions(
                    new FactDistinctPartitioner(cubeName, metaUrl, sConf, reducerMapping.getTotalReducerNum()));
            JavaPairRDD<String, Tuple3<Writable, Writable, String>> outputRDD = aggredRDD.mapPartitionsToPair(
                    new MultiOutputFunction(cubeName, metaUrl, sConf, samplingPercent));

            // Named outputs that MultiOutputFunction emits records for.
            MultipleOutputs.addNamedOutput(job, "column", SequenceFileOutputFormat.class, NullWritable.class, Text.class);
            MultipleOutputs.addNamedOutput(job, "dict", SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class);
            MultipleOutputs.addNamedOutput(job, "statistics", SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
            MultipleOutputs.addNamedOutput(job, "partition", TextOutputFormat.class, NullWritable.class, LongWritable.class);
            FileOutputFormat.setOutputPath(job, new Path(outputPath));
            FileOutputFormat.setCompressOutput(job, false);
            LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

            MultipleOutputsRDD<Writable, Writable> multipleOutputsRDD = MultipleOutputsRDD.rddToMultipleOutputsRDD(outputRDD);
            multipleOutputsRDD.saveAsNewAPIHadoopDatasetWithMultipleOutputs(job.getConfiguration());

            long recordCount = recordRDD.count();
            logger.info("Map input records={}", recordCount);
            logger.info("HDFS Read: {} HDFS Write", bytesWritten.value());

            // Persist the counters for whoever reads the counter path afterwards.
            Map<String, String> counterMap = new HashMap<>();
            counterMap.put("source_records_count", String.valueOf(recordCount));
            counterMap.put("source_records_size", String.valueOf(bytesWritten.value()));
            HadoopUtil.writeToSequenceFile(sc.hadoopConfiguration(), counterPath, counterMap);

            HadoopUtil.deleteHDFSMeta(metaUrl);
        }
    }

    // Builds the eight command-line options. Every option takes one argument,
    // is required, and uses its own name as the argument name.
    static {
        OPTION_CUBE_NAME = requiredArgOption("cubename", "Cube Name");
        OPTION_META_URL = requiredArgOption("metaUrl", "HDFS metadata url");
        OPTION_OUTPUT_PATH = requiredArgOption("output", "Cube output path");
        OPTION_SEGMENT_ID = requiredArgOption("segmentId", "Cube Segment Id");
        OPTION_STATS_SAMPLING_PERCENT = requiredArgOption("statisticssamplingpercent", "Statistics sampling percent");
        OPTION_INPUT_TABLE = requiredArgOption("hiveTable", "Hive Intermediate Table");
        OPTION_INPUT_PATH = requiredArgOption("input", "Hive Intermediate Table PATH");
        OPTION_COUNTER_PATH = requiredArgOption("counterOutput", "counter output path");
    }

    /**
     * Creates one required, single-argument option through the static
     * {@link OptionBuilder} calls.
     *
     * @param name        used both as the option name and as its argument name
     * @param description help text for the option
     * @return the option produced by {@code OptionBuilder.create(name)}
     */
    private static Option requiredArgOption(String name, String description) {
        OptionBuilder.withArgName(name);
        OptionBuilder.hasArg();
        OptionBuilder.isRequired(true);
        OptionBuilder.withDescription(description);
        return OptionBuilder.create(name);
    }

    /**
     * Partition-level function that turns the sorted (key, value) pairs of one
     * partition into records for the named outputs "column", "dict",
     * "statistics" and "partition".
     *
     * <p>On first use the function loads the Kylin config and the cube, then
     * asks {@link FactDistinctColumnsReducerMapping} what the current partition
     * id stands for: either cuboid row-count statistics (HLL counters are
     * merged per cuboid) or a single column (values are emitted as-is or fed
     * into a dictionary builder, and min/max are tracked for comparable
     * dimension columns).
     *
     * <p>Each produced element is {@code (namedOutput, (key, value, path))}.
     * NOTE(review): the third tuple component looks like the base output path
     * consumed by MultipleOutputsRDD; confirm against that class.
     */
    static class MultiOutputFunction
    implements PairFlatMapFunction<Iterator<Tuple2<SelfDefineSortableKey, Text>>, String, Tuple3<Writable, Writable, String>> {
        /** Set at the end of init(); transient, so a deserialized copy starts uninitialized. */
        private volatile transient boolean initialized = false;
        private String DICT_FILE_POSTFIX = ".rldict";
        private String DIMENSION_COL_INFO_FILE_POSTFIX = ".dci";
        private String cubeName;
        private String metaUrl;
        private SerializableConfiguration conf;
        private int samplingPercent;
        private transient FactDistinctColumnsReducerMapping reducerMapping;
        /** Partition id reported by TaskContext when init() ran. */
        private int taskId;
        /** True when this partition carries cuboid row-count statistics rather than column values. */
        private boolean isStatistics = false;
        private long baseCuboidId;
        private List<Long> baseCuboidRowCountInMappers;
        private Map<Long, HLLCounter> cuboidHLLMap;
        /** Column handled by this partition (only set when isStatistics is false). */
        private TblColRef col;
        private boolean buildDictInReducer;
        private transient IDictionaryBuilder builder;
        private int rowCount = 0;
        private long totalRowsBeforeMerge = 0L;
        private KylinConfig cubeConfig;
        private CubeDesc cubeDesc;
        private String maxValue = null;
        private String minValue = null;
        private boolean isDimensionCol;
        private boolean isDictCol;
        private KylinConfig kConfig;
        /** Accumulates every record that call() returns. */
        private List<Tuple2<String, Tuple3<Writable, Writable, String>>> result;

        public MultiOutputFunction(String cubeName, String metaurl, SerializableConfiguration conf, int samplingPercent) {
            this.cubeName = cubeName;
            this.metaUrl = metaurl;
            this.conf = conf;
            this.samplingPercent = samplingPercent;
        }

        /**
         * Loads config and cube metadata and decides, from the partition id,
         * whether this instance handles statistics or a single column.
         *
         * @throws IOException if setting up the dictionary builder fails
         */
        private void init() throws IOException {
            taskId = TaskContext.getPartitionId();
            kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
            try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig.setAndUnsetThreadLocalConfig(kConfig)) {
                CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
                cubeDesc = cubeInstance.getDescriptor();
                cubeConfig = cubeInstance.getConfig();
                reducerMapping = new FactDistinctColumnsReducerMapping(cubeInstance);
                result = Lists.newArrayList();
                if (reducerMapping.isCuboidRowCounterReducer(taskId)) {
                    isStatistics = true;
                    baseCuboidId = cubeInstance.getCuboidScheduler().getBaseCuboidId();
                    baseCuboidRowCountInMappers = Lists.newArrayList();
                    cuboidHLLMap = Maps.newHashMap();
                    logger.info("Partition {} handling stats", taskId);
                } else {
                    col = reducerMapping.getColForReducer(taskId);
                    Preconditions.checkNotNull(col);
                    isDimensionCol = cubeDesc.listDimensionColumnsExcludingDerived(true).contains(col) && col.getType().needCompare();
                    isDictCol = cubeDesc.getAllColumnsNeedDictionaryBuilt().contains(col);
                    // The dictionary is built here only when enabled in config, when the
                    // column has no custom builder class, and when the column is not
                    // spread over more than one reducer.
                    buildDictInReducer = kConfig.isBuildDictInReducerEnabled();
                    if (cubeDesc.getDictionaryBuilderClass(col) != null) {
                        buildDictInReducer = false;
                    }
                    if (reducerMapping.getReducerNumForDimCol(col) > 1) {
                        buildDictInReducer = false;
                    }
                    if (buildDictInReducer) {
                        builder = DictionaryGenerator.newDictionaryBuilder(col.getType());
                        builder.init(null, 0, null);
                    }
                    logger.info("Partition {} handling column {}, buildDictInReducer={}", taskId, col, buildDictInReducer);
                }
                initialized = true;
            }
        }

        /** Logs the received value while fewer than ten rows have been counted. */
        private void logAFewRows(String value) {
            if (rowCount < 10) {
                logger.info("Received value: {}", value);
            }
        }

        /**
         * Processes every pair of the partition and returns the named-output
         * records produced for it.
         *
         * @param tuple2Iterator the sorted pairs of this partition
         * @return iterator over the accumulated output records
         * @throws Exception if initialization or dictionary building fails
         */
        @Override
        public Iterator<Tuple2<String, Tuple3<Writable, Writable, String>>> call(Iterator<Tuple2<SelfDefineSortableKey, Text>> tuple2Iterator) throws Exception {
            if (!initialized) {
                // The class-level monitor was lost in decompilation ("Converted
                // monitor instructions to comments"); without it the re-check below
                // is unguarded and init() can run concurrently in one JVM.
                synchronized (SparkFactDistinct.class) {
                    if (!initialized) {
                        init();
                    }
                }
            }

            if (isStatistics) {
                calculateStatistics(tuple2Iterator);
                List<Long> allCuboids = new ArrayList<>(cuboidHLLMap.keySet());
                Collections.sort(allCuboids);
                logMapperAndCuboidStatistics(allCuboids);
                outputStatistics(allCuboids, result);
                return result.iterator();
            }

            calculateColData(tuple2Iterator);
            if (isDimensionCol) {
                outputDimRangeInfo(result);
            }
            if (buildDictInReducer) {
                try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig.setAndUnsetThreadLocalConfig(kConfig)) {
                    Dictionary<String> dict = builder.build();
                    outputDict(col, dict, result);
                }
            }
            return result.iterator();
        }

        /**
         * Reads one HLL counter per pair and merges it into the counter kept
         * for that pair's cuboid id. The cuboid id is read from the key bytes
         * starting at offset 1; the registers come from the value bytes.
         */
        private void calculateStatistics(Iterator<Tuple2<SelfDefineSortableKey, Text>> tuple2Iterator) throws IOException {
            while (tuple2Iterator.hasNext()) {
                HLLCounter hll = new HLLCounter(cubeConfig.getCubeStatsHLLPrecision());
                Tuple2<SelfDefineSortableKey, Text> tuple = tuple2Iterator.next();
                long cuboidId = Bytes.toLong(tuple._1.getText().getBytes(), 1);
                Text registers = tuple._2;
                hll.readRegisters(ByteBuffer.wrap(registers.getBytes(), 0, registers.getLength()));

                long estimate = hll.getCountEstimate();
                totalRowsBeforeMerge += estimate;
                if (cuboidId == baseCuboidId) {
                    baseCuboidRowCountInMappers.add(estimate);
                }

                HLLCounter merged = cuboidHLLMap.get(cuboidId);
                if (merged != null) {
                    merged.merge(hll);
                } else {
                    cuboidHLLMap.put(cuboidId, hll);
                }
            }
        }

        /**
         * Consumes column values. The value is the key text without its first
         * byte. Depending on the column it updates min/max, feeds the
         * dictionary builder, or emits a "column" record.
         */
        private void calculateColData(Iterator<Tuple2<SelfDefineSortableKey, Text>> tuple2Iterator) {
            while (tuple2Iterator.hasNext()) {
                Text keyText = tuple2Iterator.next()._1.getText();
                String value = Bytes.toString(keyText.getBytes(), 1, keyText.getLength() - 1);
                logAFewRows(value);
                if (isDimensionCol) {
                    if (minValue == null || col.getType().compare(minValue, value) > 0) {
                        minValue = value;
                    }
                    if (maxValue == null || col.getType().compare(maxValue, value) < 0) {
                        maxValue = value;
                    }
                }
                if (isDictCol) {
                    if (buildDictInReducer) {
                        builder.addValue(value);
                    } else {
                        result.add(namedOutputRecord("column", NullWritable.get(), new Text(value.getBytes(StandardCharsets.UTF_8)), col.getIdentity() + "/"));
                    }
                }
                ++rowCount;
            }
        }

        /** Logs per-mapper base cuboid counts and the per-cuboid estimates before and after merging. */
        private void logMapperAndCuboidStatistics(List<Long> allCuboids) {
            logger.info("Cuboid number for task: {}\t{}", taskId, allCuboids.size());
            logger.info("Samping percentage: \t{}", samplingPercent);
            logger.info("The following statistics are collected based on sampling data. ");
            logger.info("Number of Mappers: {}", baseCuboidRowCountInMappers.size());
            for (int i = 0; i < baseCuboidRowCountInMappers.size(); ++i) {
                Long mapperRowCount = baseCuboidRowCountInMappers.get(i);
                if (mapperRowCount > 0L) {
                    logger.info("Base Cuboid in Mapper {} row count: \t {}", i, mapperRowCount);
                }
            }
            long grandTotal = 0L;
            for (Long cuboidId : allCuboids) {
                long estimate = cuboidHLLMap.get(cuboidId).getCountEstimate();
                grandTotal += estimate;
                logger.info("Cuboid {} row count is: \t {}", cuboidId, estimate);
            }
            logger.info("Sum of row counts (before merge) is: \t {}", totalRowsBeforeMerge);
            logger.info("After merge, the row count: \t {}", grandTotal);
        }

        /** Emits two "partition" records (min, then max) once at least one value has been seen. */
        private void outputDimRangeInfo(List<Tuple2<String, Tuple3<Writable, Writable, String>>> result) {
            if (col != null && minValue != null) {
                String dimRangeFileName = col.getIdentity() + "/" + col.getName() + DIMENSION_COL_INFO_FILE_POSTFIX;
                result.add(namedOutputRecord("partition", NullWritable.get(), new Text(minValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName));
                result.add(namedOutputRecord("partition", NullWritable.get(), new Text(maxValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName));
                logger.info("write dimension range info for col : {}  minValue:{} maxValue:{}", col.getName(), minValue, maxValue);
            }
        }

        /** Serializes the dictionary (class name first, then its content) into one "dict" record. */
        private void outputDict(TblColRef col, Dictionary<String> dict, List<Tuple2<String, Tuple3<Writable, Writable, String>>> result) throws IOException {
            String dictFileName = col.getIdentity() + "/" + col.getName() + DICT_FILE_POSTFIX;
            try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
                 DataOutputStream outputStream = new DataOutputStream(baos)) {
                outputStream.writeUTF(dict.getClass().getName());
                dict.write(outputStream);
                result.add(namedOutputRecord("dict", NullWritable.get(), new ArrayPrimitiveWritable(baos.toByteArray()), dictFileName));
            }
        }

        /**
         * Emits the "statistics" records: key -1 carries the ratio of
         * pre-merge to post-merge row estimates, key -2 the number of base
         * cuboid counts collected, key 0 the sampling percent, and then one
         * record per cuboid id holding its HLL registers.
         */
        private void outputStatistics(List<Long> allCuboids, List<Tuple2<String, Tuple3<Writable, Writable, String>>> result) throws IOException {
            String statisticsFileName = "statistics/statistics";
            long grandTotal = 0L;
            for (HLLCounter hll : cuboidHLLMap.values()) {
                grandTotal += hll.getCountEstimate();
            }
            // Guard against dividing by zero when no rows were estimated.
            double mapperOverlapRatio = grandTotal == 0L ? 0.0 : (double) totalRowsBeforeMerge / (double) grandTotal;
            result.add(namedOutputRecord("statistics", new LongWritable(-1L), new BytesWritable(Bytes.toBytes(mapperOverlapRatio)), statisticsFileName));
            result.add(namedOutputRecord("statistics", new LongWritable(-2L), new BytesWritable(Bytes.toBytes(baseCuboidRowCountInMappers.size())), statisticsFileName));
            result.add(namedOutputRecord("statistics", new LongWritable(0L), new BytesWritable(Bytes.toBytes(samplingPercent)), statisticsFileName));

            // One scratch buffer is reused; each record gets its own copy of the bytes.
            ByteBuffer valueBuf = ByteBuffer.allocate(0x100000);
            for (long cuboidId : allCuboids) {
                valueBuf.clear();
                cuboidHLLMap.get(cuboidId).writeRegisters(valueBuf);
                valueBuf.flip();
                byte[] valueCopy = Arrays.copyOf(valueBuf.array(), valueBuf.limit());
                result.add(namedOutputRecord("statistics", new LongWritable(cuboidId), new BytesWritable(valueCopy, valueCopy.length), statisticsFileName));
            }
        }

        /** Wraps one key/value pair as {@code (namedOutput, (key, value, path))}. */
        private static Tuple2<String, Tuple3<Writable, Writable, String>> namedOutputRecord(String namedOutput, Writable key, Writable value, String path) {
            return new Tuple2<String, Tuple3<Writable, Writable, String>>(namedOutput, new Tuple3<Writable, Writable, String>(key, value, path));
        }
    }

    static class FactDistinctPartitioner
    extends Partitioner {
        private volatile transient boolean initialized = false;
        private String cubeName;
        private String metaUrl;
        private SerializableConfiguration conf;
        private int totalReducerNum;
        private transient FactDistinctColumnsReducerMapping reducerMapping;

        public FactDistinctPartitioner(String cubeName, String metaUrl, SerializableConfiguration conf, int totalReducerNum) {
            this.cubeName = cubeName;
            this.metaUrl = metaUrl;
            this.conf = conf;
            this.totalReducerNum = totalReducerNum;
        }

        private void init() {
            KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(this.conf, this.metaUrl);
            try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig.setAndUnsetThreadLocalConfig(kConfig);){
                CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(this.cubeName);
                this.reducerMapping = new FactDistinctColumnsReducerMapping(cubeInstance);
                this.initialized = true;
            }
        }

        public int numPartitions() {
            return this.totalReducerNum;
        }

        /*
         * WARNING - Removed try catching itself - possible behaviour change.
         * Enabled aggressive block sorting
         * Enabled unnecessary exception pruning
         * Enabled aggressive exception aggregation
         * Converted monitor instructions to comments
         * Lifted jumps to return sites
         */
        public int getPartition(Object o) {
            SelfDefineSortableKey skey;
            Text key;
            if (!this.initialized) {
                Class<SparkFactDistinct> clazz = SparkFactDistinct.class;
                // MONITORENTER : org.apache.kylin.engine.spark.SparkFactDistinct.class
                if (!this.initialized) {
                    this.init();
                }
                // MONITOREXIT : clazz
            }
            if ((key = (skey = (SelfDefineSortableKey)o).getText()).getBytes()[0] != -1) return BytesUtil.readUnsigned(key.getBytes(), 0, 1);
            Long cuboidId = Bytes.toLong(key.getBytes(), 1, 8);
            return this.reducerMapping.getReducerIdForCuboidRowCount(cuboidId);
        }
    }

    /**
     * Feeds flat-table rows into one HLL counter per cuboid.
     *
     * <p>For every row, each row-key column value is hashed first. Each
     * cuboid then combines the hashes of the columns listed in its entry of
     * {@code cuboidsBitSet} and adds the result to the counter at the same
     * index. Two combination schemes exist, selected by the constructor flag.
     */
    static class CuboidStatCalculator {
        private final int nRowKey;
        private final int[] rowkeyColIndex;
        private final Long[] cuboidIds;
        private final Integer[][] cuboidsBitSet;
        private HLLCounter[] cuboidsHLL;
        private final boolean isNewAlgorithm;
        private final HashFunction hf;
        /** Per-column hash scratch space; allocated only for the new algorithm. */
        private long[] rowHashCodesLong;

        /**
         * @param rowkeyColIndex position of each row-key column inside a row
         * @param cuboidIds cuboid ids, exposed again through getCuboidIds()
         * @param cuboidsBitSet per cuboid, the row-key positions it is made of
         * @param isUsePutRowKeyToHllNewAlgorithm true selects murmur3_128 with summed long hashes,
         *        false selects murmur3_32 with re-hashed byte hashes
         * @param cuboidsHLL counters to update, indexed like cuboidsBitSet
         */
        public CuboidStatCalculator(int[] rowkeyColIndex, Long[] cuboidIds, Integer[][] cuboidsBitSet, boolean isUsePutRowKeyToHllNewAlgorithm, HLLCounter[] cuboidsHLL) {
            this.nRowKey = rowkeyColIndex.length;
            this.rowkeyColIndex = rowkeyColIndex;
            this.cuboidIds = cuboidIds;
            this.cuboidsBitSet = cuboidsBitSet;
            this.isNewAlgorithm = isUsePutRowKeyToHllNewAlgorithm;
            if (isUsePutRowKeyToHllNewAlgorithm) {
                this.rowHashCodesLong = new long[nRowKey];
                this.hf = Hashing.murmur3_128();
            } else {
                this.hf = Hashing.murmur3_32();
            }
            this.cuboidsHLL = cuboidsHLL;
        }

        /** Adds one row to every cuboid counter, working on a copy of the array. */
        public void putRow(String[] row) {
            String[] rowCopy = Arrays.copyOf(row, row.length);
            if (!isNewAlgorithm) {
                putRowKeyToHLLOld(rowCopy);
                return;
            }
            putRowKeyToHLLNew(rowCopy);
        }

        /** Old scheme: hash each column to bytes, then hash the concatenation per cuboid. */
        private void putRowKeyToHLLOld(String[] row) {
            byte[][] columnHashes = new byte[nRowKey][];
            for (int k = 0; k < nRowKey; ++k) {
                Hasher columnHasher = hf.newHasher();
                String cell = row[rowkeyColIndex[k]];
                if (cell == null) {
                    // A missing value is hashed as the int 0.
                    columnHashes[k] = columnHasher.putInt(0).hash().asBytes();
                } else {
                    columnHashes[k] = columnHasher.putUnencodedChars(cell).hash().asBytes();
                }
            }

            for (int c = 0; c < cuboidsBitSet.length; ++c) {
                Hasher cuboidHasher = hf.newHasher();
                for (Integer keyPosition : cuboidsBitSet[c]) {
                    cuboidHasher.putBytes(columnHashes[keyPosition]);
                }
                cuboidsHLL[c].add(cuboidHasher.hash().asBytes());
            }
        }

        /** New scheme: hash each column to a long offset by its position, then sum per cuboid. */
        private void putRowKeyToHLLNew(String[] row) {
            for (int k = 0; k < nRowKey; ++k) {
                Hasher columnHasher = hf.newHasher();
                String cell = row[rowkeyColIndex[k]];
                // A missing value is hashed as the string "0".
                String hashed = cell == null ? "0" : cell;
                byte[] digest = columnHasher.putUnencodedChars(hashed).hash().asBytes();
                rowHashCodesLong[k] = Bytes.toLong(digest) + (long) k;
            }

            for (int c = 0; c < cuboidsBitSet.length; ++c) {
                long combined = 0L;
                for (Integer keyPosition : cuboidsBitSet[c]) {
                    combined += rowHashCodesLong[keyPosition];
                }
                cuboidsHLL[c].addHashDirectly(combined);
            }
        }

        /** @return the counters passed to the constructor */
        public HLLCounter[] getHLLCounters() {
            return cuboidsHLL;
        }

        /** @return the cuboid ids passed to the constructor */
        public Long[] getCuboidIds() {
            return cuboidIds;
        }
    }

    /**
     * Per-partition function that converts flat-table rows into the
     * {@code (SelfDefineSortableKey, Text)} records produced by the fact-distinct step.
     *
     * <p>For every row of the partition it:
     * <ul>
     *   <li>adds the row's approximate byte size to the {@code bytesWritten} accumulator;</li>
     *   <li>for each column returned by {@code reducerMapping.getAllDimDictCols()}: emits the value
     *       if the column is a dictionary column and the deduper accepts it, otherwise folds the
     *       value into that column's running min/max;</li>
     *   <li>feeds a sample of the rows to the {@link CuboidStatCalculator}.</li>
     * </ul>
     * After the last row it emits one record per cuboid carrying the serialized HLL registers, and
     * the min and max value of every range-tracked column.
     *
     * <p>Transient members are rebuilt lazily on first {@link #call(Iterator)} via {@link #init()}.
     */
    static class FlatOutputFucntion
    implements PairFlatMapFunction<Iterator<String[]>, SelfDefineSortableKey, Text> {
        /** Capacity of the scratch buffer that receives serialized HLL registers (1 MiB). */
        private static final int HLL_BUFFER_BYTES = 0x100000;
        /** Initial capacity of the reusable key buffer; it grows on demand in addFieldValue. */
        private static final int INITIAL_KEY_BUFFER_BYTES = 4096;

        private volatile transient boolean initialized = false;
        private String cubeName;
        private String segmentId;
        private String metaUrl;
        private SerializableConfiguration conf;
        private int samplingPercent;
        private transient CuboidStatCalculator cuboidStatCalculator;
        private transient FactDistinctColumnsReducerMapping reducerMapping;
        private List<TblColRef> allCols;
        private int[] columnIndex;
        private transient FactDistinctColumnsMapper.DictColDeduper dictColDeduper;
        private Map<Integer, DimensionRangeInfo> dimensionRangeInfoMap;
        private transient ByteBuffer tmpbuf;
        private LongAccumulator bytesWritten;
        private KeyValueBuilder keyValueBuilder;

        /**
         * Stores the parameters needed to rebuild the working state later; no metadata is loaded here.
         *
         * @param cubeName        name of the cube looked up in {@link #init()}
         * @param segmentId       id of the segment looked up in {@link #init()}
         * @param metaurl         metadata URL passed to {@code loadKylinConfigFromHdfs}
         * @param conf            serializable Hadoop configuration passed to {@code loadKylinConfigFromHdfs}
         * @param samplingPercent rows whose index modulo 100 is below this value are sampled for cuboid statistics
         * @param bytesWritten    accumulator that receives the approximate size of every processed row
         */
        public FlatOutputFucntion(String cubeName, String segmentId, String metaurl, SerializableConfiguration conf, int samplingPercent, LongAccumulator bytesWritten) {
            this.cubeName = cubeName;
            this.segmentId = segmentId;
            this.metaUrl = metaurl;
            this.conf = conf;
            this.samplingPercent = samplingPercent;
            this.dimensionRangeInfoMap = Maps.newHashMap();
            this.bytesWritten = bytesWritten;
        }

        /**
         * Loads the Kylin config and cube metadata and builds all working state: key/value builder,
         * reducer mapping, key buffer, cuboid statistics calculator, dictionary-column deduper and
         * the column index table. Sets {@code initialized} only after everything succeeded.
         */
        private void init() {
            KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(this.conf, this.metaUrl);
            // The thread-local config is installed only for the duration of the metadata lookups below.
            try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig.setAndUnsetThreadLocalConfig(kConfig)) {
                CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(this.cubeName);
                CubeDesc cubeDesc = cubeInstance.getDescriptor();
                CubeSegment cubeSegment = cubeInstance.getSegmentById(this.segmentId);
                CubeJoinedFlatTableEnrich intermediateTableDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);

                this.keyValueBuilder = new KeyValueBuilder(intermediateTableDesc);
                this.reducerMapping = new FactDistinctColumnsReducerMapping(cubeInstance);
                this.tmpbuf = ByteBuffer.allocate(INITIAL_KEY_BUFFER_BYTES);

                int[] rowKeyColumnIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
                Long[] cuboidIds = this.getCuboidIds(cubeSegment);
                Integer[][] cuboidsBitSet = CuboidUtil.getCuboidBitSet(cuboidIds, rowKeyColumnIndexes.length);
                boolean isNewAlgorithm = this.isUsePutRowKeyToHllNewAlgorithm(cubeDesc);
                HLLCounter[] cuboidsHLL = this.getInitCuboidsHLL(cuboidIds.length, cubeDesc.getConfig().getCubeStatsHLLPrecision());
                this.cuboidStatCalculator = new CuboidStatCalculator(rowKeyColumnIndexes, cuboidIds, cuboidsBitSet, isNewAlgorithm, cuboidsHLL);

                // allCols must be set before the two initializers below, which iterate over it.
                this.allCols = this.reducerMapping.getAllDimDictCols();
                this.initDictColDeduper(cubeDesc);
                this.initColumnIndex(intermediateTableDesc);
                this.initialized = true;
            }
        }

        /**
         * Processes every row of one partition and returns the records to emit.
         *
         * <p>All records are collected in memory and returned as one iterator once the input is
         * exhausted.
         *
         * @param rowIterator rows of the partition; each row is indexed by flat-table column position
         * @return column-value records, followed by one HLL record per cuboid and the min/max
         *         records of the range-tracked columns
         * @throws Exception declared by the implemented interface
         */
        @Override
        public Iterator<Tuple2<SelfDefineSortableKey, Text>> call(Iterator<String[]> rowIterator) throws Exception {
            this.ensureInitialized();
            List<Tuple2<SelfDefineSortableKey, Text>> result = Lists.newArrayList();
            // long, not int: once an int counter overflows, rowCount % 100 is never positive and every row gets sampled.
            long rowCount = 0L;
            while (rowIterator.hasNext()) {
                String[] row = rowIterator.next();
                this.bytesWritten.add((long)this.countSizeInBytes(row));
                this.collectColumnValues(row, result);
                if (rowCount % 100 < this.samplingPercent) {
                    this.cuboidStatCalculator.putRow(row);
                }
                // Check the deduper's memory footprint once every 100 rows.
                if (rowCount % 100 == 0) {
                    this.dictColDeduper.resetIfShortOfMem();
                }
                ++rowCount;
            }
            this.emitCuboidStats(result);
            this.emitDimensionRanges(result);
            return result.iterator();
        }

        /**
         * Runs {@link #init()} at most once per instance using double-checked locking on
         * {@code SparkFactDistinct.class}, so initialization is serialized across all instances in
         * the JVM. The decompiler had reduced this monitor to comments; it is restored here.
         */
        private void ensureInitialized() {
            if (!this.initialized) {
                synchronized (SparkFactDistinct.class) {
                    if (!this.initialized) {
                        this.init();
                    }
                }
            }
        }

        /**
         * Handles the tracked columns of one row: dictionary columns are emitted when the deduper's
         * {@code add} returns true, all other columns update their running min/max. Values that are
         * {@code null} or that {@code keyValueBuilder.isNull} reports as null are ignored.
         */
        private void collectColumnValues(String[] row, List<Tuple2<SelfDefineSortableKey, Text>> result) {
            for (int i = 0; i < this.allCols.size(); ++i) {
                String fieldValue = row[this.columnIndex[i]];
                if (fieldValue == null || this.keyValueBuilder.isNull(fieldValue)) {
                    continue;
                }
                DataType type = this.allCols.get(i).getType();
                if (this.dictColDeduper.isDictCol(i)) {
                    if (this.dictColDeduper.add(i, fieldValue)) {
                        this.addFieldValue(type, i, fieldValue, result);
                    }
                    continue;
                }
                this.updateDimensionRange(i, type, fieldValue);
            }
        }

        /** Widens the stored min/max of column {@code colIndex} with {@code fieldValue}, using the type's ordering. */
        private void updateDimensionRange(int colIndex, DataType type, String fieldValue) {
            DimensionRangeInfo range = this.dimensionRangeInfoMap.get(colIndex);
            if (range == null) {
                // First value seen for this column is both min and max.
                this.dimensionRangeInfoMap.put(colIndex, new DimensionRangeInfo(fieldValue, fieldValue));
                return;
            }
            range.setMax(type.getOrder().max(range.getMax(), fieldValue));
            range.setMin(type.getOrder().min(range.getMin(), fieldValue));
        }

        /**
         * Appends one record per cuboid: the key is a {@code -1} byte followed by the cuboid id as
         * a long, the value is whatever {@code HLLCounter.writeRegisters} wrote to the buffer.
         */
        private void emitCuboidStats(List<Tuple2<SelfDefineSortableKey, Text>> result) {
            ByteBuffer hllBuf = ByteBuffer.allocate(HLL_BUFFER_BYTES);
            Long[] cuboidIds = this.cuboidStatCalculator.getCuboidIds();
            HLLCounter[] cuboidsHLL = this.cuboidStatCalculator.getHLLCounters();
            for (int i = 0; i < cuboidIds.length; ++i) {
                // NOTE(review): the leading -1 presumably marks HLL keys apart from column keys,
                // whose first byte is a reducer index - confirm against FactDistinctColumnsReducerMapping.
                this.tmpbuf.clear();
                this.tmpbuf.put((byte)-1);
                this.tmpbuf.putLong(cuboidIds[i]);
                Text outputKey = new Text();
                outputKey.set(this.tmpbuf.array(), 0, this.tmpbuf.position());

                hllBuf.clear();
                cuboidsHLL[i].writeRegisters(hllBuf);
                Text outputValue = new Text();
                outputValue.set(hllBuf.array(), 0, hllBuf.position());

                SelfDefineSortableKey sortableKey = new SelfDefineSortableKey();
                sortableKey.init(outputKey, (byte)0);
                result.add(new Tuple2<SelfDefineSortableKey, Text>(sortableKey, outputValue));
            }
        }

        /** Appends the min and then the max value of every column that has a recorded range. */
        private void emitDimensionRanges(List<Tuple2<SelfDefineSortableKey, Text>> result) {
            for (Map.Entry<Integer, DimensionRangeInfo> entry : this.dimensionRangeInfoMap.entrySet()) {
                int colIndex = entry.getKey();
                DimensionRangeInfo rangeInfo = entry.getValue();
                DataType dataType = this.allCols.get(colIndex).getType();
                this.addFieldValue(dataType, colIndex, rangeInfo.getMin(), result);
                this.addFieldValue(dataType, colIndex, rangeInfo.getMax(), result);
            }
        }

        /**
         * Chooses the cuboid sampling algorithm from the cube descriptor's version: the old one
         * when {@code KylinVersion.isBefore200} is true, the new one otherwise. Logs the decision.
         */
        private boolean isUsePutRowKeyToHllNewAlgorithm(CubeDesc cubeDesc) {
            if (KylinVersion.isBefore200(cubeDesc.getVersion())) {
                logger.info("Found KylinVersion: {}. Use old algorithm for cuboid sampling.", cubeDesc.getVersion());
                return false;
            }
            logger.info("Found KylinVersion: {}. Use new algorithm for cuboid sampling. About the details of the new algorithm, please refer to KYLIN-2518", cubeDesc.getVersion());
            return true;
        }

        /**
         * Returns all cuboid ids of the segment's cuboid scheduler, plus the cube's mandatory
         * cuboids when {@code StatisticsDecisionUtil.isAbleToOptimizeCubingPlan} allows it.
         */
        private Long[] getCuboidIds(CubeSegment cubeSegment) {
            HashSet<Long> cuboidIdSet = Sets.newHashSet(cubeSegment.getCuboidScheduler().getAllCuboidIds());
            if (StatisticsDecisionUtil.isAbleToOptimizeCubingPlan(cubeSegment)) {
                cuboidIdSet.addAll(cubeSegment.getCubeDesc().getMandatoryCuboids());
            }
            return cuboidIdSet.toArray(new Long[cuboidIdSet.size()]);
        }

        /** Creates {@code cuboidSize} dense HLL counters with the given precision. */
        private HLLCounter[] getInitCuboidsHLL(int cuboidSize, int hllPrecision) {
            HLLCounter[] cuboidsHLL = new HLLCounter[cuboidSize];
            for (int i = 0; i < cuboidSize; ++i) {
                cuboidsHLL[i] = new HLLCounter(hllPrecision, RegisterType.DENSE);
            }
            return cuboidsHLL;
        }

        /** Creates the deduper and flags every position in {@code allCols} whose column needs a dictionary. */
        private void initDictColDeduper(CubeDesc cubeDesc) {
            this.dictColDeduper = new FactDistinctColumnsMapper.DictColDeduper();
            Set<TblColRef> dictCols = cubeDesc.getAllColumnsNeedDictionaryBuilt();
            for (int i = 0; i < this.allCols.size(); ++i) {
                if (dictCols.contains(this.allCols.get(i))) {
                    this.dictColDeduper.setIsDictCol(i);
                }
            }
        }

        /** Maps each position in {@code allCols} to that column's index in the flat table. */
        private void initColumnIndex(CubeJoinedFlatTableEnrich intermediateTableDesc) {
            this.columnIndex = new int[this.allCols.size()];
            for (int i = 0; i < this.allCols.size(); ++i) {
                this.columnIndex[i] = intermediateTableDesc.getColumnIndex(this.allCols.get(i));
            }
        }

        /**
         * Appends one column-value record: the key is the lowest byte of the reducer index followed
         * by the value bytes, the value is an empty {@code Text}. The first few records are logged
         * as samples.
         *
         * @param type     data type handed to the sortable key
         * @param colIndex position of the column in {@code allCols}
         * @param value    column value to emit
         * @param result   list the record is appended to
         */
        private void addFieldValue(DataType type, Integer colIndex, String value, List<Tuple2<SelfDefineSortableKey, Text>> result) {
            int reducerIndex = this.reducerMapping.getReducerIdForCol(colIndex, value);
            this.tmpbuf.clear();
            byte[] valueBytes = Bytes.toBytes(value);
            // One extra byte for the reducer index prefix.
            int size = valueBytes.length + 1;
            if (size >= this.tmpbuf.capacity()) {
                this.tmpbuf = ByteBuffer.allocate(this.countNewSize(this.tmpbuf.capacity(), size));
            }
            // Only the least significant byte of the reducer index is written.
            this.tmpbuf.put(Bytes.toBytes(reducerIndex)[3]);
            this.tmpbuf.put(valueBytes);
            Text outputKey = new Text();
            outputKey.set(this.tmpbuf.array(), 0, this.tmpbuf.position());
            SelfDefineSortableKey sortableKey = new SelfDefineSortableKey();
            sortableKey.init(outputKey, type);
            result.add(new Tuple2<SelfDefineSortableKey, Text>(sortableKey, new Text()));
            if (result.size() < 10) {
                logger.info("Sample output: {} '{}' => reducer {}", this.allCols.get(colIndex), value, reducerIndex);
            }
        }

        /** Doubles {@code oldSize} until the result is at least {@code dataSize}; always doubles at least once. */
        private int countNewSize(int oldSize, int dataSize) {
            int newSize = oldSize * 2;
            while (newSize < dataSize) {
                newSize *= 2;
            }
            return newSize;
        }

        /**
         * Approximates the size of a row: the UTF-8 length of each value (1 for {@code null})
         * plus one extra byte per value.
         */
        private int countSizeInBytes(String[] row) {
            int size = 0;
            for (String s : row) {
                size += s == null ? 1 : StringUtil.utf8Length(s);
                ++size;
            }
            return size;
        }
    }
}

