/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.mr.steps;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.kylin.common.KylinVersion;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.MemoryBudgetController;
import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.cube.DimensionRangeInfo;
import org.apache.kylin.cube.cuboid.CuboidUtil;
import org.apache.kylin.engine.mr.common.StatisticsDecisionUtil;
import org.apache.kylin.engine.mr.steps.FactDistinctColumnsMapperBase;
import org.apache.kylin.engine.mr.steps.SelfDefineSortableKey;
import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.measure.hllc.RegisterType;
import org.apache.kylin.metadata.datatype.DataType;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.shaded.com.google.common.collect.Maps;
import org.apache.kylin.shaded.com.google.common.collect.Sets;
import org.apache.kylin.shaded.com.google.common.hash.HashFunction;
import org.apache.kylin.shaded.com.google.common.hash.Hasher;
import org.apache.kylin.shaded.com.google.common.hash.Hashing;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Mapper of the "fact distinct columns" step.
 *
 * <p>From the code in this class, each mapper instance emits three kinds of records:
 * <ul>
 *   <li>values of columns that need a dictionary, de-duplicated locally through
 *       {@link DictColDeduper} before being written (see {@code doMap});</li>
 *   <li>the minimum and maximum value seen for every other column, written once in
 *       {@code doCleanup};</li>
 *   <li>the HLL registers of every cuboid, computed from sampled rows by background
 *       {@link CuboidStatCalculator} workers and written in {@code doCleanup}.</li>
 * </ul>
 */
public class FactDistinctColumnsMapper<KEYIN>
extends FactDistinctColumnsMapperBase<KEYIN, Object> {
    private static final Logger logger = LoggerFactory.getLogger(FactDistinctColumnsMapper.class);
    // Number of rowkey columns of the cube; assigned in doSetup.
    protected int nRowKey;
    // Result of CuboidUtil.getCuboidBitSet(cuboidIds, nRowKey); one Integer[] per cuboid, whose
    // entries are used by the calculators as indexes into the per-row rowkey hash array.
    private Integer[][] allCuboidsBitSet = null;
    // One HLL counter per entry of cuboidIds (same index); slices are handed to the calculators.
    private HLLCounter[] allCuboidsHLL = null;
    // All cuboid ids to collect statistics for; built in doSetup.
    private Long[] cuboidIds;
    // Number of rows processed so far by doMap; drives sampling and the periodic memory check.
    private int rowCount = 0;
    // Parsed from the "statistics.sampling.percent" job configuration entry in doSetup.
    private int samplingPercentage;
    // Scratch buffer used to assemble output keys; starts at 4096 bytes and is replaced by a
    // larger one in writeFieldValue when a value does not fit.
    private ByteBuffer tmpbuf;
    // Tracks which column indexes are dictionary columns and which of their values were seen.
    private DictColDeduper dictColDeduper;
    // Min/max value per non-dictionary column, keyed by the column's index in allCols.
    private Map<Integer, DimensionRangeInfo> dimensionRangeInfoMap = Maps.newHashMap();
    // Background workers, each owning a slice of the cuboids; created and started in doSetup.
    private CuboidStatCalculator[] cuboidStatCalculators;
    // Shared empty value written alongside field-value keys.
    private static final Text EMPTY_TEXT = new Text();
    // Reused wrapper around outputKey that is passed to context.write.
    private SelfDefineSortableKey sortableKey = new SelfDefineSortableKey();

    /**
     * Initializes the per-task state after the base class setup has run.
     *
     * <p>Steps, in order: allocate the key scratch buffer, read the sampling percentage from the
     * job configuration, collect the cuboid ids (plus the mandatory cuboids when the cubing plan
     * can be optimized), create one HLL counter per cuboid, choose the sampling algorithm from the
     * cube's Kylin version, split the cuboids across the calculator workers and start them, and
     * finally register every column that needs a dictionary with the de-duper.
     *
     * @param context the Hadoop mapper context
     * @throws IOException propagated from the base class setup
     */
    @Override
    protected void doSetup(Mapper.Context context) throws IOException {
        super.doSetup(context);

        this.tmpbuf = ByteBuffer.allocate(4096);
        String samplingSetting = context.getConfiguration().get("statistics.sampling.percent");
        this.samplingPercentage = Integer.parseInt(samplingSetting);
        this.nRowKey = this.cubeDesc.getRowkey().getRowKeyColumns().length;

        // Cuboid ids of the scheduler, extended by the mandatory cuboids when applicable.
        HashSet<Long> allIds = Sets.newHashSet(this.cubeSeg.getCuboidScheduler().getAllCuboidIds());
        if (StatisticsDecisionUtil.isAbleToOptimizeCubingPlan(this.cubeSeg)) {
            allIds.addAll(this.cubeSeg.getCubeDesc().getMandatoryCuboids());
        }
        this.cuboidIds = allIds.toArray(new Long[allIds.size()]);
        this.allCuboidsBitSet = CuboidUtil.getCuboidBitSet(this.cuboidIds, this.nRowKey);

        int cuboidCount = this.cuboidIds.length;
        this.allCuboidsHLL = new HLLCounter[cuboidCount];
        for (int c = 0; c < cuboidCount; ++c) {
            this.allCuboidsHLL[c] = new HLLCounter(this.cubeDesc.getConfig().getCubeStatsHLLPrecision(), RegisterType.DENSE);
        }

        // Cubes created before Kylin 2.0.0 keep the old sampling algorithm.
        boolean useNewSampling = !KylinVersion.isBefore200(this.cubeDesc.getVersion());
        if (useNewSampling) {
            logger.info("Found KylinVersion : {}. Use new algorithm for cuboid sampling. About the details of the new algorithm, please refer to KYLIN-2518", (Object)this.cubeDesc.getVersion());
        } else {
            logger.info("Found KylinVersion : {}. Use old algorithm for cuboid sampling.", (Object)this.cubeDesc.getVersion());
        }

        int calculatorNum = this.getStatsThreadNum(cuboidCount);
        this.cuboidStatCalculators = new CuboidStatCalculator[calculatorNum];
        int splitSize = Math.max(1, cuboidCount / calculatorNum);
        for (int slot = 0; slot < calculatorNum; ++slot) {
            int from = slot * splitSize;
            if (from >= cuboidCount) {
                // No cuboids left for this and any later slot.
                break;
            }
            // The last worker also takes the remainder of the integer division.
            int to = slot == calculatorNum - 1 ? cuboidCount : (slot + 1) * splitSize;
            HLLCounter[] hllSlice = Arrays.copyOfRange(this.allCuboidsHLL, from, to);
            Integer[][] bitSetSlice = Arrays.copyOfRange(this.allCuboidsBitSet, from, to);
            Long[] idSlice = Arrays.copyOfRange(this.cuboidIds, from, to);
            CuboidStatCalculator worker = new CuboidStatCalculator(slot, this.intermediateTableDesc.getRowKeyColumnIndexes(), idSlice, bitSetSlice, useNewSampling, hllSlice);
            this.cuboidStatCalculators[slot] = worker;
            worker.start();
        }

        this.dictColDeduper = new DictColDeduper();
        Set<TblColRef> dictCols = this.cubeDesc.getAllColumnsNeedDictionaryBuilt();
        for (int col = 0; col < this.allCols.size(); ++col) {
            if (dictCols.contains(this.allCols.get(col))) {
                this.dictColDeduper.setIsDictCol(col);
            }
        }
    }

    /**
     * Decides how many calculator workers to use for the given number of cuboids.
     *
     * <p>The number is {@code ceil(cuboidNum / cuboidsPerCalculator)}, capped by the configured
     * maximum. The result is never smaller than 1: the caller sizes an array with it and divides
     * by it, so zero or a negative value (non-positive configured maximum, or no cuboids at all)
     * would fail there.
     *
     * @param cuboidNum number of cuboids that need statistics
     * @return the number of calculator workers, always at least 1
     */
    private int getStatsThreadNum(int cuboidNum) {
        int unitNum = this.cubeDesc.getConfig().getCuboidNumberPerStatsCalculator();
        if (unitNum <= 0) {
            logger.warn("config from getCuboidNumberPerStatsCalculator() " + unitNum + " is should larger than 0");
            logger.info("Will use single thread for cuboid statistics calculation");
            return 1;
        }
        // Ceiling division for cuboidNum >= 1.
        int calculatorNum = (cuboidNum - 1) / unitNum + 1;
        int maxCalculatorNum = this.cubeDesc.getConfig().getCuboidStatsCalculatorMaxNumber();
        if (calculatorNum > maxCalculatorNum) {
            calculatorNum = maxCalculatorNum;
        }
        if (calculatorNum < 1) {
            logger.warn("Computed cuboid stats calculator number {} is smaller than 1, will use single thread for cuboid statistics calculation", (Object)calculatorNum);
            calculatorNum = 1;
        }
        return calculatorNum;
    }

    /**
     * Processes one input record, which may expand to several flat-table rows.
     *
     * <p>For every row: the raw size counter is incremented; every non-null column value is
     * either written out (dictionary columns, only when the de-duper has not seen it) or folded
     * into the column's min/max range (all other columns); the row is handed to the cuboid
     * calculators when it falls into the sampled share of each 100 rows; and every 100th row the
     * de-duper gets a chance to release memory.
     *
     * @param key     input key, unused here
     * @param record  input record that the flat table input format parses into rows
     * @param context the Hadoop mapper context
     */
    @Override
    public void doMap(KEYIN key, Object record, Mapper.Context context) throws IOException, InterruptedException {
        Collection<String[]> parsedRows = this.flatTableInputFormat.parseMapperInput(record);
        for (String[] row : parsedRows) {
            context.getCounter((Enum)RawDataCounter.BYTES).increment(this.countSizeInBytes(row));

            for (int col = 0; col < this.allCols.size(); ++col) {
                int srcIndex = this.columnIndex[col];
                String value;
                if (srcIndex < row.length) {
                    value = row[srcIndex];
                } else {
                    // The row is shorter than expected: fall back to a single-space placeholder.
                    value = " ";
                    logger.debug("colIndex:" + srcIndex + " is more than rowSize: " + row.length + " -1, so set empty value.");
                }
                if (value != null) {
                    DataType type = ((TblColRef)this.allCols.get(col)).getType();
                    if (this.dictColDeduper.isDictCol(col)) {
                        // Dictionary column: emit the value only when the de-duper reports it as new.
                        if (this.dictColDeduper.add(col, value)) {
                            this.writeFieldValue(context, type, col, value);
                        }
                    } else {
                        // Other column: only track min/max here; they are emitted in doCleanup.
                        DimensionRangeInfo range = this.dimensionRangeInfoMap.get(col);
                        if (range == null) {
                            this.dimensionRangeInfoMap.put(col, new DimensionRangeInfo(value, value));
                        } else {
                            range.setMax(type.getOrder().max(range.getMax(), value));
                            range.setMin(type.getOrder().min(range.getMin(), value));
                        }
                    }
                }
            }

            int positionInHundred = this.rowCount % 100;
            if (positionInHundred < this.samplingPercentage) {
                this.putRowKeyToHLL(row);
            }
            if (positionInHundred == 0) {
                this.dictColDeduper.resetIfShortOfMem();
            }
            ++this.rowCount;
        }
    }

    /**
     * Hands a sampled row to every cuboid calculator; each one takes its own copy of the row.
     *
     * @param row the flat-table row to sample
     */
    private void putRowKeyToHLL(String[] row) {
        CuboidStatCalculator[] workers = this.cuboidStatCalculators;
        for (int w = 0; w < workers.length; ++w) {
            workers[w].putRow(row);
        }
    }

    /**
     * Estimates the size of a row in bytes.
     *
     * <p>Every field contributes its UTF-8 length (or 1 when it is {@code null}) plus one extra
     * byte (presumably for the field delimiter; confirm against the flat table format). The sum
     * is accumulated as a {@code long} so it cannot wrap around before being returned.
     *
     * @param row the fields of one flat-table row
     * @return the estimated size of the row in bytes
     */
    private long countSizeInBytes(String[] row) {
        long size = 0L;
        for (String s : row) {
            size += s == null ? 1 : StringUtil.utf8Length(s);
            ++size;
        }
        return size;
    }

    /**
     * Flushes everything that was accumulated during the map phase.
     *
     * <p>First waits for all cuboid calculators to finish. Then, for each cuboid, writes one
     * record whose key is the marker byte {@code -1} followed by the 8-byte cuboid id and whose
     * value is the serialized HLL registers. Finally writes the min and the max value of every
     * column tracked in the dimension range map.
     *
     * @param context the Hadoop mapper context
     */
    @Override
    protected void doCleanup(Mapper.Context context) throws IOException, InterruptedException {
        ByteBuffer hllBuf = ByteBuffer.allocate(1 << 20);

        // All workers must be done before any counter is read.
        for (int w = 0; w < this.cuboidStatCalculators.length; ++w) {
            this.cuboidStatCalculators[w].waitForCompletion();
        }

        for (int w = 0; w < this.cuboidStatCalculators.length; ++w) {
            CuboidStatCalculator worker = this.cuboidStatCalculators[w];
            Long[] ids = worker.getCuboidIds();
            HLLCounter[] counters = worker.getHLLCounters();
            for (int c = 0; c < ids.length; ++c) {
                HLLCounter counter = counters[c];

                // Key: marker byte followed by the cuboid id.
                this.tmpbuf.clear();
                this.tmpbuf.put((byte)-1);
                this.tmpbuf.putLong(ids[c]);
                this.outputKey.set(this.tmpbuf.array(), 0, this.tmpbuf.position());

                // Value: the HLL registers of that cuboid.
                hllBuf.clear();
                counter.writeRegisters(hllBuf);
                this.outputValue.set(hllBuf.array(), 0, hllBuf.position());

                this.sortableKey.init(this.outputKey, (byte)0);
                context.write((Object)this.sortableKey, (Object)this.outputValue);
            }
        }

        for (Map.Entry<Integer, DimensionRangeInfo> entry : this.dimensionRangeInfoMap.entrySet()) {
            Integer colIndex = entry.getKey();
            DimensionRangeInfo rangeInfo = entry.getValue();
            DataType dataType = ((TblColRef)this.allCols.get(colIndex)).getType();
            this.writeFieldValue(context, dataType, colIndex, rangeInfo.getMin());
            this.writeFieldValue(context, dataType, colIndex, rangeInfo.getMax());
        }
    }

    /**
     * Computes the capacity of a replacement buffer: the old capacity doubled repeatedly until it
     * is at least {@code dataSize}.
     *
     * <p>The doubling is done in {@code long} arithmetic and starts from at least 1, so the loop
     * always terminates; with {@code int} arithmetic a zero start or an overflow would make the
     * value stick at 0 and loop forever. Results beyond the largest safely allocatable array size
     * are capped.
     *
     * @param oldSize  capacity of the current buffer
     * @param dataSize number of bytes that must fit
     * @return the capacity to allocate, at least {@code dataSize}
     */
    private int countNewSize(int oldSize, int dataSize) {
        // Largest array size that JVMs reliably allow.
        final int maxArraySize = Integer.MAX_VALUE - 8;
        long newSize = Math.max(oldSize, 1) * 2L;
        while (newSize < dataSize) {
            newSize *= 2L;
        }
        if (newSize > maxArraySize) {
            return Math.max(dataSize, maxArraySize);
        }
        return (int)newSize;
    }

    /**
     * Writes one column value to the mapper output.
     *
     * <p>The output key is the low byte of the reducer index followed by the bytes of the value;
     * the output value is always empty. The scratch buffer is replaced by a larger one when the
     * value does not fit. The key is tagged with the column's data type unless the column uses a
     * custom dictionary builder class, in which case the type flag is 0. The first rows processed
     * by this mapper are additionally logged as samples.
     *
     * @param context  the Hadoop mapper context
     * @param type     data type of the column
     * @param colIndex index of the column in {@code allCols}
     * @param value    the column value to write
     */
    private void writeFieldValue(Mapper.Context context, DataType type, Integer colIndex, String value) throws IOException, InterruptedException {
        int reducerIndex = this.reducerMapping.getReducerIdForCol(colIndex, value);
        this.tmpbuf.clear();
        byte[] valueBytes = Bytes.toBytes(value);
        // One extra byte for the reducer index that precedes the value.
        int size = valueBytes.length + 1;
        if (size >= this.tmpbuf.capacity()) {
            this.tmpbuf = ByteBuffer.allocate(this.countNewSize(this.tmpbuf.capacity(), size));
        }
        // Only the least significant byte of the reducer index goes into the key.
        this.tmpbuf.put(Bytes.toBytes(reducerIndex)[3]);
        this.tmpbuf.put(valueBytes);
        this.outputKey.set(this.tmpbuf.array(), 0, this.tmpbuf.position());
        if (this.cubeDesc.getDictionaryBuilderClass((TblColRef)this.allCols.get(colIndex)) == null) {
            this.sortableKey.init(this.outputKey, type);
        } else {
            this.sortableKey.init(this.outputKey, (byte)0);
        }
        context.write((Object)this.sortableKey, (Object)EMPTY_TEXT);
        if (this.rowCount < 10) {
            logger.info("Sample output: " + this.allCols.get(colIndex) + " '" + value + "' => reducer " + reducerIndex);
        }
    }

    /**
     * Remembers, per dictionary column, which values have already been seen so that the mapper
     * can skip writing duplicates.
     *
     * <p>A column is a "dictionary column" for this class once {@link #setIsDictCol(int)} has been
     * called for its index. The remembered values are dropped by {@link #resetIfShortOfMem()} when
     * the available memory falls below the reset threshold, after which values are reported as new
     * again.
     *
     * <p>NOTE(review): {@code enabled} is computed in the constructor but is not consulted by any
     * method of this class; confirm whether de-duplication was meant to be skipped when it is
     * {@code false}.
     */
    public static class DictColDeduper {
        // True when the available memory at construction time was at least the enable threshold.
        final boolean enabled;
        // Available-memory limit in MB below which resetIfShortOfMem clears the value sets.
        final int resetThresholdMB;
        // Seen values per registered dictionary column, keyed by column index.
        final Map<Integer, Set<String>> colValueSets = Maps.newHashMap();

        /** Creates a de-duper with an enable threshold of 200 MB and a reset threshold of 100 MB. */
        public DictColDeduper() {
            this(200, 100);
        }

        /**
         * @param enableThresholdMB available memory in MB required for {@code enabled} to be true
         * @param resetThresholdMB  available memory in MB below which the seen values are dropped
         */
        public DictColDeduper(int enableThresholdMB, int resetThresholdMB) {
            this.enabled = MemoryBudgetController.getSystemAvailMB() >= enableThresholdMB;
            this.resetThresholdMB = resetThresholdMB;
        }

        /** Registers column {@code i} as a dictionary column, starting with no seen values. */
        public void setIsDictCol(int i) {
            this.colValueSets.put(i, new HashSet<String>());
        }

        /** @return whether column {@code i} has been registered as a dictionary column */
        public boolean isDictCol(int i) {
            return this.colValueSets.containsKey(i);
        }

        /**
         * Records a value for a registered dictionary column.
         *
         * @param i          index of a column previously passed to {@link #setIsDictCol(int)};
         *                   an unregistered index fails with a {@link NullPointerException}
         * @param fieldValue the value to record
         * @return {@code true} when the value was not already recorded for that column
         */
        public boolean add(int i, String fieldValue) {
            return this.colValueSets.get(i).add(fieldValue);
        }

        /** @return the live set of recorded values of column {@code i}, or null if unregistered */
        public Set<String> getValueSet(int i) {
            return this.colValueSets.get(i);
        }

        /** Drops all recorded values when the available memory is below the reset threshold. */
        public void resetIfShortOfMem() {
            if (MemoryBudgetController.getSystemAvailMB() < this.resetThresholdMB) {
                for (Set<String> set : this.colValueSets.values()) {
                    set.clear();
                }
            }
        }
    }

    /**
     * Worker that folds sampled rows into the HLL counters of one slice of cuboids on its own
     * thread.
     *
     * <p>Rows are handed over through a bounded queue by {@link #putRow(String[])}, which blocks
     * while the queue is full. The worker thread drains the queue in {@link #run()} and exits once
     * {@link #waitForCompletion()} has been called and the queue is empty. The HLL counters must
     * only be read by other threads after {@link #waitForCompletion()} has returned.
     */
    public static class CuboidStatCalculator
    implements Runnable {
        /** Maximum number of rows waiting in the hand-over queue. */
        private static final int QUEUE_CAPACITY = 2000;
        /** How long the worker waits for a row before it re-checks the stop flag. */
        private static final long POLL_TIMEOUT_MS = 20L;

        private final int id;
        private final int nRowKey;
        // Index of each rowkey column inside a flat-table row.
        private final int[] rowkeyColIndex;
        private final Long[] cuboidIds;
        // Per cuboid: the positions (into the rowkey hash array) of the columns it contains.
        private final Integer[][] cuboidsBitSet;
        private final HLLCounter[] cuboidsHLL;
        private final boolean isNewAlgorithm;
        private final HashFunction hf;
        // Reused per-row hash array of the new algorithm; null when the old algorithm is used.
        private final long[] rowHashCodesLong;
        private final BlockingQueue<String[]> queue = new LinkedBlockingQueue<String[]>(QUEUE_CAPACITY);
        private final Thread workThread;
        // Set by waitForCompletion; tells the worker to exit once the queue is drained.
        private volatile boolean stop;

        /**
         * @param id                              identifier used in log messages
         * @param rowkeyColIndex                  index of each rowkey column inside a row
         * @param cuboidIds                       ids of the cuboids handled by this worker
         * @param cuboidsBitSet                   rowkey positions of each handled cuboid
         * @param isUsePutRowKeyToHllNewAlgorithm whether to use the 128-bit "new" algorithm
         * @param cuboidsHLL                      counters to update, parallel to {@code cuboidIds}
         */
        public CuboidStatCalculator(int id, int[] rowkeyColIndex, Long[] cuboidIds, Integer[][] cuboidsBitSet, boolean isUsePutRowKeyToHllNewAlgorithm, HLLCounter[] cuboidsHLL) {
            this.id = id;
            this.nRowKey = rowkeyColIndex.length;
            this.rowkeyColIndex = rowkeyColIndex;
            this.cuboidIds = cuboidIds;
            this.cuboidsBitSet = cuboidsBitSet;
            this.isNewAlgorithm = isUsePutRowKeyToHllNewAlgorithm;
            if (!this.isNewAlgorithm) {
                this.rowHashCodesLong = null;
                this.hf = Hashing.murmur3_32();
            } else {
                this.rowHashCodesLong = new long[this.nRowKey];
                this.hf = Hashing.murmur3_128();
            }
            this.cuboidsHLL = cuboidsHLL;
            this.workThread = new Thread(this);
        }

        /** Starts the worker thread. */
        public void start() {
            logger.info("cuboid stats calculator:" + this.id + " started, handle cuboids number:" + this.cuboidIds.length);
            this.workThread.start();
        }

        /**
         * Queues a copy of the row for the worker thread, blocking while the queue is full.
         *
         * <p>If the calling thread is interrupted while waiting, the row is dropped, the error is
         * logged and the interrupt status is restored so the caller can react to it.
         *
         * @param row the row to sample; it is copied, so the caller may reuse the array
         */
        public void putRow(String[] row) {
            String[] copyRow = Arrays.copyOf(row, row.length);
            try {
                this.queue.put(copyRow);
            }
            catch (InterruptedException e) {
                logger.error("interrupt", e);
                Thread.currentThread().interrupt();
            }
        }

        /**
         * Signals that no more rows will be queued and waits for the worker thread to finish.
         *
         * <p>If the calling thread is interrupted while waiting, the error is logged and the
         * interrupt status is restored; the worker may then still be running.
         */
        public void waitForCompletion() {
            this.stop = true;
            try {
                this.workThread.join();
            }
            catch (InterruptedException e) {
                logger.error("interrupt", e);
                Thread.currentThread().interrupt();
            }
        }

        /**
         * Returns the value of the given rowkey column, or {@code null} when the row is too short
         * to contain it. The mapper tolerates short rows, so the worker must not die on them:
         * a dead worker would leave the producer blocked on a full queue.
         */
        private String rowkeyValue(String[] row, int rowkeyPosition) {
            int columnIndex = this.rowkeyColIndex[rowkeyPosition];
            return columnIndex < row.length ? row[columnIndex] : null;
        }

        /**
         * Old algorithm: hashes every rowkey column with 32-bit murmur3, then for each cuboid
         * hashes the concatenation of its columns' hashes and adds the result to the counter.
         */
        private void putRowKeyToHLLOld(String[] row) {
            byte[][] rowHashCodes = new byte[this.nRowKey][];
            for (int i = 0; i < this.nRowKey; ++i) {
                Hasher hc = this.hf.newHasher();
                String colValue = this.rowkeyValue(row, i);
                rowHashCodes[i] = colValue != null ? hc.putUnencodedChars(colValue).hash().asBytes() : hc.putInt(0).hash().asBytes();
            }
            for (int i = 0; i < this.cuboidsBitSet.length; ++i) {
                Hasher hc = this.hf.newHasher();
                for (int position = 0; position < this.cuboidsBitSet[i].length; ++position) {
                    hc.putBytes(rowHashCodes[this.cuboidsBitSet[i][position]]);
                }
                this.cuboidsHLL[i].add(hc.hash().asBytes());
            }
        }

        /**
         * New algorithm: hashes every rowkey column with 128-bit murmur3, keeps a long derived
         * from the hash plus the column position, and for each cuboid adds the sum of its
         * columns' longs directly to the counter. A null value is hashed as the string "0".
         */
        private void putRowKeyToHLLNew(String[] row) {
            for (int i = 0; i < this.nRowKey; ++i) {
                Hasher hc = this.hf.newHasher();
                String colValue = this.rowkeyValue(row, i);
                if (colValue == null) {
                    colValue = "0";
                }
                byte[] bytes = hc.putUnencodedChars(colValue).hash().asBytes();
                this.rowHashCodesLong[i] = Bytes.toLong(bytes) + (long)i;
            }
            for (int i = 0; i < this.cuboidsBitSet.length; ++i) {
                long value = 0L;
                for (int position = 0; position < this.cuboidsBitSet[i].length; ++position) {
                    value += this.rowHashCodesLong[this.cuboidsBitSet[i][position]];
                }
                this.cuboidsHLL[i].addHashDirectly(value);
            }
        }

        /** @return the counters updated by this worker, parallel to {@link #getCuboidIds()} */
        public HLLCounter[] getHLLCounters() {
            return this.cuboidsHLL;
        }

        /** @return the ids of the cuboids handled by this worker */
        public Long[] getCuboidIds() {
            return this.cuboidIds;
        }

        /**
         * Drains the queue until a stop has been requested and the queue is empty.
         *
         * <p>The stop flag is read before each poll: if it was already set, every row has been
         * queued before the poll, so an empty result really means the queue is drained. While no
         * stop is requested the worker waits for rows with a short timeout instead of spinning.
         */
        @Override
        public void run() {
            try {
                while (true) {
                    boolean stopRequested = this.stop;
                    String[] row = stopRequested
                            ? this.queue.poll()
                            : this.queue.poll(POLL_TIMEOUT_MS, java.util.concurrent.TimeUnit.MILLISECONDS);
                    if (row == null) {
                        if (stopRequested) {
                            break;
                        }
                        continue;
                    }
                    if (this.isNewAlgorithm) {
                        this.putRowKeyToHLLNew(row);
                    } else {
                        this.putRowKeyToHLLOld(row);
                    }
                }
                logger.info("cuboid stats calculator:" + this.id + " completed.");
            }
            catch (InterruptedException e) {
                // Treat an interrupt of the worker as a request to give up.
                logger.error("interrupt", e);
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Hadoop counter keys reported by this mapper.
     */
    public static enum RawDataCounter {
        // Incremented in doMap by the value countSizeInBytes returns for each parsed row.
        BYTES;

    }
}

