/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.spark.job;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kylin.common.KapConfig;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.engine.spark.application.SparkApplication;
import org.apache.kylin.engine.spark.builder.NBuildSourceInfo;
import org.apache.kylin.engine.spark.builder.SnapshotBuilder;
import org.apache.kylin.engine.spark.job.BuildLayoutWithUpdate;
import org.apache.kylin.engine.spark.job.CuboidAggregator;
import org.apache.kylin.engine.spark.job.DFChooser;
import org.apache.kylin.engine.spark.job.KylinBuildEnv;
import org.apache.kylin.engine.spark.job.LogJobInfoUtils;
import org.apache.kylin.engine.spark.job.NSparkCubingUtil;
import org.apache.kylin.engine.spark.job.SanityChecker;
import org.apache.kylin.guava30.shaded.common.base.Preconditions;
import org.apache.kylin.guava30.shaded.common.collect.ImmutableSet;
import org.apache.kylin.guava30.shaded.common.collect.Lists;
import org.apache.kylin.guava30.shaded.common.collect.Maps;
import org.apache.kylin.guava30.shaded.common.collect.Sets;
import org.apache.kylin.metadata.cube.cuboid.NSpanningTree;
import org.apache.kylin.metadata.cube.cuboid.NSpanningTreeFactory;
import org.apache.kylin.metadata.cube.model.IndexEntity;
import org.apache.kylin.metadata.cube.model.IndexPlan;
import org.apache.kylin.metadata.cube.model.LayoutEntity;
import org.apache.kylin.metadata.cube.model.NDataLayout;
import org.apache.kylin.metadata.cube.model.NDataSegment;
import org.apache.kylin.metadata.cube.model.NDataflow;
import org.apache.kylin.metadata.cube.model.NDataflowManager;
import org.apache.kylin.metadata.cube.model.NDataflowUpdate;
import org.apache.kylin.metadata.cube.model.NIndexPlanManager;
import org.apache.kylin.metadata.model.NDataModel;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.datasource.storage.StorageListener;
import org.apache.spark.sql.datasource.storage.StorageStore;
import org.apache.spark.sql.datasource.storage.StorageStoreFactory;
import org.apache.spark.sql.datasource.storage.StorageStoreUtils;
import org.apache.spark.sql.datasource.storage.WriteTaskStats;
import org.apache.spark.sql.hive.utils.ResourceDetectUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.collection.JavaConversions;

/**
 * Spark application that builds index layouts for a set of segments of one dataflow.
 *
 * <p>The job reads the {@code dataflowId}, {@code segmentIds} and {@code layoutIds}
 * parameters, builds snapshots, then for every segment chooses its build sources
 * (already-built layouts and/or the flat table) and builds the spanning tree layer by
 * layer. Segment statistics are written back through {@link NDataflowManager}.
 */
@Deprecated
public class DFBuildJob extends SparkApplication {
    protected static final Logger logger = LoggerFactory.getLogger(DFBuildJob.class);
    /** Suffix for temporary directories; not referenced inside this class. */
    protected static String TEMP_DIR_SUFFIX = "_temp";
    /** Expected row count per segment id, handed to {@link SanityChecker} when a layout is saved. */
    public final HashMap<String, Long> seg2Count = new HashMap<>();
    protected NDataflowManager dfMgr;
    protected BuildLayoutWithUpdate buildLayoutWithUpdate;

    /**
     * Command-line entry point: creates a job and runs it with the given arguments.
     *
     * @param args arguments forwarded unchanged to {@code execute}
     */
    public static void main(String[] args) {
        DFBuildJob nDataflowBuildJob = new DFBuildJob();
        nDataflowBuildJob.execute(args);
    }

    /**
     * Runs the whole build: snapshots first, then every requested segment, then the
     * segment source-size update and the trailing cleanups.
     *
     * @throws Exception if any build step fails
     */
    @Override
    protected void doExecute() throws Exception {
        this.buildLayoutWithUpdate = new BuildLayoutWithUpdate();
        String dataflowId = this.getParam("dataflowId");
        Set<String> segmentIds = Sets.newHashSet(StringUtils.split(this.getParam("segmentIds"), ","));
        Set<Long> layoutIds = NSparkCubingUtil.str2Longs(this.getParam("layoutIds"));
        this.dfMgr = NDataflowManager.getInstance(this.config, this.project);
        List<String> persistedFlatTable = new ArrayList<>();
        List<String> persistedViewFactTable = new ArrayList<>();
        Path shareDir = this.config.getJobTmpShareDir(this.project, this.jobId);
        if (this.config.isBuildCheckPartitionColEnabled()) {
            this.checkDateFormatIfExist(this.project, dataflowId);
        }
        IndexPlan indexPlan = this.dfMgr.getDataflow(dataflowId).getIndexPlan();
        Set<LayoutEntity> cuboids = NSparkCubingUtil.toLayouts(indexPlan, layoutIds).stream()
                .filter(Objects::nonNull)
                .collect(Collectors.toSet());
        this.buildSnapshot();
        for (String segId : segmentIds) {
            NSpanningTree nSpanningTree = NSpanningTreeFactory.fromLayouts(cuboids, dataflowId);
            NDataSegment seg = this.getSegment(segId);
            if (this.needSkipSegment(segId)) {
                continue;
            }
            DFChooser datasetChooser = new DFChooser(nSpanningTree, seg, this.jobId, this.ss, this.config, true);
            datasetChooser.decideSources();
            NBuildSourceInfo buildFromFlatTable = datasetChooser.flatTableSource();
            Map<Long, NBuildSourceInfo> buildFromLayouts = datasetChooser.reuseSources();
            this.infos.clearCuboidsNumPerLayer(segId);
            if (!buildFromLayouts.isEmpty()) {
                // Long.compare instead of casting a subtraction to int: the difference of two
                // row counts can exceed the int range (or overflow long), which made the
                // previous comparator throw ArithmeticException or order incorrectly.
                NBuildSourceInfo min = Collections.min(buildFromLayouts.values(),
                        (o1, o2) -> Long.compare(o1.getCount(), o2.getCount()));
                long count = SanityChecker.getCount(min.getParentDS(), indexPlan.getLayoutEntity(min.getLayoutId()));
                this.seg2Count.put(segId, count);
                this.build(buildFromLayouts.values(), segId, nSpanningTree);
            }
            if (buildFromFlatTable != null) {
                String path = datasetChooser.persistFlatTableIfNecessary();
                if (!path.isEmpty()) {
                    logger.info("FlatTable persisted, compute column size");
                    persistedFlatTable.add(path);
                    this.computeColumnBytes(datasetChooser, seg, dataflowId, path);
                } else {
                    logger.info("FlatTable not persisted, only compute row count");
                    long rowCount = buildFromFlatTable.getFlattableDS().count();
                    this.updateColumnBytesInseg(dataflowId, new HashMap<String, Object>(), seg.getId(), rowCount);
                }
                if (!StringUtils.isBlank(buildFromFlatTable.getViewFactTablePath())) {
                    persistedViewFactTable.add(buildFromFlatTable.getViewFactTablePath());
                }
                // Only count the flat table when no reused layout already supplied a count.
                if (!this.seg2Count.containsKey(segId)) {
                    this.seg2Count.put(segId, buildFromFlatTable.getParentDS().count());
                }
                this.build(Collections.singletonList(buildFromFlatTable), segId, nSpanningTree);
            }
            this.infos.recordSpanningTree(segId, nSpanningTree);
        }
        Map<String, Object> segmentSourceSize = ResourceDetectUtils.getSegmentSourceSize(shareDir);
        this.updateSegmentSourceBytesSize(dataflowId, segmentSourceSize);
        this.tailingCleanups(segmentIds, persistedFlatTable, persistedViewFactTable);
        this.buildLayoutWithUpdate.shutDown();
    }

    /**
     * Builds snapshots for the dataflow's model, or only calculates total rows when
     * snapshot manual management is enabled.
     *
     * @throws IOException if the snapshot builder fails
     */
    protected void buildSnapshot() throws IOException {
        String dataflowId = this.getParam("dataflowId");
        SnapshotBuilder snapshotBuilder = new SnapshotBuilder();
        if (!this.config.isSnapshotManualManagementEnabled()) {
            snapshotBuilder.buildSnapshot(this.ss, this.dfMgr.getDataflow(dataflowId).getModel(), this.getIgnoredSnapshotTables());
        } else {
            logger.info("Skip snapshot build in snapshot manual mode, dataflow: {}, only calculate total rows", dataflowId);
            snapshotBuilder.calculateTotalRows(this.ss, this.dfMgr.getDataflow(dataflowId).getModel(), this.getIgnoredSnapshotTables());
        }
    }

    /**
     * Reads the persisted flat table at {@code path}, computes its column bytes and
     * stores them, together with the row count, on the segment.
     */
    private void computeColumnBytes(DFChooser datasetChooser, NDataSegment seg, String dataflowId, String path) {
        this.ss.sparkContext().setJobDescription("Compute column bytes");
        Dataset<Row> df = this.ss.read().parquet(path);
        Map<String, Object> columnBytes = JavaConversions.mapAsJavaMap(datasetChooser.computeColumnBytes(df));
        this.updateColumnBytesInseg(dataflowId, columnBytes, seg.getId(), df.count());
        this.ss.sparkContext().setJobDescription(null);
    }

    /**
     * Tells whether a segment must be skipped because it is missing or lacks a
     * segment range, model or index plan.
     *
     * @return {@code true} when the segment cannot be built
     */
    private boolean needSkipSegment(String segId) {
        NDataSegment seg = this.getSegment(segId);
        boolean buildable = seg != null
                && seg.getSegRange() != null
                && seg.getModel() != null
                && seg.getIndexPlan() != null;
        if (buildable) {
            return false;
        }
        logger.info("Skip segment {}", segId);
        if (seg != null) {
            logger.info("Args is {} {} {}", seg.getSegRange(), seg.getModel(), seg.getIndexPlan());
        }
        return true;
    }

    /**
     * Stores the source row count and the scaled per-column source bytes on a segment.
     *
     * <p>Each value of {@code columnBytes} is parsed as a long and multiplied by
     * {@code rowCount / capacitySampleRows}; the factor is 1 when {@code rowCount} is
     * smaller than the configured sample row number.
     *
     * @param dataflowId  dataflow owning the segment
     * @param columnBytes column name to byte size; values are read through {@code toString()}
     * @param id          segment id
     * @param rowCount    source row count of the segment
     */
    public void updateColumnBytesInseg(String dataflowId, Map<String, Object> columnBytes, String id, long rowCount) {
        Map<String, Long> scaledBytes = Maps.newHashMap();
        int rows = this.config.getCapacitySampleRows();
        double multiple = rowCount < (long) rows ? 1.0 : (double) rowCount / (double) rows;
        for (Map.Entry<String, Object> entry : columnBytes.entrySet()) {
            scaledBytes.put(entry.getKey(), (long) ((double) Long.parseLong(entry.getValue().toString()) * multiple));
        }
        NDataflow dataflow = this.dfMgr.getDataflow(dataflowId);
        NDataflowUpdate update = new NDataflowUpdate(dataflow.getUuid());
        List<NDataSegment> nDataSegments = Lists.newArrayList();
        NDataSegment segment = dataflow.getSegment(id).copy();
        segment.setSourceCount(rowCount);
        segment.getColumnSourceBytes().putAll(scaledBytes);
        nDataSegments.add(segment);
        update.setToUpdateSegs(nDataSegments.toArray(new NDataSegment[0]));
        this.dfMgr.updateDataflow(update);
    }

    /**
     * Deletes the persisted fact views, deletes the persisted flat tables unless flat
     * table persistence is enabled, and resets the related flags on the segments.
     *
     * @param segmentIds segments whose flags are reset
     * @param flatTables paths of persisted flat tables
     * @param factViews  paths of persisted view fact tables
     * @throws IOException if a file system operation fails
     */
    public void tailingCleanups(Set<String> segmentIds, List<String> flatTables, List<String> factViews) throws IOException {
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        for (String viewPath : factViews) {
            fs.delete(new Path(viewPath), true);
            logger.debug("Delete persisted view fact table: {}.", viewPath);
        }
        if (!this.config.isPersistFlatTableEnabled()) {
            for (String path : flatTables) {
                fs.delete(new Path(path), true);
                logger.debug("Delete persisted flat table: {}.", path);
            }
        }
        this.resetSegmentMemOnly(segmentIds, !this.config.isPersistFlatTableEnabled());
    }

    /**
     * Writes the detected source byte size and the current time as last build time to
     * every listed segment, then re-saves the index plan's layout-bucket mapping.
     *
     * @param dataflowId                dataflow owning the segments
     * @param toUpdateSegmentSourceSize segment id to source size; values are cast to {@code Long}
     */
    protected void updateSegmentSourceBytesSize(String dataflowId, Map<String, Object> toUpdateSegmentSourceSize) {
        NDataflow dataflow = this.dfMgr.getDataflow(dataflowId);
        NDataflowUpdate update = new NDataflowUpdate(dataflow.getUuid());
        List<NDataSegment> dataSegments = Lists.newArrayList();
        for (Map.Entry<String, Object> entry : toUpdateSegmentSourceSize.entrySet()) {
            // Check for a missing segment BEFORE copying it; calling copy() first made the
            // skip branch unreachable and threw a NullPointerException instead.
            NDataSegment existing = dataflow.getSegment(entry.getKey());
            if (Objects.isNull(existing)) {
                logger.info("Skip empty segment {} when updating segment source", entry.getKey());
                continue;
            }
            NDataSegment segment = existing.copy();
            segment.setSourceBytesSize(((Long) entry.getValue()).longValue());
            segment.setLastBuildTime(System.currentTimeMillis());
            dataSegments.add(segment);
        }
        update.setToUpdateSegs(dataSegments.toArray(new NDataSegment[0]));
        this.dfMgr.updateDataflow(update);
        NIndexPlanManager indexPlanManager = NIndexPlanManager.getInstance(this.config, this.project);
        indexPlanManager.updateIndexPlan(dataflowId, copyForWrite -> copyForWrite.setLayoutBucketNumMapping(indexPlanManager.getIndexPlan(dataflowId).getLayoutBucketNumMapping()));
    }

    /**
     * Computes the number of cores to request for the job.
     *
     * @return {@code "1"} when task-impact sizing is disabled; otherwise the maximum leaf
     *         task number (truncated to int) divided by the task core factor, using
     *         integer division
     * @throws Exception if the detect files cannot be read
     */
    @Override
    protected String calculateRequiredCores() throws Exception {
        if (!this.config.isSparkEngineTaskImpactInstanceEnabled()) {
            return "1";
        }
        Path shareDir = this.config.getJobTmpShareDir(this.project, this.jobId);
        String maxLeafTasksNums = this.maxLeafTasksNums(shareDir);
        // The factor is read from the environment config, not from this.config.
        KylinConfig envConfig = KylinConfig.getInstanceFromEnv();
        int factor = envConfig.getSparkEngineTaskCoreFactor();
        int requiredCore = (int) Double.parseDouble(maxLeafTasksNums) / factor;
        logger.info("The maximum number of tasks required to run the job is {}, require cores: {}", maxLeafTasksNums, requiredCore);
        return String.valueOf(requiredCore);
    }

    /**
     * Selects the maximum value found in the cubing detect files under {@code shareDir}.
     */
    private String maxLeafTasksNums(Path shareDir) throws IOException {
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        FileStatus[] fileStatuses = fs.listStatus(shareDir, path -> path.toString().endsWith(ResourceDetectUtils.cubingDetectItemFileSuffix()));
        return ResourceDetectUtils.selectMaxValueInFiles(fileStatuses);
    }

    /**
     * Looks up a segment of the job's dataflow through the dataflow manager.
     *
     * @param segId segment id
     * @return whatever {@code NDataflow.getSegment} returns for that id; callers in this
     *         class treat {@code null} as "segment missing"
     */
    public NDataSegment getSegment(String segId) {
        String dataflowId = this.getParam("dataflowId");
        return this.dfMgr.getDataflow(dataflowId).getSegment(segId);
    }

    /**
     * Builds the given sources and then every following layer of the spanning tree
     * until a layer produces no further build infos.
     *
     * @param buildSourceInfos sources of the first layer
     * @param segId            segment being built
     * @param st               spanning tree driving the layer order
     * @throws IOException if building a layer fails
     */
    protected void build(Collection<NBuildSourceInfo> buildSourceInfos, String segId, NSpanningTree st) throws IOException {
        List<NBuildSourceInfo> theFirstLevelBuildInfos = this.buildLayer(buildSourceInfos, segId, st);
        LinkedList<List<NBuildSourceInfo>> queue = new LinkedList<>();
        if (!theFirstLevelBuildInfos.isEmpty()) {
            queue.offer(theFirstLevelBuildInfos);
        }
        while (!queue.isEmpty()) {
            List<NBuildSourceInfo> buildInfos = queue.poll();
            List<NBuildSourceInfo> theNextLayer = this.buildLayer(buildInfos, segId, st);
            if (!theNextLayer.isEmpty()) {
                queue.offer(theNextLayer);
            }
        }
    }

    /**
     * Submits one build task per index of the given sources, waits for the layout
     * update of the segment, and returns the build infos of the next layer.
     */
    private List<NBuildSourceInfo> buildLayer(Collection<NBuildSourceInfo> buildSourceInfos, String segId, final NSpanningTree st) throws IOException {
        final NDataSegment seg = this.getSegment(segId);
        int cuboidsNumInLayer = 0;
        List<IndexEntity> allIndexesInCurrentLayer = new ArrayList<>();
        for (final NBuildSourceInfo info : buildSourceInfos) {
            Collection<IndexEntity> toBuildCuboids = info.getToBuildCuboids();
            this.infos.recordParent2Children(seg.getLayout(info.getLayoutId()),
                    toBuildCuboids.stream().map(IndexEntity::getId).collect(Collectors.toCollection(Sets::newHashSet)));
            cuboidsNumInLayer += toBuildCuboids.size();
            Preconditions.checkState(!toBuildCuboids.isEmpty(), "To be built cuboids is empty.");
            final Dataset<Row> parentDS = info.getParentDS();
            for (final IndexEntity index : toBuildCuboids) {
                Preconditions.checkNotNull(parentDS, "Parent dataset is null when building.");
                this.buildLayoutWithUpdate.submit(new BuildLayoutWithUpdate.JobEntity() {

                    @Override
                    public long getIndexId() {
                        return index.getId();
                    }

                    @Override
                    public String getName() {
                        return "build-index-" + index.getId();
                    }

                    @Override
                    public List<NDataLayout> build() throws IOException {
                        return DFBuildJob.this.buildIndex(seg, index, parentDS, st, info.getLayoutId());
                    }
                }, this.config);
                allIndexesInCurrentLayer.add(index);
            }
        }
        this.infos.recordCuboidsNumPerLayer(segId, cuboidsNumInLayer);
        this.buildLayoutWithUpdate.updateLayout(seg, this.config, this.project);
        // The segment is re-read here rather than reusing `seg`.
        st.decideTheNextLayer(allIndexesInCurrentLayer, this.getSegment(segId));
        return this.constructTheNextLayerBuildInfos(st, seg, allIndexesInCurrentLayer);
    }

    /**
     * Creates one build info per index of the current layer that has children in the
     * spanning tree, using the first layout of that index as the parent storage.
     *
     * @param st                       spanning tree
     * @param seg                      segment being built
     * @param allIndexesInCurrentLayer indexes built in the current layer
     * @return build infos for the next layer; empty when no index has children
     */
    protected List<NBuildSourceInfo> constructTheNextLayerBuildInfos(NSpanningTree st, NDataSegment seg, Collection<IndexEntity> allIndexesInCurrentLayer) {
        List<NBuildSourceInfo> childrenBuildSourceInfos = new ArrayList<>();
        for (IndexEntity index : allIndexesInCurrentLayer) {
            Collection<IndexEntity> children = st.getChildrenByIndexPlan(index);
            if (children.isEmpty()) {
                continue;
            }
            NBuildSourceInfo theRootLevelBuildInfos = new NBuildSourceInfo();
            theRootLevelBuildInfos.setSparkSession(this.ss);
            LayoutEntity layout = new ArrayList<LayoutEntity>(st.getLayouts(index)).get(0);
            theRootLevelBuildInfos.setLayoutId(layout.getId());
            theRootLevelBuildInfos.setParentStorageDF(StorageStoreUtils.toDF(seg, layout, this.ss));
            theRootLevelBuildInfos.setToBuildCuboids(children);
            childrenBuildSourceInfos.add(theRootLevelBuildInfos);
        }
        return childrenBuildSourceInfos;
    }

    /**
     * Builds every not-yet-built layout of one index from the parent dataset.
     *
     * <p>Table indexes are a plain projection of the dimension columns; other indexes
     * go through {@link CuboidAggregator}. Each layout is then ordered, sorted within
     * partitions by its dimensions and saved.
     *
     * @return the data layouts produced for this index
     */
    private List<NDataLayout> buildIndex(NDataSegment seg, IndexEntity cuboid, Dataset<Row> parent, NSpanningTree nSpanningTree, long parentId) throws IOException {
        String parentName = parentId == DFChooser.FLAT_TABLE_FLAG() ? "flat table" : String.valueOf(parentId);
        logger.info("Build index:{}, in segment:{}", cuboid.getId(), seg.getId());
        List<NDataLayout> layouts = Lists.newLinkedList();
        Set<Integer> dimIndexes = cuboid.getEffectiveDimCols().keySet();
        Dataset<Row> afterPrj;
        Function<LayoutEntity, Column[]> toOrder;
        if (IndexEntity.isTableIndex(cuboid.getId())) {
            Preconditions.checkArgument(cuboid.getMeasures().isEmpty());
            afterPrj = parent.select(NSparkCubingUtil.getColumns(dimIndexes));
            toOrder = entity -> NSparkCubingUtil.getColumns(entity.getOrderedDimensions().keySet());
        } else {
            afterPrj = CuboidAggregator.agg(parent, dimIndexes, cuboid.getEffectiveMeasures(), seg, nSpanningTree);
            toOrder = entity -> NSparkCubingUtil.getColumns(entity.getOrderedDimensions().keySet(), entity.getOrderedMeasures().keySet());
        }
        for (LayoutEntity layout : nSpanningTree.getLayouts(cuboid)) {
            if (seg.isAlreadyBuilt(layout.getId())) {
                logger.info("Skip already built layout:{}, in index:{}", layout.getId(), cuboid.getId());
                continue;
            }
            logger.info("Build layout:{}, in index:{}", layout.getId(), cuboid.getId());
            this.ss.sparkContext().setJobDescription("build " + layout.getId() + " from parent " + parentName);
            Set<Integer> rowKeys = layout.getOrderedDimensions().keySet();
            Dataset<Row> afterSort = afterPrj.select(toOrder.apply(layout))
                    .sortWithinPartitions(NSparkCubingUtil.getColumns(rowKeys));
            layouts.add(this.saveAndUpdateLayout(afterSort, seg, layout));
            this.onLayoutFinished(layout.getId());
        }
        this.ss.sparkContext().setJobDescription(null);
        logger.info("Finished Build index :{}, in segment:{}", cuboid.getId(), seg.getId());
        return layouts;
    }

    /**
     * Saves a layout's dataset through the storage store and fills a data layout with
     * the resulting write statistics.
     *
     * <p>When the reported row count is {@code -1} the layout is recorded as abnormal
     * and a warning is logged; the {@code -1} is still stored as the row count.
     *
     * @param dataset data to persist
     * @param seg     segment the layout belongs to
     * @param layout  layout being saved
     * @return the populated data layout, marked ready
     * @throws IOException if saving fails
     */
    protected NDataLayout saveAndUpdateLayout(Dataset<Row> dataset, NDataSegment seg, LayoutEntity layout) throws IOException {
        this.ss.sparkContext().setLocalProperty("spark.scheduler.pool", "build");
        long layoutId = layout.getId();
        NDataLayout dataLayout = this.getDataLayout(seg, layoutId);
        StorageStore storage = StorageStoreFactory.create(layout.getModel().getStorageType());
        // Segments without a recorded count fall back to the checker's skip flag.
        storage.setStorageListener(new SanityChecker(this.seg2Count.getOrDefault(seg.getId(), SanityChecker.SKIP_FLAG())));
        WriteTaskStats taskStats = storage.saveSegmentLayout(layout, seg, KapConfig.wrap(this.config), dataset);
        dataLayout.setBuildJobId(this.jobId);
        long rowCount = taskStats.numRows();
        if (rowCount == -1L) {
            KylinBuildEnv.get().buildJobInfos().recordAbnormalLayouts(layout.getId(), "Job metrics seems null, use count() to collect cuboid rows.");
            logger.warn("Can not get cuboid={} row cnt.", layout.getId());
        }
        dataLayout.setRows(rowCount);
        dataLayout.setSourceRows(taskStats.sourceRows());
        dataLayout.setPartitionNum(taskStats.numBucket());
        dataLayout.setPartitionValues(taskStats.partitionValues());
        dataLayout.setFileCount(taskStats.numFiles());
        dataLayout.setByteSize(taskStats.numBytes());
        dataLayout.setReady(true);
        return dataLayout;
    }

    /**
     * Creates a new data layout for the given segment and layout id.
     *
     * @param seg      segment the layout belongs to
     * @param layoutId layout id
     * @return a new data layout bound to the segment's dataflow
     */
    protected NDataLayout getDataLayout(NDataSegment seg, long layoutId) {
        return NDataLayout.newDataLayout(seg.getDataflow(), seg.getId(), layoutId);
    }

    /**
     * @return the job description text produced by {@link LogJobInfoUtils#dfBuildJobInfo()}
     */
    @Override
    protected String generateInfo() {
        return LogJobInfoUtils.dfBuildJobInfo();
    }

    /**
     * Clears the dict-ready and fact-view-ready flags (and optionally the
     * flat-table-ready flag) on each segment and saves the change.
     * A {@code null} id set is treated as empty.
     */
    private void resetSegmentMemOnly(Set<String> segmentIds, boolean resetFlatTable) {
        if (segmentIds == null) {
            return;
        }
        for (String segId : segmentIds) {
            NDataSegment segCopy = this.getSegment(segId).copy();
            segCopy.setDictReady(false);
            if (resetFlatTable) {
                segCopy.setFlatTableReady(false);
            }
            segCopy.setFactViewReady(false);
            NDataflowUpdate dfUpdate = new NDataflowUpdate(segCopy.getModelUuid());
            dfUpdate.setToUpdateSegs(new NDataSegment[]{segCopy});
            NDataflowManager.getInstance(this.config, this.project).updateDataflow(dfUpdate);
        }
    }
}

