001    /*
002    // $Id: //open/mondrian/src/main/mondrian/rolap/agg/SegmentLoader.java#8 $
003    // This software is subject to the terms of the Common Public License
004    // Agreement, available at the following URL:
005    // http://www.opensource.org/licenses/cpl.html.
006    // Copyright (C) 2002-2008 Julian Hyde and others
007    // All Rights Reserved.
008    // You must accept the terms of that agreement to use this software.
009    */
010    package mondrian.rolap.agg;
011    
012    import mondrian.rolap.*;
013    import mondrian.olap.Util;
014    import mondrian.olap.MondrianProperties;
015    
016    import java.util.*;
017    import java.sql.ResultSet;
018    import java.sql.SQLException;
019    
/**
 * <p>The <code>SegmentLoader</code> queries the database and loads the data
 * into the given set of segments.</p>
 *
 * <p>It reads a segment of <code>measure</code>, where <code>columns</code> are
 * constrained to <code>values</code>.  Each entry in <code>values</code>
 * can be null, meaning don't constrain, or can have several values. For
 * example, <code>getSegment({Unit_sales}, {Region, State, Year}, {{"West"},
 * {"CA", "OR", "WA"}, null})</code> returns sales in states CA, OR and WA
 * in the Western region, for all years.</p>
 *
 * @author Thiyagu
 * @version $Id: //open/mondrian/src/main/mondrian/rolap/agg/SegmentLoader.java#8 $
 * @since 24 May 2007
 */
035    public class SegmentLoader {
036        /**
037         * Loads data for all the segments of the GroupingSets. If the grouping sets
038         * list contains more than one Grouping Set then data is loaded using the
039         * GROUP BY GROUPING SETS sql. Else if only one grouping set is passed in
040         * the list data is loaded without using GROUP BY GROUPING SETS sql. If the
041         * database does not support grouping sets
042         * {@link mondrian.rolap.sql.SqlQuery.Dialect#supportsGroupingSets()} then
043         * grouping sets list should always have only one element in it.
044         *
045         * <p>For example, if list has 2 grouping sets with columns A, B, C and B, C
046         * respectively, then the SQL will be
047         * "GROUP BY GROUPING SETS ((A, B, C), (B, C))".
048         *
049         * <p>Else if the list has only one grouping set then sql would be without
050         * grouping sets.
051         *
052         * <p>The <code>groupingSets</code> list should be topological order, with
053         * more detailed higher-level grouping sets occuring first. In other words,
054         * the first element of the list should always be the detailed grouping
055         * set (default grouping set), followed by grouping sets which can be
056         * rolled-up on this detailed grouping set.
057         * In the example (A, B, C) is the detailed grouping set and (B, C) is
058         * rolled-up using the detailed.
059         *
060         * @param groupingSets   List of grouping sets whose segments are loaded
061         * @param pinnedSegments Pinned segments
062         */
063        public void load(
064            List<GroupingSet> groupingSets,
065            RolapAggregationManager.PinSet pinnedSegments,
066            List<StarPredicate> compoundPredicateList)
067        {
068            GroupingSetsList groupingSetsList =
069                new GroupingSetsList(groupingSets);
070            boolean useGroupingSet = groupingSetsList.useGroupingSets();
071            RolapStar.Column[] defaultColumns =
072                groupingSetsList.getDefaultColumns();
073            SqlStatement stmt = null;
074            try {
075                stmt = createExecuteSql(
076                    groupingSetsList,
077                    compoundPredicateList);
078                int arity = defaultColumns.length;
079                SortedSet<Comparable<?>>[] axisValueSets =
080                    getDistinctValueWorkspace(arity);
081    
082                boolean[] axisContainsNull = new boolean[arity];
083    
084                List<Object[]> rows =
085                    processData(
086                        stmt, axisContainsNull,
087                        axisValueSets, groupingSetsList);
088    
089                boolean sparse =
090                    setAxisDataAndDecideSparseUse(axisValueSets,
091                        axisContainsNull, groupingSetsList,
092                        rows);
093    
094                SegmentDataset[] nonGroupingDataSets = null;
095    
096                final Map<BitKey, SegmentDataset[]> groupingDataSetsMap =
097                    new HashMap<BitKey, SegmentDataset[]>();
098    
099                if (useGroupingSet) {
100                    populateDataSetMapOnGroupingColumnsBitKeys(
101                        groupingSetsList,
102                        sparse, groupingDataSetsMap);
103                } else {
104                    nonGroupingDataSets = createDataSets(
105                        sparse,
106                        groupingSetsList.getDefaultSegments(),
107                        groupingSetsList.getDefaultAxes());
108                }
109    
110                loadDataToDataSets(
111                    groupingSetsList, rows, groupingDataSetsMap,
112                    nonGroupingDataSets, axisContainsNull, sparse);
113    
114                setDataToSegments(
115                    groupingSetsList, nonGroupingDataSets,
116                    groupingDataSetsMap, pinnedSegments);
117    
118            } catch (SQLException e) {
119                throw stmt.handle(e);
120            } finally {
121                if (stmt != null) {
122                    stmt.close();
123                }
124                // Any segments which are still loading have failed.
125                setFailOnStillLoadingSegments(groupingSetsList);
126            }
127        }
128    
129        void setFailOnStillLoadingSegments(GroupingSetsList groupingSetsList) {
130            for (GroupingSet groupingset : groupingSetsList.getGroupingSets()) {
131                for (Segment segment : groupingset.getSegments()) {
132                    segment.setFailIfStillLoading();
133                }
134            }
135        }
136    
137        /**
138         * Loads data to the datasets. If the grouping sets is used,
139         * dataset is fetched from groupingDataSetMap using grouping bit keys of
140         * the row data. If grouping sets is not used, data is loaded on to
141         * nonGroupingDataSets.
142         */
143        private void loadDataToDataSets(
144            GroupingSetsList groupingSetsList, List<Object[]> rows,
145            Map<BitKey, SegmentDataset[]> groupingDataSetMap,
146            SegmentDataset[] nonGroupingDataSets, boolean[] axisContainsNull,
147            boolean sparse)
148        {
149            int arity = groupingSetsList.getDefaultColumns().length;
150            boolean useGroupingSet = groupingSetsList.useGroupingSets();
151            Aggregation.Axis[] axes = groupingSetsList.getDefaultAxes();
152            int segmentLength = groupingSetsList.getDefaultSegments().length;
153    
154            List<Integer> pos = new ArrayList<Integer>(arity);
155            for (Object[] row : rows) {
156                final SegmentDataset[] datasets;
157                int groupingBitKeyIndex = arity + segmentLength;
158                if (useGroupingSet) {
159                    BitKey groupingBitKey = (BitKey) row[groupingBitKeyIndex];
160                    datasets = groupingDataSetMap.get(groupingBitKey);
161                } else {
162                    datasets = nonGroupingDataSets;
163                }
164                int k = 0;
165                for (int j = 0; j < arity; j++) {
166                    Object o = row[j];
167                    if (useGroupingSet &&
168                            isRollupNull(groupingSetsList, row, groupingBitKeyIndex, j)) {
169                        continue;
170                    }
171                    Aggregation.Axis axis = axes[j];
172                    int offset = axis.getOffset(o);
173                    pos.add(offset);
174                    k *= axes[j].getKeys().length;
175                    k += offset;
176                }
177    
178                if (sparse) {
179                    CellKey key = CellKey.Generator.newCellKey(toArray(pos));
180                    for (int j = 0; j < segmentLength; j++) {
181                        final Object o = row[arity + j];
182                        datasets[j].put(key, o);
183                    }
184                } else {
185                    for (int j = 0; j < segmentLength; j++) {
186                        final Object o = row[arity + j];
187                        ((DenseSegmentDataset) datasets[j]).set(k, o);
188                    }
189                }
190                pos.clear();
191            }
192        }
193    
194        private boolean isRollupNull(
195                GroupingSetsList groupingSetsList, Object[] row,
196                int groupingBitKeyIndex, int j) {
197            BitKey groupingBitKey = (BitKey) row[groupingBitKeyIndex];
198            boolean isGroupingBitSet =
199                    groupingBitKey.get(groupingSetsList.findGroupingFunctionIndex(j));
200            return row[j].equals(RolapUtil.sqlNullValue) && isGroupingBitSet;
201        }
202    
203        private int[] toArray(List<Integer> pos) {
204            int posArr[] = new int[pos.size()];
205            for (int i = 0; i < posArr.length; i++) {
206                posArr[i] = pos.get(i);
207            }
208            return posArr;
209        }
210    
211        private boolean setAxisDataAndDecideSparseUse(
212            SortedSet<Comparable<?>>[] axisValueSets,
213            boolean[] axisContainsNull,
214            GroupingSetsList groupingSetsList,
215            List<Object[]> rows)
216        {
217            Aggregation.Axis[] axes = groupingSetsList.getDefaultAxes();
218            RolapStar.Column[] allColumns = groupingSetsList.getDefaultColumns();
219            // Figure out size of dense array, and allocate it, or use a sparse
220            // array if appropriate.
221            boolean sparse = false;
222            int n = 1;
223            for (int i = 0; i < axes.length; i++) {
224                Aggregation.Axis axis = axes[i];
225                SortedSet<Comparable<?>> valueSet = axisValueSets[i];
226                int size = axis.loadKeys(valueSet, axisContainsNull[i]);
227                setAxisDataToGroupableList(groupingSetsList, valueSet,
228                    axisContainsNull[i], allColumns[i]);
229                int previous = n;
230                n *= size;
231                if ((n < previous) || (n < size)) {
232                    // Overflow has occurred.
233                    n = Integer.MAX_VALUE;
234                    sparse = true;
235                }
236            }
237            return useSparse(sparse, n, rows);
238        }
239    
240        boolean useSparse(boolean sparse, int n, List<Object[]> rows) {
241            sparse = sparse || useSparse((double) n, (double) rows.size());
242            return sparse;
243        }
244    
245        private void setDataToSegments(
246            GroupingSetsList groupingSetsList,
247            SegmentDataset[] detailedDataSet,
248            Map<BitKey, SegmentDataset[]> datasetsMap,
249            RolapAggregationManager.PinSet pinnedSegments)
250        {
251            List<GroupingSet> groupingSets = groupingSetsList.getGroupingSets();
252            boolean useGroupingSet = groupingSetsList.useGroupingSets();
253            for (int i = 0; i < groupingSets.size(); i++) {
254                Segment[] groupedSegments = groupingSets.get(i).getSegments();
255                SegmentDataset[] dataSets = useGroupingSet ? datasetsMap
256                    .get(groupingSetsList.getRollupColumnsBitKeyList().get(i)) :
257                    detailedDataSet;
258                for (int j = 0; j < groupedSegments.length; j++) {
259                    Segment groupedSegment = groupedSegments[j];
260                    groupedSegment
261                        .setData(dataSets[j], pinnedSegments);
262                }
263            }
264        }
265    
266        private void populateDataSetMapOnGroupingColumnsBitKeys(
267            GroupingSetsList groupingSetsList, boolean sparse,
268            Map<BitKey, SegmentDataset[]> datasetsMap)
269        {
270            List<GroupingSet> groupingSets = groupingSetsList.getGroupingSets();
271            List<BitKey> groupingColumnsBitKeyList =
272                groupingSetsList.getRollupColumnsBitKeyList();
273            for (int i = 0; i < groupingSets.size(); i++) {
274                GroupingSet groupingSet = groupingSets.get(i);
275                SegmentDataset[] datasets = createDataSets(sparse,
276                    groupingSet.getSegments(), groupingSet.getAxes());
277                datasetsMap.put(groupingColumnsBitKeyList.get(i), datasets);
278            }
279        }
280    
281        private int calcuateMaxDataSize(Aggregation.Axis[] axes) {
282            int n = 1;
283            for (Aggregation.Axis axis : axes) {
284                n *= axis.getKeys().length;
285            }
286            return n;
287        }
288    
289        private SegmentDataset[] createDataSets(boolean sparse,
290            Segment[] segments, Aggregation.Axis[] axes)
291        {
292            int n = (sparse ? 0 : calcuateMaxDataSize(axes));
293            SegmentDataset[] datasets;
294            if (sparse) {
295                datasets = new SparseSegmentDataset[segments.length];
296                for (int i = 0; i < segments.length; i++) {
297                    datasets[i] = new SparseSegmentDataset(segments[i]);
298                }
299            } else {
300                datasets = new DenseSegmentDataset[segments.length];
301                for (int i = 0; i < segments.length; i++) {
302                    datasets[i] = new DenseSegmentDataset(
303                        segments[i], new Object[n]);
304                }
305            }
306            return datasets;
307        }
308    
309        private void setAxisDataToGroupableList(
310            GroupingSetsList groupingSetsList,
311            SortedSet<Comparable<?>> valueSet, boolean axisContainsNull,
312            RolapStar.Column column)
313        {
314            for (GroupingSet groupingSet : groupingSetsList.getRollupGroupingSets()) {
315                RolapStar.Column[] columns = groupingSet.getColumns();
316                for (int i = 0; i < columns.length; i++) {
317                    if (columns[i].equals(column)) {
318                        groupingSet.getAxes()[i]
319                            .loadKeys(valueSet, axisContainsNull);
320                    }
321                }
322            }
323        }
324    
325        /**
326         * Creates and executes a SQL statement to retrieve the set of cells
327         * specified by a GroupingSetsList.
328         *
329         * <p>This method may be overridden in tests.
330         *
331         * @param groupingSetsList Grouping
332         * @return An executed SQL statement, or null
333         */
334        SqlStatement createExecuteSql(
335            GroupingSetsList groupingSetsList,
336            List<StarPredicate> compoundPredicateList) {
337            RolapStar star = groupingSetsList.getStar();
338            String sql =
339                AggregationManager.instance().generateSql(
340                    groupingSetsList, compoundPredicateList);
341            return RolapUtil.executeQuery(
342                star.getDataSource(), sql, "Segment.load",
343                "Error while loading segment");
344        }
345    
346        List<Object[]> processData(
347            SqlStatement stmt,
348            boolean[] axisContainsNull,
349            SortedSet<Comparable<?>>[] axisValueSets,
350            GroupingSetsList groupingSetsList) throws SQLException
351        {
352            Segment[] segments = groupingSetsList.getDefaultSegments();
353            int measureCount = segments.length;
354            List<Object[]> rawData = loadData(stmt, groupingSetsList);
355            List<Object[]> processedRows = new ArrayList<Object[]>(rawData.size());
356    
357            int arity = axisValueSets.length;
358            int groupingColumnStartIndex = arity + measureCount;
359            for (Object[] row : rawData) {
360                Object[] processedRow =
361                    groupingSetsList.useGroupingSets() ?
362                        new Object[row.length - (groupingSetsList
363                            .getRollupColumns().size()) + 1] :
364                        new Object[row.length];
365                // get the columns
366                int columnIndex = 0;
367                for (int axisIndex = 0; axisIndex < arity;
368                     axisIndex++, columnIndex++) {
369                    Object o = row[columnIndex];
370                    if (o == null) {
371                        o = RolapUtil.sqlNullValue;
372                        if (!groupingSetsList.useGroupingSets() ||
373                            !isAggregateNull(row, groupingColumnStartIndex,
374                                groupingSetsList, axisIndex)) {
375                            axisContainsNull[axisIndex] = true;
376                        }
377                    } else {
378                        axisValueSets[axisIndex].add(Aggregation.Axis.wrap(o));
379                    }
380                    processedRow[columnIndex] = o;
381                }
382                // get the measure
383                for (int i = 0; i < measureCount; i++, columnIndex++) {
384                    Object o = row[columnIndex];
385                    if (o == null) {
386                        o = Util.nullValue; // convert to placeholder
387                    } else if (segments[i].measure.getDatatype().isNumeric()) {
388                        if (o instanceof Double) {
389                            // nothing to do
390                        } else if (o instanceof Number) {
391                            o = ((Number) o).doubleValue();
392                        } else if (o instanceof byte[]) {
393                            // On MySQL 5.0 in German locale, values can come
394                            // out as byte arrays. Don't know why. Bug 1594119.
395                            o = Double.parseDouble(new String((byte[]) o));
396                        } else {
397                            o = Double.parseDouble(o.toString());
398                        }
399                    }
400                    processedRow[columnIndex] = o;
401                }
402                if (groupingSetsList.useGroupingSets()) {
403                    processedRow[columnIndex] = getRollupBitKey(
404                        groupingSetsList.getRollupColumns().size(), row,
405                        columnIndex);
406                }
407                processedRows.add(processedRow);
408            }
409            return processedRows;
410        }
411    
412        /**
413         * Generates bit key representing roll up columns
414         */
415        BitKey getRollupBitKey(int arity, Object[] row, int k) {
416            BitKey groupingBitKey = BitKey.Factory.makeBitKey(arity);
417            for (int i = 0; i < arity; i++) {
418                Object o = row[k + i];
419                if (isOne(o)) {
420                    groupingBitKey.set(i);
421                }
422            }
423            return groupingBitKey;
424        }
425    
426        private static boolean isOne(Object o) {
427            return ((Number) o).intValue() == 1;
428        }
429    
430        private boolean isAggregateNull(
431            Object[] row,
432            int groupingColumnStartIndex,
433            GroupingSetsList groupingSetsList,
434            int axisIndex)
435        {
436            int groupingFunctionIndex =
437                groupingSetsList.findGroupingFunctionIndex(axisIndex);
438            if (groupingFunctionIndex == -1) {
439                // Not a rollup column
440                return false;
441            }
442            return isOne(row[groupingColumnStartIndex + groupingFunctionIndex]);
443        }
444    
445        List<Object[]> loadData(
446            SqlStatement stmt,
447            GroupingSetsList groupingSetsList)
448            throws SQLException
449        {
450            int arity = groupingSetsList.getDefaultColumns().length;
451            int measureCount = groupingSetsList.getDefaultSegments().length;
452            int groupingFunctionsCount = groupingSetsList.getRollupColumns().size();
453    
454            List<Object[]> rows = new ArrayList<Object[]>();
455            ResultSet resultSet = stmt.getResultSet();
456            while (resultSet.next()) {
457                ++stmt.rowCount;
458                Object[] row =
459                    groupingSetsList.useGroupingSets() ?
460                        new Object[arity + measureCount +
461                            groupingFunctionsCount] :
462                        new Object[arity + measureCount];
463                for (int i = 0; i < row.length; i++) {
464                    row[i] = resultSet.getObject(i + 1);
465                }
466                rows.add(row);
467            }
468            return rows;
469        }
470    
471        List<RolapStar.Column[]> getGroupingColumnsList(
472            RolapStar.Column[] detailedBatchColumns,
473            List<GroupingSet> aggBatchDetails)
474        {
475            List<RolapStar.Column[]> groupingColumns =
476                new ArrayList<RolapStar.Column[]>();
477            if (aggBatchDetails.isEmpty()) {
478                return groupingColumns;
479            }
480            groupingColumns.add(detailedBatchColumns);
481            for (GroupingSet aggBatchDetail : aggBatchDetails) {
482                groupingColumns.add(aggBatchDetail.getSegments()[0]
483                    .aggregation.getColumns());
484    
485            }
486            return groupingColumns;
487        }
488    
489        SortedSet<Comparable<?>>[] getDistinctValueWorkspace(int arity) {
490            // Workspace to build up lists of distinct values for each axis.
491            SortedSet<Comparable<?>>[] axisValueSets = new SortedSet[arity];
492            for (int i = 0; i < axisValueSets.length; i++) {
493    
494                if (Util.PreJdk15) {
495                    // Work around the fact that Boolean is not Comparable until JDK
496                    // 1.5.
497                    assert !(Comparable.class.isAssignableFrom(Boolean.class));
498                    final SortedSet set =
499                        new TreeSet<Comparable<Object>>(
500                            new Comparator<Object>() {
501                                public int compare(Object o1, Object o2) {
502                                    if (o1 instanceof Boolean) {
503                                        boolean b1 = (Boolean) o1;
504                                        if (o2 instanceof Boolean) {
505                                            boolean b2 = (Boolean) o2;
506                                            return (b1 == b2 ? 0 :
507                                                (b1 ? 1 : -1));
508                                        } else {
509                                            return -1;
510                                        }
511                                    } else {
512                                        return ((Comparable) o1).compareTo(o2);
513                                    }
514                                }
515                            }
516                       );
517                    axisValueSets[i] = set;
518                } else {
519                    assert Comparable.class.isAssignableFrom(Boolean.class);
520                    axisValueSets[i] = new TreeSet<Comparable<?>>();
521                }
522            }
523            return axisValueSets;
524        }
525    
526        /**
527         * Decides whether to use a sparse representation for this segment, using
528         * the formula described
529         * {@link mondrian.olap.MondrianProperties#SparseSegmentCountThreshold here}.
530         *
531         * @param possibleCount Number of values in the space.
532         * @param actualCount   Actual number of values.
533         * @return Whether to use a sparse representation.
534         */
535        private static boolean useSparse(
536            final double possibleCount, final double actualCount)
537        {
538            final MondrianProperties properties = MondrianProperties.instance();
539            double densityThreshold =
540                properties.SparseSegmentDensityThreshold.get();
541            if (densityThreshold < 0) {
542                densityThreshold = 0;
543            }
544            if (densityThreshold > 1) {
545                densityThreshold = 1;
546            }
547            int countThreshold = properties.SparseSegmentCountThreshold.get();
548            if (countThreshold < 0) {
549                countThreshold = 0;
550            }
551            boolean sparse =
552                (possibleCount - countThreshold) * densityThreshold >
553                    actualCount;
554            if (possibleCount < countThreshold) {
555                assert !sparse :
556                    "Should never use sparse if count is less " +
557                        "than threshold, possibleCount=" + possibleCount +
558                        ", actualCount=" + actualCount +
559                        ", countThreshold=" + countThreshold +
560                        ", densityThreshold=" + densityThreshold;
561            }
562            if (possibleCount == actualCount) {
563                assert !sparse :
564                    "Should never use sparse if result is 100% dense: " +
565                        "possibleCount=" + possibleCount +
566                        ", actualCount=" + actualCount +
567                        ", countThreshold=" + countThreshold +
568                        ", densityThreshold=" + densityThreshold;
569            }
570            return sparse;
571        }
572    }
573    
574    // End SegmentLoader.java