/*
// $Id: //open/mondrian/src/main/mondrian/rolap/agg/SegmentLoader.java#8 $
// This software is subject to the terms of the Common Public License
// Agreement, available at the following URL:
// http://www.opensource.org/licenses/cpl.html.
// Copyright (C) 2002-2008 Julian Hyde and others
// All Rights Reserved.
// You must accept the terms of that agreement to use this software.
*/
package mondrian.rolap.agg;

import mondrian.rolap.*;
import mondrian.olap.Util;
import mondrian.olap.MondrianProperties;

import java.util.*;
import java.sql.ResultSet;
import java.sql.SQLException;

/**
 * <p>The <code>SegmentLoader</code> queries database and loads the data into
 * the given set of segments.</p>
 *
 * <p>It reads a segment of <code>measure</code>, where <code>columns</code> are
 * constrained to <code>values</code>. Each entry in <code>values</code>
 * can be null, meaning don't constrain, or can have several values. For
 * example, <code>getSegment({Unit_sales}, {Region, State, Year}, {{"West"},
 * {"CA", "OR", "WA"}, null})</code> returns sales in states CA, OR and WA
 * in the Western region, for all years.</p>
 *
 * @author Thiyagu
 * @version $Id: //open/mondrian/src/main/mondrian/rolap/agg/SegmentLoader.java#8 $
 * @since 24 May 2007
 */
public class SegmentLoader {
    /**
     * Loads data for all the segments of the GroupingSets. If the grouping sets
     * list contains more than one Grouping Set then data is loaded using the
     * GROUP BY GROUPING SETS sql. Else if only one grouping set is passed in
     * the list data is loaded without using GROUP BY GROUPING SETS sql. If the
     * database does not support grouping sets
     * {@link mondrian.rolap.sql.SqlQuery.Dialect#supportsGroupingSets()} then
     * grouping sets list should always have only one element in it.
     *
     * <p>For example, if list has 2 grouping sets with columns A, B, C and B, C
     * respectively, then the SQL will be
     * "GROUP BY GROUPING SETS ((A, B, C), (B, C))".
     *
     * <p>Else if the list has only one grouping set then sql would be without
     * grouping sets.
     *
     * <p>The <code>groupingSets</code> list should be in topological order,
     * with more detailed higher-level grouping sets occurring first. In other
     * words, the first element of the list should always be the detailed
     * grouping set (default grouping set), followed by grouping sets which can
     * be rolled-up on this detailed grouping set.
     * In the example (A, B, C) is the detailed grouping set and (B, C) is
     * rolled-up using the detailed.
     *
     * <p>Whatever the outcome, the statement (if one was created) is closed
     * and {@link #setFailOnStillLoadingSegments} is invoked on the way out.
     *
     * @param groupingSets List of grouping sets whose segments are loaded
     * @param pinnedSegments Pinned segments
     * @param compoundPredicateList Compound predicates, passed through to
     *   SQL generation
     */
    public void load(
        List<GroupingSet> groupingSets,
        RolapAggregationManager.PinSet pinnedSegments,
        List<StarPredicate> compoundPredicateList)
    {
        GroupingSetsList groupingSetsList =
            new GroupingSetsList(groupingSets);
        boolean useGroupingSet = groupingSetsList.useGroupingSets();
        RolapStar.Column[] defaultColumns =
            groupingSetsList.getDefaultColumns();
        SqlStatement stmt = null;
        try {
            stmt = createExecuteSql(
                groupingSetsList,
                compoundPredicateList);
            int arity = defaultColumns.length;
            SortedSet<Comparable<?>>[] axisValueSets =
                getDistinctValueWorkspace(arity);

            boolean[] axisContainsNull = new boolean[arity];

            List<Object[]> rows =
                processData(
                    stmt, axisContainsNull,
                    axisValueSets, groupingSetsList);

            boolean sparse =
                setAxisDataAndDecideSparseUse(axisValueSets,
                    axisContainsNull, groupingSetsList,
                    rows);

            SegmentDataset[] nonGroupingDataSets = null;

            final Map<BitKey, SegmentDataset[]> groupingDataSetsMap =
                new HashMap<BitKey, SegmentDataset[]>();

            if (useGroupingSet) {
                populateDataSetMapOnGroupingColumnsBitKeys(
                    groupingSetsList,
                    sparse, groupingDataSetsMap);
            } else {
                nonGroupingDataSets = createDataSets(
                    sparse,
                    groupingSetsList.getDefaultSegments(),
                    groupingSetsList.getDefaultAxes());
            }

            loadDataToDataSets(
                groupingSetsList, rows, groupingDataSetsMap,
                nonGroupingDataSets, axisContainsNull, sparse);

            setDataToSegments(
                groupingSetsList, nonGroupingDataSets,
                groupingDataSetsMap, pinnedSegments);

        } catch (SQLException e) {
            throw stmt.handle(e);
        } finally {
            if (stmt != null) {
                stmt.close();
            }
            // Any segments which are still loading have failed.
            setFailOnStillLoadingSegments(groupingSetsList);
        }
    }

    /**
     * Calls {@link Segment#setFailIfStillLoading()} on every segment of
     * every grouping set in the list.
     *
     * @param groupingSetsList Grouping sets whose segments are visited
     */
    void setFailOnStillLoadingSegments(GroupingSetsList groupingSetsList) {
        for (GroupingSet groupingset : groupingSetsList.getGroupingSets()) {
            for (Segment segment : groupingset.getSegments()) {
                segment.setFailIfStillLoading();
            }
        }
    }

    /**
     * Loads data to the datasets. If the grouping sets is used,
     * dataset is fetched from groupingDataSetMap using grouping bit keys of
     * the row data. If grouping sets is not used, data is loaded on to
     * nonGroupingDataSets.
     *
     * <p>Each row is expected in the layout produced by
     * {@link #processData}: axis values, then measure values, then (only
     * when grouping sets are used) the rollup bit key.
     *
     * @param groupingSetsList Grouping sets being loaded
     * @param rows Processed rows
     * @param groupingDataSetMap Datasets keyed by rollup bit key; consulted
     *   only when grouping sets are used
     * @param nonGroupingDataSets Datasets used when grouping sets are not used
     * @param axisContainsNull Not read by this method
     * @param sparse Whether the datasets are sparse (keyed by cell key) or
     *   dense (addressed by a computed offset)
     */
    private void loadDataToDataSets(
        GroupingSetsList groupingSetsList, List<Object[]> rows,
        Map<BitKey, SegmentDataset[]> groupingDataSetMap,
        SegmentDataset[] nonGroupingDataSets, boolean[] axisContainsNull,
        boolean sparse)
    {
        int arity = groupingSetsList.getDefaultColumns().length;
        boolean useGroupingSet = groupingSetsList.useGroupingSets();
        Aggregation.Axis[] axes = groupingSetsList.getDefaultAxes();
        int segmentLength = groupingSetsList.getDefaultSegments().length;

        List<Integer> pos = new ArrayList<Integer>(arity);
        for (Object[] row : rows) {
            final SegmentDataset[] datasets;
            // The bit key, when present, follows the axis and measure slots.
            int groupingBitKeyIndex = arity + segmentLength;
            if (useGroupingSet) {
                BitKey groupingBitKey = (BitKey) row[groupingBitKeyIndex];
                datasets = groupingDataSetMap.get(groupingBitKey);
            } else {
                datasets = nonGroupingDataSets;
            }
            // 'pos' collects per-axis offsets (sparse addressing); 'k' is the
            // same coordinate flattened to a single index (dense addressing).
            int k = 0;
            for (int j = 0; j < arity; j++) {
                Object o = row[j];
                if (useGroupingSet &&
                    isRollupNull(groupingSetsList, row, groupingBitKeyIndex, j))
                {
                    // Column was rolled up in this row; it contributes no
                    // coordinate.
                    continue;
                }
                Aggregation.Axis axis = axes[j];
                int offset = axis.getOffset(o);
                pos.add(offset);
                k *= axes[j].getKeys().length;
                k += offset;
            }

            if (sparse) {
                CellKey key = CellKey.Generator.newCellKey(toArray(pos));
                for (int j = 0; j < segmentLength; j++) {
                    final Object o = row[arity + j];
                    datasets[j].put(key, o);
                }
            } else {
                for (int j = 0; j < segmentLength; j++) {
                    final Object o = row[arity + j];
                    ((DenseSegmentDataset) datasets[j]).set(k, o);
                }
            }
            pos.clear();
        }
    }

    /**
     * Returns whether column <code>j</code> of a processed row holds the SQL
     * null placeholder and the row's grouping bit for that column is set.
     *
     * <p>A column with no grouping function (index -1) is never a rollup
     * null, matching the treatment in {@link #isAggregateNull}.
     *
     * @param groupingSetsList Grouping sets being loaded
     * @param row Processed row
     * @param groupingBitKeyIndex Position of the bit key within the row
     * @param j Axis (column) ordinal
     * @return Whether the value is a null introduced by rollup
     */
    private boolean isRollupNull(
        GroupingSetsList groupingSetsList, Object[] row,
        int groupingBitKeyIndex, int j)
    {
        int groupingFunctionIndex =
            groupingSetsList.findGroupingFunctionIndex(j);
        if (groupingFunctionIndex == -1) {
            // Not a rollup column; do not probe the bit key with a negative
            // position.
            return false;
        }
        BitKey groupingBitKey = (BitKey) row[groupingBitKeyIndex];
        boolean isGroupingBitSet = groupingBitKey.get(groupingFunctionIndex);
        return row[j].equals(RolapUtil.sqlNullValue) && isGroupingBitSet;
    }

    /**
     * Copies a list of integers into a new primitive array.
     *
     * @param pos List of offsets
     * @return Array with the same elements in the same order
     */
    private int[] toArray(List<Integer> pos) {
        int posArr[] = new int[pos.size()];
        for (int i = 0; i < posArr.length; i++) {
            posArr[i] = pos.get(i);
        }
        return posArr;
    }

    /**
     * Loads the distinct values into each default axis (and into the
     * matching axes of the rollup grouping sets), computes the size of the
     * full value space, and decides whether a sparse representation should
     * be used.
     *
     * @param axisValueSets Distinct values found for each axis
     * @param axisContainsNull Whether each axis contains a null value
     * @param groupingSetsList Grouping sets being loaded
     * @param rows Processed rows; only the row count is used
     * @return Whether to use a sparse representation
     */
    private boolean setAxisDataAndDecideSparseUse(
        SortedSet<Comparable<?>>[] axisValueSets,
        boolean[] axisContainsNull,
        GroupingSetsList groupingSetsList,
        List<Object[]> rows)
    {
        Aggregation.Axis[] axes = groupingSetsList.getDefaultAxes();
        RolapStar.Column[] allColumns = groupingSetsList.getDefaultColumns();
        // Figure out size of dense array, and allocate it, or use a sparse
        // array if appropriate.
        boolean sparse = false;
        int n = 1;
        for (int i = 0; i < axes.length; i++) {
            Aggregation.Axis axis = axes[i];
            SortedSet<Comparable<?>> valueSet = axisValueSets[i];
            int size = axis.loadKeys(valueSet, axisContainsNull[i]);
            setAxisDataToGroupableList(groupingSetsList, valueSet,
                axisContainsNull[i], allColumns[i]);
            int previous = n;
            // Multiply in long arithmetic: an int product can wrap around to
            // a large positive value (e.g. 100000 * 100000) that a simple
            // "result got smaller" test would not notice.
            long product = (long) previous * size;
            if ((product > Integer.MAX_VALUE) || (product < previous)) {
                // Overflow has occurred. An axis with no keys (size 0) also
                // takes this path.
                n = Integer.MAX_VALUE;
                sparse = true;
            } else {
                n = (int) product;
            }
        }
        return useSparse(sparse, n, rows);
    }

    /**
     * Returns true if <code>sparse</code> is already true, otherwise applies
     * the density formula to <code>n</code> possible cells versus the number
     * of rows.
     *
     * @param sparse Whether sparse has already been decided
     * @param n Number of cells in the value space
     * @param rows Rows loaded; only the count is used
     * @return Whether to use a sparse representation
     */
    boolean useSparse(boolean sparse, int n, List<Object[]> rows) {
        sparse = sparse || useSparse((double) n, (double) rows.size());
        return sparse;
    }

    /**
     * Hands each dataset to its segment. With grouping sets, the datasets of
     * the i-th grouping set are looked up in <code>datasetsMap</code> by the
     * i-th rollup-columns bit key; otherwise <code>detailedDataSet</code> is
     * used.
     *
     * @param groupingSetsList Grouping sets being loaded
     * @param detailedDataSet Datasets used when grouping sets are not used
     * @param datasetsMap Datasets keyed by rollup bit key
     * @param pinnedSegments Pinned segments, passed to each segment
     */
    private void setDataToSegments(
        GroupingSetsList groupingSetsList,
        SegmentDataset[] detailedDataSet,
        Map<BitKey, SegmentDataset[]> datasetsMap,
        RolapAggregationManager.PinSet pinnedSegments)
    {
        List<GroupingSet> groupingSets = groupingSetsList.getGroupingSets();
        boolean useGroupingSet = groupingSetsList.useGroupingSets();
        for (int i = 0; i < groupingSets.size(); i++) {
            Segment[] groupedSegments = groupingSets.get(i).getSegments();
            SegmentDataset[] dataSets = useGroupingSet ? datasetsMap
                .get(groupingSetsList.getRollupColumnsBitKeyList().get(i)) :
                detailedDataSet;
            for (int j = 0; j < groupedSegments.length; j++) {
                Segment groupedSegment = groupedSegments[j];
                groupedSegment
                    .setData(dataSets[j], pinnedSegments);
            }
        }
    }

    /**
     * Creates datasets for every grouping set and registers them in
     * <code>datasetsMap</code> under the grouping set's rollup-columns bit
     * key.
     *
     * @param groupingSetsList Grouping sets being loaded
     * @param sparse Whether to create sparse datasets
     * @param datasetsMap Map to populate
     */
    private void populateDataSetMapOnGroupingColumnsBitKeys(
        GroupingSetsList groupingSetsList, boolean sparse,
        Map<BitKey, SegmentDataset[]> datasetsMap)
    {
        List<GroupingSet> groupingSets = groupingSetsList.getGroupingSets();
        List<BitKey> groupingColumnsBitKeyList =
            groupingSetsList.getRollupColumnsBitKeyList();
        for (int i = 0; i < groupingSets.size(); i++) {
            GroupingSet groupingSet = groupingSets.get(i);
            SegmentDataset[] datasets = createDataSets(sparse,
                groupingSet.getSegments(), groupingSet.getAxes());
            datasetsMap.put(groupingColumnsBitKeyList.get(i), datasets);
        }
    }

    /**
     * Returns the product of the key counts of the given axes.
     *
     * @param axes Axes whose keys have been loaded
     * @return Number of cells in the space spanned by the axes
     */
    private int calculateMaxDataSize(Aggregation.Axis[] axes) {
        int n = 1;
        for (Aggregation.Axis axis : axes) {
            n *= axis.getKeys().length;
        }
        return n;
    }

    /**
     * Creates one dataset per segment: sparse datasets if
     * <code>sparse</code>, otherwise dense datasets backed by an array sized
     * to the product of the axes' key counts.
     *
     * @param sparse Whether to create sparse datasets
     * @param segments Segments to create datasets for
     * @param axes Axes of the segments; only used for dense datasets
     * @return Array of datasets, parallel to <code>segments</code>
     */
    private SegmentDataset[] createDataSets(boolean sparse,
        Segment[] segments, Aggregation.Axis[] axes)
    {
        int n = (sparse ? 0 : calculateMaxDataSize(axes));
        SegmentDataset[] datasets;
        if (sparse) {
            datasets = new SparseSegmentDataset[segments.length];
            for (int i = 0; i < segments.length; i++) {
                datasets[i] = new SparseSegmentDataset(segments[i]);
            }
        } else {
            datasets = new DenseSegmentDataset[segments.length];
            for (int i = 0; i < segments.length; i++) {
                datasets[i] = new DenseSegmentDataset(
                    segments[i], new Object[n]);
            }
        }
        return datasets;
    }

    /**
     * For every rollup grouping set that contains <code>column</code>, loads
     * the given keys into the corresponding axis of that grouping set.
     *
     * @param groupingSetsList Grouping sets being loaded
     * @param valueSet Distinct values of the column
     * @param axisContainsNull Whether the column contains a null value
     * @param column Column whose values are being distributed
     */
    private void setAxisDataToGroupableList(
        GroupingSetsList groupingSetsList,
        SortedSet<Comparable<?>> valueSet, boolean axisContainsNull,
        RolapStar.Column column)
    {
        for (GroupingSet groupingSet
            : groupingSetsList.getRollupGroupingSets())
        {
            RolapStar.Column[] columns = groupingSet.getColumns();
            for (int i = 0; i < columns.length; i++) {
                if (columns[i].equals(column)) {
                    groupingSet.getAxes()[i]
                        .loadKeys(valueSet, axisContainsNull);
                }
            }
        }
    }

    /**
     * Creates and executes a SQL statement to retrieve the set of cells
     * specified by a GroupingSetsList.
     *
     * <p>This method may be overridden in tests.
     *
     * @param groupingSetsList Grouping
     * @param compoundPredicateList Compound predicates, passed to SQL
     *   generation
     * @return An executed SQL statement, or null
     */
    SqlStatement createExecuteSql(
        GroupingSetsList groupingSetsList,
        List<StarPredicate> compoundPredicateList)
    {
        RolapStar star = groupingSetsList.getStar();
        String sql =
            AggregationManager.instance().generateSql(
                groupingSetsList, compoundPredicateList);
        return RolapUtil.executeQuery(
            star.getDataSource(), sql, "Segment.load",
            "Error while loading segment");
    }

    /**
     * Reads all rows from the statement and converts them to the layout used
     * by the rest of the loader: axis values (SQL null replaced by
     * {@link RolapUtil#sqlNullValue}), measure values (null replaced by
     * {@link Util#nullValue}; numeric measures converted to Double), and,
     * when grouping sets are used, a single {@link BitKey} in place of the
     * individual grouping-function columns.
     *
     * <p>As a side effect, populates <code>axisValueSets</code> with the
     * non-null axis values and sets <code>axisContainsNull</code> for axes
     * where a null was seen that was not produced by rollup.
     *
     * @param stmt Executed statement
     * @param axisContainsNull Output: whether each axis contains a null
     * @param axisValueSets Output: distinct non-null values of each axis
     * @param groupingSetsList Grouping sets being loaded
     * @return Processed rows
     * @throws SQLException on error reading the result set
     */
    List<Object[]> processData(
        SqlStatement stmt,
        boolean[] axisContainsNull,
        SortedSet<Comparable<?>>[] axisValueSets,
        GroupingSetsList groupingSetsList) throws SQLException
    {
        Segment[] segments = groupingSetsList.getDefaultSegments();
        int measureCount = segments.length;
        List<Object[]> rawData = loadData(stmt, groupingSetsList);
        List<Object[]> processedRows = new ArrayList<Object[]>(rawData.size());

        int arity = axisValueSets.length;
        int groupingColumnStartIndex = arity + measureCount;
        // Loop-invariant lookups, hoisted out of the per-row loop.
        final boolean useGroupingSets = groupingSetsList.useGroupingSets();
        final int rollupColumnCount =
            useGroupingSets ? groupingSetsList.getRollupColumns().size() : 0;
        for (Object[] row : rawData) {
            // With grouping sets, the grouping-function columns collapse
            // into one bit key slot.
            Object[] processedRow =
                useGroupingSets
                    ? new Object[row.length - rollupColumnCount + 1]
                    : new Object[row.length];
            // get the columns
            int columnIndex = 0;
            for (int axisIndex = 0; axisIndex < arity;
                 axisIndex++, columnIndex++)
            {
                Object o = row[columnIndex];
                if (o == null) {
                    o = RolapUtil.sqlNullValue;
                    if (!useGroupingSets ||
                        !isAggregateNull(row, groupingColumnStartIndex,
                            groupingSetsList, axisIndex))
                    {
                        axisContainsNull[axisIndex] = true;
                    }
                } else {
                    axisValueSets[axisIndex].add(Aggregation.Axis.wrap(o));
                }
                processedRow[columnIndex] = o;
            }
            // get the measure
            for (int i = 0; i < measureCount; i++, columnIndex++) {
                Object o = row[columnIndex];
                if (o == null) {
                    o = Util.nullValue; // convert to placeholder
                } else if (segments[i].measure.getDatatype().isNumeric()) {
                    if (o instanceof Double) {
                        // nothing to do
                    } else if (o instanceof Number) {
                        o = ((Number) o).doubleValue();
                    } else if (o instanceof byte[]) {
                        // On MySQL 5.0 in German locale, values can come
                        // out as byte arrays. Don't know why. Bug 1594119.
                        o = Double.parseDouble(new String((byte[]) o));
                    } else {
                        o = Double.parseDouble(o.toString());
                    }
                }
                processedRow[columnIndex] = o;
            }
            if (useGroupingSets) {
                processedRow[columnIndex] = getRollupBitKey(
                    rollupColumnCount, row,
                    columnIndex);
            }
            processedRows.add(processedRow);
        }
        return processedRows;
    }

    /**
     * Generates bit key representing roll up columns.
     *
     * <p>Bit <code>i</code> is set if <code>row[k + i]</code> is a number
     * equal to 1.
     *
     * @param arity Number of rollup columns
     * @param row Raw row
     * @param k Index within the row of the first grouping-function column
     * @return Bit key with one bit per rollup column
     */
    BitKey getRollupBitKey(int arity, Object[] row, int k) {
        BitKey groupingBitKey = BitKey.Factory.makeBitKey(arity);
        for (int i = 0; i < arity; i++) {
            Object o = row[k + i];
            if (isOne(o)) {
                groupingBitKey.set(i);
            }
        }
        return groupingBitKey;
    }

    /**
     * Returns whether the given object, which must be a {@link Number}, has
     * the int value 1.
     */
    private static boolean isOne(Object o) {
        return ((Number) o).intValue() == 1;
    }

    /**
     * Returns whether the grouping-function column associated with the given
     * axis holds 1 in a raw row.
     *
     * @param row Raw row
     * @param groupingColumnStartIndex Index of the first grouping-function
     *   column within the row
     * @param groupingSetsList Grouping sets being loaded
     * @param axisIndex Axis (column) ordinal
     * @return false if the axis has no grouping function; otherwise whether
     *   its grouping-function value is 1
     */
    private boolean isAggregateNull(
        Object[] row,
        int groupingColumnStartIndex,
        GroupingSetsList groupingSetsList,
        int axisIndex)
    {
        int groupingFunctionIndex =
            groupingSetsList.findGroupingFunctionIndex(axisIndex);
        if (groupingFunctionIndex == -1) {
            // Not a rollup column
            return false;
        }
        return isOne(row[groupingColumnStartIndex + groupingFunctionIndex]);
    }

    /**
     * Reads every row of the statement's result set into an object array.
     * Each row has one slot per default column, one per default segment and,
     * when grouping sets are used, one per rollup column. Increments
     * <code>stmt.rowCount</code> for each row read.
     *
     * @param stmt Executed statement
     * @param groupingSetsList Grouping sets being loaded
     * @return Raw rows, in result-set order
     * @throws SQLException on error reading the result set
     */
    List<Object[]> loadData(
        SqlStatement stmt,
        GroupingSetsList groupingSetsList)
        throws SQLException
    {
        int arity = groupingSetsList.getDefaultColumns().length;
        int measureCount = groupingSetsList.getDefaultSegments().length;
        int groupingFunctionsCount = groupingSetsList.getRollupColumns().size();

        List<Object[]> rows = new ArrayList<Object[]>();
        ResultSet resultSet = stmt.getResultSet();
        while (resultSet.next()) {
            ++stmt.rowCount;
            Object[] row =
                groupingSetsList.useGroupingSets()
                    ? new Object[arity + measureCount + groupingFunctionsCount]
                    : new Object[arity + measureCount];
            for (int i = 0; i < row.length; i++) {
                // JDBC column ordinals are 1-based.
                row[i] = resultSet.getObject(i + 1);
            }
            rows.add(row);
        }
        return rows;
    }

    /**
     * Returns a list containing <code>detailedBatchColumns</code> followed
     * by the aggregation columns of the first segment of each grouping set,
     * or an empty list if <code>aggBatchDetails</code> is empty.
     *
     * @param detailedBatchColumns Columns of the detailed batch
     * @param aggBatchDetails Grouping sets
     * @return List of column arrays
     */
    List<RolapStar.Column[]> getGroupingColumnsList(
        RolapStar.Column[] detailedBatchColumns,
        List<GroupingSet> aggBatchDetails)
    {
        List<RolapStar.Column[]> groupingColumns =
            new ArrayList<RolapStar.Column[]>();
        if (aggBatchDetails.isEmpty()) {
            return groupingColumns;
        }
        groupingColumns.add(detailedBatchColumns);
        for (GroupingSet aggBatchDetail : aggBatchDetails) {
            groupingColumns.add(aggBatchDetail.getSegments()[0]
                .aggregation.getColumns());

        }
        return groupingColumns;
    }

    /**
     * Creates one empty sorted set per axis, to collect the distinct values
     * of that axis.
     *
     * @param arity Number of axes
     * @return Array of <code>arity</code> empty sorted sets
     */
    SortedSet<Comparable<?>>[] getDistinctValueWorkspace(int arity) {
        // Workspace to build up lists of distinct values for each axis.
        SortedSet<Comparable<?>>[] axisValueSets = new SortedSet[arity];
        for (int i = 0; i < axisValueSets.length; i++) {

            if (Util.PreJdk15) {
                // Work around the fact that Boolean is not Comparable until JDK
                // 1.5.
                assert !(Comparable.class.isAssignableFrom(Boolean.class));
                final SortedSet set =
                    new TreeSet<Comparable<Object>>(
                        new Comparator<Object>() {
                            public int compare(Object o1, Object o2) {
                                if (o1 instanceof Boolean) {
                                    boolean b1 = (Boolean) o1;
                                    if (o2 instanceof Boolean) {
                                        boolean b2 = (Boolean) o2;
                                        return (b1 == b2 ? 0 :
                                            (b1 ? 1 : -1));
                                    } else {
                                        return -1;
                                    }
                                } else {
                                    return ((Comparable) o1).compareTo(o2);
                                }
                            }
                        }
                    );
                axisValueSets[i] = set;
            } else {
                assert Comparable.class.isAssignableFrom(Boolean.class);
                axisValueSets[i] = new TreeSet<Comparable<?>>();
            }
        }
        return axisValueSets;
    }

    /**
     * Decides whether to use a sparse representation for this segment, using
     * the formula described
     * {@link mondrian.olap.MondrianProperties#SparseSegmentCountThreshold here}.
     *
     * <p>The density threshold is clamped to [0, 1] and the count threshold
     * to a minimum of 0 before the formula is applied.
     *
     * @param possibleCount Number of values in the space.
     * @param actualCount Actual number of values.
     * @return Whether to use a sparse representation.
     */
    private static boolean useSparse(
        final double possibleCount, final double actualCount)
    {
        final MondrianProperties properties = MondrianProperties.instance();
        double densityThreshold =
            properties.SparseSegmentDensityThreshold.get();
        if (densityThreshold < 0) {
            densityThreshold = 0;
        }
        if (densityThreshold > 1) {
            densityThreshold = 1;
        }
        int countThreshold = properties.SparseSegmentCountThreshold.get();
        if (countThreshold < 0) {
            countThreshold = 0;
        }
        boolean sparse =
            (possibleCount - countThreshold) * densityThreshold >
                actualCount;
        if (possibleCount < countThreshold) {
            assert !sparse :
                "Should never use sparse if count is less " +
                    "than threshold, possibleCount=" + possibleCount +
                    ", actualCount=" + actualCount +
                    ", countThreshold=" + countThreshold +
                    ", densityThreshold=" + densityThreshold;
        }
        if (possibleCount == actualCount) {
            assert !sparse :
                "Should never use sparse if result is 100% dense: " +
                    "possibleCount=" + possibleCount +
                    ", actualCount=" + actualCount +
                    ", countThreshold=" + countThreshold +
                    ", densityThreshold=" + densityThreshold;
        }
        return sparse;
    }
}

// End SegmentLoader.java