[CARBONDATA-3223] Fixed Wrong Datasize and Indexsize calculation for old store using...
author    manishnalla1994 <manish.nalla1994@gmail.com>
Wed, 2 Jan 2019 12:30:36 +0000 (18:00 +0530)
committer manishgupta88 <tomanishgupta18@gmail.com>
Mon, 7 Jan 2019 06:03:06 +0000 (11:33 +0530)
Problem: A table created and loaded on an older version (1.1) showed a data-size and index-size of 0B after being refreshed on a newer version. This happened because when the data-size came back as "null" we did not compute it and instead assigned it a value of 0 directly.

Solution: Show the old data-size and index-size as NA.
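
A minimal Scala sketch of the fix (the helper name is hypothetical, not the actual CarbonStore code): a size missing from the table status file is now carried as a -1L sentinel rather than silently becoming 0L.

    // Hypothetical reduction of the fix. Old behaviour: a null size was
    // mapped to 0L, indistinguishable from a genuinely empty segment.
    // New behaviour: a -1L sentinel marks the size as unknown so the
    // display layer can print NA instead of 0B.
    def sizeFromStatus(recorded: String): Long =
      if (recorded == null) -1L else recorded.toLong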

Also refactored the setQuerySegment code for better readability (a usage sketch follows the CarbonInputFormat.java diff below).

This closes #3047

hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala
integration/spark2/src/main/scala/org/apache/spark/sql/CarbonCountStar.scala

hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
index 24691f2..26144e2 100644 (file)
@@ -277,12 +277,7 @@
   public static void setQuerySegment(Configuration conf, AbsoluteTableIdentifier identifier) {
     String dbName = identifier.getCarbonTableIdentifier().getDatabaseName().toLowerCase();
     String tbName = identifier.getCarbonTableIdentifier().getTableName().toLowerCase();
-    String segmentNumbersFromProperty = CarbonProperties.getInstance()
-        .getProperty(CarbonCommonConstants.CARBON_INPUT_SEGMENTS + dbName + "." + tbName, "*");
-    if (!segmentNumbersFromProperty.trim().equals("*")) {
-      CarbonInputFormat.setSegmentsToAccess(conf,
-          Segment.toSegmentList(segmentNumbersFromProperty.split(","), null));
-    }
+    getQuerySegmentToAccess(conf, dbName, tbName);
   }
 
   /**
@@ -827,4 +822,22 @@
     }
     return projectColumns.toArray(new String[projectColumns.size()]);
   }
+
+  private static void getQuerySegmentToAccess(Configuration conf, String dbName, String tableName) {
+    String segmentNumbersFromProperty = CarbonProperties.getInstance()
+        .getProperty(CarbonCommonConstants.CARBON_INPUT_SEGMENTS + dbName + "." + tableName, "*");
+    if (!segmentNumbersFromProperty.trim().equals("*")) {
+      CarbonInputFormat.setSegmentsToAccess(conf,
+          Segment.toSegmentList(segmentNumbersFromProperty.split(","), null));
+    }
+  }
+
+  /**
+   * Set `CARBON_INPUT_SEGMENTS` from property to configuration
+   */
+  public static void setQuerySegment(Configuration conf, CarbonTable carbonTable) {
+    String tableName = carbonTable.getTableName();
+    getQuerySegmentToAccess(conf, carbonTable.getDatabaseName(), tableName);
+  }
+
 }
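
The consolidated segment-selection path can be exercised as in the following hedged Scala sketch. Assumptions: CARBON_INPUT_SEGMENTS is the "carbon.input.segments." key prefix, and the restrictToSegments helper plus the segment list "1,3" are made up for illustration.

    import org.apache.hadoop.mapreduce.Job
    import org.apache.carbondata.core.metadata.schema.table.CarbonTable
    import org.apache.carbondata.core.util.CarbonProperties
    import org.apache.carbondata.hadoop.api.CarbonInputFormat

    // Illustrative helper: restrict a query to segments 1 and 3, then
    // push the setting into the job configuration through the new
    // CarbonTable-based overload, which delegates to
    // getQuerySegmentToAccess internally.
    def restrictToSegments(job: Job, table: CarbonTable): Unit = {
      val key = "carbon.input.segments." +   // assumed property prefix
        table.getDatabaseName.toLowerCase + "." + table.getTableName.toLowerCase
      CarbonProperties.getInstance().addProperty(key, "1,3")
      CarbonInputFormat.setQuerySegment(job.getConfiguration, table)
    }

Passing the CarbonTable rather than an AbsoluteTableIdentifier lets callers such as CarbonCountStar reuse the table object they already hold.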
integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala
index da9d4c2..11db430 100644 (file)
@@ -107,8 +107,8 @@ object CarbonStore {
             (indices.asScala.map(_.getFile_size).sum, FileFactory.getCarbonFile(indexPath).getSize)
           } else {
             // for batch segment, we can get the data size from table status file directly
-            (if (load.getDataSize == null) 0L else load.getDataSize.toLong,
-              if (load.getIndexSize == null) 0L else load.getIndexSize.toLong)
+            (if (load.getDataSize == null) -1L else load.getDataSize.toLong,
+              if (load.getIndexSize == null) -1L else load.getIndexSize.toLong)
           }
 
           if (showHistory) {
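
On the display side, a hedged sketch of how the -1L sentinel can surface as NA (the formatter below is assumed for illustration; the actual SHOW SEGMENTS formatting code may differ):

    // Assumed display helper: negative sizes are the "unknown" sentinel
    // written for segments loaded by the old (1.1) store.
    def formatSize(bytes: Long): String =
      if (bytes < 0) "NA" else s"${bytes}B"
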
integration/spark2/src/main/scala/org/apache/spark/sql/CarbonCountStar.scala
index ac8eb64..297cb54 100644 (file)
@@ -52,7 +52,7 @@ case class CarbonCountStar(
       .setConfigurationToCurrentThread(sparkSession.sessionState.newHadoopConf())
     val absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier
     val (job, tableInputFormat) = createCarbonInputFormat(absoluteTableIdentifier)
-    CarbonInputFormat.setQuerySegment(job.getConfiguration, absoluteTableIdentifier)
+    CarbonInputFormat.setQuerySegment(job.getConfiguration, carbonTable)
 
     // get row count
     val rowCount = CarbonUpdateUtil.getRowCount(