[CARBONDATA-3224] Support SDK validate the improper value when using withLoadOptions
authorxubo245 <xubo29@huawei.com>
Thu, 3 Jan 2019 03:26:55 +0000 (11:26 +0800)
committerZhang Zhichao <441586683@qq.com>
Mon, 7 Jan 2019 15:25:56 +0000 (23:25 +0800)
1. validate BAD_RECORDS_ACTION
2. validate BAD_RECORDS_LOGGER_ENABLE

This closes #3048

integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordLoggerTest.scala
integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
store/CSDK/test/main.cpp
store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java

index b6ba0e0..5c28cbd 100644 (file)
@@ -269,6 +269,41 @@ class BadRecordLoggerTest extends QueryTest with BeforeAndAfterAll {
     assert(checkRedirectedCsvContentAvailableInSource(csvFilePath, redirectCsvPath))
   }
 
+  test("test load ddl command with improper value") {
+    sql("drop table IF EXISTS dataLoadOptionTests")
+    sql(
+      s"""
+         | CREATE TABLE IF NOT EXISTS dataLoadOptionTests(
+         |   ID BigInt,
+         |   date Timestamp,
+         |   country String,
+         |   actual_price Double,
+         |   Quantity int,
+         |   sold_price Decimal(19,2)
+         | ) STORED BY 'carbondata'
+      """.stripMargin.trim)
+    val csvFilePath = s"$resourcesPath/badrecords/emptyTimeStampValue.csv"
+    try {
+      sql(
+        s"""
+           | LOAD DATA local inpath '" + $csvFilePath + "' INTO TABLE dataLoadOptionTests
+           | OPTIONS(
+           |   'bad_records_logger_enable'='fals',
+           |   'DELIMITER'= ',',
+           |   'QUOTECHAR'= '\"'
+           | )""".stripMargin.trim);
+      assert(false)
+    } catch {
+      case ex: Exception =>
+        assert(ex.getMessage.contains(
+          "option BAD_RECORDS_LOGGER_ENABLE can have only either TRUE or FALSE, " +
+            "It shouldn't be fals"
+        ))
+    } finally {
+      sql("drop table IF EXISTS dataLoadOptionTests")
+    }
+  }
+
   def getRedirectCsvPath(dbName: String, tableName: String, segment: String, task: String) = {
     var badRecordLocation = CarbonProperties.getInstance()
       .getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC)
@@ -308,17 +343,17 @@ class BadRecordLoggerTest extends QueryTest with BeforeAndAfterAll {
   }
 
   override def afterAll {
-    sql("drop table sales")
-    sql("drop table sales_test")
-    sql("drop table serializable_values")
-    sql("drop table serializable_values_false")
-    sql("drop table insufficientColumn")
-    sql("drop table insufficientColumn_false")
-    sql("drop table emptyColumnValues")
-    sql("drop table emptyColumnValues_false")
-    sql("drop table empty_timestamp")
-    sql("drop table empty_timestamp_false")
-    sql("drop table dataloadOptionTests")
+    sql("drop table IF EXISTS sales")
+    sql("drop table IF EXISTS sales_test")
+    sql("drop table IF EXISTS serializable_values")
+    sql("drop table IF EXISTS serializable_values_false")
+    sql("drop table IF EXISTS insufficientColumn")
+    sql("drop table IF EXISTS insufficientColumn_false")
+    sql("drop table IF EXISTS emptyColumnValues")
+    sql("drop table IF EXISTS emptyColumnValues_false")
+    sql("drop table IF EXISTS empty_timestamp")
+    sql("drop table IF EXISTS empty_timestamp_false")
+    sql("drop table IF EXISTS dataloadOptionTests")
     sql("drop table IF EXISTS loadIssue")
     CarbonProperties.getInstance()
       .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
index 7feb51c..cf44685 100644 (file)
@@ -1163,6 +1163,14 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
       }
     }
 
+    if (options.exists(_._1.equalsIgnoreCase("BAD_RECORDS_LOGGER_ENABLE"))) {
+      val optionValue: String = options("bad_records_logger_enable").head._2
+      val isValid = CarbonUtil.validateBoolean(optionValue)
+      if (!isValid) throw new MalformedCarbonCommandException(
+        "option BAD_RECORDS_LOGGER_ENABLE can have only either TRUE or FALSE, " +
+          "It shouldn't be " + optionValue)
+    }
+
     if (options.exists(_._1.equalsIgnoreCase("BAD_RECORDS_ACTION"))) {
       val optionValue: String = options("bad_records_action").head._2
       try {
index 10a6599..5128c4b 100644 (file)
@@ -526,6 +526,71 @@ void testCarbonProperties(JNIEnv *env) {
     }
 }
 
+bool testValidateBadRecordsActionWithImproperValue(JNIEnv *env, char *path) {
+    char *jsonSchema = "[{stringField:string},{shortField:short},{intField:int},{longField:long},{doubleField:double},{boolField:boolean},{dateField:date},{timeField:timestamp},{floatField:float},{arrayField:array}]";
+    try {
+        CarbonWriter writer;
+        writer.builder(env);
+        writer.outputPath(path);
+        writer.withCsvInput(jsonSchema);
+        writer.withLoadOption("BAD_RECORDS_ACTION", "FAL");
+        writer.writtenBy("CSDK");
+        writer.build();
+    } catch (jthrowable ex) {
+        env->ExceptionDescribe();
+        env->ExceptionClear();
+    }
+}
+
+bool testValidateBadRecordsLoggerEnableWithImproperValue(JNIEnv *env, char *path) {
+    char *jsonSchema = "[{stringField:string},{shortField:short},{intField:int},{longField:long},{doubleField:double},{boolField:boolean},{dateField:date},{timeField:timestamp},{floatField:float},{arrayField:array}]";
+    try {
+        CarbonWriter writer;
+        writer.builder(env);
+        writer.outputPath(path);
+        writer.withCsvInput(jsonSchema);
+        writer.withLoadOption("bad_records_logger_enable", "FLSE");
+        writer.writtenBy("CSDK");
+        writer.build();
+    } catch (jthrowable ex) {
+        env->ExceptionDescribe();
+        env->ExceptionClear();
+    }
+}
+
+bool testValidateQuoteCharWithImproperValue(JNIEnv *env, char *path) {
+    char *jsonSchema = "[{stringField:string},{shortField:short},{intField:int},{longField:long},{doubleField:double},{boolField:boolean},{dateField:date},{timeField:timestamp},{floatField:float},{arrayField:array}]";
+    try {
+        CarbonWriter writer;
+        writer.builder(env);
+        writer.outputPath(path);
+        writer.withCsvInput(jsonSchema);
+        writer.withLoadOption("quotechar", "##");
+        writer.writtenBy("CSDK");
+        writer.build();
+    } catch (jthrowable ex) {
+        env->ExceptionDescribe();
+        env->ExceptionClear();
+    }
+}
+
+bool testValidateEscapeCharWithImproperValue(JNIEnv *env, char *path) {
+    char *jsonSchema = "[{stringField:string},{shortField:short},{intField:int},{longField:long},{doubleField:double},{boolField:boolean},{dateField:date},{timeField:timestamp},{floatField:float},{arrayField:array}]";
+    try {
+        CarbonWriter writer;
+        writer.builder(env);
+        writer.outputPath(path);
+        writer.withCsvInput(jsonSchema);
+        writer.withLoadOption("escapechar", "##");
+        writer.writtenBy("CSDK");
+        writer.build();
+    } catch (jthrowable ex) {
+        env->ExceptionDescribe();
+        env->ExceptionClear();
+    }
+}
+
+
 /**
  * test write data
  * test WithLoadOption interface
@@ -545,6 +610,8 @@ bool testWriteData(JNIEnv *env, char *path, int argc, char *argv[]) {
         writer.outputPath(path);
         writer.withCsvInput(jsonSchema);
         writer.withLoadOption("complex_delimiter_level_1", "#");
+        writer.withLoadOption("BAD_RECORDS_ACTION", "FORCE");
+        writer.withLoadOption("bad_records_logger_enable", "FALSE");
         writer.writtenBy("CSDK");
         writer.taskNo(15541554.81);
         writer.withThreadSafe(1);
@@ -859,6 +926,10 @@ int main(int argc, char *argv[]) {
         tryCatchException(env);
         tryCarbonRowException(env, smallFilePath);
         testCarbonProperties(env);
+        testValidateBadRecordsActionWithImproperValue(env, "./test");
+        testValidateBadRecordsLoggerEnableWithImproperValue(env, "./test");
+        testValidateQuoteCharWithImproperValue(env, "./test");
+        testValidateEscapeCharWithImproperValue(env, "./test");
         testWriteData(env, "./data", 1, argv);
         testWriteData(env, "./dataLoadOption", 1, argv);
         readFromLocalWithoutProjection(env, smallFilePath);
index 9ad519c..cdb610d 100644 (file)
@@ -31,6 +31,7 @@ import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.carbondata.common.annotations.InterfaceAudience;
 import org.apache.carbondata.common.annotations.InterfaceStability;
+import org.apache.carbondata.common.constants.LoggerAction;
 import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
@@ -46,6 +47,7 @@ import org.apache.carbondata.core.util.CarbonProperties;
 import org.apache.carbondata.core.util.CarbonUtil;
 import org.apache.carbondata.processing.loading.model.CarbonLoadModel;
 import org.apache.carbondata.processing.loading.model.CarbonLoadModelBuilder;
+import org.apache.carbondata.processing.util.CarbonLoaderUtil;
 
 import org.apache.hadoop.conf.Configuration;
 
@@ -182,7 +184,7 @@ public class CarbonWriterBuilder {
   public CarbonWriterBuilder withLoadOptions(Map<String, String> options) {
     Objects.requireNonNull(options, "Load options should not be null");
     //validate the options.
-    for (String option: options.keySet()) {
+    for (String option : options.keySet()) {
       if (!option.equalsIgnoreCase("bad_records_logger_enable") &&
           !option.equalsIgnoreCase("bad_records_action") &&
           !option.equalsIgnoreCase("bad_record_path") &&
@@ -198,6 +200,35 @@ public class CarbonWriterBuilder {
       }
     }
 
+    for (Map.Entry<String, String> entry : options.entrySet()) {
+      if (entry.getKey().equalsIgnoreCase("bad_records_action")) {
+        try {
+          LoggerAction.valueOf(entry.getValue().toUpperCase());
+        } catch (Exception e) {
+          throw new IllegalArgumentException(
+              "option BAD_RECORDS_ACTION can have only either " +
+                  "FORCE or IGNORE or REDIRECT or FAIL. It shouldn't be " + entry.getValue());
+        }
+      } else if (entry.getKey().equalsIgnoreCase("bad_records_logger_enable")) {
+        boolean isValid;
+        isValid = CarbonUtil.validateBoolean(entry.getValue());
+        if (!isValid) {
+          throw new IllegalArgumentException("Invalid value "
+              + entry.getValue() + " for key " + entry.getKey());
+        }
+      } else if (entry.getKey().equalsIgnoreCase("quotechar")) {
+        String quoteChar = entry.getValue();
+        if (quoteChar.length() > 1) {
+          throw new IllegalArgumentException("QUOTECHAR cannot be more than one character.");
+        }
+      } else if (entry.getKey().equalsIgnoreCase("escapechar")) {
+        String escapeChar = entry.getValue();
+        if (escapeChar.length() > 1 && !CarbonLoaderUtil.isValidEscapeSequence(escapeChar)) {
+          throw new IllegalArgumentException("ESCAPECHAR cannot be more than one character.");
+        }
+      }
+    }
+
     if (this.options == null) {
       // convert it to treeMap as keys need to be case insensitive
       this.options = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
index 72510e9..acd9e5a 100644 (file)
@@ -2011,7 +2011,8 @@ public class CarbonReaderTest extends TestCase {
   }
 
   @Test
-  public void testSdkWriteWhenArrayOfStringIsEmpty() throws IOException, InvalidLoadOptionException {
+  public void testSdkWriteWhenArrayOfStringIsEmpty()
+      throws IOException, InvalidLoadOptionException {
     String badRecordAction =
         CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION);
     CarbonProperties.getInstance()
@@ -2040,6 +2041,226 @@ public class CarbonReaderTest extends TestCase {
     writer.close();
     CarbonProperties.getInstance()
         .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, badRecordAction);
+    FileUtils.deleteDirectory(new File(path));
+  }
+
+  @Test
+  public void testValidateBadRecordsActionWithImproperValue() throws IOException {
+    String path = "./testValidateBadRecordsActionValue";
+    Field[] fields = new Field[2];
+    fields[0] = new Field("stringField", DataTypes.STRING);
+    fields[1] = new Field("varcharField", DataTypes.VARCHAR);
+    Schema schema = new Schema(fields);
+    Map map = new HashMap();
+    map.put("BAD_RECORDS_ACTION", "FAL");
+    try {
+      CarbonWriter.builder()
+          .outputPath(path)
+          .withLoadOptions(map)
+          .withCsvInput(schema)
+          .enableLocalDictionary(false)
+          .writtenBy("CarbonReaderTest")
+          .build();
+      Assert.fail();
+    } catch (IllegalArgumentException e) {
+      Assert.assertTrue(e.getMessage().contains("option BAD_RECORDS_ACTION can have only either " +
+          "FORCE or IGNORE or REDIRECT or FAIL. It shouldn't be FAL"));
+    } catch (Exception e) {
+      Assert.fail();
+    } finally {
+      FileUtils.deleteDirectory(new File(path));
+    }
   }
 
+  @Test
+  public void testValidateBadRecordsActionWithProperValue() throws IOException {
+    String path = "./testValidateBadRecordsActionValue";
+    Field[] fields = new Field[2];
+    fields[0] = new Field("stringField", DataTypes.STRING);
+    fields[1] = new Field("varcharField", DataTypes.VARCHAR);
+    Schema schema = new Schema(fields);
+    Map map = new HashMap();
+    map.put("BAD_RECORDS_ACTION", "FAIL");
+    try {
+      CarbonWriter.builder()
+          .outputPath(path)
+          .withLoadOptions(map)
+          .withCsvInput(schema)
+          .enableLocalDictionary(false)
+          .writtenBy("CarbonReaderTest")
+          .build();
+    } catch (IllegalArgumentException e) {
+      e.printStackTrace();
+      Assert.fail();
+    } catch (Exception e) {
+      Assert.fail();
+    } finally {
+      FileUtils.deleteDirectory(new File(path));
+    }
+  }
+
+  @Test
+  public void testValidateBadRecordsLoggerEnableWithImproperValue() throws IOException {
+    String path = "./testValidateBadRecordsLoggerEnableValue";
+    Field[] fields = new Field[2];
+    fields[0] = new Field("stringField", DataTypes.STRING);
+    fields[1] = new Field("varcharField", DataTypes.VARCHAR);
+    Schema schema = new Schema(fields);
+    Map map = new HashMap();
+    map.put("bad_records_logger_enable", "FLSE");
+    try {
+      CarbonWriter.builder()
+          .outputPath(path)
+          .withLoadOptions(map)
+          .withCsvInput(schema)
+          .enableLocalDictionary(false)
+          .writtenBy("CarbonReaderTest")
+          .build();
+      Assert.fail();
+    } catch (IllegalArgumentException e) {
+      Assert.assertTrue(e.getMessage().contains(
+          "Invalid value FLSE for key bad_records_logger_enable"));
+    } catch (Exception e) {
+      Assert.fail();
+    } finally {
+      FileUtils.deleteDirectory(new File(path));
+    }
+  }
+
+  @Test
+  public void testValidateBadRecordsLoggerEnableWithProperValue() throws IOException {
+    String path = "./testValidateBadRecordsLoggerEnableValue";
+    Field[] fields = new Field[2];
+    fields[0] = new Field("stringField", DataTypes.STRING);
+    fields[1] = new Field("varcharField", DataTypes.VARCHAR);
+    Schema schema = new Schema(fields);
+    Map map = new HashMap();
+    map.put("bad_records_logger_enable", "FALSE");
+    try {
+      CarbonWriter.builder()
+          .outputPath(path)
+          .withLoadOptions(map)
+          .withCsvInput(schema)
+          .enableLocalDictionary(false)
+          .writtenBy("CarbonReaderTest")
+          .build();
+    } catch (IllegalArgumentException e) {
+      e.printStackTrace();
+      Assert.fail();
+    } catch (Exception e) {
+      Assert.fail();
+    } finally {
+      FileUtils.deleteDirectory(new File(path));
+    }
+  }
+
+  @Test
+  public void testValidateQuoteCharWithImproperValue() throws IOException {
+    String path = "./testValidateQuoteCharWithImproperValue";
+    Field[] fields = new Field[2];
+    fields[0] = new Field("stringField", DataTypes.STRING);
+    fields[1] = new Field("varcharField", DataTypes.VARCHAR);
+    Schema schema = new Schema(fields);
+    Map map = new HashMap();
+    map.put("quotechar", "##");
+    try {
+      CarbonWriter.builder()
+          .outputPath(path)
+          .withLoadOptions(map)
+          .withCsvInput(schema)
+          .enableLocalDictionary(false)
+          .writtenBy("CarbonReaderTest")
+          .build();
+      Assert.fail();
+    } catch (IllegalArgumentException e) {
+      Assert.assertTrue(e.getMessage().contains(
+          "QUOTECHAR cannot be more than one character."));
+    } catch (Exception e) {
+      Assert.fail();
+    } finally {
+      FileUtils.deleteDirectory(new File(path));
+    }
+  }
+
+  @Test
+  public void testValidateQuoteCharWithProperValue() throws IOException {
+    String path = "./testValidateQuoteCharWithProperValue";
+    Field[] fields = new Field[2];
+    fields[0] = new Field("stringField", DataTypes.STRING);
+    fields[1] = new Field("varcharField", DataTypes.VARCHAR);
+    Schema schema = new Schema(fields);
+    Map map = new HashMap();
+    map.put("quotechar", "#");
+    try {
+      CarbonWriter.builder()
+          .outputPath(path)
+          .withLoadOptions(map)
+          .withCsvInput(schema)
+          .enableLocalDictionary(false)
+          .writtenBy("CarbonReaderTest")
+          .build();
+    } catch (IllegalArgumentException e) {
+      e.printStackTrace();
+      Assert.fail();
+    } catch (Exception e) {
+      Assert.fail();
+    } finally {
+      FileUtils.deleteDirectory(new File(path));
+    }
+  }
+
+  @Test
+  public void testValidateEscapeCharWithImproperValue() throws IOException {
+    String path = "./testValidateEscapeCharWithImproperValue";
+    Field[] fields = new Field[2];
+    fields[0] = new Field("stringField", DataTypes.STRING);
+    fields[1] = new Field("varcharField", DataTypes.VARCHAR);
+    Schema schema = new Schema(fields);
+    Map map = new HashMap();
+    map.put("escapechar", "##");
+    try {
+      CarbonWriter.builder()
+          .outputPath(path)
+          .withLoadOptions(map)
+          .withCsvInput(schema)
+          .enableLocalDictionary(false)
+          .writtenBy("CarbonReaderTest")
+          .build();
+      Assert.fail();
+    } catch (IllegalArgumentException e) {
+      Assert.assertTrue(e.getMessage().contains(
+          "ESCAPECHAR cannot be more than one character."));
+    } catch (Exception e) {
+      Assert.fail();
+    } finally {
+      FileUtils.deleteDirectory(new File(path));
+    }
+  }
+
+  @Test
+  public void testValidateEscapeCharWithProperValue() throws IOException {
+    String path = "./testValidateEscapeCharWithProperValue";
+    Field[] fields = new Field[2];
+    fields[0] = new Field("stringField", DataTypes.STRING);
+    fields[1] = new Field("varcharField", DataTypes.VARCHAR);
+    Schema schema = new Schema(fields);
+    Map map = new HashMap();
+    map.put("escapechar", "#");
+    try {
+      CarbonWriter.builder()
+          .outputPath(path)
+          .withLoadOptions(map)
+          .withCsvInput(schema)
+          .enableLocalDictionary(false)
+          .writtenBy("CarbonReaderTest")
+          .build();
+    } catch (IllegalArgumentException e) {
+      e.printStackTrace();
+      Assert.fail();
+    } catch (Exception e) {
+      Assert.fail();
+    } finally {
+      FileUtils.deleteDirectory(new File(path));
+    }
+  }
 }