METAMODEL-1102 Separated FixedWidthLineParser
author Claudia Pesu <Claudia.Pesu@HumanInference.com>
Wed, 13 Jul 2016 08:20:36 +0000 (10:20 +0200)
committer Dennis Du Krøger <d@hp23c.dk>
Wed, 13 Jul 2016 08:20:36 +0000 (10:20 +0200)
Closes apache/metamodel#114

CHANGES.md
fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthLineParser.java [new file with mode: 0644]
fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthLineParserTest.java [new file with mode: 0644]
fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
fixedwidth/src/test/resources/example_simple3.txt [new file with mode: 0644]

index 3ff4ca0..65223ac 100644 (file)
@@ -5,7 +5,8 @@
  * [METAMODEL-1086] - Fixed encoding issue when CsvDataContext is instantiated with InputStream.
  * [METAMODEL-1094] - Added support for Apache Cassandra version 3.x.
  * [METAMODEL-1093] - Close compiled ResultSets.
-
+ * [METAMODEL-1102] - Separated FixedWidthLineParser.
 ### Apache MetaModel 4.5.3
 
  * [METAMODEL-235] - Fixed a bug related to handling of null or missing values in ElasticSearch using REST client.
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthLineParser.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthLineParser.java
new file mode 100644 (file)
index 0000000..3746333
--- /dev/null
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel.fixedwidth;
+
+import java.io.IOException;
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Splits a single line of fixed-width text into its trimmed column values,
+ * using the widths and the strictness flag of a
+ * {@link FixedWidthConfiguration}.
+ */
+public class FixedWidthLineParser {
+    /** Line length required in strict mode when per-column widths are used. */
+    private final int _expectedLineLength;
+    /**
+     * Running row counter used when reporting inconsistent lines. It is only
+     * advanced when the configuration fails on inconsistent line widths. Note
+     * that the increment in {@link #parseLine(String)} is a read-modify-write
+     * and therefore not atomic, even though the field is volatile.
+     */
+    private volatile int _rowNumber;
+    /** Source of the column widths and of the fail-on-inconsistency flag. */
+    private final FixedWidthConfiguration _configuration;
+
+    /**
+     * Creates a parser.
+     *
+     * @param configuration
+     *            column widths and strictness settings
+     * @param expectedLineLength
+     *            exact length a line must have when per-column widths are used
+     *            and inconsistent widths are treated as errors; not consulted
+     *            otherwise
+     * @param rowNumber
+     *            initial value of the row counter; the first line parsed in
+     *            strict mode is reported as {@code rowNumber + 1}
+     */
+    public FixedWidthLineParser(FixedWidthConfiguration configuration, int expectedLineLength, int rowNumber) {
+        _configuration = configuration;
+        _expectedLineLength = expectedLineLength;
+        _rowNumber = rowNumber;
+    }
+
+    /**
+     * Parses one line into column values.
+     * <p>
+     * Characters are accumulated until the width of the current column is
+     * reached; each completed value is trimmed and added to the result. A
+     * trailing value that is shorter than its column is added as well.
+     * <p>
+     * Lenient mode (fail-on-inconsistent-width disabled) with per-column
+     * widths: characters beyond the last configured column are ignored and the
+     * result is resized to exactly {@code getValueWidths().length} entries,
+     * leaving {@code null} for missing columns.
+     * <p>
+     * Strict mode (fail-on-inconsistent-width enabled): the row counter is
+     * advanced for every line that reaches the final checks, and the line is
+     * rejected if, for a constant width, its length is not a multiple of that
+     * width, or, for per-column widths, it has characters beyond the last
+     * column or its length differs from the expected line length.
+     *
+     * @param line
+     *            the raw line, may be {@code null}
+     * @return the parsed values, or {@code null} if {@code line} is
+     *         {@code null}
+     * @throws InconsistentValueWidthException
+     *             in strict mode, when the line does not match the configured
+     *             widths as described above
+     * @throws IOException
+     *             declared by the signature; no statement in this method
+     *             throws it directly
+     */
+    public String[] parseLine(String line) throws IOException {
+        final List<String> values = new ArrayList<String>();
+        int[] valueWidths = _configuration.getValueWidths();
+
+        if (line == null) {
+            return null;
+        }
+
+        StringBuilder nextValue = new StringBuilder();
+
+        int valueIndex = 0;
+
+        final CharacterIterator it = new StringCharacterIterator(line);
+        for (char c = it.first(); c != CharacterIterator.DONE; c = it.next()) {
+            nextValue.append(c);
+
+            final int valueWidth;
+            if (_configuration.isConstantValueWidth()) {
+                valueWidth = _configuration.getFixedValueWidth();
+            } else {
+                if (valueIndex >= valueWidths.length) {
+                    // the line has more characters than the configured columns cover
+                    if (_configuration.isFailOnInconsistentLineWidth()) {
+                        String[] result = values.toArray(new String[values.size()]);
+                        // the counter itself is not advanced on this path
+                        throw new InconsistentValueWidthException(result, line, _rowNumber + 1);
+                    } else {
+                        // silently ignore the inconsistency
+                        break;
+                    }
+                }
+                valueWidth = _configuration.getValueWidth(valueIndex);
+            }
+
+            if (nextValue.length() == valueWidth) {
+                // write the value
+                values.add(nextValue.toString().trim());
+                nextValue = new StringBuilder();
+                valueIndex++;
+            }
+        }
+
+        // keep a trailing value that did not fill its column
+        if (nextValue.length() > 0) {
+            values.add(nextValue.toString().trim());
+        }
+
+        String[] result = values.toArray(new String[values.size()]);
+
+        if (!_configuration.isFailOnInconsistentLineWidth() && !_configuration.isConstantValueWidth()) {
+            if (result.length != valueWidths.length) {
+                // resize to the configured column count; missing columns stay null
+                String[] correctedResult = new String[valueWidths.length];
+                for (int i = 0; i < result.length && i < valueWidths.length; i++) {
+                    correctedResult[i] = result[i];
+                }
+                result = correctedResult;
+            }
+        }
+
+        if (_configuration.isFailOnInconsistentLineWidth()) {
+            _rowNumber++;
+            if (_configuration.isConstantValueWidth()) {
+                if (line.length() % _configuration.getFixedValueWidth() != 0) {
+                    throw new InconsistentValueWidthException(result, line, _rowNumber);
+                }
+            } else {
+                if (result.length != values.size()) {
+                    throw new InconsistentValueWidthException(result, line, _rowNumber);
+                }
+
+                if (line.length() != _expectedLineLength) {
+                    throw new InconsistentValueWidthException(result, line, _rowNumber);
+                }
+            }
+        }
+
+        return result;
+    }
+}
index 40dc145..d7a18cf 100644 (file)
@@ -22,10 +22,6 @@ import java.io.BufferedReader;
 import java.io.Closeable;
 import java.io.IOException;
 import java.io.Reader;
-import java.text.CharacterIterator;
-import java.text.StringCharacterIterator;
-import java.util.ArrayList;
-import java.util.List;
 
 /**
  * Reader capable of separating values based on a fixed width setting.
@@ -33,12 +29,7 @@ import java.util.List;
 final public class FixedWidthReader implements Closeable {
 
        private final BufferedReader _reader;
-       private final int _fixedValueWidth;
-       private final int[] _valueWidths;
-       private final boolean _failOnInconsistentLineWidth;
-       private final int expectedLineLength;
-       private final boolean constantWidth;
-       private volatile int _rowNumber;
+       private final FixedWidthLineParser _parser; 
 
        public FixedWidthReader(Reader reader, int fixedValueWidth,
                        boolean failOnInconsistentLineWidth) {
@@ -49,13 +40,9 @@ final public class FixedWidthReader implements Closeable {
        public FixedWidthReader(BufferedReader reader, int fixedValueWidth,
                        boolean failOnInconsistentLineWidth) {
                _reader = reader;
-               _fixedValueWidth = fixedValueWidth;
-               _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
-               _rowNumber = 0;
-               _valueWidths = null;
-
-               constantWidth = true;
-               expectedLineLength = -1;
+        final FixedWidthConfiguration fixedWidthConfiguration = new FixedWidthConfiguration(
+                FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null, fixedValueWidth, failOnInconsistentLineWidth);
+        _parser = new FixedWidthLineParser(fixedWidthConfiguration, -1, 0);
        }
 
        public FixedWidthReader(Reader reader, int[] valueWidths,
@@ -67,19 +54,16 @@ final public class FixedWidthReader implements Closeable {
        public FixedWidthReader(BufferedReader reader, int[] valueWidths,
                        boolean failOnInconsistentLineWidth) {
                _reader = reader;
-               _fixedValueWidth = -1;
-               _valueWidths = valueWidths;
-               _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
-               _rowNumber = 0;
-
-               constantWidth = false;
+               int fixedValueWidth = -1;
                int expectedLineLength = 0;
-               if (_fixedValueWidth == -1) {
-                       for (int i = 0; i < _valueWidths.length; i++) {
-                               expectedLineLength += _valueWidths[i];
+               if (fixedValueWidth == -1) {
+                       for (int i = 0; i < valueWidths.length; i++) {
+                               expectedLineLength += valueWidths[i];
                        }
                }
-               this.expectedLineLength = expectedLineLength;
+        final FixedWidthConfiguration fixedWidthConfiguration = new FixedWidthConfiguration(
+                FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null, valueWidths, failOnInconsistentLineWidth);
+        _parser = new FixedWidthLineParser(fixedWidthConfiguration, expectedLineLength, 0);
        }
 
        
@@ -96,95 +80,12 @@ final public class FixedWidthReader implements Closeable {
         String line;
         try {
             line = _reader.readLine();
-            return readLine(line);
+            return _parser.parseLine(line);
         } catch (IOException e) {
             throw new IllegalStateException(e);
         }
        }
        
-       public String[] readLine(String line) throws IOException {
-
-
-                       final List<String> values = new ArrayList<String>();
-               
-                       if (line == null) {
-                               return null;
-                       }
-
-                       StringBuilder nextValue = new StringBuilder();
-
-                       int valueIndex = 0;
-
-                       final CharacterIterator it = new StringCharacterIterator(line);
-                       for (char c = it.first(); c != CharacterIterator.DONE; c = it
-                                       .next()) {
-                               nextValue.append(c);
-
-                               final int valueWidth;
-                               if (constantWidth) {
-                                       valueWidth = _fixedValueWidth;
-                               } else {
-                                       if (valueIndex >= _valueWidths.length) {
-                                               if (_failOnInconsistentLineWidth) {
-                                                       String[] result = values.toArray(new String[values
-                                                                       .size()]);
-                                                       throw new InconsistentValueWidthException(result,
-                                                                       line, _rowNumber + 1);
-                                               } else {
-                                                       // silently ignore the inconsistency
-                                                       break;
-                                               }
-                                       }
-                                       valueWidth = _valueWidths[valueIndex];
-                               }
-
-                               if (nextValue.length() == valueWidth) {
-                                       // write the value
-                                       values.add(nextValue.toString().trim());
-                                       nextValue = new StringBuilder();
-                                       valueIndex++;
-                               }
-                       }
-
-                       if (nextValue.length() > 0) {
-                               values.add(nextValue.toString().trim());
-                       }
-
-                       String[] result = values.toArray(new String[values.size()]);
-
-                       if (!_failOnInconsistentLineWidth && !constantWidth) {
-                               if (result.length != _valueWidths.length) {
-                                       String[] correctedResult = new String[_valueWidths.length];
-                                       for (int i = 0; i < result.length
-                                                       && i < _valueWidths.length; i++) {
-                                               correctedResult[i] = result[i];
-                                       }
-                                       result = correctedResult;
-                               }
-                       }
-
-                       if (_failOnInconsistentLineWidth) {
-                               _rowNumber++;
-                               if (constantWidth) {
-                                       if (line.length() % _fixedValueWidth != 0) {
-                                               throw new InconsistentValueWidthException(result, line,
-                                                               _rowNumber);
-                                       }
-                               } else {
-                                       if (result.length != values.size()) {
-                                               throw new InconsistentValueWidthException(result, line,
-                                                               _rowNumber);
-                                       }
-
-                                       if (line.length() != expectedLineLength) {
-                                               throw new InconsistentValueWidthException(result, line,
-                                                               _rowNumber);
-                                       }
-                               }
-                       }
-
-                       return result;
-       }
 
        @Override
        public void close() throws IOException {
diff --git a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthLineParserTest.java b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthLineParserTest.java
new file mode 100644 (file)
index 0000000..50d5097
--- /dev/null
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel.fixedwidth;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+/**
+ * Unit tests for {@link FixedWidthLineParser}.
+ */
+public class FixedWidthLineParserTest {
+
+    @Rule
+    public final ExpectedException exception = ExpectedException.none();
+
+    /**
+     * Lenient parsing with column widths 8 and 9: values are trimmed, and
+     * lines that are longer or shorter than 17 characters still yield two
+     * values.
+     */
+    @Test
+    public void testParser() throws IOException {
+        int[] widths = new int[] { 8, 9 };
+        FixedWidthConfiguration fixedWidthConfiguration = new FixedWidthConfiguration(
+                FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null, widths, false);
+        final FixedWidthLineParser parser = new FixedWidthLineParser(fixedWidthConfiguration, 17, 0);
+
+        final String lineToParse1 = "greeting  greeter  ";
+        final String[] line = parser.parseLine(lineToParse1);
+        assertEquals("[greeting, greeter]", Arrays.asList(line).toString());
+
+        final String lineToParse2 = "howdy     partner";
+        String[] line2 = parser.parseLine(lineToParse2);
+        assertEquals("[howdy, partner]", Arrays.asList(line2).toString());
+
+        final String lineToParse3 = "hi        there ";
+        String[] line3 = parser.parseLine(lineToParse3);
+        assertEquals("[hi, there]", Arrays.asList(line3).toString());
+    }
+
+    /**
+     * Strict parsing with column widths 8 and 5 must reject a line that has
+     * more characters than the configured columns cover.
+     */
+    @Test
+    public void testParserFailInconsistentRowException() throws IOException {
+        int[] widths = new int[] { 8, 5 };
+        FixedWidthConfiguration fixedWidthConfiguration = new FixedWidthConfiguration(
+                FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null, widths, true);
+        // expected line length is the sum of the configured widths (8 + 5)
+        final FixedWidthLineParser parser = new FixedWidthLineParser(fixedWidthConfiguration, 13, 0);
+
+        final String lineToParse1 = "greeting  greeter  ";
+        exception.expect(InconsistentValueWidthException.class);
+        parser.parseLine(lineToParse1);
+    }
+}
index dd45900..4d11f0e 100644 (file)
@@ -26,12 +26,17 @@ import java.io.FileReader;
 import java.io.IOException;
 import java.util.Arrays;
 
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.ExpectedException;
 
 public class FixedWidthReaderTest {
 
+    @Rule
+    public final ExpectedException exception = ExpectedException.none();
+    
     @Test
-    public void testBufferedReader() throws IOException {
+    public void testBufferedReader1() throws IOException {
         final File file = new File("src/test/resources/example_simple1.txt");
         final BufferedReader reader = new BufferedReader(new FileReader(file));
         int[] widths = new int[] { 8, 9 };
@@ -44,14 +49,55 @@ public class FixedWidthReaderTest {
             assertEquals("[hi, there]", Arrays.asList(line3).toString());
         }
     }
-
+    
     @Test
-    public void testNoBufferReader() throws IOException {
-        int[] widths = new int[] { 8, 9 };
-        final String lineToBeRead = "greeting  greeter  ";
-        @SuppressWarnings("resource")
-        final FixedWidthReader fixedWidthReader = new FixedWidthReader(null, widths, false);
-        final String[] line = fixedWidthReader.readLine(lineToBeRead);
-        assertEquals("[greeting, greeter]", Arrays.asList(line).toString());
+    public void testBufferedReader2() throws IOException {
+        final File file = new File("src/test/resources/example_simple2.txt");
+        final BufferedReader reader = new BufferedReader(new FileReader(file));
+        int[] widths = new int[] {1, 8, 9 };
+        try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, widths, false)) {
+            final String[] line1 = fixedWidthReader.readLine();
+            assertEquals("[i, greeting, greeter]", Arrays.asList(line1).toString());
+            final String[] line2 = fixedWidthReader.readLine();
+            assertEquals("[1, hello, world]", Arrays.asList(line2).toString());
+            final String[] line3 = fixedWidthReader.readLine();
+            assertEquals("[2, hi, there]", Arrays.asList(line3).toString());
+        }
     }
+    
+    @Test
+    public void testBufferedReader3() throws IOException {
+        final File file = new File("src/test/resources/example_simple3.txt");
+        final BufferedReader reader = new BufferedReader(new FileReader(file));
+        try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, 5, false)) {
+            final String[] line1 = fixedWidthReader.readLine();
+            assertEquals("[hello]", Arrays.asList(line1).toString());
+            final String[] line2 = fixedWidthReader.readLine();
+            assertEquals("[world]", Arrays.asList(line2).toString());
+            final String[] line3 = fixedWidthReader.readLine();
+            assertEquals("[howdy]", Arrays.asList(line3).toString());
+            final String[] line4 = fixedWidthReader.readLine();
+            assertEquals("[ther]", Arrays.asList(line4).toString());
+        }
+    }
+    
+    @Test
+    public void testBufferedReaderFailOnInconsistentRows() throws IOException {
+        final File file = new File("src/test/resources/example_simple3.txt");
+        final BufferedReader reader = new BufferedReader(new FileReader(file));
+        try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, 5, true)) {
+            final String[] line1 = fixedWidthReader.readLine();
+            assertEquals("[hello]", Arrays.asList(line1).toString());
+            final String[] line2 = fixedWidthReader.readLine();
+            assertEquals("[world]", Arrays.asList(line2).toString());
+            final String[] line3 = fixedWidthReader.readLine();
+            assertEquals("[howdy]", Arrays.asList(line3).toString());
+           
+            exception.expect(InconsistentValueWidthException.class);            
+            @SuppressWarnings("unused")
+            final String[] line4 = fixedWidthReader.readLine();
+        }
+    }
+
+   
 }
diff --git a/fixedwidth/src/test/resources/example_simple3.txt b/fixedwidth/src/test/resources/example_simple3.txt
new file mode 100644 (file)
index 0000000..a9e1cf3
--- /dev/null
@@ -0,0 +1,4 @@
+hello
+world
+howdy
+ther
\ No newline at end of file