* [METAMODEL-1099] - Created a new DataContextFactory SPI and an extensible registry of implementations based on ServiceLoader.
* [METAMODEL-1099] - Implemented DataContextFactory SPI for connectors: JDBC, CSV, ElasticSearch
+ * [METAMODEL-250] - Added support for EBCDIC files (part of 'fixedwidth' module).
* [METAMODEL-1103] - Fixed a bug pertaining to anchoring of wildcards in LIKE operands.
* [METAMODEL-1088] - Add support for aliases in MongoDB.
* [METAMODEL-1086] - Fixed encoding issue when CsvDataContext is instantiated with InputStream.
--- /dev/null
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel.fixedwidth;
+
+/**
+ * Special fixed-width configuration for EBCDIC files.
+ *
+ * In addition to the settings of {@link FixedWidthConfiguration} it carries two flags: whether the
+ * input starts with a header that has to be skipped, and whether records are separated by new line
+ * characters.
+ */
+public final class EbcdicConfiguration extends FixedWidthConfiguration {
+
+    private static final long serialVersionUID = 1L;
+
+    private final boolean _skipEbcdicHeader;
+    private final boolean _eolPresent;
+
+    /**
+     * Creates a configuration in which every value has the same width.
+     *
+     * @param columnNameLineNumber the line number (1 based) holding the column names
+     * @param encoding the encoding used to decode the file
+     * @param fixedValueWidth the width shared by all values
+     * @param failOnInconsistentLineWidth whether an inconsistent record width should raise an error
+     * @param skipEbcdicHeader whether the input contains a header that should be skipped
+     * @param eolPresent whether the input contains new line characters
+     */
+    public EbcdicConfiguration(int columnNameLineNumber, String encoding, int fixedValueWidth,
+            boolean failOnInconsistentLineWidth, boolean skipEbcdicHeader, boolean eolPresent) {
+        super(columnNameLineNumber, encoding, fixedValueWidth, failOnInconsistentLineWidth);
+        _skipEbcdicHeader = skipEbcdicHeader;
+        _eolPresent = eolPresent;
+    }
+
+    /**
+     * Creates a configuration with an individual width per column.
+     *
+     * @param columnNameLineNumber the line number (1 based) holding the column names
+     * @param encoding the encoding used to decode the file
+     * @param valueWidths the width of each column, in column order
+     * @param failOnInconsistentLineWidth whether an inconsistent record width should raise an error
+     * @param skipEbcdicHeader whether the input contains a header that should be skipped
+     * @param eolPresent whether the input contains new line characters
+     */
+    public EbcdicConfiguration(int columnNameLineNumber, String encoding, int[] valueWidths,
+            boolean failOnInconsistentLineWidth, boolean skipEbcdicHeader, boolean eolPresent) {
+        super(columnNameLineNumber, null, encoding, valueWidths, failOnInconsistentLineWidth);
+        _skipEbcdicHeader = skipEbcdicHeader;
+        _eolPresent = eolPresent;
+    }
+
+    /**
+     * Determines if the input file contains a header that should be skipped before reading records data.
+     *
+     * @return a boolean indicating whether or not to skip EBCDIC header.
+     */
+    public boolean isSkipEbcdicHeader() {
+        return _skipEbcdicHeader;
+    }
+
+    /**
+     * Determines if the input file contains new line characters.
+     *
+     * @return a boolean indicating whether or not the input contains new line characters.
+     */
+    public boolean isEolPresent() {
+        return _eolPresent;
+    }
+
+    /**
+     * Adds the EBCDIC specific flags to the identifiers contributed by the parent configuration, so
+     * that two configurations differing only in these flags are not treated as the same.
+     *
+     * NOTE(review): assumes the base object derives equals()/hashCode() from these identifiers -
+     * confirm against BaseObject.
+     *
+     * @param identifiers the list of identifying values to append to
+     */
+    @Override
+    protected void decorateIdentity(java.util.List<Object> identifiers) {
+        super.decorateIdentity(identifiers);
+        identifiers.add(_skipEbcdicHeader);
+        identifiers.add(_eolPresent);
+    }
+}
--- /dev/null
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel.fixedwidth;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+
+/**
+ * {@link FixedWidthReader} for EBCDIC input. It can skip a leading header of one record length and
+ * can read records that are not separated by new line characters, in which case each record is
+ * exactly the sum of the configured column widths long (counted in bytes).
+ */
+class EbcdicReader extends FixedWidthReader {
+
+    private final boolean _skipEbcdicHeader;
+    private final boolean _eolPresent;
+    private boolean _headerSkipped;
+
+    /**
+     * @param stream the stream to read the records from
+     * @param charsetName the name of the charset used to decode the record bytes
+     * @param valueWidths the width of each column, in column order
+     * @param failOnInconsistentLineWidth whether an inconsistent record width should raise an error
+     * @param skipEbcdicHeader whether a header has to be skipped before the first record
+     * @param eolPresent whether records are terminated by new line characters
+     */
+    public EbcdicReader(BufferedInputStream stream, String charsetName, int[] valueWidths,
+            boolean failOnInconsistentLineWidth, boolean skipEbcdicHeader, boolean eolPresent) {
+        super(stream, charsetName, valueWidths, failOnInconsistentLineWidth);
+        _skipEbcdicHeader = skipEbcdicHeader;
+        _eolPresent = eolPresent;
+    }
+
+    /**
+     * Skips the header once, before the first record is read.
+     *
+     * @throws IllegalStateException if the stream fails while skipping
+     */
+    @Override
+    protected void beforeReadLine() {
+        if (shouldSkipHeader()) {
+            try {
+                skipHeader();
+            } catch (IOException e) {
+                throw new IllegalStateException("A problem occurred while skipping the input stream. ", e);
+            }
+        }
+    }
+
+    private boolean shouldSkipHeader() {
+        return (_skipEbcdicHeader && !_headerSkipped);
+    }
+
+    /**
+     * Skips one record length worth of bytes. A single skip() call is allowed to skip fewer bytes
+     * than requested, so keep going until the whole header is consumed or the stream ends.
+     */
+    private void skipHeader() throws IOException {
+        _headerSkipped = true;
+        long remaining = _expectedLineLength;
+
+        while (remaining > 0) {
+            final long skipped = _stream.skip(remaining);
+
+            if (skipped > 0) {
+                remaining -= skipped;
+            } else if (_stream.read() < 0) {
+                return; // end of stream reached inside the header
+            } else {
+                remaining--; // skip() made no progress, but one byte could still be read
+            }
+        }
+    }
+
+    /**
+     * Reads the raw data of the next record. With new line characters present this delegates to the
+     * parent; otherwise exactly one record length of bytes is read and decoded.
+     *
+     * @return the record data, or null if the end of the stream has been reached
+     * @throws IOException if reading or decoding fails
+     * @throws IllegalStateException if records have no line terminators and the record length is not positive
+     */
+    @Override
+    protected String readSingleRecordData() throws IOException {
+        if (_eolPresent) {
+            return super.readSingleRecordData();
+        }
+
+        if (_expectedLineLength <= 0) {
+            // a zero-length read never reports end of stream, which would yield empty records forever
+            throw new IllegalStateException(
+                    "Cannot read records without line terminators when the record length is "
+                            + _expectedLineLength);
+        }
+
+        final byte[] buffer = new byte[_expectedLineLength];
+        int totalRead = 0;
+
+        // a single read() may deliver only part of the record, so fill the buffer in a loop
+        while (totalRead < buffer.length) {
+            final int bytesRead = _stream.read(buffer, totalRead, buffer.length - totalRead);
+
+            if (bytesRead < 0) {
+                break;
+            }
+
+            totalRead += bytesRead;
+        }
+
+        if (totalRead == 0) {
+            return null;
+        }
+
+        // decode only what was actually read; a truncated last record must not be padded with NULs
+        return new String(buffer, 0, totalRead, _charsetName);
+    }
+}
* Represents the specification of a single column for a\r
* {@link FixedWidthDataContext}.\r
*/\r
-public final class FixedWidthColumnSpec implements HasName {\r
+final class FixedWidthColumnSpec implements HasName {\r
\r
private final String name;\r
private final int width;\r
import org.apache.metamodel.util.HasNameMapper;\r
\r
/**\r
- * Configuration of metadata about a fixed width values datacontext.\r
+ * Configuration of metadata about a fixed width values data context.\r
*/\r
-public final class FixedWidthConfiguration extends BaseObject implements\r
- Serializable {\r
+public class FixedWidthConfiguration extends BaseObject implements Serializable {\r
\r
- private static final long serialVersionUID = 1L;\r
+ private static final long serialVersionUID = 1L;\r
\r
- public static final int NO_COLUMN_NAME_LINE = 0;\r
- public static final int DEFAULT_COLUMN_NAME_LINE = 1;\r
+ public static final int NO_COLUMN_NAME_LINE = 0;\r
+ public static final int DEFAULT_COLUMN_NAME_LINE = 1;\r
\r
- private final String encoding;\r
- private final int fixedValueWidth;\r
- private final int[] valueWidths;\r
- private final int columnNameLineNumber;\r
- private final boolean failOnInconsistentLineWidth;\r
- private final ColumnNamingStrategy columnNamingStrategy;\r
+ private final String encoding;\r
+ private final int fixedValueWidth;\r
+ private final int[] valueWidths;\r
+ private final int columnNameLineNumber;\r
+ private final boolean failOnInconsistentLineWidth;\r
+ private final ColumnNamingStrategy columnNamingStrategy;\r
\r
- public FixedWidthConfiguration(int fixedValueWidth) {\r
- this(DEFAULT_COLUMN_NAME_LINE, FileHelper.DEFAULT_ENCODING,\r
- fixedValueWidth);\r
- }\r
+ public FixedWidthConfiguration(int fixedValueWidth) {\r
+ this(DEFAULT_COLUMN_NAME_LINE, FileHelper.DEFAULT_ENCODING, fixedValueWidth);\r
+ }\r
\r
- public FixedWidthConfiguration(int[] valueWidth) {\r
- this(DEFAULT_COLUMN_NAME_LINE, FileHelper.DEFAULT_ENCODING, valueWidth,\r
- false);\r
- }\r
+ public FixedWidthConfiguration(int[] valueWidth) {\r
+ this(DEFAULT_COLUMN_NAME_LINE, FileHelper.DEFAULT_ENCODING, valueWidth, false);\r
+ }\r
\r
public FixedWidthConfiguration(int columnNameLineNumber, String encoding, int fixedValueWidth) {\r
this(columnNameLineNumber, encoding, fixedValueWidth, false);\r
this.valueWidths = new int[0];\r
}\r
\r
- public FixedWidthConfiguration(int columnNameLineNumber, String encoding,\r
- int[] valueWidths, boolean failOnInconsistentLineWidth) {\r
+ public FixedWidthConfiguration(int columnNameLineNumber, String encoding, int[] valueWidths, \r
+ boolean failOnInconsistentLineWidth) {\r
this(columnNameLineNumber, null, encoding, valueWidths, failOnInconsistentLineWidth);\r
}\r
- \r
+\r
public FixedWidthConfiguration(int columnNameLineNumber, ColumnNamingStrategy columnNamingStrategy, String encoding,\r
int[] valueWidths, boolean failOnInconsistentLineWidth) {\r
this.encoding = encoding;\r
this.columnNamingStrategy = columnNamingStrategy;\r
this.valueWidths = valueWidths;\r
}\r
- \r
+\r
public FixedWidthConfiguration(String encoding, List<FixedWidthColumnSpec> columnSpecs) {\r
this(encoding, columnSpecs, false);\r
}\r
}\r
\r
/**\r
- * The line number (1 based) from which to get the names of the columns.\r
- * \r
- * @return an int representing the line number of the column headers/names.\r
- */\r
- public int getColumnNameLineNumber() {\r
- return columnNameLineNumber;\r
- }\r
- \r
- /**\r
- * Gets a {@link ColumnNamingStrategy} to use if needed.\r
- * @return\r
- */\r
- public ColumnNamingStrategy getColumnNamingStrategy() {\r
- if (columnNamingStrategy == null) {\r
- return ColumnNamingStrategies.defaultStrategy();\r
- }\r
+ * The line number (1 based) from which to get the names of the columns.\r
+ *\r
+ * @return an int representing the line number of the column headers/names.\r
+ */\r
+ public int getColumnNameLineNumber() {\r
+ return columnNameLineNumber;\r
+ }\r
+\r
+ /**\r
+ * Gets a {@link ColumnNamingStrategy} to use if needed.\r
+ * @return column naming strategy\r
+ */\r
+ public ColumnNamingStrategy getColumnNamingStrategy() {\r
+ if (columnNamingStrategy == null) {\r
+ return ColumnNamingStrategies.defaultStrategy();\r
+ }\r
return columnNamingStrategy;\r
}\r
\r
- /**\r
- * Gets the file encoding to use for reading the file.\r
- * \r
- * @return the text encoding to use for reading the file.\r
- */\r
- public String getEncoding() {\r
- return encoding;\r
- }\r
-\r
- /**\r
- * Gets the width of each value within the fixed width value file.\r
- * \r
- * @return the fixed width to use when parsing the file.\r
- */\r
- public int getFixedValueWidth() {\r
- return fixedValueWidth;\r
- }\r
-\r
- public int[] getValueWidths() {\r
- return valueWidths;\r
- }\r
-\r
- /**\r
- * Determines if the {@link DataSet#next()} should throw an exception in\r
- * case of inconsistent line width in the fixed width value file.\r
- * \r
- * @return a boolean indicating whether or not to fail on inconsistent line\r
- * widths.\r
- */\r
- public boolean isFailOnInconsistentLineWidth() {\r
- return failOnInconsistentLineWidth;\r
- }\r
-\r
- @Override\r
- protected void decorateIdentity(List<Object> identifiers) {\r
- identifiers.add(columnNameLineNumber);\r
- identifiers.add(encoding);\r
- identifiers.add(fixedValueWidth);\r
- identifiers.add(valueWidths);\r
- identifiers.add(failOnInconsistentLineWidth);\r
- }\r
-\r
- @Override\r
- public String toString() {\r
- return "FixedWidthConfiguration[encoding=" + encoding\r
- + ", fixedValueWidth=" + fixedValueWidth + ", valueWidths="\r
- + Arrays.toString(valueWidths) + ", columnNameLineNumber="\r
- + columnNameLineNumber + ", failOnInconsistentLineWidth="\r
- + failOnInconsistentLineWidth + "]";\r
- }\r
-\r
- public boolean isConstantValueWidth() {\r
- return fixedValueWidth != -1;\r
- }\r
-\r
- public int getValueWidth(int columnIndex) {\r
- if (isConstantValueWidth()) {\r
- return fixedValueWidth;\r
- }\r
- return valueWidths[columnIndex];\r
- }\r
+ /**\r
+ * Gets the file encoding to use for reading the file.\r
+ *\r
+ * @return the text encoding to use for reading the file.\r
+ */\r
+ public String getEncoding() {\r
+ return encoding;\r
+ }\r
+\r
+ /**\r
+ * Gets the width of each value within the fixed width value file.\r
+ *\r
+ * @return the fixed width to use when parsing the file.\r
+ */\r
+ public int getFixedValueWidth() {\r
+ return fixedValueWidth;\r
+ }\r
+\r
+ public int[] getValueWidths() {\r
+ return valueWidths;\r
+ }\r
+\r
+ /**\r
+ * Determines if the {@link DataSet#next()} should throw an exception in\r
+ * case of inconsistent line width in the fixed width value file.\r
+ *\r
+ * @return a boolean indicating whether or not to fail on inconsistent line\r
+ * widths.\r
+ */\r
+ public boolean isFailOnInconsistentLineWidth() {\r
+ return failOnInconsistentLineWidth;\r
+ }\r
+\r
+ @Override\r
+ protected void decorateIdentity(List<Object> identifiers) {\r
+ identifiers.add(columnNameLineNumber);\r
+ identifiers.add(encoding);\r
+ identifiers.add(fixedValueWidth);\r
+ identifiers.add(valueWidths);\r
+ identifiers.add(failOnInconsistentLineWidth);\r
+ }\r
+\r
+ @Override\r
+ public String toString() {\r
+ return "FixedWidthConfiguration[encoding=" + encoding\r
+ + ", fixedValueWidth=" + fixedValueWidth + ", valueWidths="\r
+ + Arrays.toString(valueWidths) + ", columnNameLineNumber="\r
+ + columnNameLineNumber + ", failOnInconsistentLineWidth="\r
+ + failOnInconsistentLineWidth + "]";\r
+ }\r
+\r
+ public boolean isConstantValueWidth() {\r
+ return fixedValueWidth != -1;\r
+ }\r
+\r
+ public int getValueWidth(int columnIndex) {\r
+ if (isConstantValueWidth()) {\r
+ return fixedValueWidth;\r
+ }\r
+ return valueWidths[columnIndex];\r
+ }\r
}\r
* "http://support.sas.com/documentation/cdl/en/etlug/67323/HTML/default/viewer.htm#p0h03yig7fp1qan1arghp3lwjqi6.htm">\r
* described here</a>.\r
* \r
- * @param encoding\r
- * @param resource\r
- * the format file resource\r
- * @param failOnInconsistentLineWidth\r
+ * @param encoding the format file encoding\r
+ * @param resource the format file resource \r
+ * @param failOnInconsistentLineWidth flag specifying whether inconsistent line should stop processing or not\r
* @return a {@link FixedWidthConfiguration} object to use\r
*/\r
public FixedWidthConfiguration readFromSasFormatFile(String encoding, Resource resource,\r
\r
/**\r
* Reads a {@link FixedWidthConfiguration} based on a SAS INPUT declaration.\r
- * The reader method also optionally will look for a LABEL defintion for\r
- * column naming.\r
+ * The reader method also optionally will look for a LABEL definition for column naming.\r
* \r
- * @param encoding\r
- * @param resource\r
- * the format file resource\r
- * @param failOnInconsistentLineWidth\r
+ * @param encoding the format file encoding\r
+ * @param resource the format file resource\r
+ * @param failOnInconsistentLineWidth flag specifying whether inconsistent line should stop processing or not\r
* @return a {@link FixedWidthConfiguration} object to use\r
*/\r
public FixedWidthConfiguration readFromSasInputDefinition(String encoding, Resource resource,\r
\r
return new FixedWidthConfiguration(encoding, columnSpecs, failOnInconsistentLineWidth);\r
}\r
-\r
}\r
*/
package org.apache.metamodel.fixedwidth;
+import java.io.BufferedInputStream;
import java.io.File;
import java.io.InputStream;
-import java.io.Reader;
import org.apache.metamodel.MetaModelException;
import org.apache.metamodel.QueryPostprocessDataContext;
/**
* Gets the resource being read
*
- * @return
+ * @return a {@link Resource} object
*/
public Resource getResource() {
return _resource;
private FixedWidthReader createReader() {
final InputStream inputStream = _resource.read();
- final Reader fileReader = FileHelper.getReader(inputStream, _configuration.getEncoding());
final FixedWidthReader reader;
- if (_configuration.isConstantValueWidth()) {
- reader = new FixedWidthReader(fileReader, _configuration.getFixedValueWidth(), _configuration
- .isFailOnInconsistentLineWidth());
+
+ if (_configuration instanceof EbcdicConfiguration) {
+ reader = new EbcdicReader((BufferedInputStream) inputStream, _configuration.getEncoding(),
+ _configuration.getValueWidths(), _configuration.isFailOnInconsistentLineWidth(),
+ ((EbcdicConfiguration) _configuration).isSkipEbcdicHeader(),
+ ((EbcdicConfiguration) _configuration).isEolPresent());
} else {
- reader = new FixedWidthReader(fileReader, _configuration.getValueWidths(), _configuration
- .isFailOnInconsistentLineWidth());
+ if (_configuration.isConstantValueWidth()) {
+ reader = new FixedWidthReader(inputStream, _configuration.getEncoding(),
+ _configuration.getFixedValueWidth(), _configuration.isFailOnInconsistentLineWidth());
+ } else {
+ reader = new FixedWidthReader(inputStream, _configuration.getEncoding(),
+ _configuration.getValueWidths(), _configuration.isFailOnInconsistentLineWidth());
+ }
}
+
return reader;
}
-
}
if (columnNumber < stringValues.length) {
rowValues[i] = stringValues[columnNumber];
} else {
- // Ticket #125: Missing values should be enterpreted as
- // null.
+ // Ticket #125: Missing values should be interpreted as null.
rowValues[i] = null;
}
}
*/
package org.apache.metamodel.fixedwidth;
-import java.io.BufferedReader;
+import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.IOException;
-import java.io.Reader;
+import java.io.InputStream;
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+import java.util.ArrayList;
+import java.util.List;
/**
* Reader capable of separating values based on a fixed width setting.
*/
-final public class FixedWidthReader implements Closeable {
-
- private final BufferedReader _reader;
- private final FixedWidthLineParser _parser;
-
- public FixedWidthReader(Reader reader, int fixedValueWidth,
- boolean failOnInconsistentLineWidth) {
- this(new BufferedReader(reader), fixedValueWidth,
- failOnInconsistentLineWidth);
- }
-
- public FixedWidthReader(BufferedReader reader, int fixedValueWidth,
- boolean failOnInconsistentLineWidth) {
- _reader = reader;
- final FixedWidthConfiguration fixedWidthConfiguration = new FixedWidthConfiguration(
- FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null, fixedValueWidth, failOnInconsistentLineWidth);
- _parser = new FixedWidthLineParser(fixedWidthConfiguration, -1, 0);
- }
-
- public FixedWidthReader(Reader reader, int[] valueWidths,
- boolean failOnInconsistentLineWidth) {
- this(new BufferedReader(reader), valueWidths,
- failOnInconsistentLineWidth);
- }
-
- public FixedWidthReader(BufferedReader reader, int[] valueWidths,
- boolean failOnInconsistentLineWidth) {
- _reader = reader;
- int fixedValueWidth = -1;
- int expectedLineLength = 0;
- if (fixedValueWidth == -1) {
- for (int i = 0; i < valueWidths.length; i++) {
- expectedLineLength += valueWidths[i];
- }
- }
- final FixedWidthConfiguration fixedWidthConfiguration = new FixedWidthConfiguration(
- FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null, valueWidths, failOnInconsistentLineWidth);
- _parser = new FixedWidthLineParser(fixedWidthConfiguration, expectedLineLength, 0);
- }
-
-
- /***
- * Reads the next line in the file.
- *
- * @return an array of values in the next line, or null if the end of the
- * file has been reached.
- *
- * @throws IllegalStateException
- * if an exception occurs while reading the file.
- */
- public String[] readLine() throws IllegalStateException {
- String line;
+class FixedWidthReader implements Closeable {
+ private static final int END_OF_STREAM = -1;
+ private static final int LINE_FEED = '\n';
+ private static final int CARRIAGE_RETURN = '\r';
+
+ protected final String _charsetName;
+ private final int _fixedValueWidth;
+ private final int[] _valueWidths;
+ private int _valueIndex = 0;
+ private final boolean _failOnInconsistentLineWidth;
+ private final boolean _constantWidth;
+ private volatile int _rowNumber;
+ protected final BufferedInputStream _stream;
+ protected final int _expectedLineLength;
+
+ /**
+ * Creates a reader in which every value has the same width. The given stream is wrapped in a
+ * {@link BufferedInputStream}.
+ *
+ * @param stream the stream to read from
+ * @param charsetName the name of the charset of the input
+ * @param fixedValueWidth the width shared by all values
+ * @param failOnInconsistentLineWidth whether an inconsistent record width should raise an error
+ */
+ public FixedWidthReader(InputStream stream, String charsetName, int fixedValueWidth,
+ boolean failOnInconsistentLineWidth) {
+ this(new BufferedInputStream(stream), charsetName, fixedValueWidth, failOnInconsistentLineWidth);
+ }
+
+ /**
+  * Initializes a constant-width reader on an already buffered stream. No per-column widths are
+  * kept in this mode and the expected line length is marked as unknown (-1).
+  */
+ private FixedWidthReader(BufferedInputStream stream, String charsetName, int fixedValueWidth,
+         boolean failOnInconsistentLineWidth) {
+     // input
+     _stream = stream;
+     _charsetName = charsetName;
+
+     // width settings: one constant width, no per-column widths
+     _constantWidth = true;
+     _fixedValueWidth = fixedValueWidth;
+     _valueWidths = null;
+     _expectedLineLength = -1;
+
+     // validation and bookkeeping
+     _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
+     _rowNumber = 0;
+ }
+
+ /**
+ * Creates a reader with an individual width per column. The given stream is wrapped in a
+ * {@link BufferedInputStream}.
+ *
+ * @param stream the stream to read from
+ * @param charsetName the name of the charset of the input
+ * @param valueWidths the width of each column, in column order
+ * @param failOnInconsistentLineWidth whether an inconsistent record width should raise an error
+ */
+ public FixedWidthReader(InputStream stream, String charsetName, int[] valueWidths,
+ boolean failOnInconsistentLineWidth) {
+ this(new BufferedInputStream(stream), charsetName, valueWidths, failOnInconsistentLineWidth);
+ }
+
+ /**
+  * Initializes a variable-width reader on an already buffered stream. The expected line length
+  * is the sum of all column widths.
+  */
+ FixedWidthReader(BufferedInputStream stream, String charsetName, int[] valueWidths,
+         boolean failOnInconsistentLineWidth) {
+     _stream = stream;
+     _charsetName = charsetName;
+     _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
+     _rowNumber = 0;
+
+     // variable-width mode: no constant width, one width per column
+     _constantWidth = false;
+     _fixedValueWidth = -1;
+     _valueWidths = valueWidths;
+
+     int widthSum = 0;
+     for (int column = 0; column < _valueWidths.length; column++) {
+         widthSum += _valueWidths[column];
+     }
+     _expectedLineLength = widthSum;
+ }
+
+ /**
+ * This reads and returns the next record from the file. Usually, it is a line but in case the new line characters
+ * are not present, the length of the content depends on the column-widths setting.
+ *
+ * @return an array of values in the next line, or null if the end of the file has been reached.
+ * @throws IllegalStateException if an exception occurs while reading the file.
+ */
+ public String[] readLine() throws IllegalStateException {
try {
- line = _reader.readLine();
- return _parser.parseLine(line);
+ beforeReadLine();
+ _rowNumber++;
+ return getValues();
} catch (IOException e) {
throw new IllegalStateException(e);
}
- }
-
+ }
+
+ /**
+ * Empty hook that enables special behavior in sub-classed readers (by overriding this method).
+ */
+ protected void beforeReadLine() {
+ return;
+ }
+
+ /**
+  * Reads the next record and splits it into its values.
+  *
+  * @return the values of the record, or null when no further record data is available
+  * @throws IOException if reading the record data fails
+  */
+ private String[] getValues() throws IOException {
+     final String recordData = readSingleRecordData();
+     if (recordData == null) {
+         return null;
+     }
+
+     final List<String> parsedValues = new ArrayList<>();
+     processSingleRecordData(recordData, parsedValues);
+
+     final int parsedCount = parsedValues.size();
+     String[] row = parsedValues.toArray(new String[parsedCount]);
+
+     // in lenient variable-width mode the row is resized to the configured column count
+     final boolean lenientVariableWidth = !(_failOnInconsistentLineWidth || _constantWidth);
+     if (lenientVariableWidth) {
+         row = correctResult(row);
+     }
+
+     validateConsistentValue(recordData, row, parsedCount);
+     return row;
+ }
+
+ private void validateConsistentValue(String recordData, String[] result, int valuesSize) {
+ if (!_failOnInconsistentLineWidth) {
+ return;
+ }
+
+ InconsistentValueWidthException inconsistentValueException = null;
+
+ if (_constantWidth) {
+ if (recordData.length() % _fixedValueWidth != 0) {
+ inconsistentValueException = new InconsistentValueWidthException(result, recordData, _rowNumber);
+ }
+ } else if (result.length != valuesSize || recordData.length() != _expectedLineLength) {
+ inconsistentValueException = new InconsistentValueWidthException(result, recordData, _rowNumber);
+ }
+
+ if (inconsistentValueException != null) {
+ throw inconsistentValueException;
+ }
+ }
+
+ /**
+  * Walks through the record character by character and collects the values into the given list.
+  * Characters left over after the last complete value are offered as one more value.
+  *
+  * @param singleRecordData the raw data of the record
+  * @param values the list receiving the parsed values
+  */
+ private void processSingleRecordData(final String singleRecordData, final List<String> values) {
+     final StringBuilder pending = new StringBuilder();
+     final CharacterIterator chars = new StringCharacterIterator(singleRecordData);
+     _valueIndex = 0;
+
+     char current = chars.first();
+     while (current != CharacterIterator.DONE) {
+         processCharacter(current, pending, values, singleRecordData);
+         current = chars.next();
+     }
+
+     if (pending.length() != 0) {
+         addNewValueIfAppropriate(values, pending);
+     }
+ }
+
+ String readSingleRecordData() throws IOException {
+ StringBuilder line = new StringBuilder();
+ int ch;
+
+ for (ch = _stream.read(); !isEndingCharacter(ch); ch = _stream.read()) {
+ line.append((char) ch);
+ }
+
+ if (ch == CARRIAGE_RETURN) {
+ readLineFeedIfFollows();
+ }
+
+ return (line.length()) > 0 ? line.toString() : null;
+ }
+
+ private void readLineFeedIfFollows() throws IOException {
+ _stream.mark(1);
+
+ if (_stream.read() != LINE_FEED) {
+ _stream.reset();
+ }
+ }
+
+ /**
+  * Tells whether the given value read from the stream terminates the current line.
+  *
+  * @param ch the value returned by the stream
+  * @return true for a carriage return, a line feed or the end of the stream
+  */
+ private boolean isEndingCharacter(int ch) {
+     switch (ch) {
+     case CARRIAGE_RETURN:
+     case LINE_FEED:
+     case END_OF_STREAM:
+         return true;
+     default:
+         return false;
+     }
+ }
+
+ /**
+  * Appends one character to the value under construction and, once that value has reached its
+  * width, stores it and moves on to the next column.
+  *
+  * @param c the character to append
+  * @param nextValue the buffer of the value under construction
+  * @param values the values collected so far
+  * @param recordData the raw data of the record, used for error reporting
+  */
+ private void processCharacter(char c, StringBuilder nextValue, List<String> values, String recordData) {
+     nextValue.append(c);
+
+     if (nextValue.length() != getValueWidth(values, recordData)) {
+         return; // value not complete yet
+     }
+
+     addNewValueIfAppropriate(values, nextValue);
+     nextValue.setLength(0); // start the next value with an empty buffer
+
+     if (_valueWidths != null) {
+         _valueIndex = (_valueIndex + 1) % _valueWidths.length;
+     }
+ }
+
+ private int getValueWidth(List<String> values, String recordData) {
+ if (_constantWidth) {
+ return _fixedValueWidth;
+ } else {
+ if (_valueIndex >= _valueWidths.length) {
+ if (_failOnInconsistentLineWidth) {
+ String[] result = values.toArray(new String[values.size()]);
+ throw new InconsistentValueWidthException(result, recordData, _rowNumber + 1);
+ } else {
+ return -1; // silently ignore the inconsistency
+ }
+ }
+
+ return _valueWidths[_valueIndex];
+ }
+ }
+
+ private void addNewValueIfAppropriate(List<String> values, StringBuilder nextValue) {
+ if (_valueWidths != null) {
+ if (values.size() < _valueWidths.length) {
+ values.add(nextValue.toString().trim());
+ }
+ } else {
+ values.add(nextValue.toString().trim());
+ }
+ }
+
+ private String[] correctResult(String[] result) {
+ if (result.length != _valueWidths.length) {
+ String[] correctedResult = new String[_valueWidths.length];
+
+ for (int i = 0; i < result.length && i < _valueWidths.length; i++) {
+ correctedResult[i] = result[i];
+ }
+
+ result = correctedResult;
+ }
- @Override
- public void close() throws IOException {
- _reader.close();
- }
+ return result;
+ }
+ /**
+ * Closes the underlying input stream.
+ *
+ * @throws IOException if closing the stream fails
+ */
+ @Override
+ public void close() throws IOException {
+ _stream.close();
+ }
}
--- /dev/null
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel.fixedwidth;
+
+import java.io.File;
+
+import org.apache.metamodel.data.DataSet;
+import org.apache.metamodel.schema.Schema;
+import org.apache.metamodel.schema.Table;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Reads an EBCDIC encoded fixed-width file that has a header record and no new line characters,
+ * and checks the number of rows and the content of the first rows.
+ */
+public class EBCDICTest {
+    private static final int[] COLUMN_WIDTHS = new int[] { 2, 7, 10, 10 };
+    private static final long EXPECTED_ROWS_COUNT = 49; // 50 lines, 1. is a header
+    private static final String ENCODING = "IBM500";
+    private static final String[] EXPECTED_ROWS = new String[] {
+            "Row[values=[01, name-01, surname-01, address-01]]",
+            "Row[values=[02, name-02, surname-02, address-02]]",
+            "Row[values=[03, name-03, surname-03, address-03]]",
+    };
+    private final FixedWidthDataContext _context;
+    private final Table _table;
+
+    public EBCDICTest() {
+        String fileName = "fixed-width-2-7-10-10.ebc";
+        FixedWidthConfiguration configuration = new EbcdicConfiguration(FixedWidthConfiguration.NO_COLUMN_NAME_LINE,
+                ENCODING, COLUMN_WIDTHS, false, true, false);
+        _context = new FixedWidthDataContext(new File("src/test/resources/" + fileName), configuration);
+        Schema schema = _context.getDefaultSchema();
+        _table = schema.getTableByName(fileName);
+    }
+
+    @Test
+    public void testRowsCount() throws Exception {
+        long rows = 0;
+
+        try (final DataSet dataSet = _context.query().from(_table).selectCount().execute()) {
+            if (dataSet.next()) {
+                Object[] values = dataSet.getRow().getValues();
+                // go through Number so the test does not depend on the exact boxed type of the count
+                rows = ((Number) values[0]).longValue();
+            }
+        }
+
+        assertEquals(EXPECTED_ROWS_COUNT, rows);
+    }
+
+    @Test
+    public void testFirstRows() throws Exception {
+        int limit = EXPECTED_ROWS.length;
+        int i = 0;
+
+        try (final DataSet dataSet = _context.query().from(_table).selectAll().limit(limit).execute()) {
+            while (dataSet.next()) {
+                assertEquals(EXPECTED_ROWS[i], dataSet.getRow().toString());
+                i++;
+            }
+        }
+
+        // without this check the test would also pass when no row is returned at all
+        assertEquals(limit, i);
+    }
+}
*/
package org.apache.metamodel.fixedwidth;
-import org.apache.metamodel.fixedwidth.FixedWidthConfiguration;
-
import junit.framework.TestCase;
public class FixedWidthConfigurationTest extends TestCase {
}
public void testEquals() throws Exception {
- FixedWidthConfiguration conf1 = new FixedWidthConfiguration(1, "UTF8",
- 10, true);
- FixedWidthConfiguration conf2 = new FixedWidthConfiguration(1, "UTF8",
- 10, true);
+ FixedWidthConfiguration conf1 = new FixedWidthConfiguration(1, "UTF8", 10, true);
+ FixedWidthConfiguration conf2 = new FixedWidthConfiguration(1, "UTF8", 10, true);
assertEquals(conf1, conf2);
- FixedWidthConfiguration conf3 = new FixedWidthConfiguration(1, "UTF8",
- 10, false);
+ FixedWidthConfiguration conf3 = new FixedWidthConfiguration(1, "UTF8", 10, false);
assertFalse(conf1.equals(conf3));
}
}
import org.apache.metamodel.DataContext;
import org.apache.metamodel.data.DataSet;
-import org.apache.metamodel.fixedwidth.FixedWidthConfiguration;
-import org.apache.metamodel.fixedwidth.FixedWidthDataContext;
-import org.apache.metamodel.fixedwidth.InconsistentValueWidthException;
import org.apache.metamodel.query.Query;
import org.apache.metamodel.schema.Schema;
import org.apache.metamodel.schema.Table;
*/
package org.apache.metamodel.fixedwidth;
-import static org.junit.Assert.assertEquals;
-
-import java.io.BufferedReader;
+import java.io.BufferedInputStream;
import java.io.File;
-import java.io.FileReader;
+import java.io.FileInputStream;
import java.io.IOException;
import java.util.Arrays;
import org.junit.Test;
import org.junit.rules.ExpectedException;
+import static org.junit.Assert.assertEquals;
+
public class FixedWidthReaderTest {
+ private static final String CHARSET = "UTF-8";
@Rule
public final ExpectedException exception = ExpectedException.none();
@Test
public void testBufferedReader1() throws IOException {
final File file = new File("src/test/resources/example_simple1.txt");
- final BufferedReader reader = new BufferedReader(new FileReader(file));
+ final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
int[] widths = new int[] { 8, 9 };
- try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, widths, false)) {
+ try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, CHARSET, widths, false)) {
final String[] line1 = fixedWidthReader.readLine();
assertEquals("[greeting, greeter]", Arrays.asList(line1).toString());
final String[] line2 = fixedWidthReader.readLine();
@Test
public void testBufferedReader2() throws IOException {
final File file = new File("src/test/resources/example_simple2.txt");
- final BufferedReader reader = new BufferedReader(new FileReader(file));
+ final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
int[] widths = new int[] {1, 8, 9 };
- try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, widths, false)) {
+ try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, CHARSET, widths, false)) {
final String[] line1 = fixedWidthReader.readLine();
assertEquals("[i, greeting, greeter]", Arrays.asList(line1).toString());
final String[] line2 = fixedWidthReader.readLine();
@Test
public void testBufferedReader3() throws IOException {
final File file = new File("src/test/resources/example_simple3.txt");
- final BufferedReader reader = new BufferedReader(new FileReader(file));
- try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, 5, false)) {
+ final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
+ try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, CHARSET, 5, false)) {
final String[] line1 = fixedWidthReader.readLine();
assertEquals("[hello]", Arrays.asList(line1).toString());
final String[] line2 = fixedWidthReader.readLine();
@Test
public void testBufferedReaderFailOnInconsistentRows() throws IOException {
final File file = new File("src/test/resources/example_simple3.txt");
- final BufferedReader reader = new BufferedReader(new FileReader(file));
- try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, 5, true)) {
+ final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
+ try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, CHARSET, 5, true)) {
final String[] line1 = fixedWidthReader.readLine();
assertEquals("[hello]", Arrays.asList(line1).toString());
final String[] line2 = fixedWidthReader.readLine();
final String[] line4 = fixedWidthReader.readLine();
}
}
-
-
}
--- /dev/null
+ÉÄÕÁÔÅ`ÉÄâäÙÕÁÔÅ`ÉÄÁÄÄÙÅââ`ÉÄðñ\95\81\94\85`ðñ¢¤\99\95\81\94\85`ðñ\81\84\84\99\85¢¢`ðñðò\95\81\94\85`ðò¢¤\99\95\81\94\85`ðò\81\84\84\99\85¢¢`ðòðó\95\81\94\85`ðó¢¤\99\95\81\94\85`ðó\81\84\84\99\85¢¢`ðóðô\95\81\94\85`ðô¢¤\99\95\81\94\85`ðô\81\84\84\99\85¢¢`ðôðõ\95\81\94\85`ðõ¢¤\99\95\81\94\85`ðõ\81\84\84\99\85¢¢`ðõðö\95\81\94\85`ðö¢¤\99\95\81\94\85`ðö\81\84\84\99\85¢¢`ðöð÷\95\81\94\85`ð÷¢¤\99\95\81\94\85`ð÷\81\84\84\99\85¢¢`ð÷ðø\95\81\94\85`ðø¢¤\99\95\81\94\85`ðø\81\84\84\99\85¢¢`ðøðù\95\81\94\85`ðù¢¤\99\95\81\94\85`ðù\81\84\84\99\85¢¢`ðùñð\95\81\94\85`ñð¢¤\99\95\81\94\85`ñð\81\84\84\99\85¢¢`ñðññ\95\81\94\85`ññ¢¤\99\95\81\94\85`ññ\81\84\84\99\85¢¢`ñññò\95\81\94\85`ñò¢¤\99\95\81\94\85`ñò\81\84\84\99\85¢¢`ñòñó\95\81\94\85`ñó¢¤\99\95\81\94\85`ñó\81\84\84\99\85¢¢`ñóñô\95\81\94\85`ñô¢¤\99\95\81\94\85`ñô\81\84\84\99\85¢¢`ñôñõ\95\81\94\85`ñõ¢¤\99\95\81\94\85`ñõ\81\84\84\99\85¢¢`ñõñö\95\81\94\85`ñö¢¤\99\95\81\94\85`ñö\81\84\84\99\85¢¢`ñöñ÷\95\81\94\85`ñ÷¢¤\99\95\81\94\85`ñ÷\81\84\84\99\85¢¢`ñ÷ñø\95\81\94\85`ñø¢¤\99\95\81\94\85`ñø\81\84\84\99\85¢¢`ñøñù\95\81\94\85`ñù¢¤\99\95\81\94\85`ñù\81\84\84\99\85¢¢`ñùòð\95\81\94\85`òð¢¤\99\95\81\94\85`òð\81\84\84\99\85¢¢`òðòñ\95\81\94\85`òñ¢¤\99\95\81\94\85`òñ\81\84\84\99\85¢¢`òñòò\95\81\94\85`òò¢¤\99\95\81\94\85`òò\81\84\84\99\85¢¢`òòòó\95\81\94\85`òó¢¤\99\95\81\94\85`òó\81\84\84\99\85¢¢`òóòô\95\81\94\85`òô¢¤\99\95\81\94\85`òô\81\84\84\99\85¢¢`òôòõ\95\81\94\85`òõ¢¤\99\95\81\94\85`òõ\81\84\84\99\85¢¢`òõòö\95\81\94\85`òö¢¤\99\95\81\94\85`òö\81\84\84\99\85¢¢`òöò÷\95\81\94\85`ò÷¢¤\99\95\81\94\85`ò÷\81\84\84\99\85¢¢`ò÷òø\95\81\94\85`òø¢¤\99\95\81\94\85`òø\81\84\84\99\85¢¢`òøòù\95\81\94\85`òù¢¤\99\95\81\94\85`òù\81\84\84\99\85¢¢`òùóð\95\81\94\85`óð¢¤\99\95\81\94\85`óð\81\84\84\99\85¢¢`óðóñ\95\81\94\85`óñ¢¤\99\95\81\94\85`óñ\81\84\84\99\85¢¢`óñóò\95\81\94\85`óò¢¤\99\95\81\94\85`óò\81\84\84\99\85¢¢`óòóó\95\81\94\85`óó¢¤\99\95\81\94\85`óó\81\84\84\99\85¢¢`óóóô\95\81\94\85`óô¢¤\99\95\81\94\85`óô\81\84\84\99\85¢¢`óôóõ\95\81\94\85`óõ¢¤\99\95\81\94\
85`óõ\81\84\84\99\85¢¢`óõóö\95\81\94\85`óö¢¤\99\95\81\94\85`óö\81\84\84\99\85¢¢`óöó÷\95\81\94\85`ó÷¢¤\99\95\81\94\85`ó÷\81\84\84\99\85¢¢`ó÷óø\95\81\94\85`󸢤\99\95\81\94\85`óø\81\84\84\99\85¢¢`óøóù\95\81\94\85`óù¢¤\99\95\81\94\85`óù\81\84\84\99\85¢¢`óùôð\95\81\94\85`ôð¢¤\99\95\81\94\85`ôð\81\84\84\99\85¢¢`ôðôñ\95\81\94\85`ôñ¢¤\99\95\81\94\85`ôñ\81\84\84\99\85¢¢`ôñôò\95\81\94\85`ôò¢¤\99\95\81\94\85`ôò\81\84\84\99\85¢¢`ôòôó\95\81\94\85`ôó¢¤\99\95\81\94\85`ôó\81\84\84\99\85¢¢`ôóôô\95\81\94\85`ôô¢¤\99\95\81\94\85`ôô\81\84\84\99\85¢¢`ôôôõ\95\81\94\85`ôõ¢¤\99\95\81\94\85`ôõ\81\84\84\99\85¢¢`ôõôö\95\81\94\85`ôö¢¤\99\95\81\94\85`ôö\81\84\84\99\85¢¢`ôöô÷\95\81\94\85`ô÷¢¤\99\95\81\94\85`ô÷\81\84\84\99\85¢¢`ô÷ôø\95\81\94\85`ôø¢¤\99\95\81\94\85`ôø\81\84\84\99\85¢¢`ôøôù\95\81\94\85`ôù¢¤\99\95\81\94\85`ôù\81\84\84\99\85¢¢`ôù
\ No newline at end of file