METAMODEL-244: Addition for Excel
author Kasper Sørensen <i.am.kasper.sorensen@gmail.com>
Fri, 13 May 2016 03:15:33 +0000 (20:15 -0700)
committer Kasper Sørensen <i.am.kasper.sorensen@gmail.com>
Fri, 13 May 2016 03:15:33 +0000 (20:15 -0700)
Closes #99

couchdb/src/test/java/org/apache/metamodel/couchdb/CouchDbDataContextTest.java
excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java
excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java
excel/src/main/java/org/apache/metamodel/excel/ExcelDataContext.java
excel/src/main/java/org/apache/metamodel/excel/XlsxSpreadsheetReaderDelegate.java
excel/src/test/java/org/apache/metamodel/excel/ExcelDataContextTest.java

index 9e1f5fe..c2ec998 100644 (file)
@@ -53,7 +53,8 @@ public class CouchDbDataContextTest extends CouchDbTestCase {
         super.setUp();
 
         if (isConfigured()) {
-            httpClient = new StdHttpClient.Builder().host(getHostname()).build();
+            final int timeout = 8 * 1000; // 8 seconds should be more than enough
+            httpClient = new StdHttpClient.Builder().socketTimeout(timeout).host(getHostname()).build();
 
             // set up a simple database
             couchDbInstance = new StdCouchDbInstance(httpClient);
index 1b8b534..009fad4 100644 (file)
@@ -30,7 +30,10 @@ import org.apache.metamodel.schema.MutableSchema;
 import org.apache.metamodel.schema.MutableTable;
 import org.apache.metamodel.schema.Schema;
 import org.apache.metamodel.schema.Table;
-import org.apache.metamodel.util.AlphabeticSequence;
+import org.apache.metamodel.schema.naming.ColumnNamingContext;
+import org.apache.metamodel.schema.naming.ColumnNamingContextImpl;
+import org.apache.metamodel.schema.naming.ColumnNamingSession;
+import org.apache.metamodel.schema.naming.ColumnNamingStrategy;
 import org.apache.metamodel.util.FileHelper;
 import org.apache.metamodel.util.Resource;
 import org.apache.poi.ss.usermodel.Cell;
@@ -131,19 +134,22 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat
                 row = rowIterator.next();
             }
 
-            // build columns by using alphabetic sequences
-            // (A,B,C...)
-            AlphabeticSequence sequence = new AlphabeticSequence();
+            // build columns without any intrinsic column names
+            final ColumnNamingStrategy columnNamingStrategy = _configuration.getColumnNamingStrategy();
+            try (final ColumnNamingSession columnNamingSession = columnNamingStrategy.startColumnNamingSession()) {
+                final int offset = getColumnOffset(row);
+                for (int i = 0; i < offset; i++) {
+                    columnNamingSession.getNextColumnName(new ColumnNamingContextImpl(i));
+                }
 
-            final int offset = getColumnOffset(row);
-            for (int i = 0; i < offset; i++) {
-                sequence.next();
+                for (int j = offset; j < row.getLastCellNum(); j++) {
+                    final ColumnNamingContext namingContext = new ColumnNamingContextImpl(table, null, j);
+                    final Column column = new MutableColumn(columnNamingSession.getNextColumnName(namingContext),
+                            ColumnType.STRING, table, j, true);
+                    table.addColumn(column);
+                }
             }
 
-            for (int j = offset; j < row.getLastCellNum(); j++) {
-                Column column = new MutableColumn(sequence.next(), ColumnType.STRING, table, j, true);
-                table.addColumn(column);
-            }
         } else {
 
             boolean hasColumns = true;
@@ -183,14 +189,17 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat
         final int offset = getColumnOffset(row);
 
         // build columns based on cell values.
-        for (int j = offset; j < rowLength; j++) {
-            Cell cell = row.getCell(j);
-            String columnName = ExcelUtils.getCellValue(wb, cell);
-            if (columnName == null || "".equals(columnName)) {
-                columnName = "[Column " + (j + 1) + "]";
+        try (final ColumnNamingSession columnNamingSession = _configuration.getColumnNamingStrategy()
+                .startColumnNamingSession()) {
+            for (int j = offset; j < rowLength; j++) {
+                final Cell cell = row.getCell(j);
+                final String intrinsicColumnName = ExcelUtils.getCellValue(wb, cell);
+                final ColumnNamingContext columnNamingContext = new ColumnNamingContextImpl(table, intrinsicColumnName,
+                        j);
+                final String columnName = columnNamingSession.getNextColumnName(columnNamingContext);
+                final Column column = new MutableColumn(columnName, ColumnType.VARCHAR, table, j, true);
+                table.addColumn(column);
             }
-            Column column = new MutableColumn(columnName, ColumnType.VARCHAR, table, j, true);
-            table.addColumn(column);
         }
     }
 
index 9220ea3..4779bb1 100644 (file)
@@ -21,6 +21,8 @@ package org.apache.metamodel.excel;
 import java.io.Serializable;
 import java.util.List;
 
+import org.apache.metamodel.schema.naming.ColumnNamingStrategies;
+import org.apache.metamodel.schema.naming.ColumnNamingStrategy;
 import org.apache.metamodel.util.BaseObject;
 
 /**
@@ -37,6 +39,7 @@ public final class ExcelConfiguration extends BaseObject implements
        public static final int DEFAULT_COLUMN_NAME_LINE = 1;
 
        private final int columnNameLineNumber;
+       private final ColumnNamingStrategy columnNamingStrategy;
        private final boolean skipEmptyLines;
        private final boolean skipEmptyColumns;
 
@@ -44,12 +47,28 @@ public final class ExcelConfiguration extends BaseObject implements
                this(DEFAULT_COLUMN_NAME_LINE, true, false);
        }
 
-       public ExcelConfiguration(int columnNameLineNumber, boolean skipEmptyLines,
-                       boolean skipEmptyColumns) {
-               this.columnNameLineNumber = columnNameLineNumber;
-               this.skipEmptyLines = skipEmptyLines;
-               this.skipEmptyColumns = skipEmptyColumns;
-       }
+    public ExcelConfiguration(int columnNameLineNumber, boolean skipEmptyLines, boolean skipEmptyColumns) {
+        this(columnNameLineNumber, null, skipEmptyLines, skipEmptyColumns); // null: getColumnNamingStrategy() returns the default strategy
+    }
+
+    public ExcelConfiguration(int columnNameLineNumber, ColumnNamingStrategy columnNamingStrategy,
+            boolean skipEmptyLines, boolean skipEmptyColumns) {
+        this.columnNameLineNumber = columnNameLineNumber;
+        this.skipEmptyLines = skipEmptyLines;
+        this.skipEmptyColumns = skipEmptyColumns;
+        this.columnNamingStrategy = columnNamingStrategy; // may be null; getColumnNamingStrategy() then falls back to the default
+    }
+    
+    /**
+     * Gets the {@link ColumnNamingStrategy} to use for naming columns.
+     * @return the configured strategy, or {@link ColumnNamingStrategies#defaultStrategy()} if none (null) was configured
+     */
+    public ColumnNamingStrategy getColumnNamingStrategy() {
+        if (columnNamingStrategy == null) {
+            return ColumnNamingStrategies.defaultStrategy();
+        }
+        return columnNamingStrategy;
+    }
 
        /**
         * The line number (1 based) from which to get the names of the columns.
index 28c1f8e..0ce1c64 100644 (file)
@@ -165,8 +165,8 @@ public final class ExcelDataContext extends QueryPostprocessDataContext implemen
             return new MutableSchema(getMainSchemaName());
         }
         try {
-            SpreadsheetReaderDelegate delegate = getSpreadsheetReaderDelegate();
-            Schema schema = delegate.createSchema(getMainSchemaName());
+            final SpreadsheetReaderDelegate delegate = getSpreadsheetReaderDelegate();
+            final Schema schema = delegate.createSchema(getMainSchemaName());
             assert getMainSchemaName().equals(schema.getName());
             return schema;
         } catch (Exception e) {
index ab34ef6..94a9ff7 100644 (file)
@@ -39,7 +39,9 @@ import org.apache.metamodel.schema.MutableSchema;
 import org.apache.metamodel.schema.MutableTable;
 import org.apache.metamodel.schema.Schema;
 import org.apache.metamodel.schema.Table;
-import org.apache.metamodel.util.AlphabeticSequence;
+import org.apache.metamodel.schema.naming.ColumnNamingContextImpl;
+import org.apache.metamodel.schema.naming.ColumnNamingSession;
+import org.apache.metamodel.schema.naming.ColumnNamingStrategy;
 import org.apache.metamodel.util.FileHelper;
 import org.apache.metamodel.util.FileResource;
 import org.apache.metamodel.util.Resource;
@@ -141,8 +143,8 @@ final class XlsxSpreadsheetReaderDelegate implements SpreadsheetReaderDelegate {
         for (Column column : columns) {
             selectItems.add(new SelectItem(column));
         }
-        final XlsxRowPublisherAction publishAction = new XlsxRowPublisherAction(_configuration, columns,
-                relationshipId, xssfReader);
+        final XlsxRowPublisherAction publishAction = new XlsxRowPublisherAction(_configuration, columns, relationshipId,
+                xssfReader);
 
         return new RowPublisherDataSet(selectItems.toArray(new SelectItem[selectItems.size()]), maxRows, publishAction,
                 new Closeable() {
@@ -161,26 +163,31 @@ final class XlsxSpreadsheetReaderDelegate implements SpreadsheetReaderDelegate {
             @Override
             public boolean row(int rowNumber, List<String> values, List<Style> styles) {
                 final int columnNameLineNumber = _configuration.getColumnNameLineNumber();
-                if (columnNameLineNumber == ExcelConfiguration.NO_COLUMN_NAME_LINE) {
-                    AlphabeticSequence alphabeticSequence = new AlphabeticSequence();
-                    List<String> generatedColumnNames = new ArrayList<String>(values.size());
-                    for (String originalColumnName : values) {
-                        String columnName = alphabeticSequence.next();
-                        if (originalColumnName == null) {
-                            columnName = null;
-                        }
-                        generatedColumnNames.add(columnName);
-                    }
-                    buildColumns(table, generatedColumnNames);
-                    return false;
-                } else {
+                final boolean hasColumnNameLine = columnNameLineNumber != ExcelConfiguration.NO_COLUMN_NAME_LINE;
+
+                if (hasColumnNameLine) {
                     final int zeroBasedLineNumber = columnNameLineNumber - 1;
-                    if (rowNumber >= zeroBasedLineNumber) {
-                        buildColumns(table, values);
-                        return false;
+                    if (rowNumber < zeroBasedLineNumber) {
+                        // jump to read the next line
+                        return true;
+                    }
+                }
+
+                final ColumnNamingStrategy columnNamingStrategy = _configuration.getColumnNamingStrategy();
+                try (ColumnNamingSession session = columnNamingStrategy.startColumnNamingSession()) {
+                    for (int i = 0; i < values.size(); i++) {
+                        final String intrinsicColumnName = hasColumnNameLine ? values.get(i) : null;
+                        final String columnName = session.getNextColumnName(new ColumnNamingContextImpl(table,
+                                intrinsicColumnName, i));
+
+                        if (!(_configuration.isSkipEmptyColumns() && values.get(i) == null)) {
+                            table.addColumn(new MutableColumn(columnName, ColumnType.STRING, table, i, true));
+                        }
                     }
                 }
-                return true;
+
+                // now we're done, no more reading
+                return false;
             }
         };
         final XlsxSheetToRowsHandler handler = new XlsxSheetToRowsHandler(rowCallback, xssfReader, _configuration);
@@ -196,19 +203,6 @@ final class XlsxSpreadsheetReaderDelegate implements SpreadsheetReaderDelegate {
         }
     }
 
-    protected void buildColumns(final MutableTable table, final List<String> columnNames) {
-        int columnNumber = 0;
-        for (String columnName : columnNames) {
-            if (columnName != null || !_configuration.isSkipEmptyColumns()) {
-                if (columnName == null) {
-                    columnName = "[Column " + (columnNumber + 1) + "]";
-                }
-                table.addColumn(new MutableColumn(columnName, ColumnType.STRING, table, columnNumber, true));
-            }
-            columnNumber++;
-        }
-    }
-
     private void buildTables(final XSSFReader xssfReader, final XlsxWorkbookToTablesHandler workbookToTables)
             throws Exception {
         final InputStream workbookData = xssfReader.getWorkbookData();
index 81155c0..3b69290 100644 (file)
@@ -276,7 +276,7 @@ public class ExcelDataContextTest extends TestCase {
 
         assertNotNull(table);
 
-        assertEquals("[[Column 1], hello]", Arrays.toString(table.getColumnNames()));
+        assertEquals("[A, hello]", Arrays.toString(table.getColumnNames()));
 
         Query q = dc.query().from(table).select(table.getColumns()).toQuery();
         DataSet ds = dc.executeQuery(q);
@@ -427,7 +427,7 @@ public class ExcelDataContextTest extends TestCase {
         Table table = schema.getTables()[0];
         assertEquals("[Column[name=a,columnNumber=0,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
                 + "Column[name=b,columnNumber=1,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
-                + "Column[name=[Column 3],columnNumber=2,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+                + "Column[name=A,columnNumber=2,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
                 + "Column[name=d,columnNumber=3,type=VARCHAR,nullable=true,nativeType=null,columnSize=null]]",
                 Arrays.toString(table.getColumns()));