// BilanzOdsReader.java
package de.aikiit.bilanzanalyser.reader;
import de.aikiit.bilanzanalyser.entity.BilanzRow;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.extern.log4j.Log4j2;
import org.odftoolkit.odfdom.doc.OdfSpreadsheetDocument;
import org.odftoolkit.odfdom.doc.table.OdfTable;
import org.odftoolkit.odfdom.doc.table.OdfTableCell;
import org.odftoolkit.odfdom.doc.table.OdfTableRow;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicInteger;
import static de.aikiit.bilanzanalyser.reader.BilanzRowParser.fromOdfTableRow;
@Data
@AllArgsConstructor
@Log4j2
public final class BilanzOdsReader {
private String tableName;
private Path source;
public static void main(final String[] args) {
try (OdfSpreadsheetDocument document = OdfSpreadsheetDocument.loadDocument(new File("/tmp/example.ods"))) {
OdfTable table = document.getTableByName("Ausgaben");
log.info("{} lines to read", table.getRowCount());
for (int row = 0; row < 10 /*table.getRowCount() */; row++) {
for (int col = 0; col < table.getColumnCount(); col++) {
OdfTableCell cell = table.getCellByPosition(col, row);
System.out.print(cell.getStringValue() + "\t");
}
System.out.println();
}
} catch (Exception e) {
log.error(e);
}
}
/**
* Parses the given tableName in the configured ODS file.
*
* @return result object.
* @throws IOException in case of general I/O errors as parsing errors are transformed into skipped rows.
*/
public BilanzRowParserResult extractData() throws IOException {
try {
BilanzRowParserResult result = BilanzRowParserResult.empty();
OdfTable table = readTable();
if (table == null) {
return result;
}
// ODS default is 1048576 albeit it's only empty rows
log.info("Given table '{}' has {} rows", this.tableName, table.getRowCount());
List<BilanzRow> rows = new ArrayList<>();
AtomicInteger counter = new AtomicInteger(0);
AtomicInteger readRows = new AtomicInteger(0);
AtomicInteger rowsWithParsingErrors = new AtomicInteger(0);
for (int rowCount = 0; rowCount < table.getRowCount(); rowCount++) {
OdfTableRow row = table.getRowByIndex(rowCount);
// only allow 5 empty rows in a row
if (counter.get() == 5) {
log.info("STOPPING due to too many empty lines after having read {} non-empty rows.", readRows.get());
break;
}
OdfTableCell cell = row.getCellByIndex(0);
if (cell.getStringValue().isEmpty()) {
counter.incrementAndGet();
// skip empty rows
} else {
counter.set(0);
readRows.incrementAndGet();
result = result.withRow();
Optional<BilanzRow> br = fromOdfTableRow(row);
if (br.isPresent()) {
rows.add(br.get());
} else {
rowsWithParsingErrors.incrementAndGet();
result = result.withError();
}
}
}
log.info("Extracted {} rows successfully, while skipping {} not well formatted rows.", rows.size(), rowsWithParsingErrors.get());
return result.withRows(rows);
} catch (Exception e) {
throw new IOException(e);
}
}
private OdfTable readTable() throws Exception {
try (OdfSpreadsheetDocument document = OdfSpreadsheetDocument.loadDocument(source.toFile())) {
return document.getTableByName(this.tableName);
}
}
}