View Javadoc
1   package de.aikiit.bilanzanalyser.reader;
2   
3   import de.aikiit.bilanzanalyser.entity.BilanzRow;
4   import lombok.AllArgsConstructor;
5   import lombok.Data;
6   import lombok.extern.log4j.Log4j2;
7   import org.odftoolkit.odfdom.doc.OdfSpreadsheetDocument;
8   import org.odftoolkit.odfdom.doc.table.OdfTable;
9   import org.odftoolkit.odfdom.doc.table.OdfTableCell;
10  import org.odftoolkit.odfdom.doc.table.OdfTableRow;
11  
12  import java.io.File;
13  import java.io.IOException;
14  import java.nio.file.Path;
15  import java.util.ArrayList;
16  import java.util.List;
17  import java.util.Optional;
18  import java.util.concurrent.atomic.AtomicInteger;
19  
20  import static de.aikiit.bilanzanalyser.reader.BilanzRowParser.fromOdfTableRow;
21  
22  @Data
23  @AllArgsConstructor
24  @Log4j2
25  public final class BilanzOdsReader {
26  
27      private String tableName;
28      private Path source;
29  
30      /**
31       * Parses the given tableName in the configured ODS file.
32       *
33       * @return result object.
34       * @throws IOException in case of general I/O errors as parsing errors are transformed into skipped rows.
35       */
36      public BilanzRowParserResult extractData() throws IOException {
37          try {
38              BilanzRowParserResult result = BilanzRowParserResult.empty();
39  
40              OdfTable table = readTable();
41              if (table == null) {
42                  return result;
43              }
44  
45              // ODS default is 1048576 albeit it's only empty rows
46              log.info("Given table '{}' has {} rows", this.tableName, table.getRowCount());
47  
48              List<BilanzRow> rows = new ArrayList<>();
49              AtomicInteger counter = new AtomicInteger(0);
50              AtomicInteger readRows = new AtomicInteger(0);
51              AtomicInteger rowsWithParsingErrors = new AtomicInteger(0);
52  
53              for (int rowCount = 0; rowCount < table.getRowCount(); rowCount++) {
54                  OdfTableRow row = table.getRowByIndex(rowCount);
55  
56                  // only allow 5 empty rows in a row
57                  if (counter.get() == 5) {
58                      log.info("STOPPING due to too many empty lines after having read {} non-empty rows.", readRows.get());
59                      break;
60                  }
61  
62                  OdfTableCell cell = row.getCellByIndex(0);
63                  if (cell.getStringValue().isEmpty()) {
64                      counter.incrementAndGet();
65                      // skip empty rows
66                  } else {
67                      counter.set(0);
68                      readRows.incrementAndGet();
69                      result = result.withRow();
70  
71                      Optional<BilanzRow> br = fromOdfTableRow(row);
72                      if (br.isPresent()) {
73                          rows.add(br.get());
74                      } else {
75                          rowsWithParsingErrors.incrementAndGet();
76                          result = result.withError();
77                      }
78                  }
79              }
80  
81              log.info("Extracted {} rows successfully, while skipping {} not well formatted rows.", rows.size(), rowsWithParsingErrors.get());
82              return result.withRows(rows);
83          } catch (Exception e) {
84              throw new IOException(e);
85          }
86      }
87  
88      private OdfTable readTable() throws Exception {
89          try (OdfSpreadsheetDocument document = OdfSpreadsheetDocument.loadDocument(source.toFile())) {
90              return document.getTableByName(this.tableName);
91          }
92      }
93  }