Changeset 7655
- Timestamp:
- Mar 19, 2019, 11:09:07 AM (4 years ago)
- Location:
- trunk
- Files:
-
- 1 added
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/core/net/sf/basedb/util/excel/XlsxToCsvUtil.java
r7654 r7655 10 10 import java.util.List; 11 11 12 import org.apache.poi.poifs.filesystem.FileMagic; 12 13 import org.apache.poi.ss.usermodel.Cell; 13 14 import org.apache.poi.ss.usermodel.CellType; … … 57 58 } 58 59 60 /** 61 Check if the given stream is an Excel file by looking at the first few bytes. 62 The stream must support seek(). Call {@link #prepareForCheck(InputStream)} 63 before calling this method if it is not sure. 64 */ 65 public static boolean isAnExcelFile(InputStream in) 66 { 67 try 68 { 69 return FileMagic.valueOf(in) == FileMagic.OOXML; 70 } 71 catch (Exception ex) 72 {} 73 return false; 74 } 75 76 /** 77 Prepare the input stream for checking with 78 {@link #isAnExcelFile(InputStream)}. If the given stream 79 supports seek() it is returned as it is, otherwise it is 80 wrapped by a buffered stream. 81 */ 82 public static InputStream prepareForCheck(InputStream in) 83 { 84 return FileMagic.prepareToCheckMagic(in); 85 } 86 59 87 private Charset charset; 60 88 private String columnSeparator; … … 64 92 private EncoderDecoder encoder; 65 93 private Formatter<Date> dateFormat; 94 private Formatter<Date> timestampFormat; 66 95 private Formatter<Number> numberFormat; 67 96 … … 192 221 this.dateFormat = dateFormat; 193 222 } 223 224 /** 225 Get the date formatter that is used for formatting cells with timestamp-like values. 226 If null, the default implementation is used (which may not be predicatable). 227 */ 228 public Formatter<Date> getTimestampFormat() 229 { 230 return timestampFormat; 231 } 232 233 /** 234 Set a date formatter to use for formatting cells with timestamp-like values. 235 If null, the default implementation is used (which may not be predicatable). 236 */ 237 public void setTimestampFormat(Formatter<Date> timestampFormat) 238 { 239 this.timestampFormat = timestampFormat; 240 } 241 194 242 195 243 /** … … 405 453 if (isDate) 406 454 { 407 if (dateFormat != null) 455 String formatString = cell.getCellStyle().getDataFormatString(); 456 boolean hasTimePart = hasTimePart(formatString); 457 boolean hasDatePart = hasDatePart(formatString); 458 Date d = cell.getDateCellValue(); 459 if (hasDatePart && hasTimePart) 408 460 { 409 value = dateFormat.format(cell.getDateCellValue()); 461 if (timestampFormat != null) value = timestampFormat.format(d); 462 } 463 else if (hasDatePart) 464 { 465 if (dateFormat != null) value = dateFormat.format(d); 466 } 467 else 468 { 469 // Only time part -- let it be handled by the DataFormat 410 470 } 411 471 } … … 431 491 } 432 492 433 493 private boolean hasTimePart(String formatString) 494 { 495 if (formatString == null) return false; 496 return formatString.contains("S") || formatString.contains("H"); 497 } 498 499 private boolean hasDatePart(String formatString) 500 { 501 if (formatString == null) return false; 502 return formatString.contains("Y") || formatString.contains("D"); 503 } 504 434 505 } -
trunk/src/core/net/sf/basedb/util/parser/FlatFileParser.java
r7627 r7655 34 34 import java.util.regex.Matcher; 35 35 import java.util.regex.PatternSyntaxException; 36 37 36 38 import java.util.Arrays; 37 39 import java.util.LinkedList; … … 42 44 import java.util.Set; 43 45 import java.util.Collections; 46 import java.util.Date; 44 47 45 48 import net.sf.basedb.core.BaseException; … … 47 50 import net.sf.basedb.core.signal.ThreadSignalHandler; 48 51 import net.sf.basedb.util.InputStreamTracker; 52 import net.sf.basedb.util.NumberFormatUtil; 53 import net.sf.basedb.util.Values; 49 54 import net.sf.basedb.util.charset.CharsetUtil; 55 import net.sf.basedb.util.excel.XlsxToCsvUtil; 56 import net.sf.basedb.util.formatter.DateFormatter; 57 import net.sf.basedb.util.formatter.Formatter; 58 import net.sf.basedb.util.formatter.NumberFormatFormatter; 50 59 import net.sf.basedb.util.jep.JepFunction; 51 60 … … 270 279 271 280 /** 281 The default date formatter to use when creating mappers. 282 */ 283 private Formatter<Date> dateFormat = null; 284 285 /** 286 The default timestamp format to use when creating mappers. 287 */ 288 private Formatter<Date> timestampFormat = null; 289 290 /** 272 291 The value that was captured by the bofMarker pattern. 273 292 */ … … 545 564 546 565 /** 547 Set the input stream that will be parsed. 566 Set the input stream that will be parsed. The stream can be either a 567 text CSV-like stream or an Excel workbook (xlsx). 568 569 If the stream is an Excel workbook the following apply: 570 571 * If no date format has been specified, yyyy-MM-dd is used 572 * If no timestamp format has been specified, yyyy-MM-dd hh:mm:ss is used 573 * If no number format has been specified, 'dot' is used 574 * The data splitter regular expression is changed to \\t 575 548 576 @param in The <code>InputStream</code> 549 @param charset Name The name of the character set to use when parsing550 the file, or null to use the default charset specified by577 @param charsetOrSheetName If CSV, the name of the character set to 578 use when parsing the file, or null to use the default charset specified by 551 579 {@link Config#getCharset()} 580 If Excel, the name or index of the worksheet in the workbook, the default is 581 to parse the first sheet (index=0) 552 582 @since 2.1.1 553 583 */ 554 public void setInputStream(InputStream in, String charsetName) 555 { 584 public void setInputStream(InputStream in, String charsetOrSheetName) 585 { 586 in = XlsxToCsvUtil.prepareForCheck(in); 587 String charsetName = charsetOrSheetName; 588 if (XlsxToCsvUtil.isAnExcelFile(in)) 589 { 590 XlsxToCsvUtil xlsUtil = new XlsxToCsvUtil(); 591 if (dateFormat == null) dateFormat = new DateFormatter("yyyy-MM-dd"); 592 if (timestampFormat == null) timestampFormat = new DateFormatter("yyyy-MM-dd hh:mm:ss"); 593 if (numberFormat == null) numberFormat = NumberFormatUtil.getNumberFormat('.', (char)0); 594 xlsUtil.setDateFormat(dateFormat); 595 xlsUtil.setTimestampFormat(timestampFormat); 596 xlsUtil.setNumberFormat(new NumberFormatFormatter(numberFormat)); 597 xlsUtil.setEvaluateFormulas(true); 598 setDataSplitterRegexp(Pattern.compile("\\t")); 599 charsetName = "UTF-8"; 600 try 601 { 602 xlsUtil.readWorkbook(in); 603 } 604 catch (IOException ex) 605 { 606 throw new RuntimeException(ex); 607 } 608 if (xlsUtil.getSheetNames().contains(charsetOrSheetName)) 609 { 610 // Get the named sheet 611 in = xlsUtil.getSheetAsCsv(charsetOrSheetName).parseToCsv(); 612 } 613 else 614 { 615 // Or try to convert to index with 0 as default 616 in = xlsUtil.getSheetAsCsv(Values.getInt(charsetOrSheetName)).parseToCsv(); 617 } 618 } 556 619 Charset cs = CharsetUtil.getCharset(charsetName); 557 620 this.tracker = new InputStreamTracker(in); … … 914 977 { 915 978 return numberFormat; 979 } 980 981 /** 982 Set the default date format to use when creating mappers. 983 If null, xxx is used. 984 @since 3.15 985 */ 986 public void setDefaultDateFormat(Formatter<Date> dateFormat) 987 { 988 this.dateFormat = dateFormat; 989 } 990 991 /** 992 Get the default date format. 993 @since 3.15 994 */ 995 public Formatter<Date> getDefaultDateFormat() 996 { 997 return dateFormat; 998 } 999 1000 /** 1001 Set the default timestamp format to use when creating mappers. 1002 If null, xxx is used. 1003 @since 3.15 1004 */ 1005 public void setDefaultTimestampFormat(Formatter<Date> timestampFormat) 1006 { 1007 this.timestampFormat = timestampFormat; 1008 } 1009 1010 /** 1011 Get the default timestamp format. 1012 @since 3.15 1013 */ 1014 public Formatter<Date> getDefaultTimestampFormat() 1015 { 1016 return timestampFormat; 916 1017 } 917 1018 -
trunk/www/common/plugin/parse_file.jsp
r7654 r7655 169 169 fileInputStream = wex.wrapInputStream(fileInputStream); 170 170 } 171 172 if (excelMode) 173 { 174 // TODO - this should be inside the FlatFileParser 175 String dateFormat = Values.getString(request.getParameter("dateFormat"), "yyyy-MM-dd"); 176 XlsxToCsvUtil util = new XlsxToCsvUtil(); 177 util.setDateFormat(new DateFormatter(dateFormat)); 178 util.setEvaluateFormulas(true); 179 util.readWorkbook(fileInputStream); 180 SheetInfo sheetInfo = util.getSheetAsCsv(sheet); 181 fileInputStream = sheetInfo.parseToCsv(); 182 } 183 184 parser.setInputStream(fileInputStream, charsetName); 171 172 String dateFormat = Values.getStringOrNull(request.getParameter("dateFormat")); 173 if (dateFormat != null) parser.setDefaultDateFormat(new DateFormatter(dateFormat)); 174 String timestampFormat = Values.getStringOrNull(request.getParameter("timestampFormat")); 175 if (timestampFormat != null) parser.setDefaultTimestampFormat(new DateFormatter(timestampFormat)); 176 String decimalSeparator = Values.getStringOrNull(request.getParameter("decimalSeparator")); 177 if (decimalSeparator != null) 178 { 179 char ds = "dot".equals(decimalSeparator) ? '.' : ','; 180 parser.setDefaultNumberFormat(NumberFormatUtil.getNumberFormat(ds, (char)0)); 181 } 182 183 parser.setInputStream(fileInputStream, excelMode ? sheet : charsetName); 185 184 lastLine = parser.parseHeaders(); 186 185 dataIsFound = lastLine == FlatFileParser.LineType.DATA || lastLine == FlatFileParser.LineType.DATA_HEADER; -
trunk/www/common/plugin/test_with_file.js
r7654 r7655 175 175 176 176 var parseWin = window.frames['parsed']; 177 parseWin.ParseFile.pleaseWaitWhileParsing(); 177 if (parseWin && parseWin.ParseFile) 178 { 179 parseWin.ParseFile.pleaseWaitWhileParsing(); 180 } 178 181 frm.submit(); 179 182 } … … 192 195 frm.maxDataColumns.value = configWin.Configure.getParameterValue('maxDataColumns'); 193 196 frm.trimQuotes.checked = configWin.Configure.getParameterValue('trimQuotes') != 'false'; 197 frm.decimalSeparator.value = configWin.Configure.getParameterValue('decimalSeparator'); 194 198 frm.dateFormat.value = configWin.Configure.getParameterValue('dateFormat'); 199 frm.timestampFormat.value = configWin.Configure.getParameterValue('timestampFormat'); 195 200 196 201 // Copy the list since ith is not certain that it is the same as Config.allCharsets() -
trunk/www/common/plugin/test_with_file.jsp
r7654 r7655 180 180 <input type="hidden" name="file_id" value=""> 181 181 <input type="hidden" name="filemode" value="csv"> 182 <input type="hidden" name="decimalSeparator" value=""> 182 183 <input type="hidden" name="dateFormat" value=""> 184 <input type="hidden" name="timestampFormat" value=""> 183 185 184 186 <div class="content">
Note: See TracChangeset
for help on using the changeset viewer.