Changeset 7654


Ignore:
Timestamp:
Mar 15, 2019, 2:35:35 PM (3 years ago)
Author:
Nicklas Nordborg
Message:

References #2161: Import data from Excel files

Started to implement a utility for parsing an Excel file like it was a CSV file. The XlsxToCsvUtil is inspired by the one originally developed by Reggie, but it need to support a wider range of Excel features.

The "Test with file" function is currently used as a testbed.

Location:
trunk
Files:
1 added
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/core/net/sf/basedb/core/Install.java

    r7653 r7654  
    7171import net.sf.basedb.util.Values;
    7272import net.sf.basedb.util.XmlUtil2;
     73import net.sf.basedb.util.excel.XlsxToCsvUtil;
    7374import net.sf.basedb.util.extensions.manager.Settings;
    7475import net.sf.basedb.util.extensions.xml.PluginInfo;
     
    627628      createMimeType("application/vnd.ms-excel", "Excel spreadsheet (Microsoft)", "xlm", null, true);
    628629      createMimeType("application/vnd.ms-excel", "Excel spreadsheet (Microsoft)", "xlw", null, true);
    629       createMimeType("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "Microsoft Office Excel 2007 workbook", "xlsx", null, false);
     630      createMimeType(XlsxToCsvUtil.XLSX_MIME_TYPE, "Microsoft Office Excel 2007 workbook", "xlsx", null, false);
    630631      createMimeType("application/x-compress", "Unix compressed file", "z", null, false);
    631632      createMimeType("application/zip", "DOS/PC - Pkzipped archive", "zip", null, false);
  • trunk/www/common/plugin/parse_file.js

    r7604 r7654  
    4444  }
    4545
     46  parse.pleaseWaitWhileParsing = function()
     47  {
     48    TabControl.setActiveTab('parsedFile.filedata');
     49    Doc.hide('myContent');
     50    Doc.show('waitWhileParsing');
     51  }
     52 
    4653  parse.initColumnMapping = function(element, autoInit)
    4754  {
  • trunk/www/common/plugin/parse_file.jsp

    r7605 r7654  
    3939  import="net.sf.basedb.clients.web.Base"
    4040  import="net.sf.basedb.util.Values"
     41  import="net.sf.basedb.util.formatter.DateFormatter"
     42  import="net.sf.basedb.util.NumberFormatUtil"
     43  import="net.sf.basedb.util.excel.XlsxToCsvUtil"
     44  import="net.sf.basedb.util.excel.XlsxToCsvUtil.SheetInfo"
    4145  import="net.sf.basedb.util.fuzzy.StringMatcher"
    4246  import="net.sf.basedb.util.fuzzy.StringMatcher.FuzzyMatch"
     
    6569{
    6670  String path = request.getParameter("path");
    67   String charsetName = Values.getString(request.getParameter("charset"), Config.getCharset());
     71  boolean excelMode = "excel".equals(Values.getString(request.getParameter("filemode")));
     72  String charsetName = Values.getString(request.getParameter("charset"), excelMode ? "UTF-8" : Config.getCharset());
     73  String sheet = Values.getStringOrNull(request.getParameter("sheet"));
    6874  PluginConfigurationRequest pcRequest = sc.getSessionSetting("plugin.configure.request");
    6975  Plugin plugin = pcRequest.getPlugin();
     
    100106    }
    101107   
    102     String dataSplitter = Values.getStringOrNull(request.getParameter("dataSplitter"));
     108    String dataSplitter = Values.getString(request.getParameter("dataSplitter"), excelMode ? "\\t" : null);
    103109    if (dataSplitter != null)
    104110    {
     
    163169      fileInputStream = wex.wrapInputStream(fileInputStream);
    164170    }
     171   
     172    if (excelMode)
     173    {
     174      // TODO - this should be inside the FlatFileParser
     175      String dateFormat = Values.getString(request.getParameter("dateFormat"), "yyyy-MM-dd");
     176      XlsxToCsvUtil util = new XlsxToCsvUtil();
     177      util.setDateFormat(new DateFormatter(dateFormat));
     178      util.setEvaluateFormulas(true);
     179      util.readWorkbook(fileInputStream);
     180      SheetInfo sheetInfo = util.getSheetAsCsv(sheet);
     181      fileInputStream = sheetInfo.parseToCsv();
     182    }
     183   
    165184    parser.setInputStream(fileInputStream, charsetName);
    166185    lastLine = parser.parseHeaders();
     
    207226    title="File data"
    208227    >
    209    
     228    <div id="waitWhileParsing" class="absolutefull bg-filled-100" style="display: none;">
     229    <div class="messagecontainer error">
     230      Parsing file. Please wait...
     231    </div>
     232    </div>
     233    <div id="myContent">
    210234    <%
    211235    if (file == null)
    212236    {
    213237      %>
    214       <div class="absolutefull bg-filled-50">
     238      <div class="absolutefull bg-filled-100">
    215239      <div class="messagecontainer error">
    216240        No file selected. Please select a file to test above.
     
    224248      {
    225249        %>
    226         <div class="bg-filled-50" style="padding: 5px;">
     250        <div class="bg-filled-100" style="padding: 5px;">
    227251        <div class="messagecontainer error" style="margin: 0;">
    228252        <b>Could not parse the file <code><%=HTML.encodeTags(path)%></code></b>
     
    384408    }
    385409    %>
     410    </div>
    386411    </t:tab>
    387412
  • trunk/www/common/plugin/test_with_file.js

    r7627 r7654  
    4444    Events.addEventHandler('path', 'base-selected', test.setFileCallback);
    4545    Buttons.addClickHandler('btnParse', test.parseFile);
    46    
     46
     47    Events.addEventHandler('sheet', 'change', test.autoParse);
    4748    Events.addEventHandler('maxLines', 'keypress', Events.integerOnly)
    4849    Events.addEventHandler('maxDataColumns', 'keypress', Events.integerOnly)
     
    5455   
    5556    test.initParameters();
    56     test.autoParse();
     57    var frm = document.forms['testwithfile'];
     58    test.getFileInfo(Strings.trim(frm.path.value));
    5759  }
    5860 
     
    7678    frm.file_id.value = event.detail.id;
    7779    frm.path.value = event.detail.name;
    78     test.updateCharset(event.detail.id);
    79   }
    80  
    81   test.updateCharset = function(fileId)
     80    test.getFileInfo(event.detail.name, event.detail.id);
     81  }
     82 
     83  test.getFileInfo = function(path, fileId)
    8284  {
    8385    var request = Ajax.getXmlHttpRequest();
     
    8587    {
    8688      var url = App.getRoot()+'filemanager/files/ajax.jsp?ID='+App.getSessionId();
    87       url += '&cmd=GetFileInfo&item_id=' + fileId;
     89      url += '&cmd=GetFileInfo&checktype=1';
     90      if (fileId)
     91      {
     92        url += '&item_id=' + fileId;
     93      }
     94      else if (path)
     95      {
     96        url += '&path=' + encodeURIComponent(path);
     97      }
     98      else
     99      {
     100        return false;
     101      }
     102      var frm = document.forms['testwithfile'];
     103      Doc.addClass('btnParse', 'disabled');
     104      frm.sheet.disabled = true;
     105
    88106      request.open("GET", url, true);
    89       Ajax.setReadyStateHandler(request, test.updateCharsetCallback);
     107      Ajax.setReadyStateHandler(request, test.fileInfoCallback);
    90108      request.send(null);
    91109    }
     
    93111  }
    94112 
    95   test.updateCharsetCallback = function(request)
    96   {
     113  test.fileInfoCallback = function(request)
     114  {
     115    var frm = document.forms['testwithfile'];
     116    Doc.removeClass('btnParse', 'disabled');
     117   
    97118    var response = JSON.parse(request.responseText);
    98     var charset = response.characterSet;
    99     var frm = document.forms['testwithfile'];
    100     if (charset)
    101     {
    102       Forms.selectListOption(frm.charset, charset);
     119    var excelMode = response.excelMode;
     120
     121    frm.filemode.value = excelMode ? 'excel' : 'csv';
     122    Data.set(document.body, 'filemode', frm.filemode.value);
     123
     124    frm.sheet.length = 0;
     125    frm.sheet.disabled = !excelMode;
     126    frm.charset.disabled = excelMode;
     127    frm.dataSplitter.disabled = excelMode;
     128    Doc.addOrRemoveClass('btnSplitterRegexp', 'disabled', excelMode);
     129    frm.trimQuotes.disabled = excelMode;
     130    if (excelMode)
     131    {
     132      frm.dataSplitter.value = '\\t';
     133      Forms.selectListOption(frm.charset, 'UTF-8');
     134      frm.trimQuotes.checked = false;
     135      for (var i = 0; i < response.sheets.length; i++)
     136      {
     137        frm.sheet[frm.sheet.length] = new Option(response.sheets[i]);
     138      }
     139    }
     140    else
     141    {
     142      if (charset)
     143      {
     144        var charset = response.characterSet;
     145        Forms.selectListOption(frm.charset, charset);
     146      }
     147      frm.trimQuotes.checked = true;
    103148    }
    104149    test.autoParse();
     
    128173      return false;
    129174    }
     175   
     176    var parseWin = window.frames['parsed'];
     177    parseWin.ParseFile.pleaseWaitWhileParsing();
    130178    frm.submit();
    131179  }
    132  
    133180 
    134181  test.initParameters = function()
     
    145192    frm.maxDataColumns.value = configWin.Configure.getParameterValue('maxDataColumns');
    146193    frm.trimQuotes.checked = configWin.Configure.getParameterValue('trimQuotes') != 'false';
    147    
     194    frm.dateFormat.value = configWin.Configure.getParameterValue('dateFormat');
     195     
    148196    // Copy the list since ith is not certain that it is the same as Config.allCharsets()
    149197    var allCharsets = configWin.Doc.element('parameter-charset');
  • trunk/www/common/plugin/test_with_file.jsp

    r6322 r7654  
    4949  %>
    5050  <base:page type="popup" title="Test with file">
    51   <base:head scripts="menu.js,~test_with_file.js" styles="menu.css" />
    52   <base:body>
     51  <base:head scripts="menu.js,~test_with_file.js" styles="menu.css">
     52  <style>
     53  body:not([data-filemode="csv"]) .csv-mode
     54  {
     55    display: none;
     56  }
     57  body:not([data-filemode="excel"]) .excel-mode
     58  {
     59    display: none;
     60  }
     61  </style>
     62  </base:head>
     63  <base:body data-filemode="csv">
    5364
    5465  <m:menu
     
    168179    <form name="testwithfile" action="parse_file.jsp?ID=<%=ID%>" target="parsed" method="post">
    169180    <input type="hidden" name="file_id" value="">
     181    <input type="hidden" name="filemode" value="csv">
     182    <input type="hidden" name="dateFormat" value="">
    170183
    171184    <div class="content">
     
    173186      <table class="fullform input100 bottomborder">
    174187      <tr>
    175         <th>File to test</th>
     188        <th>File (CSV or Excel)</th>
    176189        <td colspan="3">
    177190          <table style="width: 100%;">
     
    191204            value="<%=FlatFileParser.DEFAULT_MAX_UNKNOWN_LINES%>">
    192205        </td>
     206        <th class="leftborder">Excel Sheet</th>
     207        <td>
     208          <select id="sheet" name="sheet" style="min-width: 20em;" disabled>
     209          </select>
     210        </td>
     211      </tr>
     212      <tr >
     213        <th>Header regexp</th>
     214        <td>
     215          <table style="width: 100%;">
     216          <tr>
     217          <td><input type="text" class="text" name="header"></td>
     218          <td style="width: 11em;">
     219            <base:button title="Predefined&hellip;" style="width: 10em;"
     220              id="btnHeaderRegexp" data-predefined="predefinedHeaders" />
     221          </td>
     222          </tr>
     223          </table>
     224        </td>
    193225        <th class="leftborder">Character set</th>
    194226        <td>
    195           <select name="charset">
     227          <select name="charset" style="min-width: 20em;">
    196228          <%
    197229          String defaultCharset = Config.getCharset();
     
    211243        </td>
    212244      </tr>
    213       <tr >
    214         <th>Header regexp</th>
     245      <tr>
     246        <th>Data splitter regexp</th>
    215247        <td>
    216248          <table style="width: 100%;">
    217249          <tr>
    218           <td><input type="text" class="text" name="header"></td>
    219           <td style="width: 11em;">
    220             <base:button title="Predefined&hellip;" style="width: 10em;"
    221               id="btnHeaderRegexp" data-predefined="predefinedHeaders" />
    222           </td>
    223           </tr>
    224           </table>
    225         </td>
    226         <th class="leftborder">Min data columns</th>
    227         <td>
    228           <input type="text" class="text" name="minDataColumns"
    229             id="minDataColumns" style="width: 15em;" maxlength="10">
    230         </td>
    231       </tr>
    232       <tr >
    233         <th>Data splitter regexp</th>
    234         <td>
    235           <table style="width: 100%;">
    236           <tr>
    237           <td><input type="text" class="text required" name="dataSplitter" ></td>
     250          <td><input type="text" class="text required" name="dataSplitter"></td>
    238251          <td style="width: 11em;">
    239252            <base:button title="Predefined&hellip;" style="width: 10em;"
     
    243256          </table>
    244257        </td>
    245         <th class="leftborder">Max data columns</th>
    246         <td>
    247           <input type="text" class="text" name="maxDataColumns"
    248             id="maxDataColumns" style="width: 15em;" maxlength="10">
     258        <th class="leftborder">Min data columns</th>
     259        <td>
     260          <input type="text" class="text" name="minDataColumns"
     261            id="minDataColumns" style="width: 15em;" maxlength="10">
    249262        </td>
    250263      </tr>
     
    262275          </table>
    263276        </td>
    264         <th class="leftborder"><label for="trimQuotes">Remove quotes</label></th>
    265         <td><input type="checkbox" name="trimQuotes" id="trimQuotes" value="1"></td>
     277        <th class="leftborder">Max data columns</th>
     278        <td>
     279          <input type="text" class="text" name="maxDataColumns"
     280            id="maxDataColumns" style="width: 15em;" maxlength="10">
     281        </td>
    266282      </tr>
    267283      <tr >
    268284        <th>Data header regexp</th>
    269285        <td><input type="text" class="text" name="dataHeader"></td>
    270         <th class="subprompt leftborder"></th>
    271         <td></td>
     286        <th class="leftborder"><label for="trimQuotes">Remove quotes</label></th>
     287        <td><input type="checkbox" name="trimQuotes" id="trimQuotes" value="1"></td>
    272288      </tr>
    273289      <tr class="dynamic">
  • trunk/www/filemanager/files/ajax.jsp

    r7626 r7654  
    2828  import="net.sf.basedb.core.File"
    2929  import="net.sf.basedb.core.InvalidDataException"
     30  import="net.sf.basedb.core.Path"
    3031  import="net.sf.basedb.util.Values"
    3132  import="net.sf.basedb.util.FileUtil"
     33  import="net.sf.basedb.util.excel.XlsxToCsvUtil"
    3234  import="net.sf.basedb.util.error.ThrowableUtil"
    3335  import="net.sf.basedb.clients.web.Base"
     
    4951final String root = request.getContextPath()+"/";
    5052final int itemId = Values.getInt(request.getParameter("item_id"));
     53final String path = Values.getStringOrNull(request.getParameter("path"));
    5154DbControl dc = null;
    5255out.clear();
     
    5861  {
    5962    dc = sc.newDbControl();
    60     File f = File.getById(dc, itemId);
     63    File f = null;
     64    if (itemId != 0)
     65    {
     66      f = File.getById(dc, itemId);
     67    }
     68    else
     69    {
     70      f = File.getByPath(dc, new Path(path, Path.Type.FILE), false);
     71    }
    6172    json.put("id", f.getId());
    6273    json.put("name", f.getName());
    6374    json.put("characterSet", f.getCharacterSet());
     75    json.put("mimeType", f.getMimeType());
     76    if (Values.getBoolean(request.getParameter("checktype")))
     77    {
     78      if (XlsxToCsvUtil.seemsLikeAnExcelFile(f))
     79      {
     80        XlsxToCsvUtil util = new XlsxToCsvUtil();
     81        try
     82        {
     83          util.readWorkbook(f.getDownloadStream(0));
     84          json.put("excelMode", true);
     85          json.put("sheets", util.getSheetNames());
     86        }
     87        catch (Exception ex)
     88        {} // It was maybe not an Excel file after all
     89      }
     90    }
    6491    dc.close();
    6592  }
Note: See TracChangeset for help on using the changeset viewer.