Changeset 5882


Ignore:
Timestamp:
Nov 22, 2011, 11:47:22 AM (10 years ago)
Author:
Nicklas Nordborg
Message:

References #1630: Migrate from MySQL to PostgreSQL

Use approximate number of rows instead of counting the actual number of rows when exporting. The reason is that counting can take a long time (>1 hour on a big database) and is only used for progress reporting in to the console. The approximate row count is good enough for the progress reporting.

Fixed an issue with '0' characters in some strings when exporting. Importing to PostgreSQL caused the following error:

org.postgresql.util.PSQLException: ERROR: invalid byte sequence for encoding "UTF8": 0x00
  Where: COPY RawBioAssayHeaders, line 58878
Location:
trunk/src/core/net/sf/basedb/core
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/core/net/sf/basedb/core/Migration.java

    r5880 r5882  
    261261    List<Table> dynamicTables = TableInfo.listTables(metaData, dynamicCatalog, dynamicSchema);
    262262    progress.display(0, dynamicTables.size() + " tables in dynamic database\n");
    263     progress.display(0, "Counting rows...");
     263    progress.display(0, "Counting rows in all tables...");
    264264    int totalTables = staticTables.size() + dynamicTables.size();
    265265   
     
    267267    // and count the number of data rows for each table
    268268    List<TableInfo> tables = new ArrayList<TableInfo>();
    269     long totalRows = 0;
     269    long approxTotal = 0;
    270270    for (Table table : staticTables)
    271271    {
     272      TableInfo info = new TableInfo(table, metaData);
     273      long approxRows = info.getRowCount(engine, connection, true);
     274      approxTotal += approxRows;
     275      tables.add(info);
    272276      if (System.currentTimeMillis() > nextTick)
    273277      {
    274         //progress.append(".");
    275278        progress.display(0, "Counting rows in all tables (" + tables.size() + "/" + totalTables + ")...");
    276279        nextTick = System.currentTimeMillis()+TICK_INTERVAL;
    277280      }
     281    }
     282    for (Table table : dynamicTables)
     283    {
    278284      TableInfo info = new TableInfo(table, metaData);
    279       long tableRows = info.getRowCount(engine, connection);
    280       totalRows += tableRows;
     285      long approxRows = info.getRowCount(engine, connection, true);
     286      approxTotal += approxRows;
    281287      tables.add(info);
    282     }
    283     for (Table table : dynamicTables)
    284     {
    285       if (System.currentTimeMillis() > nextTick)
    286       {
    287         //progress.append(".");
     288      if (System.currentTimeMillis() > nextTick || tables.size() == totalTables)
     289      {
    288290        progress.display(0, "Counting rows in all tables (" + tables.size() + "/" + totalTables + ")...");
    289291        nextTick = System.currentTimeMillis()+TICK_INTERVAL;
    290292      }
    291       TableInfo info = new TableInfo(table, metaData);
    292       long tableRows = info.getRowCount(engine, connection);
    293       totalRows += tableRows;
    294       tables.add(info);
    295     }
    296 
    297     progress.display(0, "Found " + Values.formatNumber(totalRows / 1000000f, 1) + " million data rows\n");
    298    
     293    }
     294   
     295    progress.display(0, "Found approximate " + Values.formatNumber(approxTotal / 1000000f, 2) + " million data rows\n");
    299296   
    300297    // Export data for each table...
    301     long totalExported = 0;
     298    long actualExported = 0;
    302299    Set<Integer> usedSqlTypes = new HashSet<Integer>();
    303300    for (TableInfo table : tables)
    304301    {
    305302      String tableName = table.getTable().getName();
    306       progress.display((int)((totalExported * 100) / totalRows),
     303      progress.display((int)((actualExported * 100) / approxTotal),
    307304          tableName + ": loading...");
    308305      nextTick = System.currentTimeMillis()+TICK_INTERVAL;
     
    337334          new OutputStreamWriter(FileUtil.getOutputStream(exportFile), "UTF-8"));
    338335
    339       long tableRows = table.getRowCount(engine, connection);
     336      long approxRows = table.getRowCount(engine, connection, true);
    340337      ResultSet rows = st.executeQuery(select.toString());
    341       int numRows = 0;
     338      int actualRows = 0;
    342339      while (rows.next())
    343340      {
     
    345342        if (System.currentTimeMillis() > nextTick)
    346343        {
    347           progress.display((int)((totalExported * 100) / totalRows),
    348               tableName + ": exporting... (" + numRows + " of " + tableRows + " rows done)");
     344          progress.display((int)((actualExported * 100) / approxTotal),
     345              tableName + ": exporting... (" + actualRows + " of " + approxRows + " rows done)");
    349346          nextTick = System.currentTimeMillis() + TICK_INTERVAL;
    350347        }
     
    361358       
    362359        // Keep track of rows
    363         totalExported++;
    364         numRows++;
     360        actualRows++;
     361        actualExported++;
     362        if (actualRows > approxRows)
     363        {
     364          // Increase the approximate row count
     365          approxTotal++;
     366          approxRows++;
     367        }
     368       
     369      }
     370      if (actualRows < approxRows)
     371      {
     372        // Decrease the approximate total row count
     373        approxTotal += actualRows - approxRows;
    365374      }
    366375
    367376      // Done with this table...
    368       progress.display((int)((totalExported * 100) / totalRows),
    369           tableName + ": complete (" + tableRows + " rows)                   \n");
     377      progress.display((int)((actualExported * 100) / approxTotal),
     378          tableName + ": complete (" + actualRows + " rows)                   \n");
    370379      writer.flush();
    371380      writer.close();
     
    616625  }
    617626 
     627  private static String zero = new String(new char[] { 0 });
     628
    618629  /**
    619630    Format the value to a string that PostgreSQL accepts in the
     
    652663          s = s.replace("\r", "\\r");
    653664          s = s.replace("\t", "\\t");
     665          s = s.replace(zero, ""); // a zero-character can't be stored in PostgreSQL so we simply remove it
    654666          formatted = s;
    655667          break;
  • trunk/src/core/net/sf/basedb/core/dbengine/AbstractDbEngine.java

    r5881 r5882  
    226226    return false;
    227227  }
     228  /**
     229    @return null, since this depends on capabilities of underlying database
     230  */
     231  @Override
     232  public String getApproximateRowCountSql(String catalog, String schema, String table)
     233  {
     234    return null;
     235  }
    228236  // -------------------------------------------
    229237
  • trunk/src/core/net/sf/basedb/core/dbengine/DbEngine.java

    r5881 r5882  
    380380  public boolean useSavePointToContinueTransactionFromSqlFailure();
    381381 
     382  /**
     383    Get an SQL statement that returns an approximate count for the number
     384    of rows in the given table. The SQL query should return a single row
     385    with a single value.
     386   
     387    @param catalog The name of the catalog (database) where the table is
     388      located, or null if it is located in the current catalog
     389    @param schema The name of the schema where the table is located, or
     390      null if is located in the current schema
     391    @param table The name of the table
     392    @return An SQL query or null
     393    @since 3.1
     394  */
     395  public String getApproximateRowCountSql(String catalog, String schema, String table);
     396 
    382397}
  • trunk/src/core/net/sf/basedb/core/dbengine/MySQLEngine.java

    r5852 r5882  
    229229    return "`" + name + "`";
    230230  }
     231 
     232  @Override
     233  public String getApproximateRowCountSql(String catalog, String schema, String table)
     234  {
     235    return "select TABLE_ROWS from INFORMATION_SCHEMA.TABLES where TABLE_NAME='" + table + "' and TABLE_SCHEMA='" + catalog + "'";
     236  }
     237
    231238  // -------------------------------------------
    232239
  • trunk/src/core/net/sf/basedb/core/dbengine/PostgresDbEngine.java

    r5881 r5882  
    285285    return true;
    286286  }
     287 
     288  @Override
     289  public String getApproximateRowCountSql(String catalog, String schema, String table)
     290  {
     291    return "select reltuples from pg_class where relname = '" + table + "'";
     292  }
    287293  // -------------------------------------------
    288294
     295
    289296 
    290297
  • trunk/src/core/net/sf/basedb/core/dbengine/TableInfo.java

    r5857 r5882  
    103103  private Set<IndexInfo> indexes;
    104104  private long numberOfRows = -1;
     105  private long approximateNumberOfRows = -1;
    105106 
    106107 
     
    481482    @since 3.1
    482483  */
    483   public long getRowCount(DbEngine engine, Connection connection)
     484  public long getRowCount(DbEngine engine, Connection connection, boolean approximate)
    484485    throws SQLException
    485486  {
    486     if (numberOfRows < 0)
     487    if ((numberOfRows < 0 && !approximate) || (approximateNumberOfRows < 0 && approximate))
    487488    {
    488489     
     
    492493      {
    493494        st = connection.createStatement();
    494         StringBuilder sql = new StringBuilder("SELECT count(*) FROM ");
    495         sql.append(getFullQuotedTableName(engine));
    496         result = st.executeQuery(sql.toString());
     495        String sql = null;
     496        if (approximate)
     497        {
     498          sql = engine.getApproximateRowCountSql(table.getCatalog(), table.getSchema(), table.getName());
     499        }
     500        if (sql == null)
     501        {
     502          sql = "SELECT count(*) FROM " + getFullQuotedTableName(engine);
     503        }
     504        result = st.executeQuery(sql);
    497505        result.next();
    498         numberOfRows = result.getLong(1);
     506        long count = result.getLong(1);
     507        if (approximate)
     508        {
     509          approximateNumberOfRows = count;
     510        }
     511        else
     512        {
     513          numberOfRows = count;
     514        }
    499515      }
    500516      finally
     
    504520      }
    505521    }
    506     return numberOfRows;
     522    return approximate ? approximateNumberOfRows : numberOfRows;
    507523  }
    508524 
Note: See TracChangeset for help on using the changeset viewer.