Changeset 6716


Ignore:
Timestamp:
Feb 4, 2015, 12:52:11 PM (7 years ago)
Author:
Nicklas Nordborg
Message:

Fixes #1917: Improve performance of AnnotationSet?.deleteEmptyAnnotationSets()

Changed the approach to finding empty annotation sets. Instead of using a main query with a set of subquires an initial query with a single left join is used. This gives us a list of potentially empty annotation sets. This list is then further narrowed down by executing additional queries (that also take the parameter limitation findings from #1914 into account). What remains in the list is then used for deleting.

The new implementation runs in less than a second on the same data set that previously took 40 minutes!

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/3.4-stable/src/core/net/sf/basedb/core/AnnotationSet.java

    r6420 r6716  
    2323package net.sf.basedb.core;
    2424
     25import java.util.ArrayList;
    2526import java.util.Collection;
    2627import java.util.Collections;
     
    4546import net.sf.basedb.core.snapshot.AnnotationTypeFilter;
    4647import net.sf.basedb.core.snapshot.SnapshotManager;
    47 
    4848import net.sf.basedb.core.data.AnnotationData;
    4949import net.sf.basedb.core.data.AnnotationSetData;
    5050import net.sf.basedb.core.data.AnnotationTypeData;
     51import net.sf.basedb.core.hibernate.TypeWrapper;
    5152import net.sf.basedb.util.AnnotationUtil;
    5253import net.sf.basedb.util.filter.Filter;
     
    203204  }
    204205 
     206  private static final int MAX_PARAMETERS_IN_QUERY = 10000;
    205207 
    206208  /**
     
    219221    if (progress != null)
    220222    {
    221       progress.display(5, "Counting empty annotation sets...");
    222     }
    223    
    224     String filter = "@COL not in (select [annotationset_id] from [Annotations])"+
    225         " and @COL not in (select [annotationset_id] from [InheritedAnnotations])"+
    226         " and @COL not in (select [annotationset_id] from [InheritedAnnotationSets])"+
    227         " and @COL not in (select [inherited_id] from [InheritedAnnotationSets])";
    228 
    229     // Count number of empty annotation sets
    230     String countSql = "select count(*), [item_type] from [AnnotationSets] where "+
    231         filter.replace("@COL", "[id]") +
    232         " group by [item_type]";
    233     org.hibernate.Query countQuery = HibernateUtil.createSqlQuery(session, countSql);
    234     List<Object[]> counts = (List<Object[]>)countQuery.list();
    235     long totalCount = 0;
    236     Map<String, Long> tables = new HashMap<String, Long>();
    237     for (Object[] row : counts)
    238     {
    239       long count = ((Number)row[0]).longValue();
    240       totalCount += count;
     223      progress.display(5, "Loading annotation sets with no primary annotation...");
     224    }
     225   
     226    // This SQL select annotationset id and type of item for annotation sets
     227    // that has no primary annotations
     228    String sql = "select [aa].[id], [aa].[item_type] from [AnnotationSets] [aa]"+
     229        " left join [Annotations] [a] on [a].[annotationset_id]=[aa].[id]"+
     230        " where [a].[annotationset_id] is null";
     231   
     232    org.hibernate.Query query = HibernateUtil.createSqlQuery(session, sql);
     233    Map<Integer, Integer> possibleAnnotationSets = new HashMap<Integer, Integer>(10000);
     234    List<Integer> allIds = new ArrayList<Integer>(10000);
     235    for (Object[] row : (List<Object[]>)query.list())
     236    {
     237      Integer id = (Integer)row[0];
     238      possibleAnnotationSets.put(id, (Integer)row[1]);
     239      allIds.add(id);
     240    }
     241    if (progress != null)
     242    {
     243      progress.display(25, "Found " + possibleAnnotationSets.size() + " annotation sets without primary annotation");
     244    }
     245    if (possibleAnnotationSets.size() == 0) return 0;
     246   
     247    if (progress != null)
     248    {
     249      progress.display(30, "Checking for inherited annotations..." );
     250    }
     251   
     252    // These SQL load annotation set ids that exists in one of three columns/tables
     253    // Anything we find in theese tables must be removed from the possibleAnnotationSets map
     254    String sql1 = "select [annotationset_id] from [InheritedAnnotations] where [annotationset_id] in (:ids)";
     255    String sql2 = "select [annotationset_id] from [InheritedAnnotationSets] where [annotationset_id] in (:ids)";
     256    String sql3 = "select [inherited_id] from [InheritedAnnotationSets] where [inherited_id] in (:ids)";
     257    org.hibernate.Query query1 = HibernateUtil.createSqlQuery(session, sql1);
     258    org.hibernate.Query query2 = HibernateUtil.createSqlQuery(session, sql2);
     259    org.hibernate.Query query3 = HibernateUtil.createSqlQuery(session, sql3);
     260   
     261    // Divide the query into chunks since the database driver may not support
     262    // an arbitrary large number of parameters
     263    int startIndex = 0;
     264    int endIndex = Math.min(MAX_PARAMETERS_IN_QUERY, allIds.size());
     265    while (startIndex < allIds.size())
     266    {
     267      List<Integer> sublist = allIds.subList(startIndex, endIndex);
     268      query1.setParameterList("ids", sublist, TypeWrapper.INTEGER.getHibernateType());
     269      query2.setParameterList("ids", sublist, TypeWrapper.INTEGER.getHibernateType());
     270      query3.setParameterList("ids", sublist, TypeWrapper.INTEGER.getHibernateType());
    241271     
    242       Item itemType = Item.fromValue((Integer)row[1]);
     272      for (Integer id : (List<Integer>)query1.list())
     273      {
     274        possibleAnnotationSets.remove(id);
     275      }
     276      for (Integer id : (List<Integer>)query2.list())
     277      {
     278        possibleAnnotationSets.remove(id);
     279      }
     280      for (Integer id : (List<Integer>)query3.list())
     281      {
     282        possibleAnnotationSets.remove(id);
     283      }
     284   
     285      startIndex = endIndex;
     286      endIndex = Math.min(startIndex + MAX_PARAMETERS_IN_QUERY, allIds.size());
     287    }
     288   
     289    if (progress != null)
     290    {
     291      progress.display(50, "Found " + possibleAnnotationSets.size() + " annotations sets without primary or inherited annotations");
     292    }
     293    if (possibleAnnotationSets.size() == 0) return 0;
     294   
     295    // Divide the remaining annotation set ids into one list per table to delete from
     296    Map<String, List<Integer>> tables = new HashMap<String, List<Integer>>();
     297    for (Map.Entry<Integer, Integer> entry : possibleAnnotationSets.entrySet())
     298    {
     299      Integer id = entry.getKey();
     300      Item itemType = Item.fromValue(entry.getValue());
     301     
    243302      PersistentClass pClass = HibernateUtil.getClassMapping(itemType.getDataClass().getName());
    244303      String table = pClass.getTable().getName();
     304      List<Integer> idsInTable = tables.get(table);
     305      if (idsInTable == null)
     306      {
     307        idsInTable = new ArrayList<Integer>();
     308        tables.put(table, idsInTable);
     309      }
     310      idsInTable.add(id);
     311    }
     312   
     313    int totalCount = possibleAnnotationSets.size();
     314    // Now it is finally time to delete from the "AnnotationSets" table
     315    // and nullify the reference from the item tables
     316    String deleteSql = "delete from [AnnotationSets] where [id] in (:ids)";
     317    org.hibernate.Query deleteQuery = HibernateUtil.createSqlQuery(session, deleteSql);
     318   
     319    for (Map.Entry<String, List<Integer>> entry : tables.entrySet())
     320    {
     321      String table = entry.getKey();
     322      List<Integer> ids = entry.getValue();
     323
     324      String nullifySql = "update [" + table + "] set [annotationset_id] = null where [annotationset_id] in (:ids)";
     325      org.hibernate.Query nullifyQuery = HibernateUtil.createSqlQuery(session, nullifySql);
     326
     327      startIndex = 0;
     328      endIndex = Math.min(MAX_PARAMETERS_IN_QUERY, ids.size());
    245329     
    246       if (tables.containsKey(table)) count += tables.get(table);
    247       tables.put(table, count);
    248     }
    249     if (progress != null) progress.append(" [" + totalCount + "]");
    250      
    251     // Unlink (set annotationset_id=null) on item tables
    252     long numUnlinked = 0;
    253     for (Map.Entry<String, Long> entry : tables.entrySet())
    254     {
    255       ThreadSignalHandler.checkInterrupted();
    256       Long count = entry.getValue();
    257       if (count > 0)
    258       {
    259         String table = entry.getKey();
     330      while (startIndex < ids.size())
     331      {
     332        List<Integer> sublist = ids.subList(startIndex, endIndex);
     333       
    260334        if (progress != null)
    261335        {
    262           int percent = (int)(10+(60*numUnlinked)/totalCount);
    263           progress.display(percent, "Unlinking " + count + " empty annotation sets from " + table + " table...");
     336          int percent = (int)(60+(30*numDeleted)/totalCount);
     337          progress.display(percent, "Unlinking " + ids.size() + " empty annotation sets from [" + table + "] table...");
    264338        }
    265         String nullifySql = "update [" + table + "] set [annotationset_id] = null where "+filter.replace("@COL", "[annotationset_id]");
    266         org.hibernate.Query nullifyQuery = HibernateUtil.createSqlQuery(session, nullifySql);
    267         int rowsUpdated = nullifyQuery.executeUpdate();
    268         if (progress != null) progress.append(" ["+rowsUpdated + "]");
    269         numUnlinked += rowsUpdated;
    270       }
    271     }
    272      
    273     if (totalCount > 0)
    274     {
    275       if (progress != null)
    276       {
    277         progress.display(75, "Deleting " + totalCount + " empty annotation sets...");
    278       }
    279      
    280       String deleteSql = "delete from [AnnotationSets] where "+filter.replace("@COL", "[id]");
    281       org.hibernate.Query deleteQuery = HibernateUtil.createSqlQuery(session, deleteSql);
    282       numDeleted = deleteQuery.executeUpdate();
    283       if (progress != null) progress.append(" ["+numDeleted + "]");
    284     }
    285      
     339       
     340        deleteQuery.setParameterList("ids", sublist, TypeWrapper.INTEGER.getHibernateType());
     341        nullifyQuery.setParameterList("ids", sublist, TypeWrapper.INTEGER.getHibernateType());
     342        nullifyQuery.executeUpdate();
     343        deleteQuery.executeUpdate();
     344       
     345        numDeleted += sublist.size();
     346        startIndex = endIndex;
     347        endIndex = Math.min(startIndex + MAX_PARAMETERS_IN_QUERY, ids.size());
     348      }
     349    }
     350   
     351    if (progress != null)
     352    {
     353      progress.display(100, "Deleted " + numDeleted + " unused annotation sets");
     354    }
     355   
    286356    return numDeleted;
    287357  }
Note: See TracChangeset for help on using the changeset viewer.