Changeset 3878
- Timestamp:
- Oct 25, 2007, 11:57:31 AM (15 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/plugins/core/net/sf/basedb/plugins/MedianRatioNormalization.java
r3679 r3878 26 26 package net.sf.basedb.plugins; 27 27 28 import java.sql.SQLException; 28 29 import java.util.ArrayList; 29 30 import java.util.Arrays; … … 60 61 import net.sf.basedb.core.plugin.Request; 61 62 import net.sf.basedb.core.plugin.Response; 62 import net.sf.basedb.core.query.Aggregations;63 63 import net.sf.basedb.core.query.Dynamic; 64 import net.sf.basedb.core.query.Expression; 64 65 import net.sf.basedb.core.query.Expressions; 65 import net.sf.basedb.core.query.JoinType;66 66 import net.sf.basedb.core.query.Orders; 67 67 import net.sf.basedb.core.query.Restriction; 68 68 import net.sf.basedb.core.query.Restrictions; 69 import net.sf.basedb.core.query.Select;70 69 import net.sf.basedb.core.query.Selects; 71 70 import net.sf.basedb.core.query.SqlResult; … … 73 72 74 73 /** 75 @author enell 74 @author enell, Nicklas 76 75 @version 2.0 77 76 @base.modified $Date$ … … 93 92 "The normalizer lets you exclude a percentage of the highest and lowest intensities as wel" + 94 93 "l as all spots with at least one intensity below a fixed value.", 95 " 1.0",94 "2.5", 96 95 "2006, Base 2 development team", 97 96 null, … … 189 188 String childName = Values.getString((String)job.getValue(CHILD_NAME), source.getName()); 190 189 String childDescription = (String)job.getValue(CHILD_DESCRIPTION); 191 192 // Create Transformation 193 Transformation t = source.newTransformation(getCurrentJob(dc)); 194 t.setName(about.getName()); 195 dc.saveItem(t); 196 197 // Create the normalized bioassay set 198 BioAssaySet child = t.newProduct(null, "new", true); 190 int blockGroupSize = (Integer) job.getValue(blockGroupParameter.getName()); 191 float minIntensity = (Float)job.getValue(minIntensityParameter.getName()); 192 float lowExclude = (Float) job.getValue(lowExcludeParameter.getName()); 193 float highExclude = (Float) job.getValue(highExcludeParameter.getName()); 194 Job thisJob = getCurrentJob(dc); 195 196 BioAssaySet child = normalize(dc, source, thisJob, minIntensity, lowExclude, highExclude, blockGroupSize, progress); 199 197 child.setName(childName); 200 198 child.setDescription(childDescription); 201 dc.saveItem(child);202 203 // Batcher for inserting normalized data204 batcher = child.getSpotBatcher();205 206 Select ratio = Selects.expression(Expressions.divide(Dynamic.column(VirtualColumn.channel(1)), Dynamic.column(VirtualColumn.channel(2))), "ratio");207 Restriction intensityRestriction = Restrictions.and208 (209 Restrictions.gt(Dynamic.column(VirtualColumn.channel(1)), Expressions.aFloat((Float) job.getValue(minIntensityParameter.getName()))),210 Restrictions.gt(Dynamic.column(VirtualColumn.channel(2)), Expressions.aFloat((Float) job.getValue(minIntensityParameter.getName())))211 );212 int blockGroupSize = (Integer) job.getValue(blockGroupParameter.getName());213 float lowExclude = (Float) job.getValue(lowExcludeParameter.getName());214 float highExclude = (Float) job.getValue(highExcludeParameter.getName());215 216 DynamicSpotQuery query;217 DynamicResultIterator resultIter;218 query = source.getSpotData();219 query.restrict(intensityRestriction);220 long numSpots = query.count(dc);221 int normalizedSpots = 0;222 if (progress != null) progress.display((int)(normalizedSpots / numSpots * 100), normalizedSpots + " spots normalized");223 224 List<BioAssay> assays = source.getBioAssays().list(dc);225 for (BioAssay assay : assays)226 {227 query = assay.getSpotData();228 query.restrict(intensityRestriction);229 query.joinRawData(JoinType.LEFT);230 query.select(Selects.expression(Aggregations.max(Dynamic.rawData("block")), "max"));231 query.select(Selects.expression(Aggregations.min(Dynamic.rawData("block")), "min"));232 resultIter = query.iterate(dc);233 SqlResult result = resultIter.next();234 int maxBlock = result.getInt(resultIter.getIndex("max"));235 int minBlock = result.getInt(resultIter.getIndex("min"));236 237 for (int i = minBlock; i <= maxBlock; i += blockGroupSize)238 {239 Restriction blockRestriction = Restrictions.between240 (241 Expressions.selected(Dynamic.selectRawData("block")),242 Expressions.integer(i),243 Expressions.integer(i+blockGroupSize)244 );245 246 query = assay.getSpotData();247 query.joinRawData(JoinType.LEFT);248 query.select(Dynamic.select(VirtualColumn.channel(1)));249 query.select(Dynamic.select(VirtualColumn.channel(2)));250 query.select(Dynamic.selectRawData("block"));251 query.select(ratio);252 query.restrict(intensityRestriction);253 query.restrict(blockRestriction);254 query.order(Orders.asc(Expressions.selected(ratio)));255 256 int count = (int)query.count(dc);257 if (count <= 0) continue;258 int lowCount = (int) Math.floor(count * lowExclude / 100);259 int highCount = (int) Math.floor(count * highExclude / 100);260 261 // check count > 0262 int include = count - lowCount - highCount;263 if (include <= 0) continue;264 double median;265 if (include % 2 == 0)266 {267 query.setFirstResult((include / 2) - 1 + lowCount);268 query.setMaxResults(2);269 DynamicResultIterator iter = query.iterate(dc);270 median = Math.sqrt(iter.next().getFloat(iter.getIndex("ratio")) * iter.next().getFloat(iter.getIndex("ratio")));271 }272 else273 {274 query.setFirstResult(((include - 1) / 2) + lowCount);275 query.setMaxResults(1);276 DynamicResultIterator iter = query.iterate(dc);277 median = iter.next().getFloat(iter.getIndex("ratio"));278 }279 280 float sqrtMedRatio = (float) Math.sqrt(median);281 float invSqrtMedRatio = 1 / sqrtMedRatio;282 Select newCh1 = Selects.expression(Expressions.multiply(Expressions.selected(Dynamic.select(VirtualColumn.channel(1))),283 Expressions.aFloat(invSqrtMedRatio)), "newCh1");284 Select newCh2 = Selects.expression(Expressions.multiply(Expressions.selected(Dynamic.select(VirtualColumn.channel(2))),285 Expressions.aFloat(sqrtMedRatio)), "newCh2");286 287 query = assay.getSpotData();288 query.joinRawData(JoinType.LEFT);289 query.select(Dynamic.select(VirtualColumn.COLUMN));290 query.select(Dynamic.select(VirtualColumn.POSITION));291 query.select(newCh1);292 query.select(newCh2);293 query.restrict(intensityRestriction);294 query.restrict(blockRestriction);295 query.setParameter("low", new Integer(i), Type.INT);296 query.setParameter("high", new Integer(i + blockGroupSize), Type.INT);297 298 DynamicResultIterator iter = query.iterate(dc);299 int column = iter.getIndex(VirtualColumn.COLUMN.getName());300 int position = iter.getIndex(VirtualColumn.POSITION.getName());301 int ch1 = iter.getIndex("newCh1");302 int ch2 = iter.getIndex("newCh2");303 while (iter.hasNext())304 {305 SqlResult r = iter.next();306 batcher.insert(r.getShort(column), r.getInt(position), r.getFloat(ch1), r.getFloat(ch2));307 normalizedSpots++;308 }309 if (progress != null) progress.display((int)(normalizedSpots * 100 / numSpots), normalizedSpots + " spots normalized");310 }311 }312 if (progress != null) progress.append("\n");313 batcher.close();314 199 dc.commit(); 200 int normalizedSpots = child.getNumSpots(); 315 201 response.setDone(normalizedSpots + " spots normalized, " + (source.getNumSpots() - normalizedSpots) + " spots removed"); 316 202 } … … 408 294 // ------------------------------------------- 409 295 296 /** 297 Normalise the source bioassay set using MEDIAN-RATIO normalization. 298 @param dc The DbControl to use for database access 299 @param source The source bioassay set that is going to be normalized 300 @param job The job that is doing the normalization, or null 301 @param minIntensity All spots which have a lower intensity value in either channel 302 will be filtered out 303 @param lowExclude A percentage of the spots with lowest ratio that are excuded in the 304 median calculation 305 @param highExclude A percentage of the spots with highest ratio that are excluded in the 306 median calculation 307 @param blockGroupSize 308 @return The normalized bioassayset 309 @since 2.5 310 */ 311 public BioAssaySet normalize(DbControl dc, BioAssaySet source, Job job, float minIntensity, float lowExclude, float highExclude, int blockGroupSize, ProgressReporter progress) 312 { 313 if (progress != null) progress.display(0, "Preparing to normalize..."); 314 315 // Create Transformation 316 Transformation t = source.newTransformation(getCurrentJob(dc)); 317 t.setName(about.getName()); 318 dc.saveItem(t); 319 320 // Create the normalized bioassay set 321 BioAssaySet child = t.newProduct(null, "new", true); 322 dc.saveItem(child); 323 324 // Batcher for inserting normalized data 325 SpotBatcher batcher = child.getSpotBatcher(); 326 327 // Expressions used to get data 328 Expression block = Dynamic.rawData("block"); 329 Expression ch1 = Dynamic.column(VirtualColumn.channel(1)); 330 Expression ch2 = Dynamic.column(VirtualColumn.channel(2)); 331 // ratio = ch1 / ch2 332 Expression ratio = Expressions.divide(ch1, ch2); 333 334 // Create restriction: ch1 > minIntensity and ch2 > minIntensity 335 Restriction intensityRestriction = Restrictions.and 336 ( 337 Restrictions.gt(Dynamic.column(VirtualColumn.channel(1)), Expressions.aFloat(minIntensity)), 338 Restrictions.gt(Dynamic.column(VirtualColumn.channel(2)), Expressions.aFloat(minIntensity)) 339 ); 340 341 // Create restriction: column = :bioAssayColumn 342 Restriction bioAssayRestriction = Restrictions.eq( 343 Dynamic.column(VirtualColumn.COLUMN), 344 Expressions.parameter("bioAssayColumn") 345 ); 346 347 // Count number of spots that is going to be normalized 348 DynamicSpotQuery query = source.getSpotData(); 349 query.restrict(intensityRestriction); 350 long numSpots = query.count(dc); 351 int normalizedSpots = 0; 352 if (progress != null) progress.display((int)(normalizedSpots / numSpots * 100), normalizedSpots + " spots normalized"); 353 354 // Create query to retrieve spot data: COLUMN, POSITION, ch1, ch2, block 355 // We use a parameter to restrict the query to return data for one bioassay at a time 356 query.select(Dynamic.select(VirtualColumn.POSITION)); 357 query.select(Selects.expression(ch1, "ch1")); 358 query.select(Selects.expression(ch2, "ch2")); 359 query.select(Selects.expression(block, "block")); 360 query.restrict(bioAssayRestriction); 361 query.order(Orders.asc(block)); 362 query.order(Orders.asc(ratio)); 363 364 // Normalize one bioassay at a time 365 List<BioAssay> assays = source.getBioAssays().list(dc); 366 try 367 { 368 for (BioAssay assay : assays) 369 { 370 // Prepare list for holding data 371 int assaySpots = assay.getNumSpots(); 372 List<SpotData> data = new ArrayList<SpotData>(assaySpots); 373 374 // Load spot data for this bioassay 375 short bioassayColumn = assay.getDataCubeColumnNo(); 376 query.setParameter("bioAssayColumn", (int)bioassayColumn, Type.INT); 377 378 DynamicResultIterator it = query.iterate(dc); 379 int positionIndex = it.getIndex(VirtualColumn.POSITION.getName()); 380 int ch1Index = it.getIndex("ch1"); 381 int ch2Index = it.getIndex("ch2"); 382 int blockIndex = it.getIndex("block"); 383 384 // Copy bioassay data to SpotData objects 385 while (it.hasNext()) 386 { 387 SqlResult r = it.next(); 388 SpotData spot = new SpotData(r.getInt(positionIndex), 389 r.getFloat(ch1Index), r.getFloat(ch2Index), r.getInt(blockIndex)); 390 data.add(spot); 391 } 392 it.close(); 393 394 // Continue with next bioassay if there is no data 395 int dataSize = data.size(); 396 if (dataSize == 0) continue; 397 // Get range of block numbers - NOTE! query must return spots sorted in block order 398 int minBlock = data.get(0).block; 399 int maxBlock = data.get(data.size()-1).block; 400 401 int fromIndex = 0; 402 int toIndex = 0; 403 int fromBlock = minBlock; 404 // Normalize each block range independently: fromBlock + blockGroupSize --> toBlock 405 while (fromBlock <= maxBlock) 406 { 407 // Find start and end index for current block range 408 int toBlock = fromBlock + blockGroupSize - 1; 409 if (toBlock > maxBlock) toBlock = maxBlock; 410 fromIndex = toIndex; 411 // Data is sorted by block; find index of last spot with: block <= toBlock 412 // spot given by toIndex should not be included 413 while (toIndex < dataSize && data.get(toIndex).block <= toBlock) 414 { 415 ++toIndex; 416 } 417 418 if (toIndex > fromIndex) 419 { 420 // Exlude low and high values 421 int count = toIndex - fromIndex; 422 int lowIndex = fromIndex + (int)Math.floor(count * lowExclude / 100); 423 int highIndex = toIndex - (int)Math.floor(count * highExclude / 100); 424 425 if (highIndex > lowIndex) 426 { 427 // Find median ratio in the current block range 428 double median = median(data.subList(lowIndex, highIndex)); 429 430 // Correct all spot intensities 431 float sqrtMedRatio = (float)Math.sqrt(median); 432 float invSqrtMedRatio = 1 / sqrtMedRatio; 433 for (int j = fromIndex; j < toIndex; ++j) 434 { 435 SpotData spot = data.get(j); 436 double newCh1 = spot.ch1 * invSqrtMedRatio; 437 double newCh2 = spot.ch2 * sqrtMedRatio; 438 batcher.insert(bioassayColumn, spot.position, (float)newCh1, (float)newCh2); 439 } 440 } 441 normalizedSpots += toIndex - fromIndex; 442 if (progress != null) progress.display((int)((normalizedSpots * 100) / numSpots), normalizedSpots + " spots normalized"); 443 } 444 fromBlock = toBlock + 1; 445 } 446 } 447 batcher.flush(); 448 batcher.close(); 449 } 450 catch (SQLException e) 451 { 452 throw new BaseException(e); 453 } 454 return child; 455 } 456 410 457 private RequestInformation getConfigureJobParameters() 411 458 { … … 444 491 } 445 492 return configureJob; 446 } 447 493 } 494 495 private double median(List<SpotData> data) 496 { 497 int size = data.size(); 498 int half = size / 2; 499 if (size % 2 == 0) 500 { 501 return Math.sqrt(data.get(half - 1).ratio * data.get(half).ratio); 502 } 503 else 504 { 505 return data.get(half).ratio; 506 } 507 } 508 509 510 private static class SpotData 511 { 512 final int position; 513 final float ch1; 514 final float ch2; 515 final double ratio; 516 final int block; 517 518 SpotData(int position, float ch1, float ch2, int block) 519 { 520 this.position = position; 521 this.ch1 = ch1; 522 this.ch2 = ch2; 523 this.block = block; 524 this.ratio = ch1 / ch2; 525 } 526 } 448 527 }
Note: See TracChangeset
for help on using the changeset viewer.