Changeset 5094
- Timestamp: Sep 10, 2009, 2:38:45 PM (13 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/src/plugins/core/net/sf/basedb/plugins/Base1PluginExecuter.java
r4930 r5094 28 28 import net.sf.basedb.core.AnyToAny; 29 29 import net.sf.basedb.core.BaseException; 30 import net.sf.basedb.core.BioAssay;31 30 import net.sf.basedb.core.BioAssaySet; 32 31 import net.sf.basedb.core.BooleanParameterType; … … 34 33 import net.sf.basedb.core.DbControl; 35 34 import net.sf.basedb.core.Directory; 36 import net.sf.basedb.core.DynamicQuery;37 import net.sf.basedb.core.DynamicResultIterator;38 import net.sf.basedb.core.DynamicSpotQuery;39 35 import net.sf.basedb.core.Experiment; 40 36 import net.sf.basedb.core.ExtraValue; 41 import net.sf.basedb.core.ExtraValueType;42 37 import net.sf.basedb.core.File; 43 38 import net.sf.basedb.core.FileParameterType; … … 50 45 import net.sf.basedb.core.ItemQuery; 51 46 import net.sf.basedb.core.Job; 52 import net.sf.basedb.core.MappingBatcher;53 47 import net.sf.basedb.core.Path; 54 48 import net.sf.basedb.core.PathParameterType; … … 56 50 import net.sf.basedb.core.PluginConfiguration; 57 51 import net.sf.basedb.core.PluginParameter; 58 import net.sf.basedb.core.PositionBatcher;59 52 import net.sf.basedb.core.ProgressReporter; 60 import net.sf.basedb.core.ReporterBatcher;61 import net.sf.basedb.core.ReporterList;62 53 import net.sf.basedb.core.RequestInformation; 63 import net.sf.basedb.core.SpotBatcher;64 import net.sf.basedb.core.SpotExtraValueBatcher;65 54 import net.sf.basedb.core.StringParameterType; 66 55 import net.sf.basedb.core.Transformation; 67 import net.sf.basedb.core.Type;68 56 import net.sf.basedb.core.User; 69 57 import net.sf.basedb.core.Version; 70 import net.sf.basedb.core.VirtualColumn;71 import net.sf.basedb.core.VirtualTable;72 58 import net.sf.basedb.core.data.RawData; 73 59 import net.sf.basedb.core.data.ReporterData; … … 80 66 import net.sf.basedb.core.plugin.Request; 81 67 import net.sf.basedb.core.plugin.Response; 82 import net.sf.basedb.core.query.Dynamic;83 68 import net.sf.basedb.core.query.Hql; 84 import net.sf.basedb.core.query.JoinType;85 69 import 
net.sf.basedb.core.query.Orders; 86 import net.sf.basedb.core.query.SqlResult;87 70 import net.sf.basedb.core.signal.SignalException; 88 71 import net.sf.basedb.core.signal.SignalHandler; … … 90 73 import net.sf.basedb.core.signal.ThreadSignalHandler; 91 74 import net.sf.basedb.util.ChainedProgressReporter; 92 import net.sf.basedb.util.Diff3;93 75 import net.sf.basedb.util.Enumeration; 94 76 import net.sf.basedb.util.FileUtil; … … 102 84 import net.sf.basedb.util.export.spotdata.MatrixBaseFileExporter; 103 85 import net.sf.basedb.util.export.spotdata.SerialBaseFileExporter; 86 import net.sf.basedb.util.importer.spotdata.BaseFileImporter; 104 87 import net.sf.basedb.util.parser.FlatFileParser; 105 88 … … 126 109 import java.util.Arrays; 127 110 import java.util.Collection; 128 import java.util.Collections;129 111 import java.util.EnumSet; 130 112 import java.util.HashMap; … … 1360 1342 throws SQLException, IOException 1361 1343 { 1362 // Position -> ReporterID mapping for the parent bioassay set 1363 Map<Integer, Integer> parentReporterPositions = new HashMap<Integer, Integer>(); 1364 boolean parentHasNullReporter = false; 1365 boolean parentHasZeroReporter = false; 1366 boolean mapZeroToNull = false; 1367 1368 // Position -> ReporterID mapping for the child bioassay set 1369 Map<Integer, Integer> childReporterPositions = new HashMap<Integer, Integer>(); 1370 // BioAssayID -> Column no mapping for the child bioassay set 1371 Map<Integer, Short> bioAssayColumns = new HashMap<Integer, Short>(); 1372 // Column mapping from parent -> child bioassay 1373 Map<Short, Short> columnMapping = new HashMap<Short, Short>(); 1374 // If the "assays" section contains a child->parent mapping for bioassays 1375 boolean hasParentAssayMapping = false; 1376 // If all position->reporter mappings are the same on the child and parent bioassay set 1377 boolean hasSamePositionReporterMapping = true; 1378 // If at least one spot section was found 1379 boolean hasSpotSection = false; 1380 // 
Information about child bioassays 1381 Map<Integer, ChildBioAssay> childBioAssays = new HashMap<Integer, ChildBioAssay>(); 1382 1383 BioAssaySet source = t.getSource(); 1384 if (resultTransform == null) resultTransform = source.getIntensityTransform(); 1385 int numSections = 0; 1386 long totalBytes = stdout.getSize(); 1387 long progressReportInterval = totalBytes / 100; 1388 long nextProgressReport = progressReportInterval; 1389 long parsedBytes = 0; 1390 1391 // Parse the stdout file to verify child mappings 1392 FlatFileParser ffp = getInitializedFlatFileParser(stdout.getDownloadStream(0), stdout.getCharacterSet()); 1393 while (ffp.hasMoreSections()) 1394 { 1395 checkInterrupted(); 1396 numSections++; 1397 FlatFileParser.Line section = ffp.nextSection(); 1398 ffp.parseHeaders(); 1399 1400 if ("assays".equals(section.name())) 1401 { 1402 /* 1403 Optional section 1404 If it exists it must define 'id' and 'name' of child bioassays 1405 The 'id' must be the same as the id of a parent bioassay, 1406 UNLESS a 'parents' column is present 1407 */ 1408 List<String> columns = Arrays.asList(ffp.getHeader("columns").split("\\t")); 1409 int parentIndex = columns.indexOf("parents"); 1410 int idIndex = columns.indexOf("id"); 1411 int nameIndex = columns.indexOf("name"); 1412 hasParentAssayMapping = parentIndex != -1; 1413 if (idIndex == -1) 1414 { 1415 throw new BaseException("Missing column 'id' in section 'assays' at line " 1416 + ffp.getParsedLines() + " in file '" + stdout.getName() + "'"); 1417 } 1418 if (nameIndex == -1) 1419 { 1420 throw new BaseException("Missing column 'name' in section 'assays' at line " 1421 + ffp.getParsedLines() + " in file '" + stdout.getName() + "'"); 1422 } 1423 1424 // Read data part of the 'assays' section 1425 // Each line has ID, NAME and PARENTS (optional) 1426 ffp.setMinDataColumns(columns.size()); 1427 FlatFileParser.Data data; 1428 while ((data = ffp.nextData()) != null) 1429 { 1430 int assayId = Values.getInt(data.get(idIndex)); 1431 
1432 // Don't allow duplicate assay ID:s 1433 if (childBioAssays.containsKey(assayId)) 1434 { 1435 throw new BaseException("Duplicate assay id (" + assayId + 1436 ") found in section 'assays' at line " + ffp.getParsedLines() + 1437 " in file '" + stdout.getName() + "'"); 1438 } 1439 1440 // Extract name and parents and store as ChildBioAssay objects 1441 String name = data.get(nameIndex); 1442 Set<Integer> parents = null; 1443 if (hasParentAssayMapping) 1444 { 1445 parents = new HashSet<Integer>(Arrays.asList(Values.getInt(data.get(parentIndex).split("/")))); 1446 Integer parentId = null; 1447 try 1448 { 1449 Iterator<Integer> it = parents.iterator(); 1450 while (it.hasNext()) 1451 { 1452 parentId = it.next(); 1453 BioAssay.getById(dc, parentId); 1454 } 1455 } 1456 catch (ItemNotFoundException ex) 1457 { 1458 throw new BaseException("Can't find parent bioassay '" + parentId + 1459 "' in section 'assays' at line " + ffp.getParsedLines() + " in file '" + 1460 stdout.getName() + "'" 1461 ); 1462 } 1463 } 1464 else 1465 { 1466 try 1467 { 1468 BioAssay.getById(dc, assayId); 1469 } 1470 catch (ItemNotFoundException ex) 1471 { 1472 throw new BaseException("Can't find bioassay '" + assayId + 1473 "' in section 'assays' at line " + ffp.getParsedLines() + " in file '" + 1474 stdout.getName() + "'" 1475 ); 1476 } 1477 } 1478 childBioAssays.put(assayId, new ChildBioAssay(assayId, name, parents)); 1479 } 1480 } 1481 else if ("spots".equals(section.name())) 1482 { 1483 checkInterrupted(); 1484 /* 1485 Optional section. May appear more than once. 1486 'position' and 'reporter' are required columns in the 'columns' header. 1487 'assays' is a required column header, that has the ID of the assays 1488 that have spot data in this section. 
1489 */ 1490 if (!hasSpotSection) 1491 { 1492 hasSpotSection = true; 1493 // Load the position -> reporter mapping from the parent bioassay set 1494 DynamicQuery positionQuery = source.getPositionData(); 1495 positionQuery.select(Dynamic.select(VirtualColumn.POSITION)); 1496 positionQuery.select(Dynamic.select(VirtualColumn.REPORTER_ID)); 1497 DynamicResultIterator positionQueryIterator = positionQuery.iterate(dc); 1498 while (positionQueryIterator.hasNext()) 1499 { 1500 SqlResult result = positionQueryIterator.next(); 1501 Integer reporterId = (Integer)result.getObject(2); 1502 if (reporterId == null) 1503 { 1504 parentHasNullReporter = true; 1505 } 1506 else if (reporterId.intValue() == 0) 1507 { 1508 parentHasZeroReporter = true; 1509 } 1510 parentReporterPositions.put(result.getInt(1), reporterId); 1511 } 1512 1513 1514 // If we have null reporter (but none with ID=0) in the export 1515 // we may need to convert 0->null when importing since some 1516 // BASE1 plug-ins convert null -> 0 1517 mapZeroToNull = parentHasNullReporter && !parentHasZeroReporter; 1518 } 1519 1520 // Get the "position" and "reporter" columns from the "column" header 1521 List<String> columns = Arrays.asList(ffp.getHeader("columns").split("\\t")); 1522 int positionIndex = columns.indexOf("position"); 1523 int reporterIndex = columns.indexOf("reporter"); 1524 1525 // Error if the columns are missing 1526 if (positionIndex == -1) 1527 { 1528 throw new BaseException("Missing column 'position' in section 'spots' at line " + 1529 ffp.getParsedLines() + " in file '" + stdout.getName() + "'"); 1530 } 1531 if (reporterIndex == -1) 1532 { 1533 throw new BaseException("Missing column 'reporter' in section 'spots' at line " + 1534 ffp.getParsedLines() + " in file '" + stdout.getName() + "'"); 1535 } 1536 1537 // Get the "assays" header 1538 List<Integer> assays = Arrays.asList(Values.getInt(ffp.getHeader("assays").split("\\t"))); 1539 if (assays == null || assays.size() == 0) 1540 { 1541 throw new 
BaseException("Missing header 'assays' in section 'spots' at line " + 1542 ffp.getParsedLines() + " in file '" + stdout.getName() + "'"); 1543 } 1544 1545 for (Integer assayId : assays) 1546 { 1547 if (!childBioAssays.containsKey(assayId)) 1548 { 1549 try 1550 { 1551 BioAssay.getById(dc, assayId); 1552 } 1553 catch (ItemNotFoundException ex) 1554 { 1555 throw new BaseException("Can't find bioassay '" + assayId + 1556 "' in section 'spots' at line " + ffp.getParsedLines() + " in file '" + 1557 stdout.getName() + "'" 1558 ); 1559 } 1560 childBioAssays.put(assayId, new ChildBioAssay(assayId, null, null)); 1561 } 1562 } 1563 1564 // Parse data and check if each position has same reporter as before 1565 FlatFileParser.Data data; 1566 while ((data = ffp.nextData()) != null) 1567 { 1568 parsedBytes = ffp.getParsedBytes(); 1569 if (progress != null && parsedBytes >= nextProgressReport) 1570 { 1571 nextProgressReport = parsedBytes + progressReportInterval; 1572 int percent = (int)((30L * parsedBytes) / totalBytes); 1573 progress.display(percent, "Importing spot data (first pass): " + 1574 Values.formatBytes(parsedBytes) + " of " + Values.formatBytes(totalBytes)); 1575 } 1576 1577 checkInterrupted(); 1578 Integer position = Values.getInteger(data.get(positionIndex), null); 1579 Integer newReporterId = Values.getInteger(data.get(reporterIndex), null); 1580 1581 // Error if position is missing 1582 if (position == null) 1583 { 1584 throw new BaseException("Missing or invalid value for 'position' in section 'spots' at line " + 1585 ffp.getParsedLines() + " in file '" + stdout.getName() + "'"); 1586 } 1587 // Convert 0 -> null since some BASE1 plug-ins convert null -> 0 1588 if (mapZeroToNull && newReporterId != null && newReporterId.intValue() == 0) 1589 { 1590 newReporterId = null; 1591 } 1592 1593 // Is the new reporter the same as the existing? 
1594 // A 'null' reporter is considered the same as another 'null' reporter 1595 if (hasSamePositionReporterMapping) 1596 { 1597 Integer existingReporterId = parentReporterPositions.get(position); 1598 if (!Diff3.isEqualOrNull(newReporterId, existingReporterId)) 1599 { 1600 hasSamePositionReporterMapping = false; 1601 parentReporterPositions.clear(); 1602 } 1603 } 1604 1605 // Is the new reporter already registered with the same position? 1606 if (!childReporterPositions.containsKey(position)) 1607 { 1608 childReporterPositions.put(position, newReporterId); 1609 } 1610 else 1611 { 1612 Integer registeredReporterId = childReporterPositions.get(position); 1613 // Error if same position has different reporters in the stdout file 1614 if (!Diff3.isEqualOrNull(newReporterId, registeredReporterId)) 1615 { 1616 throw new BaseException("Invalid value for 'reporter' (" + newReporterId + 1617 ") for position '" + position + "' in section 'spots' at line " + 1618 ffp.getParsedLines() + " in file '" + stdout.getName() + 1619 "'. Expected '" + registeredReporterId + "'"); 1620 } 1621 } 1622 } 1623 } 1624 else if ("reporterlist".equals(section.name())) 1625 { 1626 // Reporter lists are imported immediately 1627 // Get the "reporterId" and "score" columns from the "column" header 1628 List<String> columns = Arrays.asList(ffp.getHeader("columns").split("\\t")); 1629 int reporterIndex = columns.indexOf("reporterId"); 1630 int scoreIndex = columns.indexOf("score"); 1631 1632 // Error if the reporterId column is missing 1633 if (reporterIndex == -1) 1634 { 1635 throw new BaseException("Missing column 'reporterId' in section 'reporterlist' at line " + 1636 ffp.getParsedLines() + " in file '" + stdout.getName() + "'"); 1637 } 1638 1639 String name = Values.getStringOrNull(ffp.getHeader("name")); 1640 if (name == null) name = t.getName() + "." 
+ numSections; 1641 ReporterList list = ReporterList.getNew(dc); 1642 list.setName(name); 1643 list.setExperiment(t.getExperiment()); 1644 1645 FlatFileParser.Data data; 1646 ReporterBatcher reporters = ReporterBatcher.getNew(dc); 1647 long factor = hasSpotSection ? 30 : 100; 1648 // A second pass is needed when we have spot data; first pass = 0-30%, second pass=30-100% 1649 while ((data = ffp.nextData()) != null) 1650 { 1651 parsedBytes = ffp.getParsedBytes(); 1652 if (progress != null && parsedBytes >= nextProgressReport) 1653 { 1654 nextProgressReport = parsedBytes + progressReportInterval; 1655 int percent = (int)((factor * parsedBytes) / totalBytes); 1656 progress.display(percent, "Importing reporter list: " + 1657 Values.formatBytes(parsedBytes) + " of " + Values.formatBytes(totalBytes)); 1658 } 1659 checkInterrupted(); 1660 String reporterId = Values.getStringOrNull(data.get(reporterIndex)); 1661 1662 // Ignore missing reporters 1663 if (reporterId == null) continue; 1664 1665 ReporterData reporter = reporters.getByExternalId(reporterId, false); 1666 if (reporter == null) 1667 { 1668 throw new ItemNotFoundException("Reporter[externalId=" + reporterId + "] at line "+ 1669 ffp.getParsedLines() + " in file '" + stdout.getName() + "'"); 1670 } 1671 // If no score is provided we use the line number as some kind of 'rank' 1672 Float score = scoreIndex == -1 ? 1673 data.dataLineNo() : Values.getFloat(data.get(scoreIndex), null); 1674 ReporterProxy proxy = new ReporterProxy(); 1675 list.addReporter(reporter, score); 1676 } 1677 1678 dc.saveItem(list); 1679 AnyToAny any2Any = AnyToAny.getNew(dc, t, list, "reporterList." 
+ numSections, false); 1680 dc.saveItem(any2Any); 1681 } 1682 } 1683 1684 if (!hasSpotSection) return; 1685 1686 // Create the child bioassay set 1687 BioAssaySet child = null; 1688 boolean useNewDataCube = hasParentAssayMapping || !hasSamePositionReporterMapping; 1689 if (useNewDataCube) 1690 { 1691 child = t.newProduct("new", "new", false); 1692 } 1693 else 1694 { 1695 child = t.newProduct(null, "new", false); 1696 } 1697 child.setIntensityTransform(resultTransform); 1698 dc.saveItem(child); 1699 1700 // Create child bioassays 1701 for (ChildBioAssay childData : childBioAssays.values()) 1702 { 1703 BioAssay childBa = null; 1704 if (useNewDataCube) 1705 { 1706 Collection<BioAssay> parents = null; 1707 if (childData.parents == null) 1708 { 1709 parents = Collections.singletonList(BioAssay.getById(dc, childData.id)); 1710 } 1711 else 1712 { 1713 parents = new ArrayList<BioAssay>(childData.parents.size()); 1714 for (Integer parentId : childData.parents) 1715 { 1716 parents.add(BioAssay.getById(dc, parentId)); 1717 } 1718 } 1719 childBa = child.newBioAssay(parents); 1720 // Put column mappings 1721 short childColumn = childBa.getDataCubeColumnNo(); 1722 for (BioAssay parentBa : parents) 1723 { 1724 columnMapping.put(parentBa.getDataCubeColumnNo(), childColumn); 1725 } 1726 } 1727 else 1728 { 1729 BioAssay parentBa = BioAssay.getById(dc, childData.id); 1730 childBa = child.newBioAssay(parentBa); 1731 // Columns should be identical... 
put in the mapping just be be sure 1732 columnMapping.put(parentBa.getDataCubeColumnNo(), childBa.getDataCubeColumnNo()); 1733 } 1734 if (childData.name != null) childBa.setName(childData.name); 1735 dc.saveItem(childBa); 1736 bioAssayColumns.put(childData.id, childBa.getDataCubeColumnNo()); 1737 } 1738 1739 // Create new position -> reporter mapping 1740 if (useNewDataCube) 1741 { 1742 // Create position -> reporter mapping from the new data 1743 PositionBatcher posBatcher = child.getPositionBatcher(); 1744 ReporterProxy proxy = new ReporterProxy(); 1745 for (Map.Entry<Integer, Integer> entry : childReporterPositions.entrySet()) 1746 { 1747 checkInterrupted(); 1748 int position = entry.getKey(); 1749 Integer reporterId = entry.getValue(); 1750 if (reporterId == null) 1751 { 1752 posBatcher.insert(position, null); 1753 } 1754 else 1755 { 1756 proxy.setTheId(reporterId); 1757 posBatcher.insert(position, proxy); 1758 } 1759 } 1760 posBatcher.flush(); 1761 posBatcher.close(); 1762 1763 if (hasSamePositionReporterMapping) 1764 { 1765 // Create raw data mapping 1766 MappingBatcher mapBatcher = child.getMappingBatcher(); 1767 DynamicSpotQuery spotQuery = source.getSpotData(); 1768 spotQuery.joinRawData(JoinType.INNER); 1769 1770 spotQuery.select(Dynamic.select(VirtualColumn.COLUMN)); 1771 spotQuery.select(Dynamic.select(VirtualColumn.POSITION)); 1772 spotQuery.select(Dynamic.select(VirtualTable.RAWPARENTS, VirtualColumn.RAWDATA_ID)); 1773 1774 DynamicResultIterator spotIterator = spotQuery.iterate(dc); 1775 RawDataProxy rawProxy = new RawDataProxy(); 1776 while (spotIterator.hasNext()) 1777 { 1778 SqlResult result = spotIterator.next(); 1779 short parentColumn = result.getShort(1); 1780 int position = result.getInt(2); 1781 int rawDataId = result.getInt(3); 1782 Short childColumn = columnMapping.get(parentColumn); 1783 if (childColumn != null) 1784 { 1785 rawProxy.setTheId(rawDataId); 1786 mapBatcher.insert(childColumn, position, rawProxy); 1787 } 1788 } 1789 
mapBatcher.flush(); 1790 mapBatcher.close(); 1791 } 1792 } 1793 1794 // Clean up things that are no longer needed 1795 childBioAssays.clear(); 1796 childReporterPositions.clear(); 1797 parentReporterPositions.clear(); 1798 1799 SpotBatcher spotBatcher = child.getSpotBatcher(); 1800 HashMap<String, ExtraValueType> evtMap = new HashMap<String, ExtraValueType>(); 1801 ffp = getInitializedFlatFileParser(stdout.getDownloadStream(0), stdout.getCharacterSet()); 1802 nextProgressReport = 0; 1803 while (ffp.hasMoreSections()) 1804 { 1805 checkInterrupted(); 1806 FlatFileParser.Line section = ffp.nextSection(); 1807 ffp.parseHeaders(); 1808 1809 if (section.name().equals("assays")) 1810 { 1811 List<String> columns = Arrays.asList(ffp.getHeader("columns").split("\\t")); 1812 ffp.setMinDataColumns(columns.size()); 1813 // Loop to the end of this section 1814 while (ffp.nextData() != null) 1815 {} 1816 } 1817 else if (section.name().equals("spots")) 1818 { 1819 List<String> columns = Arrays.asList(ffp.getHeader("columns").split("\\t")); 1820 List<Integer> assays = Arrays.asList(Values.getInt(ffp.getHeader("assays").split("\\t"))); 1821 List<String> assayFields = Arrays.asList(ffp.getHeader("assayFields").split("\\t")); 1822 List<String> setExtraFloats = new ArrayList<String>(); 1823 if (ffp.getHeader("setExtraFloats") != null) 1824 { 1825 setExtraFloats = Arrays.asList(ffp.getHeader("setExtraFloats").split("\\t")); 1826 } 1827 ffp.setMinDataColumns(columns.size() - 1 + assays.size() * assayFields.size()); 1828 1829 boolean intCols = true; 1830 1831 int posCol = columns.indexOf("position"); 1832 int dataCol = columns.indexOf("assayData"); 1833 int[] intCol = new int[child.getRawDataType().getChannels()]; 1834 for (int i = 0; i < intCol.length; ++i) 1835 { 1836 int col = assayFields.indexOf("intensity"+(i+1)); 1837 if (col != -1) 1838 { 1839 intCol[i] = col; 1840 } 1841 else 1842 { 1843 intCols = false; 1844 } 1845 } 1846 int mCol = assayFields.indexOf("l2ratio1_2"); 1847 int 
aCol = assayFields.indexOf("l10intgmean1_2"); 1848 int[] extraFloatsCol = new int[setExtraFloats.size()]; 1849 1850 if (dataCol > -1) 1851 { 1852 if (!intCols && (mCol > -1 && aCol > -1)) 1853 { 1854 intCols = false; 1855 } 1856 else if (!intCols) 1857 { 1858 throw new BaseException( 1859 "Can't find the intensity column(s) or l2ratio1_2/l10intgmean1_2. No data could be imported."); 1860 } 1861 for (int i = 0; i < setExtraFloats.size(); i++) 1862 { 1863 int col = assayFields.indexOf(setExtraFloats.get(i)); 1864 if (col < 0) 1865 { 1866 throw new BaseException( 1867 "Can't find the column " + setExtraFloats.get(i) + ". No data could be inmported."); 1868 } 1869 extraFloatsCol[i] = col + dataCol; 1870 } 1871 } 1872 else 1873 { 1874 throw new BaseException("Can't find the column 'assayData' in header assayFields."); 1875 } 1876 1877 for (int i = 0; i < intCol.length; ++i) 1878 { 1879 intCol[i] += dataCol; 1880 } 1881 aCol += dataCol; 1882 mCol += dataCol; 1883 1884 List<SpotExtraValueBatcher<Float>> evBatcher = new ArrayList<SpotExtraValueBatcher<Float>>(setExtraFloats.size()); 1885 for (int i = 0; i < setExtraFloats.size(); i++) 1886 { 1887 String name = setExtraFloats.get(i); 1888 ExtraValueType evt = evtMap.get(name); 1889 if (evt == null) 1890 { 1891 try 1892 { 1893 evt = ExtraValueType.getByExternalId(dc, name); 1894 } 1895 catch (ItemNotFoundException e) 1896 { 1897 evt = ExtraValueType.getNew(dc, name, Type.FLOAT); 1898 evt.setName(name); 1899 dc.saveItem(evt); 1900 } 1901 evtMap.put(name, evt); 1902 } 1903 evBatcher.add(i, child.getSpotExtraValueBatcher(Float.class, evt, Job.getById(dc, job.getId()))); 1904 } 1905 1906 float[] intensities = new float[intCol.length]; 1907 while (ffp.hasMoreData()) 1908 { 1909 parsedBytes = ffp.getParsedBytes(); 1910 if (progress != null && parsedBytes >= nextProgressReport) 1911 { 1912 nextProgressReport = parsedBytes + progressReportInterval; 1913 int percent = 30 + (int)((70L * parsedBytes) / totalBytes); 1914 
progress.display(percent, "Importing spot data (second pass): " + 1915 Values.formatBytes(parsedBytes) + " of " + Values.formatBytes(totalBytes)); 1916 } 1917 checkInterrupted(); 1918 FlatFileParser.Data dataline = ffp.nextData(); 1919 int index = 0; 1920 Integer position = Values.getInteger(dataline.get(posCol), null); 1921 for (Integer assayId : assays) 1922 { 1923 // Only insert the spot if all intensity values are correct numbers 1924 boolean insertSpot = true; 1925 short dataCubeColumn = bioAssayColumns.get(assayId); 1926 if (intCols) 1927 { 1928 for (int i = 0; i < intCol.length; ++i) 1929 { 1930 float value = Values.getFloat(dataline.get(intCol[i] + index), Float.NaN); 1931 insertSpot &= !Float.isNaN(value); 1932 intensities[i] = value; 1933 } 1934 } 1935 else 1936 { 1937 float a = Values.getFloat(dataline.get(aCol + index), Float.NaN); 1938 float m = Values.getFloat(dataline.get(mCol + index), Float.NaN); 1939 insertSpot = !Float.isNaN(m) && !Float.isNaN(a); 1940 if (insertSpot) 1941 { 1942 // int2 = 10^a / 2^(0.5*m) 1943 // int1 = int2 * 2^m 1944 intensities[1] = (float)(Math.pow(10, a) / Math.pow(2, 0.5 * m)); 1945 intensities[0] = (float)(intensities[1] * Math.pow(2, m)); 1946 } 1947 } 1948 if (insertSpot) 1949 { 1950 spotBatcher.insert(dataCubeColumn, position, intensities); 1951 // Extra values 1952 for (int i = 0; i < evBatcher.size(); i++) 1953 { 1954 float value = Values.getFloat(dataline.get(extraFloatsCol[i] + index), Float.NaN); 1955 if (!Float.isNaN(value)) 1956 { 1957 evBatcher.get(i).insert(dataCubeColumn, position, value); 1958 } 1959 } 1960 } 1961 index += assayFields.size(); 1962 } 1963 } 1964 } 1965 } 1344 BaseFileImporter importer = new BaseFileImporter(); 1345 importer.setDbControl(dc); 1346 importer.setSourceFile(stdout); 1347 importer.setTransformation(t); 1348 importer.setIntensityTransform(resultTransform); 1349 importer.setProgressReporter(progress); 1350 importer.doImport(); 1966 1351 } 1967 1352
Note: See TracChangeset
for help on using the changeset viewer.