1 | <?xml version="1.0" encoding="UTF-8"?> |
---|
2 | <!DOCTYPE appendix PUBLIC |
---|
3 | "-//Dawid Weiss//DTD DocBook V3.1-Based Extension for XML and graphics inclusion//EN" |
---|
4 | "../../../../lib/docbook/preprocess/dweiss-docbook-extensions.dtd"> |
---|
5 | <!-- |
---|
6 | $Id: raw_data_types.xml 4005 2007-11-26 15:00:38Z martin $ |
---|
7 | |
---|
8 | Copyright (C) 2007 Nicklas Nordborg |
---|
9 | |
---|
10 | This file is part of BASE - BioArray Software Environment. |
---|
11 | Available at http://base.thep.lu.se/ |
---|
12 | |
---|
13 | BASE is free software; you can redistribute it and/or |
---|
14 | modify it under the terms of the GNU General Public License |
---|
15 | as published by the Free Software Foundation; either version 2 |
---|
16 | of the License, or (at your option) any later version. |
---|
17 | |
---|
18 | BASE is distributed in the hope that it will be useful, |
---|
19 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
21 | GNU General Public License for more details. |
---|
22 | |
---|
23 | You should have received a copy of the GNU General Public License |
---|
24 | along with this program; if not, write to the Free Software |
---|
25 | Foundation, Inc., 59 Temple Place - Suite 330, |
---|
26 | Boston, MA 02111-1307, USA. |
---|
27 | --> |
---|
28 | |
---|
29 | <appendix id="appendix.rawdatatypes"> |
---|
30 | <title>Platforms and raw-data-types.xml reference</title> |
---|
31 | |
---|
32 | <para> |
---|
33 | Raw data can be stored as files attached to items and/or in |
---|
34 | the database. The <classname docapi="net.sf.basedb.core">Platform</classname> |
---|
35 | item has information about this. For more information see |
---|
36 | <xref linkend="core_api.data_in_files" />. |
---|
37 | </para> |
---|
38 | |
---|
39 | <sect1 id="appendix.rawdatatypes.platforms"> |
---|
40 | <title>Default platforms/variants installed with BASE</title> |
---|
41 | |
---|
42 | <informaltable> |
---|
43 | <tgroup cols="7"> |
---|
44 | <colspec colname="platform.name" /> |
---|
45 | <colspec colname="platform.id" /> |
---|
46 | <colspec colname="variant.name" /> |
---|
47 | <colspec colname="variant.id" /> |
---|
48 | <colspec colname="filetype.item" /> |
---|
49 | <colspec colname="filetype.name" /> |
---|
50 | <colspec colname="filetype.id" /> |
---|
51 | <thead> |
---|
52 | <row> |
---|
53 | <entry namest="platform.name" nameend="platform.id">Platform</entry> |
---|
54 | <entry namest="variant.name" nameend="variant.id">Variants</entry> |
---|
55 | <entry namest="filetype.item" nameend="filetype.id">Data file types</entry> |
---|
56 | </row> |
---|
57 | <row> |
---|
58 | <entry>Name</entry> |
---|
59 | <entry>ID</entry> |
---|
60 | <entry>Name</entry> |
---|
61 | <entry>ID</entry> |
---|
62 | <entry>Item</entry> |
---|
63 | <entry>Name</entry> |
---|
64 | <entry>ID</entry> |
---|
65 | </row> |
---|
66 | </thead> |
---|
67 | <tbody> |
---|
68 | <row> |
---|
69 | <entry morerows="2">Generic</entry> |
---|
70 | <entry morerows="2">generic</entry> |
---|
71 | <entry morerows="2">-</entry> |
---|
72 | <entry morerows="2">-</entry> |
---|
73 | |
---|
74 | <entry morerows="1">Array design</entry> |
---|
75 | <entry>Reporter map</entry> |
---|
76 | <entry>generic.reportermap</entry> |
---|
77 | </row> |
---|
78 | <row> |
---|
79 | <entry>Print map</entry> |
---|
80 | <entry>generic.printmap</entry> |
---|
81 | </row> |
---|
82 | <row> |
---|
83 | <entry>Raw bioassay</entry> |
---|
84 | <entry>Generic raw data</entry> |
---|
85 | <entry>generic.rawdata</entry> |
---|
86 | </row> |
---|
87 | <row> |
---|
88 | <entry morerows="1">Affymetrix</entry> |
---|
89 | <entry morerows="1">affymetrix</entry> |
---|
90 | <entry morerows="1">-</entry> |
---|
91 | <entry morerows="1">-</entry> |
---|
92 | <entry>Array design</entry> |
---|
93 | <entry>CDF file</entry> |
---|
94 | <entry>affymetrix.cdf</entry> |
---|
95 | </row> |
---|
96 | <row> |
---|
97 | <entry>Raw bioassay</entry> |
---|
98 | <entry>CEL file</entry> |
---|
99 | <entry>affymetrix.cel</entry> |
---|
100 | </row> |
---|
101 | </tbody> |
---|
102 | </tgroup> |
---|
103 | </informaltable> |
---|
104 | |
---|
105 | |
---|
106 | </sect1> |
---|
107 | |
---|
108 | <sect1 id="appendix.rawdatatypes.ref"> |
---|
109 | <title>raw-data-types.xml reference</title> |
---|
110 | |
---|
111 | <para> |
---|
112 | A given platform either supports importing data to the database or it |
---|
113 | doesn't. If it supports import, it may be locked to a specific raw data type |
---|
114 | or it may use any raw data type. Among the default platforms installed with |
---|
115 | BASE, the Affymetrix platform doesn't support importing data while the Generic |
---|
116 | platform supports importing to any raw data type. |
---|
117 | </para> |
---|
118 | |
---|
119 | <para> |
---|
120 | Raw data types are defined in the <filename>raw-data-types.xml</filename> |
---|
121 | file. This file is located in the <filename>&lt;basedir&gt;/www/WEB-INF/classes</filename> |
---|
122 | directory and contains information about the database tables and columns to |
---|
123 | use for storing raw data. BASE ships with default raw data types for many |
---|
124 | different microarray platforms, including Genepix, Agilent and Illumina. |
---|
125 | </para> |
---|
126 | |
---|
127 | <para> |
---|
128 | If you want your BASE installation to be configured differently we recommend that |
---|
129 | you do it before the first initialisation of the database. |
---|
130 | It is possible to change the configuration of an existing BASE installation but it |
---|
131 | requires manual updates to the database. The following procedure covers how to update: |
---|
132 | </para> |
---|
133 | |
---|
134 | <orderedlist> |
---|
135 | <listitem> |
---|
136 | <para> |
---|
137 | Shut down the BASE web server. If you have installed job agents you should shut |
---|
138 | them down as well. |
---|
139 | </para> |
---|
140 | </listitem> |
---|
141 | |
---|
142 | <listitem> |
---|
143 | <para> |
---|
144 | Modify the <filename>raw-data-types.xml</filename> file. If you have installed |
---|
145 | job agents, make sure they all have the same version as the web server. |
---|
146 | </para> |
---|
147 | </listitem> |
---|
148 | |
---|
149 | <listitem> |
---|
150 | <para> |
---|
151 | Run the <filename>updatedb.sh</filename> script. Tables for new raw data types |
---|
152 | and new columns for existing raw data types will automatically be created, but the script |
---|
153 | can't delete tables or columns that have been removed, or modify columns that have |
---|
154 | changed datatype. You will have to do these kinds of changes by manually executing |
---|
155 | SQL against your database. Check your database documentation for information about SQL syntax. |
---|
156 | </para> |
---|
157 | |
---|
158 | <tip> |
---|
159 | <title>Create a parallel installation</title> |
---|
160 | <para> |
---|
161 | You can always create a new temporary parallel installation to check |
---|
162 | what the table generated by the installation script looks like. Compare the |
---|
163 | new table to the existing one and make sure they match. |
---|
164 | </para> |
---|
165 | </tip> |
---|
166 | </listitem> |
---|
167 | |
---|
168 | <listitem> |
---|
169 | <para> |
---|
170 | Start up the BASE web server and job agents, if any, again. |
---|
171 | </para> |
---|
172 | </listitem> |
---|
173 | </orderedlist> |
---|
174 | |
---|
175 | <tip> |
---|
176 | <title>Start with few columns</title> |
---|
177 | <para> |
---|
178 | It is better to start with too few columns, since it is easier to add |
---|
179 | more columns than it is to remove columns that are not needed. |
---|
180 | </para> |
---|
181 | </tip> |
---|
182 | |
---|
183 | <bridgehead>Format of the raw-data-types.xml file</bridgehead> |
---|
184 | <para> |
---|
185 | The following example will serve as a description of the format used in |
---|
186 | <filename>raw-data-types.xml</filename>: |
---|
187 | </para> |
---|
188 | |
---|
189 | |
---|
190 | <programlisting language="xml"> |
---|
191 | <![CDATA[ |
---|
192 | <?xml version="1.0" ?> |
---|
193 | <?xml-stylesheet type="text/xsl" href="raw-data-types.xsl"?> |
---|
194 | <!DOCTYPE raw-data-types SYSTEM "raw-data-types.dtd" > |
---|
195 | <raw-data-types> |
---|
196 | <raw-data-type |
---|
197 | id="genepix" |
---|
198 | name="GenePix" |
---|
199 | channels="2" |
---|
200 | table="RawDataGenePix" |
---|
201 | > |
---|
202 | <property |
---|
203 | name="diameter" |
---|
204 | title="Spot diameter" |
---|
205 | description="The diameter of the spot in µm" |
---|
206 | column="diameter" |
---|
207 | type="float" |
---|
208 | /> |
---|
209 | <property |
---|
210 | name="ch1FgMedian" |
---|
211 | title="Channel 1 foreground median" |
---|
212 | description="The median of the foreground intensity in channel 1" |
---|
213 | column="ch1_fg_median" |
---|
214 | type="float" |
---|
215 | channel="1" |
---|
216 | /> |
---|
217 | <!-- skipped a lot of properties --> |
---|
218 | <intensity-formula |
---|
219 | name="mean" |
---|
220 | title="Mean FG - Mean BG" |
---|
221 | description="Subtract mean background from mean foreground" |
---|
222 | > |
---|
223 | <formula |
---|
224 | channel="1" |
---|
225 | expression="raw('ch1FgMean') - raw('ch1BgMean')" |
---|
226 | /> |
---|
227 | <formula |
---|
228 | channel="2" |
---|
229 | expression="raw('ch2FgMean') - raw('ch2BgMean')" |
---|
230 | /> |
---|
231 | </intensity-formula> |
---|
232 | <!-- and a few more... --> |
---|
233 | </raw-data-type> |
---|
234 | </raw-data-types> |
---|
235 | ]]> |
---|
236 | </programlisting> |
---|
237 | |
---|
238 | <para> |
---|
239 | Each raw data type is represented by a <sgmltag class="starttag">raw-data-type</sgmltag> |
---|
240 | tag. The following attributes can be used: |
---|
241 | </para> |
---|
242 | |
---|
243 | <table frame="all" id="appendix.rawdatatypes.tag"> |
---|
244 | <title>Attributes for the <sgmltag class="starttag">raw-data-type</sgmltag> tag</title> |
---|
245 | <tgroup cols="3" align="left"> |
---|
246 | <colspec colname="attribute" align="left" /> |
---|
247 | <colspec colname="required" /> |
---|
248 | <colspec colname="comment" /> |
---|
249 | <thead> |
---|
250 | <row> |
---|
251 | <entry>Attribute</entry> |
---|
252 | <entry>Required</entry> |
---|
253 | <entry>Comment</entry> |
---|
254 | </row> |
---|
255 | </thead> |
---|
256 | <tbody> |
---|
257 | <row> |
---|
258 | <entry>id</entry> |
---|
259 | <entry>yes</entry> |
---|
260 | <entry> |
---|
261 | A unique ID of the raw data type. It should contain only letters, |
---|
262 | numbers and underscores and the first character must be a letter. |
---|
263 | </entry> |
---|
264 | </row> |
---|
265 | <row> |
---|
266 | <entry>name</entry> |
---|
267 | <entry>yes</entry> |
---|
268 | <entry> |
---|
269 | A unique name of the raw data type. The name is usually used by client |
---|
270 | applications for display. |
---|
271 | </entry> |
---|
272 | </row> |
---|
273 | <row> |
---|
274 | <entry>table</entry> |
---|
275 | <entry>yes</entry> |
---|
276 | <entry> |
---|
277 | The name of the database table to store data in. The table name |
---|
278 | must be unique and can only contain letters, |
---|
279 | numbers and underscores. The first character must be a letter. |
---|
280 | </entry> |
---|
281 | </row> |
---|
282 | <row> |
---|
283 | <entry>channels</entry> |
---|
284 | <entry>yes</entry> |
---|
285 | <entry> |
---|
286 | The number of channels used by this raw data type. It must be |
---|
287 | a number > 0. |
---|
288 | </entry> |
---|
289 | </row> |
---|
290 | <row> |
---|
291 | <entry>description</entry> |
---|
292 | <entry>no</entry> |
---|
293 | <entry> |
---|
294 | An optional (longer) description of the raw data type. |
---|
295 | </entry> |
---|
296 | </row> |
---|
297 | </tbody> |
---|
298 | </tgroup> |
---|
299 | </table> |
---|
300 | |
---|
301 | <para> |
---|
302 | Following the <sgmltag class="starttag">raw-data-type</sgmltag> tag |
---|
303 | is one or more <sgmltag class="starttag">property</sgmltag> tags. |
---|
304 | Each one defines a column in the database that is designed to hold |
---|
305 | data values of a particular type. The following attributes can be used |
---|
306 | on this tag: |
---|
307 | </para> |
---|
308 | |
---|
309 | <table frame="all" id="appendix.rawdatatypes.property"> |
---|
310 | <title>Attributes for the <sgmltag class="starttag">property</sgmltag> tag</title> |
---|
311 | <tgroup cols="3" align="left"> |
---|
312 | <colspec colname="attribute" align="left" /> |
---|
313 | <colspec colname="required" /> |
---|
314 | <colspec colname="comment" /> |
---|
315 | <thead> |
---|
316 | <row> |
---|
317 | <entry>Attribute</entry> |
---|
318 | <entry>Required</entry> |
---|
319 | <entry>Comment</entry> |
---|
320 | </row> |
---|
321 | </thead> |
---|
322 | <tbody> |
---|
323 | <row> |
---|
324 | <entry>*</entry> |
---|
325 | <entry></entry> |
---|
326 | <entry> |
---|
327 | All attributes defined by the |
---|
328 | <sgmltag class="starttag">property</sgmltag> tag in |
---|
329 | <filename>extended-properties.xml</filename>. See |
---|
330 | <xref linkend="appendix.extendedproperties.property" />. |
---|
331 | </entry> |
---|
332 | </row> |
---|
333 | <row> |
---|
334 | <entry>channel</entry> |
---|
335 | <entry>no</entry> |
---|
336 | <entry> |
---|
337 | The channel number the property belongs to. Allowed values are 0 to |
---|
338 | the number of channels specified for the raw data type. If the property |
---|
339 | doesn't belong to any channel, set the value to 0 or leave it |
---|
340 | unspecified. |
---|
341 | </entry> |
---|
342 | </row> |
---|
343 | </tbody> |
---|
344 | </tgroup> |
---|
345 | </table> |
---|
346 | |
---|
347 | <para> |
---|
348 | Following the <sgmltag class="starttag">property</sgmltag> tags come zero |
---|
349 | or more <sgmltag class="starttag">intensity-formula</sgmltag> tags. |
---|
350 | Each one defines mathematical formulas that can be used to |
---|
351 | calculate the intensity values from the raw data. In the Genepix case, |
---|
352 | there are several formulas which differ in the way background is |
---|
353 | subtracted from foreground intensity values. For other raw data |
---|
354 | types, the intensity formula may just copy one of the raw data values. |
---|
355 | </para> |
---|
356 | |
---|
357 | <para> |
---|
358 | The intensity formulas are installed as <classname |
---|
359 | docapi="net.sf.basedb.core">Formula</classname> items in the database. This |
---|
360 | means that you can manually add, change or remove intensity formulas directly |
---|
361 | from the web interface. The intensity formulas in the <filename>raw-data-types.xml</filename> |
---|
362 | file are only used at installation time. |
---|
363 | </para> |
---|
364 | |
---|
365 | <para> |
---|
366 | The <sgmltag class="starttag">intensity-formula</sgmltag> tag has the following |
---|
367 | attributes: |
---|
368 | </para> |
---|
369 | |
---|
370 | <table frame="all" id="appendix.rawdatatypes.intensity-formula"> |
---|
371 | <title>Attributes for the <sgmltag class="starttag">intensity-formula</sgmltag> tag</title> |
---|
372 | <tgroup cols="3" align="left"> |
---|
373 | <colspec colname="attribute" align="left" /> |
---|
374 | <colspec colname="required" /> |
---|
375 | <colspec colname="comment" /> |
---|
376 | <thead> |
---|
377 | <row> |
---|
378 | <entry>Attribute</entry> |
---|
379 | <entry>Required</entry> |
---|
380 | <entry>Comment</entry> |
---|
381 | </row> |
---|
382 | </thead> |
---|
383 | <tbody> |
---|
384 | <row> |
---|
385 | <entry>name</entry> |
---|
386 | <entry>yes</entry> |
---|
387 | <entry> |
---|
388 | A unique name for the formula. This is only used during installation. |
---|
389 | </entry> |
---|
390 | </row> |
---|
391 | <row> |
---|
392 | <entry>title</entry> |
---|
393 | <entry>yes</entry> |
---|
394 | <entry> |
---|
395 | The title of the formula. This is used by client applications for |
---|
396 | display. |
---|
397 | </entry> |
---|
398 | </row> |
---|
399 | <row> |
---|
400 | <entry>description</entry> |
---|
401 | <entry>no</entry> |
---|
402 | <entry> |
---|
403 | An optional, longer, description of the formula. |
---|
404 | </entry> |
---|
405 | </row> |
---|
406 | </tbody> |
---|
407 | </tgroup> |
---|
408 | </table> |
---|
409 | |
---|
410 | <para> |
---|
411 | The <sgmltag class="starttag">intensity-formula</sgmltag> must contain |
---|
412 | one <sgmltag class="starttag">formula</sgmltag> tag for each channel |
---|
413 | of the raw data type. The attributes of this tag are: |
---|
414 | </para> |
---|
415 | |
---|
416 | <table frame="all" id="appendix.rawdatatypes.formula"> |
---|
417 | <title>Attributes for the <sgmltag class="starttag">formula</sgmltag> tag</title> |
---|
418 | <tgroup cols="3" align="left"> |
---|
419 | <colspec colname="attribute" align="left" /> |
---|
420 | <colspec colname="required" /> |
---|
421 | <colspec colname="comment" /> |
---|
422 | <thead> |
---|
423 | <row> |
---|
424 | <entry>Attribute</entry> |
---|
425 | <entry>Required</entry> |
---|
426 | <entry>Comment</entry> |
---|
427 | </row> |
---|
428 | </thead> |
---|
429 | <tbody> |
---|
430 | <row> |
---|
431 | <entry>channel</entry> |
---|
432 | <entry>yes</entry> |
---|
433 | <entry> |
---|
434 | The channel number. One tag for each channel must be specified. No |
---|
435 | duplicates are allowed. |
---|
436 | </entry> |
---|
437 | </row> |
---|
438 | <row> |
---|
439 | <entry>expression</entry> |
---|
440 | <entry>yes</entry> |
---|
441 | <entry> |
---|
442 | The mathematical expression used to calculate the intensities. |
---|
443 | The expression is parsed with the <classname docapi="net.sf.basedb.util.jep">Jep</classname> |
---|
444 | parser. It supports the common mathematical operations such as +, -, *, /, |
---|
445 | and some mathematical functions like log2(), ln(), sqrt(), etc. See the API |
---|
446 | documentation for Jep for more information. You can also use two special |
---|
447 | functions developed specifically for this case: |
---|
448 | <itemizedlist> |
---|
449 | <listitem> |
---|
450 | <para> |
---|
451 | raw(name): Get the value from the raw data property with the given name, |
---|
452 | for example: <code>raw('ch1FgMedian')</code>. |
---|
453 | </para> |
---|
454 | </listitem> |
---|
455 | <listitem> |
---|
456 | <para> |
---|
457 | mean(name): Get the mean value of the raw data property with the given name, |
---|
458 | for example: <code>mean('ch1BgMean')</code>. The mean is calculated from |
---|
459 | all raw data spots in the raw bioassay. |
---|
460 | </para> |
---|
461 | </listitem> |
---|
462 | </itemizedlist> |
---|
463 | </entry> |
---|
464 | </row> |
---|
465 | </tbody> |
---|
466 | </tgroup> |
---|
467 | </table> |
---|
468 | |
---|
469 | </sect1> |
---|
470 | |
---|
471 | </appendix> |
---|
472 | |
---|