1 | #!/usr/bin/perl |
---|
2 | # Include libraries |
---|
3 | #author: rocca@ebi.ac.uk EMBL-EBI |
---|
4 | |
---|
5 | #test command line: |
---|
6 | |
---|
7 | #C:\Perl\workspace>perl t2m-wizard.pl --org Homo_sapiens --array NuGO_hs --single 1 --trtgroup 6 --factors compound(aspirin,ibuprofen);dose(none,medium,high) --target liver,heart,brain --ref no --dye no --subject 4 |
---|
8 | |
---|
9 | #C:\Perl\workspace>perl t2m-wizard.pl --org Homo_sapiens --array Agilent1 --single 2 --ref yes --dye no --trtgroup 6 --factors compound(aspirin,ibuprofen);dose(none,medium,high) --target liver,heart,brain --subject 4 |
---|
10 | |
---|
11 | |
---|
12 | |
---|
13 | use strict; # Disable automatic variables |
---|
14 | use Fcntl; # IMPORTANT: necessary for running sysopen function (flag , eg. O_WRONLY import) |
---|
15 | use Getopt::Long; |
---|
16 | use File::Find; |
---|
17 | use File::Basename; |
---|
18 | use File::Copy; |
---|
19 | |
---|
# ---------------------------------------------------------------------------
# Main script: parses the command line, writes a template file with default
# "Experiment" and "Protocol" sections, then generates one hybridization
# record per (treatment group, subject, tissue) combination and appends the
# records to the "Hybridization section" of that file.
#
# Supported designs:
#   * single channel (--single_or_multiple_channel 1): one biotin-labelled
#     record per sample, raw files named file-N.CEL;
#   * two or more channels with --reference yes: one Cy3 sample record plus
#     one Cy5 "reference" record per hybridization, raw files named file-N.txt.
# Any other combination is not implemented yet and only triggers a warning.
# ---------------------------------------------------------------------------

# Default left in place from the original script; GetOptions does not consume
# the "." and nothing below reads @ARGV afterwards.
@ARGV=qw(.) unless @ARGV;

my $usage;                    # set by --help / --usage

my $organism;
my $array;
my $trtgroup_nb;
my $subject_nb;
my $target_tissue;            # raw comma separated list from the command line
my @target_tissue;            # the same list, split
my $dyeswap;                  # (yes/no) required, but not used to build records yet
my $reference;                # (yes/no)
my $single_or_multiple_ch;    # 1 = single channel, 2 or more = multi channel
my $hyb_nb;
#my $pooling; TO DO (yes/no)
#my @sampling_points;
my $record;
my @records;

my $factors;                  # raw "name(v1,v2);name2(v3,v4)" string

my $tot_hyb_nb;

# Value written in the BioMaterialCharacteristics[CellType] column of every
# sample record. The original code used this variable without declaring it,
# which is a compile-time error under "use strict". No option supplies a cell
# type, so the column is left empty for the user to fill in by hand.
my $cellmodel = '';

my $output_file = "t2m-wiz-output.txt";

my $options_ok = GetOptions(
    "help|usage"                   => \$usage,
    "organism=s"                   => \$organism,
    "array=s"                      => \$array,
    "single_or_multiple_channel=i" => \$single_or_multiple_ch,
    "trtgroup_nb=i"                => \$trtgroup_nb,
    "factors=s"                    => \$factors,
    "subject_nb=i"                 => \$subject_nb,
    "target_tissue=s"              => \$target_tissue,
    "reference=s"                  => \$reference,
    "dyeswap=s"                    => \$dyeswap,
);

# Every option is mandatory: show the help text (which exits) when parsing
# failed, when help was requested, or when any option is missing.
if (   !$options_ok
    || $usage
    || !$organism
    || !$array
    || !$single_or_multiple_ch
    || !$factors
    || !$trtgroup_nb
    || !$subject_nb
    || !$target_tissue
    || !$reference
    || !$dyeswap)
{
    usage();
}

# Minimal input validation, done before the output file is truncated.
# "=i" accepts negative integers, which would silently yield empty loops.
for my $check (["trtgroup_nb", $trtgroup_nb], ["subject_nb", $subject_nb]) {
    my ($option, $value) = @{$check};
    die "--$option must be a positive integer (got $value)\n" if $value < 1;
}

my @factors = split(/;/, $factors);
@target_tissue = split(/,/, $target_tissue);
die "--target_tissue must name at least one tissue\n" unless @target_tissue;

open(my $out, '>', $output_file) or die "cannot open \"$output_file\": $!";

# We create a default template for the Experiment and Protocol sections,
# for the user to modify manually and update accordingly.
# NOTE(review): columns are separated by single spaces exactly as in the
# current source; Tab2MAGE files are presumably tab-delimited - confirm
# whether tabs were intended here and in the join() calls below.
my @experiment_fields = (
    "domain",          "accession",   "quality_control", "experiment_design_type",
    "name",            "description", "release_date",    "submission_date",
    "submitter",       "organization", "publication_title", "authors",
    "journal",         "volume",      "issue",           "pages",
    "year",            "pubmed_id",
);
print {$out} "Experiment section\n";
print {$out} "$_\n" for @experiment_fields;
print {$out} "\n";

my @protocol_rows = (
    "accession name text parameters type",
    "P-DIET-1 treatment diet treatment",
    "P-EXTR-1 extraction RNA extraction extraction",
    "P-LABL-1 labeling text labeling",
    "P-HYBR-1 hybridization text hybridization",
    "P-SCAN-1 scanning text scanning",
);
print {$out} "Protocol section\n";
print {$out} "$_\n" for @protocol_rows;
print {$out} "\n";

print {$out} "Hybridization section\n";

# Header of the hybridization section: a default core of mandatory fields.
# IMPORTANT NOTE: needs to be modified if pooling is used.
my $header = join(' ',
    "File[raw]",
    "array[accession]",
    "BioSource",
    "BioMaterialCharacteristics[Organism]",
    "BioMaterialCharacteristics[OrganismPart]",
    "BioMaterialCharacteristics[CellType]",
    "Sample",
    "Protocol[grow]",
    "Extract",
    "Protocol[extraction]",
    "LabeledExtract",
    "Protocol[labeling]",
    "Dye",
    "Hybridization",
    "Protocol[hybridization]",
    "Scan",
    "Protocol[scanning]",
    "FactorValue[Treatment Type]");

print {$out} "$header\n";

# Echo the interpreted design on STDOUT so the user can check it.
print "species: $organism\n";
print "arraydesign: $array\n";
print "treatment groups: $trtgroup_nb\n";
print "subjects per group: $subject_nb\n";
print "tissue per subject: @target_tissue\n";

# Each factor is expected as name(value1,value2,...).
# TODO: store factors in a hash so they can be attached to study groups.
for my $factor (@factors) {
    if (my ($key, $values) = $factor =~ /(\w+)\((.*)\)/) {
        my @factorvalues     = split(/,/, $values);
        my $number_of_values = scalar @factorvalues;
        print "factor:$key, $number_of_values associated values, which are: @factorvalues\n";
    }
    else {
        warn "ignoring malformed factor '$factor' (expected name(value1,value2,...))\n";
    }
}

# TO IMPLEMENT:
# once the number of study groups is known, it would be good to get a full
# description, for each study group, of the associated factors and levels, e.g.
# study group#1: [factor=Dose/Value=10 mg],[factor=Compound/Value=aspirin],
# [Factor=Duration/Value=24 hr], for every organ derived from each animal.

# Single channel is treated as equivalent to using the Affymetrix platform.
my $is_single_channel = $single_or_multiple_ch == 1;

# String comparison is required here: the original "== 'yes'" compared both
# sides numerically (0 == 0) and was therefore true even for "--reference no".
my $is_reference_design = $single_or_multiple_ch >= 2 && lc($reference) eq 'yes';

if ($is_single_channel || $is_reference_design) {

    my $file_ext   = $is_single_channel ? ".CEL"   : ".txt";
    my $sample_dye = $is_single_channel ? "biotin" : "Cy3";

    my $count = 1;    # running hybridization / raw file number

    for my $group (1 .. $trtgroup_nb) {          # each study/treatment group
        for my $rank (1 .. $subject_nb) {        # each biological replicate

            # Unique subject number across all groups: subjects already
            # created in previous groups plus the rank within this group.
            my $subjectcount = ($group - 1) * $subject_nb + $rank;

            for my $tissue_idx (0 .. $#target_tissue) {    # each tissue

                push @records, _sample_record(
                    count     => $count,
                    file_ext  => $file_ext,
                    array     => $array,
                    organism  => $organism,
                    tissue    => $target_tissue[$tissue_idx],
                    cell_type => $cellmodel,
                    group     => $group,
                    subject   => $subjectcount,
                    sample    => $tissue_idx + 1,    # samples are numbered from 1
                    dye       => $sample_dye,
                );

                # In a reference design the same hybridization also carries
                # the Cy5-labelled common reference.
                if ($is_reference_design) {
                    push @records, _reference_record(
                        count    => $count,
                        file_ext => $file_ext,
                        array    => $array,
                        organism => $organism,
                    );
                }

                $count++;
            }
        }
    }
}
else {
    # TO IMPLEMENT: if reference=no, same stuff but assume matching sample
    # at control level.
    warn "no hybridization records generated: a design with "
       . "$single_or_multiple_ch channel(s) and reference '$reference' "
       . "is not implemented yet\n";
}

print {$out} "$_\n" for @records;

close($out) or die "cannot close \"$output_file\": $!";

# Builds the hybridization record of one biological sample.
# Named arguments: count, file_ext, array, organism, tissue, cell_type,
# group, subject, sample, dye. Returns the 18 fields joined by a space,
# in the same order as the hybridization section header.
sub _sample_record {
    my (%arg) = @_;

    my $source  = "group-" . $arg{group} . "-subject-" . $arg{subject};
    my $sample  = $source . ".sample-" . $arg{sample};
    my $extract = $sample . "-extract";

    return join(' ',
        "file-" . $arg{count} . $arg{file_ext},
        $arg{array},
        $source,
        $arg{organism},
        $arg{tissue},
        $arg{cell_type},
        $sample,
        "P-DIET-1",
        $extract,
        "P-EXTR-1",
        $extract . "-le",
        "P-LABL-1",
        $arg{dye},
        "hybridization-" . $arg{count},
        "P-HYBR-1",
        "scan-" . $arg{count},
        "P-SCAN-1",
        "trt_group" . $arg{group});
}

# Builds the Cy5 "reference" record that accompanies a sample record in a
# reference design. Named arguments: count, file_ext, array, organism.
# Returns the 18 fields joined by a space.
sub _reference_record {
    my (%arg) = @_;

    return join(' ',
        "file-" . $arg{count} . $arg{file_ext},
        $arg{array},
        "reference",
        $arg{organism},
        "multi-tissue",
        "multi-cell type",
        "reference",
        "P-DIET-1",
        "reference",
        "P-EXTR-1",
        "reference",
        "P-LABL-1",
        "Cy5",
        "hybridization-" . $arg{count},
        "P-HYBR-1",
        "scan-" . $arg{count},
        "P-SCAN-1",
        "reference");
}
332 | |
---|
333 | #--------------------------------- |
---|
# Prints the command-line help text on STDOUT and terminates the program
# with exit status 0. Takes no arguments and never returns.
sub usage {
    # Show the name the script was actually started under rather than a
    # hard-coded (and previously wrong) file name.
    my $script = basename($0);

    # Interpolating here-document: "\t" still expands to a tab, and "/" no
    # longer needs escaping as it did inside qq/.../ (where the unescaped
    # "yes\no" was printed as "yes", a line break, then "o").
    print <<"END_USAGE";

perl $script <OPTIONS>
-------------------------------------------------------------------------
WARNINGS:

** Prior to running the script, REMEMBER TO PASS THE FOLLOWING COMMAND:
limit datasize 1048000
-------------------------------------------------------------------------

OPTIONS (all are required):

--organism=s\t Name of organism under study (one only at the moment)
--array=s\t Name of the array design used in study (one only at the moment)
--trtgroup_nb=i\t The Number of study groups defined in the study (eg control, low dose, high dose would define 3 study groups)
--subject_nb=i\t The number of subjects per study groups
--target_tissue=s\t A comma separated list of organism parts (as in liver,abdominal adipose tissue, skeletal muscle)
--factors=s\t A semicolon separated list of factors, each followed by its comma separated values between brackets (as in compound(aspirin,ibuprofen);dose(none,medium,high)); quote the value if your shell interprets brackets or semicolons
--dyeswap=s\t yes/no
--single_or_multiple_channel=i\t An integer 1, 2 or 3
--reference=s\t yes/no
-------------------------------------------------------------------------

POST-PROCESSING:

i.check and replace Protocol with relevant Accession Numbers
ii.check/add ExperimentalFactor categories
iii.check/create Person/Organisation and AuditSecurity Package
iv.if Final Transformed files are supplied, need to add those

-------------------------------------------------------------------------

END_USAGE
    exit(0);
}