source: branches/uk_ac_ebi_Tab2MageImporter/t2m-wizard.pl @ 497

Last change on this file since 497 was 497, checked in by mbayer, 14 years ago
  • various specification related documents added (from Philippe)
  • have modified the structure of the extension -- now uses 3 separate JSP pages to collect data from user, one per tab2mage section, and data from these is stored in a data bean in the background and then retrieved in the last page
File size: 9.1 KB
Line 
1#!/usr/bin/perl
2# Include libraries
3#author: rocca@ebi.ac.uk EMBL-EBI
4
5#test command line:
6
7#C:\Perl\workspace>perl t2m-wizard.pl  --org Homo_sapiens --array NuGO_hs --single 1 --trtgroup 6 --factors compound(aspirin,ibuprofen);dose(none,medium,high) --target liver,heart,brain --ref no --dye no --subject 4
8
9#C:\Perl\workspace>perl t2m-wizard.pl  --org Homo_sapiens --array Agilent1 --single 2 --ref yes --dye no --trtgroup 6 --factors compound(aspirin,ibuprofen);dose(none,medium,high) --target liver,heart,brain --subject 4
10
11
12
13use strict; # Disable automatic variables
14use Fcntl;  # IMPORTANT: necessary for running sysopen function (flag , eg. O_WRONLY import)
15use Getopt::Long;
16use File::Find;
17use File::Basename;
18use File::Copy;
19
# ---------------------------------------------------------------------------
# Command-line handling.
#
# All options except --help are mandatory. Getopt::Long accepts unambiguous
# abbreviations, so the short forms used in the test command lines at the top
# of this file (--org, --single, --trtgroup, --target, --ref, --dye,
# --subject) keep working.
#
# The variables below are file-scoped on purpose: the rest of the script reads
# them directly.
# ---------------------------------------------------------------------------

my $usage;                  # set by --help; forces the usage screen

my $organism;               # --organism
my $array;                  # --array
my $trtgroup_nb;            # --trtgroup_nb
my $subject_nb;             # --subject_nb
my $target_tissue;          # --target_tissue, raw comma separated string
my @target_tissue;          # list form of $target_tissue, filled in later
my $dyeswap;                # --dyeswap (yes/no)
my $reference;              # --reference (yes/no)
my $single_or_multiple_ch;  # --single_or_multiple_channel (integer)
my $hyb_nb;                 # declared for future use; not read anywhere yet
#my $pooling; TO DO (yes/no)
#my @sampling_points;
my $record;                 # scratch variable for one hybridization record
my @records;                # every hybridization record generated so far

my $factors;                # --factors, raw "name(v1,v2);name(v1,v2)" string

my $tot_hyb_nb;             # declared for future use; not read anywhere yet

# GetOptions returns false when it meets an unknown, ambiguous or malformed
# option (it has already printed the reason on STDERR at that point), so the
# result is kept and treated like a request for help.
my $options_ok = GetOptions(
    "help"                         => \$usage,
    "organism=s"                   => \$organism,
    "array=s"                      => \$array,
    "single_or_multiple_channel=i" => \$single_or_multiple_ch,
    "trtgroup_nb=i"                => \$trtgroup_nb,
    "factors=s"                    => \$factors,
    "subject_nb=i"                 => \$subject_nb,
    "target_tissue=s"              => \$target_tissue,
    "reference=s"                  => \$reference,
    "dyeswap=s"                    => \$dyeswap,
);

# Show the help screen (usage() does not return) when parsing failed, help was
# requested, or any mandatory option is missing. Note that a value of 0 for a
# numeric option counts as "missing" here.
if (   !$options_ok
    || $usage
    || !$organism
    || !$array
    || !$single_or_multiple_ch
    || !$factors
    || !$trtgroup_nb
    || !$subject_nb
    || !$target_tissue
    || !$reference
    || !$dyeswap)
{
    usage();
}

# TODO: input validation beyond "is it present" is still to be implemented
# (e.g. allowed values for --reference / --dyeswap, positive counts).
73
74
# Create (or truncate) the output file in the current directory. The FILE
# handle stays open; the hybridization records are written to it further down.
my $output_name = "t2m-wiz-output.txt";
open(FILE, '+>', $output_name) or die "cannot open \"$output_name\": $!";

# Experiment section: a default template, one field name per line, which the
# user is expected to complete by hand.
print FILE "Experiment section\n";
print FILE "$_\n" for qw(
    domain
    accession
    quality_control
    experiment_design_type
    name
    description
    release_date
    submission_date
    submitter
    organization
    publication_title
    authors
    journal
    volume
    issue
    pages
    year
);
print FILE "pubmed_id\n\n";    # last field, followed by a blank separator line

# Protocol section: column header plus one placeholder row per protocol
# accession referenced by the generated hybridization records.
print FILE join("\n",
    "Protocol section",
    "accession name  text  parameters  type",
    "P-DIET-1  treatment diet    treatment",
    "P-EXTR-1  extraction  RNA extraction    extraction",
    "P-LABL-1  labeling  text    labeling",
    "P-HYBR-1  hybridization text    hybridization",
    "P-SCAN-1  scanning  text    scanning",
), "\n\n";

# Hybridization section: title line, then the default core of column names.
# IMPORTANT NOTE: the column list needs to be modified if pooling is used.
# NOTE(review): the header is joined with one space while the records are
# joined with two -- confirm which delimiter the consumer expects.
print FILE "Hybridization section\n";

my @header_columns = (
    'File[raw]',
    'array[accession]',
    'BioSource',
    'BioMaterialCharacteristics[Organism]',
    'BioMaterialCharacteristics[OrganismPart]',
    'BioMaterialCharacteristics[CellType]',
    'Sample',
    'Protocol[grow]',
    'Extract',
    'Protocol[extraction]',
    'LabeledExtract',
    'Protocol[labeling]',
    'Dye',
    'Hybridization',
    'Protocol[hybridization]',
    'Scan',
    'Protocol[scanning]',
    'FactorValue[Treatment Type]',
);
my $header = join(' ', @header_columns);

print FILE "$header\n";
136
137
138
139
140
141
# Break the raw option strings into lists: factors are separated by ';',
# tissues by ','.
my @factors = split(/;/, $factors);
@target_tissue = split(/,/, $target_tissue);

# Echo the study design back to the user on STDOUT.
print "species: $organism\n",
      "arraydesign: $array\n",
      "treatment groups: $trtgroup_nb\n",
      "subjects per group: $subject_nb\n",
      "tissue per subject: @target_tissue\n";

# Report each factor written as name(value1,value2,...). Entries that do not
# have that shape are skipped without comment.
# TODO: store the factors in a hash -> more complex data structure
foreach my $factor (@factors) {
    if (my ($name, $value_list) = $factor =~ /(\w+)\((.*)\)/) {
        my @levels      = split(/,/, $value_list);
        my $level_count = scalar @levels;
        print "factor:$name, $level_count associated values, which are: @levels\n";
    }
}
170
171
172#To IMPLEMENT:
173
174#once the number of study groups is known, it would be good to get a full description for each study of the associated factors and factor levels:
175#for each study group, indicate Factor name and intensity level, for example
176#study group#1: [factor=Dose/Value=10 mg],[factor=Compound/Value=aspirin], [Factor=Duration/Value=24 hr]
177#for each study group, identify the factor and their values
178#for every organ derived from each animal in each study group
179
180
# ---------------------------------------------------------------------------
# Generate the hybridization records and append them to @records.
#
# Two designs are supported:
#   * single channel (--single_or_multiple_channel 1), considered here as
#     equivalent to using the Affymetrix platform: one record per
#     hybridization, dye "biotin", raw file "file-N.CEL";
#   * two or more channels with a common reference (--reference yes): two
#     records per hybridization sharing the same file, hybridization and scan
#     names -- the sample labelled "Cy3" and the reference labelled "Cy5".
#
# One hybridization is produced for every (treatment group, subject, tissue)
# combination. Subjects are numbered across groups so that every subject gets
# a unique identifier.
#
# TO IMPLEMENT: if reference=no, same stuff but assume matching sample at
# control level. $dyeswap is not consulted yet either.
# ---------------------------------------------------------------------------

# No command-line option supplies the cell type yet, so the
# BioMaterialCharacteristics[CellType] column is left blank for the user to
# fill in. The variable was previously used without being declared, which
# stopped the script from compiling under "use strict".
my $cellmodel = '';

my $single_channel = ($single_or_multiple_ch == 1);

# String comparison is required here: "yes" == "yes" and "no" == "yes" are both
# numerically 0 == 0, i.e. always true.
my $reference_design = (   !$single_channel
                        && $single_or_multiple_ch >= 2
                        && lc($reference) eq 'yes');

if ($single_channel || $reference_design) {

    my $file_extension = $single_channel ? 'CEL'    : 'txt';
    my $sample_dye     = $single_channel ? 'biotin' : 'Cy3';

    my $count = 1;    # running hybridization number, shared by file/hyb/scan names

    for my $group (1 .. $trtgroup_nb) {            # each study/treatment group

        for my $rank (1 .. $subject_nb) {          # each biological replicate

            # Unique subject number: rank within the group plus the number
            # of subjects already created for the previous groups.
            my $subject_id = ($group - 1) * $subject_nb + $rank;
            my $biosource  = "group-${group}-subject-${subject_id}";

            for my $tissue_index (0 .. $#target_tissue) {   # each tissue

                my $tissue  = $target_tissue[$tissue_index];
                my $sample  = $biosource . ".sample-" . ($tissue_index + 1);
                my $extract = $sample . "-extract";

                my $raw_file      = "file-${count}.${file_extension}";
                my $hybridization = "hybridization-${count}";
                my $scan          = "scan-${count}";

                # Canonical tab2mage hybridization record for the sample.
                push @records, join('  ',
                    $raw_file,
                    $array,
                    $biosource,
                    $organism,
                    $tissue,
                    $cellmodel,
                    $sample,
                    "P-DIET-1",
                    $extract,
                    "P-EXTR-1",
                    $extract . "-le",
                    "P-LABL-1",
                    $sample_dye,
                    $hybridization,
                    "P-HYBR-1",
                    $scan,
                    "P-SCAN-1",
                    "trt_group" . $group,
                );

                # Second channel of the same hybridization: common reference.
                if ($reference_design) {
                    push @records, join('  ',
                        $raw_file,
                        $array,
                        "reference",
                        $organism,
                        "multi-tissue",
                        "multi-cell type",
                        "reference",
                        "P-DIET-1",
                        "reference",
                        "P-EXTR-1",
                        "reference",
                        "P-LABL-1",
                        "Cy5",
                        $hybridization,
                        "P-HYBR-1",
                        $scan,
                        "P-SCAN-1",
                        "reference",
                    );
                }

                $count++;
            }
        }
    }
}
else {
    # Make the gap visible instead of silently writing an empty section.
    warn "t2m-wizard: no hybridization records generated: "
       . "a multi-channel design without a common reference "
       . "(--reference other than 'yes') is not implemented yet\n";
}
324
325
326
# Write every generated hybridization record, one per line, to the output
# file opened earlier, then close it. Buffered write errors may only surface
# at close time, so both steps are checked and report the system error.
for my $line (@records) {
    print FILE "$line\n"
        or die "cannot write to \"t2m-wiz-output.txt\": $!";
}

close(FILE) or die "cannot close \"t2m-wiz-output.txt\": $!";
332
333#---------------------------------
# usage()
#
# Prints the command-line help to STDOUT and terminates the program with exit
# status 0. Takes no arguments and never returns.
#
# The text is a double-quoted here-document, so "\t" is expanded to a tab.
# A literal "/" needs no escaping in this form, which avoids the old pitfall
# where "yes\no" was printed as "yes", a newline, and "o".
sub usage {
    print <<"END_USAGE";

perl t2m-wizard.pl <OPTIONS>
-------------------------------------------------------------------------
 WARNINGS:

  ** Prior to running the script, REMEMBER TO PASS THE FOLLOWING COMMAND:
  limit datasize 1048000
-------------------------------------------------------------------------

 OPTIONS:

  --organism=s\t Name of organism under study (one only at the moment)
  --array=s\t Name of the array design used in study (one only at the moment)
  --trtgroup_nb=i\t The Number of study groups defined in the study (eg control, low dose, high dose would define 3 study groups)
  --subject_nb=i\t The number of subjects per study groups
  --target_tissue=s\t A comma separated list of organism parts (as in liver,abdominal adipose tissue, skeletal muscle)
  --factors=s\t A semicolon separated list of factors, each followed by its comma separated values between brackets, as in compound(aspirin,ibuprofen);dose(none,medium,high)
  --dyeswap=s\t yes/no (mandatory, but not used by the record generator yet)
  --single_or_multiple_channel=i\t An integer 1, 2 or 3
  --reference=s\t yes/no
  --help\t Print this message and exit
-------------------------------------------------------------------------

 POST-PROCESSING:

   i.check and replace Protocol with relevant Accession Numbers
  ii.check/add ExperimentalFactor categories
 iii.check/create Person/Organisation and AuditSecurity Package
  iv.if Final Transformed files are supplied, need to add those

-------------------------------------------------------------------------

END_USAGE
    exit(0);
}
Note: See TracBrowser for help on using the repository browser.