source: plugins/base1/se.lu.thep.wenni/trunk/base/base1/base_plugin_script/wenni.pl @ 665

Last change on this file since 665 was 665, checked in by Jari Häkkinen, 15 years ago

Added support for using configurable columns. Useful if BASE extra values are used for intensity and background SD.

  • Property svn:eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Id
File size: 7.9 KB
Line 
1#!/usr/bin/perl -w
2
3# $Id: wenni.pl 665 2008-04-16 22:46:13Z jari $
4
5# Copyright (C) 2005, 2006 Jari Häkkinen
6#
7# This file is part of WeNNI,
8# http://lev.thep.lu.se/trac/baseplugins/wiki/WeNNI
9#
10# WeNNI is free software; you can redistribute it and/or modify it
11# under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# WeNNI is distributed in the hope that it will be useful, but WITHOUT
16# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18# License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23# 02111-1307, USA.
24
25use strict;
26use FindBin;
27use Getopt::Long;
28
29sub basefile_options($$);
30sub BaseFileConverter($);
31sub cleanup($$);
32sub cmdline_options($);
33sub nni($);
34sub NNIFileConverter($);
35sub result($$);
36
37# read command line options
38my %cmdline_option;
39cmdline_options(\%cmdline_option);
40
41# write STDIN to a file
42my $basefile="BASEfile.data";
43open(BASEFILE,">$basefile");
44while(<>) {
45    print BASEFILE;
46}
47close(BASEFILE);
48
49# determine beta and number of neighbours. Presedence order is (low to
50# high priority): default value, basefile value, command line value.
51my %option=(
52    beta => 0.6,
53    datatype => "derived",
54    intensity1 => "intensity1",
55    intensity2 => "intensity2",
56    neighbours => 4,
57    nodelete => 0,
58    BCh1SD => "BCh1SD",
59    BCh2SD => "BCh2SD"
60    );
61basefile_options(\%option,$basefile);
62foreach my $key (keys %cmdline_option) {
63    # override any options if set at command line
64    if (defined($cmdline_option{$key})) {
65        $option{$key}=$cmdline_option{$key};
66    }
67}
68
69BaseFileConverter(\%option);
70NNIFileConverter(\%option);
71nni($option{'neighbours'});
72result($basefile,\%option);
73if (!$option{'nodelete'}) {
74    cleanup($basefile,\%option);
75}
76
77sub basefile_options($$) {
78    my $option=$_[0];
79    open(BASEFILE,"<$_[1]");
80    my $parameter_section_found=0;
81    while(<BASEFILE>) {
82        chomp;
83        if (/section\tWeNNIParams/) {
84            $parameter_section_found=1;
85            last;
86        }
87    }
88    if ($parameter_section_found) {
89        while(<BASEFILE>) {
90            chomp;
91            if (/%/) {
92                last;
93            }
94            my @param=split "\t";
95            $$option{$param[0]}=$param[1];
96        }
97    }
98    close(BASEFILE);
99}
100
101sub BaseFileConverter($) {
102    my $option=$_[0];
103    my $command="$FindBin::Bin/BaseFileConverter";
104    $command.=" BASEfile.data";
105    $command.=" wenni_";
106    if ($option{"datatype"} eq "derived") {
107      $command.=" -assayFields $$option{intensity1}";
108      $command.=" -assayFields $$option{intensity2}";
109    }
110    else {
111      $command.=" -assayFields FCh1Mean";
112      $command.=" -assayFields BCh1Mean";
113      $command.=" -assayFields FCh2Mean";
114      $command.=" -assayFields BCh2Mean";
115    }
116    $command.=" -assayFields $$option{BCh1SD}";
117    $command.=" -assayFields $$option{BCh2SD}";
118    return system("$command");
119}
120
121sub cleanup($$) {
122    my $basefile=$_[0];
123    my $option=$_[1];
124    unlink($basefile);
125    unlink("wenni_BCh1Mean.data");
126    unlink("wenni_$$option{BCh1SD}.data");
127    unlink("wenni_BCh2Mean.data");
128    unlink("wenni_$$option{BCh2SD}.data");
129    unlink("wenni_FCh1Mean.data");
130    unlink("wenni_FCh2Mean.data");
131    unlink("wenni_imputed.data");
132    unlink("wenni_$$option{intensity1}.data");
133    unlink("wenni_$$option{intensity2}.data");
134    unlink("wenni_logratio.data");
135    unlink("wenni_weight.data");
136}
137
138sub cmdline_options($) {
139    my $option=$_[0];
140    my $help=0;
141    my $result=GetOptions("beta=f",\$$option{beta},
142              "datatype=s",\$$option{datatype},
143              "help",\$help,
144              "neighbours=i",\$$option{neighbours},
145              "nodelete",\$$option{nodelete});
146    if (defined($$option{datatype}) && ($$option{datatype} ne "derived") &&
147        ($$option{datatype} ne "raw")) {
148        $help=1;
149    }
150    if (!$result || $help) {
151      help();
152      exit(!$result);
153    }
154}
155
156sub help() {
157  print << "HELPTXT";
158
159Usage wenni.pl [options]
160
161Options
162Options are read from the input BASE file, but the settings may be
163changed with the corresponding command line option:
164
165  --beta float
166    Set beta value to float.
167  --datatype string
168    Allowed string values for datatype is 'raw' or
169    'derived' (without '' characters). When 'derived' is
170    given, the user defined intensity1 and intensity2 will
171    be used from the Basefile. When 'raw' is used,
172    intensity1 will be calculated as FCh1Mean-BCh1Mean
173    (intensity2 is defined correspondingly).
174  --neighbours integer
175    Set the number of neighbours to integer.
176  --nodelete
177    Do not clean up, i.e. temporary files will not be deleted.
178  --help
179    Print this message.
180
181This script runs WeNNI on data read from STDIN, and write the result
182to STDOUT.
183
184HELPTXT
185}
186
187sub nni($) {
188    my $neighbours=$_[0];
189    my $command="$FindBin::Bin/nni";
190    $command.=" -data wenni_logratio.data";
191    $command.=" -neighbours $neighbours";
192    $command.=" -nni_algorithm WeNNI";
193    $command.=" -weight wenni_weight.data";
194    $command.=" > wenni_imputed.data";
195    return system("$command");
196}
197
198sub NNIFileConverter($) {
199    my $option=$_[0];
200    my $command="$FindBin::Bin/NNIFileConverter";
201    $command.=" -beta $$option{'beta'}";
202    if ($$option{'datatype'} eq "derived") {
203      $command.=" -fg1 wenni_$$option{intensity1}.data";
204      $command.=" -fg2 wenni_$$option{intensity2}.data";
205      $command.=" -datatype derived";
206    }
207    else {
208        $command.=" -bg1 wenni_BCh1Mean.data";
209        $command.=" -bg2 wenni_BCh2Mean.data";
210        $command.=" -fg1 wenni_FCh1Mean.data";
211        $command.=" -fg2 wenni_FCh2Mean.data";
212        $command.=" -datatype raw";
213    }
214    $command.=" -bgstd1 wenni_$$option{BCh1SD}.data";
215    $command.=" -bgstd2 wenni_$$option{BCh2SD}.data";
216    $command.=" -logratio wenni_logratio.data";
217    $command.=" -weight wenni_weight.data";
218    return system("$command");
219}
220
221sub result($$) {
222    my $basefile=$_[0];
223    my $option=$_[1];
224    my $datatype=$$option{'datatype'};
225    print "BASEfile\n";
226    print "section\tspots\n";
227    print "columns\tposition\treporter\tassayData\n";
228    print "assayFields\tl2ratio1_2\tl10intgmean1_2\n";
229
230    open(BASEFILE,"<$basefile");
231    my $assays_found=0;
232    while (<BASEFILE>) {
233        if (/^assays\t/) {
234            $assays_found=1;
235            last;
236        }
237    }
238    if (!$assays_found) {
239        print STDERR "Error: Cannot find assays in 'section spot'\n";
240        exit(-1);
241    }
242    print;
243
244    my $percent_found=0;
245    while (<BASEFILE>) {
246        if (/^%/) {
247            $percent_found=1;
248            last;
249        }
250    }
251    if (!$percent_found) {
252        print STDERR "Error: Cannot find end of 'section spot' header\n";
253        exit(-1);
254    }
255    print;
256
257    my $log10=log(10);
258    open(IMPUTEFILE,"<wenni_imputed.data");
259    if ($datatype eq "raw") {
260        open(B1FILE,"<wenni_BCh1Mean.data");
261        open(F1FILE,"<wenni_FCh1Mean.data");
262        open(B2FILE,"<wenni_BCh2Mean.data");
263        open(F2FILE,"<wenni_FCh2Mean.data");
264    }
265    else {
266        open(F1FILE,"<wenni_$$option{intensity1}.data");
267        open(F2FILE,"<wenni_$$option{intensity2}.data");
268    }
269    while (<IMPUTEFILE>) {
270        chop;
271        my @impute_values=split(/\s/);
272        my $row=<F1FILE>;
273        chop;
274        my @f1_values=split(/\s/,$row);
275        $row=<F2FILE>;
276        chop;
277        my @f2_values=split(/\s/,$row);
278        my @b1_values;
279        my @b2_values;
280        if ($datatype eq "raw") {
281            $row=<B1FILE>;
282            chop;
283            @b1_values=split(/\s/,$row);
284            $row=<B2FILE>;
285            chop;
286            @b2_values=split(/\s/,$row);
287        }
288        my $basefile_row=<BASEFILE>;
289        my ($position,$reporter)=split("\t",$basefile_row);
290        print "$position\t$reporter";
291        my $columns=@impute_values;
292        for (my $i=0; $i<$columns; $i++) {
293            my $int1=$f1_values[$i]-($datatype eq "raw" ? $b1_values[$i] : 0);
294            my $int2=$f2_values[$i]-($datatype eq "raw" ? $b2_values[$i] : 0);
295            $int1=($int1<0 ? 0 : $int1);
296            $int2=($int2<0 ? 0 : $int2);
297            my $A=( ($int1>0) && ($int2>0) ? log(sqrt($int1*$int2))/$log10 : 0);
298            print "\t$impute_values[$i]\t$A";
299        }
300        print "\n";
301    }
302    if ($datatype eq "raw") {
303        close(B1FILE);
304        close(B2FILE);
305    }
306    close(F1FILE);
307    close(F2FILE);
308    close(IMPUTEFILE);
309    close(BASEFILE);
310}
Note: See TracBrowser for help on using the repository browser.