Changeset 105


Ignore:
Timestamp:
Jun 15, 2004, 5:26:02 PM (18 years ago)
Author:
cecilia
Message:

handle missing values

Location:
trunk/src
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/Merge.cc

    r83 r105  
    88#include "stl_utility.h"
    99#include "Merge.h"
     10#include "Averager.h"
     11#include "WeightedAverager.h"
    1012
    1113namespace theplu {
     
    1416  using namespace std;
    1517
    16   Merge::Merge(istream& data, istream& error, istream& annotations, int nof_assays)
    17     : data_(data), error_(error), nof_assays_(nof_assays)
     18  Merge::Merge(gslapi::matrix& data, gslapi::matrix& error,
     19               vector<string>& annotations)
     20    : data_(data), error_(error), annots_(annotations)
     21  {
     22   
     23    if(data_.columns() == error_.columns()){
     24      nof_assays_ = data_.columns();
     25    }else{
     26      cerr << "not the same number of assays in data and error matrix";
     27    }
     28   
     29    if(data_.rows() != error_.rows() || data_.rows() != annots_.size()){
     30      cerr << "wrong number of gene annotations (!= nof genes)";
     31    }
     32  }
     33
     34  Merge::Merge(istream& data, istream& error, istream& annotations)
     35    : data_(data), error_(error)
    1836  {
    1937    // skapa annot-vektor
     
    2341      annots_.push_back(str);
    2442    }
     43   
     44    if(data_.columns() == error_.columns()){
     45      nof_assays_ = data_.columns();
     46    }else{
     47      cerr << "not the same number of assays in data and error matrix";
     48    }
     49   
     50    if(data_.rows() != error_.rows() || data_.rows() != annots_.size()){
     51      cerr << "wrong number of gene annotations (!= nof genes)";
     52    }
     53  }
     54 
     55  void Merge::calculate_weights(gslapi::matrix& weights) {
     56   
     57    for (unsigned int i=0; i<weights.rows(); i++)
     58      for (unsigned int j=0; j<weights.columns(); j++){
     59        // if error < 0 (missing value) weight=0
     60        if(weights(i,j) >= 0){
     61          weights(i,j) = 1/(1+weights(i,j));
     62        }else{
     63          weights(i,j) = 0;
     64        }
     65      }
    2566  }
    2667
    27   gslapi::matrix Merge::calculate_weights(void) const
    28   {
     68  void Merge::do_something(void){
     69
     70    using namespace std;
    2971    gslapi::matrix weights(error_);
    30     for (unsigned int i=0; i<weights.rows(); i++)
    31       for (unsigned int j=0; j<weights.columns(); j++)
    32         weights(i,j) = 1/(1+weights(i,j));
    33     // weights = w_i=1/(1+u_i)
    34     return weights;
    35   }
    36 
    37   void Merge::do_something(void)
    38   {
    39     using namespace std;
    40     gslapi::matrix weights;
    41     weights = calculate_weights();
     72    calculate_weights(weights);
    4273
    4374    // för varje annot: kolla vilka rader den finns på
    4475    map<string, vector<u_int> > name;
    45     for (u_int i=0; i<annots_.size(); i++)
     76    for (u_int i=0; i<annots_.size(); i++){
    4677      name[annots_[i]].push_back(i);
     78    }
    4779   
    48     // för varje annot
    49     for (map<string,vector<u_int> >::const_iterator k=name.begin(); k!=name.end(); k++)
    50       merging((*k).second, weights);
     80    // för varje annot, merge
     81    for (map<string,vector<u_int> >::const_iterator k=name.begin();
     82         k!=name.end(); k++){
     83      if((*k).second.size()>1){
     84        merging((*k).second, weights);
     85      }
     86    }
    5187  }
    5288
    5389  void Merge::merging(vector<u_int> index, gslapi::matrix& weights){
    54 
     90    WeightedAverager w_av;
     91    Averager av;
    5592    // merge genes for each assay
    5693    for (u_int j=0; j<nof_assays_; j++){
    57 
    5894      // calc. m
    59       double sum_wx = 0;
    60       double sum_w = 0;
    6195      for (vector<u_int>::const_iterator i=index.begin(); i!=index.end(); i++){
    62         sum_wx += weights(*i,j)*data_(*i,j);
    63         sum_w += weights(*i,j);
     96        w_av.add(data_(*i,j),weights(*i,j));
     97        if(error_(*i,j)>0){
     98          av.add(1/error_(*i,j));
     99        }
    64100      }
    65       double m = sum_wx/sum_w;
    66      
     101      double m = w_av.average();
    67102      // calc. U
    68       double sum_u = 0;
    69       double sum_var = 0;
    70       for (vector<u_int>::const_iterator i=index.begin(); i!=index.end(); i++){
    71         sum_u += 1/error_(*i,j);
    72         double w = weights(*i,j);
    73         double diff = data_(*i,j)-m;
    74         sum_var += w*w*diff*diff;
    75       }
    76       double U = 1/sum_u+sum_var/(sum_w*sum_w);
    77      
     103      double U = 1/av.average()+w_av.squared_error();
    78104      // insert new values
    79105      for (vector<u_int>::const_iterator i=index.begin(); i!=index.end(); i++){
     
    81107        error_(*i,j) = U;
    82108      }
    83      
     109      w_av.reset();
     110      av.reset();
    84111    }
    85    
    86112  }
    87113
  • trunk/src/Merge.h

    r81 r105  
    2424   U=1/(sum(1/u_i))+sum(w_i^2*(x_i-m)^2)/(sum(w_i))^2
    2525   w_i is calculated from errors u_i
     26   
     27   Missing values have error < 0
    2628*/
    2729  class Merge
     
    3537
    3638    ///
    37     /// Constructor, arguments: data(matrix), weights(matrix), annotations(vector), nof_assays(int)
     39    /// istream Constructor, arguments: data(matrix), weights(matrix),
     40    /// annotations(vector)
    3841    ///
    39     Merge(istream& data, istream& error, istream& annotations, int nof_assays);
     42    Merge(istream& data, istream& error, istream& annotations);
    4043
    4144    ///
    42     /// Calculate new weighted data values.
     45    /// matrix Constructor, arguments: data(matrix), weights(matrix),
     46    /// annotations(vector)
    4347    ///
    44     // modify data_ and error_
     48    Merge(gslapi::matrix& data, gslapi::matrix& error,
     49          vector<string>& annotations);
     50   
     51    ///
     52    /// Calculate new weighted data and error values.
     53    ///
    4554    void do_something(void);
    4655   
     
    5968    u_int nof_assays_;
    6069
    61     // or gslapi::matrix& ?
    62     gslapi::matrix calculate_weights(void) const;
     70    void calculate_weights(gslapi::matrix&);
    6371    void Merge::merging(vector<u_int>, gslapi::matrix&);
    6472  };
Note: See TracChangeset for help on using the changeset viewer.