Changeset 3243


Ignore:
Timestamp:
May 24, 2014, 6:26:32 PM (9 years ago)
Author:
Peter
Message:

speed up Percentiler when input range is unweighted and unsorted. Do not sort the entire range but use nth_element to find the element of interest.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/yat/statistics/Percentiler.h

    r3240 r3243  
    102102      if (first==last)
    103103        return std::numeric_limits<double>::quiet_NaN();
    104       return calculate(first, last, sorted_,
    105        typename utility::weighted_iterator_traits<RandomAccessIterator>::type());
     104      // just to avoid long line
     105      using utility::weighted_iterator_traits;
     106      typedef typename weighted_iterator_traits<RandomAccessIterator>::type tag;
     107      return calculate(first, last, sorted_, tag());
    106108    }
    107109  private:
     
    136138                         utility::unweighted_iterator_tag tag) const
    137139  {
     140    size_t n = last - first;
    138141    // range is one value only is a special case
    139     if (first+1 == last)
     142    if (n == 1)
    140143      *first;
     144
     145    double j = n * perc_ / 100.0;
     146
     147    // Some special cases
     148    if (j > n-1) {
     149      if (sorted)
     150        return *(--last);
     151      return *std::max_element(first, last);
     152    }
     153    if (j < 1.0) {
     154      if (sorted)
     155        return *first;
     156      return *std::min_element(first, last);
     157    }
     158
     159    size_t i = static_cast<size_t>(j);
     160    // for border case (j integer) we take the average of adjacent bins
     161    size_t k = (i==j) ? i-1 : i;
     162
    141163    if (sorted) {
    142       size_t n = last - first;
    143       // have to take care of this special case
    144       if (perc_>= 100.0)
    145         return *(--last);
    146       if (perc_<= 0.0)
    147         return *first;
    148       double j = n * perc_ / 100.0;
    149       size_t i = static_cast<size_t>(j);
    150       if (i==j)
    151         return (first[i]+first[i-1])/2;
    152       return first[i];
    153     }
    154 
    155     std::vector<double> v_copy;
    156     v_copy.reserve(std::distance(first,last));
    157     std::copy(first, last, std::back_inserter(v_copy));
    158     std::sort(v_copy.begin(), v_copy.end());
    159     return calculate(v_copy.begin(), v_copy.end(), true, tag);
     164      if (i==k)
     165        return first[i];
     166      return (first[i]+first[k])/2;
     167    }
     168
     169    std::vector<double> vec(first, last);
     170    // find the ith element
     171    std::nth_element(vec.begin(), vec.begin()+i, vec.end());
     172    // if i==k simply return vec[i]
     173    if (i==k)
     174      return vec[i];
     175    YAT_ASSERT(k==i-1);
     176    // otherwise return average of vec[i] and vec[k].
     177
     178    // We need to find the kth element. Since we have called
     179    // nth_element above, we know that all elements in (begin+i, end)
     180    // are >= vec[i] and all elements in [begin, begin+i) are <=
     181    // vec[i]. Consequently, kth (k=i-1) element is the max element in
     182    // range [begin, begin+i).
     183    return (vec[i] + *std::max_element(vec.begin(), vec.begin()+i))/2;
    160184  }
    161185
Note: See TracChangeset for help on using the changeset viewer.