source: trunk/lib/yat/utility.h @ 1058

Last change on this file since 1058 was 1058, checked in by Peter Johansson, 13 years ago

update to latest yat

  • Property svn:eol-style set to native
File size: 9.0 KB
Line 
1#ifndef _theplu_yat_utility_utility_
2#define _theplu_yat_utility_utility_
3
4// $Id: utility.h 2248 2010-04-22 00:57:13Z peter $
5
6/*
7  Copyright (C) 2005 Jari Häkkinen, Peter Johansson, Markus Ringnér
8  Copyright (C) 2006 Jari Häkkinen
9  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson
10  Copyright (C) 2009 Peter Johansson
11
12  This file is part of the yat library, http://dev.thep.lu.se/yat
13
14  The yat library is free software; you can redistribute it and/or
15  modify it under the terms of the GNU General Public License as
16  published by the Free Software Foundation; either version 3 of the
17  License, or (at your option) any later version.
18
19  The yat library is distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23
24  You should have received a copy of the GNU General Public License
25  along with yat. If not, see <http://www.gnu.org/licenses/>.
26*/
27
28///
29/// \file yat/utility/utility.h
30///
31/// @brief Some useful functions are placed here
32///
33
34#include "deprecate.h"
35#include "Exception.h"
36
37#include <cmath>
38#include <limits>
39#include <istream>
40#include <string>
41#include <stdexcept>
42#include <sstream>
43#include <utility>
44#include <vector>
45
46namespace theplu {
47namespace yat {
48namespace utility {
49
50  /**
51     For each element in resulting range assign it to 0.0 if
52     corresponding element in input range is NaN else assign it to
53     1.0.
54
55     \return true if there is at least one NaN in input range
56     [first, last).
57
58     \since New in yat 0.5
59  */
60  template<typename InputIterator, typename OutputIterator>
61  bool binary_weight(InputIterator first, InputIterator last, 
62                     OutputIterator result);
63
64
65  /**
66     \brief convert string to (numerical) type
67
68     \throw runtime_error if conversion fails
69   */
70  template<typename T>
71  T convert(const std::string& s);
72
73  /**
74     \brief check if string is convertible to (numerical) type
75
76     \since New in yat 0.5
77   */
78  template<typename T>
79  bool is(const std::string& s);
80
81  ///
82  /// @return true if string is a double
83  ///
84  /// \deprecated Provided for backward compatibility with the 0.4
85  /// API. Use is<double>(const std::string&)
86  ///
87  bool is_double(const std::string&) YAT_DEPRECATE;
88
89  /**
90     This function takes the first word (separated by whitespace) in
91     \a s, replaces all upper case with lower case, and compares it
92     with \a other.
93
94     \return true if processed \a s is equal to \a other. It returns
95     false otherwise or if \a s contains more than one word.
96  */
97  bool is_equal(std::string s, std::string other);
98
99  ///
100  /// @return true if string is a float
101  ///
102  /// \deprecated Provided for backward compatibility with the 0.4
103  /// API. Use is<float>(const std::string&)
104  ///
105  bool is_float(const std::string&) YAT_DEPRECATE;
106
107  ///
108  /// @return true if string is an int
109  ///
110  /// \deprecated Provided for backward compatibility with the 0.4
111  /// API. Use is<int>(const std::string&)
112  ///
113  bool is_int(const std::string&) YAT_DEPRECATE;
114
115  ///
116  /// @return true if string is "nan" (case-insensitive)
117  ///
118  bool is_nan(const std::string& s);
119
120  /**
121     The std::istream will be interpreted as outlined here:
122
123     Rows (lines) are separated by character \a line_sep and the
124     elements within a row are separated by character \a sep.
125     
126     The first line is read into a stringstream, which is used to
127     load the first vector (vec[0]) with elements using
128     load(stringstream, vec[0], sep).
129     
130     Therefore, column elements separation has two modes depending
131     on the value of \a sep.
132     
133     - If \a sep is the default '\\0' value then column elements are
134     separated with white space characters except the new line
135     character. Multiple sequential white space characters are treated
136     as one separator.
137     
138     - Setting \a sep to something else than the default value will
139     change the behaviour to use the \a sep character as the separator
140     between column elements. Multiple sequential \a sep characters
141     will be treated as separating elements with missing values.
142
143     If \a rectangle is true, all rows must contain the same number of
144     elements; otherwise the function throws utility::IO_error.
145
146     If \a ignore_empty is true empty lines are ignored.
147
148     \see load(std::istream&, std::vector<T>&, char sep='\\0')
149
150     \note Requirement on T: utility::convert<T> must be supported
151     (from yat 0.7 T=string is also supported)
152
153     \since New in yat 0.6
154   */
155  template<typename T>
156  void load(std::istream& is, std::vector<std::vector<T> >& vec, char sep='\0', 
157            char line_sep='\n', bool ignore_empty=false, bool rectangle=true);
158
159  /**
160     \brief Fill a vector<T> with elements from istream
161
162     Element separation has two modes depending on the value of \a
163     sep.
164     
165     - If \a sep is the default '\\0' value then elements are
166     separated with white space characters. Multiple sequential white
167     space characters are treated as one separator.
168     
169     - Setting \a sep to something else than the default value will
170     change the behaviour to use the \a sep character as the
171     separator between column elements. Multiple sequential \a sep
172     characters will be treated as separating elements with missing
173     values. Missing values are set to std::numeric_limits<T>::quiet_NaN().
174     
175     \note Requirement on T: utility::convert<T> must be supported
176     (from yat 0.7 T=string is also supported)
177
178     \since New in yat 0.6
179   */
180  template<typename T>
181  void load(std::istream& is, std::vector<T>& vec, char sep='\0');
182 
183// private namespace
184namespace detail {
185  /**
186     Functor used in load function
187   */
188  template<typename T>
189  struct VectorPusher
190  {
191    /**
192       convert element to T and push on vec's back
193
194       \internal
195     */
196    void operator()(const std::string& element, std::vector<T>& vec)
197    { 
198      if (!element.size())
199        vec.push_back(std::numeric_limits<T>::quiet_NaN());
200      else {
201        vec.push_back(theplu::yat::utility::convert<T>(element));
202      }
203    }
204  };
205
206  /**
207     specialization for string
208
209     \internal
210   */
211  template<>
212  struct VectorPusher<std::string>
213  {
214    /**
215       push element on vec's back
216     */
217    void operator()(const std::string& element, std::vector<std::string>& vec)
218    { 
219      vec.push_back(element);
220    }
221  };
222
223} // end of namespace detail
224
225
226  // template implementations
227
228  template<typename InputIterator, typename OutputIterator>
229  bool binary_weight(InputIterator first, InputIterator last, 
230                     OutputIterator result)
231  {
232    bool nan=false;
233    while (first!=last) {
234      if (std::isnan(*first)) {
235        *result=0;
236        nan=true;
237      }
238      else
239        *result = 1.0;
240      ++first;
241      ++result;
242    }
243    return nan;
244  }
245
246
247  // template implementations
248  template<typename T>
249  T convert(const std::string& s)
250  {
251    if (is_nan(s))
252      return std::numeric_limits<T>::quiet_NaN();
253    if (is_equal(s, "inf"))
254      return std::numeric_limits<T>::infinity();
255    if (is_equal(s, "-inf")) {
256      if (std::numeric_limits<T>::is_signed)
257        return -std::numeric_limits<T>::infinity();
258      else
259        throw runtime_error(std::string("yat::utility::convert(\"")+s+
260                            std::string("\"): type is unsigned") );
261    }
262    std::stringstream ss(s);
263    T a;
264    ss >> a;
265    bool ok = true;
266    if(ss.fail()) 
267      ok = false;
268    // Check that nothing is left on stream
269    std::string b;
270    ss >> b;
271    if (!b.empty() || !ok)
272      throw runtime_error(std::string("yat::utility::convert(\"")+s+
273                          std::string("\")"));
274    return a;
275  }
276
277  template<typename T>
278  bool is(const std::string& s)
279  {
280    if (is_nan(s))
281      return std::numeric_limits<T>::has_quiet_NaN;
282    if (is_equal(s, "inf"))
283      return std::numeric_limits<T>::has_infinity;
284    if (is_equal(s, "-inf"))
285      return std::numeric_limits<T>::has_infinity && 
286        std::numeric_limits<T>::is_signed;
287    std::stringstream ss(s);
288    T a;
289    ss >> a;
290    if(ss.fail())
291      return false;
292    // Check that nothing is left on stream
293    std::string b;
294    ss >> b;
295    return b.empty();
296  }
297
298  template<typename T>
299  void load(std::istream& is, std::vector<std::vector<T> >& matrix, 
300            char sep, char line_sep, bool ignore_empty, 
301            bool rectangle)
302  {
303    size_t nof_columns=0;
304    std::string line;
305    while(getline(is, line, line_sep)){
306      if (line.empty() && ignore_empty)
307        continue;
308      matrix.resize(matrix.size()+1);
309      std::vector<double>& v=matrix.back();
310      v.reserve(nof_columns);
311      std::stringstream ss(line);
312      load(ss, v, sep);
313      // add NaN for final separator
314      if(sep!='\0' && !line.empty() && line[line.size()-1]==sep) 
315        v.push_back(std::numeric_limits<T>::quiet_NaN());
316     
317      if (rectangle && nof_columns && v.size()!=nof_columns) {
318        std::ostringstream s;
319        s << "load data file error: "
320          << "line " << matrix.size() << " has " << v.size()
321          << " columns; expected " << nof_columns << " columns.";
322        throw utility::IO_error(s.str());
323      }       
324      nof_columns = std::max(nof_columns, v.size());
325    }
326
327    // manipulate the state of the stream to be good
328    is.clear(std::ios::goodbit);
329  }
330
331  template<typename T>
332  void load(std::istream& is, std::vector<T>& vec, char sep='\0')
333  {
334    detail::VectorPusher<T> pusher;
335    std::string element;
336    bool ok=true;
337    while(true) {
338      if(sep=='\0')
339        ok=(is>>element);
340      else
341        ok=getline(is, element, sep);
342      if(!ok)
343        break;
344     
345      pusher(element, vec);
346    }
347  }           
348
349}}} // of namespace utility, yat, and theplu
350
351#endif
Note: See TracBrowser for help on using the repository browser.