source: trunk/yat/utility/utility.h @ 3417

Last change on this file since 3417 was 3417, checked in by Peter, 4 years ago

updating copyright statements

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 14.0 KB
Line 
1#ifndef _theplu_yat_utility_utility_
2#define _theplu_yat_utility_utility_
3
4// $Id: utility.h 3417 2015-05-25 01:35:59Z peter $
5
6/*
7  Copyright (C) 2005 Jari Häkkinen, Peter Johansson, Markus Ringnér
8  Copyright (C) 2006 Jari Häkkinen
9  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson
10  Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Peter Johansson
11
12  This file is part of the yat library, http://dev.thep.lu.se/yat
13
14  The yat library is free software; you can redistribute it and/or
15  modify it under the terms of the GNU General Public License as
16  published by the Free Software Foundation; either version 3 of the
17  License, or (at your option) any later version.
18
19  The yat library is distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23
24  You should have received a copy of the GNU General Public License
25  along with yat. If not, see <http://www.gnu.org/licenses/>.
26*/
27
28///
29/// \file yat/utility/utility.h
30///
31/// @brief Some useful functions are placed here
32///
33
34#include "concept_check.h"
35#include "deprecate.h"
36#include "Exception.h"
37#include "iterator_traits.h"
38#include "WeightIterator.h"
39#include "yat_assert.h"
40
41#include <gsl/gsl_math.h>
42
43#include <boost/iterator/iterator_concepts.hpp>
44#include <boost/concept_check.hpp>
45
46#include <algorithm>
47#include <cctype>
48#include <cmath>
49#include <functional>
50#include <limits>
51#include <locale>
52#include <istream>
53#include <numeric>
54#include <string>
55#include <stdexcept>
56#include <sstream>
57#include <utility>
58#include <vector>
59
60namespace theplu {
61namespace yat {
62namespace utility {
63
64  /**
65     Same as posix C function with same name but works on \c
66     std::string rather than \c char*.
67
68     \see http://linux.die.net/man/3/basename
69
70     \return everything after last '/'
71
72     \since New in yat 0.10
73   */
74  std::string basename(const std::string& fn);
75
76  /**
77     For each element in resulting range assign it to 0.0 if
78     corresponding element in input range is NaN else assign it to
79     1.0.
80
81     \return true if there is at least one NaN in input range
82     [first, last).
83
84     \since New in yat 0.5
85  */
86  template<typename InputIterator, typename OutputIterator>
87  bool binary_weight(InputIterator first, InputIterator last,
88                     OutputIterator result);
89
90  /**
91     Same as C function \c chdir but throws on failure (instead of
92     returning non-zero).
93
94     \throw runtime_error if underlying chdir returns non-zero
95
96     \see http://linux.die.net/man/3/chdir
97
98     \since New in yat 0.10
99   */
100  void chdir(const std::string& dir);
101
102  /**
103     same as C function \c chmod but throws on failure (instead of
104     returning non-zero).
105
106     \see http://linux.die.net/man/3/chmod
107
108     \since New in yat 0.10
109   */
110  void chmod(const std::string& filename, mode_t mode);
111
112  /**
113     \brief convert T to a string
114
115     T is supposed to be a numerical type.
116
117     \since new in yat 0.8
118   */
119  template<typename T>
120  std::string convert(T input);
121
122  /**
123     \brief convert string to (numerical) type
124
125     \throw runtime_error if conversion fails
126   */
127  template<typename T>
128  T convert(const std::string& s);
129
130  /**
131     @brief Copy file \a source to \a target.
132
133     @throw runtime_error if read error of \a source or write error
134     for \a target is encountered.
135  */
136  void copy_file(const std::string& source, const std::string& target);
137
138  /**
139     Same as posix C function with same name but works on \c
140     std::string rather than \c char*.
141
142     \see http://linux.die.net/man/3/dirname
143
144     \return everything prior to the last '/'.
145
146     \since New in yat 0.10
147   */
148  std::string dirname(const std::string& fn);
149
150  /**
151     \return true if \a str matches \a pattern
152
153     \see http://linux.die.net/man/3/fnmatch
154
155     \throw runtime_error if returned value from underlying fnmatch is
156     neither zero nor \c FNM_NOMATCH.
157
158     \since New in yat 0.10
159  */
160  bool fnmatch(const std::string& pattern, const std::string& str,
161               int flags=0);
162
163  /**
164     \brief check if string is convertible to (numerical) type \c T
165
166     \since New in yat 0.5
167   */
168  template<typename T>
169  bool is(const std::string& s);
170
171  ///
172  /// @return true if string is a double
173  ///
174  /// \deprecated Provided for backward compatibility with the 0.4
175  /// API. Use is<double>(const std::string&)
176  ///
177  bool is_double(const std::string&) YAT_DEPRECATE;
178
179  /**
180     This function takes the first word (separated by whitespace) in
181     \a s, replaces all upper case with lower case, and compares it
182     with \a other.
183
184     \return true if processed \a s is equal to \a other. It returns
185     false otherwise or if \a s contains more than one word.
186  */
187  bool is_equal(std::string s, std::string other);
188
189  ///
190  /// @return true if string is a float
191  ///
192  /// \deprecated Provided for backward compatibility with the 0.4
193  /// API. Use is<float>(const std::string&)
194  ///
195  bool is_float(const std::string&) YAT_DEPRECATE;
196
197  ///
198  /// @return true if string is an int
199  ///
200  /// \deprecated Provided for backward compatibility with the 0.4
201  /// API. Use is<int>(const std::string&)
202  ///
203  bool is_int(const std::string&) YAT_DEPRECATE;
204
205  ///
206  /// @return true if string is "nan" (case-insensitive)
207  ///
208  bool is_nan(const std::string& s);
209
210  /**
211     The std::istream will be interpreted as outlined here:
212
213     Lines (rows) are separated by character \a line_sep and column
214     elements within a line are separated by character \a sep.
215     
216     The first line is read into a stringstream, which is used to
217     load the first vector (vec[0]) with elements using
218     load(stringstream, vec[0], sep).
219     
220     Therefore, column elements separation has two modes depending
221     on the value of \a sep.
222     
223     - If \a sep is the default '\\0' value then column elements are
224     separated with white space characters except the new line
225     character. Multiple sequential white space characters are treated
226     as one separator.
227     
228     - Setting \a sep to something else than the default value will
229     change the behaviour to use the \a sep character as the separator
230     between column elements. Multiple sequential \a sep characters
231     will be treated as separating elements with missing values.
232
233     If \a rectangle is true, rows must contain same number of
234     elements or function will throw.
235
236     If \a ignore_empty is true empty lines are ignored.
237
238     \see load(std::istream&, std::vector<T>&, char sep='\\0')
239
240     \note Requirement on T: utility::convert<T> must be supported
241     (from yat 0.7 T=string is also supported)
242
243     \since New in yat 0.6
244   */
245  template<typename T>
246  void load(std::istream& is, std::vector<std::vector<T> >& vec, char sep='\0', 
247            char line_sep='\n', bool ignore_empty=false, bool rectangle=true);
248
249  /**
250     \brief Fill a vector<T> with elements from istream
251
252     Element separation has two modes depending on the value of \a
253     sep.
254     
255     - If \a sep is the default '\\0' value then elements are
256     separated with white space characters. Multiple sequential white
257     space characters are treated as one separator.
258     
259     - Setting \a sep to something else than the default value will
260     change the behaviour to use the \a sep character as the
261     separator between column elements. Multiple sequential \a sep
262     characters will be treated as separating elements with missing
263     values. Missing values are set to std::numeric_limits<T>::quiet_NaN
264     
265     \note Requirement on T: utility::convert<T> must be supported
266     (from yat 0.7 T=string is also supported)
267
268     \since New in yat 0.6
269   */
270  template<typename T>
271  void load(std::istream& is, std::vector<T>& vec, char sep='\0');
272
273  /**
274     \return base-2 logarithm of x
275
276     \since New in yat 0.10
277   */
278  // c++11 provides std::log2 so perhaps we should call that one if
279  // availalable (but a bit tricky since this is a public header)
280  template<typename T>
281  T log2(T x) { return std::log(x)/M_LN2; }
282
283  /**
284     \brief create a directory \a dir
285
286     \see http://linux.die.net/man/3/mkdir
287
288     \throw runtime_error if creation failed
289
290     \since New in yat 0.10
291   */
292  void mkdir(const std::string& dir, mode_t mode=0777);
293
294  /**
295     Similar to mkdir(const std::string&, mode_t).
296
297     No error if \a dir already exist. Make parent directories as needed.
298
299     \since New in yat 0.10
300   */
301  void mkdir_p(const std::string& dir, mode_t mode=0777);
302
303  /**
304     same as C function remove but throws errno_error at failure
305
306     \see http://linux.die.net/man/3/remove
307
308     \since New in yat 0.12
309   */
310  void remove(const std::string& fn);
311
312  /**
313     same as C function with same name but throws errno_error if error
314     is encountered
315
316     \see http://linux.die.net/man/3/rename
317
318     \since New in yat 0.12
319   */
320  void rename(const std::string& from, const std::string& to);
321
322  /**
323     In \a full_str replace every sub-string \a old_str with \a
324     new_str;
325
326     \since New in yat 0.10
327   */
328  void replace(std::string& full_str, std::string old_str, std::string new_str);
329
330
331  /**
332     Calculate sum of weights in range [first, last). The complexity
333     is linear except in the important case when \c Iterator is
334     unweighted and \random_access_iterator when complexity is constant.
335
336     Type Requirements:
337     - \c Iterator is \ref concept_data_iterator
338     - \c Iterator is \readable_iterator
339     - \c Iterator is \single_pass_iterator
340
341     \since New in yat 0.13
342   */
343  template<typename Iterator>
344  double sum_weight(Iterator first, Iterator last);
345
346/// \cond IGNORE_DOXYGEN
347
348// private namespace
349namespace detail {
350
351  /**
352     \internal
353
354     \brief convert s to t
355
356     used in function is<T> and convert<T>
357
358     \return true if conversion was successful
359
360     \internal
361   */
362  template<typename T>
363  bool convert(const std::string& s, T& t);
364
365  /**
366     Functor used in load function
367   */
368  template<typename T>
369  struct VectorPusher
370  {
371    /**
372       convert element to T and push on vec's back
373
374       \internal
375     */
376    void operator()(const std::string& element, std::vector<T>& vec)
377    {
378      if (!element.size())
379        vec.push_back(std::numeric_limits<T>::quiet_NaN());
380      else {
381        vec.push_back(theplu::yat::utility::convert<T>(element));
382      }
383    }
384  };
385
386  /**
387     specialization for string
388
389     \internal
390   */
391  template<>
392  struct VectorPusher<std::string>
393  {
394    /**
395       push element on vec's back
396     */
397    void operator()(const std::string& element, std::vector<std::string>& vec)
398    { 
399      vec.push_back(element);
400    }
401  };
402
403
404  template<typename Iterator>
405  double sum_weight(Iterator first, Iterator last, unweighted_iterator_tag tag)
406  {
407    return std::distance(first, last);
408  }
409
410
411  template<typename Iterator>
412  double sum_weight(Iterator first, Iterator last, weighted_iterator_tag tag)
413  {
414    return std::accumulate(weight_iterator(first), weight_iterator(last), 0);
415  }
416
417
418} // end of namespace detail
419
420/// \endcond
421
422  // template implementations
423
424  template<typename InputIterator, typename OutputIterator>
425  bool binary_weight(InputIterator first, InputIterator last, 
426                     OutputIterator result)
427  {
428    bool nan=false;
429    while (first!=last) {
430      if (std::isnan(*first)) {
431        *result=0;
432        nan=true;
433      }
434      else
435        *result = 1.0;
436      ++first;
437      ++result;
438    }
439    return nan;
440  }
441
442
443  template<typename T>
444  std::string convert(T input)
445  {
446    std::ostringstream ss;
447    ss << input;
448    return ss.str();
449  }
450
451
452  template<typename T>
453  T convert(const std::string& s)
454  {
455    T result;
456    if (!detail::convert(s, result))
457      throw runtime_error(std::string("yat::utility::convert(\"")+s+
458                          std::string("\")"));
459    return result;
460  }
461
462
463  template<typename T>
464  bool is(const std::string& s)
465  {
466    T tmp;
467    return detail::convert(s, tmp);
468  }
469
470
471  template<typename T>
472  void load(std::istream& is, std::vector<std::vector<T> >& matrix,
473            char sep, char line_sep, bool ignore_empty,
474            bool rectangle)
475  {
476    size_t nof_columns=0;
477    std::string line;
478    while(getline(is, line, line_sep)){
479      if (line.empty() && ignore_empty)
480        continue;
481      matrix.push_back(std::vector<T>());
482      std::vector<T>& v=matrix.back();
483      v.reserve(nof_columns);
484      std::stringstream ss(line);
485      load(ss, v, sep);
486      // add NaN for final separator (or empty string if T=std::string)
487      detail::VectorPusher<T> pusher;
488      if(sep!='\0' && !line.empty() && line[line.size()-1]==sep)
489        pusher("", v);
490
491      if (rectangle && nof_columns && v.size()!=nof_columns) {
492        std::ostringstream s;
493        s << "load stream error: "
494          << "line " << matrix.size() << " has " << v.size()
495          << " columns; expected " << nof_columns << " columns.";
496        throw utility::IO_error(s.str());
497      }
498      nof_columns = std::max(nof_columns, v.size());
499    }
500
501    // manipulate the state of the stream to be good
502    is.clear(std::ios::goodbit);
503  }
504
505  template<typename T>
506  void load(std::istream& is, std::vector<T>& vec, char sep)
507  {
508    detail::VectorPusher<T> pusher;
509    std::string element;
510    bool ok=true;
511    while(true) {
512      if(sep=='\0')
513        ok=(is>>element);
514      else
515        ok=getline(is, element, sep);
516      if(!ok)
517        break;
518      pusher(element, vec);
519    }
520  }
521
522
523  template<typename Iterator>
524  double sum_weight(Iterator first, Iterator last)
525  {
526    BOOST_CONCEPT_ASSERT((DataIteratorConcept<Iterator>));
527    BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<Iterator>));
528    BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<Iterator>));
529    typename weighted_iterator_traits<Iterator>::type tag;
530    return detail::sum_weight(first, last, tag);
531  }
532
533
534/// \cond IGNORE_DOXYGEN
535namespace detail {
536  template<typename T>
537  bool convert(const std::string& s, T& result)
538  {
539    if (!std::numeric_limits<T>::is_signed) {
540      // first non-whitespace character
541      std::string::const_iterator iter = s.begin();
542      while (iter!=s.end() && std::isspace(*iter))
543        ++iter;
544      // unsigned int cannot start with a '-' and with some compilers
545      // operation ss >> result won't fail so catch it like this instead.
546      if (iter==s.end() || *iter=='-')
547        return false;
548    }
549    std::istringstream ss(s);
550    ss >> result;
551    if (ss.fail()) {
552      if (is_nan(s) || is_equal(s, "-nan")) {
553        result = std::numeric_limits<T>::quiet_NaN();
554        return true;
555      }
556      if (is_equal(s, "inf")) {
557        result = std::numeric_limits<T>::infinity();
558        return true;
559      }
560      if (is_equal(s, "-inf")) {
561        // unsigned types are caught in prologue
562        YAT_ASSERT(std::numeric_limits<T>::is_signed);
563        result = -std::numeric_limits<T>::infinity();
564        return true;
565      }
566      return false;
567    }
568    // Check that nothing is left on stream
569    std::string b;
570    ss >> b;
571    return b.empty();
572  }
573} // of namespace detail
574
575/// \endcond
576
577}}} // of namespace utility, yat, and theplu
578
579#endif
Note: See TracBrowser for help on using the repository browser.