source: trunk/yat/utility/utility.h @ 3579

Last change on this file since 3579 was 3579, checked in by Peter, 5 years ago

merge release 0.14 into trunk

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 14.9 KB
Line 
1#ifndef _theplu_yat_utility_utility_
2#define _theplu_yat_utility_utility_
3
4// $Id: utility.h 3579 2017-01-16 03:54:43Z peter $
5
6/*
7  Copyright (C) 2005 Jari Häkkinen, Peter Johansson, Markus Ringnér
8  Copyright (C) 2006 Jari Häkkinen
9  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson
10  Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Peter Johansson
11  Copyright (C) 2017 Jari Häkkinen
12
13  This file is part of the yat library, http://dev.thep.lu.se/yat
14
15  The yat library is free software; you can redistribute it and/or
16  modify it under the terms of the GNU General Public License as
17  published by the Free Software Foundation; either version 3 of the
18  License, or (at your option) any later version.
19
20  The yat library is distributed in the hope that it will be useful,
21  but WITHOUT ANY WARRANTY; without even the implied warranty of
22  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23  General Public License for more details.
24
25  You should have received a copy of the GNU General Public License
26  along with yat. If not, see <http://www.gnu.org/licenses/>.
27*/
28
29///
30/// \file yat/utility/utility.h
31///
32/// @brief Some useful functions are placed here
33///
34
35#include "concept_check.h"
36#include "deprecate.h"
37#include "Exception.h"
38#include "iterator_traits.h"
39#include "WeightIterator.h"
40#include "yat_assert.h"
41
42#include <gsl/gsl_math.h>
43
44#include <boost/iterator/iterator_concepts.hpp>
45#include <boost/concept_check.hpp>
46
47#include <algorithm>
48#include <cctype>
49#include <cmath>
50#include <cstdlib>
51#include <functional>
52#include <limits>
53#include <locale>
54#include <istream>
55#include <numeric>
56#include <string>
57#include <stdexcept>
58#include <sstream>
59#include <utility>
60#include <vector>
61
62namespace theplu {
63namespace yat {
64namespace utility {
65
66  /**
67     Same as posix C function with same name but works on \c
68     std::string rather than \c char*.
69
70     \see http://linux.die.net/man/3/basename
71
72     \return everything after last '/'
73
74     \since New in yat 0.10
75   */
76  std::string basename(const std::string& fn);
77
78  /**
79     For each element in resulting range assign it to 0.0 if
80     corresponding element in input range is NaN else assign it to
81     1.0.
82
83     \return true if there is at least one NaN in input range
84     [first, last).
85
86     Type Requirements:
87     - \c InputIterator is \single_pass_iterator
88     - \c InputIterator is \readable_iterator
89     - \c std::isnan(*InputIterator) is a valid expression (e.g. \c
90       float, \c double, and \c long \c double )
91     - \c OutputIterator is \incrementable_iterator
92     - \c OutputIterator is \writable_iterator
93     - \c float is convertible to \c OutputIterator value type
94
95     \since New in yat 0.5
96  */
97  template<typename InputIterator, typename OutputIterator>
98  bool binary_weight(InputIterator first, InputIterator last,
99                     OutputIterator result);
100
101  /**
102     Same as C function \c chdir but throws on failure (instead of
103     returning non-zero).
104
105     \throw runtime_error if underlying chdir returns non-zero
106
107     \see http://linux.die.net/man/3/chdir
108
109     \since New in yat 0.10
110   */
111  void chdir(const std::string& dir);
112
113  /**
114     same as C function \c chmod but throws on failure (instead of
115     returning non-zero).
116
117     \see http://linux.die.net/man/3/chmod
118
119     \since New in yat 0.10
120   */
121  void chmod(const std::string& filename, mode_t mode);
122
123  /**
124     \brief convert T to a string
125
126     T is supposed to be a numerical type.
127
128     \since new in yat 0.8
129   */
130  template<typename T>
131  std::string convert(T input);
132
133  /**
134     \brief convert string to (numerical) type
135
136     \throw runtime_error if conversion fails
137   */
138  template<typename T>
139  T convert(const std::string& s);
140
141  /**
142     @brief Copy file \a source to \a target.
143
144     @throw runtime_error if read error of \a source or write error
145     for \a target is encountered.
146  */
147  void copy_file(const std::string& source, const std::string& target);
148
149
150  /**
151     Same as posix C function with same name but works on \c
152     std::string rather than \c char*.
153
154     \see http://linux.die.net/man/3/dirname
155
156     \return everything prior to the last '/'.
157
158     \since New in yat 0.10
159   */
160  std::string dirname(const std::string& fn);
161
162  /**
163     \return true if \a str matches \a pattern
164
165     \see http://linux.die.net/man/3/fnmatch
166
167     \throw runtime_error if returned value from underlying fnmatch is
168     neither zero nor \c FNM_NOMATCH.
169
170     \since New in yat 0.10
171  */
172  bool fnmatch(const std::string& pattern, const std::string& str,
173               int flags=0);
174
175  /**
176     \brief check if string is convertible to (numerical) type \c T
177
178     \since New in yat 0.5
179   */
180  template<typename T>
181  bool is(const std::string& s);
182
183  ///
184  /// @return true if string is a double
185  ///
186  /// \deprecated Provided for backward compatibility with the 0.4
187  /// API. Use is<double>(const std::string&)
188  ///
189  bool is_double(const std::string&) YAT_DEPRECATE;
190
191  /**
192     This function takes the first word (separated by whitespace) in
193     \a s, replaces all upper case with lower case, and compares it
194     with \a other.
195
196     \return true if processed \a s is equal to \a other. It returns
197     false otherwise or if \a s contains more than one word.
198  */
199  bool is_equal(std::string s, std::string other);
200
201  ///
202  /// @return true if string is a float
203  ///
204  /// \deprecated Provided for backward compatibility with the 0.4
205  /// API. Use is<float>(const std::string&)
206  ///
207  bool is_float(const std::string&) YAT_DEPRECATE;
208
209  ///
210  /// @return true if string is an int
211  ///
212  /// \deprecated Provided for backward compatibility with the 0.4
213  /// API. Use is<int>(const std::string&)
214  ///
215  bool is_int(const std::string&) YAT_DEPRECATE;
216
217  ///
218  /// @return true if string is "nan" (case-insensitive)
219  ///
220  bool is_nan(const std::string& s);
221
222  /**
223     The std::istream will be interpreted as outlined here:
224
225     Lines (rows) are separated by character \a line_sep and column
226     elements within a line are separated by character \a sep.
227     
228     The first line is read into a stringstream, which is used to
229     load the first vector (vec[0]) with elements using
230     load(stringstream, vec[0], sep).
231     
232     Therefore, column elements separation has two modes depending
233     on the value of \a sep.
234     
235     - If \a sep is the default '\\0' value then column elements are
236     separated with white space characters except the new line
237     character. Multiple sequential white space characters are treated
238     as one separator.
239     
240     - Setting \a sep to something else than the default value will
241     change the behaviour to use the \a sep character as the separator
242     between column elements. Multiple sequential \a sep characters
243     will be treated as separating elements with missing values.
244
245     If \a rectangle is true, rows must contain same number of
246     elements or function will throw.
247
248     If \a ignore_empty is true empty lines are ignored.
249
250     \see load(std::istream&, std::vector<T>&, char sep='\\0')
251
252     \note Requirement on T: utility::convert<T> must be supported
253     (from yat 0.7 T=string is also supported)
254
255     \since New in yat 0.6
256   */
257  template<typename T>
258  void load(std::istream& is, std::vector<std::vector<T> >& vec, char sep='\0', 
259            char line_sep='\n', bool ignore_empty=false, bool rectangle=true);
260
261  /**
262     \brief Fill a vector<T> with elements from istream
263
264     Element separation has two modes depending on the value of \a
265     sep.
266     
267     - If \a sep is the default '\\0' value then elements are
268     separated with white space characters. Multiple sequential white
269     space characters are treated as one separator.
270     
271     - Setting \a sep to something else than the default value will
272     change the behaviour to use the \a sep character as the
273     separator between column elements. Multiple sequential \a sep
274     characters will be treated as separating elements with missing
275     values. Missing values are set to std::numeric_limits<T>::quiet_NaN
276     
277     \note Requirement on T: utility::convert<T> must be supported
278     (from yat 0.7 T=string is also supported)
279
280     \since New in yat 0.6
281   */
282  template<typename T>
283  void load(std::istream& is, std::vector<T>& vec, char sep='\0');
284
285  /**
286     \return base-2 logarithm of x
287
288     Implemented for \c float, \c double, and \c long \c double.
289
290     \since New in yat 0.10
291   */
292  // c++11 provides std::log2 so perhaps we should call that one if
293  // availalable (but a bit tricky since this is a public header)
294  template<typename T>
295  T log2(T x) { return std::log(x)/M_LN2; }
296
297  /**
298     \brief create a directory \a dir
299
300     \see http://linux.die.net/man/3/mkdir
301
302     \throw runtime_error if creation failed
303
304     \since New in yat 0.10
305   */
306  void mkdir(const std::string& dir, mode_t mode=0777);
307
308  /**
309     Similar to mkdir(const std::string&, mode_t).
310
311     No error if \a dir already exist. Make parent directories as needed.
312
313     \since New in yat 0.10
314   */
315  void mkdir_p(const std::string& dir, mode_t mode=0777);
316
317  /**
318     same as C function remove but throws errno_error at failure
319
320     \see http://linux.die.net/man/3/remove
321
322     \since New in yat 0.12
323   */
324  void remove(const std::string& fn);
325
326  /**
327     same as C function with same name but throws errno_error if error
328     is encountered
329
330     \see http://linux.die.net/man/3/rename
331
332     \since New in yat 0.12
333   */
334  void rename(const std::string& from, const std::string& to);
335
336  /**
337     In \a full_str replace every sub-string \a old_str with \a
338     new_str;
339
340     \since New in yat 0.10
341   */
342  void replace(std::string& full_str, std::string old_str, std::string new_str);
343
344
345  /**
346     Calculate sum of weights in range [first, last). The complexity
347     is linear except in the important case when \c Iterator is
348     unweighted and \random_access_iterator when complexity is constant.
349
350     Type Requirements:
351     - \c Iterator is \ref concept_data_iterator
352     - \c Iterator is \readable_iterator
353     - \c Iterator is \single_pass_iterator
354
355     \since New in yat 0.13
356   */
357  template<typename Iterator>
358  double sum_weight(Iterator first, Iterator last);
359
360/// \cond IGNORE_DOXYGEN
361
362// private namespace
363namespace detail {
364
365  /**
366     \internal
367
368     \brief convert s to t
369
370     used in function is<T> and convert<T>
371
372     \return true if conversion was successful
373
374     \internal
375   */
376  template<typename T>
377  bool convert(const std::string& s, T& t);
378
379  /**
380     Functor used in load function
381   */
382  template<typename T>
383  struct VectorPusher
384  {
385    /**
386       convert element to T and push on vec's back
387
388       \internal
389     */
390    void operator()(const std::string& element, std::vector<T>& vec)
391    {
392      if (!element.size())
393        vec.push_back(std::numeric_limits<T>::quiet_NaN());
394      else {
395        vec.push_back(theplu::yat::utility::convert<T>(element));
396      }
397    }
398  };
399
400  /**
401     specialization for string
402
403     \internal
404   */
405  template<>
406  struct VectorPusher<std::string>
407  {
408    /**
409       push element on vec's back
410     */
411    void operator()(const std::string& element, std::vector<std::string>& vec)
412    { 
413      vec.push_back(element);
414    }
415  };
416
417
418  template<typename Iterator>
419  double sum_weight(Iterator first, Iterator last, unweighted_iterator_tag tag)
420  {
421    return std::distance(first, last);
422  }
423
424
425  template<typename Iterator>
426  double sum_weight(Iterator first, Iterator last, weighted_iterator_tag tag)
427  {
428    return std::accumulate(weight_iterator(first), weight_iterator(last), 0);
429  }
430
431
432} // end of namespace detail
433
434/// \endcond
435
436  // template implementations
437
438  template<typename InputIterator, typename OutputIterator>
439  bool binary_weight(InputIterator first, InputIterator last, 
440                     OutputIterator result)
441  {
442    BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<InputIterator>));
443    BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<InputIterator>));
444    BOOST_CONCEPT_ASSERT((boost_concepts::IncrementableIterator<OutputIterator>));
445    BOOST_CONCEPT_ASSERT((boost_concepts::WritableIterator<OutputIterator, float>));
446
447    bool nan=false;
448    while (first!=last) {
449      if (std::isnan(*first)) {
450        *result=0;
451        nan=true;
452      }
453      else
454        *result = 1.0;
455      ++first;
456      ++result;
457    }
458    return nan;
459  }
460
461
462  template<typename T>
463  std::string convert(T input)
464  {
465    std::ostringstream ss;
466    ss << input;
467    return ss.str();
468  }
469
470
471  template<typename T>
472  T convert(const std::string& s)
473  {
474    T result;
475    if (!detail::convert(s, result))
476      throw runtime_error(std::string("yat::utility::convert(\"")+s+
477                          std::string("\")"));
478    return result;
479  }
480
481
482  template<typename T>
483  bool is(const std::string& s)
484  {
485    T tmp;
486    return detail::convert(s, tmp);
487  }
488
489
490  template<typename T>
491  void load(std::istream& is, std::vector<std::vector<T> >& matrix,
492            char sep, char line_sep, bool ignore_empty,
493            bool rectangle)
494  {
495    size_t nof_columns=0;
496    std::string line;
497    while(getline(is, line, line_sep)){
498      if (line.empty() && ignore_empty)
499        continue;
500      matrix.push_back(std::vector<T>());
501      std::vector<T>& v=matrix.back();
502      v.reserve(nof_columns);
503      std::stringstream ss(line);
504      load(ss, v, sep);
505      // add NaN for final separator (or empty string if T=std::string)
506      detail::VectorPusher<T> pusher;
507      if(sep!='\0' && !line.empty() && line[line.size()-1]==sep)
508        pusher("", v);
509
510      if (rectangle && nof_columns && v.size()!=nof_columns) {
511        std::ostringstream s;
512        s << "load stream error: "
513          << "line " << matrix.size() << " has " << v.size()
514          << " columns; expected " << nof_columns << " columns.";
515        throw utility::IO_error(s.str());
516      }
517      nof_columns = std::max(nof_columns, v.size());
518    }
519
520    // manipulate the state of the stream to be good
521    is.clear(std::ios::goodbit);
522  }
523
524  template<typename T>
525  void load(std::istream& is, std::vector<T>& vec, char sep)
526  {
527    detail::VectorPusher<T> pusher;
528    std::string element;
529    bool ok=true;
530    while(true) {
531      if(sep=='\0')
532        ok=!(is>>element).fail();
533      else
534        ok=!getline(is, element, sep).fail();
535      if(!ok)
536        break;
537      pusher(element, vec);
538    }
539  }
540
541
542  template<typename Iterator>
543  double sum_weight(Iterator first, Iterator last)
544  {
545    BOOST_CONCEPT_ASSERT((DataIteratorConcept<Iterator>));
546    BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<Iterator>));
547    BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<Iterator>));
548    typename weighted_iterator_traits<Iterator>::type tag;
549    return detail::sum_weight(first, last, tag);
550  }
551
552
553/// \cond IGNORE_DOXYGEN
554namespace detail {
555  template<typename T>
556  bool convert(const std::string& s, T& result)
557  {
558    if (!std::numeric_limits<T>::is_signed) {
559      // first non-whitespace character
560      std::string::const_iterator iter = s.begin();
561      while (iter!=s.end() && std::isspace(*iter))
562        ++iter;
563      // unsigned int cannot start with a '-' and with some compilers
564      // operation ss >> result won't fail so catch it like this instead.
565      if (iter==s.end() || *iter=='-')
566        return false;
567    }
568    std::istringstream ss(s);
569    ss >> result;
570    if (ss.fail()) {
571      if (is_nan(s) || is_equal(s, "-nan")) {
572        result = std::numeric_limits<T>::quiet_NaN();
573        return true;
574      }
575      if (is_equal(s, "inf")) {
576        result = std::numeric_limits<T>::infinity();
577        return true;
578      }
579      if (is_equal(s, "-inf")) {
580        // unsigned types are caught in prologue
581        YAT_ASSERT(std::numeric_limits<T>::is_signed);
582        result = -std::numeric_limits<T>::infinity();
583        return true;
584      }
585      return false;
586    }
587    // Check that nothing is left on stream
588    std::string b;
589    ss >> b;
590    return b.empty();
591  }
592} // of namespace detail
593
594/// \endcond
595
596}}} // of namespace utility, yat, and theplu
597
598#endif
Note: See TracBrowser for help on using the repository browser.