source: trunk/yat/utility/utility.h @ 3550

Last change on this file since 3550 was 3550, checked in by Peter, 5 years ago

Update copyright years. Happy New Year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 14.8 KB
Line 
1#ifndef _theplu_yat_utility_utility_
2#define _theplu_yat_utility_utility_
3
4// $Id: utility.h 3550 2017-01-03 05:41:02Z peter $
5
6/*
7  Copyright (C) 2005 Jari Häkkinen, Peter Johansson, Markus Ringnér
8  Copyright (C) 2006 Jari Häkkinen
9  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson
10  Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Peter Johansson
11
12  This file is part of the yat library, http://dev.thep.lu.se/yat
13
14  The yat library is free software; you can redistribute it and/or
15  modify it under the terms of the GNU General Public License as
16  published by the Free Software Foundation; either version 3 of the
17  License, or (at your option) any later version.
18
19  The yat library is distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23
24  You should have received a copy of the GNU General Public License
25  along with yat. If not, see <http://www.gnu.org/licenses/>.
26*/
27
28///
29/// \file yat/utility/utility.h
30///
31/// @brief Some useful functions are placed here
32///
33
34#include "concept_check.h"
35#include "deprecate.h"
36#include "Exception.h"
37#include "iterator_traits.h"
38#include "WeightIterator.h"
39#include "yat_assert.h"
40
41#include <gsl/gsl_math.h>
42
43#include <boost/iterator/iterator_concepts.hpp>
44#include <boost/concept_check.hpp>
45
46#include <algorithm>
47#include <cctype>
48#include <cmath>
49#include <cstdlib>
50#include <functional>
51#include <limits>
52#include <locale>
53#include <istream>
54#include <numeric>
55#include <string>
56#include <stdexcept>
57#include <sstream>
58#include <utility>
59#include <vector>
60
61namespace theplu {
62namespace yat {
63namespace utility {
64
65  /**
66     Same as posix C function with same name but works on \c
67     std::string rather than \c char*.
68
69     \see http://linux.die.net/man/3/basename
70
71     \return everything after last '/'
72
73     \since New in yat 0.10
74   */
75  std::string basename(const std::string& fn);
76
77  /**
78     For each element in resulting range assign it to 0.0 if
79     corresponding element in input range is NaN else assign it to
80     1.0.
81
82     \return true if there is at least one NaN in input range
83     [first, last).
84
85     Type Requirements:
86     - \c InputIterator is \single_pass_iterator
87     - \c InputIterator is \readable_iterator
88     - \c std::isnan(*InputIterator) is a valid expression (e.g. \c
89       float, \c double, and \c long \c double )
90     - \c OutputIterator is \incrementable_iterator
91     - \c OutputIterator is \writable_iterator
92     - \c float is convertible to \c OutputIterator value type
93
94     \since New in yat 0.5
95  */
96  template<typename InputIterator, typename OutputIterator>
97  bool binary_weight(InputIterator first, InputIterator last,
98                     OutputIterator result);
99
100  /**
101     Same as C function \c chdir but throws on failure (instead of
102     returning non-zero).
103
104     \throw runtime_error if underlying chdir returns non-zero
105
106     \see http://linux.die.net/man/3/chdir
107
108     \since New in yat 0.10
109   */
110  void chdir(const std::string& dir);
111
112  /**
113     same as C function \c chmod but throws on failure (instead of
114     returning non-zero).
115
116     \see http://linux.die.net/man/3/chmod
117
118     \since New in yat 0.10
119   */
120  void chmod(const std::string& filename, mode_t mode);
121
122  /**
123     \brief convert T to a string
124
125     T is supposed to be a numerical type.
126
127     \since new in yat 0.8
128   */
129  template<typename T>
130  std::string convert(T input);
131
132  /**
133     \brief convert string to (numerical) type
134
135     \throw runtime_error if conversion fails
136   */
137  template<typename T>
138  T convert(const std::string& s);
139
140  /**
141     @brief Copy file \a source to \a target.
142
143     @throw runtime_error if read error of \a source or write error
144     for \a target is encountered.
145  */
146  void copy_file(const std::string& source, const std::string& target);
147
148
149  /**
150     Same as posix C function with same name but works on \c
151     std::string rather than \c char*.
152
153     \see http://linux.die.net/man/3/dirname
154
155     \return everything prior to the last '/'.
156
157     \since New in yat 0.10
158   */
159  std::string dirname(const std::string& fn);
160
161  /**
162     \return true if \a str matches \a pattern
163
164     \see http://linux.die.net/man/3/fnmatch
165
166     \throw runtime_error if returned value from underlying fnmatch is
167     neither zero nor \c FNM_NOMATCH.
168
169     \since New in yat 0.10
170  */
171  bool fnmatch(const std::string& pattern, const std::string& str,
172               int flags=0);
173
174  /**
175     \brief check if string is convertible to (numerical) type \c T
176
177     \since New in yat 0.5
178   */
179  template<typename T>
180  bool is(const std::string& s);
181
182  ///
183  /// @return true if string is a double
184  ///
185  /// \deprecated Provided for backward compatibility with the 0.4
186  /// API. Use is<double>(const std::string&)
187  ///
188  bool is_double(const std::string&) YAT_DEPRECATE;
189
190  /**
191     This function takes the first word (separated by whitespace) in
192     \a s, replaces all upper case with lower case, and compares it
193     with \a other.
194
195     \return true if processed \a s is equal to \a other. It returns
196     false otherwise or if \a s contains more than one word.
197  */
198  bool is_equal(std::string s, std::string other);
199
200  ///
201  /// @return true if string is a float
202  ///
203  /// \deprecated Provided for backward compatibility with the 0.4
204  /// API. Use is<float>(const std::string&)
205  ///
206  bool is_float(const std::string&) YAT_DEPRECATE;
207
208  ///
209  /// @return true if string is an int
210  ///
211  /// \deprecated Provided for backward compatibility with the 0.4
212  /// API. Use is<int>(const std::string&)
213  ///
214  bool is_int(const std::string&) YAT_DEPRECATE;
215
216  ///
217  /// @return true if string is "nan" (case-insensitive)
218  ///
219  bool is_nan(const std::string& s);
220
221  /**
222     The std::istream will be interpreted as outlined here:
223
224     Lines are separated by character \a line_sep and columns are
225     separated by character \a sep.
226     
227     The first line is read into a stringstream, which is used to
228     load the first vector (vec[0]) with elements using
229     load(stringstream, vec[0], sep).
230     
231     Therefore, column elements separation has two modes depending
232     on the value of \a sep.
233     
234     - If \a sep is the default '\\0' value then column elements are
235     separated with white space characters except the new line
236     character. Multiple sequential white space characters are treated
237     as one separator.
238     
239     - Setting \a sep to something else than the default value will
240     change the behaviour to use the \a sep character as the separator
241     between column elements. Multiple sequential \a sep characters
242     will be treated as separating elements with missing values.
243
244     If \a rectangle is true, rows must contain same number of
245     elements or function will throw.
246
247     If \a ignore_empty is true empty lines are ignored.
248
249     \see load(std::istream&, std::vector<T>&, char sep='\\0')
250
251     \note Requirement on T: utility::convert<T> must be supported
252     (from yat 0.7 T=string is also supported)
253
254     \since New in yat 0.6
255   */
256  template<typename T>
257  void load(std::istream& is, std::vector<std::vector<T> >& vec, char sep='\0', 
258            char line_sep='\n', bool ignore_empty=false, bool rectangle=true);
259
260  /**
261     \brief Fill a vector<T> with elements from istream
262
263     Element separation has two modes depending on the value of \a
264     sep.
265     
266     - If \a sep is the default '\\0' value then elements are
267     separated with white space characters. Multiple sequential white
268     space characters are treated as one separator.
269     
270     - Setting \a sep to something else than the default value will
271     change the behaviour to use the \a sep character as the
272     separator between column elements. Multiple sequential \a sep
273     characters will be treated as separating elements with missing
274     values. Missing values are set to std::numeric_limits<T>::quiet_NaN
275     
276     \note Requirement on T: utility::convert<T> must be supported
277     (from yat 0.7 T=string is also supported)
278
279     \since New in yat 0.6
280   */
281  template<typename T>
282  void load(std::istream& is, std::vector<T>& vec, char sep='\0');
283
284  /**
285     \return base-2 logarithm of x
286
287     Implemented for \c float, \c double, and \c long \c double.
288
289     \since New in yat 0.10
290   */
291  // c++11 provides std::log2 so perhaps we should call that one if
292  // available (but a bit tricky since this is a public header)
293  template<typename T>
294  T log2(T x) { return std::log(x)/M_LN2; }
295
296  /**
297     \brief create a directory \a dir
298
299     \see http://linux.die.net/man/3/mkdir
300
301     \throw runtime_error if creation failed
302
303     \since New in yat 0.10
304   */
305  void mkdir(const std::string& dir, mode_t mode=0777);
306
307  /**
308     Similar to mkdir(const std::string&, mode_t).
309
310     No error if \a dir already exist. Make parent directories as needed.
311
312     \since New in yat 0.10
313   */
314  void mkdir_p(const std::string& dir, mode_t mode=0777);
315
316  /**
317     same as C function remove but throws errno_error at failure
318
319     \see http://linux.die.net/man/3/remove
320
321     \since New in yat 0.12
322   */
323  void remove(const std::string& fn);
324
325  /**
326     same as C function with same name but throws errno_error if error
327     is encountered
328
329     \see http://linux.die.net/man/3/rename
330
331     \since New in yat 0.12
332   */
333  void rename(const std::string& from, const std::string& to);
334
335  /**
336     In \a full_str replace every sub-string \a old_str with \a
337     new_str;
338
339     \since New in yat 0.10
340   */
341  void replace(std::string& full_str, std::string old_str, std::string new_str);
342
343
344  /**
345     Calculate sum of weights in range [first, last). The complexity
346     is linear except in the important case when \c Iterator is
347     unweighted and \random_access_iterator when complexity is constant.
348
349     Type Requirements:
350     - \c Iterator is \ref concept_data_iterator
351     - \c Iterator is \readable_iterator
352     - \c Iterator is \single_pass_iterator
353
354     \since New in yat 0.13
355   */
356  template<typename Iterator>
357  double sum_weight(Iterator first, Iterator last);
358
359/// \cond IGNORE_DOXYGEN
360
361// private namespace
362namespace detail {
363
364  /**
365     \internal
366
367     \brief convert s to t
368
369     used in function is<T> and convert<T>
370
371     \return true if conversion was successful
372
373     \internal
374   */
375  template<typename T>
376  bool convert(const std::string& s, T& t);
377
378  /**
379     Functor used in load function
380   */
381  template<typename T>
382  struct VectorPusher
383  {
384    /**
385       convert element to T and push on vec's back
386
387       \internal
388     */
389    void operator()(const std::string& element, std::vector<T>& vec)
390    {
391      if (!element.size())
392        vec.push_back(std::numeric_limits<T>::quiet_NaN());
393      else {
394        vec.push_back(theplu::yat::utility::convert<T>(element));
395      }
396    }
397  };
398
399  /**
400     specialization for string
401
402     \internal
403   */
404  template<>
405  struct VectorPusher<std::string>
406  {
407    /**
408       push element on vec's back
409     */
410    void operator()(const std::string& element, std::vector<std::string>& vec)
411    { 
412      vec.push_back(element);
413    }
414  };
415
416
417  template<typename Iterator>
418  double sum_weight(Iterator first, Iterator last, unweighted_iterator_tag tag)
419  {
420    return std::distance(first, last);
421  }
422
423
424  template<typename Iterator>
425  double sum_weight(Iterator first, Iterator last, weighted_iterator_tag tag)
426  {
427    return std::accumulate(weight_iterator(first), weight_iterator(last), 0);
428  }
429
430
431} // end of namespace detail
432
433/// \endcond
434
435  // template implementations
436
437  template<typename InputIterator, typename OutputIterator>
438  bool binary_weight(InputIterator first, InputIterator last, 
439                     OutputIterator result)
440  {
441    BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<InputIterator>));
442    BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<InputIterator>));
443    BOOST_CONCEPT_ASSERT((boost_concepts::IncrementableIterator<OutputIterator>));
444    BOOST_CONCEPT_ASSERT((boost_concepts::WritableIterator<OutputIterator, float>));
445
446    bool nan=false;
447    while (first!=last) {
448      if (std::isnan(*first)) {
449        *result=0;
450        nan=true;
451      }
452      else
453        *result = 1.0;
454      ++first;
455      ++result;
456    }
457    return nan;
458  }
459
460
461  template<typename T>
462  std::string convert(T input)
463  {
464    std::ostringstream ss;
465    ss << input;
466    return ss.str();
467  }
468
469
470  template<typename T>
471  T convert(const std::string& s)
472  {
473    T result;
474    if (!detail::convert(s, result))
475      throw runtime_error(std::string("yat::utility::convert(\"")+s+
476                          std::string("\")"));
477    return result;
478  }
479
480
481  template<typename T>
482  bool is(const std::string& s)
483  {
484    T tmp;
485    return detail::convert(s, tmp);
486  }
487
488
489  template<typename T>
490  void load(std::istream& is, std::vector<std::vector<T> >& matrix,
491            char sep, char line_sep, bool ignore_empty,
492            bool rectangle)
493  {
494    size_t nof_columns=0;
495    std::string line;
496    while(getline(is, line, line_sep)){
497      if (line.empty() && ignore_empty)
498        continue;
499      matrix.push_back(std::vector<T>());
500      std::vector<T>& v=matrix.back();
501      v.reserve(nof_columns);
502      std::stringstream ss(line);
503      load(ss, v, sep);
504      // add NaN for final separator (or empty string if T=std::string)
505      detail::VectorPusher<T> pusher;
506      if(sep!='\0' && !line.empty() && line[line.size()-1]==sep)
507        pusher("", v);
508
509      if (rectangle && nof_columns && v.size()!=nof_columns) {
510        std::ostringstream s;
511        s << "load stream error: "
512          << "line " << matrix.size() << " has " << v.size()
513          << " columns; expected " << nof_columns << " columns.";
514        throw utility::IO_error(s.str());
515      }
516      nof_columns = std::max(nof_columns, v.size());
517    }
518
519    // manipulate the state of the stream to be good
520    is.clear(std::ios::goodbit);
521  }
522
523  template<typename T>
524  void load(std::istream& is, std::vector<T>& vec, char sep)
525  {
526    detail::VectorPusher<T> pusher;
527    std::string element;
528    bool ok=true;
529    while(true) {
530      if(sep=='\0')
531        ok=(is>>element);
532      else
533        ok=getline(is, element, sep);
534      if(!ok)
535        break;
536      pusher(element, vec);
537    }
538  }
539
540
541  template<typename Iterator>
542  double sum_weight(Iterator first, Iterator last)
543  {
544    BOOST_CONCEPT_ASSERT((DataIteratorConcept<Iterator>));
545    BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<Iterator>));
546    BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<Iterator>));
547    typename weighted_iterator_traits<Iterator>::type tag;
548    return detail::sum_weight(first, last, tag);
549  }
550
551
552/// \cond IGNORE_DOXYGEN
553namespace detail {
554  template<typename T>
555  bool convert(const std::string& s, T& result)
556  {
557    if (!std::numeric_limits<T>::is_signed) {
558      // first non-whitespace character
559      std::string::const_iterator iter = s.begin();
560      while (iter!=s.end() && std::isspace(*iter))
561        ++iter;
562      // unsigned int cannot start with a '-' and with some compilers
563      // operation ss >> result won't fail so catch it like this instead.
564      if (iter==s.end() || *iter=='-')
565        return false;
566    }
567    std::istringstream ss(s);
568    ss >> result;
569    if (ss.fail()) {
570      if (is_nan(s) || is_equal(s, "-nan")) {
571        result = std::numeric_limits<T>::quiet_NaN();
572        return true;
573      }
574      if (is_equal(s, "inf")) {
575        result = std::numeric_limits<T>::infinity();
576        return true;
577      }
578      if (is_equal(s, "-inf")) {
579        // unsigned types are caught in prologue
580        YAT_ASSERT(std::numeric_limits<T>::is_signed);
581        result = -std::numeric_limits<T>::infinity();
582        return true;
583      }
584      return false;
585    }
586    // Check that nothing is left on stream
587    std::string b;
588    ss >> b;
589    return b.empty();
590  }
591} // of namespace detail
592
593/// \endcond
594
595}}} // of namespace utility, yat, and theplu
596
597#endif
Note: See TracBrowser for help on using the repository browser.