source: trunk/yat/utility/utility.h @ 4098

Last change on this file since 4098 was 4098, checked in by Peter, 2 years ago

minor docs change

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 17.6 KB
Line 
1#ifndef _theplu_yat_utility_utility_
2#define _theplu_yat_utility_utility_
3
4// $Id: utility.h 4098 2021-09-17 00:09:36Z peter $
5
6/*
7  Copyright (C) 2005 Jari Häkkinen, Peter Johansson, Markus Ringnér
8  Copyright (C) 2006 Jari Häkkinen
9  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson
10  Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Peter Johansson
11
12  This file is part of the yat library, http://dev.thep.lu.se/yat
13
14  The yat library is free software; you can redistribute it and/or
15  modify it under the terms of the GNU General Public License as
16  published by the Free Software Foundation; either version 3 of the
17  License, or (at your option) any later version.
18
19  The yat library is distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23
24  You should have received a copy of the GNU General Public License
25  along with yat. If not, see <http://www.gnu.org/licenses/>.
26*/
27
28///
29/// \file yat/utility/utility.h
30///
31/// @brief Some useful functions are placed here
32///
33
34#include "config_public.h"
35
36#include "concept_check.h"
37#include "deprecate.h"
38#include "Exception.h"
39#include "iterator_traits.h"
40#include "WeightIterator.h"
41#include "yat_assert.h"
42
43#include <boost/iterator/iterator_concepts.hpp>
44#include <boost/concept_check.hpp>
45
46#include <algorithm>
47#include <cctype>
48#include <cmath>
49#include <cstdlib>
50#include <cstring>
51#include <functional>
52#include <limits>
53#include <locale>
54#include <istream>
55#include <iostream>
56#include <numeric>
57#include <string>
58#include <stdexcept>
59#include <sstream>
60#include <utility>
61#include <vector>
62
63namespace theplu {
64namespace yat {
65namespace utility {
66
67  /**
68     Same as posix C function with same name but works on \c
69     std::string rather than \c char*.
70
71     \see http://linux.die.net/man/3/basename
72
73     \return everything after last '/'
74
75     \since New in yat 0.10
76   */
77  std::string basename(const std::string& fn);
78
79  /**
80     Read data from \a is into variable \a x. Data has typically been
81     written with binary_write(std::ostream& os, T).
82
83     \c T is expected to be a native numerical type.
84
85     \return \a is
86
87     \since New in yat 0.18
88   */
89  template<typename T>
90  std::istream& binary_read(std::istream& is, T& x)
91  {
92    const unsigned char type_size=sizeof(T);
93    YAT_ASSERT(type_size <= 16);
94    char buffer[16];
95    is.read(buffer, type_size);
96    memcpy(&x, buffer, type_size);
97    return is;
98  }
99
100
101  /**
102     Write value of \a x into ostream \a os in binary format. Since
103     the data is written in the native binary format, it may not be
104     portable between different architectures.
105
106     \c T is expected to be a native numerical type.
107
108     \see binary_read(std::istream&, T&)
109
110     \since New in yat 0.18
111   */
112  template<typename T>
113  void binary_write(std::ostream& os, T x)
114  {
115    const unsigned char type_size=sizeof(T);
116    YAT_ASSERT(type_size <= 16);
117    char buffer[16];
118    memcpy(buffer, &x, type_size);
119    os.write(buffer, type_size);
120  }
121
122  /**
123     For each element in resulting range assign it to 0.0 if
124     corresponding element in input range is NaN else assign it to
125     1.0.
126
127     \return true if there is at least one NaN in input range
128     [first, last).
129
130     Type Requirements:
131     - \c InputIterator is \single_pass_iterator
132     - \c InputIterator is \readable_iterator
133     - \c std::isnan(*InputIterator) is a valid expression (e.g. \c
134       float, \c double, and \c long \c double )
135     - \c OutputIterator is \incrementable_iterator
136     - \c OutputIterator is \writable_iterator
137     - \c float is convertible to \c OutputIterator value type
138
139     \since New in yat 0.5
140  */
141  template<typename InputIterator, typename OutputIterator>
142  bool binary_weight(InputIterator first, InputIterator last,
143                     OutputIterator result);
144
145  /**
146     Same as C function \c chdir but throws on failure (instead of
147     returning non-zero).
148
149     \throw runtime_error if underlying chdir returns non-zero
150
151     \see http://linux.die.net/man/3/chdir
152
153     \since New in yat 0.10
154   */
155  void chdir(const std::string& dir);
156
157  /**
158     same as C function \c chmod but throws on failure (instead of
159     returning non-zero).
160
161     \see http://linux.die.net/man/3/chmod
162
163     \since New in yat 0.10
164   */
165  void chmod(const std::string& filename, mode_t mode);
166
167  /**
168     \brief convert T to a string
169
170     \c T is supposed to be a numerical type.
171
172     \see
173     <a href="http://www.cplusplus.com/reference/string/to_string/">std::to_string</a>
174     in C++11
175
176     \since new in yat 0.8
177   */
178  template<typename T>
179  std::string convert(T input);
180
181  /**
182     \brief convert string to (numerical) type
183
184     \throw runtime_error if conversion fails
185   */
186  template<typename T>
187  T convert(const std::string& s);
188
189  /**
190     \brief try to convert
191
192     \see is<T>
193     \see convert<T>
194
195     \return true if conversion was successful
196
197     \since new in yat 0.15
198   */
199  template<typename T>
200  bool convert_try(const std::string& s, T& t);
201
202  /**
203     @brief Copy file \a source to \a target.
204
205     @throw runtime_error if read error of \a source or write error
206     for \a target is encountered.
207  */
208  void copy_file(const std::string& source, const std::string& target);
209
210
211  /**
212     Same as posix C function with same name but works on \c
213     std::string rather than \c char*.
214
215     \see http://linux.die.net/man/3/dirname
216
217     \return everything prior last '/'.
218
219     \since New in yat 0.10
220   */
221  std::string dirname(const std::string& fn);
222
223  /**
224     \return true if \a str matches \a pattern
225
226     \see http://linux.die.net/man/3/fnmatch
227
228     \throw runtime_error if returned value from underlying fnmatch is
229     neither zero nor \c FNM_NOMATCH.
230
231     \since New in yat 0.10
232  */
233  bool fnmatch(const std::string& pattern, const std::string& str,
234               int flags=0);
235
236  /**
237     \brief current directory
238
239     \see ​https://linux.die.net/man/3/getcwd
240
241     \since New in yat 0.17
242   */
243  std::string getcwd(void);
244
245  /**
246     Function can be used as argument to \c gsl_set_error_handler, so
247     it is called by GSL when an error is detected. This will
248     result in a GSL_error being thrown.
249
250     \see https://www.gnu.org/software/gsl/doc/html/err.html
251
252     \since New in yat 0.18
253   */
254  void gsl_error_handler(const char* reason, const char* file,
255                         int line, int gsl_errno);
256
257  /**
258     \brief check if string is convertible to (numerical) type \c T
259
260     \since New in yat 0.5
261   */
262  template<typename T>
263  bool is(const std::string& s);
264
265  ///
266  /// @return true if string is a double
267  ///
268  /// \deprecated Provided for backward compatibility with the 0.4
269  /// API. Use is<double>(const std::string&)
270  ///
271  bool is_double(const std::string&) YAT_DEPRECATE;
272
273  /**
274     This function takes the first word (separated by whitespace) in
275     \a s, replaces all upper case with lower case, and compares it
276     with \a other.
277
278     \return true if processed \a s is equal to \a other. It returns
279     false otherwise or if \a s contains more than one word.
280  */
281  bool is_equal(std::string s, std::string other);
282
283  ///
284  /// @return true if string is a float
285  ///
286  /// \deprecated Provided for backward compatibility with the 0.4
287  /// API. Use is<float>(const std::string&)
288  ///
289  bool is_float(const std::string&) YAT_DEPRECATE;
290
291  ///
292  /// @return true if string is an int
293  ///
294  /// \deprecated Provided for backward compatibility with the 0.4
295  /// API. Use is<int>(const std::string&)
296  ///
297  bool is_int(const std::string&) YAT_DEPRECATE;
298
299  ///
300  /// @return true if string is "nan" (case-insensitive)
301  ///
302  bool is_nan(const std::string& s);
303
304  /**
305     The std::istream will be interpreted as outlined here:
306
307     Lines are separated by character \a line_sep and column elements
308     are separated by character \a sep.
309     
310     The first line is read into a stringstream, which is used to
311     load the first vector (vec[0]) with elements using
312     load(stringstream, vec[0], sep).
313     
314     Therefore, column elements separation has two modes depending
315     on the value of \a sep.
316     
317     - If \a sep is the default '\\0' value then column elements are
318     separated with white space characters except the new line
319     character. Multiple sequential white space characters are treated
320     as one separator.
321     
322     - Setting \a sep to something else than the default value will
323     change the behaviour to use the \a sep character as the separator
324     between column elements. Multiple sequential \a sep characters
325     will be treated as separating elements with missing values.
326
327     If \a rectangle is true, rows must contain same number of
328     elements or function will throw.
329
330     If \a ignore_empty is true empty lines are ignored.
331
332     \see load(std::istream&, std::vector<T>&, char sep='\\0')
333
334     \note Requirement on T: utility::convert<T> must be supported
335     (from yat 0.7 T=string is also supported)
336
337     \since New in yat 0.6
338   */
339  template<typename T>
340  void load(std::istream& is, std::vector<std::vector<T> >& vec, char sep='\0', 
341            char line_sep='\n', bool ignore_empty=false, bool rectangle=true);
342
343  /**
344     \brief Fill a vector<T> with elements from istream
345
346     Element separation has two modes depending on the value of \a
347     sep.
348     
349     - If \a sep is the default '\\0' value then elements are
350     separated with white space characters. Multiple sequential white
351     space characters are treated as one separator.
352     
353     - Setting \a sep to something else than the default value will
354     change the behaviour to use the \a sep character as the
355     separator between column elements. Multiple sequential \a sep
356     characters will be treated as separating elements with missing
357     values. Missing values are set to std::numeric_limits<T>::quiet_NaN
358     
359     \note Requirement on T: utility::convert<T> must be supported
360     (from yat 0.7 T=string is also supported)
361
362     \since New in yat 0.6
363   */
364  template<typename T>
365  void load(std::istream& is, std::vector<T>& vec, char sep='\0');
366
367  /**
368     \return base-2 logarithm of x
369
370     Implemented for \c float, \c double, and \c long \c double.
371
372     \since New in yat 0.10
373   */
374  template<typename T>
375  T log2(T x) { return std::log2(x); }
376
377  /**
378     \brief create a directory \a dir
379
380     \see http://linux.die.net/man/3/mkdir
381
382     \throw runtime_error if creation failed
383
384     \since New in yat 0.10
385   */
386  void mkdir(const std::string& dir, mode_t mode=0777);
387
388  /**
389     Similar to mkdir(const std::string&, mode_t).
390
391     No error if \a dir already exist. Make parent directories as needed.
392
393     \since New in yat 0.10
394   */
395  void mkdir_p(const std::string& dir, mode_t mode=0777);
396
397  /**
398     Preprocessor macro that works like std::move, but also works when
399     rvalue is not available (in which case arg is returned).
400
401     \since new in yat 0.16
402   */
403#define YAT_MOVE(arg) std::move(arg)
404  /// likewise for std::move_if_noexcept
405#define YAT_MOVE_IF_NOEXCEPT(arg) std::move_if_noexcept(arg)
406
407  /**
408     \brief recursively print exception what
409
410     Print \c what() of \a error to \a out. If \a error is nested, the
411     exception that is nested in \a error is passed to print_what().
412
413     \note If nested exception is null, function calls terminate (this
414     behaviour might change in the future).
415
416     \since new in yat 0.16
417   */
418  void print_what(const std::exception& error, std::ostream& out=std::cerr);
419
420  /**
421     same as C function remove but throws errno_error at failure
422
423     \see http://linux.die.net/man/3/remove
424
425     \since New in yat 0.12
426   */
427  void remove(const std::string& fn);
428
429  /**
430     same as C function with same name but throws errno_error if error
431     is encountered
432
433     \see http://linux.die.net/man/3/rename
434
435     \since New in yat 0.12
436   */
437  void rename(const std::string& from, const std::string& to);
438
439  /**
440     In \a full_str replace every sub-string \a old_str with \a
441     new_str;
442
443     \since New in yat 0.10
444
445     \see <a href=https://www.boost.org/doc/libs/1_77_0/doc/html/boost/algorithm/replace_all.html>
446     boost::algorithm::replace_all</a>(string&, const string&, const string&)
447   */
448  void replace(std::string& full_str, const std::string& old_str,
449               const std::string& new_str);
450
451
452  /**
453     Calculate sum of weights in range [first, last). The complexity
454     is linear except in the important case when \c Iterator is
455     unweighted and \random_access_iterator when complexity is constant.
456
457     Type Requirements:
458     - \c Iterator is \ref concept_data_iterator
459     - \c Iterator is \readable_iterator
460     - \c Iterator is \single_pass_iterator
461
462     \since New in yat 0.13
463   */
464  template<typename Iterator>
465  double sum_weight(Iterator first, Iterator last);
466
467  /**
468     same as C function symlink but throws errno_error at failure
469
470     Creates a symbolic link called \a path2 that contains the string
471     pointed to by \a path1
472
473     \see http://linux.die.net/man/3/symlink
474
475     \since New in yat 0.16
476   */
477  void symlink(const std::string& path1, const std::string& path2);
478
479/// \cond IGNORE_DOXYGEN
480
481// private namespace
482namespace detail {
483
484  /**
485     Functor used in load function
486   */
487  template<typename T>
488  struct VectorPusher
489  {
490    /**
491       convert element to T and push on vec's back
492
493       \internal
494     */
495    void operator()(const std::string& element, std::vector<T>& vec)
496    {
497      if (!element.size())
498        vec.push_back(std::numeric_limits<T>::quiet_NaN());
499      else {
500        vec.push_back(theplu::yat::utility::convert<T>(element));
501      }
502    }
503  };
504
505  /**
506     specialization for string
507
508     \internal
509   */
510  template<>
511  struct VectorPusher<std::string>
512  {
513    /**
514       push element on vec's back
515     */
516    void operator()(const std::string& element, std::vector<std::string>& vec)
517    {
518      vec.push_back(element);
519    }
520
521    void operator()(std::string&& element, std::vector<std::string>& vec)
522    {
523      vec.push_back(std::move(element));
524    }
525  };
526
527
528  template<typename Iterator>
529  double sum_weight(Iterator first, Iterator last, unweighted_iterator_tag tag)
530  {
531    return std::distance(first, last);
532  }
533
534
535  template<typename Iterator>
536  double sum_weight(Iterator first, Iterator last, weighted_iterator_tag tag)
537  {
538    return std::accumulate(weight_iterator(first), weight_iterator(last), 0);
539  }
540
541
542} // end of namespace detail
543
544/// \endcond
545
546  // template implementations
547
548  template<typename InputIterator, typename OutputIterator>
549  bool binary_weight(InputIterator first, InputIterator last, 
550                     OutputIterator result)
551  {
552    BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<InputIterator>));
553    BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<InputIterator>));
554    BOOST_CONCEPT_ASSERT((boost_concepts::IncrementableIterator<OutputIterator>));
555    BOOST_CONCEPT_ASSERT((boost_concepts::WritableIterator<OutputIterator, float>));
556
557    bool nan=false;
558    while (first!=last) {
559      if (std::isnan(*first)) {
560        *result=0;
561        nan=true;
562      }
563      else
564        *result = 1.0;
565      ++first;
566      ++result;
567    }
568    return nan;
569  }
570
571
572  template<typename T>
573  std::string convert(T input)
574  {
575    std::ostringstream ss;
576    ss << input;
577    return ss.str();
578  }
579
580
581  template<typename T>
582  T convert(const std::string& s)
583  {
584    T result;
585    if (!convert_try(s, result))
586      throw runtime_error(std::string("yat::utility::convert(\"")+s+
587                          std::string("\")"));
588    return result;
589  }
590
591
592  template<typename T>
593  bool is(const std::string& s)
594  {
595    T tmp;
596    return convert_try(s, tmp);
597  }
598
599
600  template<typename T>
601  void load(std::istream& is, std::vector<std::vector<T> >& matrix,
602            char sep, char line_sep, bool ignore_empty,
603            bool rectangle)
604  {
605    size_t nof_columns=0;
606    std::string line;
607    while(getline(is, line, line_sep)){
608      if (line.empty() && ignore_empty)
609        continue;
610      std::vector<T> v;
611      v.reserve(nof_columns);
612      bool empty_last_element = !line.empty() && line.back()==sep;
613      std::stringstream ss(std::move(line));
614      load(ss, v, sep);
615      // add NaN for final separator (or empty string if T=std::string)
616      detail::VectorPusher<T> pusher;
617      if (empty_last_element && sep!='\0')
618        pusher("", v);
619
620      if (rectangle && nof_columns && v.size()!=nof_columns) {
621        std::ostringstream s;
622        s << "load stream error: "
623          << "line " << matrix.size() << " has " << v.size()
624          << " columns; expected " << nof_columns << " columns.";
625        throw utility::IO_error(s.str());
626      }
627      nof_columns = std::max(nof_columns, v.size());
628      matrix.push_back(std::move(v));
629    }
630
631    // manipulate the state of the stream to be good
632    is.clear(std::ios::goodbit);
633  }
634
635  template<typename T>
636  void load(std::istream& is, std::vector<T>& vec, char sep)
637  {
638    detail::VectorPusher<T> pusher;
639    while(true) {
640      std::string element;
641      if(sep=='\0')
642        is>>element;
643      else
644        getline(is, element, sep);
645      if(is.fail())
646        break;
647      pusher(std::move(element), vec);
648    }
649  }
650
651
652  template<typename Iterator>
653  double sum_weight(Iterator first, Iterator last)
654  {
655    BOOST_CONCEPT_ASSERT((DataIteratorConcept<Iterator>));
656    BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<Iterator>));
657    BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<Iterator>));
658    typename weighted_iterator_traits<Iterator>::type tag;
659    return detail::sum_weight(first, last, tag);
660  }
661
662
663
664  template<typename T>
665  bool convert_try(const std::string& s, T& result)
666  {
667    if (!std::numeric_limits<T>::is_signed) {
668      // first non-whitespace character
669      std::string::const_iterator iter = s.begin();
670      while (iter!=s.end() && std::isspace(*iter))
671        ++iter;
672      // unsigned int cannot start with a '-' and with some compilers
673      // operation ss >> result won't fail so catch it like this instead.
674      if (iter==s.end() || *iter=='-')
675        return false;
676    }
677    std::istringstream ss(s);
678    ss >> result;
679    if (ss.fail()) {
680      if (is_nan(s) || is_equal(s, "-nan")) {
681        result = std::numeric_limits<T>::quiet_NaN();
682        return true;
683      }
684      if (is_equal(s, "inf")) {
685        result = std::numeric_limits<T>::infinity();
686        return true;
687      }
688      if (is_equal(s, "-inf")) {
689        // unsigned types are caught in prologue
690        YAT_ASSERT(std::numeric_limits<T>::is_signed);
691        result = -std::numeric_limits<T>::infinity();
692        return true;
693      }
694      return false;
695    }
696    // Check that nothing is left on stream
697    std::string b;
698    ss >> b;
699    return b.empty();
700  }
701
702}}} // of namespace utility, yat, and theplu
703
704#endif
Note: See TracBrowser for help on using the repository browser.