1 | // $Id: Stats.cc 994 2009-12-24 23:46:54Z peter $ |
---|
2 | |
---|
3 | /* |
---|
4 | Copyright (C) 2005 Peter Johansson |
---|
5 | Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson |
---|
6 | Copyright (C) 2009 Peter Johansson |
---|
7 | |
---|
8 | This file is part of svndigest, http://dev.thep.lu.se/svndigest |
---|
9 | |
---|
10 | svndigest is free software; you can redistribute it and/or modify it |
---|
11 | under the terms of the GNU General Public License as published by |
---|
12 | the Free Software Foundation; either version 3 of the License, or |
---|
13 | (at your option) any later version. |
---|
14 | |
---|
15 | svndigest is distributed in the hope that it will be useful, but |
---|
16 | WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
18 | General Public License for more details. |
---|
19 | |
---|
20 | You should have received a copy of the GNU General Public License |
---|
21 | along with svndigest. If not, see <http://www.gnu.org/licenses/>. |
---|
22 | */ |
---|
23 | |
---|
24 | #include "Stats.h" |
---|
25 | |
---|
26 | #include "Functor.h" |
---|
27 | #include "GnuplotFE.h" |
---|
28 | #include "SVNblame.h" |
---|
29 | #include "SVNinfo.h" |
---|
30 | #include "utility.h" |
---|
31 | |
---|
32 | #include <algorithm> |
---|
33 | #include <cassert> |
---|
34 | #include <cstdlib> |
---|
35 | #include <fstream> |
---|
36 | #include <iostream> |
---|
37 | #include <iterator> |
---|
38 | #include <map> |
---|
39 | #include <numeric> |
---|
40 | #include <string> |
---|
41 | #include <sstream> |
---|
42 | #include <unistd.h> |
---|
43 | #include <utility> |
---|
44 | #include <vector> |
---|
45 | |
---|
46 | |
---|
47 | namespace theplu{ |
---|
48 | namespace svndigest{ |
---|
49 | |
---|
50 | |
---|
51 | Stats::Stats(const std::string& path) |
---|
52 | : stats_(std::vector<Author2Vector>(LineTypeParser::total+1)) |
---|
53 | { |
---|
54 | // Make sure latest revision is set properly |
---|
55 | SVNinfo svn_info(path); |
---|
56 | revision_=svn_info.rev(); |
---|
57 | last_changed_rev_=svn_info.last_changed_rev(); |
---|
58 | reset(); |
---|
59 | } |
---|
60 | |
---|
61 | |
---|
62 | Stats::~Stats(void) |
---|
63 | { |
---|
64 | } |
---|
65 | |
---|
66 | |
---|
67 | void Stats::accumulate(std::vector<unsigned int>& vec, |
---|
68 | svn_revnum_t rev) const |
---|
69 | { |
---|
70 | assert(rev>0); |
---|
71 | if (vec.empty()){ |
---|
72 | // just to allow call to vec.back() below |
---|
73 | vec.resize(1,0); |
---|
74 | } |
---|
75 | else if (vec.begin()+rev-1 < vec.end()) |
---|
76 | std::partial_sum(vec.begin()+rev-1,vec.end(),vec.begin()+rev-1); |
---|
77 | // static_cast to remove annoying compiler warning |
---|
78 | if (vec.size() < static_cast<size_t>(revision()+1)) |
---|
79 | vec.resize(revision()+1, vec.back()); |
---|
80 | } |
---|
81 | |
---|
82 | |
---|
83 | void Stats::accumulate_stats(svn_revnum_t rev) |
---|
84 | { |
---|
85 | if (!rev) |
---|
86 | rev = 1; |
---|
87 | for (std::set<std::string>::const_iterator iter(authors().begin()); |
---|
88 | iter!=authors().end(); ++iter) { |
---|
89 | std::vector<unsigned int>& code = code_stats()[*iter]; |
---|
90 | accumulate(code, rev); |
---|
91 | std::vector<unsigned int>& comments = comment_stats()[*iter]; |
---|
92 | accumulate(comments, rev); |
---|
93 | std::vector<unsigned int>& other = other_stats()[*iter]; |
---|
94 | accumulate(other, rev); |
---|
95 | std::vector<unsigned int>& copyright = copyright_stats()[*iter]; |
---|
96 | accumulate(copyright, rev); |
---|
97 | } |
---|
98 | } |
---|
99 | |
---|
100 | |
---|
101 | void Stats::add(const std::string& user, const unsigned int& rev, |
---|
102 | const LineTypeParser::line_type& lt, unsigned int n) |
---|
103 | { |
---|
104 | assert(user.size()); |
---|
105 | add_author(user); |
---|
106 | |
---|
107 | add(code_stats()[user], rev, lt==LineTypeParser::code, n); |
---|
108 | add(comment_stats()[user], rev, lt==LineTypeParser::comment, n); |
---|
109 | add(other_stats()[user], rev, lt==LineTypeParser::other, n); |
---|
110 | add(copyright_stats()[user], rev, lt==LineTypeParser::copyright, n); |
---|
111 | } |
---|
112 | |
---|
113 | |
---|
114 | void Stats::add(std::vector<unsigned int>& vec, unsigned int rev, bool x, |
---|
115 | unsigned int n) |
---|
116 | { |
---|
117 | if (vec.size() < rev+1){ |
---|
118 | vec.reserve(rev+1); |
---|
119 | vec.resize(rev); |
---|
120 | assert(vec.size()+1<vec.max_size()); |
---|
121 | if (x) { |
---|
122 | vec.push_back(n); |
---|
123 | } |
---|
124 | else { |
---|
125 | vec.push_back(0); |
---|
126 | } |
---|
127 | } |
---|
128 | else if (x) |
---|
129 | vec[rev]+=n; |
---|
130 | } |
---|
131 | |
---|
132 | |
---|
133 | void Stats::add_author(std::string name) |
---|
134 | { |
---|
135 | authors_.insert(name); |
---|
136 | } |
---|
137 | |
---|
138 | |
---|
139 | void Stats::add_authors(std::set<std::string>::const_iterator first, |
---|
140 | std::set<std::string>::const_iterator last) |
---|
141 | { |
---|
142 | authors_.insert(first, last); |
---|
143 | } |
---|
144 | |
---|
145 | |
---|
146 | const std::set<std::string>& Stats::authors(void) const |
---|
147 | { |
---|
148 | return authors_; |
---|
149 | } |
---|
150 | |
---|
151 | |
---|
152 | void Stats::calc_all(void) |
---|
153 | { |
---|
154 | std::vector<unsigned int> init(revision()+1); |
---|
155 | for (int lt=0; lt <= 4; ++lt) { |
---|
156 | stats_[lt]["all"].clear(); |
---|
157 | stats_[lt]["all"] = |
---|
158 | std::accumulate(stats_[lt].begin(), |
---|
159 | stats_[lt].end(), init, |
---|
160 | PairValuePlus<std::string,unsigned int>()); |
---|
161 | } |
---|
162 | VectorPlus<unsigned int> vp; |
---|
163 | comment_or_copy_stats()["all"] = |
---|
164 | vp(comment_stats()["all"], copyright_stats()["all"]); |
---|
165 | |
---|
166 | total_stats()["all"] = |
---|
167 | vp(vp(code_stats()["all"], comment_or_copy_stats()["all"]), |
---|
168 | other_stats()["all"]); |
---|
169 | } |
---|
170 | |
---|
171 | |
---|
172 | void Stats::calc_total(void) |
---|
173 | { |
---|
174 | for (std::set<std::string>::const_iterator iter(authors().begin()); |
---|
175 | iter!=authors().end(); ++iter) { |
---|
176 | std::vector<unsigned int>& code = code_stats()[*iter]; |
---|
177 | std::vector<unsigned int>& comments = comment_stats()[*iter]; |
---|
178 | std::vector<unsigned int>& other = other_stats()[*iter]; |
---|
179 | std::vector<unsigned int>& copy = copyright_stats()[*iter]; |
---|
180 | |
---|
181 | VectorPlus<unsigned int> vp; |
---|
182 | total_stats()[*iter] = vp(vp(vp(code, comments),other),copy); |
---|
183 | } |
---|
184 | |
---|
185 | } |
---|
186 | |
---|
187 | |
---|
188 | void Stats::calc_comment_or_copy(void) |
---|
189 | { |
---|
190 | for (std::set<std::string>::const_iterator iter(authors().begin()); |
---|
191 | iter!=authors().end(); ++iter) { |
---|
192 | std::vector<unsigned int>& comments = comment_stats()[*iter]; |
---|
193 | std::vector<unsigned int>& copy = copyright_stats()[*iter]; |
---|
194 | |
---|
195 | VectorPlus<unsigned int> vp; |
---|
196 | comment_or_copy_stats()[*iter] = vp(comments, copy); |
---|
197 | } |
---|
198 | |
---|
199 | } |
---|
200 | |
---|
201 | |
---|
202 | unsigned int Stats::code(const std::string& user) const |
---|
203 | { |
---|
204 | return get_back(code_stats(), user); |
---|
205 | } |
---|
206 | |
---|
207 | |
---|
208 | unsigned int Stats::comments(const std::string& user) const |
---|
209 | { |
---|
210 | return get_back(comment_or_copy_stats(), user); |
---|
211 | } |
---|
212 | |
---|
213 | |
---|
214 | unsigned int Stats::empty(const std::string& user) const |
---|
215 | { |
---|
216 | return get_back(other_stats(), user); |
---|
217 | } |
---|
218 | |
---|
219 | |
---|
220 | unsigned int Stats::get_back(const Author2Vector& m, std::string user) const |
---|
221 | { |
---|
222 | A2VConstIter iter(m.find(std::string(user))); |
---|
223 | if (iter==m.end() || iter->second.empty()) |
---|
224 | return 0; |
---|
225 | return iter->second.back(); |
---|
226 | } |
---|
227 | |
---|
228 | |
---|
229 | const std::vector<unsigned int>& Stats::get_vector(const Author2Vector& m, |
---|
230 | std::string user) const |
---|
231 | { |
---|
232 | A2VConstIter iter(m.find(std::string(user))); |
---|
233 | if (iter==m.end()) |
---|
234 | throw std::runtime_error(user+std::string(" not found i Stats")); |
---|
235 | return iter->second; |
---|
236 | } |
---|
237 | |
---|
238 | |
---|
239 | svn_revnum_t Stats::last_changed_rev(void) const |
---|
240 | { |
---|
241 | return last_changed_rev_; |
---|
242 | } |
---|
243 | |
---|
244 | |
---|
245 | unsigned int Stats::lines(const std::string& user) const |
---|
246 | { |
---|
247 | return get_back(total_stats(), user); |
---|
248 | } |
---|
249 | |
---|
250 | |
---|
251 | void Stats::load(std::istream& is, Author2Vector& m) |
---|
252 | { |
---|
253 | while (m.size() < authors().size()+1 && is.good()) { |
---|
254 | std::string name; |
---|
255 | std::getline(is, name); |
---|
256 | std::vector<unsigned int>& vec=m[name]; |
---|
257 | std::string line; |
---|
258 | std::getline(is, line); |
---|
259 | std::stringstream ss(line); |
---|
260 | while (ss.good()) { |
---|
261 | svn_revnum_t rev=0; |
---|
262 | unsigned int count=0; |
---|
263 | ss >> rev; |
---|
264 | ss >> count; |
---|
265 | assert(rev<=revision_); |
---|
266 | if (!count) |
---|
267 | break; |
---|
268 | vec.resize(std::max(vec.size(),static_cast<size_t>(rev+1))); |
---|
269 | vec[rev]=count; |
---|
270 | } |
---|
271 | accumulate(vec); |
---|
272 | } |
---|
273 | } |
---|
274 | |
---|
275 | |
---|
276 | svn_revnum_t Stats::load_cache(std::istream& is) |
---|
277 | { |
---|
278 | std::string str; |
---|
279 | getline(is, str); |
---|
280 | if (str!=cache_check_str()) { |
---|
281 | if (str == prev_cache_check_str()) |
---|
282 | std::cout << "cache file is obsolete; " |
---|
283 | << "retrieving statistics from repository.\n"; |
---|
284 | return 0; |
---|
285 | } |
---|
286 | svn_revnum_t rev; |
---|
287 | is >> rev; |
---|
288 | reset(); |
---|
289 | size_t a_size=0; |
---|
290 | is >> a_size; |
---|
291 | while (authors().size()<a_size && is.good()){ |
---|
292 | getline(is, str); |
---|
293 | if (str.size()) |
---|
294 | add_author(str); |
---|
295 | } |
---|
296 | getline(is, str); |
---|
297 | if (str!=cache_check_str()) { |
---|
298 | return 0; |
---|
299 | } |
---|
300 | for (size_t i=0; i<stats_.size(); ++i){ |
---|
301 | load(is, stats_[i]); |
---|
302 | getline(is, str); |
---|
303 | if (str!=cache_check_str()) { |
---|
304 | return 0; |
---|
305 | } |
---|
306 | } |
---|
307 | return rev; |
---|
308 | } |
---|
309 | |
---|
310 | |
---|
311 | void Stats::map_add(A2VConstIter first1, A2VConstIter last1, |
---|
312 | Author2Vector& map) |
---|
313 | { |
---|
314 | A2VIter first2(map.begin()); |
---|
315 | Author2Vector::key_compare compare; |
---|
316 | while ( first1 != last1) { |
---|
317 | // key of first1 less than key of first2 |
---|
318 | if (first2==map.end() || compare(first1->first,first2->first)) { |
---|
319 | first2 = map.insert(first2, *first1); |
---|
320 | ++first1; |
---|
321 | } |
---|
322 | // key of first2 less than key of first1 |
---|
323 | else if ( compare(first2->first, first1->first)) { |
---|
324 | ++first2; |
---|
325 | } |
---|
326 | // keys are equivalent |
---|
327 | else { |
---|
328 | VectorPlus<Author2Vector::mapped_type::value_type> vp; |
---|
329 | first2->second = vp(first1->second, first2->second); |
---|
330 | ++first1; |
---|
331 | ++first2; |
---|
332 | } |
---|
333 | } |
---|
334 | } |
---|
335 | |
---|
336 | |
---|
337 | void Stats::parse(const std::string& path, svn_revnum_t rev) |
---|
338 | { |
---|
339 | // reset stats to zero for [rev, inf) |
---|
340 | for (size_t i=0; i<stats_.size(); ++i) |
---|
341 | for (A2VIter iter=stats_[i].begin(); iter!=stats_[i].end(); ++iter) { |
---|
342 | iter->second.resize(rev,0); |
---|
343 | iter->second.resize(revision(),0); |
---|
344 | } |
---|
345 | do_parse(path, rev); |
---|
346 | calc_comment_or_copy(); |
---|
347 | calc_total(); |
---|
348 | calc_all(); |
---|
349 | assert(total_stats().size()); |
---|
350 | assert(code_stats().size()); |
---|
351 | assert(comment_or_copy_stats().size()); |
---|
352 | assert(other_stats().size()); |
---|
353 | } |
---|
354 | |
---|
355 | std::string Stats::plot(const std::string& filename, |
---|
356 | const std::string& linetype) const |
---|
357 | { |
---|
358 | assert(total_stats().size()); |
---|
359 | plot_init(filename); |
---|
360 | GnuplotFE* gp=GnuplotFE::instance(); |
---|
361 | const Author2Vector* stat=NULL; |
---|
362 | if (linetype=="total") |
---|
363 | stat = &total_stats(); |
---|
364 | else if (linetype=="code") |
---|
365 | stat = &code_stats(); |
---|
366 | else if (linetype=="comments") |
---|
367 | stat = &comment_or_copy_stats(); |
---|
368 | else if (linetype=="empty") |
---|
369 | stat = &other_stats(); |
---|
370 | assert(stat); |
---|
371 | assert(stat->size()); |
---|
372 | assert(stat->find("all")!=stat->end()); |
---|
373 | std::vector<unsigned int> total=get_vector(*stat, "all"); |
---|
374 | double yrange_max=1.03 * *std::max_element(total.begin(), total.end()) +1.0; |
---|
375 | gp->yrange(yrange_max); |
---|
376 | |
---|
377 | typedef std::vector<std::pair<std::string, std::vector<unsigned int> > > vec_type; |
---|
378 | vec_type author_cont; |
---|
379 | author_cont.reserve(stat->size()); |
---|
380 | for (std::set<std::string>::const_iterator i=authors_.begin(); |
---|
381 | i != authors_.end(); ++i) { |
---|
382 | if (lines(*i)) { |
---|
383 | assert(stat->find(*i)!=stat->end()); |
---|
384 | author_cont.push_back(std::make_pair(*i,get_vector(*stat,*i))); |
---|
385 | } |
---|
386 | } |
---|
387 | |
---|
388 | LessReversed<std::vector<unsigned int> > lr; |
---|
389 | PairSecondCompare<std::string, std::vector<unsigned int>, |
---|
390 | LessReversed<std::vector<unsigned int> > > compare(lr); |
---|
391 | std::sort(author_cont.begin(), author_cont.end(), compare); |
---|
392 | |
---|
393 | size_t plotno=author_cont.size(); |
---|
394 | std::stringstream ss; |
---|
395 | vec_type::iterator end(author_cont.end()); |
---|
396 | for (vec_type::iterator i(author_cont.begin()); i!=end; ++i) { |
---|
397 | ss.str(""); |
---|
398 | ss << "set key height " << 2*plotno; |
---|
399 | gp->command(ss.str()); |
---|
400 | ss.str(""); |
---|
401 | ss << get_back(*stat, i->first) << " " << i->first; |
---|
402 | gp->yrange(yrange_max); |
---|
403 | gp->linetitle(ss.str()); |
---|
404 | ss.str(""); |
---|
405 | ss << "steps " << --plotno+2; |
---|
406 | gp->linestyle(ss.str()); |
---|
407 | gp->plot(i->second); |
---|
408 | } |
---|
409 | ss.str(""); |
---|
410 | ss << get_back(*stat, "all") << " total"; |
---|
411 | gp->command("set key height 0"); |
---|
412 | gp->linetitle(ss.str()); |
---|
413 | gp->linestyle("steps 1"); |
---|
414 | gp->plot(total); |
---|
415 | |
---|
416 | gp->command("unset multiplot"); |
---|
417 | gp->yrange(); |
---|
418 | |
---|
419 | return filename; |
---|
420 | } |
---|
421 | |
---|
422 | |
---|
423 | void Stats::plot_init(const std::string& filename) const |
---|
424 | { |
---|
425 | GnuplotFE* gp=GnuplotFE::instance(); |
---|
426 | gp->command("set term png"); |
---|
427 | gp->command("set output '"+filename+"'"); |
---|
428 | gp->command("set xtics nomirror"); |
---|
429 | gp->command("set ytics nomirror"); |
---|
430 | gp->command("set key default"); |
---|
431 | gp->command("set key left Left reverse"); |
---|
432 | gp->command("set multiplot"); |
---|
433 | } |
---|
434 | |
---|
435 | |
---|
436 | void Stats::plot_summary(const std::string& filename) const |
---|
437 | { |
---|
438 | plot_init(filename); |
---|
439 | GnuplotFE* gp=GnuplotFE::instance(); |
---|
440 | std::vector<unsigned int> total = get_vector(total_stats(), "all"); |
---|
441 | double yrange_max=1.03*total.back()+1; |
---|
442 | gp->yrange(yrange_max); |
---|
443 | std::stringstream ss; |
---|
444 | |
---|
445 | ss.str(""); |
---|
446 | std::vector<unsigned int> x(get_vector(code_stats(), "all")); |
---|
447 | ss << x.back() << " code"; |
---|
448 | gp->command("set key height 2"); |
---|
449 | gp->linetitle(ss.str()); |
---|
450 | gp->linestyle("steps 2"); |
---|
451 | gp->plot(x); |
---|
452 | |
---|
453 | ss.str(""); |
---|
454 | x = get_vector(comment_or_copy_stats(), "all"); |
---|
455 | ss << x.back() << " comment"; |
---|
456 | gp->command("set key height 4"); |
---|
457 | gp->linetitle(ss.str()); |
---|
458 | gp->linestyle("steps 3"); |
---|
459 | gp->plot(x); |
---|
460 | |
---|
461 | ss.str(""); |
---|
462 | x = get_vector(other_stats(), "all"); |
---|
463 | ss << x.back() << " other"; |
---|
464 | gp->command("set key height 6"); |
---|
465 | gp->linetitle(ss.str()); |
---|
466 | gp->linestyle("steps 4"); |
---|
467 | gp->plot(x); |
---|
468 | |
---|
469 | ss.str(""); |
---|
470 | ss << total.back() << " total"; |
---|
471 | gp->command("set key height 0"); |
---|
472 | gp->linetitle(ss.str()); |
---|
473 | gp->linestyle("steps 1"); |
---|
474 | gp->plot(total); |
---|
475 | |
---|
476 | gp->command("unset multiplot"); |
---|
477 | gp->yrange(); |
---|
478 | } |
---|
479 | |
---|
480 | |
---|
481 | void Stats::print(std::ostream& os) const |
---|
482 | { |
---|
483 | os << cache_check_str() << "\n"; |
---|
484 | os << last_changed_rev() << " "; |
---|
485 | os << authors().size() << "\n"; |
---|
486 | |
---|
487 | std::copy(authors().begin(), authors().end(), |
---|
488 | std::ostream_iterator<std::string>(os, "\n")); |
---|
489 | os << cache_check_str() << "\n"; |
---|
490 | for (size_t i=0; i<stats_.size(); ++i){ |
---|
491 | print(os, stats_[i]); |
---|
492 | os << cache_check_str() << "\n"; |
---|
493 | } |
---|
494 | } |
---|
495 | |
---|
496 | |
---|
497 | void Stats::print(std::ostream& os, const Author2Vector& m) const |
---|
498 | { |
---|
499 | for (A2VConstIter i(m.begin()); i!=m.end(); ++i){ |
---|
500 | os << i->first << "\n"; |
---|
501 | assert(i->second.size()); |
---|
502 | if (i->second[0]) |
---|
503 | os << 0 << " " << i->second[0] << " "; |
---|
504 | for (size_t j=1; j<i->second.size(); ++j) { |
---|
505 | // only print if stats changes in this rev |
---|
506 | if (i->second[j] != i->second[j-1]) { |
---|
507 | os << j << " " << i->second[j] - i->second[j-1] << " "; |
---|
508 | } |
---|
509 | } |
---|
510 | os << "\n"; |
---|
511 | } |
---|
512 | } |
---|
513 | |
---|
514 | void Stats::reset(void) |
---|
515 | { |
---|
516 | for (size_t i=0; i<stats_.size(); ++i){ |
---|
517 | stats_[i].clear(); |
---|
518 | std::vector<unsigned int>& tmp = stats_[i]["all"]; |
---|
519 | std::fill(tmp.begin(), tmp.end(), 0); |
---|
520 | tmp.resize(revision_+1); |
---|
521 | } |
---|
522 | authors_.clear(); |
---|
523 | } |
---|
524 | |
---|
525 | |
---|
526 | Stats& Stats::operator+=(const Stats& rhs) |
---|
527 | { |
---|
528 | revision_ = std::max(revision_, rhs.revision_); |
---|
529 | last_changed_rev_ = std::max(last_changed_rev_, rhs.last_changed_rev_); |
---|
530 | add_authors(rhs.authors().begin(), rhs.authors().end()); |
---|
531 | assert(stats_.size()==rhs.stats_.size()); |
---|
532 | for (size_t i=0; i<stats_.size(); ++i) |
---|
533 | map_add(rhs.stats_[i].begin(), rhs.stats_[i].end(), stats_[i]); |
---|
534 | |
---|
535 | return *this; |
---|
536 | } |
---|
537 | |
---|
538 | |
---|
539 | size_t Stats::operator()(int linetype, std::string author, |
---|
540 | svn_revnum_t rev) const |
---|
541 | { |
---|
542 | assert(linetype<=LineTypeParser::total); |
---|
543 | assert(static_cast<size_t>(linetype) < stats_.size()); |
---|
544 | assert(rev>=0); |
---|
545 | A2VConstIter i = stats_[linetype].find(author); |
---|
546 | if (i==stats_[linetype].end()){ |
---|
547 | std::stringstream msg; |
---|
548 | msg << __FILE__ << ": author: " << author << " does not exist"; |
---|
549 | throw std::runtime_error(msg.str()); |
---|
550 | } |
---|
551 | assert(rev < static_cast<svn_revnum_t>(i->second.size())); |
---|
552 | return i->second[rev]; |
---|
553 | } |
---|
554 | |
---|
555 | }} // end of namespace svndigest and namespace theplu |
---|