1 | // $Id: Stats.cc 693 2008-09-11 20:42:56Z jari $ |
---|
2 | |
---|
3 | /* |
---|
4 | Copyright (C) 2005 Peter Johansson |
---|
5 | Copyright (C) 2006, 2007 Jari Häkkinen, Peter Johansson |
---|
6 | |
---|
7 | This file is part of svndigest, http://dev.thep.lu.se/svndigest |
---|
8 | |
---|
9 | svndigest is free software; you can redistribute it and/or modify it |
---|
10 | under the terms of the GNU General Public License as published by |
---|
11 | the Free Software Foundation; either version 3 of the License, or |
---|
12 | (at your option) any later version. |
---|
13 | |
---|
14 | svndigest is distributed in the hope that it will be useful, but |
---|
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
17 | General Public License for more details. |
---|
18 | |
---|
19 | You should have received a copy of the GNU General Public License |
---|
20 | along with svndigest. If not, see <http://www.gnu.org/licenses/>. |
---|
21 | */ |
---|
22 | |
---|
23 | #include "Stats.h" |
---|
24 | |
---|
25 | #include "Functor.h" |
---|
26 | #include "GnuplotFE.h" |
---|
27 | #include "SVNblame.h" |
---|
28 | #include "SVNinfo.h" |
---|
29 | #include "utility.h" |
---|
30 | |
---|
31 | #include <algorithm> |
---|
32 | #include <cassert> |
---|
33 | #include <cstdlib> |
---|
34 | #include <fstream> |
---|
35 | #include <iostream> |
---|
36 | #include <iterator> |
---|
37 | #include <map> |
---|
38 | #include <numeric> |
---|
39 | #include <string> |
---|
40 | #include <sstream> |
---|
41 | #include <unistd.h> |
---|
42 | #include <utility> |
---|
43 | #include <vector> |
---|
44 | |
---|
45 | |
---|
46 | namespace theplu{ |
---|
47 | namespace svndigest{ |
---|
48 | |
---|
49 | |
---|
50 | Stats::Stats(const std::string& path) |
---|
51 | : stats_(std::vector<Author2Vector>(LineTypeParser::total+1)) |
---|
52 | { |
---|
53 | // Make sure latest revision is set properly |
---|
54 | SVNinfo svn_info(path); |
---|
55 | revision_=svn_info.rev(); |
---|
56 | last_changed_rev_=svn_info.last_changed_rev(); |
---|
57 | reset(); |
---|
58 | } |
---|
59 | |
---|
60 | |
---|
61 | Stats::~Stats(void) |
---|
62 | { |
---|
63 | } |
---|
64 | |
---|
65 | |
---|
66 | void Stats::accumulate(std::vector<unsigned int>& vec, |
---|
67 | svn_revnum_t rev) const |
---|
68 | { |
---|
69 | if (vec.empty()){ |
---|
70 | // just to allow call to vec.back() below |
---|
71 | vec.resize(1,0); |
---|
72 | } |
---|
73 | else if (vec.begin()+rev < vec.end()) |
---|
74 | std::partial_sum(vec.begin()+rev,vec.end(),vec.begin()+rev); |
---|
75 | // static_cast to remove annoying compiler warning |
---|
76 | if (vec.size() < static_cast<size_t>(revision()+1)) |
---|
77 | vec.resize(revision()+1, vec.back()); |
---|
78 | } |
---|
79 | |
---|
80 | |
---|
81 | void Stats::accumulate_stats(svn_revnum_t rev) |
---|
82 | { |
---|
83 | for (std::set<std::string>::const_iterator iter(authors().begin()); |
---|
84 | iter!=authors().end(); ++iter) { |
---|
85 | std::vector<unsigned int>& code = code_stats()[*iter]; |
---|
86 | accumulate(code, rev); |
---|
87 | std::vector<unsigned int>& comments = comment_stats()[*iter]; |
---|
88 | accumulate(comments, rev); |
---|
89 | std::vector<unsigned int>& other = other_stats()[*iter]; |
---|
90 | accumulate(other, rev); |
---|
91 | std::vector<unsigned int>& copyright = copyright_stats()[*iter]; |
---|
92 | accumulate(copyright, rev); |
---|
93 | } |
---|
94 | } |
---|
95 | |
---|
96 | |
---|
97 | void Stats::add(const std::string& user, const unsigned int& rev, |
---|
98 | const LineTypeParser::line_type& lt, unsigned int n) |
---|
99 | { |
---|
100 | assert(user.size()); |
---|
101 | add_author(user); |
---|
102 | |
---|
103 | add(code_stats()[user], rev, lt==LineTypeParser::code, n); |
---|
104 | add(comment_stats()[user], rev, lt==LineTypeParser::comment, n); |
---|
105 | add(other_stats()[user], rev, lt==LineTypeParser::other, n); |
---|
106 | add(copyright_stats()[user], rev, lt==LineTypeParser::copyright, n); |
---|
107 | } |
---|
108 | |
---|
109 | |
---|
110 | void Stats::add(std::vector<unsigned int>& vec, unsigned int rev, bool x, |
---|
111 | unsigned int n) |
---|
112 | { |
---|
113 | if (vec.size() < rev+1){ |
---|
114 | vec.reserve(rev+1); |
---|
115 | vec.resize(rev); |
---|
116 | if (x) { |
---|
117 | assert(vec.size()+1<vec.max_size()); |
---|
118 | vec.push_back(n); |
---|
119 | } |
---|
120 | else { |
---|
121 | assert(vec.size()+1<vec.max_size()); |
---|
122 | vec.push_back(0); |
---|
123 | } |
---|
124 | } |
---|
125 | else if (x) |
---|
126 | vec[rev]+=n; |
---|
127 | } |
---|
128 | |
---|
129 | |
---|
130 | void Stats::add_author(std::string name) |
---|
131 | { |
---|
132 | authors_.insert(name); |
---|
133 | } |
---|
134 | |
---|
135 | |
---|
136 | void Stats::add_authors(std::set<std::string>::const_iterator first, |
---|
137 | std::set<std::string>::const_iterator last) |
---|
138 | { |
---|
139 | authors_.insert(first, last); |
---|
140 | } |
---|
141 | |
---|
142 | |
---|
143 | const std::set<std::string>& Stats::authors(void) const |
---|
144 | { |
---|
145 | return authors_; |
---|
146 | } |
---|
147 | |
---|
148 | |
---|
149 | void Stats::calc_all(void) |
---|
150 | { |
---|
151 | std::vector<unsigned int> init(revision()+1); |
---|
152 | code_stats()["all"]=std::accumulate(code_stats().begin(), |
---|
153 | code_stats().end(), init, |
---|
154 | PairValuePlus<std::string,unsigned int>()); |
---|
155 | comment_stats()["all"]=std::accumulate(comment_stats().begin(), |
---|
156 | comment_stats().end(), init, |
---|
157 | PairValuePlus<std::string,unsigned int>()); |
---|
158 | other_stats()["all"]=std::accumulate(other_stats().begin(), |
---|
159 | other_stats().end(), init, |
---|
160 | PairValuePlus<std::string,unsigned int>()); |
---|
161 | copyright_stats()["all"]=std::accumulate(copyright_stats().begin(), |
---|
162 | copyright_stats().end(), init, |
---|
163 | PairValuePlus<std::string,unsigned int>()); |
---|
164 | VectorPlus<unsigned int> vp; |
---|
165 | comment_or_copy_stats()["all"] = |
---|
166 | vp(comment_stats()["all"], copyright_stats()["all"]); |
---|
167 | |
---|
168 | total_stats()["all"] = |
---|
169 | vp(vp(code_stats()["all"], comment_or_copy_stats()["all"]), |
---|
170 | other_stats()["all"]); |
---|
171 | } |
---|
172 | |
---|
173 | |
---|
174 | void Stats::calc_total(void) |
---|
175 | { |
---|
176 | for (std::set<std::string>::const_iterator iter(authors().begin()); |
---|
177 | iter!=authors().end(); ++iter) { |
---|
178 | std::vector<unsigned int>& code = code_stats()[*iter]; |
---|
179 | std::vector<unsigned int>& comments = comment_stats()[*iter]; |
---|
180 | std::vector<unsigned int>& other = other_stats()[*iter]; |
---|
181 | std::vector<unsigned int>& copy = copyright_stats()[*iter]; |
---|
182 | |
---|
183 | VectorPlus<unsigned int> vp; |
---|
184 | total_stats()[*iter] = vp(vp(vp(code, comments),other),copy); |
---|
185 | } |
---|
186 | |
---|
187 | } |
---|
188 | |
---|
189 | |
---|
190 | void Stats::calc_comment_or_copy(void) |
---|
191 | { |
---|
192 | for (std::set<std::string>::const_iterator iter(authors().begin()); |
---|
193 | iter!=authors().end(); ++iter) { |
---|
194 | std::vector<unsigned int>& comments = comment_stats()[*iter]; |
---|
195 | std::vector<unsigned int>& copy = copyright_stats()[*iter]; |
---|
196 | |
---|
197 | VectorPlus<unsigned int> vp; |
---|
198 | comment_or_copy_stats()[*iter] = vp(comments, copy); |
---|
199 | } |
---|
200 | |
---|
201 | } |
---|
202 | |
---|
203 | |
---|
204 | unsigned int Stats::code(const std::string& user) const |
---|
205 | { |
---|
206 | return get_back(code_stats(), user); |
---|
207 | } |
---|
208 | |
---|
209 | |
---|
210 | unsigned int Stats::comments(const std::string& user) const |
---|
211 | { |
---|
212 | return get_back(comment_or_copy_stats(), user); |
---|
213 | } |
---|
214 | |
---|
215 | |
---|
216 | unsigned int Stats::empty(const std::string& user) const |
---|
217 | { |
---|
218 | return get_back(other_stats(), user); |
---|
219 | } |
---|
220 | |
---|
221 | |
---|
222 | unsigned int Stats::get_back(const Author2Vector& m, std::string user) const |
---|
223 | { |
---|
224 | A2VConstIter iter(m.find(std::string(user))); |
---|
225 | if (iter==m.end() || iter->second.empty()) |
---|
226 | return 0; |
---|
227 | return iter->second.back(); |
---|
228 | } |
---|
229 | |
---|
230 | |
---|
231 | const std::vector<unsigned int>& Stats::get_vector(const Author2Vector& m, |
---|
232 | std::string user) const |
---|
233 | { |
---|
234 | A2VConstIter iter(m.find(std::string(user))); |
---|
235 | if (iter==m.end()) |
---|
236 | throw std::runtime_error(user+std::string(" not found i Stats")); |
---|
237 | return iter->second; |
---|
238 | } |
---|
239 | |
---|
240 | |
---|
241 | svn_revnum_t Stats::last_changed_rev(void) const |
---|
242 | { |
---|
243 | return last_changed_rev_; |
---|
244 | } |
---|
245 | |
---|
246 | |
---|
247 | unsigned int Stats::lines(const std::string& user) const |
---|
248 | { |
---|
249 | return get_back(total_stats(), user); |
---|
250 | } |
---|
251 | |
---|
252 | |
---|
253 | void Stats::load(std::istream& is, Author2Vector& m) |
---|
254 | { |
---|
255 | while (m.size() < authors().size()+1 && is.good()) { |
---|
256 | std::string name; |
---|
257 | std::getline(is, name); |
---|
258 | std::vector<unsigned int>& vec=m[name]; |
---|
259 | std::string line; |
---|
260 | std::getline(is, line); |
---|
261 | std::stringstream ss(line); |
---|
262 | while (ss.good()) { |
---|
263 | svn_revnum_t rev=0; |
---|
264 | unsigned int count=0; |
---|
265 | ss >> rev; |
---|
266 | ss >> count; |
---|
267 | assert(rev<=revision_); |
---|
268 | if (!count) |
---|
269 | break; |
---|
270 | vec.resize(std::max(vec.size(),static_cast<size_t>(rev+1))); |
---|
271 | vec[rev]=count; |
---|
272 | } |
---|
273 | accumulate(vec); |
---|
274 | } |
---|
275 | } |
---|
276 | |
---|
277 | |
---|
278 | svn_revnum_t Stats::load_cache(std::istream& is) |
---|
279 | { |
---|
280 | std::string str; |
---|
281 | getline(is, str); |
---|
282 | if (str!=cache_check_str()) |
---|
283 | return 0; |
---|
284 | svn_revnum_t rev; |
---|
285 | is >> rev; |
---|
286 | reset(); |
---|
287 | size_t a_size=0; |
---|
288 | is >> a_size; |
---|
289 | while (authors().size()<a_size && is.good()){ |
---|
290 | getline(is, str); |
---|
291 | if (str.size()) |
---|
292 | add_author(str); |
---|
293 | } |
---|
294 | getline(is, str); |
---|
295 | if (str!=cache_check_str()) { |
---|
296 | return 0; |
---|
297 | } |
---|
298 | for (size_t i=0; i<stats_.size(); ++i){ |
---|
299 | load(is, stats_[i]); |
---|
300 | getline(is, str); |
---|
301 | if (str!=cache_check_str()) { |
---|
302 | return 0; |
---|
303 | } |
---|
304 | } |
---|
305 | return rev; |
---|
306 | } |
---|
307 | |
---|
308 | |
---|
309 | void Stats::map_add(A2VConstIter first1, A2VConstIter last1, |
---|
310 | Author2Vector& map) |
---|
311 | { |
---|
312 | A2VIter first2(map.begin()); |
---|
313 | Author2Vector::key_compare compare; |
---|
314 | while ( first1 != last1) { |
---|
315 | // key of first1 less than key of first2 |
---|
316 | if (first2==map.end() || compare(first1->first,first2->first)) { |
---|
317 | first2 = map.insert(first2, *first1); |
---|
318 | ++first1; |
---|
319 | } |
---|
320 | // key of first2 less than key of first1 |
---|
321 | else if ( compare(first2->first, first1->first)) { |
---|
322 | ++first2; |
---|
323 | } |
---|
324 | // keys are equivalent |
---|
325 | else { |
---|
326 | VectorPlus<Author2Vector::mapped_type::value_type> vp; |
---|
327 | first2->second = vp(first1->second, first2->second); |
---|
328 | ++first1; |
---|
329 | ++first2; |
---|
330 | } |
---|
331 | } |
---|
332 | } |
---|
333 | |
---|
334 | |
---|
335 | void Stats::parse(const std::string& path, svn_revnum_t rev) |
---|
336 | { |
---|
337 | do_parse(path, rev); |
---|
338 | calc_comment_or_copy(); |
---|
339 | calc_total(); |
---|
340 | calc_all(); |
---|
341 | assert(total_stats().size()); |
---|
342 | assert(code_stats().size()); |
---|
343 | assert(comment_or_copy_stats().size()); |
---|
344 | assert(other_stats().size()); |
---|
345 | } |
---|
346 | |
---|
347 | std::string Stats::plot(const std::string& filename, |
---|
348 | const std::string& linetype) const |
---|
349 | { |
---|
350 | assert(total_stats().size()); |
---|
351 | plot_init(filename); |
---|
352 | GnuplotFE* gp=GnuplotFE::instance(); |
---|
353 | const Author2Vector* stat=NULL; |
---|
354 | if (linetype=="total") |
---|
355 | stat = &total_stats(); |
---|
356 | else if (linetype=="code") |
---|
357 | stat = &code_stats(); |
---|
358 | else if (linetype=="comments") |
---|
359 | stat = &comment_or_copy_stats(); |
---|
360 | else if (linetype=="empty") |
---|
361 | stat = &other_stats(); |
---|
362 | assert(stat); |
---|
363 | assert(stat->size()); |
---|
364 | assert(stat->find("all")!=stat->end()); |
---|
365 | std::vector<unsigned int> total=get_vector(*stat, "all"); |
---|
366 | double yrange_max=1.03*total.back()+1; |
---|
367 | gp->yrange(yrange_max); |
---|
368 | |
---|
369 | typedef std::vector<std::pair<std::string, std::vector<unsigned int> > > vec_type; |
---|
370 | vec_type author_cont; |
---|
371 | author_cont.reserve(stat->size()); |
---|
372 | for (std::set<std::string>::const_iterator i=authors_.begin(); |
---|
373 | i != authors_.end(); ++i) { |
---|
374 | if (lines(*i)) { |
---|
375 | assert(stat->find(*i)!=stat->end()); |
---|
376 | author_cont.push_back(std::make_pair(*i,get_vector(*stat,*i))); |
---|
377 | } |
---|
378 | } |
---|
379 | |
---|
380 | LessReversed<std::vector<unsigned int> > lr; |
---|
381 | PairSecondCompare<std::string, std::vector<unsigned int>, |
---|
382 | LessReversed<std::vector<unsigned int> > > compare(lr); |
---|
383 | std::sort(author_cont.begin(), author_cont.end(), compare); |
---|
384 | |
---|
385 | size_t plotno=author_cont.size(); |
---|
386 | std::stringstream ss; |
---|
387 | vec_type::iterator end(author_cont.end()); |
---|
388 | for (vec_type::iterator i(author_cont.begin()); i!=end; ++i) { |
---|
389 | ss.str(""); |
---|
390 | ss << "set key height " << 2*plotno; |
---|
391 | gp->command(ss.str()); |
---|
392 | ss.str(""); |
---|
393 | ss << get_back(*stat, i->first) << " " << i->first; |
---|
394 | gp->yrange(yrange_max); |
---|
395 | gp->linetitle(ss.str()); |
---|
396 | ss.str(""); |
---|
397 | ss << "steps " << --plotno+2; |
---|
398 | gp->linestyle(ss.str()); |
---|
399 | gp->plot(i->second); |
---|
400 | } |
---|
401 | ss.str(""); |
---|
402 | ss << get_back(*stat, "all") << " total"; |
---|
403 | gp->command("set key height 0"); |
---|
404 | gp->linetitle(ss.str()); |
---|
405 | gp->linestyle("steps 1"); |
---|
406 | gp->plot(total); |
---|
407 | |
---|
408 | gp->command("unset multiplot"); |
---|
409 | gp->yrange(); |
---|
410 | |
---|
411 | return filename; |
---|
412 | } |
---|
413 | |
---|
414 | |
---|
415 | void Stats::plot_init(const std::string& filename) const |
---|
416 | { |
---|
417 | GnuplotFE* gp=GnuplotFE::instance(); |
---|
418 | gp->command("set term png"); |
---|
419 | gp->command("set output '"+filename+"'"); |
---|
420 | gp->command("set xtics nomirror"); |
---|
421 | gp->command("set ytics nomirror"); |
---|
422 | gp->command("set key default"); |
---|
423 | gp->command("set key left Left reverse"); |
---|
424 | gp->command("set multiplot"); |
---|
425 | } |
---|
426 | |
---|
427 | |
---|
428 | void Stats::plot_summary(const std::string& filename) const |
---|
429 | { |
---|
430 | plot_init(filename); |
---|
431 | GnuplotFE* gp=GnuplotFE::instance(); |
---|
432 | std::vector<unsigned int> total = get_vector(total_stats(), "all"); |
---|
433 | double yrange_max=1.03*total.back()+1; |
---|
434 | gp->yrange(yrange_max); |
---|
435 | std::stringstream ss; |
---|
436 | |
---|
437 | ss.str(""); |
---|
438 | std::vector<unsigned int> x(get_vector(code_stats(), "all")); |
---|
439 | ss << x.back() << " code"; |
---|
440 | gp->command("set key height 2"); |
---|
441 | gp->linetitle(ss.str()); |
---|
442 | gp->linestyle("steps 2"); |
---|
443 | gp->plot(x); |
---|
444 | |
---|
445 | ss.str(""); |
---|
446 | x = get_vector(comment_or_copy_stats(), "all"); |
---|
447 | ss << x.back() << " comment"; |
---|
448 | gp->command("set key height 4"); |
---|
449 | gp->linetitle(ss.str()); |
---|
450 | gp->linestyle("steps 3"); |
---|
451 | gp->plot(x); |
---|
452 | |
---|
453 | ss.str(""); |
---|
454 | x = get_vector(other_stats(), "all"); |
---|
455 | ss << x.back() << " other"; |
---|
456 | gp->command("set key height 6"); |
---|
457 | gp->linetitle(ss.str()); |
---|
458 | gp->linestyle("steps 4"); |
---|
459 | gp->plot(x); |
---|
460 | |
---|
461 | ss.str(""); |
---|
462 | ss << total.back() << " total"; |
---|
463 | gp->command("set key height 0"); |
---|
464 | gp->linetitle(ss.str()); |
---|
465 | gp->linestyle("steps 1"); |
---|
466 | gp->plot(total); |
---|
467 | |
---|
468 | gp->command("unset multiplot"); |
---|
469 | gp->yrange(); |
---|
470 | } |
---|
471 | |
---|
472 | |
---|
473 | void Stats::print(std::ostream& os) const |
---|
474 | { |
---|
475 | os << cache_check_str() << "\n"; |
---|
476 | os << last_changed_rev() << " "; |
---|
477 | os << authors().size() << "\n"; |
---|
478 | |
---|
479 | std::copy(authors().begin(), authors().end(), |
---|
480 | std::ostream_iterator<std::string>(os, "\n")); |
---|
481 | os << cache_check_str() << "\n"; |
---|
482 | for (size_t i=0; i<stats_.size(); ++i){ |
---|
483 | print(os, stats_[i]); |
---|
484 | os << cache_check_str() << "\n"; |
---|
485 | } |
---|
486 | } |
---|
487 | |
---|
488 | |
---|
489 | void Stats::print(std::ostream& os, const Author2Vector& m) const |
---|
490 | { |
---|
491 | for (A2VConstIter i(m.begin()); i!=m.end(); ++i){ |
---|
492 | os << i->first << "\n"; |
---|
493 | assert(i->second.size()); |
---|
494 | if (i->second[0]) |
---|
495 | os << 0 << " " << i->second[0] << " "; |
---|
496 | for (size_t j=1; j<i->second.size(); ++j) { |
---|
497 | // only print if stats changes in this rev |
---|
498 | if (i->second[j] != i->second[j-1]) { |
---|
499 | os << j << " " << i->second[j] - i->second[j-1] << " "; |
---|
500 | } |
---|
501 | } |
---|
502 | os << "\n"; |
---|
503 | } |
---|
504 | } |
---|
505 | |
---|
506 | void Stats::reset(void) |
---|
507 | { |
---|
508 | for (size_t i=0; i<stats_.size(); ++i){ |
---|
509 | stats_[i].clear(); |
---|
510 | stats_[i]["all"]=std::vector<unsigned int>(revision_+1); |
---|
511 | } |
---|
512 | authors_.clear(); |
---|
513 | } |
---|
514 | |
---|
515 | |
---|
516 | Stats& Stats::operator+=(const Stats& rhs) |
---|
517 | { |
---|
518 | revision_ = std::max(revision_, rhs.revision_); |
---|
519 | last_changed_rev_ = std::max(last_changed_rev_, rhs.last_changed_rev_); |
---|
520 | add_authors(rhs.authors().begin(), rhs.authors().end()); |
---|
521 | assert(stats_.size()==rhs.stats_.size()); |
---|
522 | for (size_t i=0; i<stats_.size(); ++i) |
---|
523 | map_add(rhs.stats_[i].begin(), rhs.stats_[i].end(), stats_[i]); |
---|
524 | |
---|
525 | return *this; |
---|
526 | } |
---|
527 | |
---|
528 | |
---|
529 | size_t Stats::operator()(int linetype, std::string author, |
---|
530 | svn_revnum_t rev) const |
---|
531 | { |
---|
532 | assert(linetype<=LineTypeParser::total); |
---|
533 | assert(static_cast<size_t>(linetype) < stats_.size()); |
---|
534 | assert(rev>=0); |
---|
535 | A2VConstIter i = stats_[linetype].find(author); |
---|
536 | if (i==stats_[linetype].end()){ |
---|
537 | std::stringstream msg; |
---|
538 | msg << __FILE__ << ": author: " << author << " does not exist"; |
---|
539 | throw std::runtime_error(msg.str()); |
---|
540 | } |
---|
541 | assert(rev < static_cast<svn_revnum_t>(i->second.size())); |
---|
542 | return i->second[rev]; |
---|
543 | } |
---|
544 | |
---|
545 | }} // end of namespace svndigest and namespace theplu |
---|