1 | // $Id: Parser.cc 408 2007-06-29 10:08:19Z jari $ |
---|
2 | |
---|
3 | /* |
---|
4 | Copyright (C) 2006, 2007 Jari Häkkinen, Peter Johansson |
---|
5 | |
---|
6 | This file is part of svndigest, http://lev.thep.lu.se/trac/svndigest |
---|
7 | |
---|
8 | svndigest is free software; you can redistribute it and/or modify it |
---|
9 | under the terms of the GNU General Public License as published by |
---|
10 | the Free Software Foundation; either version 2 of the License, or |
---|
11 | (at your option) any later version. |
---|
12 | |
---|
13 | svndigest is distributed in the hope that it will be useful, but |
---|
14 | WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
16 | General Public License for more details. |
---|
17 | |
---|
18 | You should have received a copy of the GNU General Public License |
---|
19 | along with this program; if not, write to the Free Software |
---|
20 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA |
---|
21 | 02111-1307, USA. |
---|
22 | */ |
---|
23 | |
---|
#include "Parser.h"
#include "utility.h"

#include <algorithm>
#include <cassert>
#include <cctype>
#include <functional>
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
---|
35 | |
---|
36 | namespace theplu{ |
---|
37 | namespace svndigest{ |
---|
38 | |
---|
39 | |
---|
40 | Parser::Parser(std::string path) |
---|
41 | { |
---|
42 | std::ifstream is(path.c_str()); |
---|
43 | assert(is.good()); |
---|
44 | std::vector<std::pair<std::string, std::string> > codon; |
---|
45 | // Ignore trailing '.in' in file names |
---|
46 | if (match_end(path.rbegin(), path.rend(), ".in")) |
---|
47 | path = path.substr(0, path.size()-3); |
---|
48 | if (match_end(path.rbegin(), path.rend(), ".ac") || |
---|
49 | match_end(path.rbegin(), path.rend(), ".am") || |
---|
50 | match_end(path.rbegin(), path.rend(), ".m4")) { |
---|
51 | codon.reserve(2); |
---|
52 | codon.push_back(std::make_pair("#", "\n")); |
---|
53 | codon.push_back(std::make_pair("dnl", "\n")); |
---|
54 | parse(is, codon); |
---|
55 | } |
---|
56 | else if (match_end(path.rbegin(), path.rend(), ".c") || |
---|
57 | match_end(path.rbegin(), path.rend(), ".cc") || |
---|
58 | match_end(path.rbegin(), path.rend(), ".cpp") || |
---|
59 | match_end(path.rbegin(), path.rend(), ".cxx") || |
---|
60 | match_end(path.rbegin(), path.rend(), ".h") || |
---|
61 | match_end(path.rbegin(), path.rend(), ".hh") || |
---|
62 | match_end(path.rbegin(), path.rend(), ".hpp") || |
---|
63 | match_end(path.rbegin(), path.rend(), ".java")) { |
---|
64 | codon.reserve(2); |
---|
65 | codon.push_back(std::make_pair("//", "\n")); |
---|
66 | codon.push_back(std::make_pair("/*", "*/")); |
---|
67 | parse(is, codon); |
---|
68 | } |
---|
69 | else if (match_end(path.rbegin(), path.rend(), ".pl") || |
---|
70 | match_end(path.rbegin(), path.rend(), ".pm") || |
---|
71 | match_end(path.rbegin(), path.rend(), ".sh") || |
---|
72 | match_end(path.rbegin(), path.rend(), "config") || |
---|
73 | file_name(path)=="bootstrap" || |
---|
74 | file_name(path)=="Makefile") { |
---|
75 | codon.push_back(std::make_pair("#", "\n")); |
---|
76 | parse(is,codon); |
---|
77 | } |
---|
78 | else if (match_end(path.rbegin(), path.rend(), ".tex") || |
---|
79 | match_end(path.rbegin(), path.rend(), ".m")) { |
---|
80 | codon.push_back(std::make_pair("%", "\n")); |
---|
81 | parse(is,codon); |
---|
82 | } |
---|
83 | else if (match_end(path.rbegin(), path.rend(), ".jsp")) { |
---|
84 | codon.reserve(2); |
---|
85 | codon.push_back(std::make_pair("<!--", "-->")); |
---|
86 | codon.push_back(std::make_pair("<%--", "--%>")); |
---|
87 | parse(is,codon); |
---|
88 | } |
---|
89 | else if (match_end(path.rbegin(), path.rend(), ".html") || |
---|
90 | match_end(path.rbegin(), path.rend(), ".xml") || |
---|
91 | match_end(path.rbegin(), path.rend(), ".xsl") || |
---|
92 | match_end(path.rbegin(), path.rend(), ".xsd") || |
---|
93 | match_end(path.rbegin(), path.rend(), ".xhtml") || |
---|
94 | match_end(path.rbegin(), path.rend(), ".shtml") || |
---|
95 | match_end(path.rbegin(), path.rend(), ".xml") || |
---|
96 | match_end(path.rbegin(), path.rend(), ".css") || |
---|
97 | match_end(path.rbegin(), path.rend(), ".rss") || |
---|
98 | match_end(path.rbegin(), path.rend(), ".sgml") ){ |
---|
99 | codon.push_back(std::make_pair("<!--", "-->")); |
---|
100 | parse(is,codon); |
---|
101 | } |
---|
102 | else |
---|
103 | text_mode(is); |
---|
104 | is.close(); |
---|
105 | } |
---|
106 | |
---|
107 | |
---|
108 | void Parser::parse(std::istream& is, |
---|
109 | std::vector<std::pair<std::string, std::string> >& codon) |
---|
110 | { |
---|
111 | // mode zero means we are currently not in a comment |
---|
112 | // if mode!=0 comment is closed by codon[mode-1].second -> mode=0 |
---|
113 | // if codon[x-1].start is found and x >= mode -> mode=x |
---|
114 | size_t mode = 0; |
---|
115 | std::string str; |
---|
116 | while(getline(is,str)) { |
---|
117 | line_type lt=empty; |
---|
118 | for (std::string::iterator iter=str.begin(); iter!=str.end(); ++iter){ |
---|
119 | for (size_t i=mode; i<codon.size(); ++i) { |
---|
120 | if (match_begin(iter, str.end(), codon[i].first)) { |
---|
121 | mode = i+1; |
---|
122 | break; |
---|
123 | } |
---|
124 | } |
---|
125 | assert(mode==0 || mode-1<codon.size()); |
---|
126 | if (mode && match_begin(iter,str.end(), codon[mode-1].second)){ |
---|
127 | mode=0; |
---|
128 | continue; |
---|
129 | } |
---|
130 | // A line of code or comment must contain at least one |
---|
131 | // alphanumerical character. |
---|
132 | if (isalnum(*iter)) { |
---|
133 | if (!mode) { |
---|
134 | lt=code; |
---|
135 | } |
---|
136 | else if (lt!=code) { |
---|
137 | lt=comment; |
---|
138 | } |
---|
139 | } |
---|
140 | } |
---|
141 | if (mode && codon[mode-1].second==std::string("\n")) |
---|
142 | mode=0; |
---|
143 | type_.push_back(lt); |
---|
144 | } |
---|
145 | } |
---|
146 | |
---|
147 | |
---|
148 | void Parser::text_mode(std::istream& is) |
---|
149 | { |
---|
150 | std::string str; |
---|
151 | while(getline(is,str)) { |
---|
152 | line_type lt=empty; |
---|
153 | for (std::string::iterator iter=str.begin(); iter!=str.end(); ++iter){ |
---|
154 | if (lt==empty && isalnum(*iter)) |
---|
155 | lt = comment; |
---|
156 | } |
---|
157 | type_.push_back(lt); |
---|
158 | } |
---|
159 | } |
---|
160 | |
---|
161 | |
---|
162 | }} // end of namespace svndigest and namespace theplu |
---|