trunk/src/tools/srcclean.c
r20198 | r20199 | |
76 | 76 | int removed_newlines = 0; |
77 | 77 | int src = 0; |
78 | 78 | int dst = 0; |
79 | | bool in_c_comment = false; |
80 | | bool in_cpp_comment = false; |
81 | | int indent_c_comment = 0; |
| 79 | bool in_multiline_comment = false; |
| 80 | bool in_singleline_comment = false; |
| 81 | int indent_multiline_comment = 0; |
82 | 82 | int in_c_string = FALSE; |
83 | 83 | int hichars = 0; |
84 | 84 | bool is_c_file; |
r20198 | r20199 | |
88 | 88 | int bytes; |
89 | 89 | int col = 0; |
90 | 90 | int escape = 0; |
| 91 | int consume = 0; |
91 | 92 | const int tab_size = 4; |
92 | 93 | |
93 | 94 | /* print usage info */ |
r20198 | r20199 | |
117 | 118 | { |
118 | 119 | UINT8 ch = original[src++]; |
119 | 120 | |
120 | | /* check for invalid upper-ASCII chars, but only for non-xml files (swlists might contain UTF-8 chars) */ |
121 | | if (!is_xml_file && ch != 13 && ch != 10 && ch != 9 && (ch > 127 || ch < 32)) |
| 121 | if (consume == 0) |
122 | 122 | { |
123 | | ch = '?'; |
124 | | hichars++; |
125 | | } |
| 123 | /* C-specific handling */ |
| 124 | if (is_c_file) |
| 125 | { |
| 126 | /* check for string/char literals */ |
| 127 | if ((ch == '"' || ch == '\'') && !in_multiline_comment && !in_singleline_comment ) |
| 128 | { |
| 129 | if (ch == in_c_string && !escape) |
| 130 | in_c_string = 0; |
| 131 | else if (!in_c_string) |
| 132 | in_c_string = ch; |
| 133 | } |
126 | 134 | |
127 | | /* C-specific handling */ |
128 | | if (is_c_file) |
129 | | { |
130 | | /* check for string/char literals */ |
131 | | if ((ch == '"' || ch == '\'') && !in_c_comment && !in_cpp_comment ) |
132 | | { |
133 | | if (ch == in_c_string && !escape) |
134 | | in_c_string = 0; |
135 | | else if (!in_c_string) |
136 | | in_c_string = ch; |
| 135 | /* Update escape state */ |
| 136 | if (in_c_string) |
| 137 | escape = (ch == '\\') ? !escape : 0; |
| 138 | |
| 139 | if (!in_c_string && !in_singleline_comment) |
| 140 | { |
| 141 | /* track whether or not we are within a C-style comment */ |
| 142 | if (!in_multiline_comment && ch == '/' && original[src] == '*') |
| 143 | { |
| 144 | in_multiline_comment = true; |
| 145 | if (col > 0 && modified[dst-1] == 0x09) |
| 146 | { |
| 147 | indent_multiline_comment = col; |
| 148 | } |
| 149 | else |
| 150 | { |
| 151 | indent_multiline_comment = 0; |
| 152 | } |
| 153 | consume = 2; |
| 154 | } |
| 155 | else if (in_multiline_comment && ch == '*' && original[src] == '/') |
| 156 | { |
| 157 | in_multiline_comment = false; |
| 158 | indent_multiline_comment = 0; |
| 159 | consume = 2; |
| 160 | } |
| 161 | |
| 162 | /* track whether or not we are within a C++-style comment */ |
| 163 | else if (!in_multiline_comment && ch == '/' && original[src] == '/') |
| 164 | { |
| 165 | in_singleline_comment = true; |
| 166 | consume = 2; |
| 167 | } |
| 168 | } |
137 | 169 | } |
138 | 170 | |
139 | | /* Update escape state */ |
140 | | if (in_c_string) |
141 | | escape = (ch == '\\') ? !escape : 0; |
142 | | |
143 | | if (!in_c_string && !in_cpp_comment) |
| 171 | if (is_xml_file) |
144 | 172 | { |
145 | | int consume = TRUE; |
146 | | |
147 | | /* track whether or not we are within a C-style comment */ |
148 | | if (!in_c_comment && ch == '/' && original[src] == '*') |
| 173 | /* track whether or not we are within an XML comment */ |
| 174 | if (!in_multiline_comment && ch == '<' && original[src] == '!' && original[src+1] == '-' && original[src+2] == '-') |
149 | 175 | { |
150 | | in_c_comment = true; |
| 176 | in_multiline_comment = true; |
151 | 177 | if (col > 0 && modified[dst-1] == 0x09) |
152 | 178 | { |
153 | | indent_c_comment = col; |
| 179 | indent_multiline_comment = col; |
154 | 180 | } |
155 | 181 | else |
156 | 182 | { |
157 | | indent_c_comment = 0; |
| 183 | indent_multiline_comment = 0; |
158 | 184 | } |
| 185 | consume = 4; |
159 | 186 | } |
160 | | else if (in_c_comment && ch == '*' && original[src] == '/') |
| 187 | else if (in_multiline_comment && ch == '-' && original[src] == '-' && original[src+1] == '>') |
161 | 188 | { |
162 | | in_c_comment = false; |
163 | | indent_c_comment = 0; |
| 189 | in_multiline_comment = false; |
| 190 | indent_multiline_comment = 0; |
| 191 | consume = 3; |
164 | 192 | } |
165 | | |
166 | | /* track whether or not we are within a C++-style comment */ |
167 | | else if (!in_c_comment && ch == '/' && original[src] == '/') |
168 | | in_cpp_comment = true; |
169 | | else |
170 | | consume = FALSE; |
171 | | |
172 | | if (consume) |
173 | | { |
174 | | modified[dst++] = ch; |
175 | | col++; |
176 | | ch = original[src++]; |
177 | | } |
178 | 193 | } |
179 | 194 | } |
180 | 195 | |
181 | | /* if we hit a LF without a CR, back up and act like we hit a CR */ |
182 | | if (ch == 0x0a) |
| 196 | if (consume != 0) |
183 | 197 | { |
184 | | src--; |
185 | | ch = 0x0d; |
186 | | fixed_nix_style = 1; |
| 198 | modified[dst++] = ch; |
| 199 | col++; |
| 200 | consume--; |
187 | 201 | } |
188 | 202 | |
189 | | /* if we hit a CR, clean up from there */ |
190 | | if (ch == 0x0d) |
| 203 | /* if we hit a CR or LF, clean up from there */ |
| 204 | else if (ch == 0x0d || ch == 0x0a) |
191 | 205 | { |
192 | 206 | /* remove all extra spaces/tabs at the end */ |
193 | 207 | while (dst > 0 && (modified[dst-1] == ' ' || modified[dst-1] == 0x09)) |
r20198 | r20199 | |
202 | 216 | col = 0; |
203 | 217 | |
204 | 218 | /* skip over any LF in the source file */ |
205 | | if (original[src] == 0x0a) |
| 219 | if (ch == 0x0d && original[src] == 0x0a) |
206 | 220 | src++; |
| 221 | else if (ch == 0x0a) |
| 222 | fixed_nix_style = 1; |
207 | 223 | else |
208 | 224 | fixed_mac_style = 1; |
209 | 225 | |
210 | 226 | /* we are no longer in a C++-style comment */ |
211 | | in_cpp_comment = false; |
| 227 | in_singleline_comment = false; |
212 | 228 | |
213 | 229 | if (in_c_string) |
214 | 230 | { |
r20198 | r20199 | |
223 | 239 | int spaces = tab_size - (col % tab_size); |
224 | 240 | |
225 | 241 | /* convert tabs to spaces, if not used for indenting */ |
226 | | if ((in_c_comment && col >= indent_c_comment) || (col != 0 && modified[dst-1] != 0x09)) |
| 242 | if ((in_multiline_comment && col >= indent_multiline_comment) || (col != 0 && modified[dst-1] != 0x09)) |
227 | 243 | { |
228 | 244 | while (spaces > 0) |
229 | 245 | { |
r20198 | r20199 | |
261 | 277 | } |
262 | 278 | |
263 | 279 | /* convert spaces to tabs, if used for indenting */ |
264 | | while (spaces > 0 && (!in_c_comment || col < indent_c_comment) && (col == 0 || modified[dst-1] == 0x09)) |
| 280 | while (spaces > 0 && (!in_multiline_comment || col < indent_multiline_comment) && (col == 0 || modified[dst-1] == 0x09)) |
265 | 281 | { |
266 | 282 | modified[dst++] = 0x09; |
267 | 283 | spaces -= tab_size; |
r20198 | r20199 | |
280 | 296 | /* otherwise, copy the source character */ |
281 | 297 | else |
282 | 298 | { |
| 299 | /* check for invalid upper-ASCII chars, but only for non-xml files (swlists might contain UTF-8 chars) */ |
| 300 | if (!is_xml_file && (ch < 32 || ch > 127)) |
| 301 | { |
| 302 | ch = '?'; |
| 303 | hichars++; |
| 304 | } |
| 305 | |
283 | 306 | modified[dst++] = ch; |
284 | 307 | col++; |
285 | 308 | } |
286 | 309 | } |
287 | 310 | |
288 | 311 | /* if we didn't find an end of comment, we screwed up */ |
289 | | if (in_c_comment) |
| 312 | if (in_multiline_comment) |
290 | 313 | { |
291 | | printf("Error: unmatched C-style comment (%s)!\n", argv[1]); |
| 314 | printf("Error: unmatched multi-line comment (%s)!\n", argv[1]); |
292 | 315 | return 1; |
293 | 316 | } |
294 | 317 | |