trunk/src/tools/srcclean.c
| r20198 | r20199 | |
| 76 | 76 | int removed_newlines = 0; |
| 77 | 77 | int src = 0; |
| 78 | 78 | int dst = 0; |
| 79 | | bool in_c_comment = false; |
| 80 | | bool in_cpp_comment = false; |
| 81 | | int indent_c_comment = 0; |
| 79 | bool in_multiline_comment = false; |
| 80 | bool in_singleline_comment = false; |
| 81 | int indent_multiline_comment = 0; |
| 82 | 82 | int in_c_string = FALSE; |
| 83 | 83 | int hichars = 0; |
| 84 | 84 | bool is_c_file; |
| r20198 | r20199 | |
| 88 | 88 | int bytes; |
| 89 | 89 | int col = 0; |
| 90 | 90 | int escape = 0; |
| 91 | int consume = 0; |
| 91 | 92 | const int tab_size = 4; |
| 92 | 93 | |
| 93 | 94 | /* print usage info */ |
| r20198 | r20199 | |
| 117 | 118 | { |
| 118 | 119 | UINT8 ch = original[src++]; |
| 119 | 120 | |
| 120 | | /* check for invalid upper-ASCII chars, but only for non-xml files (swlists might contain UTF-8 chars) */ |
| 121 | | if (!is_xml_file && ch != 13 && ch != 10 && ch != 9 && (ch > 127 || ch < 32)) |
| 121 | if (consume == 0) |
| 122 | 122 | { |
| 123 | | ch = '?'; |
| 124 | | hichars++; |
| 125 | | } |
| 123 | /* C-specific handling */ |
| 124 | if (is_c_file) |
| 125 | { |
| 126 | /* check for string/char literals */ |
| 127 | if ((ch == '"' || ch == '\'') && !in_multiline_comment && !in_singleline_comment ) |
| 128 | { |
| 129 | if (ch == in_c_string && !escape) |
| 130 | in_c_string = 0; |
| 131 | else if (!in_c_string) |
| 132 | in_c_string = ch; |
| 133 | } |
| 126 | 134 | |
| 127 | | /* C-specific handling */ |
| 128 | | if (is_c_file) |
| 129 | | { |
| 130 | | /* check for string/char literals */ |
| 131 | | if ((ch == '"' || ch == '\'') && !in_c_comment && !in_cpp_comment ) |
| 132 | | { |
| 133 | | if (ch == in_c_string && !escape) |
| 134 | | in_c_string = 0; |
| 135 | | else if (!in_c_string) |
| 136 | | in_c_string = ch; |
| 135 | /* Update escape state */ |
| 136 | if (in_c_string) |
| 137 | escape = (ch == '\\') ? !escape : 0; |
| 138 | |
| 139 | if (!in_c_string && !in_singleline_comment) |
| 140 | { |
| 141 | /* track whether or not we are within a C-style comment */ |
| 142 | if (!in_multiline_comment && ch == '/' && original[src] == '*') |
| 143 | { |
| 144 | in_multiline_comment = true; |
| 145 | if (col > 0 && modified[dst-1] == 0x09) |
| 146 | { |
| 147 | indent_multiline_comment = col; |
| 148 | } |
| 149 | else |
| 150 | { |
| 151 | indent_multiline_comment = 0; |
| 152 | } |
| 153 | consume = 2; |
| 154 | } |
| 155 | else if (in_multiline_comment && ch == '*' && original[src] == '/') |
| 156 | { |
| 157 | in_multiline_comment = false; |
| 158 | indent_multiline_comment = 0; |
| 159 | consume = 2; |
| 160 | } |
| 161 | |
| 162 | /* track whether or not we are within a C++-style comment */ |
| 163 | else if (!in_multiline_comment && ch == '/' && original[src] == '/') |
| 164 | { |
| 165 | in_singleline_comment = true; |
| 166 | consume = 2; |
| 167 | } |
| 168 | } |
| 137 | 169 | } |
| 138 | 170 | |
| 139 | | /* Update escape state */ |
| 140 | | if (in_c_string) |
| 141 | | escape = (ch == '\\') ? !escape : 0; |
| 142 | | |
| 143 | | if (!in_c_string && !in_cpp_comment) |
| 171 | if (is_xml_file) |
| 144 | 172 | { |
| 145 | | int consume = TRUE; |
| 146 | | |
| 147 | | /* track whether or not we are within a C-style comment */ |
| 148 | | if (!in_c_comment && ch == '/' && original[src] == '*') |
| 173 | /* track whether or not we are within an XML comment */ |
| 174 | if (!in_multiline_comment && ch == '<' && original[src] == '!' && original[src+1] == '-' && original[src+2] == '-') |
| 149 | 175 | { |
| 150 | | in_c_comment = true; |
| 176 | in_multiline_comment = true; |
| 151 | 177 | if (col > 0 && modified[dst-1] == 0x09) |
| 152 | 178 | { |
| 153 | | indent_c_comment = col; |
| 179 | indent_multiline_comment = col; |
| 154 | 180 | } |
| 155 | 181 | else |
| 156 | 182 | { |
| 157 | | indent_c_comment = 0; |
| 183 | indent_multiline_comment = 0; |
| 158 | 184 | } |
| 185 | consume = 4; |
| 159 | 186 | } |
| 160 | | else if (in_c_comment && ch == '*' && original[src] == '/') |
| 187 | else if (in_multiline_comment && ch == '-' && original[src] == '-' && original[src+1] == '>') |
| 161 | 188 | { |
| 162 | | in_c_comment = false; |
| 163 | | indent_c_comment = 0; |
| 189 | in_multiline_comment = false; |
| 190 | indent_multiline_comment = 0; |
| 191 | consume = 3; |
| 164 | 192 | } |
| 165 | | |
| 166 | | /* track whether or not we are within a C++-style comment */ |
| 167 | | else if (!in_c_comment && ch == '/' && original[src] == '/') |
| 168 | | in_cpp_comment = true; |
| 169 | | else |
| 170 | | consume = FALSE; |
| 171 | | |
| 172 | | if (consume) |
| 173 | | { |
| 174 | | modified[dst++] = ch; |
| 175 | | col++; |
| 176 | | ch = original[src++]; |
| 177 | | } |
| 178 | 193 | } |
| 179 | 194 | } |
| 180 | 195 | |
| 181 | | /* if we hit a LF without a CR, back up and act like we hit a CR */ |
| 182 | | if (ch == 0x0a) |
| 196 | if (consume != 0) |
| 183 | 197 | { |
| 184 | | src--; |
| 185 | | ch = 0x0d; |
| 186 | | fixed_nix_style = 1; |
| 198 | modified[dst++] = ch; |
| 199 | col++; |
| 200 | consume--; |
| 187 | 201 | } |
| 188 | 202 | |
| 189 | | /* if we hit a CR, clean up from there */ |
| 190 | | if (ch == 0x0d) |
| 203 | /* if we hit a CR or LF, clean up from there */ |
| 204 | else if (ch == 0x0d || ch == 0x0a) |
| 191 | 205 | { |
| 192 | 206 | /* remove all extra spaces/tabs at the end */ |
| 193 | 207 | while (dst > 0 && (modified[dst-1] == ' ' || modified[dst-1] == 0x09)) |
| r20198 | r20199 | |
| 202 | 216 | col = 0; |
| 203 | 217 | |
| 204 | 218 | /* skip over any LF in the source file */ |
| 205 | | if (original[src] == 0x0a) |
| 219 | if (ch == 0x0d && original[src] == 0x0a) |
| 206 | 220 | src++; |
| 221 | else if (ch == 0x0a) |
| 222 | fixed_nix_style = 1; |
| 207 | 223 | else |
| 208 | 224 | fixed_mac_style = 1; |
| 209 | 225 | |
| 210 | 226 | /* we are no longer in a C++-style comment */ |
| 211 | | in_cpp_comment = false; |
| 227 | in_singleline_comment = false; |
| 212 | 228 | |
| 213 | 229 | if (in_c_string) |
| 214 | 230 | { |
| r20198 | r20199 | |
| 223 | 239 | int spaces = tab_size - (col % tab_size); |
| 224 | 240 | |
| 225 | 241 | /* convert tabs to spaces, if not used for indenting */ |
| 226 | | if ((in_c_comment && col >= indent_c_comment) || (col != 0 && modified[dst-1] != 0x09)) |
| 242 | if ((in_multiline_comment && col >= indent_multiline_comment) || (col != 0 && modified[dst-1] != 0x09)) |
| 227 | 243 | { |
| 228 | 244 | while (spaces > 0) |
| 229 | 245 | { |
| r20198 | r20199 | |
| 261 | 277 | } |
| 262 | 278 | |
| 263 | 279 | /* convert spaces to tabs, if used for indenting */ |
| 264 | | while (spaces > 0 && (!in_c_comment || col < indent_c_comment) && (col == 0 || modified[dst-1] == 0x09)) |
| 280 | while (spaces > 0 && (!in_multiline_comment || col < indent_multiline_comment) && (col == 0 || modified[dst-1] == 0x09)) |
| 265 | 281 | { |
| 266 | 282 | modified[dst++] = 0x09; |
| 267 | 283 | spaces -= tab_size; |
| r20198 | r20199 | |
| 280 | 296 | /* otherwise, copy the source character */ |
| 281 | 297 | else |
| 282 | 298 | { |
| 299 | /* check for invalid control or upper-ASCII chars, but only for non-xml files (swlists might contain UTF-8 chars) */ |
| 300 | if (!is_xml_file && (ch < 32 || ch > 127)) |
| 301 | { |
| 302 | ch = '?'; |
| 303 | hichars++; |
| 304 | } |
| 305 | |
| 283 | 306 | modified[dst++] = ch; |
| 284 | 307 | col++; |
| 285 | 308 | } |
| 286 | 309 | } |
| 287 | 310 | |
| 288 | 311 | /* if we didn't find an end of comment, we screwed up */ |
| 289 | | if (in_c_comment) |
| 312 | if (in_multiline_comment) |
| 290 | 313 | { |
| 291 | | printf("Error: unmatched C-style comment (%s)!\n", argv[1]); |
| 314 | printf("Error: unmatched multi-line comment (%s)!\n", argv[1]); |
| 292 | 315 | return 1; |
| 293 | 316 | } |
| 294 | 317 | |