trunk/scripts/build/check_po.py
| r253614 | r253615 |  | 
|---|
| 1 |  | #! /usr/bin/env python | 
| 2 |  | # | 
| 3 |  | # check_po - a gramps tool to check validity of po files | 
| 4 |  | # | 
| 5 |  | # Copyright (C) 2006-2006  Kees Bakker | 
| 6 |  | # | 
| 7 |  | # This program is free software; you can redistribute it and/or modify | 
| 8 |  | # it under the terms of the GNU General Public License as published by | 
| 9 |  | # the Free Software Foundation; either version 2 of the License, or | 
| 10 |  | # (at your option) any later version. | 
| 11 |  | # | 
| 12 |  | # This program is distributed in the hope that it will be useful, | 
| 13 |  | # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 14 |  | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 15 |  | # GNU General Public License for more details. | 
| 16 |  | # | 
| 17 |  | # You should have received a copy of the GNU General Public License | 
| 18 |  | # along with this program; if not, write to the Free Software | 
| 19 |  | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA | 
| 20 |  |  | 
| 21 |  | # | 
| 22 |  | # TODO | 
| 23 |  | # | 
| 24 |  | # * Check for HTML text in msgstr when there is none in msgid | 
| 25 |  | # * Check for matching HTML tag/endtag in msgstr | 
| 26 |  | # | 
| 27 |  |  | 
| 28 |  | # Adapted for Umit by Guilherme Polo, original file: | 
| 29 |  | # https://gramps.svn.sourceforge.net/svnroot/gramps/branches/gramps22/po/check_po | 
| 30 |  |  | 
| 31 |  | import re | 
| 32 |  | import sys | 
| 33 |  | from optparse import OptionParser | 
| 34 |  |  | 
# Application name; interpolated into the command-line description in main().
APP = "Umit"

# Module-level accumulators.
# NOTE(review): none of these dictionaries is read or written by the code
# visible in this file -- presumably left over from the original gramps
# script; confirm before removing.
all_total = {}
all_fuzzy = {}
all_untranslated = {}
all_percent_s = {}
all_named_s = {}
all_bnamed_s = {}
all_context = {}
all_coverage = {}
all_template_coverage = {}
| 46 |  |  | 
def strip_quotes(st):
    """Return st without its enclosing double quotes.

    The quotes are removed only when st is at least two characters long
    and both begins and ends with '"'; any other value is handed back
    untouched.
    """
    enclosed = len(st) >= 2 and st.startswith('"') and st.endswith('"')
    if not enclosed:
        return st
    return st[1:-1]
| 51 |  |  | 
class Check:
    """Base class for all checks.

    A check collects the messages it objects to in self.msgs.  Subclasses
    set diag_header and summary_text and provide process(msg), which
    appends offending messages to self.msgs.
    """

    # Defaults so that diag() and summary() do not raise AttributeError
    # when a subclass does not set them.
    diag_header = ''
    summary_text = ''

    def __init__( self ):
        self.msgs = []

    def diag( self ):
        """Print the header followed by every collected message.

        Nothing is printed when no message was collected.
        """
        if not self.msgs:
            return
        # print("") emits the same single blank line as a bare Python 2
        # `print` statement and is also valid on Python 3.
        print("")
        print(self.diag_header)
        for m in self.msgs:
            m.diag()

    def summary( self ):
        """Print one line: the summary label and the number of hits."""
        print("%-20s%d" % ( self.summary_text, len(self.msgs) ))
| 64 |  |  | 
class Check_fmt( Check ):
    """Flag messages in which a literal format directive (for example
    '%s') occurs a different number of times in msgid and msgstr."""

    def __init__( self, fmt ):
        Check.__init__( self )
        self.fmt = fmt
        self.summary_text = "%s mismatches:" % fmt
        self.diag_header = "-------- %s mismatches --------------" % fmt

    def process( self, msg ):
        """Record msg when the directive counts of both texts differ."""
        in_source = msg.msgid.count( self.fmt )
        in_translation = msg.msgstr.count( self.fmt )
        if in_source == in_translation:
            return
        self.msgs.append( msg )
| 78 |  |  | 
class Check_named_fmt( Check ):
    """Flag messages whose named format directives (%(name)s and
    friends) are not the same in msgid and msgstr."""

    # A pattern to find all %(name) directives, including optional digits
    # and the single non-digit character that follows them.
    # Raw string: '\(' and '\w' are regex escapes, not string escapes.
    find_named_fmt_pat = re.compile(r'% \( \w+ \) \d* \D', re.VERBOSE)

    def __init__( self ):
        Check.__init__( self )
        self.diag_header = "-------- %() name mismatches --------------"
        self.summary_text = "%() name mismatches:"

    def process( self, msg ):
        """Record msg unless both texts hold the same set of directives.

        The order of the directives is irrelevant; how often each one
        occurs is not.
        """
        fmts1 = self.find_named_fmt_pat.findall( msg.msgid )
        fmts2 = self.find_named_fmt_pat.findall( msg.msgstr )
        # Comparing the sorted lists also covers a differing number of
        # directives, so no separate length test is needed.
        fmts1.sort()
        fmts2.sort()
        if fmts1 != fmts2:
            self.msgs.append( msg )
| 101 |  |  | 
class Check_missing_sd( Check ):
    """Flag translations in which a %(name) directive is not directly
    followed (after optional digits) by 's' or 'd'."""

    # A pattern to find %() without s or d.  The single group captures the
    # character after the directive, or '' at the end of the text.
    # Here is a command to use for testing:
    # print(re.compile(r'% \( \w+ \) \d* (\D|$)', re.VERBOSE).findall( '%(event_name)s: %(place)s%(endnotes)s. ' ))
    # Raw string: '\(' and '\w' are regex escapes, not string escapes.
    find_named_fmt_pat2 = re.compile(r'% \( \w+ \) \d* (\D|$)', re.VERBOSE)

    def __init__( self ):
        Check.__init__( self )
        self.diag_header = "-------- %() without 's' or 'd' mismatches --------------"
        self.summary_text = "%() missing s/d:"

    def process( self, msg ):
        """Record msg once if any directive in msgstr lacks its s/d."""
        for conversion in self.find_named_fmt_pat2.findall( msg.msgstr ):
            if conversion not in ('s', 'd'):
                self.msgs.append( msg )
                break
| 119 |  |  | 
class Check_runaway( Check ):
    """Flag translations that still carry a '|' context separator."""

    def __init__( self ):
        Check.__init__( self )
        self.summary_text = "Runaway context:"
        self.diag_header = "-------- Runaway context in translation ---------"

    def process( self, msg ):
        """Record msg when both texts contain '|' and they differ."""
        source = msg.msgid
        translation = msg.msgstr

        # Runaway context. In the translated part we only want to see
        # the translation of the word after the |
        has_context = '|' in source and '|' in translation
        if has_context and source != translation:
            self.msgs.append( msg )
| 133 |  |  | 
class Check_xml_chars( Check ):
    """Flag tips.xml translations holding unescaped XML special
    characters."""

    # Special XML characters.  The pattern matches a '>' preceded by a
    # non-word character, any double quote, or an '&' that does not start
    # one of the entities &quot; &nbsp; &gt; &amp;
    xml_chars_pat = re.compile( r'(?<=\W) > | " | & (?!(quot|nbsp|gt|amp);)', re.VERBOSE )

    def __init__( self ):
        Check.__init__( self )
        self.summary_text = "XML special chars:"
        self.diag_header = "-------- unescaped XML special characters ---------"

    def process( self, msg ):
        """Record msg when its msgstr matches xml_chars_pat.

        Only messages flagged as coming from tips.xml are examined.
        """
        if not msg.is_tips_xml:
            return
        if self.xml_chars_pat.search( msg.msgstr ):
            self.msgs.append( msg )
| 152 |  |  | 
class Check_last_char( Check ):
    """Flag translations whose final character disagrees with the
    original about trailing white space or a trailing period."""

    def __init__( self ):
        Check.__init__( self )
        self.summary_text = "Last character:"
        self.diag_header = "-------- last character not identical ---------"

    def process( self, msg ):
        """Record msg on a white-space or period mismatch at the end.

        Fuzzy messages are not examined.
        """
        if msg.is_fuzzy:
            return

        # Slices, so an empty text yields '' instead of raising.
        source_tail = msg.msgid[-1:]
        translated_tail = msg.msgstr[-1:]

        space_differs = source_tail.isspace() != translated_tail.isspace()
        period_differs = (source_tail == '.') != (translated_tail == '.')
        if space_differs or period_differs:
            self.msgs.append( msg )
| 172 |  |  | 
class Check_shortcut_trans( Check ):
    """Flag translations that contain an underscore although the
    original text has none."""

    def __init__( self ):
        Check.__init__( self )
        self.summary_text = "Shortcut in msgstr:"
        self.diag_header = "-------- shortcut key in translation ---------"

    def process( self, msg ):
        """Record msg when only msgstr contains '_'."""
        if '_' in msg.msgstr and '_' not in msg.msgid:
            self.msgs.append( msg )
| 184 |  |  | 
class Msgid:
    """One entry of a PO file: comments, msgid and msgstr.

    msgid / msgstr hold the concatenated, unquoted text; _msgid, _msgstr
    and _cmnt keep the raw source lines.  is_fuzzy and is_tips_xml are set
    (to 1) as soon as a comment line matches the respective pattern.
    """
    fuzzy_pat = re.compile( 'fuzzy' )
    tips_xml_pat = re.compile( r'tips\.xml' )

    def __init__( self, msgnr, lineno ):
        self._msgid = []
        self._msgstr = []
        self.msgid = ''
        self.msgstr = ''
        self._cmnt = []
        self.nr = msgnr
        self.lineno = lineno
        self.is_fuzzy = 0
        self.is_tips_xml = 0

    def diag( self ):
        """Print the entry: a blank line, a header line, then the raw
        msgid and msgstr lines exactly as they were read."""
        if self.is_fuzzy:
            fuzzy = " (fuzzy)"
        else:
            fuzzy = ""
        # print("") emits the same blank line as a bare Python 2 `print`.
        print("")
        print("msg nr: %d, lineno: %d%s" % ( self.nr, self.lineno, fuzzy ))
        sys.stdout.write( ''.join( self._msgid ) )
        sys.stdout.write( ''.join( self._msgstr ) )

    def _unquote( self, keyword, line, lineno ):
        """Return the text of one msgid/msgstr/continuation line.

        A leading keyword is removed, the rest is stripped and its
        enclosing quotes are dropped.  A missing quote is reported on
        stdout; the text is still returned.
        """
        # Anchored at the start of the line: an unanchored substitution
        # would also delete "msgid " / "msgstr " inside the message text.
        line = re.sub( r'^%s\s+' % keyword, '', line )
        line = line.strip()
        # Slices instead of line[0], so an empty remainder is reported as
        # a missing quote rather than raising IndexError.
        if line[:1] != '"' or line[-1:] != '"':
            print("ERROR at line %d: Missing quote." % lineno)
        return strip_quotes( line )

    def add_msgid( self, line, lineno ):
        """Append a msgid line (or one of its continuation lines)."""
        self._msgid.append( line )
        self.msgid += self._unquote( 'msgid', line, lineno )

    def add_msgstr( self, line, lineno ):
        """Append a msgstr line (or one of its continuation lines)."""
        self._msgstr.append( line )
        self.msgstr += self._unquote( 'msgstr', line, lineno )

    def add_cmnt( self, line ):
        """Store a comment line and update the fuzzy / tips.xml flags."""
        self._cmnt.append( line )
        if not self.is_fuzzy and self.fuzzy_pat.search( line ):
            self.is_fuzzy = 1
        if not self.is_tips_xml and self.tips_xml_pat.search( line ):
            self.is_tips_xml = 1
| 233 |  |  | 
def read_msgs( fname ):
    """Read the PO/POT file fname and return its entries.

    The file is parsed line by line with a small state machine.  Every
    entry becomes a Msgid instance numbered in order of appearance (the
    first entry gets number 0).  Obsolete lines ('#~ ...') are skipped and
    entries with neither msgid nor msgstr text are dropped from the
    result.

    Lines that match none of the known forms only trigger a warning on
    stdout.
    NOTE(review): that includes keywords such as msgctxt / msgid_plural /
    msgstr[n], which the patterns below do not recognise.

    Raises Exception on a msgid directly following a msgid, or a msgstr
    directly following a msgstr.
    """
    empty_pat   = re.compile( r'^ \s* $',      re.VERBOSE )
    comment_pat = re.compile( r'\#',           re.VERBOSE )
    msgid_pat   = re.compile( r'msgid \s+ "',  re.VERBOSE )
    msgstr_pat  = re.compile( r'msgstr \s+ "', re.VERBOSE )
    str_pat     = re.compile( r'"',            re.VERBOSE )
    old_pat     = re.compile( r'\#~ \s+ ',     re.VERBOSE )

    # try/finally instead of a bare open(): the handle is closed even when
    # reading fails.
    f = open( fname )
    try:
        lines = f.readlines()
    finally:
        f.close()

    # parse it like a statemachine
    NONE   = 0         # Nothing detected, yet
    CMNT   = 1         # Inside comment part
    MSGID  = 2         # Inside msgid part
    MSGSTR = 3         # Inside msgstr part
    STR    = 4         # A continuation string
    OLD    = 5         # An old pattern with #~

    msgs = []

    def new_msg( lineno ):
        # The message number is the count of entries created so far.
        entry = Msgid( len(msgs), lineno )
        msgs.append( entry )
        return entry

    def warn( what, lineno ):
        print('WARNING: %s at %s:%d' % ( what, fname, lineno ))

    state = NONE
    msg = None

    for ix, line in enumerate( lines ):
        lineno = ix + 1   # Use line numbers for messages

        if empty_pat.match( line ):
            continue   # Empty lines are not interesting

        # What's the next state?  old_pat must be tried before
        # comment_pat because '#~' lines start with '#' as well.
        if old_pat.match( line ):
            next_state = OLD
        elif comment_pat.match( line ):
            next_state = CMNT
        elif msgid_pat.match( line ):
            next_state = MSGID
        elif msgstr_pat.match( line ):
            next_state = MSGSTR
        elif str_pat.match( line ):
            next_state = STR
        else:
            warn( 'Unexpected input', lineno )
            next_state = NONE

        if state == NONE:
            # expect msgid or comment or old stuff
            if next_state == CMNT:
                state = CMNT
                msg = new_msg( lineno )
                msg.add_cmnt( line )
            elif next_state == MSGID:
                state = MSGID
                msg = new_msg( lineno )
                msg.add_msgid( line, lineno )
            elif next_state == MSGSTR:
                warn( 'Wild msgstr', lineno )
                state = MSGSTR
                msg = new_msg( lineno )
                msg.add_msgstr( line, lineno )
            elif next_state == STR:
                warn( 'Wild string', lineno )
            # OLD: just skip

        elif state == CMNT:
            if next_state == CMNT:
                # Comments seen after an obsolete line (msg is None) are
                # skipped for now.
                if msg:
                    msg.add_cmnt( line )
            elif next_state == MSGID:
                state = MSGID
                if not msg:
                    msg = new_msg( lineno )
                msg.add_msgid( line, lineno )
            elif next_state == MSGSTR:
                warn( 'Wild msgstr', lineno )
                state = MSGSTR
                msg = new_msg( lineno )
                msg.add_msgstr( line, lineno )
            elif next_state == STR:
                warn( 'Wild string', lineno )
            elif next_state == OLD:
                msg = None

        elif state == MSGID:
            if next_state == CMNT:
                # Hmmm. A comment here?
                warn( 'Unexpected comment', lineno )
            elif next_state == MSGID:
                raise Exception( 'Unexpected msgid at %s:%d' % ( fname, lineno ) )
            elif next_state == MSGSTR:
                state = MSGSTR
                msg.add_msgstr( line, lineno )
            elif next_state == STR:
                msg.add_msgid( line, lineno )
            elif next_state == OLD:
                # The obsolete line ends the current entry.  Going back to
                # NONE keeps a later string/msgstr line from being added
                # to a message that no longer exists.
                msg = None
                state = NONE

        elif state == MSGSTR:
            if next_state == CMNT:
                # A comment probably starts a new item
                state = CMNT
                msg = new_msg( lineno )
                msg.add_cmnt( line )
            elif next_state == MSGID:
                state = MSGID
                msg = new_msg( lineno )
                msg.add_msgid( line, lineno )
            elif next_state == MSGSTR:
                raise Exception( 'Unexpected msgstr at %s:%d' % ( fname, lineno ) )
            elif next_state == STR:
                msg.add_msgstr( line, lineno )
            elif next_state == OLD:
                # See the MSGID case above.
                msg = None
                state = NONE

        else:
            raise Exception( 'Unexpected state in po parsing (state = %d)' % state )

    # Strip items with just comments. (Can this happen?)
    return [ m for m in msgs if m.msgid or m.msgstr ]
| 402 |  |  | 
def analyze_msgs( options, fname, msgs, nr_templates = None, nth = 0 ):
    """Run every check on msgs and print a report for the file fname.

    options      -- object with the boolean attributes skip_fuzzy and
                    only_summary
    fname        -- file name, used in the report only
    msgs         -- list of Msgid instances
    nr_templates -- number of entries in the template; with None the
                    template figures are reported as unknown
    nth          -- index of this file in the run; a separator is printed
                    in front of every report but the first

    Untranslated messages (empty msgstr) are counted and not checked.
    Fuzzy messages are counted and checked unless options.skip_fuzzy is
    set.
    """
    checks = [
        Check_fmt( '%s' ),
        Check_fmt( '%d' ),
        Check_named_fmt(),
        Check_missing_sd(),
        Check_runaway(),
        Check_xml_chars(),
        Check_last_char(),
        Check_shortcut_trans(),
    ]

    nr_fuzzy = 0
    nr_untranslated = 0
    for msg in msgs:
        if not msg.msgstr:
            nr_untranslated += 1
            continue

        if msg.is_fuzzy:
            nr_fuzzy += 1
            if options.skip_fuzzy:
                continue

        for c in checks:
            c.process( msg )

    nr_msgs = len(msgs)
    if nth > 0:
        print("")
        print("=====================================")
    print("%-20s%s"     % ( "File:",              fname ))
    if nr_templates is None:
        # '%d' % None would raise TypeError.
        print("%-20s%s" % ( "Template total:",    "unknown" ))
    else:
        print("%-20s%d" % ( "Template total:",    nr_templates ))
    print("%-20s%d"     % ( "PO total:",          nr_msgs ))
    print("%-20s%d"     % ( "Fuzzy:",             nr_fuzzy ))
    print("%-20s%d"     % ( "Untranslated:",      nr_untranslated ))

    for c in checks:
        c.summary()

    # A file without entries covers nothing; avoids ZeroDivisionError.
    if nr_msgs:
        po_coverage = (1.0 - (float(nr_untranslated) / float(nr_msgs))) * 100
    else:
        po_coverage = 0.0
    print("%-20s%5.2f%%" % ( "PO Coverage:",       po_coverage ))

    # Template coverage is only defined for a known, non-empty template.
    if nr_templates:
        template_coverage = po_coverage * float(nr_msgs) / float(nr_templates)
        print("%-20s%5.2f%%" % ( "Template Coverage:", template_coverage ))
    else:
        print("%-20s%s" % ( "Template Coverage:", "n/a" ))

    if not options.only_summary:
        for c in checks:
            c.diag()
| 458 |  |  | 
def main(args):
    """Command-line entry point.

    args is the full argument vector (program name first).  The first
    positional argument is the template (.pot) file; every positional
    argument is then analysed in turn.

    Exits with status 1 when no file is given or when reading or analysing
    a file fails.
    """
    if len(args) < 2:
        print("Error: Specify the umit.pot file path")
        sys.exit(1)

    parser = OptionParser(description="This program validates a PO file for "
                          "%s." % APP, usage='%prog [options] po-file...' )

    parser.add_option("", "--skip-fuzzy",
                      action="store_true", dest="skip_fuzzy", default=False,
                      help="skip fuzzies")

    parser.add_option("-s", "--only-summary",
                      action="store_true", dest="only_summary", default=False,
                      help="only give the summary")

    # Parse the vector that was passed in rather than sys.argv.
    options, files = parser.parse_args(args[1:])
    if not files:
        # Only options were given, e.g. "check_po.py -s".
        print("Error: Specify the umit.pot file path")
        sys.exit(1)

    try:
        # The template is the first positional argument, not argv[1],
        # which may be an option.
        pot_msgs = read_msgs(files[0])
        nr_templates = len(pot_msgs)
        nth = 0
        # NOTE(review): as before, the template itself is analysed as the
        # first file -- confirm whether that is intended.
        for fname in files:
            msgs = read_msgs(fname)
            analyze_msgs(options, fname, msgs, nr_templates, nth)
            nth += 1

    except Exception:
        # sys.exc_info() works on every Python version, unlike the
        # "except X, e" / "except X as e" spellings.
        print(sys.exc_info()[1])
        sys.exit(1)

if __name__ == "__main__":
    main(sys.argv)
trunk/scripts/build/msgmerge.py
| r253614 | r253615 |  | 
|---|
| 1 |  | #! /usr/bin/env python | 
| 2 |  | # -*- coding: iso-8859-1 -*- | 
| 3 |  | # | 
| 4 |  | # Copyright Terje Røsten <terjeros@phys.ntnu.no> Nov. 2003. | 
| 5 |  | # | 
| 6 |  | '''Merge two Uniforum style .po files together. | 
| 7 |  |  | 
| 8 |  | This is an implementation (not complete) in Python of the GNU | 
| 9 |  | msgmerge(1) program. It can be used on the command line (or as a Python | 
| 10 |  | module). | 
| 11 |  |  | 
| 12 |  | Usage: msgmerge.py [OPTIONS] def.po ref.pot | 
| 13 |  |  | 
| 14 |  | The def.po file is an existing PO file with translations. The ref.pot | 
| 15 |  | file is the last created PO file with up-to-date source references but | 
| 16 |  | old translations, or a PO Template file. | 
| 17 |  |  | 
| 18 |  | Options: | 
| 19 |  | -U, --update           update def.po, | 
| 20 |  | do nothing if def.po is already up to date. | 
| 21 |  | -o, --output-file=FILE write output to file FILE. Output is written to | 
| 22 |  | stdout if set to - or if the option is not present. | 
| 23 |  | -D, --docstrings       don\'t remove docstring flag. | 
| 24 |  | -h, --help             display help text and exit. | 
| 25 |  | -V, --version          display version and exit. | 
| 26 |  | -q, --quiet, --silent  suppress progress indicators. | 
| 27 |  | ''' | 
| 28 |  | from __future__ import generators | 
| 29 |  |  | 
| 30 |  | if not __name__ == '__main__': | 
| 31 |  | __doc__ += '''\ | 
| 32 |  |  | 
| 33 |  | When used as module the interesting functions are merge() and | 
| 34 |  | merge_dir(). | 
| 35 |  |  | 
| 36 |  | The merge() function does the same as the command line version, and | 
| 37 |  | the arguments are as follows. The first argument is the def.po file, | 
| 38 |  | then the ref.pot file. The third argument controls whether to work in | 
| 39 |  | update mode or not, then the next argument sets the output file. Set | 
| 40 |  | the next argument to False to remove docstring flags. The last | 
| 41 |  | argument can be used to suppress progress indicators. The default is | 
| 42 |  | to work in update mode with progress indicators. | 
| 43 |  |  | 
| 44 |  | Example: | 
| 45 |  | merge("def.po", "ref.pot") | 
| 46 |  | merge the files def.po and ref.pot and write output to def.po if | 
| 47 |  | there are any changes. | 
| 48 |  | merge("def.po", "ref.pot", docstrings = False, verbose = False, | 
| 49 |  | update = False, outfile = "-") | 
| 50 |  | merge the files def.po and ref.pot and write output to stdout, | 
| 51 |  | remove docstring flag and be quiet. | 
| 52 |  |  | 
| 53 |  | The merge_dir() function is useful when merging a directory of po | 
| 54 |  | files. The only required argument is the name of the directory with po | 
| 55 |  | files and the pot file. It will use simple glob to find the files. The | 
| 56 |  | second argument can be used to specify the pot file (in the | 
| 57 |  | directory). Third argument is a list of po files (then globbing will | 
| 58 |  | not be used) and the next argument is list of filename to exclude. The | 
| 59 |  | last argument can be used to suppress progress indicators. Docstring | 
| 60 |  | flag will not be removed. | 
| 61 |  |  | 
| 62 |  | Example: | 
| 63 |  | merge_dir("po") | 
| 64 |  | merge (and update) all po files in directory po with the single pot | 
| 65 |  | file in the same directory. | 
| 66 |  |  | 
| 67 |  | The module raises the MsgmergeError exception in case of error. | 
| 68 |  | ''' | 
| 69 |  | __revision__ = '$Id: msgmerge.py,v 1.41 2003/11/18 19:10:42 terjeros Exp $' | 
| 70 |  | __version__ = '0.1' | 
| 71 |  | name = 'msgmerge.py' | 
| 72 |  |  | 
| 73 |  | __all__ = [ 'merge', 'merge_dir', 'MsgmergeError' ] | 
| 74 |  |  | 
| 75 |  | import sys | 
| 76 |  | import re | 
| 77 |  | import string | 
| 78 |  | import getopt | 
| 79 |  | import difflib | 
| 80 |  | import glob | 
| 81 |  | import os.path | 
| 82 |  | import codecs | 
| 83 |  |  | 
# Compatibility shim: when the names True and False are not defined the
# lookup raises NameError and they are bound to 1 and 0 instead.
try:
    True, False
except NameError:
    True, False = 1, 0
| 88 |  |  | 
class Msgs:
    '''Hold one message: its msgid, msgstr, flag and comment parts.

    width and file are class attributes shared by all instances: the
    wrapping width and the name of the file being processed.
    '''
    width = 80
    file = ''

    def __init__(self, msgid, msgstr, flag, lno, entry, **kwds):
        # Mandatory parts.
        self.id = msgid
        self.str = msgstr
        self.flag = flag
        self.lno = lno
        self.entry = entry
        # Optional comment parts default to the empty string.
        self.cmt = kwds.get('cmt', '')
        self.ref = kwds.get('ref', '')
        self.autocmt = kwds.get('autocmt', '')
        # Incremented by used().
        self.count = 0

    def wash(self):
        '''Re-wrap msgid and msgstr with the module level wash().'''
        common = {'width': self.width, 'filename': self.file,
                  'lno': self.lno}
        self.id = wash(self.id, **common)
        self.str = wash(self.str, 'msgstr', **common)

    def used(self):
        '''Count one more use of this message.'''
        self.count += 1

    def get_clean_id(self):
        '''Return the msgid without its first 'msgid "' prefix.'''
        return self.id.replace('msgid "','', 1)

    def obsolete(self):
        '''Turn the message into an obsolete entry.

        The width is reduced by the length of the marker, the fields are
        washed and every line of msgid and msgstr gets a '#~ ' prefix.
        '''
        marker = '#~ '
        self.width -= len(marker)
        self.wash()
        self.id = ''.join([ marker + part + '\n'
                            for part in self.id.splitlines() ])
        self.str = ''.join([ marker + part + '\n'
                             for part in self.str.splitlines() ])
| 119 |  |  | 
class Options:
    '''Hold the settings of one merge run.

    With cmdline set, fixed starting values are used and keyword
    arguments are ignored.  Otherwise every setting may be given as a
    keyword argument and falls back to its module-use default.
    '''
    def __init__(self, cmdline = False, **kwds):
        if cmdline:
            self.update = False
            self.outfile = False
            self.docstrings = False
            self.verbose = True
            self.suffix = '~'
            self.backup = True
            return

        module_defaults = (
            ('update', True),
            ('outfile', '-'),
            ('docstrings', True),
            ('verbose', False),
            ('suffix', '~'),
            ('backup', True),
        )
        for key, fallback in module_defaults:
            setattr(self, key, kwds.get(key, fallback))
| 137 |  |  | 
class MsgmergeError(Exception):
    '''Exception raised by this module, for instance on a parse error
    in a po file.'''
| 140 |  |  | 
def gen(lines):
    '''
    Generator which returns a line (with the first obsolete prefix
    '#~ ' removed) from the list of lines in <lines>, the line number
    (starting at 1) is also returned.

    After the last line, that line is yielded once more, unmodified and
    with the same line number, so a consumer may read one item past the
    end.  Nothing is yielded for an empty <lines>.
    '''
    lno = 0
    l = None
    for l in lines:
        lno += 1
        yield l.replace('#~ ', '', 1), lno
    # Only repeat the last line if there was one; on empty input <l> was
    # never bound by the loop.
    if lno:
        yield l, lno
| 152 |  |  | 
def slurp(s, g, sign):
    '''
    Pull (line, line number) pairs from iterator <g> with its next()
    method and append each line to <s> for as long as the line starts
    with <sign>.  When <sign> is '# ', a line consisting of a lone '#'
    is appended as well.

    Returns the first line that did not qualify, its line number, the
    iterator <g> and the (possibly) extended string <s>.
    '''
    lone_hash_counts = (sign == '# ')
    while True:
        l, lno = g.next()
        belongs = l.startswith(sign) or (lone_hash_counts and l.strip() == '#')
        if not belongs:
            return l, lno, g, s
        s += l
| 166 |  |  | 
def splitted_fit(chunk, line, width, break_always, break_after_space):
    '''
    Look for a position at which <chunk> can be split so that its first
    part may be appended to <line>.  The chunk is scanned from its end
    towards its start and the first usable position wins:

    - a character from <break_always>, if the part before it together
      with <line> is no longer than <width>;
    - a character from <break_after_space> (not at position 0) that is
      preceded by white space; the width is not considered here;
    - a backslash followed by a double quote, under the same width
      condition as the first case.

    Returns the tuple (part that fits, rest of chunk); ('', chunk) if no
    position qualifies.
    '''
    for pos in range(len(chunk) - 1, -1, -1):
        char = chunk[pos]
        head_fits = len(chunk[0:pos] + line) <= width
        after_space = (char in break_after_space and pos
                       and chunk[pos - 1].strip() == '')
        escaped_quote = char == '\\' and chunk[pos + 1:pos + 2] == '"'
        if after_space or (head_fits and (char in break_always
                                          or escaped_quote)):
            return chunk[0:pos], chunk[pos:]
    return '', chunk
| 188 |  |  | 
def wrap(msg, width):
    '''
    Wrap the string <msg> to lines of about <width> characters and
    surround each resulting line with a pair of ". The return value is
    a string with these quoted lines joined together with newlines.

    An empty or white-space-only <msg> is returned quoted, unchanged.
    '''
    if msg.isspace() or not msg:
        return '"%s"' % msg

    # \ and " is here, but " is special in po files.
    break_always = '$%+({['
    # XXX what about: « © » ¦ § etc?
    break_after_space = '_-=^`~\'<|>&*#@'
    enders = '.:,;!?/])}|%-'
    # <extra> is all punctuation that is not an ender.
    extra = string.punctuation
    for c in enders:
        extra = extra.replace(c, '')
    escaped = { 'enders' : re.escape(enders),
                'extra'  : re.escape(extra) }
    # One token: optional word/extra characters followed by at least one
    # white-space or ender character.
    regex = r'([\w%(extra)s]*[\s%(enders)s)]+[\s%(enders)s]*)' % escaped
    r = re.compile(regex, re.UNICODE)
    # Splitting on a capturing group keeps the tokens; drop empty pieces.
    msg = [ m for m in r.split(msg) if not m == '']

    lines = []
    line = msg.pop(0)

    # Handle \n on end of line
    if len(msg) > 1 and msg[-1] == 'n' and len(msg[-2]) > 0 \
           and msg[-2][-1] == '\\':
        msg[-2] += msg[-1]
        msg.pop()
    # Do not allow a single \n on a line
    if len(msg) > 2 and msg[-1] == '\\n':
        msg[-2] += msg[-1]
        msg.pop()

    for m in msg:
        if len(line) > width or len(m) > width or len(line + m) > width:
            # The token does not fit: let splitted_fit() decide how much
            # of it still goes on the current line, then start a new one.
            fit, rest = splitted_fit(m, line, width, break_always,
                                     break_after_space)
            line += fit
            lines.append(line)
            line = rest
        else:
            line += m
    lines.append(line)
    lines = [ '"%s"' % l for l in lines ]
    return '\n'.join(lines)
| 237 |  |  | 
def normalize(lines):
    r'''
    Break the field <lines> after every embedded \n escape so that each
    one ends its own quoted line, e.g. "\n\nText\n\n" becomes:
    "\n"
    "\n"
    "Text\n"
    "\n"

    The field is returned unchanged unless its first \n escape lies
    after position 0 and more than three characters before the end.
    '''
    escape = '\\n'
    first = lines.find(escape)
    if not (0 < first < len(lines) - 3):
        return lines

    closing = escape + '"'
    if lines[-3:] == closing:
        # Leave the final \n" alone, it already ends the field.
        body, tail = lines[:-3], closing
    else:
        body, tail = lines, ''
    return body.replace(escape, '\\n"\n"').replace('""\n', '') + tail
| 253 |  |  | 
def wash(msg, idx = 'msgid', width = 80, **kwds):
    '''
    Do washing on the msgstr or msgid fields. Wrap the text to fit in
    width <width>. <msg> is the field as one string (keyword line plus
    continuation lines). <idx> indicates msgid or msgstr, <width> holds
    the width. <filename> and <lno> (line number) are picked up from
    <kwds> and used in error messages only; both are optional.

    A field that is a single line no longer than <width> is returned as
    normalize() left it. Otherwise the quoted fragments are joined,
    split again after every \\n escape, wrapped, and returned as a string
    ending in a newline.

    Raises MsgmergeError if a line of the field is not of the form
    <idx> "..." (first line) or "..." (following lines); the offending
    line is printed first.
    '''
    msg = normalize(msg)
    lines = msg.splitlines()
    size = len(lines)
    if size <= 1 and len(msg) <= width:
        return msg

    def parse_error(text, back):
        # <lno> is the line after the field; step <back> lines up to the
        # offending one. Without <lno>/<filename> the location is reported
        # as unknown instead of failing with a KeyError.
        print(text)
        lno = kwds.get('lno')
        if lno is None:
            lno = '?'
        else:
            lno -= back
        raise MsgmergeError('parse error: %s:%s.'
                            % (kwds.get('filename', '<unknown>'), lno))

    # The first line is special
    m = re.match('^%s "(.*)"$' % (idx, ), lines[0])
    if not m:
        parse_error(lines[0], size + 1)
    washed = [ m.group(1) ]
    if m.group(1).endswith(r'\n'):
        washed.append('')
    i = 0
    for line in lines[1:]:
        m = re.match(r'^"(\s*.*)"$', line)
        i += 1
        if not m:
            parse_error(line, size - i + 1)
        # Fragments are concatenated until one ends in a \n escape.
        washed[-1] += m.group(1)
        if m.group(1).endswith(r'\n'):
            washed.append('')
    if washed[0] == '':
        washed.pop(0)
    # <washed> may be empty by now (every fragment was empty).
    if washed and washed[-1] == '':
        washed.pop()
    if not washed:
        return '%s ""\n' % idx

    washed = [ wrap(w, width - 3) for w in washed ] # " and \n removed.

    # One line or multiline
    if len(washed) == 1 and len('%s %s\n' % (idx, washed[0])) < width:
        return '%s %s\n' % (idx, washed[0])
    return '%s ""\n%s\n' % (idx, '\n'.join(washed))
| 305 |  |  | 
def parse(filename, entry):
    '''
    Parse po or pot file with name <filename>. Set the variable
    <entry> to msgid/msgstr to indicate pot/po file.  The return value
    is a dict with msgid (washed) as key and Msgs instances as
    values.

    Raises MsgmergeError when a line that is neither blank nor part of
    a recognised field is found.
    '''
    fo = io(filename)
    try:
        lines = fo.readlines()
    finally:
        fo.close()
    Msgs.file = filename
    messages = {}
    last = len(lines)
    g = gen(lines)
    # msgstr starts out empty as well, so that an entry without a msgstr
    # line does not hit an unbound local below.
    cmt = autocmt = ref = flag = msgstr = ''
    msgid = False
    lno = 0
    while not lno == last:
        l, lno = g.next()
        # Each slurp() call hands back the line following the field it
        # collected, which is why these are plain ifs and not elifs.
        if l.startswith('# '):
            l, lno, g, cmt  = slurp(l, g, '# ')
        if l.startswith('#.'):
            l, lno, g, autocmt = slurp(l, g, '#.')
        if l.startswith('#:'):
            l, lno, g, ref = slurp(l, g, '#:')
        if l.startswith('#,'):
            l, lno, g, flag = slurp(l, g, '#,')
        if l.startswith('msgid'):
            l, lno, g, msgid = slurp(l, g, '"')
        if l.startswith('msgstr'):
            l, lno, g, msgstr = slurp(l, g, '"')

        if not lno == last and not l.strip() == '':
            raise MsgmergeError('parse error: %s:%s.' % (filename, lno))

        if msgid and entry == 'msgstr':
            # po file: keep the translator comment.
            idx = wash(msgid, filename = filename, lno = lno)
            messages[idx] = Msgs(msgid, msgstr, flag, lno, entry, cmt = cmt)
            msgid = False; msgstr = cmt = autocmt = ref = flag = ''
        elif msgid and entry == 'msgid':
            # pot file: keep the automatic comment and the references.
            idx = wash(msgid, filename = filename, lno = lno)
            messages[idx] = Msgs(msgid, msgstr, flag, lno, entry,
                                 autocmt = autocmt, ref = ref)
            msgid = False; msgstr = cmt = autocmt = ref = flag = ''

    for m in messages.values():
        m.wash()
    return messages
| 352 |  |  | 
def fuzzy_match(pot, defs):
    '''
    Look for the Msgs object in the dict <defs> whose clean id is most
    similar (difflib ratio) to the clean id of the Msgs object <pot>.
    Candidates with an empty translation and the header entry are
    ignored. A candidate is accepted only if its ratio is above 0.59
    (the nominal limit of 0.6 minus 0.01).
    Returns the best candidate, or False when none qualifies.
    '''
    limit = 0.6
    best_ratio = limit - 0.01
    best = False
    wanted = pot.get_clean_id()
    wanted_len = len(wanted)
    matcher = difflib.SequenceMatcher(lambda x: x == ' "', '', wanted)
    for candidate in defs.values():
        # Empty translation or empty msgid (header): nothing to reuse.
        if candidate.str == 'msgstr ""\n' or candidate.id == 'msgid ""\n':
            continue
        text = candidate.get_clean_id()
        text_len = len(text)
        # Simple and fast length test first.
        if wanted_len > 2 * text_len or text_len > 1.5 * wanted_len:
            continue
        matcher.set_seq1(text)
        # quick_ratio() is an upper bound, so it can rule candidates out
        # before the expensive ratio() call.
        if matcher.quick_ratio() < best_ratio:
            continue
        ratio = matcher.ratio()
        if ratio > best_ratio:
            best_ratio = ratio
            best = candidate
    return best
| 379 |  |  | 
def flags(po, pot, fuzzy = False, obs = False):
    '''
    Build the flag line ("#, ...") for an entry from the flag fields of
    the Msgs objects <po> and <pot>. With <fuzzy> set, <po>\'s own flags
    are ignored and the fuzzy flag is used instead. With <obs> set the
    c-format, python-format and docstring flags are dropped. Unless the
    global option.docstrings is set, the docstring flag is dropped too.
    Returns the combined flag line (newline terminated), or an empty
    string when no flag is left.
    '''
    global option
    if not (po.flag or pot.flag or fuzzy):
        return ''

    if fuzzy:
        own = '#, fuzzy'
    else:
        own = po.flag.strip()
    combined = '%s, %s' % (own, pot.flag.strip())

    # Split into single items, dropping empty ones and duplicates while
    # keeping the order of first appearance. The leading '#' is an item.
    parts = []
    for item in combined.split(', '):
        if item and item not in parts:
            parts.append(item)

    if not option.docstrings and 'docstring' in parts:
        parts.remove('docstring')
    if obs:
        for unwanted in ['c-format', 'python-format', 'docstring']:
            if unwanted in parts:
                parts.remove(unwanted)

    # Put fuzzy first, i.e. right after the '#'.
    if 'fuzzy' in parts:
        where = parts.index('fuzzy')
        if not where == 1:
            parts[1], parts[where] = parts[where], parts[1]

    if len(parts) == 1:
        return ''
    return ', '.join(parts) + '\n'
| 421 |  |  | 
def add(pot, po, fuzzy = False):
    '''
    Build a new entry from the Msgs objects <pot> and <po>: translator
    comment and translation come from <po>; automatic comment,
    references and msgid come from <pot>; the flag line is computed by
    flags(). If <fuzzy> is true, <po>\'s flag field is ignored (in
    flags()). Returns a multiline string with an up to date entry.
    '''
    pieces = (
        po.cmt,
        pot.autocmt,
        pot.ref,
        flags(po, pot, fuzzy = fuzzy),
        pot.id,
        po.str,
    )
    return ''.join(pieces)
| 436 |  |  | 
def header(pot, defs):
    '''
    Update date in header entry: copy the POT-Creation-Date value from
    the header of the pot file (Msgs object <pot>) into the header
    entry found in <defs>. Returns the updated header entry.

    Raises MsgmergeError if <defs> does not hold exactly one header
    entry, or if either header lacks a POT-Creation-Date field.
    '''
    found = [ d for d in defs.values() if d.id == 'msgid ""\n' ]
    if not len(found) == 1:
        raise MsgmergeError('Error: did not find header in po file.')
    po = found[0]

    r = re.compile(r'(.*^"POT-Creation-Date:\s+)(.*?)(\\n"$.*)',
                   re.MULTILINE | re.DOTALL)
    m = r.match(pot.str)
    if not m:
        raise MsgmergeError(
            'Error: did not find POT-Creation-Date field in pot file.')
    date = m.group(2)

    # A callable is used instead of a '\\1...\\3' template: the date
    # starts with digits, which would fuse with the '\\1' back reference
    # into an octal escape or a wrong group number.
    def swap_date(found_date):
        return found_date.group(1) + date + found_date.group(3)

    updated, count = r.subn(swap_date, po.str)
    if not count == 1:
        raise MsgmergeError(
            'Error: did not find POT-Creation-Date field in po file.')
    # Store the result; discarding it would leave the header unchanged.
    po.str = updated
    return po
| 459 |  |  | 
def match(defs, refs):
    '''
    Pair every Msgs object in <refs> (pot file) with a Msgs object in
    <defs> (po file): first by identical id, then by fuzzy_match(), and
    failing that with an empty translation. The header entry is handled
    first, obsolete translations are appended last. The return value is
    a list with po entries (strings).
    '''
    global option
    matches = []
    empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')

    # Process the pot entries in file order (sorted on line number).
    by_line = [(r.lno, r) for r in refs.values()]
    by_line.sort()

    # The first pot entry is taken to be the header entry.
    po = header(by_line.pop(0)[1], defs)
    matches.append(add(empty, po))
    po.used()

    for pot in [ pair[1] for pair in by_line ]:
        if option.verbose:
            sys.stderr.write('.')
        is_fuzzy = False
        po = defs.get(pot.id, False)        # Perfect match
        if not po:
            po = fuzzy_match(pot, defs)     # Fuzzy match
            is_fuzzy = True
        if not po:
            matches.append(add(pot, empty)) # No match
            continue
        matches.append(add(pot, po, fuzzy = is_fuzzy))
        po.used(); pot.used()

    obsolete(defs, matches)
    return matches
| 491 |  |  | 
def obsolete(defs, matches):
    '''
    Handle obsolete translations: every entry in <defs> that has a
    count of 0 and a non-empty translation gets its flags reduced (see
    flags() with obs set), is marked through its obsolete() method and
    is appended to the list <matches>, in line number order.
    '''
    leftovers = []
    for d in defs.values():
        if d.count == 0 and not d.str == 'msgstr ""\n':
            leftovers.append((d.lno, d))
    leftovers.sort()
    empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')
    for pair in leftovers:
        entry = pair[1]
        entry.flag = flags(entry, empty, obs = True)
        entry.obsolete()
        matches.append('%s%s%s' % (entry.flag, entry.id, entry.str))
| 503 |  |  | 
def help():
    '''Write the module docstring to standard out and exit with status 0.'''
    sys.stdout.write('%s\n' % (__doc__,))
    sys.exit(0)
| 508 |  |  | 
def cmdline():
    '''
    Parse options and arguments from command line and run merge().

    Exits with status 1 on a usage error or when merge() raises
    MsgmergeError; --help and --version exit with status 0.
    '''
    advice = 'Try `%(name)s --help\' for more information.'
    try:
        # --suffix and --backup take a value, hence the trailing '='.
        long_opt = ['help', 'version', 'update', 'output-file=',
                    'quiet', 'silent', 'docstrings', 'suffix=', 'backup=']
        opts, args = getopt.getopt(sys.argv[1:], 'hVUo:qD', long_opt)
    except getopt.error:
        msg = sys.exc_info()[1]
        sys.stdout.write('%s\n' % (
            '%s: %s\n%s' % ('%(name)s', msg, advice) % globals(), ))
        sys.exit(1)

    option = Options(cmdline = True)
    for opt, arg in opts:
        if opt in ['-h', '--help']:
            help()
        elif opt in ['-V', '--version']:
            sys.stdout.write('%s\n' % (
                '%(name)s %(__version__)s' % globals(), ))
            sys.exit(0)
        elif opt in ['-o', '--output-file']:
            option.outfile = arg
        elif opt in ['-U', '--update']:
            option.update = True
        elif opt in ['-q', '--silent', '--quiet']:
            option.verbose = False
        elif opt in ['-D', '--docstrings']:
            option.docstrings = True
        elif opt in ['--suffix']:
            option.suffix = arg
        elif opt in ['--backup']:
            option.backup = arg

    # Sanity checks
    warn = False
    if option.update and option.outfile:
        warn = '--update and --output-file are mutually exclusive.'
    if len(args) == 0:
        warn = 'no input files given.'
    elif len(args) == 1 or len(args) > 2:
        warn = 'exactly 2 input files required.'
    if warn:
        sys.stdout.write('%s\n' % (
            '%s: %s\n%s' % ('%(name)s', warn, advice) % globals(), ))
        sys.exit(1)

    if option.update:
        # In update mode the po file itself is rewritten.
        option.outfile = args[0]
    elif not option.outfile:
        option.outfile = '-'

    defs, refs = args

    try:
        merge(defs, refs, option = option)
    except MsgmergeError:
        err = sys.exc_info()[1]
        sys.stdout.write('%s\n' % (
            '%(name)s: ' % globals() + '%s' % err, ))
        sys.exit(1)
| 564 |  |  | 
def io(iofile, mode = 'rU'):
    '''
    Wrapper around open(). When the file is opened for reading, a
    leading UTF-8 byte order mark is skipped: the first three bytes are
    read and the file is rewound unless they are the BOM.

    Returns the open file object. Raises MsgmergeError if opening or
    probing the file fails with an IOError.
    '''
    fo = None
    try:
        fo = open(iofile, mode)
        if 'r' in mode and fo.read(3) != codecs.BOM_UTF8:
            fo.seek(0)
    except IOError:
        err = sys.exc_info()[1]
        if fo is not None:
            # open() worked but the probe failed: do not leak the handle.
            fo.close()
        # strerror may be unset for an IOError that was not built from
        # an errno/message pair; fall back to the exception text.
        raise MsgmergeError('error while opening file: %s: %s.' %
                            (err.strerror or err, iofile))
    return fo
| 576 |  |  | 
def backup(infile):
    '''
    Handle backup of files in update mode.

    NOTE(review): this looks unfinished. It only computes the backup
    file name (<infile> plus the SIMPLE_BACKUP_SUFFIX environment
    value, '~' by default); nothing is copied and nothing is returned.
    '''
    environ = os.environ
    # VERSION_CONTROL is looked up but its value is not used.
    environ.get('VERSION_CONTROL', '')
    suffix = environ.get('SIMPLE_BACKUP_SUFFIX', '~')

    backup_file = '%s%s' % (infile, suffix)
| 583 |  |  | 
def changes(new, old):
    '''
    Compare the old file content with the new entries. <old> is a list
    of lines that still carry their line ends, <new> is a list of
    entries that will be joined with newlines when written.
    Returns 0 if both texts are equal, otherwise -1 or 1 (old text
    sorts before or after the new text).
    '''
    before = ''.join(old)
    after = '\n'.join(new)
    return (before > after) - (before < after)
| 586 |  |  | 
def write(matches, outfile):
    '''
    Write the list <matches>, joined with newlines, to file <outfile>.
    If <outfile> is `-\' the text goes to standard out. A file opened
    here is closed again, also when writing fails.
    '''
    text = '\n'.join(matches)
    if outfile == '-':
        sys.stdout.write(text)
        return
    fd = io(outfile, 'w')
    try:
        fd.write(text)
    finally:
        fd.close()
| 594 |  |  | 
def merge(def_file, ref_file, update = True, outfile = '-',
          docstrings = True, suffix = '~', backup = True,
          verbose = True, **kwds):
    '''
    Merge po file <def_file> with pot file <ref_file>. The keyword
    arguments are collected in an Options object that is stored in the
    global <option>; an Options object passed as option=... in <kwds>
    (as the command line interface does) takes its place.

    If option.update is set, <def_file> is rewritten, but only when the
    merged result differs from its current content. Otherwise the
    result is written to option.outfile, where `-\' means standard out.
    If docstrings is False the docstring flag will be removed. Set
    verbose to False to suppress progress indicators.
    '''
    global option
    from_args = Options(update = update,
                        outfile = outfile,
                        docstrings = docstrings,
                        suffix = suffix,
                        backup = backup,
                        verbose = verbose)
    option = kwds.get('option', from_args)

    def_msgs = parse(def_file, 'msgstr')
    ref_msgs = parse(ref_file, 'msgid')
    # Only announce the merge when used as a library.
    if verbose and not __name__ == '__main__':
        sys.stderr.write('%s\n' % (
            'Merging %s with %s' % (ref_file, def_file), ))
    updated_lines = match(def_msgs, ref_msgs)
    if option.verbose:
        sys.stderr.write('%s\n' % (' done.', ))

    if not option.update:
        write(updated_lines, option.outfile)
        return
    if changes(updated_lines, io(def_file).readlines()):
        write(updated_lines, def_file)
| 625 |  |  | 
def merge_dir(directory, pot = False, include = [], exclude = [],
              verbose = True):
    '''
    Tries to merge a directory of po files. Uses simple glob to find
    po files and pot file. The parameter <pot> can be used to specify
    the pot file in the directory. If the list <include> is given only
    files in this list are merged. Use the list <exclude> to exclude
    files from being merged. This function is only useful if po files
    and pot file are in the same directory. Set <verbose> to get
    information when running.

    Raises MsgmergeError if no pot file, more than one pot file or no
    po file is found. A MsgmergeError raised while merging a single po
    file is reported on standard error and the remaining files are
    still processed.
    '''
    if directory.endswith('/'):
        directory = os.path.dirname(directory)
    if pot:
        pot = os.path.basename(pot)
    else:
        pot = glob.glob('%s/*.pot' % directory)
        if not pot:
            raise MsgmergeError('No pot file found.')
        elif len(pot) > 1:
            raise MsgmergeError('More than one pot file found: %s.' % pot)
        pot = os.path.basename(pot[0])

    if not include:
        pos = glob.glob('%s/*.po' % directory)
        # A single po file is a perfectly valid directory content.
        if not pos:
            raise MsgmergeError('No po file(s) found.')
        pos = [ os.path.basename(po) for po in pos ]
    else:
        pos = [ os.path.basename(po) for po in include ]

    for po in exclude:
        try:
            pos.remove(po)
        except ValueError:
            pass    # Not in the list anyway.

    format = '%s/%s'
    for po in pos:
        try:
            merge(format % (directory, po), format % (directory, pot),
                  update = True, verbose = verbose,
                  outfile = format % (directory, po))
        except MsgmergeError:
            err = sys.exc_info()[1]
            if verbose:
                sys.stderr.write('%s Not updated.\n' % err)
            else:
                sys.stderr.write('%s %s not updated.\n' % (err, po))
| 673 |  |  | 
# Run the command line interface when executed as a script.
if __name__ == '__main__':
    cmdline()
trunk/scripts/build/pygettext.py
| r253614 | r253615 |  | 
|---|
| 1 |  | #! /usr/bin/env python | 
| 2 |  | # -*- coding: iso-8859-1 -*- | 
| 3 |  | # Originally written by Barry Warsaw <barry@zope.com> | 
| 4 |  | # | 
| 5 |  | # Minimally patched to make it even more xgettext compatible | 
| 6 |  | # by Peter Funk <pf@artcom-gmbh.de> | 
| 7 |  | # | 
| 8 |  | # 2002-11-22 Jürgen Hermann <jh@web.de> | 
| 9 |  | # Added checks that _() only contains string literals, and | 
| 10 |  | # command line args are resolved to module lists, i.e. you | 
| 11 |  | # can now pass a filename, a module or package name, or a | 
| 12 |  | # directory (including globbing chars, important for Win32). | 
| 13 |  | # Made docstring fit in 80 chars wide displays using pydoc. | 
| 14 |  | # | 
| 15 |  |  | 
# for selftesting: use fintl's gettext when that module can be imported,
# otherwise fall back to an identity function so that _() always exists.
try:
    import fintl
    _ = fintl.gettext
except ImportError:
    _ = lambda s: s
| 22 |  |  | 
| 23 |  | __doc__ = _("""pygettext -- Python equivalent of xgettext(1) | 
| 24 |  |  | 
| 25 |  | Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the | 
| 26 |  | internationalization of C programs. Most of these tools are independent of | 
| 27 |  | the programming language and can be used from within Python programs. | 
| 28 |  | Martin von Loewis' work[1] helps considerably in this regard. | 
| 29 |  |  | 
| 30 |  | There's one problem though; xgettext is the program that scans source code | 
| 31 |  | looking for message strings, but it groks only C (or C++). Python | 
| 32 |  | introduces a few wrinkles, such as dual quoting characters, triple quoted | 
| 33 |  | strings, and raw strings. xgettext understands none of this. | 
| 34 |  |  | 
| 35 |  | Enter pygettext, which uses Python's standard tokenize module to scan | 
| 36 |  | Python source code, generating .pot files identical to what GNU xgettext[2] | 
| 37 |  | generates for C and C++ code. From there, the standard GNU tools can be | 
| 38 |  | used. | 
| 39 |  |  | 
| 40 |  | A word about marking Python strings as candidates for translation. GNU | 
| 41 |  | xgettext recognizes the following keywords: gettext, dgettext, dcgettext, | 
| 42 |  | and gettext_noop. But those can be a lot of text to include all over your | 
| 43 |  | code. C and C++ have a trick: they use the C preprocessor. Most | 
| 44 |  | internationalized C source includes a #define for gettext() to _() so that | 
| 45 |  | what has to be written in the source is much less. Thus these are both | 
| 46 |  | translatable strings: | 
| 47 |  |  | 
| 48 |  | gettext("Translatable String") | 
| 49 |  | _("Translatable String") | 
| 50 |  |  | 
| 51 |  | Python of course has no preprocessor so this doesn't work so well.  Thus, | 
| 52 |  | pygettext searches only for _() by default, but see the -k/--keyword flag | 
| 53 |  | below for how to augment this. | 
| 54 |  |  | 
| 55 |  | [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html | 
| 56 |  | [2] http://www.gnu.org/software/gettext/gettext.html | 
| 57 |  |  | 
| 58 |  | NOTE: pygettext attempts to be option and feature compatible with GNU | 
| 59 |  | xgettext where ever possible. However some options are still missing or are | 
| 60 |  | not fully implemented. Also, xgettext's use of command line switches with | 
| 61 |  | option arguments is broken, and in these cases, pygettext just defines | 
| 62 |  | additional switches. | 
| 63 |  |  | 
| 64 |  | Usage: pygettext [options] inputfile ... | 
| 65 |  |  | 
| 66 |  | Options: | 
| 67 |  |  | 
| 68 |  | -a | 
| 69 |  | --extract-all | 
| 70 |  | Extract all strings. | 
| 71 |  |  | 
| 72 |  | -d name | 
| 73 |  | --default-domain=name | 
| 74 |  | Rename the default output file from messages.pot to name.pot. | 
| 75 |  |  | 
| 76 |  | -E | 
| 77 |  | --escape | 
| 78 |  | Replace non-ASCII characters with octal escape sequences. | 
| 79 |  |  | 
| 80 |  | -D | 
| 81 |  | --docstrings | 
| 82 |  | Extract module, class, method, and function docstrings.  These do | 
| 83 |  | not need to be wrapped in _() markers, and in fact cannot be for | 
| 84 |  | Python to consider them docstrings. (See also the -X option). | 
| 85 |  |  | 
| 86 |  | -h | 
| 87 |  | --help | 
| 88 |  | Print this help message and exit. | 
| 89 |  |  | 
| 90 |  | -k word | 
| 91 |  | --keyword=word | 
| 92 |  | Keywords to look for in addition to the default set, which are: | 
| 93 |  | %(DEFAULTKEYWORDS)s | 
| 94 |  |  | 
| 95 |  | You can have multiple -k flags on the command line. | 
| 96 |  |  | 
| 97 |  | -K | 
| 98 |  | --no-default-keywords | 
| 99 |  | Disable the default set of keywords (see above).  Any keywords | 
| 100 |  | explicitly added with the -k/--keyword option are still recognized. | 
| 101 |  |  | 
| 102 |  | --no-location | 
| 103 |  | Do not write filename/lineno location comments. | 
| 104 |  |  | 
| 105 |  | -n | 
| 106 |  | --add-location | 
| 107 |  | Write filename/lineno location comments indicating where each | 
| 108 |  | extracted string is found in the source.  These lines appear before | 
| 109 |  | each msgid.  The style of comments is controlled by the -S/--style | 
| 110 |  | option.  This is the default. | 
| 111 |  |  | 
| 112 |  | -o filename | 
| 113 |  | --output=filename | 
| 114 |  | Rename the default output file from messages.pot to filename.  If | 
| 115 |  | filename is `-' then the output is sent to standard out. | 
| 116 |  |  | 
| 117 |  | -p dir | 
| 118 |  | --output-dir=dir | 
| 119 |  | Output files will be placed in directory dir. | 
| 120 |  |  | 
| 121 |  | -S stylename | 
| 122 |  | --style stylename | 
| 123 |  | Specify which style to use for location comments.  Two styles are | 
| 124 |  | supported: | 
| 125 |  |  | 
| 126 |  | Solaris  # File: filename, line: line-number | 
| 127 |  | GNU      #: filename:line | 
| 128 |  |  | 
| 129 |  | The style name is case insensitive.  GNU style is the default. | 
| 130 |  |  | 
| 131 |  | -v | 
| 132 |  | --verbose | 
| 133 |  | Print the names of the files being processed. | 
| 134 |  |  | 
| 135 |  | -V | 
| 136 |  | --version | 
| 137 |  | Print the version of pygettext and exit. | 
| 138 |  |  | 
| 139 |  | -w columns | 
| 140 |  | --width=columns | 
| 141 |  | Set width of output to columns. | 
| 142 |  |  | 
| 143 |  | -x filename | 
| 144 |  | --exclude-file=filename | 
| 145 |  | Specify a file that contains a list of strings that are not be | 
| 146 |  | extracted from the input files.  Each string to be excluded must | 
| 147 |  | appear on a line by itself in the file. | 
| 148 |  |  | 
| 149 |  | -X filename | 
| 150 |  | --no-docstrings=filename | 
| 151 |  | Specify a file that contains a list of files (one per line) that | 
| 152 |  | should not have their docstrings extracted.  This is only useful in | 
| 153 |  | conjunction with the -D option above. | 
| 154 |  |  | 
| 155 |  | If `inputfile' is -, standard input is read. | 
| 156 |  | """) | 
| 157 |  |  | 
| 158 |  | import os | 
| 159 |  | import imp | 
| 160 |  | import sys | 
| 161 |  | import glob | 
| 162 |  | import time | 
| 163 |  | import getopt | 
| 164 |  | import token | 
| 165 |  | import tokenize | 
| 166 |  | import operator | 
| 167 |  |  | 
| 168 |  | from umit.pm.core.const import PM_VERSION | 
| 169 |  |  | 
# Version of this script; interpolated into the pot header as %(version)s.
__version__ = '1.5'

# Default set of keywords, see the -k/--keyword option in the help text.
default_keywords = ['_']
# The same set as one comma separated string; the help text refers to it
# as %(DEFAULTKEYWORDS)s.
DEFAULTKEYWORDS = ', '.join(default_keywords)

# Separator used when string fragments are joined.
EMPTYSTRING = ''
| 176 |  |  | 
| 177 |  |  | 
| 178 |  |  | 
# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there.
# The template expects the keys pm_version, time and version to be filled
# in with the % operator.
pot_header = _('''\
# PacketManipulator catalog.
# Copyright (C) 2009 Adriano Montero Marques
# Francesco Piccinno <stack.box@gmail.com>, 2009
#
msgid ""
msgstr ""
"Project-Id-Version: PacketManipulator %(pm_version)s\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8bit\\n"
"Generated-By: pygettext.py %(version)s\\n"

''')
| 199 |  |  | 
| 200 |  |  | 
def usage(code, msg=''):
    """Write the help text, then <msg> if given, to standard error and
    exit with status <code>.

    The help text is the module docstring with the module globals
    interpolated into it.
    """
    help_text = __doc__ % globals()
    sys.stderr.write('%s\n' % (help_text,))
    if msg:
        sys.stderr.write('%s\n' % (msg,))
    sys.exit(code)
| 206 |  |  | 
| 207 |  |  | 
| 208 |  |  | 
# Escape table: escapes[n] is the text to emit for the character with
# ordinal n. It is filled in by make_escapes().
escapes = []

def make_escapes(pass_iso8859):
    """(Re)build the global escape table used by escape().

    With <pass_iso8859> true, characters 128..255 whose low seven bits
    are printable pass through unchanged; otherwise every character
    outside 32..126 becomes an octal escape. Backslash, tab, carriage
    return, newline and double quote always get their C style escape.

    The table is replaced in place, so calling this function again
    yields a fresh 256 entry table instead of a longer one.
    """
    global escapes
    if pass_iso8859:
        # Allow iso-8859 characters to pass through so that e.g. 'msgid
        # "Höhe"' would not result in 'msgid "H\366he"'.  Otherwise we
        # escape any character outside the 32..126 range.
        mod = 128
    else:
        mod = 256
    table = []
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            table.append(chr(i))
        else:
            table.append("\\%03o" % i)
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    # Slice assignment keeps the identity of the list, so references
    # taken earlier see the new content as well.
    escapes[:] = table
| 230 |  |  | 
| 231 |  |  | 
def escape(s):
    """Return <s> with every character replaced by its entry in the
    global escape table (see make_escapes())."""
    global escapes
    return EMPTYSTRING.join([escapes[ord(ch)] for ch in s])
| 238 |  |  | 
| 239 |  |  | 
def safe_eval(s):
    """Evaluate the source text <s> of a string literal and return its
    value; the builtins are not available to the evaluated text."""
    # unwrap quotes, safely
    # NOTE(review): eval() with empty builtins is not a real sandbox.
    # The callers visible here only pass tokenizer STRING tokens; do
    # not use this on other input.
    no_builtins = {'__builtins__': {}}
    no_locals = {}
    return eval(s, no_builtins, no_locals)
| 243 |  |  | 
| 244 |  |  | 
def normalize(s):
    """Convert the string <s> into the quoted form used in .po files,
    which is much closer to C style: a single quoted string for text
    without newlines, otherwise an empty string followed by one quoted
    string per line."""
    pieces = s.split('\n')
    if len(pieces) == 1:
        return '"' + escape(s) + '"'

    if not pieces[-1]:
        # The text ends with a newline: drop the empty tail and hand
        # the newline back to the last real line, so that it is escaped
        # along with that line.
        del pieces[-1]
        pieces[-1] = pieces[-1] + '\n'
    escaped = [escape(piece) for piece in pieces]
    lineterm = '\\n"\n"'
    return '""\n"' + lineterm.join(escaped) + '"'
| 260 |  |  | 
| 261 |  |  | 
def containsAny(str, set):
    """Return True if 'str' contains at least one of the chars in
    'set', False otherwise."""
    for c in set:
        if c in str:
            return True
    return False
| 265 |  |  | 
| 266 |  |  | 
def _visit_pyfiles(list, dirname, names):
    """Helper for getFilesForName(): visit callback for os.path.walk().

    Appends the path of every entry in <names> that has the python
    source extension to <list> and removes 'CVS' from <names>.
    """
    global _py_ext
    # The extension for python source files is looked up once and then
    # kept in the module global _py_ext.
    if '_py_ext' not in globals():
        source_suffixes = [triple[0] for triple in imp.get_suffixes()
                           if triple[2] == imp.PY_SOURCE]
        _py_ext = source_suffixes[0]

    # don't recurse into CVS directories
    if 'CVS' in names:
        names.remove('CVS')

    # add all *.py files to list
    for entry in names:
        if os.path.splitext(entry)[1] == _py_ext:
            list.append(os.path.join(dirname, entry))
| 284 |  |  | 
| 285 |  |  | 
def _get_modpkg_path(dotted_name, pathlist=None):
    """Get the filesystem path for a module or a package.

    <dotted_name> is resolved one component at a time with
    imp.find_module(); <pathlist> is the list of directories to search
    for the first component (None is passed on to imp.find_module()
    unchanged).

    Return the file system path to a file for a module, and to a directory for
    a package. Return None if the name is not found, or is a builtin or
    extension module.
    """
    # split off top-most name
    parts = dotted_name.split('.', 1)

    if len(parts) > 1:
        # we have a dotted path, import top-level package
        try:
            file, pathname, description = imp.find_module(parts[0], pathlist)
            # only the location is of interest, not the open file
            if file: file.close()
        except ImportError:
            return None

        # check if it's indeed a package
        if description[2] == imp.PKG_DIRECTORY:
            # recursively handle the remaining name parts, searching
            # only inside the package directory just found
            pathname = _get_modpkg_path(parts[1], [pathname])
        else:
            # a dotted name whose head is not a package cannot be resolved
            pathname = None
    else:
        # plain name
        try:
            file, pathname, description = imp.find_module(
                dotted_name, pathlist)
            if file:
                file.close()
            # anything but python source or a package directory is rejected
            if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
                pathname = None
        except ImportError:
            pathname = None

    return pathname
| 323 |  |  | 
| 324 |  |  | 
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.

    A name that does not exist on disk is first tried as a glob pattern
    (if it holds any of the chars *?[]) and then as a module or package
    name. An empty list is returned when nothing is found.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            matched = []
            for path in glob.glob(name):
                matched.extend(getFilesForName(path))
            return matched

        # try to find module or package
        name = _get_modpkg_path(name)
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        collected = []
        os.path.walk(name, _visit_pyfiles, collected)
        return collected

    if os.path.exists(name):
        # a single file
        return [name]

    return []
| 353 |  |  | 
| 354 |  |  | 
class TokenEater:
    """Token-stream state machine that collects translatable strings.

    An instance is meant to be used as the token callback of
    ``tokenize.tokenize``: every token is handed to the handler of the
    current state.  Strings found inside calls to one of
    ``options.keywords`` (and, when ``options.docstrings`` is set, module,
    class and function docstrings) are recorded together with the file name
    and line number where they were seen.  ``write()`` dumps the collected
    messages as a PO template.

    States:
      __waiting         looking for a keyword, ``class``/``def`` or the
                        module docstring
      __suiteseen       inside a ``class``/``def`` header, looking for the
                        colon that ends it
      __suitedocstring  header finished, the next significant token decides
                        whether there is a docstring
      __keywordseen     a keyword name was seen, expecting ``(``
      __openseen        inside ``keyword(...)``, accumulating strings
    """

    def __init__(self, options):
        """Create an eater driven by *options* (see ``main`` for its fields)."""
        self.__options = options
        # msgid -> {(filename, lineno): isdocstring}
        self.__messages = {}
        self.__state = self.__waiting
        # string fragments collected inside the current keyword(...) call
        self.__data = []
        # line of the opening parenthesis of the current keyword(...) call
        self.__lineno = -1
        # true until the first significant token of the current file is seen
        self.__freshmodule = 1
        self.__curfile = None
        # nesting depth of (), [] and {} inside a class/def header
        self.__enclosurecount = 0

    def __call__(self, ttype, tstring, stup, etup, line):
        """Dispatch one token to the current state handler.

        Only the token type, its text and the starting row (``stup[0]``) are
        forwarded; *etup* and *line* are accepted for the tokenize callback
        signature but unused.
        """
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        """Idle state: watch for docstring candidates and keyword names."""
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                    return
                if ttype in (tokenize.COMMENT, tokenize.NL):
                    # leading comments and blank lines may precede the
                    # module docstring
                    return
                # Any other token means the module has no docstring.  Do NOT
                # return here: this very token may be the 'class'/'def' or
                # the keyword that starts the first extractable item, and
                # swallowing it would lose that item.
                self.__freshmodule = 0
            # class or function docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__enclosurecount = 0
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        """Skip a class/def header up to the colon that terminates it.

        Colons nested inside (), [] or {} (dict defaults, slices, lambdas
        used as default values) belong to the header and must not end it, so
        the bracket depth is tracked.
        """
        if ttype != tokenize.OP:
            return
        if tstring == ':' and self.__enclosurecount == 0:
            self.__state = self.__suitedocstring
        elif tstring in ('(', '[', '{'):
            self.__enclosurecount += 1
        elif tstring in (')', ']', '}'):
            self.__enclosurecount -= 1

    def __suitedocstring(self, ttype, tstring, lineno):
        """Record the suite's docstring, if its first statement is a string."""
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.NL, tokenize.INDENT,
                           tokenize.COMMENT):
            # Anything other than layout noise means there was no docstring.
            # NL is part of the noise: a blank or comment-only line between
            # the header and the docstring ends with NL, not NEWLINE.
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        """A keyword name was seen; it only counts if a '(' follows."""
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        """Collect the string literals inside ``keyword( ... )``."""
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            print >> sys.stderr, _(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        """Remember *msg* as seen at (current file, *lineno*).

        *lineno* defaults to the line of the current keyword call.  Messages
        listed in ``options.toexclude`` are dropped.
        """
        if lineno is None:
            lineno = self.__lineno
        if msg not in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        """Start a new input file: later entries are attributed to *filename*."""
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        """Write every collected message to the file-like object *fp*.

        The output starts with ``pot_header`` and is followed by one
        ``msgid``/``msgstr ""`` pair per message, preceded by location
        comments (unless ``options.writelocations`` is false) and by a
        ``#, docstring`` flag when the message came from a docstring.
        """
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
        # The time stamp in the header doesn't have the same format as that
        # generated by xgettext...
        print >> fp, pot_header % {'time': timestamp, 'version': __version__,
                                   'pm_version': PM_VERSION}
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = v.keys()
            keys.sort()
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = reverse.keys()
        rkeys.sort()
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                isdocstring = 0
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                if reduce(operator.__add__, v.values()):
                    isdocstring = 1
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples.  We want to sort the entries in v first by
                # file name and then by line number.
                v = v.keys()
                v.sort()
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print >>fp, _(
                            '# File: %(filename)s, line: %(lineno)d') % d
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceeds 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print >> fp, locline
                            locline = "#:" + s
                    if len(locline) > 2:
                        print >> fp, locline
                if isdocstring:
                    print >> fp, '#, docstring'
                print >> fp, 'msgid', normalize(k)
                print >> fp, 'msgstr ""\n'
| 505 |  |  | 
| 506 |  |  | 
| 507 |  |  | 
def main():
    """Command-line entry point.

    Parses ``sys.argv`` with getopt, expands the positional arguments into a
    list of input files ('-' stands for standard input), runs every file
    through a ``TokenEater`` and finally writes the collected messages to
    ``options.outfile`` ('-' stands for standard output), optionally placed
    below ``options.outpath``.

    Exits through ``usage()`` on bad options and with status 1 when the
    exclude file cannot be read.
    """
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             # takes a file name, exactly like its short form -X
             'docstrings', 'no-docstrings=',
             ])
    except getopt.error:
        # sys.exc_info() instead of "except E, msg" keeps the handler
        # parseable by every Python version
        usage(1, sys.exc_info()[1])

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0 # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            print >> sys.stdout, \
                _('pygettext.py (xgettext for Python) %s') % __version__
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            # one file name per line; docstrings of those files are skipped
            fp = open(arg)
            try:
                for line in fp.readlines():
                    # strip only the line terminator: slicing off the last
                    # character would mangle a final line without newline
                    options.nodocstrings[line.rstrip('\r\n')] = 1
            finally:
                fp.close()

    # calculate escapes
    make_escapes(options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            fp = open(options.excludefilename)
            try:
                # one string per line; the terminator is not part of the
                # string, otherwise no extracted message would ever match
                options.toexclude = [line.rstrip('\r\n')
                                     for line in fp.readlines()]
            finally:
                fp.close()
        except IOError:
            print >> sys.stderr, _(
                "Can't read --exclude-file: %s") % options.excludefilename
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                print >> sys.stdout, _('Reading standard input')
            fp = sys.stdin
            closep = 0
        else:
            if options.verbose:
                print >> sys.stdout, _('Working on %s') % filename
            fp = open(filename)
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokenize.tokenize(fp.readline, eater)
            except tokenize.TokenError:
                # report the problem and carry on with the next file
                e = sys.exc_info()[1]
                print >> sys.stderr, '%s: %s, line %d, column %d' % (
                    e.args[0], filename, e.args[1][0], e.args[1][1])
        finally:
            # never close standard input
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()
| 664 |  |  | 
| 665 |  |  | 
# Script entry point: run the extractor only when executed directly.
if __name__ == '__main__':
    main()
    # The statements below do no useful work at run time; presumably they are
    # sample _() calls kept as fixtures for running the extractor over its
    # own source -- TODO confirm.
    # some more test strings
    _(u'a unicode string')
    # this one creates a warning
    # NOTE(review): as written, the '%' operator sits outside the _() call,
    # so only a string literal appears between the parentheses; confirm
    # whether the warning mentioned above is still triggered.
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    # adjacent string literals inside a single _() call
    _('more' 'than' 'one' 'string')