trunk/scripts/build/check_po.py
| r253614 | r253615 | |
| 1 | | #! /usr/bin/env python |
| 2 | | # |
| 3 | | # check_po - a gramps tool to check validity of po files |
| 4 | | # |
| 5 | | # Copyright (C) 2006-2006 Kees Bakker |
| 6 | | # |
| 7 | | # This program is free software; you can redistribute it and/or modify |
| 8 | | # it under the terms of the GNU General Public License as published by |
| 9 | | # the Free Software Foundation; either version 2 of the License, or |
| 10 | | # (at your option) any later version. |
| 11 | | # |
| 12 | | # This program is distributed in the hope that it will be useful, |
| 13 | | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | | # GNU General Public License for more details. |
| 16 | | # |
| 17 | | # You should have received a copy of the GNU General Public License |
| 18 | | # along with this program; if not, write to the Free Software |
| 19 | | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 20 | | |
| 21 | | # |
| 22 | | # TODO |
| 23 | | # |
| 24 | | # * Check for HTML text in msgstr when there is none in msgid |
| 25 | | # * Check for matching HTML tag/endtag in msgstr |
| 26 | | # |
| 27 | | |
| 28 | | # Adapted for Umit by Guilherme Polo, original file: |
| 29 | | # https://gramps.svn.sourceforge.net/svnroot/gramps/branches/gramps22/po/check_po |
| 30 | | |
| 31 | | import re |
| 32 | | import sys |
| 33 | | from optparse import OptionParser |
| 34 | | |
# Application name shown in the command-line help text.
APP = "Umit"

# Per-file statistics tables.
# NOTE(review): nothing in the visible part of this script reads or writes
# these tables -- presumably leftovers from the original gramps tool; confirm
# before removing them.
all_total = {}
all_fuzzy = {}
all_untranslated = {}
all_percent_s = {}
all_named_s = {}
all_bnamed_s = {}
all_context = {}
all_coverage = {}
all_template_coverage = {}
| 46 | | |
def strip_quotes(st):
    """Return *st* without its enclosing double quotes.

    The string is returned unchanged unless it is at least two characters
    long and both starts and ends with a double quote.
    """
    quoted = len(st) >= 2 and st.startswith('"') and st.endswith('"')
    if not quoted:
        return st
    return st[1:-1]
| 51 | | |
# Base class for all checks.
#
# A check collects the offending messages in self.msgs.  diag() and summary()
# read self.diag_header and self.summary_text, which this class does not set
# itself; the subclasses in this file assign them in their constructors.
class Check:
    def __init__(self):
        self.msgs = []

    def diag(self):
        # Print the header followed by every collected message; print nothing
        # at all when no message was collected.
        if not self.msgs:
            return
        print("")
        print(self.diag_header)
        for offender in self.msgs:
            offender.diag()

    def summary(self):
        # One line: left-aligned label (20 columns) and the offender count.
        print("%-20s%d" % (self.summary_text, len(self.msgs)))
| 64 | | |
class Check_fmt(Check):
    # Flags a message when the literal format <fmt> (the script uses '%s' and
    # '%d') occurs a different number of times in msgid and msgstr.
    def __init__(self, fmt):
        Check.__init__(self)
        self.fmt = fmt
        self.summary_text = "%s mismatches:" % fmt
        self.diag_header = "-------- %s mismatches --------------" % fmt

    def process(self, msg):
        in_source = msg.msgid.count(self.fmt)
        in_translation = msg.msgstr.count(self.fmt)
        if in_source != in_translation:
            self.msgs.append(msg)
| 78 | | |
class Check_named_fmt(Check):
    # Matches '%(name)', optional digits and the single non-digit character
    # that follows (normally the conversion letter).
    find_named_fmt_pat = re.compile(r'% \( \w+ \) \d* \D', re.VERBOSE)

    def __init__(self):
        Check.__init__(self)
        self.summary_text = "%() name mismatches:"
        self.diag_header = "-------- %() name mismatches --------------"

    def process(self, msg):
        # The translation must use exactly the same named formats as the
        # source, in any order.  Comparing the sorted lists also covers the
        # case where the number of formats differs.
        wanted = sorted(self.find_named_fmt_pat.findall(msg.msgid))
        found = sorted(self.find_named_fmt_pat.findall(msg.msgstr))
        if wanted != found:
            self.msgs.append(msg)
| 101 | | |
class Check_missing_sd(Check):
    # Finds every '%(name)' in the translation and captures what follows the
    # optional digits: one non-digit character, or '' at the end of the text.
    # To try the pattern by hand:
    #   re.compile('% \( \w+ \) \d* (\D|$)', re.VERBOSE).findall(
    #       '%(event_name)s: %(place)s%(endnotes)s. ' )
    find_named_fmt_pat2 = re.compile(r'% \( \w+ \) \d* (\D|$)', re.VERBOSE)

    def __init__(self):
        Check.__init__(self)
        self.summary_text = "%() missing s/d:"
        self.diag_header = "-------- %() without 's' or 'd' mismatches --------------"

    def process(self, msg):
        # Report the message once, at the first named format that is not
        # followed by 's' or 'd'.
        for conversion in self.find_named_fmt_pat2.findall(msg.msgstr):
            if conversion not in ('s', 'd'):
                self.msgs.append(msg)
                return
| 119 | | |
class Check_runaway(Check):
    def __init__(self):
        Check.__init__(self)
        self.summary_text = "Runaway context:"
        self.diag_header = "-------- Runaway context in translation ---------"

    def process(self, msg):
        # Runaway context.  In the translated part we only want to see the
        # translation of the word after the '|'; a '|' on both sides with
        # differing texts is reported.
        source, translation = msg.msgid, msg.msgstr
        if '|' in source and '|' in translation and source != translation:
            self.msgs.append(msg)
| 133 | | |
class Check_xml_chars(Check):
    # Special XML characters.  The pattern matches a '>' that follows a
    # non-word character, any double quote, and any '&' that does not start
    # one of the entities &quot; &nbsp; &gt; &amp;
    # NOTE(review): '&lt;' is not in that list, so it is reported as well --
    # confirm whether that is intended.
    xml_chars_pat = re.compile(r'(?<=\W) > | " | & (?!(quot|nbsp|gt|amp);)', re.VERBOSE)

    def __init__(self):
        Check.__init__(self)
        self.summary_text = "XML special chars:"
        self.diag_header = "-------- unescaped XML special characters ---------"

    def process(self, msg):
        # Only messages that come from tips.xml are examined.
        if not msg.is_tips_xml:
            return
        if self.xml_chars_pat.search(msg.msgstr):
            self.msgs.append(msg)
| 152 | | |
class Check_last_char(Check):
    def __init__(self):
        Check.__init__(self)
        self.summary_text = "Last character:"
        self.diag_header = "-------- last character not identical ---------"

    def process(self, msg):
        # Fuzzy translations are not examined.
        if msg.is_fuzzy:
            return

        # Slices rather than [-1] so that empty strings are harmless.
        source_tail = msg.msgid[-1:]
        translation_tail = msg.msgstr[-1:]

        # Both must end in white space or neither, and both must end in a
        # period or neither.
        space_differs = source_tail.isspace() != translation_tail.isspace()
        period_differs = (source_tail == '.') != (translation_tail == '.')
        if space_differs or period_differs:
            self.msgs.append(msg)
| 172 | | |
class Check_shortcut_trans(Check):
    def __init__(self):
        Check.__init__(self)
        self.summary_text = "Shortcut in msgstr:"
        self.diag_header = "-------- shortcut key in translation ---------"

    def process(self, msg):
        # An underscore in the translation while the source has none.
        if '_' not in msg.msgid and '_' in msg.msgstr:
            self.msgs.append(msg)
| 184 | | |
class Msgid:
    # One entry of a PO file.
    #
    # msgid / msgstr hold the unquoted, concatenated text; _msgid, _msgstr and
    # _cmnt keep the raw input lines (used by diag()).  is_fuzzy and
    # is_tips_xml are set as soon as a comment line contains 'fuzzy' or
    # 'tips.xml' respectively.
    fuzzy_pat = re.compile('fuzzy')
    tips_xml_pat = re.compile(r'tips\.xml')

    # The keyword is only removed at the very start of a line.  An unanchored
    # substitution would also delete the words "msgid " / "msgstr " when they
    # occur inside the quoted text.
    _msgid_kw_pat = re.compile(r'^msgid\s+')
    _msgstr_kw_pat = re.compile(r'^msgstr\s+')

    def __init__(self, msgnr, lineno):
        self._msgid = []
        self._msgstr = []
        self.msgid = ''
        self.msgstr = ''
        self._cmnt = []
        self.nr = msgnr
        self.lineno = lineno
        self.is_fuzzy = 0
        self.is_tips_xml = 0

    def diag(self):
        # Print the message number, its line number and the raw msgid/msgstr
        # lines exactly as they were read.
        if self.is_fuzzy:
            fuzzy_mark = " (fuzzy)"
        else:
            fuzzy_mark = ""
        print("")
        print("msg nr: %d, lineno: %d%s" % (self.nr, self.lineno, fuzzy_mark))
        sys.stdout.write(''.join(self._msgid))
        sys.stdout.write(''.join(self._msgstr))

    def _unquote(self, line, keyword_pat, lineno):
        # Drop the leading keyword (if any) and the enclosing quotes.  A line
        # that is not properly quoted is reported but still used.
        text = keyword_pat.sub('', line).strip()
        # Slices, not [0], so that an empty line cannot raise IndexError.
        if text[:1] != '"' or text[-1:] != '"':
            print("ERROR at line %d: Missing quote." % lineno)
        return strip_quotes(text)

    def add_msgid(self, line, lineno):
        # Append a 'msgid "..."' line or one of its continuation strings.
        self._msgid.append(line)
        self.msgid += self._unquote(line, self._msgid_kw_pat, lineno)

    def add_msgstr(self, line, lineno):
        # Append a 'msgstr "..."' line or one of its continuation strings.
        self._msgstr.append(line)
        self.msgstr += self._unquote(line, self._msgstr_kw_pat, lineno)

    def add_cmnt(self, line):
        # Remember the comment line and update the fuzzy / tips.xml markers.
        self._cmnt.append(line)
        if not self.is_fuzzy and self.fuzzy_pat.search(line):
            self.is_fuzzy = 1
        if not self.is_tips_xml and self.tips_xml_pat.search(line):
            self.is_tips_xml = 1
| 233 | | |
# Parser states of read_msgs(); the same numbers double as line kinds.
_PO_NONE = 0    # Nothing detected yet / line not recognised
_PO_CMNT = 1    # Inside comment part
_PO_MSGID = 2   # Inside msgid part
_PO_MSGSTR = 3  # Inside msgstr part
_PO_STR = 4     # A continuation string
_PO_OLD = 5     # An old pattern with #~

_PO_EMPTY_PAT = re.compile(r'^ \s* $', re.VERBOSE)

# Tried in this order: '#~' has to be tested before the generic '#'.
_PO_LINE_KINDS = (
    (re.compile(r'\#~ \s+ ', re.VERBOSE), _PO_OLD),
    (re.compile(r'\#', re.VERBOSE), _PO_CMNT),
    (re.compile(r'msgid \s+ "', re.VERBOSE), _PO_MSGID),
    (re.compile(r'msgstr \s+ "', re.VERBOSE), _PO_MSGSTR),
    (re.compile(r'"', re.VERBOSE), _PO_STR),
)


def _po_line_kind(line):
    # Return the kind of a PO line, or None when nothing matches.
    for pat, kind in _PO_LINE_KINDS:
        if pat.match(line):
            return kind
    return None


def _po_start_msg(msgs, lineno):
    # Append a new, empty Msgid to msgs and return it.  Its number is its
    # position in the list, so the first message read is number 0.
    msg = Msgid(len(msgs), lineno)
    msgs.append(msg)
    return msg


def read_msgs(fname):
    # Parse the PO/POT file <fname> with a small state machine and return the
    # list of Msgid objects that have a non-empty msgid or msgstr.
    #
    # Warnings about unexpected input are printed.  An Exception is raised
    # when a msgid directly follows a msgid part or a msgstr directly follows
    # a msgstr part.
    f = open(fname)
    try:
        # Close the file even when reading fails.
        lines = f.readlines()
    finally:
        f.close()

    state = _PO_NONE
    msg = None
    msgs = []

    for ix, line in enumerate(lines):
        lineno = ix + 1     # Line numbers are reported 1-based

        if _PO_EMPTY_PAT.match(line):
            continue        # Empty lines are not interesting

        kind = _po_line_kind(line)
        if kind is None:
            print('WARNING: Unexpected input at %s:%d' % (fname, lineno))
            continue        # No transition in any state

        if kind == _PO_OLD:
            # Obsolete entries are skipped.  The current message is dropped
            # but the state is kept.
            # NOTE(review): a continuation string or msgstr that follows in
            # the MSGID/MSGSTR state then fails on msg being None.
            msg = None

        elif kind == _PO_CMNT:
            if state == _PO_MSGID:
                # Hmmm. A comment here?
                print('WARNING: Unexpted comment at %s:%d' % (fname, lineno))
            elif state == _PO_CMNT:
                # Comments that follow an obsolete entry have no message to
                # belong to and are skipped.
                if msg:
                    msg.add_cmnt(line)
            else:
                # At the start, or after a msgstr: a comment starts a new item
                state = _PO_CMNT
                msg = _po_start_msg(msgs, lineno)
                msg.add_cmnt(line)

        elif kind == _PO_MSGID:
            if state == _PO_MSGID:
                raise Exception('Unexpected msgid at %s:%d' % (fname, lineno))
            # Comments directly before the msgid belong to the same item.
            if state != _PO_CMNT or not msg:
                msg = _po_start_msg(msgs, lineno)
            state = _PO_MSGID
            msg.add_msgid(line, lineno)

        elif kind == _PO_MSGSTR:
            if state == _PO_MSGSTR:
                raise Exception('Unexpected msgstr at %s:%d' % (fname, lineno))
            if state != _PO_MSGID:
                # A msgstr without a preceding msgid gets an item of its own.
                print('WARNING: Wild msgstr at %s:%d' % (fname, lineno))
                msg = _po_start_msg(msgs, lineno)
            state = _PO_MSGSTR
            msg.add_msgstr(line, lineno)

        else:
            # A continuation string extends whatever part is being read.
            if state == _PO_MSGID:
                msg.add_msgid(line, lineno)
            elif state == _PO_MSGSTR:
                msg.add_msgstr(line, lineno)
            else:
                print('WARNING: Wild string at %s:%d' % (fname, lineno))

    # Strip items with just comments.
    return [m for m in msgs if m.msgid or m.msgstr]
| 402 | | |
def analyze_msgs( options, fname, msgs, nr_templates = None, nth = 0 ):
    # Run all checks on the messages of one PO file and print the report.
    #
    # options       object with the attributes skip_fuzzy and only_summary
    # fname         name of the PO file, used in the report only
    # msgs          list of messages as returned by read_msgs()
    # nr_templates  number of messages in the template; when None the two
    #               template lines are left out of the report
    # nth           index of this file in the run; a separator is printed
    #               before every file but the first
    checks = [
        Check_fmt( '%s' ),
        Check_fmt( '%d' ),
        Check_named_fmt(),
        Check_missing_sd(),
        Check_runaway(),
        Check_xml_chars(),
        Check_last_char(),
        Check_shortcut_trans(),
    ]

    nr_fuzzy = 0
    nr_untranslated = 0
    for msg in msgs:
        if not msg.msgstr:
            # Untranslated messages are counted but not checked.
            nr_untranslated += 1
            continue

        if msg.is_fuzzy:
            nr_fuzzy += 1
            if options.skip_fuzzy:
                continue

        for c in checks:
            c.process( msg )

    nr_msgs = len(msgs)
    if nth > 0:
        print("")
        print("=====================================")
    print("%-20s%s" % ( "File:", fname ))
    if nr_templates is not None:
        print("%-20s%d" % ( "Template total:", nr_templates ))
    print("%-20s%d" % ( "PO total:", nr_msgs ))
    print("%-20s%d" % ( "Fuzzy:", nr_fuzzy ))
    print("%-20s%d" % ( "Untranslated:", nr_untranslated ))

    for c in checks:
        c.summary()

    # A file without messages has nothing translated; report 0% rather than
    # dividing by zero.
    if nr_msgs:
        po_coverage = (1.0 - (float(nr_untranslated) / float(nr_msgs))) * 100
    else:
        po_coverage = 0.0
    print("%-20s%5.2f%%" % ( "PO Coverage:", po_coverage ))

    if nr_templates is not None:
        # Same guard for an empty template.
        if nr_templates:
            template_coverage = po_coverage * float(nr_msgs) / float(nr_templates)
        else:
            template_coverage = 0.0
        print("%-20s%5.2f%%" % ( "Template Coverage:", template_coverage ))

    if not options.only_summary:
        for c in checks:
            c.diag()
| 458 | | |
def main(args):
    # Command line entry point.
    #
    # args is the full argument vector (args[0] is the program name).  The
    # first positional argument is the template (.pot) file; every further
    # positional argument is a PO file that is analysed against it.
    #
    # Returns 0 on success and 1 when reading or analysing a file failed.
    # Exits with status 1 when no template file is given.
    parser = OptionParser(description="This program validates a PO file for "
        "%s." % APP, usage='%prog [options] pot-file po-file...' )

    parser.add_option("--skip-fuzzy",
        action="store_true", dest="skip_fuzzy", default=False,
        help="skip fuzzies")

    parser.add_option("-s", "--only-summary",
        action="store_true", dest="only_summary", default=False,
        help="only give the summary")

    # Parse the vector that was passed in, not sys.argv behind the caller's
    # back.
    options, positional = parser.parse_args(args[1:])

    if not positional:
        print("Error: Especify the umit.pot file path")
        sys.exit(1)

    # Take the template from the positional arguments so that options may
    # precede it, and keep it out of the list of PO files to analyse.
    pot_file = positional[0]
    po_files = positional[1:]

    try:
        nr_templates = len(read_msgs(pot_file))
        for nth, fname in enumerate(po_files):
            msgs = read_msgs(fname)
            analyze_msgs(options, fname, msgs, nr_templates, nth)
    except Exception:
        # sys.exc_info() keeps this valid on old and new interpreters alike.
        print(sys.exc_info()[1])
        return 1
    return 0

if __name__ == "__main__":
    sys.exit(main(sys.argv))
trunk/scripts/build/msgmerge.py
| r253614 | r253615 | |
| 1 | | #! /usr/bin/env python |
| 2 | | # -*- coding: iso-8859-1 -*- |
| 3 | | # |
| 4 | | # Copyright Terje Røsten <terjeros@phys.ntnu.no> Nov. 2003. |
| 5 | | # |
| 6 | | '''Merge two Uniforum style .po files together. |
| 7 | | |
| 8 | | This is an implementation (not complete) in Python of the GNU |
| 9 | | msgmerge(1) program. It can be used on the command line (or as a Python |
| 10 | | module). |
| 11 | | |
| 12 | | Usage: msgmerge.py [OPTIONS] def.po ref.pot |
| 13 | | |
| 14 | | The def.po file is an existing PO file with translations. The ref.pot |
| 15 | | file is the last created PO file with up-to-date source references but |
| 16 | | old translations, or a PO Template file. |
| 17 | | |
| 18 | | Options: |
| 19 | | -U, --update update def.po, |
| 20 | | do nothing if def.po is already up to date. |
| 21 | | -o, --output-file=FILE write output to file FILE. Output is written to |
| 22 | | stdout if set to - or if the option is not present. |
| 23 | | -D, --docstrings don\'t remove docstring flag. |
| 24 | | -h, --help display help text and exit. |
| 25 | | -V, --version display version and exit. |
| 26 | | -q, --quiet, --silent suppress progress indicators. |
| 27 | | ''' |
| 28 | | from __future__ import generators |
| 29 | | |
| 30 | | if not __name__ == '__main__': |
| 31 | | __doc__ += '''\ |
| 32 | | |
| 33 | | When used as module the interesting functions are merge() and |
| 34 | | merge_dir(). |
| 35 | | |
| 36 | | The merge() function does the same as the command line version, and |
| 37 | | the arguments are as follows. The first argument is the def.po file, |
| 38 | | then the ref.pot file. The third argument controls whether to work in |
| 39 | | update mode or not, then the next argument sets the output file. Set |
| 40 | | the next argument to False to remove docstring flags. The last |
| 41 | | argument can be used to suppress progress indicators. The default is |
| 42 | | to work in update mode with progress indicators. |
| 43 | | |
| 44 | | Example: |
| 45 | | merge("def.po", "ref.pot") |
| 46 | | merge the files def.po and ref.pot and write output to def.po if |
| 47 | | there are any changes. |
| 48 | | merge("def.po", "ref.pot", docstrings = False, verbose = False, |
| 49 | | update = False, outfile = "-") |
| 50 | | merge the files def.po and ref.pot and write output to stdout, |
| 51 | | remove docstring flag and be quiet. |
| 52 | | |
| 53 | | The merge_dir() function is useful when merging a directory of po |
| 54 | | files. The only required argument is the name of the directory with po |
| 55 | | files and the pot file. It will use simple glob to find the files. The |
| 56 | | second argument can be used to specify the pot file (in the |
| 57 | | directory). Third argument is a list of po files (then globbing will |
| 58 | | not be used) and the next argument is list of filename to exclude. The |
| 59 | | last argument can be used to suppress progress indicators. Docstring |
| 60 | | flag will not be removed. |
| 61 | | |
| 62 | | Example: |
| 63 | | merge_dir("po") |
| 64 | | merge (and update) all po files in directory po with the single pot |
| 65 | | file in the same directory. |
| 66 | | |
| 67 | | The module raises the MsgmergeError exception in case of error. |
| 68 | | ''' |
| 69 | | __revision__ = '$Id: msgmerge.py,v 1.41 2003/11/18 19:10:42 terjeros Exp $' |
| 70 | | __version__ = '0.1' |
| 71 | | name = 'msgmerge.py' |
| 72 | | |
| 73 | | __all__ = [ 'merge', 'merge_dir', 'MsgmergeError' ] |
| 74 | | |
| 75 | | import sys |
| 76 | | import re |
| 77 | | import string |
| 78 | | import getopt |
| 79 | | import difflib |
| 80 | | import glob |
| 81 | | import os.path |
| 82 | | import codecs |
| 83 | | |
| 84 | | try: |
| 85 | | True, False |
| 86 | | except NameError: |
| 87 | | True, False = 1, 0 |
| 88 | | |
class Msgs:
    '''
    Hold one message: the raw msgid and msgstr fields, its comments,
    references and flags, the line number and a usage counter.

    The class attributes <width> (wrap width) and <file> (file name
    used in error messages) are shared by all instances.
    '''
    width = 80
    file = ''

    def __init__(self, msgid, msgstr, flag, lno, entry, **kwds):
        self.id = msgid
        self.str = msgstr
        self.flag = flag
        self.lno = lno
        self.entry = entry
        self.count = 0
        # Optional fields, empty unless given by keyword.
        self.cmt = kwds.get('cmt', '')
        self.ref = kwds.get('ref', '')
        self.autocmt = kwds.get('autocmt', '')

    def wash(self):
        '''Wash (re-wrap) both the msgid and the msgstr field.'''
        where = { 'width' : self.width, 'filename' : self.file,
                  'lno' : self.lno }
        self.id = wash(self.id, **where)
        self.str = wash(self.str, 'msgstr', **where)

    def used(self):
        '''Count one more use of this message.'''
        self.count += 1

    def get_clean_id(self):
        '''Return the msgid field without its first 'msgid "' prefix.'''
        return self.id.replace('msgid "', '', 1)

    def obsolete(self):
        '''
        Turn the message into an obsolete one: wash it with the width
        reduced by the length of the prefix and put '#~ ' in front of
        every line of both fields.
        '''
        prefix = '#~ '
        self.width -= len(prefix)
        self.wash()
        self.id = ''.join([ '#~ %s\n' % s for s in self.id.splitlines() ])
        self.str = ''.join([ '#~ %s\n' % s for s in self.str.splitlines() ])
| 119 | | |
class Options:
    '''
    Hold the options of a merge.

    With <cmdline> false the values are taken from the keyword
    arguments, falling back to the module defaults.  With <cmdline>
    true the keyword arguments are ignored and the command line
    defaults are set.
    '''
    def __init__(self, cmdline = False, **kwds):
        if cmdline:
            # Defaults for command line use.
            self.update = False
            self.outfile = False
            self.docstrings = False
            self.verbose = True
            self.suffix = '~'
            self.backup = True
            return
        # Module use: every option may be overridden by keyword.
        self.update = kwds.get('update', True)
        self.outfile = kwds.get('outfile', '-')
        self.docstrings = kwds.get('docstrings', True)
        self.verbose = kwds.get('verbose', False)
        self.suffix = kwds.get('suffix', '~')
        self.backup = kwds.get('backup', True)
| 137 | | |
class MsgmergeError(Exception):
    '''Error raised by this module, e.g. for parse errors.'''
| 140 | | |
def gen(lines):
    '''
    Generator which returns a line (with the obsolete prefix removed)
    from the list of lines in <lines>, the line number is also
    returned.

    After the last line one extra pair is returned: an empty string
    together with the number of the last line (0 if <lines> is
    empty).  It lets a caller that reads one line ahead stop cleanly
    at the end of the input.
    '''
    lno = 0
    for line in lines:
        lno += 1
        yield line.replace('#~ ', '', 1), lno
    # End marker.  It must not look like input: repeating the last line
    # here would make a reader that is collecting comment or string
    # lines append that line a second time and then run off the end.
    yield '', lno
| 152 | | |
def slurp(s, g, sign):
    '''
    Read lines from the iterator <g> (through its next() method) and
    append them to the string <s> for as long as they start with the
    string <sign>.  When <sign> is '# ' a line consisting of a bare
    '#' is appended as well.

    The return value is the first line which was not appended, its
    line number, the iterator <g> and the (possibly) extended string
    <s>.
    '''
    while True:
        line, lno = g.next()
        wanted = line.startswith(sign) or \
                 (sign == '# ' and line.strip() == '#')
        if not wanted:
            return line, lno, g, s
        s += line
| 166 | | |
def splitted_fit(chunk, line, width, break_always, break_after_space):
    '''
    Look, from the end of the string <chunk> backwards, for a place
    where <chunk> can be split.  A split is accepted in front of

      - a character from <break_always>, or a backslash followed by a
        double quote, provided the part before it together with
        <line> is not longer than <width>;
      - a character from <break_after_space> which is preceded by
        white space (the width is not looked at here).

    The return value is a tuple: the part of <chunk> before the split
    and the rest of <chunk>.  Without a suitable place the first
    element is empty and the second is the whole <chunk>.
    '''
    for pos in range(len(chunk) - 1, -1, -1):
        char = chunk[pos]
        head = chunk[:pos]
        fits = len(head + line) <= width
        follows_space = pos > 0 and chunk[pos - 1].strip() == ''
        escaped_quote = char == '\\' and chunk[pos + 1:pos + 2] == '"'
        if (char in break_always and fits) \
               or (char in break_after_space and follows_space) \
               or (escaped_quote and fits):
            return head, chunk[pos:]
    return '', chunk
| 188 | | |
def wrap(msg, width):
    '''
    Wrap the string <msg> to the width <width>.  Every resulting line
    is surrounded with a pair of ".  The return value is a string
    with these lines joined together with newlines.  An empty or all
    white space <msg> is returned as it is, surrounded with ".
    '''
    if not msg or msg.isspace():
        return '"%s"' % msg

    # \ and " is here, but " is special in po files.
    break_always = '$%+({['
    # XXX what about: « © » ¦ § etc?
    break_after_space = '_-=^`~\'<|>&*#@'
    enders = '.:,;!?/])}|%-'
    # All punctuation which does not end a piece.
    extra = ''.join([ c for c in string.punctuation if c not in enders ])
    escaped = { 'enders' : re.escape(enders),
                'extra' : re.escape(extra) }
    regex = r'([\w%(extra)s]*[\s%(enders)s)]+[\s%(enders)s]*)' % escaped
    splitter = re.compile(regex, re.UNICODE)
    # Pieces: a run of word characters followed by its enders and
    # white space.
    pieces = [ p for p in splitter.split(msg) if p != '' ]

    current = pieces.pop(0)

    # Handle \n on end of line
    if len(pieces) > 1 and pieces[-1] == 'n' and pieces[-2].endswith('\\'):
        tail = pieces.pop()
        pieces[-1] += tail
    # Do not allow a single \n on a line
    if len(pieces) > 2 and pieces[-1] == '\\n':
        tail = pieces.pop()
        pieces[-1] += tail

    wrapped = []
    for piece in pieces:
        too_long = len(current) > width or len(piece) > width \
                   or len(current + piece) > width
        if not too_long:
            current += piece
            continue
        # Put what still fits on the current line, the rest starts the
        # next one.
        fit, rest = splitted_fit(piece, current, width, break_always,
                                 break_after_space)
        wrapped.append(current + fit)
        current = rest
    wrapped.append(current)
    return '\n'.join([ '"%s"' % l for l in wrapped ])
| 237 | | |
def normalize(lines):
    '''
    Normalize <lines>: start a new quoted line after every \\n which
    is found inside the text, e.g "\\n\\nText\\n\\n" becomes:
    "\\n"
    "\\n"
    "Text\\n"
    "\\n"

    <lines> is returned as it is unless its first \\n is found after
    the first character and before the last three characters.
    '''
    first = lines.find('\\n')
    if not (0 < first < len(lines) - 3):
        return lines

    # A \n directly in front of the closing quote stays where it is.
    if lines[-3:] == '\\n"':
        body, tail = lines[:-3], '\\n"'
    else:
        body, tail = lines, ''
    return body.replace('\\n', '\\n"\n"').replace('""\n', '') + tail
| 253 | | |
def wash(msg, idx = 'msgid', width = 80, **kwds):
    '''
    Do washing on the msgstr or msgid fields. Wrap the text to fit in
    width <width>. <msg> is the field as one string, possibly made up
    of several lines. <idx> indicate msgid or msgstr, <width> holds
    the width. <filename> and <lno> (line number) is picked up from
    <kwds>, they are needed for the message of a parse error.
    Returns the washed field as a string. A field of a single line
    which is not longer than <width> is returned as it is.
    Raises MsgmergeError if a line of the field can not be parsed.
    '''
    msg = normalize(msg)
    lines = msg.splitlines()
    size = len(lines)
    if size <= 1 and len(msg) <= width:
        return msg

    # Collect the text, one element for each part ending with \n.
    washed = []
    # The first line is special
    m = re.match('^%s "(.*)"$' % (idx, ), lines[0])
    if not m:
        print(lines[0])
        # <lno> is the line after the field, step back to the bad line.
        kwds['lno'] -= size + 1
        raise MsgmergeError('parse error: %(filename)s:%(lno)s.'
                            % kwds)
    washed.append(m.group(1))
    if m.group(1).endswith(r'\n'):
        washed.append('')
    i = 0
    for line in lines[1:]:
        m = re.match(r'^"(\s*.*)"$', line)
        i += 1
        if not m:
            print(line)
            kwds['lno'] -= size - i + 1
            raise MsgmergeError('parse error: %(filename)s:%(lno)s.'
                                % kwds)
        washed[-1] += m.group(1)
        if m.group(1).endswith(r'\n'):
            washed.append('')
    if washed[0] == '':
        washed.pop(0)
    # The list is empty by now if every line of the field was empty.
    if washed and washed[-1] == '':
        washed.pop()
    if not washed:
        return '%s ""\n' % idx

    washed = [ wrap(w, width - 3) for w in washed ] # " and \n removed.

    # One line or multiline
    if len(washed) == 1 and len('%s %s\n' % (idx, washed[0])) < width:
        return '%s %s\n' % (idx, washed[0])
    return '%s ""\n%s\n' % (idx, '\n'.join(washed))
| 305 | | |
def parse(filename, entry):
    '''
    Parse po or pot file with name <filename>. Set the variable
    <entry> to msgid/msgstr to indicate pot/po file. The return value
    is a dict with msgid (washed) as key and Msgs instances as
    values. Raises MsgmergeError if a line is found where none of the
    known fields starts.
    '''
    lines = io(filename).readlines()
    Msgs.file = filename
    messages = {}
    last = len(lines)
    g = gen(lines)
    # msgstr is set here as well: an entry with a msgid but without a
    # msgstr would otherwise use the name before it is assigned.
    msgstr = cmt = autocmt = ref = flag = ''
    msgid = False
    lno = 0
    while not lno == last:
        l, lno = g.next()
        # The fields are looked for in the order they are found in an
        # entry, each slurp() returns the first line after its field.
        if l.startswith('# '):
            l, lno, g, cmt = slurp(l, g, '# ')
        if l.startswith('#.'):
            l, lno, g, autocmt = slurp(l, g, '#.')
        if l.startswith('#:'):
            l, lno, g, ref = slurp(l, g, '#:')
        if l.startswith('#,'):
            l, lno, g, flag = slurp(l, g, '#,')
        if l.startswith('msgid'):
            l, lno, g, msgid = slurp(l, g, '"')
        if l.startswith('msgstr'):
            l, lno, g, msgstr = slurp(l, g, '"')

        # Only an empty line may follow an entry.
        if not lno == last and not l.strip() == '':
            raise MsgmergeError('parse error: %s:%s.' % (filename, lno))

        if msgid and entry in ('msgstr', 'msgid'):
            idx = wash(msgid, filename = filename, lno = lno)
            if entry == 'msgstr':
                # po file, keep the comments of the translator
                messages[idx] = Msgs(msgid, msgstr, flag, lno, entry,
                                     cmt = cmt)
            else:
                # pot file, keep automatic comments and references
                messages[idx] = Msgs(msgid, msgstr, flag, lno, entry,
                                     autocmt = autocmt, ref = ref)
            msgid = False
            msgstr = cmt = autocmt = ref = flag = ''

    for m in messages.values():
        m.wash()
    return messages
| 352 | | |
def fuzzy_match(pot, defs):
    '''
    Look for the Msgs object in the dict <defs> whose cleaned id is most
    similar (difflib ratio) to the cleaned id of the Msgs object <pot>.
    Entries with an empty translation and the header entry are never
    considered. The best candidate is returned when its ratio is above
    0.59 (the 0.6 limit minus 0.01); otherwise False is returned.
    '''
    threshold = 0.6
    best_ratio, best = threshold - 0.01, False
    # NOTE(review): the junk predicate receives single characters, so the
    # comparison with the two-character string ' "' looks like it can never
    # be true -- confirm whether `x in ' "'` was intended.
    matcher = difflib.SequenceMatcher(lambda x: x == ' "', '',
                                      pot.get_clean_id())
    target_len = len(pot.get_clean_id())
    for cand in defs.values():
        # Skip untranslated entries and the header (empty msgid).
        if cand.str == 'msgstr ""\n' or cand.id == 'msgid ""\n':
            continue
        cand_len = len(cand.get_clean_id())
        # Cheap length comparison before any difflib work.
        if target_len > 2 * cand_len or cand_len > 1.5 * target_len:
            continue
        matcher.set_seq1(cand.get_clean_id())
        # quick_ratio() is an upper bound, so it can rule candidates out.
        if matcher.quick_ratio() < best_ratio:
            continue
        ratio = matcher.ratio()  # the expensive call
        if ratio > best_ratio:
            best_ratio, best = ratio, cand
    return best
| 379 | | |
def flags(po, pot, fuzzy = False, obs = False):
    '''
    Build the flag line for an entry from the flag fields of the Msgs
    objects <po> and <pot>. With <fuzzy> set, <po>\'s flags are ignored
    and a fuzzy flag is used in their place. With <obs> set, the
    c-format, python-format and docstring flags are dropped. The
    docstring flag is also dropped unless the global option.docstrings
    is set. Returns the combined flag line (newline terminated), or an
    empty string when no flag besides the leading '#' is left.
    '''
    global option
    if not (po.flag or pot.flag or fuzzy):
        return ''

    if fuzzy:
        first = '#, fuzzy'
    else:
        first = po.flag.strip()
    combined = '%s, %s' % (first, pot.flag.strip())

    # Drop empty pieces and duplicates while keeping first-seen order.
    parts = []
    for piece in combined.split(', '):
        if piece and piece not in parts:
            parts.append(piece)

    unwanted = []
    if not option.docstrings:
        unwanted.append('docstring')
    if obs:
        unwanted.extend(['c-format', 'python-format', 'docstring'])
    for name in unwanted:
        if name in parts:
            parts.remove(name)

    # Put fuzzy first, i.e. right behind the leading '#'.
    if 'fuzzy' in parts:
        pos = parts.index('fuzzy')
        if pos != 1:
            parts[1], parts[pos] = parts[pos], parts[1]

    if len(parts) == 1:
        return ''
    return ', '.join(parts) + '\n'
| 421 | | |
def add(pot, po, fuzzy = False):
    '''
    Build a new entry from the Msgs objects <pot> and <po>. The
    translator comment and the translation come from <po>; the automatic
    comment, the references and the msgid come from <pot>. If <fuzzy> is
    true, <po>\'s flag field is ignored (in flags()). Returns the entry
    as one multiline string.
    '''
    pieces = (
        po.cmt,
        pot.autocmt,
        pot.ref,
        flags(po, pot, fuzzy = fuzzy),
        pot.id,
        po.str,
    )
    return ''.join(pieces)
| 436 | | |
def header(pot, defs):
    '''
    Update date in header entry. The POT-Creation-Date value found in the
    header of the Msgs object <pot> is written into the header entry
    (the one with an empty msgid) found in the dict <defs>. Returns the
    updated header entry.

    Raises MsgmergeError when <defs> does not hold exactly one header
    entry, or when either header lacks a POT-Creation-Date field.
    '''
    headers = [ d for d in defs.values() if d.id == 'msgid ""\n' ]
    if len(headers) != 1:
        raise MsgmergeError('Error: did not find header in po file.')
    po = headers[0]

    r = re.compile(r'(.*^"POT-Creation-Date:\s+)(.*?)(\\n"$.*)',
                   re.MULTILINE | re.DOTALL)
    m = r.match(pot.str)
    if not m:
        raise MsgmergeError(
            'Error: did not find POT-Creation-Date field in pot file.')

    new_date = m.group(2)
    # A callable replacement keeps any backslash in the date from being
    # interpreted as a group reference.
    updated, count = r.subn(
        lambda mo: mo.group(1) + new_date + mo.group(3), po.str)
    if not count == 1:
        raise MsgmergeError(
            'Error: did not find POT-Creation-Date field in po file.')
    # The substituted text used to be thrown away, which left the po
    # header carrying its old date.
    po.str = updated
    return po
| 459 | | |
def match(defs, refs):
    '''
    Pair every Msgs object in <refs> (pot entries) with a Msgs object in
    <defs> (po entries): first by exact id, then through fuzzy_match(),
    and finally with an empty translation. The header entry is handled
    first and obsolete entries are appended last. The return value is a
    list with po entries.
    '''
    global option
    matches = []
    empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')

    # Order the pot entries by line number; the first one is the header.
    by_line = [(r.lno, r) for r in refs.values()]
    by_line.sort()
    po = header(by_line.pop(0)[1], defs)
    matches.append(add(empty, po))
    po.used()

    remaining = [ pair[1] for pair in by_line ]
    for pot in remaining:
        if option.verbose:
            sys.stderr.write('.')
        is_fuzzy = False
        po = defs.get(pot.id, False)            # Perfect match
        if not po:
            po = fuzzy_match(pot, defs)         # Fuzzy match
            is_fuzzy = True
        if po:
            matches.append(add(pot, po, fuzzy = is_fuzzy))
            po.used(); pot.used()
        else:
            matches.append(add(pot, empty))     # No match

    obsolete(defs, matches)
    return matches
| 491 | | |
def obsolete(defs, matches):
    '''
    Handle obsolete translations: every entry in <defs> that has a
    translation but a use count of zero gets its flags reduced, is marked
    through its obsolete() method and is appended to the list <matches>,
    in line-number order.
    '''
    unused = [ (d.lno, d) for d in defs.values()
               if d.count == 0 and not d.str == 'msgstr ""\n' ]
    unused.sort()
    empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')
    for pair in unused:
        old = pair[1]
        old.flag = flags(old, empty, obs = True)
        old.obsolete()
        matches.append('%s%s%s' % (old.flag, old.id, old.str))
| 503 | | |
def help():
    '''Write the module docstring to standard out and exit with status 0.'''
    sys.stdout.write('%s\n' % (__doc__,))
    sys.exit(0)
| 508 | | |
def cmdline():
    '''
    Parse options and arguments from command line and run merge().

    Exactly two file arguments are required; they are passed to merge()
    as the po file and the pot file, in that order. The process exits
    with status 1 on a usage error or when merge() raises MsgmergeError.
    '''
    advice = 'Try `%(name)s --help\' for more information.'
    try:
        # --suffix and --backup store their argument below, so they must
        # be declared with a trailing '=' for getopt to hand one over.
        long_opt = ['help', 'version', 'update', 'output-file=',
                    'quiet', 'silent', 'docstrings', 'suffix=', 'backup=']
        opts, args = getopt.getopt(sys.argv[1:], 'hVUo:qD', long_opt)
    except getopt.error as msg:
        print('%s: %s\n%s' % ('%(name)s', msg, advice) % globals())
        sys.exit(1)

    option = Options(cmdline = True)
    for opt, arg in opts:
        if opt in ['-h', '--help']:
            help()
        elif opt in ['-V', '--version']:
            print('%(name)s %(__version__)s' % globals())
            sys.exit(0)
        elif opt in ['-o', '--output-file']:
            option.outfile = arg
        elif opt in ['-U', '--update']:
            option.update = True
        elif opt in ['-q', '--silent', '--quiet']:
            option.verbose = False
        elif opt in ['-D', '--docstrings']:
            option.docstrings = True
        elif opt in ['--suffix']:
            option.suffix = arg
        elif opt in ['--backup']:
            option.backup = arg

    # Sanity checks
    warn = False
    if option.update and option.outfile:
        warn = '--update and --output-file are mutually exclusive.'
    if len(args) == 0:
        warn = 'no input files given.'
    elif len(args) == 1 or len(args) > 2:
        warn = 'exactly 2 input files required.'
    if warn:
        print('%s: %s\n%s' % ('%(name)s', warn, advice) % globals())
        sys.exit(1)

    if option.update:
        # Update mode rewrites the po file itself.
        option.outfile = args[0]
    elif not option.outfile:
        option.outfile = '-'

    defs, refs = args

    try:
        merge(defs, refs, option = option)
    except MsgmergeError as err:
        print('%(name)s: ' % globals() + '%s' % err)
        sys.exit(1)
| 564 | | |
def io(iofile, mode = 'rU'):
    '''
    Wrapper around open(). When <mode> contains 'r', the first three
    bytes are read and the file is rewound unless they equal the UTF-8
    byte order mark, so a leading BOM is skipped. Returns the open file
    object.

    Raises MsgmergeError when opening or probing the file fails with an
    IOError.
    '''
    fo = None
    try:
        fo = open(iofile, mode)
        if 'r' in mode and fo.read(3) != codecs.BOM_UTF8:
            fo.seek(0)
    except IOError as msg:
        # Do not leak the handle when the failure happened after open().
        if fo is not None:
            fo.close()
        raise MsgmergeError('error while opening file: %s: %s.' %
                            (msg.strerror, iofile))
    return fo
| 576 | | |
def backup(infile):
    '''
    Handle backup of files in update mode.

    NOTE: as written this only works out the backup file name (<infile>
    plus the SIMPLE_BACKUP_SUFFIX environment value, default '~'); no
    file is copied and nothing is returned.
    '''
    env = os.environ
    env.get('VERSION_CONTROL', '')  # looked up, but the value is not used
    backup_suffix = env.get('SIMPLE_BACKUP_SUFFIX', '~')
    backup_file = '%s%s' % (infile, backup_suffix)
| 583 | | |
def changes(new, old):
    '''
    Compare the joined lines of <old> (joined as is) with the entries of
    <new> (joined with newlines). Returns 0 when both texts are equal,
    1 when the old text sorts after the new one and -1 otherwise.
    '''
    old_text = ''.join(old)
    new_text = '\n'.join(new)
    return (old_text > new_text) - (old_text < new_text)
| 586 | | |
def write(matches, outfile):
    '''
    Write the list <matches>, joined with newlines, to file <outfile>.
    The name `-\' selects standard out. A real file is closed once the
    text has been written so the data is flushed to disk.
    '''
    text = '\n'.join(matches)
    if outfile == '-':
        sys.stdout.write(text)
        return
    fd = io(outfile, 'w')
    try:
        fd.write(text)
    finally:
        fd.close()
| 594 | | |
def merge(def_file, ref_file, update = True, outfile = '-',
          docstrings = True, suffix = '~', backup = True,
          verbose = True, **kwds):
    '''
    Merge po file <def_file> with pot file <ref_file> . If <update> is
    set to True then only update if there are changes to the po
    file. Set outfile to write updated po file to an another file. Set
    to `-\' for writing to standard out. If docstrings is False
    docstrings flag will removed. Set verbose to False to suppress
    progress indicators. <kwds> is used to pass options from the
    command line interface: when it holds an `option\' entry, that object
    replaces the Options instance built from the other parameters.
    '''
    global option
    option = kwds.get('option', Options(update = update,
                                        outfile = outfile,
                                        docstrings = docstrings,
                                        suffix = suffix,
                                        backup = backup,
                                        verbose = verbose))
    def_msgs = parse(def_file, 'msgstr')
    ref_msgs = parse(ref_file, 'msgid')
    if verbose and not __name__ == '__main__':
        sys.stderr.write('Merging %s with %s\n' % (ref_file, def_file))
    updated_lines = match(def_msgs, ref_msgs)
    if option.verbose:
        sys.stderr.write(' done.\n')

    if not option.update:
        write(updated_lines, option.outfile)
        return

    # Update mode: read the current po file and close the handle before
    # the file is possibly rewritten.
    fo = io(def_file)
    try:
        current = fo.readlines()
    finally:
        fo.close()
    if changes(updated_lines, current):
        write(updated_lines, def_file)
| 625 | | |
def merge_dir(directory, pot = False, include = None, exclude = None,
              verbose = True):
    '''
    Tries to merge a directory of po files. Uses simple glob to find
    po files and pot file. The parameter <pot> can be used to specify
    the pot file in the directory. If the list <include> is given only
    files in this list is merged. Use the list <exclude> to exclude
    files to be merged. This function is only useful if po files and
    pot file are in the same directory. Set <verbose> to get
    information when running.

    Raises MsgmergeError when no pot file, more than one pot file or no
    po file is found. A MsgmergeError raised while merging a single po
    file is reported on standard error and the remaining files are still
    processed.
    '''
    if directory.endswith('/'):
        directory = os.path.dirname(directory)
    if pot:
        pot = os.path.basename(pot)
    else:
        found = glob.glob('%s/*.pot' % directory)
        if not found:
            raise MsgmergeError('No pot file found.')
        elif len(found) > 1:
            raise MsgmergeError('More than one pot file found: %s.' % found)
        pot = os.path.basename(found[0])

    if not include:
        pos = glob.glob('%s/*.po' % directory)
        # A single po file is a valid directory; only zero is an error.
        if not pos:
            raise MsgmergeError('No po file(s) found.')
        pos = [ os.path.basename(po) for po in pos ]
    else:
        pos = [ os.path.basename(po) for po in include ]

    for po in exclude or []:
        try:
            pos.remove(po)
        except ValueError:
            pass

    path_format = '%s/%s'
    for po in pos:
        try:
            merge(path_format % (directory, po),
                  path_format % (directory, pot),
                  update = True, verbose = verbose,
                  outfile = path_format % (directory, po))
        except MsgmergeError as err:
            if verbose:
                sys.stderr.write('%s Not updated.\n' % err)
            else:
                sys.stderr.write('%s %s not updated.\n' % (err, po))
| 673 | | |
# Run the command line interface only when executed as a script.
if __name__ == '__main__':
    cmdline()
trunk/scripts/build/pygettext.py
| r253614 | r253615 | |
| 1 | | #! /usr/bin/env python |
| 2 | | # -*- coding: iso-8859-1 -*- |
| 3 | | # Originally written by Barry Warsaw <barry@zope.com> |
| 4 | | # |
| 5 | | # Minimally patched to make it even more xgettext compatible |
| 6 | | # by Peter Funk <pf@artcom-gmbh.de> |
| 7 | | # |
| 8 | | # 2002-11-22 Jürgen Hermann <jh@web.de> |
| 9 | | # Added checks that _() only contains string literals, and |
| 10 | | # command line args are resolved to module lists, i.e. you |
| 11 | | # can now pass a filename, a module or package name, or a |
| 12 | | # directory (including globbing chars, important for Win32). |
| 13 | | # Made docstring fit in 80 chars wide displays using pydoc. |
| 14 | | # |
| 15 | | |
# for selftesting: use fintl's gettext as _() when that module can be
# imported, otherwise fall back to an identity function so that the
# _() calls below still work.
try:
    import fintl
    _ = fintl.gettext
except ImportError:
    _ = lambda s: s
| 22 | | |
| 23 | | __doc__ = _("""pygettext -- Python equivalent of xgettext(1) |
| 24 | | |
| 25 | | Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the |
| 26 | | internationalization of C programs. Most of these tools are independent of |
| 27 | | the programming language and can be used from within Python programs. |
| 28 | | Martin von Loewis' work[1] helps considerably in this regard. |
| 29 | | |
| 30 | | There's one problem though; xgettext is the program that scans source code |
| 31 | | looking for message strings, but it groks only C (or C++). Python |
| 32 | | introduces a few wrinkles, such as dual quoting characters, triple quoted |
| 33 | | strings, and raw strings. xgettext understands none of this. |
| 34 | | |
| 35 | | Enter pygettext, which uses Python's standard tokenize module to scan |
| 36 | | Python source code, generating .pot files identical to what GNU xgettext[2] |
| 37 | | generates for C and C++ code. From there, the standard GNU tools can be |
| 38 | | used. |
| 39 | | |
| 40 | | A word about marking Python strings as candidates for translation. GNU |
| 41 | | xgettext recognizes the following keywords: gettext, dgettext, dcgettext, |
| 42 | | and gettext_noop. But those can be a lot of text to include all over your |
| 43 | | code. C and C++ have a trick: they use the C preprocessor. Most |
| 44 | | internationalized C source includes a #define for gettext() to _() so that |
| 45 | | what has to be written in the source is much less. Thus these are both |
| 46 | | translatable strings: |
| 47 | | |
| 48 | | gettext("Translatable String") |
| 49 | | _("Translatable String") |
| 50 | | |
| 51 | | Python of course has no preprocessor so this doesn't work so well. Thus, |
| 52 | | pygettext searches only for _() by default, but see the -k/--keyword flag |
| 53 | | below for how to augment this. |
| 54 | | |
| 55 | | [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html |
| 56 | | [2] http://www.gnu.org/software/gettext/gettext.html |
| 57 | | |
| 58 | | NOTE: pygettext attempts to be option and feature compatible with GNU |
| 59 | | xgettext where ever possible. However some options are still missing or are |
| 60 | | not fully implemented. Also, xgettext's use of command line switches with |
| 61 | | option arguments is broken, and in these cases, pygettext just defines |
| 62 | | additional switches. |
| 63 | | |
| 64 | | Usage: pygettext [options] inputfile ... |
| 65 | | |
| 66 | | Options: |
| 67 | | |
| 68 | | -a |
| 69 | | --extract-all |
| 70 | | Extract all strings. |
| 71 | | |
| 72 | | -d name |
| 73 | | --default-domain=name |
| 74 | | Rename the default output file from messages.pot to name.pot. |
| 75 | | |
| 76 | | -E |
| 77 | | --escape |
| 78 | | Replace non-ASCII characters with octal escape sequences. |
| 79 | | |
| 80 | | -D |
| 81 | | --docstrings |
| 82 | | Extract module, class, method, and function docstrings. These do |
| 83 | | not need to be wrapped in _() markers, and in fact cannot be for |
| 84 | | Python to consider them docstrings. (See also the -X option). |
| 85 | | |
| 86 | | -h |
| 87 | | --help |
| 88 | | Print this help message and exit. |
| 89 | | |
| 90 | | -k word |
| 91 | | --keyword=word |
| 92 | | Keywords to look for in addition to the default set, which are: |
| 93 | | %(DEFAULTKEYWORDS)s |
| 94 | | |
| 95 | | You can have multiple -k flags on the command line. |
| 96 | | |
| 97 | | -K |
| 98 | | --no-default-keywords |
| 99 | | Disable the default set of keywords (see above). Any keywords |
| 100 | | explicitly added with the -k/--keyword option are still recognized. |
| 101 | | |
| 102 | | --no-location |
| 103 | | Do not write filename/lineno location comments. |
| 104 | | |
| 105 | | -n |
| 106 | | --add-location |
| 107 | | Write filename/lineno location comments indicating where each |
| 108 | | extracted string is found in the source. These lines appear before |
| 109 | | each msgid. The style of comments is controlled by the -S/--style |
| 110 | | option. This is the default. |
| 111 | | |
| 112 | | -o filename |
| 113 | | --output=filename |
| 114 | | Rename the default output file from messages.pot to filename. If |
| 115 | | filename is `-' then the output is sent to standard out. |
| 116 | | |
| 117 | | -p dir |
| 118 | | --output-dir=dir |
| 119 | | Output files will be placed in directory dir. |
| 120 | | |
| 121 | | -S stylename |
| 122 | | --style stylename |
| 123 | | Specify which style to use for location comments. Two styles are |
| 124 | | supported: |
| 125 | | |
| 126 | | Solaris # File: filename, line: line-number |
| 127 | | GNU #: filename:line |
| 128 | | |
| 129 | | The style name is case insensitive. GNU style is the default. |
| 130 | | |
| 131 | | -v |
| 132 | | --verbose |
| 133 | | Print the names of the files being processed. |
| 134 | | |
| 135 | | -V |
| 136 | | --version |
| 137 | | Print the version of pygettext and exit. |
| 138 | | |
| 139 | | -w columns |
| 140 | | --width=columns |
| 141 | | Set width of output to columns. |
| 142 | | |
| 143 | | -x filename |
| 144 | | --exclude-file=filename |
| 145 | | Specify a file that contains a list of strings that are not be |
| 146 | | extracted from the input files. Each string to be excluded must |
| 147 | | appear on a line by itself in the file. |
| 148 | | |
| 149 | | -X filename |
| 150 | | --no-docstrings=filename |
| 151 | | Specify a file that contains a list of files (one per line) that |
| 152 | | should not have their docstrings extracted. This is only useful in |
| 153 | | conjunction with the -D option above. |
| 154 | | |
| 155 | | If `inputfile' is -, standard input is read. |
| 156 | | """) |
| 157 | | |
| 158 | | import os |
| 159 | | import imp |
| 160 | | import sys |
| 161 | | import glob |
| 162 | | import time |
| 163 | | import getopt |
| 164 | | import token |
| 165 | | import tokenize |
| 166 | | import operator |
| 167 | | |
| 168 | | from umit.pm.core.const import PM_VERSION |
| 169 | | |
# Version of this script; written into the Generated-By header field.
__version__ = '1.5'

# Keywords that mark translatable strings; DEFAULTKEYWORDS is the
# comma-separated form that is interpolated into the help text.
default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)

# Separator used when joining string pieces.
EMPTYSTRING = ''



# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there. The %(pm_version)s, %(time)s and %(version)s placeholders are filled
# in when the catalog is written.
pot_header = _('''\
# PacketManipulator catalog.
# Copyright (C) 2009 Adriano Montero Marques
# Francesco Piccinno <stack.box@gmail.com>, 2009
#
msgid ""
msgstr ""
"Project-Id-Version: PacketManipulator %(pm_version)s\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8bit\\n"
"Generated-By: pygettext.py %(version)s\\n"

''')
| 199 | | |
| 200 | | |
def usage(code, msg=''):
    """Write the help text, and <msg> if given, to stderr; exit with <code>.

    The help text is the module docstring interpolated with the module
    globals.
    """
    err = sys.stderr
    err.write('%s\n' % (__doc__ % globals(),))
    if msg:
        err.write('%s\n' % (msg,))
    sys.exit(code)
| 206 | | |
| 207 | | |
| 208 | | |
# Escape table indexed by character code (0..255); filled by make_escapes().
escapes = []

def make_escapes(pass_iso8859):
    """(Re)build the global escape table used by escape().

    When <pass_iso8859> is true, iso-8859 characters pass through, so that
    e.g. 'msgid "Höhe"' does not become 'msgid "H\\366he"'. Otherwise every
    character outside the 32..126 range becomes an octal escape. Backslash,
    tab, carriage return, newline and the double quote always get their
    C-style escapes.

    The table is rebuilt from scratch on every call, so calling this again
    with a different setting takes effect instead of appending a second
    set of 256 entries behind the first.
    """
    global escapes
    if pass_iso8859:
        # Characters 128..255 are judged by their low 7 bits.
        mod = 128
    else:
        mod = 256
    table = []
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            table.append(chr(i))
        else:
            table.append("\\%03o" % i)
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    # Replace the contents in place so existing references to the list
    # object see the new table.
    escapes[:] = table
| 230 | | |
| 231 | | |
def escape(s):
    """Return <s> with every character replaced by its escape-table entry.

    The table is the global `escapes` list, indexed by character code.
    """
    global escapes
    table = escapes
    return EMPTYSTRING.join([table[ord(ch)] for ch in s])
| 238 | | |
| 239 | | |
def safe_eval(s):
    """Evaluate the string-literal source text <s> and return its value.

    The expression is evaluated with empty builtins and no local names.
    NOTE(review): this is still eval(); it should only ever be handed
    string tokens from the tokenizer, never arbitrary external text.
    """
    # unwrap quotes, safely
    restricted_globals = {'__builtins__': {}}
    no_locals = {}
    return eval(s, restricted_globals, no_locals)
| 243 | | |
| 244 | | |
def normalize(s):
    """Convert the Python string <s> into its quoted .po file form.

    A string without newlines becomes a single quoted, escaped string.
    Anything else becomes an empty first string followed by one quoted
    string per line, much closer to C style.
    """
    pieces = s.split('\n')
    if len(pieces) == 1:
        return '"' + escape(s) + '"'
    if not pieces[-1]:
        # The text ended with a newline: drop the empty tail and put the
        # newline back on the real last line so it is escaped with it.
        pieces.pop()
        pieces[-1] = pieces[-1] + '\n'
    escaped = [escape(piece) for piece in pieces]
    return '""\n"' + '\\n"\n"'.join(escaped) + '"'
| 260 | | |
| 261 | | |
def containsAny(str, set):
    """Return True when 'str' contains at least one of the chars in 'set'."""
    return any(c in str for c in set)
| 265 | | |
| 266 | | |
| 267 | | def _visit_pyfiles(list, dirname, names): |
| 268 | | """Helper for getFilesForName().""" |
| 269 | | # get extension for python source files |
| 270 | | if not globals().has_key('_py_ext'): |
| 271 | | global _py_ext |
| 272 | | _py_ext = [triple[0] for triple in imp.get_suffixes() |
| 273 | | if triple[2] == imp.PY_SOURCE][0] |
| 274 | | |
| 275 | | # don't recurse into CVS directories |
| 276 | | if 'CVS' in names: |
| 277 | | names.remove('CVS') |
| 278 | | |
| 279 | | # add all *.py files to list |
| 280 | | list.extend( |
| 281 | | [os.path.join(dirname, file) for file in names |
| 282 | | if os.path.splitext(file)[1] == _py_ext] |
| 283 | | ) |
| 284 | | |
| 285 | | |
def _get_modpkg_path(dotted_name, pathlist=None):
    """Get the filesystem path for a module or a package.

    Return the file system path to a file for a module, and to a directory
    for a package. Return None if the name is not found, or is a builtin or
    extension module.
    """
    # split off top-most name
    head, dot, rest = dotted_name.partition('.')

    if dot:
        # we have a dotted path, look up the top-level package
        try:
            file, pathname, description = imp.find_module(head, pathlist)
            if file:
                file.close()
        except ImportError:
            return None

        # only a package can hold the remaining name parts
        if description[2] != imp.PKG_DIRECTORY:
            return None
        # recursively handle the remaining name parts
        return _get_modpkg_path(rest, [pathname])

    # plain name
    try:
        file, pathname, description = imp.find_module(dotted_name, pathlist)
        if file:
            file.close()
    except ImportError:
        return None
    if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
        return None
    return pathname
| 323 | | |
| 324 | | |
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.

    A name that does not exist on disk is first treated as a glob pattern
    (when it holds any of the characters *?[]) and otherwise looked up as
    a module or package name. An empty list is returned when nothing
    matches.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            collected = []
            for hit in glob.glob(name):
                collected.extend(getFilesForName(hit))
            return collected

        # try to find module or package
        name = _get_modpkg_path(name)
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        found = []
        os.path.walk(name, _visit_pyfiles, found)
        return found
    if os.path.exists(name):
        # a single file
        return [name]
    return []
| 353 | | |
| 354 | | |
class TokenEater:
    """Token-driven state machine that collects translatable strings.

    An instance is called once per token. It records every string found
    inside a call to one of the configured keywords and, when enabled in
    the options, module, class and function docstrings. write() emits the
    collected messages as a pot file.
    """

    def __init__(self, options):
        self.__options = options
        # message text -> {(filename, lineno): isdocstring}
        self.__messages = {}
        self.__state = self.__waiting
        self.__data = []
        self.__lineno = -1
        self.__freshmodule = 1
        self.__curfile = None

    def __call__(self, ttype, tstring, stup, etup, line):
        # dispatch to the current state, passing only the start row
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        opts = self.__options
        is_name = ttype == tokenize.NAME
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                # Comments and blank lines may precede the docstring;
                # any other token ends the search for it.
                if ttype not in (tokenize.COMMENT, tokenize.NL):
                    if ttype == tokenize.STRING:
                        self.__addentry(safe_eval(tstring), lineno,
                                        isdocstring=1)
                    self.__freshmodule = 0
                return
            # class or function docstring?
            if is_name and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if is_name and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        # ignore anything until we see the colon
        if ttype == tokenize.OP and tstring == ':':
            self.__state = self.__suitedocstring

    def __suitedocstring(self, ttype, tstring, lineno):
        # ignore any intervening noise
        if ttype in (tokenize.NEWLINE, tokenize.INDENT, tokenize.COMMENT):
            return
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
        # either the docstring was recorded or there was none
        self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == '(':
            # Start collecting; remember where the call began.
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings. Record the
            # line number of the first line of the strings and update the
            # list of messages seen. Reset state for the next batch. If
            # there were no strings inside _(), then just ignore this
            # entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            details = {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }
            sys.stderr.write(_(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % details + '\n')
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        if lineno is None:
            lineno = self.__lineno
        if msg in self.__options.toexclude:
            return
        entry = (self.__curfile, lineno)
        self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        """Record the file now being tokenized; a module docstring may follow."""
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        """Write the header and all collected messages to <fp> in pot format."""
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
        # The time stamp in the header doesn't have the same format as that
        # generated by xgettext...
        fp.write(pot_header % {'time': timestamp, 'version': __version__,
                               'pm_version': PM_VERSION} + '\n')
        # Sort the entries. First sort each particular entry's keys, then
        # sort all the entries by their first item.
        grouped = {}
        for msg, places in self.__messages.items():
            group_key = tuple(sorted(places.keys()))
            grouped.setdefault(group_key, []).append((msg, places))
        for group_key in sorted(grouped.keys()):
            for msg, places in sorted(grouped[group_key]):
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so. This is to aid translators who may
                # wish to skip translating some unimportant docstrings.
                isdocstring = 0
                if sum(places.values()):
                    isdocstring = 1
                # places is a dictionary-set of (filename, lineno) tuples.
                # We want them sorted first by file name and then by line
                # number.
                locations = sorted(places.keys())
                if options.writelocations:
                    # location comments are different b/w Solaris and GNU:
                    if options.locationstyle == options.SOLARIS:
                        for filename, lineno in locations:
                            d = {'filename': filename, 'lineno': lineno}
                            fp.write(_(
                                '# File: %(filename)s, line: %(lineno)d')
                                % d + '\n')
                    elif options.locationstyle == options.GNU:
                        # fit as many locations on one line, as long as the
                        # resulting line length doesn't exceed
                        # 'options.width'
                        locline = '#:'
                        for filename, lineno in locations:
                            d = {'filename': filename, 'lineno': lineno}
                            s = _(' %(filename)s:%(lineno)d') % d
                            if len(locline) + len(s) <= options.width:
                                locline = locline + s
                            else:
                                fp.write(locline + '\n')
                                locline = "#:" + s
                        if len(locline) > 2:
                            fp.write(locline + '\n')
                if isdocstring:
                    fp.write('#, docstring\n')
                fp.write('msgid ' + normalize(msg) + '\n')
                fp.write('msgstr ""\n\n')
| 505 | | |
| 506 | | |
| 507 | | |
def _read_stripped_lines(path):
    """Return every line of the text file `path` without its line ending.

    The file is closed even when reading fails.  IOError raised by open()
    or by reading is left for the caller to handle.
    """
    fp = open(path)
    try:
        return [line.rstrip('\r\n') for line in fp.readlines()]
    finally:
        fp.close()


def main():
    """Command-line entry point: extract messages and write the catalog.

    Parses sys.argv with getopt and collects the settings on a local
    Options object.  Every input named on the command line ('-' means
    standard input) is tokenized into one TokenEater, which finally writes
    its output to options.outfile ('-' means standard output).

    Invalid options or option values are reported through usage().  An
    unreadable --exclude-file prints a message on stderr and exits with
    status 1.
    """
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             # -X takes a file name, so the long form needs '=' as well;
             # without it getopt rejects --no-docstrings=FILE.
             'docstrings', 'no-docstrings=',
             ])
    except getopt.error:
        # The exception is fetched through sys.exc_info() so that this
        # clause parses on every Python version.
        usage(1, sys.exc_info()[1])

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0 # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            sys.stdout.write(
                _('pygettext.py (xgettext for Python) %s') % __version__
                + '\n')
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            # One file name per line.  Only the line ending is removed, so
            # a last line without a trailing newline is no longer
            # truncated by one character.
            for name in _read_stripped_lines(arg):
                options.nodocstrings[name] = 1

    # calculate escapes
    make_escapes(options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            # Line endings must go, otherwise no entry can ever equal an
            # extracted message.  Blank lines carry no string to exclude.
            options.toexclude = [
                line for line in _read_stripped_lines(options.excludefilename)
                if line]
        except IOError:
            sys.stderr.write(
                _("Can't read --exclude-file: %s") % options.excludefilename
                + '\n')
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                sys.stdout.write(_('Reading standard input') + '\n')
            fp = sys.stdin
            closep = 0
        else:
            if options.verbose:
                sys.stdout.write(_('Working on %s') % filename + '\n')
            fp = open(filename)
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokenize.tokenize(fp.readline, eater)
            except tokenize.TokenError:
                # args is (message, (row, column)); report it and carry on
                # with the next input file.
                err = sys.exc_info()[1]
                sys.stderr.write('%s: %s, line %d, column %d' % (
                    err.args[0], filename, err.args[1][0], err.args[1][1])
                    + '\n')
        finally:
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()
| 664 | | |
| 665 | | |
# Script entry point: run the extractor only when executed directly.
if __name__ == '__main__':
    main()
    # some more test strings
    # NOTE(review): the calls below are evaluated after main() returns and
    # their results are discarded; presumably they only exist so that
    # running the extractor on its own source has extra cases to pick up
    # (unicode literal, formatted message, adjacent literals) -- confirm.
    _(u'a unicode string')
    # this one creates a warning
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    _('more' 'than' 'one' 'string')