trunk/scripts/build/check_po.py
| r0 | r253614 | |
| 1 | #! /usr/bin/env python |
| 2 | # |
| 3 | # check_po - a gramps tool to check validity of po files |
| 4 | # |
| 5 | # Copyright (C) 2006-2006 Kees Bakker |
| 6 | # |
| 7 | # This program is free software; you can redistribute it and/or modify |
| 8 | # it under the terms of the GNU General Public License as published by |
| 9 | # the Free Software Foundation; either version 2 of the License, or |
| 10 | # (at your option) any later version. |
| 11 | # |
| 12 | # This program is distributed in the hope that it will be useful, |
| 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | # GNU General Public License for more details. |
| 16 | # |
| 17 | # You should have received a copy of the GNU General Public License |
| 18 | # along with this program; if not, write to the Free Software |
| 19 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 20 | |
| 21 | # |
| 22 | # TODO |
| 23 | # |
| 24 | # * Check for HTML text in msgstr when there is none in msgid |
| 25 | # * Check for matching HTML tag/endtag in msgstr |
| 26 | # |
| 27 | |
| 28 | # Adapted for Umit by Guilherme Polo, original file: |
| 29 | # https://gramps.svn.sourceforge.net/svnroot/gramps/branches/gramps22/po/check_po |
| 30 | |
| 31 | import re |
| 32 | import sys |
| 33 | from optparse import OptionParser |
| 34 | |
# Application name interpolated into the command-line description.
APP = "Umit"

# Module-level tables, all created empty.
# NOTE(review): nothing in the visible part of this file reads or writes
# them -- presumably left over from the original gramps tool; confirm.
all_total = dict()
all_fuzzy = dict()
all_untranslated = dict()
all_percent_s = dict()
all_named_s = dict()
all_bnamed_s = dict()
all_context = dict()
all_coverage = dict()
all_template_coverage = dict()
| 46 | |
def strip_quotes(st):
    """Return ``st`` without its enclosing double quotes.

    Only a string of at least two characters that both starts and ends
    with ``"`` is changed; any other value is returned untouched.
    """
    quoted = len(st) >= 2 and st.startswith('"') and st.endswith('"')
    if not quoted:
        return st
    # Both ends are quote characters, so there is no outer whitespace to trim.
    return st[1:-1]
| 51 | |
| 52 | # This is a base class for all checks |
class Check:
    """Base class for all checks: collects offending messages and reports them.

    ``diag()`` and ``summary()`` read ``self.diag_header`` and
    ``self.summary_text``, which this class does not set itself; the
    subclasses assign them in their constructors.
    """

    def __init__(self):
        # Messages that failed this check, in the order they were processed.
        self.msgs = []

    def diag(self):
        """Print the header followed by each collected message; silent if none."""
        if not self.msgs:
            return
        # print("") emits the same blank line as a bare print statement, and
        # the single-argument call form is valid on Python 2 and 3 alike.
        print("")
        print(self.diag_header)
        for m in self.msgs:
            m.diag()

    def summary(self):
        """Print one line: the summary label and the number of collected messages."""
        print("%-20s%d" % (self.summary_text, len(self.msgs)))
| 64 | |
class Check_fmt(Check):
    """Flag messages where a format token occurs a different number of
    times in the msgid than in the msgstr."""

    def __init__(self, fmt):
        Check.__init__(self)
        # The literal token to count, e.g. '%s' or '%d'.
        self.fmt = fmt
        self.summary_text = "%s mismatches:" % fmt
        self.diag_header = "-------- %s mismatches --------------" % fmt

    def process(self, msg):
        """Record ``msg`` when the token counts of msgid and msgstr differ."""
        in_source = msg.msgid.count(self.fmt)
        in_translation = msg.msgstr.count(self.fmt)
        if in_source != in_translation:
            self.msgs.append(msg)
| 78 | |
class Check_named_fmt(Check):
    """Flag messages whose named formats (``%(name)s`` style) differ between
    msgid and msgstr, either in number or in content."""

    # Matches '%(' name ')' followed by optional digits and one non-digit
    # character. A raw string keeps '\(' and '\w' as regex escapes instead
    # of invalid string escapes.
    find_named_fmt_pat = re.compile(r'% \( \w+ \) \d* \D', re.VERBOSE)

    def __init__(self):
        Check.__init__(self)
        self.diag_header = "-------- %() name mismatches --------------"
        self.summary_text = "%() name mismatches:"

    def process(self, msg):
        """Record ``msg`` when its two sides do not use the same named formats."""
        fmts1 = sorted(self.find_named_fmt_pat.findall(msg.msgid))
        fmts2 = sorted(self.find_named_fmt_pat.findall(msg.msgstr))
        # Sorted lists of different length never compare equal, so this one
        # comparison covers both the count check and the name check.
        if fmts1 != fmts2:
            self.msgs.append(msg)
| 101 | |
class Check_missing_sd(Check):
    """Flag translations containing a ``%(name)`` format whose conversion
    character is neither ``s`` nor ``d``."""

    # Captures the character after '%(name)' and any digits, or the empty
    # string at the end of the text. A raw string keeps the backslashes as
    # regex escapes. Try findall() on
    # '%(event_name)s: %(place)s%(endnotes)s. ' to see what it returns.
    find_named_fmt_pat2 = re.compile(r'% \( \w+ \) \d* (\D|$)', re.VERBOSE)

    def __init__(self):
        Check.__init__(self)
        self.diag_header = "-------- %() without 's' or 'd' mismatches --------------"
        self.summary_text = "%() missing s/d:"

    def process(self, msg):
        """Record ``msg`` once if any named format in msgstr lacks s/d."""
        conversions = self.find_named_fmt_pat2.findall(msg.msgstr)
        if any(conv not in ('s', 'd') for conv in conversions):
            self.msgs.append(msg)
| 119 | |
class Check_runaway(Check):
    """Flag translations that still carry a ``|`` context separator."""

    def __init__(self):
        Check.__init__(self)
        self.summary_text = "Runaway context:"
        self.diag_header = "-------- Runaway context in translation ---------"

    def process(self, msg):
        """Record ``msg`` when both sides contain '|' and the two texts differ.

        In the translated part we only want to see the translation of the
        word after the '|'.
        """
        source, translation = msg.msgid, msg.msgstr
        if source == translation:
            return
        if '|' in source and '|' in translation:
            self.msgs.append(msg)
| 133 | |
class Check_xml_chars(Check):
    """Flag tips.xml translations that contain unescaped XML special characters."""

    # Matches a '>' preceded by a non-word character, any double quote, or
    # an '&' that does not start one of the entities quot/nbsp/gt/amp.
    xml_chars_pat = re.compile( r'(?<=\W) > | " | & (?!(quot|nbsp|gt|amp);)', re.VERBOSE )

    def __init__(self):
        Check.__init__(self)
        self.summary_text = "XML special chars:"
        self.diag_header = "-------- unescaped XML special characters ---------"

    def process(self, msg):
        """Record ``msg`` when it belongs to tips.xml and its msgstr matches."""
        # Only messages that come from tips.xml are looked at.
        if not msg.is_tips_xml:
            return
        if self.xml_chars_pat.search(msg.msgstr) is not None:
            self.msgs.append(msg)
| 152 | |
class Check_last_char(Check):
    """Flag non-fuzzy translations whose final character disagrees with the
    msgid about trailing whitespace or a trailing period."""

    def __init__(self):
        Check.__init__(self)
        self.summary_text = "Last character:"
        self.diag_header = "-------- last character not identical ---------"

    def process(self, msg):
        """Record ``msg`` when exactly one side ends in whitespace, or exactly
        one side ends in '.'. Fuzzy messages are ignored."""
        if msg.is_fuzzy:
            return

        # Slices rather than indexing, so empty texts yield '' and not an error.
        id_tail = msg.msgid[-1:]
        str_tail = msg.msgstr[-1:]
        space_differs = id_tail.isspace() != str_tail.isspace()
        period_differs = (id_tail == '.') != (str_tail == '.')
        if space_differs or period_differs:
            self.msgs.append(msg)
| 172 | |
class Check_shortcut_trans(Check):
    """Flag translations that contain an underscore when the msgid has none."""

    def __init__(self):
        Check.__init__(self)
        self.summary_text = "Shortcut in msgstr:"
        self.diag_header = "-------- shortcut key in translation ---------"

    def process(self, msg):
        """Record ``msg`` when '_' appears in msgstr but not in msgid."""
        if '_' not in msg.msgid and '_' in msg.msgstr:
            self.msgs.append(msg)
| 184 | |
class Msgid:
    """One PO entry as collected by the reader.

    Keeps the raw source lines (for diagnostics) next to the unquoted,
    concatenated ``msgid`` / ``msgstr`` text, plus two flags derived from
    the entry's comment lines.
    """

    fuzzy_pat = re.compile( 'fuzzy' )
    tips_xml_pat = re.compile( r'tips\.xml' )
    # The keyword is stripped only at the start of the line, so the words
    # "msgid" / "msgstr" appearing inside the message text are left alone.
    _msgid_kw_pat = re.compile( r'^msgid\s+' )
    _msgstr_kw_pat = re.compile( r'^msgstr\s+' )

    def __init__( self, msgnr, lineno ):
        self._msgid = []        # raw lines of the msgid part
        self._msgstr = []       # raw lines of the msgstr part
        self.msgid = ''         # unquoted msgid text, continuation lines joined
        self.msgstr = ''        # unquoted msgstr text, continuation lines joined
        self._cmnt = []         # raw comment lines
        self.nr = msgnr
        self.lineno = lineno
        self.is_fuzzy = 0       # set once a comment line contains 'fuzzy'
        self.is_tips_xml = 0    # set once a comment line mentions tips.xml

    def diag( self ):
        """Print the entry's number, line number and raw msgid/msgstr lines."""
        print("")
        print("msg nr: %d, lineno: %d%s" % ( self.nr, self.lineno,
                                              " (fuzzy)" if self.is_fuzzy else "" ))
        sys.stdout.write( ''.join( self._msgid ) )
        sys.stdout.write( ''.join( self._msgstr ) )

    def _unquoted( self, keyword_pat, line, lineno ):
        """Return the text of ``line`` without leading keyword and quotes.

        Prints an error (and still returns the text) when a quote is missing.
        """
        text = keyword_pat.sub( '', line, 1 ).strip()
        # Slices instead of text[0]: an empty line must not raise IndexError.
        if text[:1] != '"' or text[-1:] != '"':
            print("ERROR at line %d: Missing quote." % lineno)
        return strip_quotes( text )

    def add_msgid( self, line, lineno ):
        """Append a msgid line (first or continuation) to this entry."""
        self._msgid.append( line )
        self.msgid += self._unquoted( self._msgid_kw_pat, line, lineno )

    def add_msgstr( self, line, lineno ):
        """Append a msgstr line (first or continuation) to this entry."""
        self._msgstr.append( line )
        self.msgstr += self._unquoted( self._msgstr_kw_pat, line, lineno )

    def add_cmnt( self, line ):
        """Store a comment line and update the fuzzy / tips.xml flags."""
        self._cmnt.append( line )
        if not self.is_fuzzy and self.fuzzy_pat.search( line ):
            self.is_fuzzy = 1
        if not self.is_tips_xml and self.tips_xml_pat.search( line ):
            self.is_tips_xml = 1
| 233 | |
def read_msgs( fname ):
    """Parse the PO/POT file ``fname`` and return its entries as Msgid items.

    The file is read line by line through a small state machine. Blank
    lines are skipped, obsolete ('#~') lines are ignored, and malformed
    input produces a WARNING on stdout. Items that end up with neither
    msgid nor msgstr text are dropped from the result.

    Raises Exception when a msgid directly follows a msgid, or a msgstr
    directly follows a msgstr.
    """
    empty_pat = re.compile( r'^ \s* $', re.VERBOSE )
    comment_pat = re.compile( r'\#', re.VERBOSE )
    msgid_pat = re.compile( r'msgid \s+ "', re.VERBOSE )
    msgstr_pat = re.compile( r'msgstr \s+ "', re.VERBOSE )
    str_pat = re.compile( r'"', re.VERBOSE )
    old_pat = re.compile( r'\#~ \s+ ', re.VERBOSE )

    # Read everything up front so the file handle is closed right away.
    with open( fname ) as f:
        lines = f.readlines()

    # parse it like a statemachine
    NONE = 0     # Nothing detected, yet
    CMNT = 1     # Inside comment part
    MSGID = 2    # Inside msgid part
    MSGSTR = 3   # Inside msgstr part
    STR = 4      # A continuation string
    OLD = 5      # An old pattern with #~

    state = NONE
    msg = None
    msgs = []

    def start_item( lineno ):
        # Every item is appended as soon as it is created, so the next
        # message number always equals the current length of the list.
        item = Msgid( len(msgs), lineno )
        msgs.append( item )
        return item

    for ix, line in enumerate( lines ):
        lineno = ix + 1    # Use line numbers for messages

        if empty_pat.match( line ):
            continue       # Empty lines are not interesting

        where = '%s:%d' % ( fname, lineno )

        # What's the next state? '#~' must be tested before the plain '#'.
        if old_pat.match( line ):
            next_state = OLD
        elif comment_pat.match( line ):
            next_state = CMNT
        elif msgid_pat.match( line ):
            next_state = MSGID
        elif msgstr_pat.match( line ):
            next_state = MSGSTR
        elif str_pat.match( line ):
            next_state = STR
        else:
            print('WARNING: Unexpected input at ' + where)
            next_state = NONE

        if state == NONE:
            # expect msgid or comment or old stuff
            if next_state == CMNT:
                state = CMNT
                msg = start_item( lineno )
                msg.add_cmnt( line )

            elif next_state == MSGID:
                state = MSGID
                msg = start_item( lineno )
                msg.add_msgid( line, lineno )

            elif next_state == MSGSTR:
                print('WARNING: Wild msgstr at ' + where)
                state = MSGSTR
                msg = start_item( lineno )
                msg.add_msgstr( line, lineno )

            elif next_state == STR:
                print('WARNING: Wild string at ' + where)

            elif next_state == OLD:
                pass    # Just skip

        elif state == CMNT:
            if next_state == CMNT:
                # Comments seen after an obsolete entry (msg is None) are
                # skipped for now.
                if msg is not None:
                    msg.add_cmnt( line )

            elif next_state == MSGID:
                state = MSGID
                if msg is None:
                    msg = start_item( lineno )
                msg.add_msgid( line, lineno )

            elif next_state == MSGSTR:
                print('WARNING: Wild msgstr at ' + where)
                state = MSGSTR
                msg = start_item( lineno )
                msg.add_msgstr( line, lineno )

            elif next_state == STR:
                print('WARNING: Wild string at ' + where)

            elif next_state == OLD:
                msg = None

        elif state == MSGID:
            if next_state == CMNT:
                # Hmmm. A comment here?
                print('WARNING: Unexpted comment at ' + where)

            elif next_state == MSGID:
                raise Exception( 'Unexpected msgid at ' + where )

            elif next_state == MSGSTR:
                state = MSGSTR
                msg.add_msgstr( line, lineno )

            elif next_state == STR:
                msg.add_msgid( line, lineno )

            elif next_state == OLD:
                msg = None

        elif state == MSGSTR:
            if next_state == CMNT:
                # A comment probably starts a new item
                state = CMNT
                msg = start_item( lineno )
                msg.add_cmnt( line )

            elif next_state == MSGID:
                state = MSGID
                msg = start_item( lineno )
                msg.add_msgid( line, lineno )

            elif next_state == MSGSTR:
                raise Exception( 'Unexpected msgstr at ' + where )

            elif next_state == STR:
                msg.add_msgstr( line, lineno )

            elif next_state == OLD:
                msg = None

        else:
            raise Exception( 'Unexpected state in po parsing (state = %d)' % state )

    # Strip items with just comments.
    return [ m for m in msgs if m.msgid or m.msgstr ]
| 402 | |
def analyze_msgs( options, fname, msgs, nr_templates = None, nth = 0 ):
    """Run all checks over ``msgs`` and print a report for the file ``fname``.

    options       object with ``skip_fuzzy`` and ``only_summary`` attributes
    fname         file name, used in the report only
    msgs          list of message items (as returned by read_msgs)
    nr_templates  number of messages in the template; None or 0 is
                  reported as 0 with a template coverage of 0
    nth           index of this file in the run; a separator is printed
                  before every file but the first
    """
    checks = [
        Check_fmt( '%s' ),
        Check_fmt( '%d' ),
        Check_named_fmt(),
        Check_missing_sd(),
        Check_runaway(),
        Check_xml_chars(),
        Check_last_char(),
        Check_shortcut_trans(),
    ]

    nr_fuzzy = 0
    nr_untranslated = 0
    for msg in msgs:
        if not msg.msgstr:
            # Untranslated messages are counted but not checked.
            nr_untranslated += 1
            continue

        if msg.is_fuzzy:
            nr_fuzzy += 1
            if options.skip_fuzzy:
                continue

        for c in checks:
            c.process( msg )

    nr_msgs = len(msgs)
    if nth > 0:
        print("")
        print("=====================================")
    print("%-20s%s" % ( "File:", fname ))
    print("%-20s%d" % ( "Template total:", nr_templates or 0 ))
    print("%-20s%d" % ( "PO total:", nr_msgs ))
    print("%-20s%d" % ( "Fuzzy:", nr_fuzzy ))
    print("%-20s%d" % ( "Untranslated:", nr_untranslated ))

    for c in checks:
        c.summary()

    # An empty PO file or an empty/missing template must not divide by zero.
    if nr_msgs:
        po_coverage = (1.0 - (float(nr_untranslated) / float(nr_msgs))) * 100
    else:
        po_coverage = 0.0
    print("%-20s%5.2f%%" % ( "PO Coverage:", po_coverage ))

    if nr_templates:
        template_coverage = po_coverage * float(nr_msgs) / float(nr_templates)
    else:
        template_coverage = 0.0
    print("%-20s%5.2f%%" % ( "Template Coverage:", template_coverage ))

    if not options.only_summary:
        for c in checks:
            c.diag()
| 458 | |
def main(args):
    """Command-line entry point.

    ``args`` is the full argument vector including the program name
    (normally ``sys.argv``; ``None`` falls back to ``sys.argv``). The first
    positional argument is the template (.pot) file; every positional
    argument, the template included, is then analyzed in turn.

    Exits with status 1 when no file is given. Errors raised while reading
    or analyzing are printed and end the run.
    """
    if args is None:
        args = sys.argv
    if len(args) < 2:
        print("Error: Especify the umit.pot file path")
        sys.exit(1)

    parser = OptionParser(description="This program validates a PO file for "
                          "%s." % APP, usage='%prog [options] po-file...' )

    parser.add_option("", "--skip-fuzzy",
                      action="store_true", dest="skip_fuzzy", default=False,
                      help="skip fuzzies")

    parser.add_option("-s", "--only-summary",
                      action="store_true", dest="only_summary", default=False,
                      help="only give the summary")

    options, po_files = parser.parse_args(list(args[1:]))

    # Take the template from the parsed positionals, not from argv[1], which
    # may be an option such as --skip-fuzzy.
    if not po_files:
        print("Error: Especify the umit.pot file path")
        sys.exit(1)

    try:
        pot_msgs = read_msgs(po_files[0])
        nr_templates = len(pot_msgs)
        for nth, fname in enumerate(po_files):
            msgs = read_msgs(fname)
            analyze_msgs(options, fname, msgs, nr_templates, nth)

    except Exception as e:
        print(e)

if __name__ == "__main__":
    main(sys.argv)
trunk/scripts/build/msgmerge.py
| r0 | r253614 | |
| 1 | #! /usr/bin/env python |
| 2 | # -*- coding: iso-8859-1 -*- |
| 3 | # |
| 4 | # Copyright Terje Røsten <terjeros@phys.ntnu.no> Nov. 2003. |
| 5 | # |
| 6 | '''Merge two Uniforum style .po files together. |
| 7 | |
| 8 | This is an implementation (not complete) in Python of the GNU |
| 9 | msgmerge(1) program. It can be used on the command line (or as a Python |
| 10 | module). |
| 11 | |
| 12 | Usage: msgmerge.py [OPTIONS] def.po ref.pot |
| 13 | |
| 14 | The def.po file is an existing PO file with translations. The ref.pot |
| 15 | file is the last created PO file with up-to-date source references but |
| 16 | old translations, or a PO Template file. |
| 17 | |
| 18 | Options: |
| 19 | -U, --update update def.po, |
| 20 | do nothing if def.po is already up to date. |
| 21 | -o, --output-file=FILE write output to file FILE. Output is written to |
| 22 | stdout if set to - or if the option is not present. |
| 23 | -D, --docstrings don\'t remove docstring flag. |
| 24 | -h, --help display help text and exit. |
| 25 | -V, --version display version and exit. |
| 26 | -q, --quiet, --silent suppress progress indicators. |
| 27 | ''' |
| 28 | from __future__ import generators |
| 29 | |
| 30 | if not __name__ == '__main__': |
| 31 | __doc__ += '''\ |
| 32 | |
| 33 | When used as module the interesting functions are merge() and |
| 34 | merge_dir(). |
| 35 | |
| 36 | The merge() function does the same as the command line version, and |
| 37 | the arguments are as follows. The first argument is the def.po file, |
| 38 | then the ref.pot file. The third argument controls whether to work in |
| 39 | update mode or not, then the next argument sets the output file. Set |
| 40 | the next argument to False to remove docstring flags. The last |
| 41 | argument can be used to suppress progress indicators. The default is |
| 42 | to work in update mode with progress indicators. |
| 43 | |
| 44 | Example: |
| 45 | merge("def.po", "ref.pot") |
| 46 | merge the files def.po and ref.pot and write output to def.po if |
| 47 | there are any changes. |
| 48 | merge("def.po", "ref.pot", docstrings = False, verbose = False, |
| 49 | update = False, outfile = "-") |
| 50 | merge the files def.po and ref.pot and write output to stdout, |
| 51 | remove docstring flag and be quiet. |
| 52 | |
| 53 | The merge_dir() function is useful when merging a directory of po |
| 54 | files. The only required argument is the name of the directory with po |
| 55 | files and the pot file. It will use simple glob to find the files. The |
| 56 | second argument can be used to specify the pot file (in the |
| 57 | directory). Third argument is a list of po files (then globbing will |
| 58 | not be used) and the next argument is list of filename to exclude. The |
| 59 | last argument can be used to suppress progress indicators. Docstring |
| 60 | flag will not be removed. |
| 61 | |
| 62 | Example: |
| 63 | merge_dir("po") |
| 64 | merge (and update) all po files in directory po with the single pot |
| 65 | file in the same directory. |
| 66 | |
| 67 | The module raises the MsgmergeError exception in case of error. |
| 68 | ''' |
# Revision stamp, version string and program name of this module.
__revision__ = "$Id: msgmerge.py,v 1.41 2003/11/18 19:10:42 terjeros Exp $"
__version__ = "0.1"
name = "msgmerge.py"

# Names exported by a star-import of this module.
__all__ = ["merge", "merge_dir", "MsgmergeError"]
| 74 | |
| 75 | import sys |
| 76 | import re |
| 77 | import string |
| 78 | import getopt |
| 79 | import difflib |
| 80 | import glob |
| 81 | import os.path |
| 82 | import codecs |
| 83 | |
# Compatibility shim for interpreters that predate the True/False builtins.
try:
    True, False
except NameError:
    # Set the names through globals(): a literal "True, False = 1, 0"
    # assignment is a SyntaxError on Python 3 and would keep the whole
    # module from compiling, even though this branch never runs there.
    globals().update({'True': 1, 'False': 0})
| 88 | |
class Msgs:
    '''One message entry (id, translation, comments, flags) plus bookkeeping.'''
    # Class-wide wrapping width and the name of the file being parsed.
    width = 80
    file = ''

    def __init__(self, msgid, msgstr, flag, lno, entry, **kwds):
        self.id, self.str = msgid, msgstr
        self.flag, self.entry, self.lno = flag, entry, lno
        # Optional comment fields default to the empty string.
        for key in ('cmt', 'ref', 'autocmt'):
            setattr(self, key, kwds.get(key, ''))
        self.count = 0

    def wash(self):
        '''Re-wrap both the id and the translation via the module-level wash().'''
        common = {'width': self.width, 'filename': self.file, 'lno': self.lno}
        self.id = wash(self.id, **common)
        self.str = wash(self.str, 'msgstr', **common)

    def used(self):
        '''Count one more use of this entry.'''
        self.count = self.count + 1

    def get_clean_id(self):
        '''Return the id with its first 'msgid "' occurrence removed.'''
        return self.id.replace('msgid "', '', 1)

    def obsolete(self):
        '''Turn the entry into an obsolete one: re-wrap it to a width reduced
        by the marker length and prefix every line with '#~ '.'''
        self.width -= len('#~ ')
        self.wash()
        marked_id = [ '#~ %s\n' % s for s in self.id.splitlines() ]
        marked_str = [ '#~ %s\n' % s for s in self.str.splitlines() ]
        self.id = ''.join(marked_id)
        self.str = ''.join(marked_str)
| 119 | |
class Options:
    '''Holder for the merge settings.'''
    def __init__(self, cmdline = False, **kwds):
        if cmdline:
            # Command-line mode starts from fixed values; keyword arguments
            # are not consulted.
            self.update = self.outfile = self.docstrings = False
            self.verbose = self.backup = True
            self.suffix = '~'
            return
        # Module mode: each setting comes from a keyword argument, with a
        # fallback when it is not given.
        fallbacks = (
            ('update', True),
            ('outfile', '-'),
            ('docstrings', True),
            ('verbose', False),
            ('suffix', '~'),
            ('backup', True),
        )
        for key, fallback in fallbacks:
            setattr(self, key, kwds.get(key, fallback))
| 137 | |
class MsgmergeError(Exception):
    '''Error type raised by this module, for example on a parse error.'''
    pass
| 140 | |
def gen(lines):
    '''
    Generator over <lines> yielding (line, line number) pairs, numbered
    from 1, with the first '#~ ' (obsolete marker) of each line removed.
    After the last line one extra pair is yielded: the unmodified last
    line with the same line number.
    '''
    for index, raw in enumerate(lines):
        yield raw.replace('#~ ', '', 1), index + 1
    # Trailing pair, so one more item can be pulled after the last line.
    yield raw, index + 1
| 152 | |
def slurp(s, g, sign):
    '''
    Pull (line, line number) pairs from the iterator <g> and append each
    line to the string <s> for as long as the line starts with <sign>.
    When <sign> is '# ', a line consisting of a bare '#' is also taken.

    Returns a tuple: the first line that did not qualify, its line
    number, the iterator <g> and the (possibly) extended string <s>.
    StopIteration from <g> is not caught here.
    '''
    # The builtin next() works on Python 2.6+ and 3; the .next() method
    # exists on Python 2 only.
    collected = [s]
    l, lno = next(g)
    while l.startswith(sign) or (sign == '# ' and l.strip() == '#'):
        collected.append(l)
        l, lno = next(g)
    return l, lno, g, ''.join(collected)
| 166 | |
def splitted_fit(chunk, line, width, break_always, break_after_space):
    '''
    Look for a position at which <chunk> can be broken so that its first
    part, added to <line>, stays within <width>. The search runs from the
    end of <chunk> backwards and stops at the first usable position.
    Returns a tuple (part that fits, rest of chunk); when no position is
    found the result is ('', chunk).
    '''
    for pos in reversed(range(len(chunk))):
        char = chunk[pos]
        head, tail = chunk[:pos], chunk[pos:]
        # A character from <break_always>, provided the head fits.
        if char in break_always and len(head + line) <= width:
            return head, tail
        # A character from <break_after_space> directly after whitespace;
        # the width is not checked in this case.
        if char in break_after_space and pos and not chunk[pos - 1].strip():
            return head, tail
        # An escaped double quote (backslash followed by "), if the head fits.
        if char == '\\' and chunk[pos + 1:pos + 2] == '"' \
               and len(head + line) <= width:
            return head, tail
    return '', chunk
| 188 | |
def wrap(msg, width):
    '''
    Wrap the string <msg> to lines of at most <width> characters where a
    break point can be found, surround every line with a pair of " and
    return the lines joined with newlines. An empty or all-whitespace
    <msg> is returned as is, in quotes.
    '''
    if not msg or msg.isspace():
        return '"%s"' % msg

    # \ and " is here, but " is special in po files.
    break_always = '$%+({['
    # XXX what about non-ASCII punctuation?
    break_after_space = '_-=^`~\'<|>&*#@'
    enders = '.:,;!?/])}|%-'
    # Punctuation that may be part of a word: everything that is no ender.
    extra = ''.join([c for c in string.punctuation if c not in enders])
    escaped = { 'enders' : re.escape(enders),
                'extra'  : re.escape(extra) }
    regex = r'([\w%(extra)s]*[\s%(enders)s)]+[\s%(enders)s]*)' % escaped
    splitter = re.compile(regex, re.UNICODE)
    pieces = [ p for p in splitter.split(msg) if p != '' ]

    current = pieces.pop(0)

    # Handle \n on end of line: glue a lone 'n' back onto its backslash.
    if len(pieces) > 1 and pieces[-1] == 'n' and len(pieces[-2]) > 0 \
           and pieces[-2][-1] == '\\':
        pieces[-2] += pieces[-1]
        pieces.pop()
    # Do not allow a single \n on a line
    if len(pieces) > 2 and pieces[-1] == '\\n':
        pieces[-2] += pieces[-1]
        pieces.pop()

    wrapped = []
    for piece in pieces:
        too_long = (len(current) > width or len(piece) > width
                    or len(current + piece) > width)
        if not too_long:
            current += piece
            continue
        fit, rest = splitted_fit(piece, current, width, break_always,
                                 break_after_space)
        wrapped.append(current + fit)
        current = rest
    wrapped.append(current)
    return '\n'.join([ '"%s"' % w for w in wrapped ])
| 237 | |
def normalize(lines):
    r'''
    Put every escaped newline (backslash + n) inside <lines> at the end
    of its own quoted line, e.g. "\n\nText\n\n" becomes:
    "\n"
    "\n"
    "Text\n"
    "\n"
    The text is returned unchanged unless the first escaped newline lies
    after position 0 and before the last three characters.
    '''
    escaped_nl = '\\n'
    if not (0 < lines.find(escaped_nl) < len(lines) - 3):
        return lines

    closing = '\\n"'
    body, tail = lines, ''
    if lines[-3:] == closing:
        # Keep a final escaped newline plus closing quote out of the
        # replacement, so that no extra empty line is opened after it.
        body, tail = lines[:-3], closing
    return body.replace(escaped_nl, '\\n"\n"').replace('""\n', '') + tail
| 253 | |
def wash(msg, idx = 'msgid', width = 80, **kwds):
    '''
    Do washing on the msgstr or msgid fields: wrap the text to fit in
    width <width>. <msg> is the field as one string (possibly several
    lines), <idx> is the keyword the field starts with (msgid or
    msgstr). <filename> and <lno> (line number) are picked up from
    <kwds> and only used in error messages.

    Returns the washed field as a string. A field that is a single line
    of at most <width> characters is returned as normalize() left it.
    Raises MsgmergeError when a line does not have the expected quoted
    form.
    '''
    msg = normalize(msg)
    lines = msg.splitlines()
    size = len(lines)
    if size <= 1 and len(msg) <= width:
        return msg

    def fail(bad_line, back):
        # Print the offending line, then report the line number <back>
        # lines before <lno>. Missing filename/lno must not hide the
        # parse error behind a KeyError.
        print(bad_line)
        lno = kwds.get('lno')
        if lno is not None:
            lno -= back
        raise MsgmergeError('parse error: %s:%s.'
                            % (kwds.get('filename'), lno))

    washed = []
    # The first line is special: it carries the keyword.
    m = re.match('^%s "(.*)"$' % (idx, ), lines[0])
    if not m:
        fail(lines[0], size + 1)
    washed.append(m.group(1))
    if m.group(1).endswith(r'\n'):
        washed.append('')

    for i, line in enumerate(lines[1:]):
        m = re.match(r'^"(\s*.*)"$', line)
        if not m:
            fail(line, size - (i + 1) + 1)
        # Continuation lines extend the current paragraph; an escaped
        # newline at the end starts a new one.
        washed[-1] += m.group(1)
        if m.group(1).endswith(r'\n'):
            washed.append('')

    if washed[0] == '':
        washed.pop(0)
    # The list can be empty by now (a field made of empty lines only).
    if washed and washed[-1] == '':
        washed.pop()
    if not washed:
        return '%s ""\n' % idx

    washed = [ wrap(w, width - 3) for w in washed ] # " and \n removed.

    # One line or multiline
    if len(washed) == 1 and len('%s %s\n' % (idx, washed[0])) < width:
        return '%s %s\n' % (idx, washed[0])
    return '%s ""\n%s\n' % (idx, '\n'.join(washed))
| 305 | |
def parse(filename, entry):
    '''
    Parse the po or pot file with name <filename>. <entry> is 'msgstr'
    for a po file and 'msgid' for a pot file; it decides which comment
    fields are stored with each message (translator comments for po,
    automatic comments and references for pot).

    The return value is a dict with the washed msgid as key and Msgs
    instances as values. Raises MsgmergeError on a parse error.
    '''
    # NOTE(review): io() is not the standard io module (which is not
    # imported here); presumably a helper defined elsewhere in this file
    # that returns an open file object -- confirm.
    lines = io(filename).readlines()
    Msgs.file = filename
    messages = {}
    last = len(lines)
    g = gen(lines)
    # msgstr starts out empty too, so an entry without a msgstr line
    # cannot hit an unbound local below.
    cmt = autocmt = ref = flag = msgstr = ''
    msgid = False
    lno = 0
    while not lno == last:
        l, lno = next(g)
        # Each slurp() collects one field and hands back the first line
        # that no longer belongs to it, which the next test then inspects.
        if l.startswith('# '):
            l, lno, g, cmt = slurp(l, g, '# ')
        if l.startswith('#.'):
            l, lno, g, autocmt = slurp(l, g, '#.')
        if l.startswith('#:'):
            l, lno, g, ref = slurp(l, g, '#:')
        if l.startswith('#,'):
            l, lno, g, flag = slurp(l, g, '#,')
        if l.startswith('msgid'):
            l, lno, g, msgid = slurp(l, g, '"')
        if l.startswith('msgstr'):
            l, lno, g, msgstr = slurp(l, g, '"')

        # After an entry only a blank line (or the end of file) may follow.
        if not lno == last and not l.strip() == '':
            raise MsgmergeError('parse error: %s:%s.' % (filename, lno))

        if msgid and entry in ('msgstr', 'msgid'):
            idx = wash(msgid, filename = filename, lno = lno)
            if entry == 'msgstr':
                messages[idx] = Msgs(msgid, msgstr, flag, lno, entry,
                                     cmt = cmt)
            else:
                messages[idx] = Msgs(msgid, msgstr, flag, lno, entry,
                                     autocmt = autocmt, ref = ref)
            # Start the next entry from a clean slate.
            msgid = False
            msgstr = cmt = autocmt = ref = flag = ''

    for m in messages.values():
        m.wash()
    return messages
| 352 | |
def fuzzy_match(pot, defs):
    '''
    Find the entry in the dict <defs> whose id is most similar (by
    difflib ratio) to the id of the Msgs object <pot>. Entries with an
    empty translation and the header entry (empty msgid) are never
    candidates. Returns the best Msgs object whose ratio is above
    0.6 - 0.01, or False when there is none.
    '''
    target = pot.get_clean_id()
    target_len = len(target)
    best_ratio, best = 0.6 - 0.01, False
    # NOTE(review): difflib applies the junk test to single characters of
    # the strings, so comparing against the two-character string ' "'
    # looks like it can never be true -- confirm the intent.
    matcher = difflib.SequenceMatcher(lambda x: x == ' "', '', target)
    for candidate in defs.values():
        # Skip empty translations and the empty msgid (header).
        if candidate.str == 'msgstr ""\n' or candidate.id == 'msgid ""\n':
            continue
        cand_id = candidate.get_clean_id()
        cand_len = len(cand_id)
        # Simple and fast tests first: the lengths have to be comparable.
        if target_len > 2 * cand_len or cand_len > 1.5 * target_len:
            continue
        matcher.set_seq1(cand_id)
        if matcher.quick_ratio() < best_ratio:
            continue
        score = matcher.ratio()    # This is expensive
        if score > best_ratio:
            best_ratio, best = score, candidate
    return best
| 379 | |
def flags(po, pot, fuzzy = False, obs = False):
    '''
    Combine the flag fields of the Msgs objects <po> and <pot> into one
    flag line. When <fuzzy> is true the flags of <po> are ignored and
    the fuzzy flag is added. When <obs> is set the c-format,
    python-format and docstring flags are removed. The docstring flag
    is also removed unless the global option.docstrings is set.
    Returns the combined flag line, or '' when no flag is left.
    '''
    global option
    if not (po.flag or pot.flag or fuzzy):
        return ''

    if fuzzy:
        combined = '%s, %s' % ('#, fuzzy', pot.flag.strip())
    else:
        combined = '%s, %s' % (po.flag.strip(), pot.flag.strip())

    # Keep the first occurrence of every non-empty piece, in order.
    seen = {}
    parts = []
    for piece in combined.split(', '):
        if piece not in seen and piece:
            seen[piece] = piece
            parts.append(piece)

    unwanted = []
    if not option.docstrings:
        unwanted.append('docstring')
    if obs:
        unwanted.extend(['c-format', 'python-format', 'docstring'])
    for item in unwanted:
        if item in parts:
            parts.remove(item)

    # Put fuzzy first (directly after the leading '#' piece).
    if 'fuzzy' in parts:
        at = parts.index('fuzzy')
        if at != 1:
            parts[1], parts[at] = parts[at], parts[1]

    # A single remaining piece means there is no flag left to write.
    if len(parts) == 1:
        return ''
    return ', '.join(parts) + '\n'
| 421 | |
def add(pot, po, fuzzy = False):
    '''
    Build a new entry from the Msgs objects <pot> and <po>.

    The translator comment and the translation come from <po>; the
    automatic comment, the references and the msgid come from <pot>.
    The flag line is produced by flags(); when <fuzzy> is true the
    flag field of <po> is ignored there.  Returns the up to date entry
    as one multiline string.
    '''
    pieces = (
        po.cmt,
        pot.autocmt,
        pot.ref,
        flags(po, pot, fuzzy = fuzzy),
        pot.id,
        po.str,
    )
    return ''.join(pieces)
| 436 | |
def header(pot, defs):
    '''
    Update the POT-Creation-Date in the header entry of the po file.

    <pot> is the header Msgs object of the pot file and <defs> is the
    dict of Msgs objects read from the po file.  The creation date found
    in <pot> replaces the one in the po header, whose str attribute is
    updated in place.  Returns the updated header entry.

    Raises MsgmergeError when <defs> does not hold exactly one header
    entry or when either header lacks a POT-Creation-Date field.
    '''
    try:
        # Unpacking into a one-element list fails unless exactly one
        # entry has the empty msgid.
        [po] = [ d for d in defs.values() if d.id == 'msgid ""\n' ]
    except ValueError:
        raise MsgmergeError('Error: did not find header in po file.')

    r = re.compile(r'(.*^"POT-Creation-Date:\s+)(.*?)(\\n"$.*)',
                   re.MULTILINE | re.DOTALL)
    m = r.match(pot.str)
    if not m:
        raise MsgmergeError(
            'Error: did not find POT-Creation-Date field in pot file.')

    new_date = m.group(2)

    def swap_date(found):
        # A callable replacement keeps the date text from being parsed
        # as a regex replacement template.
        return found.group(1) + new_date + found.group(3)

    updated, count = r.subn(swap_date, po.str)
    if not count == 1:
        raise MsgmergeError(
            'Error: did not find POT-Creation-Date field in po file.')
    # Store the substituted text; previously the result was thrown away
    # and the header kept its old date.
    po.str = updated
    return po
| 459 | |
def match(defs, refs):
    '''
    Match the Msgs objects in <refs> (pot file) against the Msgs
    objects in <defs> (po file).

    Entries are handled in order of their line number in the pot file.
    The first one is taken to be the header.  Every other entry is
    paired with an exact match on msgid, else with the best fuzzy
    match, else with an empty translation.  Unused translations are
    appended by obsolete().  The return value is a list with po entries.
    '''
    global option
    matches = []
    empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')
    by_line = sorted([(ref.lno, ref) for ref in refs.values()])

    head = header(by_line[0][1], defs)      # Header entry
    matches.append(add(empty, head))
    head.used()

    for lno, pot in by_line[1:]:
        if option.verbose:
            sys.stderr.write('.')
        found = defs.get(pot.id, False)     # Perfect match
        is_fuzzy = False
        if not found:
            found = fuzzy_match(pot, defs)  # Fuzzy match
            is_fuzzy = True
        if not found:
            matches.append(add(pot, empty)) # No match
            continue
        matches.append(add(pot, found, fuzzy = is_fuzzy))
        found.used()
        pot.used()

    obsolete(defs, matches)
    return matches
| 491 | |
def obsolete(defs, matches):
    '''
    Append the obsolete translations in <defs> to the list <matches>.

    An entry is treated as obsolete when its count is 0 and its
    translation is not empty.  Such entries are processed in order of
    line number: their flag is rebuilt with flags(obs = True), their
    obsolete() method is called and the resulting text is appended.
    '''
    leftovers = sorted([ (entry.lno, entry) for entry in defs.values()
                         if entry.count == 0
                         and not entry.str == 'msgstr ""\n' ])
    empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')
    for lno, entry in leftovers:
        entry.flag = flags(entry, empty, obs = True)
        entry.obsolete()
        matches.append('%s%s%s' % (entry.flag, entry.id, entry.str))
| 503 | |
def help():
    '''Write the module documentation to standard out and exit with 0.'''
    sys.stdout.write('%s\n' % (__doc__,))
    sys.exit(0)
| 508 | |
def cmdline():
    '''
    Parse options and arguments from the command line and run merge().

    Exactly two file arguments are required; they are passed to merge()
    as the po file and the pot file, in that order.  Usage problems and
    MsgmergeError are reported on standard out and end the program with
    exit status 1.
    '''
    advice = 'Try `%(name)s --help\' for more information.'

    def fail(problem):
        # Format each part on its own so that a '%' inside <problem>
        # is never treated as a format directive.
        sys.stdout.write('%s: %s\n%s\n' % ('%(name)s' % globals(), problem,
                                           advice % globals()))
        sys.exit(1)

    try:
        # 'suffix=' takes a value; without the '=' getopt always handed
        # back an empty string for --suffix.
        long_opt = ['help', 'version', 'update', 'output-file=',
                    'quiet', 'silent', 'docstrings', 'suffix=', 'backup']
        opts, args = getopt.getopt(sys.argv[1:], 'hVUo:qD', long_opt)
    except getopt.error:
        fail(sys.exc_info()[1])

    option = Options(cmdline = True)
    for opt, arg in opts:
        if opt in ['-h', '--help']:
            help()
        elif opt in ['-V', '--version']:
            sys.stdout.write('%(name)s %(__version__)s' % globals() + '\n')
            sys.exit(0)
        elif opt in ['-o', '--output-file']:
            option.outfile = arg
        elif opt in ['-U', '--update']:
            option.update = True
        elif opt in ['-q', '--silent', '--quiet']:
            option.verbose = False
        elif opt in ['-D', '--docstrings']:
            option.docstrings = True
        elif opt in ['--suffix']:
            option.suffix = arg
        elif opt in ['--backup']:
            # NOTE(review): --backup takes no value, so <arg> is always
            # '' here -- confirm whether True was intended.
            option.backup = arg

    # Sanity checks (a later check overrides an earlier warning)
    warn = False
    if option.update and option.outfile:
        warn = '--update and --output-file are mutually exclusive.'
    if len(args) == 0:
        warn = 'no input files given.'
    elif len(args) == 1 or len(args) > 2:
        warn = 'exactly 2 input files required.'
    if warn:
        fail(warn)

    if option.update:
        option.outfile = args[0]
    elif not option.outfile:
        option.outfile = '-'

    defs, refs = args

    try:
        merge(defs, refs, option = option)
    except MsgmergeError:
        err = sys.exc_info()[1]
        sys.stdout.write('%(name)s: ' % globals() + '%s' % err + '\n')
        sys.exit(1)
| 564 | |
def io(iofile, mode = 'rU'):
    '''
    Wrapper around open().

    Opens <iofile> with <mode> and returns the file object.  For read
    modes the first three characters are consumed when they equal the
    UTF-8 byte order mark; otherwise the file is rewound to the start.
    An IOError is turned into a MsgmergeError.
    '''
    try:
        handle = open(iofile, mode)
        if 'r' in mode:
            leading = handle.read(3)
            if leading != codecs.BOM_UTF8:
                # No BOM: give back what was just read.
                handle.seek(0)
    except IOError:
        problem = sys.exc_info()[1]
        raise MsgmergeError('error while opening file: %s: %s.' %
                            (problem.args[1], iofile))
    return handle
| 576 | |
def backup(infile):
    '''
    Handle backup of files in update mode.

    Currently this only works out the backup file name from <infile>
    and the SIMPLE_BACKUP_SUFFIX environment variable (default '~');
    no file is copied and nothing is returned.
    '''
    # VERSION_CONTROL is looked up but its value is not used.
    os.environ.get('VERSION_CONTROL', '')
    ending = os.environ.get('SIMPLE_BACKUP_SUFFIX', '~')

    backup_file = '%s%s' % (infile, ending)
| 583 | |
def changes(new, old):
    '''
    Compare the old file contents with the new entries.

    <old> is a list of lines (joined as is) and <new> a list of entries
    (joined with newlines).  Returns 0 when both texts are equal, -1
    when the old text sorts before the new one and 1 otherwise.
    '''
    before = ''.join(old)
    after = '\n'.join(new)
    return (before > after) - (before < after)
| 586 | |
def write(matches, outfile):
    '''
    Write the list <matches> to file <outfile>.

    The entries are joined with newlines.  When <outfile> is `-\' the
    text goes to standard out, which is left open.  Any other name is
    opened for writing through io() and closed again afterwards, also
    when writing fails.
    '''
    text = '\n'.join(matches)
    if outfile == '-':
        sys.stdout.write(text)
        return
    fd = io(outfile, 'w')
    try:
        fd.write(text)
    finally:
        # Closing flushes the data and releases the handle.
        fd.close()
| 594 | |
def merge(def_file, ref_file, update = True, outfile = '-',
          docstrings = True, suffix = '~', backup = True,
          verbose = True, **kwds):
    '''
    Merge po file <def_file> with pot file <ref_file>.

    If <update> is set to True the po file itself is rewritten, but
    only when the merge result differs from its current contents.
    Otherwise the result is written to <outfile>; use `-\' for writing
    to standard out.  If <docstrings> is False docstring flags will be
    removed.  Set <verbose> to False to suppress progress indicators.
    <kwds> is used to pass options from the command line interface: a
    ready made Options object given as option=... replaces the
    individual keyword settings.

    The chosen options are stored in the global variable option.
    '''
    global option
    option = kwds.get('option', Options(update = update,
                                        outfile = outfile,
                                        docstrings = docstrings,
                                        suffix = suffix,
                                        backup = backup,
                                        verbose = verbose))
    def_msgs = parse(def_file, 'msgstr')
    ref_msgs = parse(ref_file, 'msgid')
    if verbose and not __name__ == '__main__':
        sys.stderr.write('Merging %s with %s\n' % (ref_file, def_file))
    updated_lines = match(def_msgs, ref_msgs)
    if option.verbose:
        sys.stderr.write(' done.\n')

    if not option.update:
        write(updated_lines, option.outfile)
        return

    # Read the current contents and close the file before it may be
    # reopened for writing.
    current = io(def_file)
    try:
        old_lines = current.readlines()
    finally:
        current.close()
    if changes(updated_lines, old_lines):
        write(updated_lines, def_file)
| 625 | |
def merge_dir(directory, pot = False, include = None, exclude = None,
              verbose = True):
    '''
    Tries to merge a directory of po files.

    Uses simple glob to find po files and pot file.  The parameter
    <pot> can be used to specify the pot file in the directory.  If
    the list <include> is given only files in this list are merged.
    Use the list <exclude> to exclude files from being merged.  This
    function is only useful if po files and pot file are in the same
    directory.  Set <verbose> to get information when running.

    Raises MsgmergeError when no pot file, more than one pot file or
    no po file is found.  A MsgmergeError raised while merging a
    single po file is reported on standard error and the remaining
    files are still processed.
    '''
    if include is None:
        include = []
    if exclude is None:
        exclude = []
    if not directory:
        # An empty name means the current directory, not the root.
        directory = '.'
    elif directory.endswith('/'):
        directory = os.path.dirname(directory)

    if pot:
        pot = os.path.basename(pot)
    else:
        pot = glob.glob('%s/*.pot' % directory)
        if not pot:
            raise MsgmergeError('No pot file found.')
        elif len(pot) > 1:
            raise MsgmergeError('More than one pot file found: %s.' % pot)
        pot = os.path.basename(pot[0])

    if not include:
        pos = glob.glob('%s/*po' % directory)
        # A single po file is enough to work with.
        if not pos:
            raise MsgmergeError('No po file(s) found.')
        pos = [ os.path.basename(po) for po in pos ]
    else:
        pos = [ os.path.basename(po) for po in include ]

    for po in exclude:
        if po in pos:
            pos.remove(po)

    format = '%s/%s'
    for po in pos:
        try:
            merge(format % (directory, po), format % (directory, pot),
                  update = True, verbose = verbose,
                  outfile = format % (directory, po))
        except MsgmergeError:
            err = sys.exc_info()[1]
            if verbose:
                sys.stderr.write('%s Not updated.\n' % err)
            else:
                sys.stderr.write('%s %s not updated.\n' % (err, po))
| 673 | |
| 674 | if __name__ == '__main__': |
| 675 | cmdline() |
trunk/scripts/build/pygettext.py
| r0 | r253614 | |
| 1 | #! /usr/bin/env python |
| 2 | # -*- coding: iso-8859-1 -*- |
| 3 | # Originally written by Barry Warsaw <barry@zope.com> |
| 4 | # |
| 5 | # Minimally patched to make it even more xgettext compatible |
| 6 | # by Peter Funk <pf@artcom-gmbh.de> |
| 7 | # |
| 8 | # 2002-11-22 Jürgen Hermann <jh@web.de> |
| 9 | # Added checks that _() only contains string literals, and |
| 10 | # command line args are resolved to module lists, i.e. you |
| 11 | # can now pass a filename, a module or package name, or a |
| 12 | # directory (including globbing chars, important for Win32). |
| 13 | # Made docstring fit in 80 chars wide displays using pydoc. |
| 14 | # |
| 15 | |
| 16 | # for selftesting |
| 17 | try: |
| 18 | import fintl |
| 19 | _ = fintl.gettext |
| 20 | except ImportError: |
| 21 | _ = lambda s: s |
| 22 | |
| 23 | __doc__ = _("""pygettext -- Python equivalent of xgettext(1) |
| 24 | |
| 25 | Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the |
| 26 | internationalization of C programs. Most of these tools are independent of |
| 27 | the programming language and can be used from within Python programs. |
| 28 | Martin von Loewis' work[1] helps considerably in this regard. |
| 29 | |
| 30 | There's one problem though; xgettext is the program that scans source code |
| 31 | looking for message strings, but it groks only C (or C++). Python |
| 32 | introduces a few wrinkles, such as dual quoting characters, triple quoted |
| 33 | strings, and raw strings. xgettext understands none of this. |
| 34 | |
| 35 | Enter pygettext, which uses Python's standard tokenize module to scan |
| 36 | Python source code, generating .pot files identical to what GNU xgettext[2] |
| 37 | generates for C and C++ code. From there, the standard GNU tools can be |
| 38 | used. |
| 39 | |
| 40 | A word about marking Python strings as candidates for translation. GNU |
| 41 | xgettext recognizes the following keywords: gettext, dgettext, dcgettext, |
| 42 | and gettext_noop. But those can be a lot of text to include all over your |
| 43 | code. C and C++ have a trick: they use the C preprocessor. Most |
| 44 | internationalized C source includes a #define for gettext() to _() so that |
| 45 | what has to be written in the source is much less. Thus these are both |
| 46 | translatable strings: |
| 47 | |
| 48 | gettext("Translatable String") |
| 49 | _("Translatable String") |
| 50 | |
| 51 | Python of course has no preprocessor so this doesn't work so well. Thus, |
| 52 | pygettext searches only for _() by default, but see the -k/--keyword flag |
| 53 | below for how to augment this. |
| 54 | |
| 55 | [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html |
| 56 | [2] http://www.gnu.org/software/gettext/gettext.html |
| 57 | |
| 58 | NOTE: pygettext attempts to be option and feature compatible with GNU |
| 59 | xgettext where ever possible. However some options are still missing or are |
| 60 | not fully implemented. Also, xgettext's use of command line switches with |
| 61 | option arguments is broken, and in these cases, pygettext just defines |
| 62 | additional switches. |
| 63 | |
| 64 | Usage: pygettext [options] inputfile ... |
| 65 | |
| 66 | Options: |
| 67 | |
| 68 | -a |
| 69 | --extract-all |
| 70 | Extract all strings. |
| 71 | |
| 72 | -d name |
| 73 | --default-domain=name |
| 74 | Rename the default output file from messages.pot to name.pot. |
| 75 | |
| 76 | -E |
| 77 | --escape |
| 78 | Replace non-ASCII characters with octal escape sequences. |
| 79 | |
| 80 | -D |
| 81 | --docstrings |
| 82 | Extract module, class, method, and function docstrings. These do |
| 83 | not need to be wrapped in _() markers, and in fact cannot be for |
| 84 | Python to consider them docstrings. (See also the -X option). |
| 85 | |
| 86 | -h |
| 87 | --help |
| 88 | Print this help message and exit. |
| 89 | |
| 90 | -k word |
| 91 | --keyword=word |
| 92 | Keywords to look for in addition to the default set, which are: |
| 93 | %(DEFAULTKEYWORDS)s |
| 94 | |
| 95 | You can have multiple -k flags on the command line. |
| 96 | |
| 97 | -K |
| 98 | --no-default-keywords |
| 99 | Disable the default set of keywords (see above). Any keywords |
| 100 | explicitly added with the -k/--keyword option are still recognized. |
| 101 | |
| 102 | --no-location |
| 103 | Do not write filename/lineno location comments. |
| 104 | |
| 105 | -n |
| 106 | --add-location |
| 107 | Write filename/lineno location comments indicating where each |
| 108 | extracted string is found in the source. These lines appear before |
| 109 | each msgid. The style of comments is controlled by the -S/--style |
| 110 | option. This is the default. |
| 111 | |
| 112 | -o filename |
| 113 | --output=filename |
| 114 | Rename the default output file from messages.pot to filename. If |
| 115 | filename is `-' then the output is sent to standard out. |
| 116 | |
| 117 | -p dir |
| 118 | --output-dir=dir |
| 119 | Output files will be placed in directory dir. |
| 120 | |
| 121 | -S stylename |
| 122 | --style stylename |
| 123 | Specify which style to use for location comments. Two styles are |
| 124 | supported: |
| 125 | |
| 126 | Solaris # File: filename, line: line-number |
| 127 | GNU #: filename:line |
| 128 | |
| 129 | The style name is case insensitive. GNU style is the default. |
| 130 | |
| 131 | -v |
| 132 | --verbose |
| 133 | Print the names of the files being processed. |
| 134 | |
| 135 | -V |
| 136 | --version |
| 137 | Print the version of pygettext and exit. |
| 138 | |
| 139 | -w columns |
| 140 | --width=columns |
| 141 | Set width of output to columns. |
| 142 | |
| 143 | -x filename |
| 144 | --exclude-file=filename |
| 145 | Specify a file that contains a list of strings that are not be |
| 146 | extracted from the input files. Each string to be excluded must |
| 147 | appear on a line by itself in the file. |
| 148 | |
| 149 | -X filename |
| 150 | --no-docstrings=filename |
| 151 | Specify a file that contains a list of files (one per line) that |
| 152 | should not have their docstrings extracted. This is only useful in |
| 153 | conjunction with the -D option above. |
| 154 | |
| 155 | If `inputfile' is -, standard input is read. |
| 156 | """) |
| 157 | |
| 158 | import os |
| 159 | import imp |
| 160 | import sys |
| 161 | import glob |
| 162 | import time |
| 163 | import getopt |
| 164 | import token |
| 165 | import tokenize |
| 166 | import operator |
| 167 | |
| 168 | from umit.pm.core.const import PM_VERSION |
| 169 | |
| 170 | __version__ = '1.5' |
| 171 | |
| 172 | default_keywords = ['_'] |
| 173 | DEFAULTKEYWORDS = ', '.join(default_keywords) |
| 174 | |
| 175 | EMPTYSTRING = '' |
| 176 | |
| 177 | |
| 178 | |
| 179 | # The normal pot-file header. msgmerge and Emacs's po-mode work better if it's |
| 180 | # there. |
| 181 | pot_header = _('''\ |
| 182 | # PacketManipulator catalog. |
| 183 | # Copyright (C) 2009 Adriano Montero Marques |
| 184 | # Francesco Piccinno <stack.box@gmail.com>, 2009 |
| 185 | # |
| 186 | msgid "" |
| 187 | msgstr "" |
| 188 | "Project-Id-Version: PacketManipulator %(pm_version)s\\n" |
| 189 | "POT-Creation-Date: %(time)s\\n" |
| 190 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n" |
| 191 | "Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n" |
| 192 | "Language-Team: LANGUAGE <LL@li.org>\\n" |
| 193 | "MIME-Version: 1.0\\n" |
| 194 | "Content-Type: text/plain; charset=UTF-8\\n" |
| 195 | "Content-Transfer-Encoding: 8bit\\n" |
| 196 | "Generated-By: pygettext.py %(version)s\\n" |
| 197 | |
| 198 | ''') |
| 199 | |
| 200 | |
def usage(code, msg=''):
    """Write the help text, and <msg> if given, to stderr; exit with <code>.

    The module docstring is used as help text after filling in its
    %(name)s placeholders from the module globals.
    """
    out = sys.stderr
    out.write(__doc__ % globals() + '\n')
    if msg:
        out.write('%s\n' % (msg,))
    sys.exit(code)
| 206 | |
| 207 | |
| 208 | |
escapes = []

def make_escapes(pass_iso8859):
    """Fill the global table 'escapes' used by escape().

    After the call escapes[i] holds the text to emit for the character
    with ordinal i (0..255).  The table is rebuilt in place, so calling
    this function again replaces the previous table instead of
    appending a second one behind it.
    """
    global escapes
    if pass_iso8859:
        # Allow iso-8859 characters to pass through so that e.g. 'msgid
        # "Höhe"' would not result in 'msgid "H\366he"'.  Otherwise we
        # escape any character outside the 32..126 range.
        mod = 128
    else:
        mod = 256
    # Empty the existing list object rather than rebinding the name, so
    # that other references to the table stay valid.
    del escapes[:]
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            escapes.append(chr(i))
        else:
            escapes.append("\\%03o" % i)
    # Characters that need a backslash escape inside a quoted po string.
    escapes[ord('\\')] = '\\\\'
    escapes[ord('\t')] = '\\t'
    escapes[ord('\r')] = '\\r'
    escapes[ord('\n')] = '\\n'
    escapes[ord('\"')] = '\\"'
| 230 | |
| 231 | |
def escape(s):
    """Return 's' with each character replaced by its 'escapes' entry.

    The table is indexed by the ordinal of each character, so
    make_escapes() must have filled it first.
    """
    global escapes
    return EMPTYSTRING.join([escapes[ord(char)] for char in s])
| 238 | |
| 239 | |
def safe_eval(s):
    """Evaluate the source text of a string literal and return its value."""
    # unwrap quotes with no builtins available to the expression
    # NOTE(review): eval() with empty builtins is not a real sandbox;
    # only feed it STRING tokens produced by the tokenizer.
    no_builtins = {'__builtins__': {}}
    no_locals = {}
    return eval(s, no_builtins, no_locals)
| 243 | |
| 244 | |
def normalize(s):
    """Return 's' as quoted text for a msgid line in a .po file.

    A string without newlines becomes one quoted, escaped string.  A
    string with newlines becomes an empty first string followed by one
    quoted string per line.
    """
    # This converts the various Python string types into a format that is
    # appropriate for .po files, namely much closer to C style.
    if '\n' not in s:
        return '"' + escape(s) + '"'
    lines = s.split('\n')
    if not lines[-1]:
        # The text ended with a newline: drop the empty tail and put the
        # newline back on the last real line.
        lines.pop()
        lines[-1] += '\n'
    escaped = [escape(line) for line in lines]
    return '""\n"' + '\\n"\n"'.join(escaped) + '"'
| 260 | |
| 261 | |
def containsAny(str, set):
    """Return True if 'str' contains at least one of the chars in 'set'."""
    for char in set:
        if char in str:
            return True
    return False
| 265 | |
| 266 | |
| 267 | def _visit_pyfiles(list, dirname, names): |
| 268 | """Helper for getFilesForName().""" |
| 269 | # get extension for python source files |
| 270 | if not globals().has_key('_py_ext'): |
| 271 | global _py_ext |
| 272 | _py_ext = [triple[0] for triple in imp.get_suffixes() |
| 273 | if triple[2] == imp.PY_SOURCE][0] |
| 274 | |
| 275 | # don't recurse into CVS directories |
| 276 | if 'CVS' in names: |
| 277 | names.remove('CVS') |
| 278 | |
| 279 | # add all *.py files to list |
| 280 | list.extend( |
| 281 | [os.path.join(dirname, file) for file in names |
| 282 | if os.path.splitext(file)[1] == _py_ext] |
| 283 | ) |
| 284 | |
| 285 | |
| 286 | def _get_modpkg_path(dotted_name, pathlist=None): |
| 287 | """Get the filesystem path for a module or a package. |
| 288 | |
| 289 | Return the file system path to a file for a module, and to a directory for |
| 290 | a package. Return None if the name is not found, or is a builtin or |
| 291 | extension module. |
| 292 | """ |
| 293 | # split off top-most name |
| 294 | parts = dotted_name.split('.', 1) |
| 295 | |
| 296 | if len(parts) > 1: |
| 297 | # we have a dotted path, import top-level package |
| 298 | try: |
| 299 | file, pathname, description = imp.find_module(parts[0], pathlist) |
| 300 | if file: file.close() |
| 301 | except ImportError: |
| 302 | return None |
| 303 | |
| 304 | # check if it's indeed a package |
| 305 | if description[2] == imp.PKG_DIRECTORY: |
| 306 | # recursively handle the remaining name parts |
| 307 | pathname = _get_modpkg_path(parts[1], [pathname]) |
| 308 | else: |
| 309 | pathname = None |
| 310 | else: |
| 311 | # plain name |
| 312 | try: |
| 313 | file, pathname, description = imp.find_module( |
| 314 | dotted_name, pathlist) |
| 315 | if file: |
| 316 | file.close() |
| 317 | if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]: |
| 318 | pathname = None |
| 319 | except ImportError: |
| 320 | pathname = None |
| 321 | |
| 322 | return pathname |
| 323 | |
| 324 | |
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.

    A name that does not exist on disk is first tried as a glob pattern
    and then as a module or package name.  An empty list is returned
    when nothing is found.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            collected = []
            for hit in glob.glob(name):
                collected.extend(getFilesForName(hit))
            return collected

        # try to find module or package
        name = _get_modpkg_path(name)
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        collected = []
        os.path.walk(name, _visit_pyfiles, collected)
        return collected

    if os.path.exists(name):
        # a single file
        return [name]

    return []
| 353 | |
| 354 | |
class TokenEater:
    """Collect translatable strings from a stream of tokenize tokens.

    An instance is called once per token and works as a small state
    machine: the current state is a bound method that receives the
    token type, the token string and the line number.  Collected
    messages are kept in a dict that maps each message to a dict of
    (filename, lineno) -> isdocstring; write() emits them as a .pot
    file.
    """

    def __init__(self, options):
        self.__options = options
        self.__curfile = None
        self.__freshmodule = 1
        self.__lineno = -1
        self.__data = []
        self.__messages = {}
        self.__state = self.__waiting

    def __call__(self, ttype, tstring, stup, etup, line):
        # dispatch to the current state; stup[0] is the start line number
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        opts = self.__options
        is_name = ttype == tokenize.NAME
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                elif ttype not in (tokenize.COMMENT, tokenize.NL):
                    self.__freshmodule = 0
                return
            # class or function docstring?
            if is_name and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if is_name and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        # ignore anything until we see the colon
        if ttype == tokenize.OP and tstring == ':':
            self.__state = self.__suitedocstring

    def __suitedocstring(self, ttype, tstring, lineno):
        # ignore any intervening noise
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
            return
        noise = (tokenize.NEWLINE, tokenize.INDENT, tokenize.COMMENT)
        if ttype not in noise:
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
            return
        if ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
            return
        layout = [tokenize.COMMENT, token.INDENT, token.DEDENT,
                  token.NEWLINE, tokenize.NL]
        if ttype not in layout:
            # warn if we see anything else than STRING or whitespace
            details = {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }
            sys.stderr.write(_(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % details + '\n')
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        if lineno is None:
            lineno = self.__lineno
        if msg in self.__options.toexclude:
            return
        where = (self.__curfile, lineno)
        self.__messages.setdefault(msg, {})[where] = isdocstring

    def set_filename(self, filename):
        # a new file starts with a fresh chance for a module docstring
        self.__freshmodule = 1
        self.__curfile = filename

    def write(self, fp):
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
        # The time stamp in the header doesn't have the same format as that
        # generated by xgettext...
        fp.write(pot_header % {'time': timestamp, 'version': __version__,
                               'pm_version': PM_VERSION} + '\n')
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            reverse.setdefault(tuple(sorted(v.keys())), []).append((k, v))
        for rkey in sorted(reverse.keys()):
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                # (every message has at least one location, so the sum is
                # taken over a non-empty sequence of 0/1 markers)
                isdocstring = 0
                if sum(v.values()):
                    isdocstring = 1
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples.  We want to sort the entries in v first by
                # file name and then by line number.
                places = sorted(v.keys())
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in places:
                        d = {'filename': filename, 'lineno': lineno}
                        fp.write(_(
                            '# File: %(filename)s, line: %(lineno)d') % d
                            + '\n')
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceeds 'options.width'
                    locline = '#:'
                    for filename, lineno in places:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) > options.width:
                            fp.write(locline + '\n')
                            locline = "#:" + s
                        else:
                            locline = locline + s
                    if len(locline) > 2:
                        fp.write(locline + '\n')
                if isdocstring:
                    fp.write('#, docstring' + '\n')
                fp.write('msgid %s\n' % normalize(k))
                fp.write('msgstr ""\n' + '\n')
| 505 | |
| 506 | |
| 507 | |
| 508 | def main(): |
| 509 | global default_keywords |
| 510 | try: |
| 511 | opts, args = getopt.getopt( |
| 512 | sys.argv[1:], |
| 513 | 'ad:DEhk:Kno:p:S:Vvw:x:X:', |
| 514 | ['extract-all', 'default-domain=', 'escape', 'help', |
| 515 | 'keyword=', 'no-default-keywords', |
| 516 | 'add-location', 'no-location', 'output=', 'output-dir=', |
| 517 | 'style=', 'verbose', 'version', 'width=', 'exclude-file=', |
| 518 | 'docstrings', 'no-docstrings', |
| 519 | ]) |
| 520 | except getopt.error, msg: |
| 521 | usage(1, msg) |
| 522 | |
| 523 | # for holding option values |
| 524 | class Options: |
| 525 | # constants |
| 526 | GNU = 1 |
| 527 | SOLARIS = 2 |
| 528 | # defaults |
| 529 | extractall = 0 # FIXME: currently this option has no effect at all. |
| 530 | escape = 0 |
| 531 | keywords = [] |
| 532 | outpath = '' |
| 533 | outfile = 'messages.pot' |
| 534 | writelocations = 1 |
| 535 | locationstyle = GNU |
| 536 | verbose = 0 |
| 537 | width = 78 |
| 538 | excludefilename = '' |
| 539 | docstrings = 0 |
| 540 | nodocstrings = {} |
| 541 | |
| 542 | options = Options() |
| 543 | locations = {'gnu' : options.GNU, |
| 544 | 'solaris' : options.SOLARIS, |
| 545 | } |
| 546 | |
| 547 | # parse options |
| 548 | for opt, arg in opts: |
| 549 | if opt in ('-h', '--help'): |
| 550 | usage(0) |
| 551 | elif opt in ('-a', '--extract-all'): |
| 552 | options.extractall = 1 |
| 553 | elif opt in ('-d', '--default-domain'): |
| 554 | options.outfile = arg + '.pot' |
| 555 | elif opt in ('-E', '--escape'): |
| 556 | options.escape = 1 |
| 557 | elif opt in ('-D', '--docstrings'): |
| 558 | options.docstrings = 1 |
| 559 | elif opt in ('-k', '--keyword'): |
| 560 | options.keywords.append(arg) |
| 561 | elif opt in ('-K', '--no-default-keywords'): |
| 562 | default_keywords = [] |
| 563 | elif opt in ('-n', '--add-location'): |
| 564 | options.writelocations = 1 |
| 565 | elif opt in ('--no-location',): |
| 566 | options.writelocations = 0 |
| 567 | elif opt in ('-S', '--style'): |
| 568 | options.locationstyle = locations.get(arg.lower()) |
| 569 | if options.locationstyle is None: |
| 570 | usage(1, _('Invalid value for --style: %s') % arg) |
| 571 | elif opt in ('-o', '--output'): |
| 572 | options.outfile = arg |
| 573 | elif opt in ('-p', '--output-dir'): |
| 574 | options.outpath = arg |
| 575 | elif opt in ('-v', '--verbose'): |
| 576 | options.verbose = 1 |
| 577 | elif opt in ('-V', '--version'): |
| 578 | print _('pygettext.py (xgettext for Python) %s') % __version__ |
| 579 | sys.exit(0) |
| 580 | elif opt in ('-w', '--width'): |
| 581 | try: |
| 582 | options.width = int(arg) |
| 583 | except ValueError: |
| 584 | usage(1, _('--width argument must be an integer: %s') % arg) |
| 585 | elif opt in ('-x', '--exclude-file'): |
| 586 | options.excludefilename = arg |
| 587 | elif opt in ('-X', '--no-docstrings'): |
| 588 | fp = open(arg) |
| 589 | try: |
| 590 | while 1: |
| 591 | line = fp.readline() |
| 592 | if not line: |
| 593 | break |
| 594 | options.nodocstrings[line[:-1]] = 1 |
| 595 | finally: |
| 596 | fp.close() |
| 597 | |
| 598 | # calculate escapes |
| 599 | make_escapes(options.escape) |
| 600 | |
| 601 | # calculate all keywords |
| 602 | options.keywords.extend(default_keywords) |
| 603 | |
| 604 | # initialize list of strings to exclude |
| 605 | if options.excludefilename: |
| 606 | try: |
| 607 | fp = open(options.excludefilename) |
| 608 | options.toexclude = fp.readlines() |
| 609 | fp.close() |
| 610 | except IOError: |
| 611 | print >> sys.stderr, _( |
| 612 | "Can't read --exclude-file: %s") % options.excludefilename |
| 613 | sys.exit(1) |
| 614 | else: |
| 615 | options.toexclude = [] |
| 616 | |
| 617 | # resolve args to module lists |
| 618 | expanded = [] |
| 619 | for arg in args: |
| 620 | if arg == '-': |
| 621 | expanded.append(arg) |
| 622 | else: |
| 623 | expanded.extend(getFilesForName(arg)) |
| 624 | args = expanded |
| 625 | |
| 626 | # slurp through all the files |
| 627 | eater = TokenEater(options) |
| 628 | for filename in args: |
| 629 | if filename == '-': |
| 630 | if options.verbose: |
| 631 | print _('Reading standard input') |
| 632 | fp = sys.stdin |
| 633 | closep = 0 |
| 634 | else: |
| 635 | if options.verbose: |
| 636 | print _('Working on %s') % filename |
| 637 | fp = open(filename) |
| 638 | closep = 1 |
| 639 | try: |
| 640 | eater.set_filename(filename) |
| 641 | try: |
| 642 | tokenize.tokenize(fp.readline, eater) |
| 643 | except tokenize.TokenError, e: |
| 644 | print >> sys.stderr, '%s: %s, line %d, column %d' % ( |
| 645 | e[0], filename, e[1][0], e[1][1]) |
| 646 | finally: |
| 647 | if closep: |
| 648 | fp.close() |
| 649 | |
| 650 | # write the output |
| 651 | if options.outfile == '-': |
| 652 | fp = sys.stdout |
| 653 | closep = 0 |
| 654 | else: |
| 655 | if options.outpath: |
| 656 | options.outfile = os.path.join(options.outpath, options.outfile) |
| 657 | fp = open(options.outfile, 'w') |
| 658 | closep = 1 |
| 659 | try: |
| 660 | eater.write(fp) |
| 661 | finally: |
| 662 | if closep: |
| 663 | fp.close() |
| 664 | |
| 665 | |
| 666 | if __name__ == '__main__': |
| 667 | main() |
| 668 | # some more test strings: sample _() calls (a unicode literal, a %-formatted message, adjacent string literals), presumably kept as input for running the extractor over this file itself; indentation is not preserved in this listing, so confirm they sit inside the __main__ guard |
| 669 | _(u'a unicode string') |
| 670 | # this one creates a warning (per the original note; the cause is not visible in this section -- verify against the token handling code) |
| 671 | _('*** Seen unexpected token "%(token)s"') % {'token': 'test'} |
| 672 | _('more' 'than' 'one' 'string') |