MAME SVN History

199869 Revisions

r45103 Sunday 21st February, 2016 at 13:45:38 UTC by Miodrag Milanović
Placed official version, and removed other files since work only with parsing python (nw)

[scripts/build]

~~check_po.py~~ msgfmt.py ~~msgmerge.py~~ ~~pygettext.py~~

trunk/scripts/build/check_po.py
r253614	r253615
1		#! /usr/bin/env python
2		#
3		# check_po - a gramps tool to check validity of po files
4		#
5		# Copyright (C) 2006-2006 Kees Bakker
6		#
7		# This program is free software; you can redistribute it and/or modify
8		# it under the terms of the GNU General Public License as published by
9		# the Free Software Foundation; either version 2 of the License, or
10		# (at your option) any later version.
11		#
12		# This program is distributed in the hope that it will be useful,
13		# but WITHOUT ANY WARRANTY; without even the implied warranty of
14		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15		# GNU General Public License for more details.
16		#
17		# You should have received a copy of the GNU General Public License
18		# along with this program; if not, write to the Free Software
19		# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
21		#
22		# TODO
23		#
24		# * Check for HTML text in msgstr when there is none in msgid
25		# * Check for matching HTML tag/endtag in msgstr
26		#
27
28		# Adapted for Umit by Guilherme Polo, original file:
29		# https://gramps.svn.sourceforge.net/svnroot/gramps/branches/gramps22/po/check_po
30
31		import re
32		import sys
33		from optparse import OptionParser
34
35		APP = "Umit"
36
37		all_total = {}
38		all_fuzzy = {}
39		all_untranslated = {}
40		all_percent_s = {}
41		all_named_s = {}
42		all_bnamed_s = {}
43		all_context = {}
44		all_coverage = {}
45		all_template_coverage = {}
46
47		def strip_quotes(st):
48		if len(st) >= 2 and st[0] == '"' and st[len(st)-1] == '"':
49		st = st.strip()[1:-1]
50		return st
51
52		# This is a base class for all checks
53		class Check:
54		def __init__( self ):
55		self.msgs = []
56		def diag( self ):
57		if len( self.msgs ):
58		print
59		print self.diag_header
60		for m in self.msgs:
61		m.diag()
62		def summary( self ):
63		print "%-20s%d" % ( self.summary_text, len(self.msgs) )
64
65		class Check_fmt( Check ):
66		def __init__( self, fmt ):
67		Check.__init__( self )
68		self.diag_header = "-------- %s mismatches --------------" % fmt
69		self.summary_text = "%s mismatches:" % fmt
70		self.fmt = fmt
71		def process( self, msg ):
72		msgid = msg.msgid
73		msgstr = msg.msgstr
74		cnt1 = msgid.count( self.fmt )
75		cnt2 = msgstr.count( self.fmt )
76		if cnt1 != cnt2:
77		self.msgs.append( msg )
78
79		class Check_named_fmt( Check ):
80		# A pattern to find all %()
81		find_named_fmt_pat = re.compile('% \( \w+ \) \d* \D', re.VERBOSE)
82
83		def __init__( self ):
84		Check.__init__( self )
85		self.diag_header = "-------- %() name mismatches --------------"
86		self.summary_text = "%() name mismatches:"
87		def process( self, msg ):
88		msgid = msg.msgid
89		msgstr = msg.msgstr
90		# Same number of named formats?
91		fmts1 = self.find_named_fmt_pat.findall( msgid )
92		fmts2 = self.find_named_fmt_pat.findall( msgstr )
93		if len( fmts1 ) != len( fmts2 ):
94		self.msgs.append( msg )
95		else:
96		# Do we have the same named formats?
97		fmts1.sort()
98		fmts2.sort()
99		if fmts1 != fmts2:
100		self.msgs.append( msg )
101
102		class Check_missing_sd( Check ):
103		# A pattern to find %() without s or d
104		# Here is a command to use for testing
105		# print re.compile('% \( \w+ \) \d* (\D|$)', re.VERBOSE).findall( '%(event_name)s: %(place)s%(endnotes)s. ' )
106		find_named_fmt_pat2 = re.compile('% \( \w+ \) \d* (\D|$)', re.VERBOSE)
107
108		def __init__( self ):
109		Check.__init__( self )
110		self.diag_header = "-------- %() without 's' or 'd' mismatches --------------"
111		self.summary_text = "%() missing s/d:"
112		def process( self, msg ):
113		msgstr = msg.msgstr
114		fmts = self.find_named_fmt_pat2.findall( msgstr )
115		for f in fmts:
116		if not f in ('s', 'd'):
117		self.msgs.append( msg )
118		break
119
120		class Check_runaway( Check ):
121		def __init__( self ):
122		Check.__init__( self )
123		self.diag_header = "-------- Runaway context in translation ---------"
124		self.summary_text = "Runaway context:"
125		def process( self, msg ):
126		msgid = msg.msgid
127		msgstr = msg.msgstr
128
129		# Runaway context. In the translated part we only to see
130		# the translation of the word after the |
131		if msgid.count('|') > 0 and msgstr.count('|') > 0 and msgid != msgstr:
132		self.msgs.append( msg )
133
134		class Check_xml_chars( Check ):
135		# Special XML characters
136		# It is not allowed to have a quote, an ampersand or an angle bracket
137		xml_chars_pat = re.compile( r'(?<=\W) > | " | & (?!(quot|nbsp|gt|amp);)', re.VERBOSE )
138
139		def __init__( self ):
140		Check.__init__( self )
141		self.diag_header = "-------- unescaped XML special characters ---------"
142		self.summary_text = "XML special chars:"
143		def process( self, msg ):
144		msgid = msg.msgid
145		msgstr = msg.msgstr
146
147		# XML errors
148		# Only look at messages in the tips.xml
149		if msg.is_tips_xml:
150		if self.xml_chars_pat.search( msgstr ):
151		self.msgs.append( msg )
152
153		class Check_last_char( Check ):
154		def __init__( self ):
155		Check.__init__( self )
156		self.diag_header = "-------- last character not identical ---------"
157		self.summary_text = "Last character:"
158		def process( self, msg ):
159		msgid = msg.msgid
160		msgstr = msg.msgstr
161
162		# Last character of msgid? White space? Period?
163		if msg.is_fuzzy:
164		return
165
166		msgid_last = msgid[-1:]
167		msgstr_last = msgstr[-1:]
168		if msgid_last.isspace() != msgstr_last.isspace():
169		self.msgs.append( msg )
170		elif (msgid_last == '.') != (msgstr_last == '.'):
171		self.msgs.append( msg )
172
173		class Check_shortcut_trans( Check ):
174		def __init__( self ):
175		Check.__init__( self )
176		self.diag_header = "-------- shortcut key in translation ---------"
177		self.summary_text = "Shortcut in msgstr:"
178		def process( self, msg ):
179		msgid = msg.msgid
180		msgstr = msg.msgstr
181
182		if msgid.count('_') == 0 and msgstr.count('_') > 0:
183		self.msgs.append( msg )
184
185		class Msgid:
186		fuzzy_pat = re.compile( 'fuzzy' )
187		tips_xml_pat = re.compile( r'tips\.xml' )
188		def __init__( self, msgnr, lineno ):
189		self._msgid = []
190		self._msgstr = []
191		self.msgid = ''
192		self.msgstr = ''
193		self._cmnt = []
194		self.nr = msgnr
195		self.lineno = lineno
196		self.is_fuzzy = 0
197		self.is_tips_xml = 0
198
199		def diag( self ):
200		if 1:
201		print
202		print "msg nr: %d, lineno: %d%s" % ( self.nr, self.lineno, self.is_fuzzy and " (fuzzy)" or "" )
203		sys.stdout.write( ''.join( self._msgid ) )
204		sys.stdout.write( ''.join( self._msgstr ) )
205		else:
206		# Compatible with the old check_po
207		print "%d '%s' : '%s'" % ( self.lineno, self.msgid, self.msgstr )
208
209		def add_msgid( self, line, lineno ):
210		self._msgid.append( line )
211		line = re.sub( r'msgid\s+', '', line )
212		line = line.strip()
213		if line[0] != '"' or line[-1:] != '"':
214		print "ERROR at line %d: Missing quote." % lineno
215		line = strip_quotes( line )
216		self.msgid += line
217
218		def add_msgstr( self, line, lineno ):
219		self._msgstr.append( line )
220		line = re.sub( r'msgstr\s+', '', line )
221		line = line.strip()
222		if line[0] != '"' or line[-1:] != '"':
223		print "ERROR at line %d: Missing quote." % lineno
224		line = strip_quotes( line )
225		self.msgstr += line
226
227		def add_cmnt( self, line ):
228		self._cmnt.append( line )
229		if not self.is_fuzzy and self.fuzzy_pat.search( line ):
230		self.is_fuzzy = 1
231		if not self.is_tips_xml and self.tips_xml_pat.search( line ):
232		self.is_tips_xml = 1
233
234		def read_msgs( fname ):
235		empty_pat = re.compile( r'^ \s* $', re.VERBOSE )
236		comment_pat = re.compile( r'\#', re.VERBOSE )
237		msgid_pat = re.compile( r'msgid \s+ "', re.VERBOSE )
238		msgstr_pat = re.compile( r'msgstr \s+ "', re.VERBOSE )
239		str_pat = re.compile( r'"', re.VERBOSE )
240		old_pat = re.compile( r'\#~ \s+ ', re.VERBOSE )
241
242		msgnr = 0 # This is the message number of the next message to read. The first real message is 1.
243		f = open( fname )
244		lines = f.readlines()
245
246		# parse it like a statemachine
247		NONE = 0 # Nothing detected, yet
248		CMNT = 1 # Inside comment part
249		MSGID = 2 # Inside msgid part
250		MSGSTR = 3 # Inside msgstr part
251		STR = 4 # A continuation string
252		OLD = 5 # An old pattern with #~
253
254		state = NONE
255		msg = None
256		msgs = []
257
258		for ix in range( len(lines) ): # Use line numbers for messages
259		line = lines[ix]
260		lineno = ix + 1
261
262		m = empty_pat.match( line )
263		if m:
264		continue # Empty lines are not interesting
265
266		# What's the next state?
267		if old_pat.match( line ):
268		next_state = OLD
269		elif comment_pat.match( line ):
270		next_state = CMNT
271		elif msgid_pat.match( line ):
272		next_state = MSGID
273		elif msgstr_pat.match( line ):
274		next_state = MSGSTR
275		elif str_pat.match( line ):
276		next_state = STR
277		else:
278		print 'WARNING: Unexpected input at %(fname)s:%(lineno)d' % vars()
279		next_state = NONE
280
281		#print "%(state)d->%(next_state)d\t%(line)s" % vars()
282		if state == NONE:
283		# expect msgid or comment or old stuff
284		if next_state == CMNT:
285		state = CMNT
286		msg = Msgid( msgnr, lineno ) # Start with an empty new item
287		msgnr += 1
288		msgs.append( msg )
289		msg.add_cmnt( line )
290
291		elif next_state == MSGID:
292		state = MSGID
293		msg = Msgid( msgnr, lineno ) # Start with an empty new item
294		msgnr += 1
295		msgs.append( msg )
296		msg.add_msgid( line, lineno )
297
298		elif next_state == MSGSTR:
299		print 'WARNING: Wild msgstr at %(fname)s:%(lineno)d' % vars()
300		state = MSGSTR
301		msg = Msgid( msgnr, lineno ) # Start with an empty new item
302		msgnr += 1
303		msgs.append( msg )
304		msg.add_msgstr( line, lineno )
305
306		elif next_state == STR:
307		print 'WARNING: Wild string at %(fname)s:%(lineno)d' % vars()
308
309		elif next_state == OLD:
310		pass # Just skip
311
312		elif state == CMNT:
313		if next_state == CMNT:
314		if msg:
315		msg.add_cmnt( line )
316		else:
317		# Note. We may need to do something about these comments
318		# Skip for now
319		pass
320
321		elif next_state == MSGID:
322		state = MSGID
323		if not msg:
324		msg = Msgid( msgnr, lineno ) # Start with an empty new item
325		msgnr += 1
326		msgs.append( msg )
327		msg.add_msgid( line, lineno )
328
329		elif next_state == MSGSTR:
330		print 'WARNING: Wild msgstr at %(fname)s:%(lineno)d' % vars()
331		state = MSGSTR
332		msg = Msgid( msgnr, lineno ) # Start with an empty new item
333		msgnr += 1
334		msgs.append( msg )
335		msg.add_msgstr( line, lineno )
336
337		elif next_state == STR:
338		print 'WARNING: Wild string at %(fname)s:%(lineno)d' % vars()
339
340		elif next_state == OLD:
341		msg = None
342		pass # Just skip
343
344		elif state == MSGID:
345		if next_state == CMNT:
346		# Hmmm. A comment here?
347		print 'WARNING: Unexpted comment at %(fname)s:%(lineno)d' % vars()
348
349		elif next_state == MSGID:
350		raise Exception( 'Unexpected msgid at %(fname)s:%(lineno)d' % vars() )
351
352		elif next_state == MSGSTR:
353		state = MSGSTR
354		msg.add_msgstr( line, lineno )
355
356		elif next_state == STR:
357		msg.add_msgid( line, lineno )
358
359		elif next_state == OLD:
360		msg = None
361		pass # Just skip
362
363		elif state == MSGSTR:
364		if next_state == CMNT:
365		# A comment probably starts a new item
366		state = CMNT
367		msg = Msgid( msgnr, lineno )
368		msgnr += 1
369		msgs.append( msg )
370		msg.add_cmnt( line )
371
372		elif next_state == MSGID:
373		state = MSGID
374		msg = Msgid( msgnr, lineno )
375		msgnr += 1
376		msgs.append( msg )
377		msg.add_msgid( line, lineno )
378
379		elif next_state == MSGSTR:
380		raise Exception( 'Unexpected msgstr at %(fname)s:%(lineno)d' % vars() )
381
382		elif next_state == STR:
383		msg.add_msgstr( line, lineno )
384
385		elif next_state == OLD:
386		msg = None
387		pass # Just skip
388
389		else:
390		raise Exception( 'Unexpected state in po parsing (state = %d)' % state )
391
392		# Strip items with just comments. (Can this happen?)
393		msgs1 = []
394		for m in msgs:
395		if not m.msgid and not m.msgstr:
396		#print "INFO: No msgid or msgstr at %s:%s" % ( fname, m.lineno )
397		pass
398		else:
399		msgs1.append( m )
400		msgs = msgs1
401		return msgs
402
403		def analyze_msgs( options, fname, msgs, nr_templates = None, nth = 0 ):
404		nr_fuzzy = 0
405		nr_untranslated = 0
406
407		checks = []
408		checks.append( Check_fmt( '%s' ) )
409		checks.append( Check_fmt( '%d' ) )
410		checks.append( Check_named_fmt() )
411		checks.append( Check_missing_sd() )
412		checks.append( Check_runaway() )
413		checks.append( Check_xml_chars() )
414		checks.append( Check_last_char() )
415		checks.append( Check_shortcut_trans() )
416
417		for msg in msgs:
418		msgid = msg.msgid
419		msgstr = msg.msgstr
420		#print
421		#print "msgid: %(msgid)s" % vars()
422		#print "msgstr: %(msgstr)s" % vars()
423
424		if not msgstr:
425		nr_untranslated += 1
426		continue
427
428		if msg.is_fuzzy:
429		nr_fuzzy += 1
430		if options.skip_fuzzy:
431		continue
432
433		for c in checks:
434		c.process( msg )
435
436		nr_msgs = len(msgs)
437		if nth > 0:
438		print
439		print "====================================="
440		print "%-20s%s" % ( "File:", fname )
441		print "%-20s%d" % ( "Template total:", nr_templates )
442		print "%-20s%d" % ( "PO total:", nr_msgs )
443		print "%-20s%d" % ( "Fuzzy:", nr_fuzzy )
444		print "%-20s%d" % ( "Untranslated:", nr_untranslated )
445
446		for c in checks:
447		c.summary()
448
449		po_coverage = (1.0 - (float(nr_untranslated) / float(nr_msgs))) * 100
450		print "%-20s%5.2f%%" % ( "PO Coverage:", po_coverage )
451
452		template_coverage = po_coverage * float(nr_msgs) / float(nr_templates)
453		print "%-20s%5.2f%%" % ( "Template Coverage:", template_coverage )
454
455		if not options.only_summary:
456		for c in checks:
457		c.diag()
458
459		def main(args):
460		if len(sys.argv) < 2:
461		print "Error: Especify the umit.pot file path"
462		sys.exit(1)
463
464		parser = OptionParser(description="This program validates a PO file for "
465		"%s." % APP, usage='%prog [options] po-file...' )
466
467		parser.add_option("", "--skip-fuzzy",
468		action="store_true", dest="skip_fuzzy", default=False,
469		help="skip fuzzies")
470
471		parser.add_option("-s", "--only-summary",
472		action="store_true", dest="only_summary", default=False,
473		help="only give the summary")
474
475		options, args = parser.parse_args()
476
477		try:
478		pot_msgs = read_msgs(sys.argv[1])
479		nr_templates = len(pot_msgs)
480		nth = 0
481		for fname in args:
482		msgs = read_msgs(fname)
483		analyze_msgs(options, fname, msgs, nr_templates, nth)
484		nth += 1
485
486		except Exception, e:
487		print e
488
489		if __name__ == "__main__":
490		main(sys.argv)

trunk/scripts/build/msgfmt.py
r253614	r253615
1		#! /usr/bin/env python
	1	#!/usr/bin/env python2
2	2	# -*- coding: iso-8859-1 -*-
3	3	# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
4		#
5		# Changelog: (Guilherme Polo)
6		# 2008-04-11
7		# - Support for files with BOM UTF8 mark.
8		#
9		# 2008-04-10
10		# - Support for fuzzy strings in output.
11		# - Bumped to version 1.1.1
12	4
13	5	"""Generate binary message catalog from textual translation description.
14	6
r253614	r253615
24	16	Specify the output file to write to. If omitted, output will go to a
25	17	file named filename.mo (based off the input file name).
26	18
27		-f
28		--use-fuzzy
29		Use fuzzy entries in output
30
31	19	-h
32	20	--help
33	21	Print this message and exit.
r253614	r253615
35	23	-V
36	24	--version
37	25	Display version information and exit.
38
39		Before using the -f (fuzzy) option, read this:
40		http://www.finesheer.com:8457/cgi-bin/info2html?(gettext)Fuzzy%20Entries&lang=en
41	26	"""
42	27
	28	import os
43	29	import sys
44		import os
	30	import ast
45	31	import getopt
46	32	import struct
47	33	import array
48		import codecs
49	34
50		__version__ = "1.1.1"
	35	__version__ = "1.1"
51	36
52	37	MESSAGES = {}
53	38
54	39
	40
55	41	def usage(code, msg=''):
56	42	print >> sys.stderr, __doc__
57	43	if msg:
r253614	r253615
59	45	sys.exit(code)
60	46
61	47
62		def add(id, str, fuzzy, use_fuzzy):
63		"Add a translation to the dictionary."
	48
	49	def add(id, str, fuzzy):
	50	"Add a non-fuzzy translation to the dictionary."
64	51	global MESSAGES
65		if (not fuzzy ~~or use_fuzzy)~~ and str:
	52	if not fuzzy and str:
66	53	MESSAGES[id] = str
67	54
68	55
	56
69	57	def generate():
70	58	"Return the generated output."
71	59	global MESSAGES
r253614	r253615
108	96	return output
109	97
110	98
111		def make(filename, outfile, use_fuzzy):
	99
	100	def make(filename, outfile):
112	101	ID = 1
113	102	STR = 2
114	103
r253614	r253615
122	111
123	112	try:
124	113	lines = open(infile).readlines()
125		if lines[0].startswith(codecs.BOM_UTF8):
126		lines[0] = lines[0][len(codecs.BOM_UTF8):]
127	114	except IOError, msg:
128	115	print >> sys.stderr, msg
129	116	sys.exit(1)
r253614	r253615
137	124	lno += 1
138	125	# If we get a comment line after a msgstr, this is a new entry
139	126	if l[0] == '#' and section == STR:
140		add(msgid, msgstr, fuzzy~~, use_fuzzy~~)
	127	add(msgid, msgstr, fuzzy)
141	128	section = None
142	129	fuzzy = 0
143	130	# Record a fuzzy mark
r253614	r253615
147	134	if l[0] == '#':
148	135	continue
149	136	# Now we are in a msgid section, output previous section
150		if l.startswith('msgid'):
	137	if l.startswith('msgid') and not l.startswith('msgid_plural'):
151	138	if section == STR:
152		add(msgid, msgstr, fuzzy~~, use_fuzzy~~)
	139	add(msgid, msgstr, fuzzy)
153	140	section = ID
154	141	l = l[5:]
155	142	msgid = msgstr = ''
	143	is_plural = False
	144	# This is a message with plural forms
	145	elif l.startswith('msgid_plural'):
	146	if section != ID:
	147	print >> sys.stderr, 'msgid_plural not preceded by msgid on %s:%d' %\
	148	(infile, lno)
	149	sys.exit(1)
	150	l = l[12:]
	151	msgid += '\0' # separator of singular and plural
	152	is_plural = True
156	153	# Now we are in a msgstr section
157	154	elif l.startswith('msgstr'):
158	155	section = STR
159		l = l[6:]
	156	if l.startswith('msgstr['):
	157	if not is_plural:
	158	print >> sys.stderr, 'plural without msgid_plural on %s:%d' %\
	159	(infile, lno)
	160	sys.exit(1)
	161	l = l.split(']', 1)[1]
	162	if msgstr:
	163	msgstr += '\0' # Separator of the various plural forms
	164	else:
	165	if is_plural:
	166	print >> sys.stderr, 'indexed msgstr required for plural on %s:%d' %\
	167	(infile, lno)
	168	sys.exit(1)
	169	l = l[6:]
160	170	# Skip empty lines
161	171	l = l.strip()
162	172	if not l:
163	173	continue
164		# XXX: Does this always follow Python escape semantics?
165		l = eval(l)
	174	l = ast.literal_eval(l)
166	175	if section == ID:
167	176	msgid += l
168	177	elif section == STR:
r253614	r253615
174	183	sys.exit(1)
175	184	# Add last entry
176	185	if section == STR:
177		add(msgid, msgstr, fuzzy~~, use_fuzzy~~)
	186	add(msgid, msgstr, fuzzy)
178	187
179	188	# Compute output
180	189	output = generate()
r253614	r253615
185	194	print >> sys.stderr, msg
186	195
187	196
	197
188	198	def main():
189	199	try:
190		opts, args = getopt.getopt(sys.argv[1:], 'hVo:f',
191		['help', 'version', 'output-file=', 'use-fuzzy'])
	200	opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
	201	['help', 'version', 'output-file='])
192	202	except getopt.error, msg:
193	203	usage(1, msg)
194	204
195	205	outfile = None
196		use_fuzzy = False
197	206	# parse options
198	207	for opt, arg in opts:
199	208	if opt in ('-h', '--help'):
r253614	r253615
201	210	elif opt in ('-V', '--version'):
202	211	print >> sys.stderr, "msgfmt.py", __version__
203	212	sys.exit(0)
204		elif opt in ('-f', '--use-fuzzy'):
205		use_fuzzy = True
206	213	elif opt in ('-o', '--output-file'):
207	214	outfile = arg
208	215	# do it
r253614	r253615
212	219	return
213	220
214	221	for filename in args:
215		make(filename, outfile~~, use_fuzzy~~)
	222	make(filename, outfile)
216	223
217	224
218	225	if __name__ == '__main__':

trunk/scripts/build/msgmerge.py
r253614	r253615
1		#! /usr/bin/env python
2		# -*- coding: iso-8859-1 -*-
3		#
4		# Copyright Terje Røsten <terjeros@phys.ntnu.no> Nov. 2003.
5		#
6		'''Merge two Uniforum style .po files together.
7
8		This is a implementation (not complete) in Python of the GNU
9		msgmerge(1) program. It can be used on the command line (or as a Python
10		module).
11
12		Usage: msgmerge.py [OPTIONS] def.po ref.pot
13
14		The def.po file is an existing PO file with translations. The ref.pot
15		file is the last created PO file with up-to-date source references but
16		old translations, or a PO Template file.
17
18		Options:
19		-U, --update update def.po,
20		do nothing if def.po is already up to date.
21		-o, --output-file=FILE write output to file FILE. Output is written to
22		stdout if set to - or if the option is not present.
23		-D, --docstrings don\'t remove docstring flag.
24		-h, --help display help text and exit.
25		-V, --version display version and exit.
26		-q, --quiet, --silent suppress progress indicators.
27		'''
28		from __future__ import generators
29
30		if not __name__ == '__main__':
31		__doc__ += '''\
32
33		When used as module the interesting functions are merge() and
34		merge_dir().
35
36		The merge() function does the same as the command line version, and
37		the arguments are as follows. The first argument is the def.po file,
38		then the ref.pot file. The third argument controls whether do work in
39		update mode or not, then the next argument sets the output file. Set
40		the next argument to False to remove docstring flags. The last
41		argument can be used to suppress progress indicators. The default is
42		to work in update mode with progress indicators.
43
44		Example:
45		merge("def.po", "ref.pot")
46		merge the files def.po and ref.pot and write output to def.po if
47		there are any changes.
48		merge("def.po", "red.pot", docstrings = False, verbose = False,
49		update = False, outfile = "-")
50		merge the files def.po and ref.pot and write output to stdout,
51		remove docstring flag and be quiet.
52
53		The merge_dir() function is useful when merging a directory of po
54		files. The only required argument is the name of the directory with po
55		files and the pot file. It will use simple glob to find the files. The
56		second argument can be used to specify the pot file (in the
57		directory). Third argument is a list of po files (then globbing will
58		not be used) and the next argument is list of filename to exclude. The
59		last argument can be used to suppress progress indicators. Docstring
60		flag will not be removed.
61
62		Example:
63		merge_dir("po")
64		merge (and update) all po files in directory po with the single pot
65		file in the same directory.
66
67		The module raises the MsgmergeError exception in case of error.
68		'''
69		__revision__ = '$Id: msgmerge.py,v 1.41 2003/11/18 19:10:42 terjeros Exp $'
70		__version__ = '0.1'
71		name = 'msgmerge.py'
72
73		__all__ = [ 'merge', 'merge_dir', 'MsgmergeError' ]
74
75		import sys
76		import re
77		import string
78		import getopt
79		import difflib
80		import glob
81		import os.path
82		import codecs
83
84		try:
85		True, False
86		except NameError:
87		True, False = 1, 0
88
89		class Msgs:
90		'''Class to hold information about messages.'''
91		width = 80
92		file = ''
93		def __init__(self, msgid, msgstr, flag, lno, entry, **kwds):
94		self.id = msgid
95		self.str = msgstr
96		self.cmt = kwds.get('cmt', '')
97		self.ref = kwds.get('ref', '')
98		self.autocmt = kwds.get('autocmt', '')
99		self.flag = flag
100		self.entry = entry
101		self.lno = lno
102		self.count = 0
103		def wash(self):
104		self.id = wash(self.id, width = self.width,
105		filename = self.file, lno = self.lno)
106		self.str = wash(self.str, 'msgstr', width = self.width,
107		filename = self.file, lno = self.lno)
108		def used(self):
109		self.count += 1
110		def get_clean_id(self):
111		return self.id.replace('msgid "','', 1)
112		def obsolete(self):
113		self.width -= len('#~ ')
114		self.wash()
115		t = [ '#~ %s\n' % s for s in self.id.splitlines() ]
116		self.id = ''.join(t)
117		t = [ '#~ %s\n' % s for s in self.str.splitlines() ]
118		self.str = ''.join(t)
119
120		class Options:
121		'''Class to hold options'''
122		def __init__(self, cmdline = False, **kwds):
123		if not cmdline:
124		self.update = kwds.get('update', True)
125		self.outfile = kwds.get('outfile', '-')
126		self.docstrings = kwds.get('docstrings', True)
127		self.verbose = kwds.get('verbose', False)
128		self.suffix = kwds.get('suffix', '~')
129		self.backup = kwds.get('backup', True)
130		else:
131		self.update = False
132		self.outfile = False
133		self.docstrings = False
134		self.verbose = True
135		self.suffix = '~'
136		self.backup = True
137
138		class MsgmergeError(Exception):
139		'''Exception class for msgmerge'''
140
141		def gen(lines):
142		'''
143		Generator which returns a line (with the obsolete prefix removed)
144		from the list of lines in <lines>, the line number is also
145		returned.
146		'''
147		lno = 0
148		for l in lines:
149		lno += 1
150		yield l.replace('#~ ', '', 1), lno
151		yield l, lno
152
153		def slurp(s, g, sign):
154		'''
155		The string returned from iterator <g>\'s next() method is added to
156		the string <s> if string returned is beginning with the string
157		<sign>. The return value is the first returned string which do not
158		start with <sign>, the line number, the iterator <g> and the
159		(possibly) updated string <s>.
160		'''
161		l, lno = g.next()
162		while l.startswith(sign) or (sign == '# ' and l.strip() == '#'):
163		s += l
164		l, lno = g.next()
165		return l, lno, g, s
166
167		def splitted_fit(chunk, line, width, break_always, break_after_space):
168		'''
169		Check if string <chunk> can be splitted by newline to fit into
170		string <line> with width smaller than <width>. The return value is
171		a tuple where the first element is the part of chunk which fits
172		and the second element is the rest of chunk.
173		'''
174		ret = '', chunk
175		l = len(chunk)
176		for i in range(l - 1, -1, -1):
177		if chunk[i] in break_always and len(chunk[0:i] + line) <= width:
178		ret = chunk[0:i], chunk[i:]
179		break
180		elif chunk[i] in break_after_space and i and chunk[i-1].strip() == '':
181		ret = chunk[0:i], chunk[i:]
182		break
183		elif chunk[i] == '\\' and len(chunk[i:]) > 1 and chunk[i+1] == '"' \
184		and len(chunk[0:i] + line) <= width:
185		ret = chunk[0:i], chunk[i:]
186		break
187		return ret
188
189		def wrap(msg, width):
190		'''
191		Accept a list <msg> of strings to wrap, each string is wrapped to
192		width <width> and surrounded with a pair of ". The return value is
193		a string with these wrapped strings joined together with newlines.
194		'''
195		if msg.isspace() or not msg:
196		return '"%s"' % msg
197
198		# \ and " is here, but " is special in po files.
199		break_always = '$%+({['
200		# XXX what about: « © » ¦ § etc?
201		break_after_space = '_-=^`~\'<|>&*#@'
202		enders = '.:,;!?/])}|%-'
203		extra = string.punctuation
204		for c in enders:
205		extra = extra.replace(c, '')
206		escaped = { 'enders' : re.escape(enders),
207		'extra' : re.escape(extra) }
208		regex = r'([\w%(extra)s][\s%(enders)s)]+[\s%(enders)s])' % escaped
209		r = re.compile(regex, re.UNICODE)
210		msg = [ m for m in r.split(msg) if not m == '']
211
212		lines = []
213		line = msg.pop(0)
214
215		# Handle \n on end of line
216		if len(msg) > 1 and msg[-1] == 'n' and len(msg[-2]) > 0 \
217		and msg[-2][-1] == '\\':
218		msg[-2] += msg[-1]
219		msg.pop()
220		# Do not allow a single \n on a line
221		if len(msg) > 2 and msg[-1] == '\\n':
222		msg[-2] += msg[-1]
223		msg.pop()
224
225		for m in msg:
226		if len(line) > width or len(m) > width or len(line + m) > width:
227		fit, rest = splitted_fit(m, line, width, break_always,
228		break_after_space)
229		line += fit
230		lines.append(line)
231		line = rest
232		else:
233		line += m
234		lines.append(line)
235		lines = [ '"%s"' % l for l in lines ]
236		return '\n'.join(lines)
237
238		def normalize(lines):
239		'''
240		Normalize <lines>: e.g "\n\nText\n\n" becomes:
241		"\n"
242		"\n"
243		"Text\n"
244		"\n"
245		'''
246		if 0 < lines.find('\\n') < len(lines) - 3:
247		if lines[-3:] == '\\n"':
248		lines = lines[:-3].replace('\\n','\\n"\n"').replace('""\n','') \
249		+ '\\n"'
250		else:
251		lines = lines.replace('\\n','\\n"\n"').replace('""\n','')
252		return lines
253
254		def wash(msg, idx = 'msgid', width = 80, **kwds):
255		'''
256		Do washing on the msgstr or msgid fields. Wrap the text to fit in
257		width <width>. <msg> is a list of lines that makes up the field.
258		<idx> indicate msgid or msgstr, <width> holds the width. <filename>
259		and <lno> (line number) is picked up from <kwds>.
260		Returns the washed field as a string.
261		'''
262		msg = normalize(msg)
263		lines = msg.splitlines()
264		size = len(lines)
265		if size > 1 or len(msg) > width:
266		washed = []
267		# The first line is special
268		m = re.match('^%s "(.*)"$' % (idx, ), lines[0])
269		if not m:
270		print lines[0]
271		kwds['lno'] -= size + 1
272		raise MsgmergeError('parse error: %(filename)s:%(lno)s.'
273		% kwds)
274		washed.append(m.group(1))
275		if m.group(1).endswith(r'\n'):
276		washed.append('')
277		i = 0
278		for line in lines[1:]:
279		m = re.match('^"(\s*.*)"$', line)
280		i += 1
281		if not m:
282		print line
283		kwds['lno'] -= size - i + 1
284		raise MsgmergeError('parse error: %(filename)s:%(lno)s.'
285		% kwds)
286		washed[-1] += m.group(1)
287		if m.group(1).endswith(r'\n'):
288		washed.append('')
289		if washed[0] == '':
290		washed.pop(0)
291		if washed[-1] == '':
292		washed.pop()
293
294		washed = [ wrap(w, width - 3) for w in washed ] # " and \n removed.
295
296		# One line or multiline
297		if len(washed) == 1 and len('%s %s\n' % (idx, washed[0])) < width:
298		washed = '%s %s\n' % (idx, washed[0])
299		else:
300		washed = '%s ""\n%s\n' % (idx, '\n'.join(washed))
301		else:
302		washed = msg
303
304		return washed
305
306		def parse(filename, entry):
307		'''
308		Parse po or pot file with name <filename>. Set the variable
309		<entry> to msgid/msgstr to indicate pot/po file. The return value
310		is a dict with msgid (washed) as key and Msgs instances as
311		values.
312		'''
313		lines = io(filename).readlines()
314		Msgs.file = filename
315		messages = {}
316		last = len(lines)
317		g = gen(lines)
318		cmt = autocmt = ref = flag = ''
319		msgid = False
320		lno = 0
321		while not lno == last:
322		l, lno = g.next()
323		if l.startswith('# '):
324		l, lno, g, cmt = slurp(l, g, '# ')
325		if l.startswith('#.'):
326		l, lno, g, autocmt = slurp(l, g, '#.')
327		if l.startswith('#:'):
328		l, lno, g, ref = slurp(l, g, '#:')
329		if l.startswith('#,'):
330		l, lno, g, flag = slurp(l, g, '#,')
331		if l.startswith('msgid'):
332		l, lno, g, msgid = slurp(l, g, '"')
333		if l.startswith('msgstr'):
334		l, lno, g, msgstr = slurp(l, g, '"')
335
336		if not lno == last and not l.strip() == '':
337		raise MsgmergeError('parse error: %s:%s.' % (filename, lno))
338
339		if msgid and entry == 'msgstr':
340		idx = wash(msgid, filename = filename, lno = lno)
341		messages[idx] = Msgs(msgid, msgstr, flag, lno, entry, cmt = cmt)
342		msgid = False; msgstr = cmt = autocmt = ref = flag = ''
343		elif msgid and entry == 'msgid':
344		idx = wash(msgid, filename = filename, lno = lno)
345		messages[idx] = Msgs(msgid, msgstr, flag, lno, entry,
346		autocmt = autocmt, ref = ref)
347		msgid = False; msgstr = cmt = autocmt = ref = flag = ''
348
349		for m in messages.values():
350		m.wash()
351		return messages
352
def fuzzy_match(pot, defs):
    '''
    Look for the Msgs object in the dict <defs> whose clean id is most
    similar (difflib ratio) to the clean id of the Msgs object <pot>.
    A candidate only qualifies when its ratio is above 0.59 (the 0.6
    limit minus 0.01). Entries with an empty translation and the header
    entry are never considered. Returns the best candidate, or False
    when nothing qualifies.
    '''
    threshold = 0.6
    best_ratio = threshold - 0.01
    best = False
    wanted = pot.get_clean_id()
    wanted_len = len(wanted)
    # NOTE(review): the junk test compares a single element with the
    # two-character string ' "', so it looks like it can never be true;
    # kept unchanged to preserve the computed ratios.
    matcher = difflib.SequenceMatcher(lambda x: x == ' "', '', wanted)
    for candidate in defs.values():
        # Skip empty translations first, then the header (empty msgid).
        if candidate.str == 'msgstr ""\n' or candidate.id == 'msgid ""\n':
            continue
        text = candidate.get_clean_id()
        text_len = len(text)
        # Cheap length comparison before any difflib work.
        if wanted_len > 2 * text_len or text_len > 1.5 * wanted_len:
            continue
        matcher.set_seq1(text)
        if matcher.quick_ratio() < best_ratio:
            continue
        ratio = matcher.ratio() # This is expensive
        if ratio > best_ratio:
            best_ratio, best = ratio, candidate
    return best
379
def flags(po, pot, fuzzy = False, obs = False):
    '''
    Build the flag line for an entry out of the flag fields of the Msgs
    objects <po> and <pot>. With <fuzzy> set, the flags of <po> are
    ignored and a fuzzy flag is used in their place. With <obs> set,
    the c-format, python-format and docstring flags are dropped. The
    docstring flag is also dropped unless the global option.docstrings
    is set. Returns the combined flag line (newline terminated), or an
    empty string when only the "#" marker would remain.
    '''
    global option
    if not (po.flag or pot.flag or fuzzy):
        return ''

    if fuzzy:
        leading = '#, fuzzy'
    else:
        leading = po.flag.strip()
    combined = '%s, %s' % (leading, pot.flag.strip())

    # Drop empty pieces and duplicates, keeping first-seen order.
    parts = []
    for piece in combined.split(', '):
        if piece and piece not in parts:
            parts.append(piece)

    unwanted = []
    if not option.docstrings:
        unwanted.append('docstring')
    if obs:
        unwanted.extend(['c-format', 'python-format', 'docstring'])
    for name in unwanted:
        if name in parts:
            parts.remove(name)

    # Put fuzzy first (directly after the leading "#" marker)
    if 'fuzzy' in parts:
        where = parts.index('fuzzy')
        if where != 1:
            parts[1], parts[where] = parts[where], parts[1]

    if len(parts) == 1:
        return ''
    return ', '.join(parts) + '\n'
421
def add(pot, po, fuzzy = False):
    '''
    Assemble an up to date entry from the Msgs objects <pot> and <po>.
    The translator comment and the translation are taken from <po>; the
    automatic comment, the references and the msgid from <pot>. The
    flag line comes from flags(), which ignores the flags of <po> when
    <fuzzy> is true. Returns the entry as one multiline string.
    '''
    pieces = [
        po.cmt,
        pot.autocmt,
        pot.ref,
        flags(po, pot, fuzzy = fuzzy),
        pot.id,
        po.str,
    ]
    return ''.join(pieces)
436
def header(pot, defs):
    '''
    Locate the header entry (the one with an empty msgid) in the dict
    <defs> and check that both it and the pot header <pot> carry a
    POT-Creation-Date field. Returns the header entry from <defs>.

    Raises MsgmergeError when <defs> does not hold exactly one header
    entry, or when the POT-Creation-Date field is missing from either
    the pot or the po header.
    '''
    try:
        # Unpacking fails with ValueError unless exactly one entry matches.
        [po] = [ d for d in defs.values() if d.id == 'msgid ""\n' ]
    except ValueError:
        raise MsgmergeError('Error: did not find header in po file.')

    # group 1: everything up to and including the field name,
    # group 2: the date itself, group 3: the rest of the header.
    r = re.compile(r'(.*^"POT-Creation-Date:\s+)(.*?)(\\n"$.*)',
                   re.MULTILINE | re.DOTALL)
    m = r.match(pot.str)
    if not m:
        raise MsgmergeError(
            'Error: did not find POT-Creation-Date field in pot file.')

    subs = '\\1%s\\3' % m.group(2)
    # NOTE(review): the substituted text is thrown away, so po.str keeps
    # its old date and only the presence of the field is verified here.
    # Confirm whether the date was meant to be written back to po.str.
    _, count = r.subn(subs, po.str)
    if not count == 1:
        raise MsgmergeError(
            'Error: did not find POT-Creation-Date field in po file.')
    return po
459
def match(defs, refs):
    '''
    Try to match Msgs objects in <refs> with Msgs objects in
    <defs>. The return value is a list with po entries.

    The entry of <refs> with the lowest line number is treated as the
    header. Every other entry is paired with an identical msgid from
    <defs>, else with the best fuzzy match, else emitted untranslated.
    Unused entries of <defs> are appended by obsolete().

    Raises MsgmergeError when <refs> is empty or when header() fails.
    '''
    global option
    matches = []
    empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')
    # Sort on the line number only; Msgs objects are never compared.
    ordered = sorted(refs.values(), key = lambda r: r.lno)
    if not ordered:
        raise MsgmergeError('Error: no entries found in pot file.')
    po = header(ordered.pop(0), defs) # Header entry
    matches.append(add(empty, po))
    po.used()
    for pot in ordered:
        if option.verbose:
            sys.stderr.write('.')
        po = defs.get(pot.id, False) # Perfect match
        if po:
            matches.append(add(pot, po))
            po.used(); pot.used()
            continue
        po = fuzzy_match(pot, defs) # Fuzzy match
        if po:
            matches.append(add(pot, po, fuzzy = True))
            po.used(); pot.used()
            continue
        matches.append(add(pot, empty)) # No match

    obsolete(defs, matches)
    return matches
491
def obsolete(defs, matches):
    '''
    Handle obsolete translations: every entry of <defs> whose count is
    still 0 and whose translation is not empty gets its flags reduced
    via flags(obs = True), is marked through its obsolete() method and
    is appended to the list <matches>, in line number order.
    '''
    leftovers = sorted([ (d.lno, d) for d in defs.values()
                         if d.count == 0 and not d.str == 'msgstr ""\n' ])
    empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')
    for _lno, old in leftovers:
        old.flag = flags(old, empty, obs = True)
        old.obsolete()
        matches.append('%s%s%s' % (old.flag, old.id, old.str))
503
def help():
    '''Write the module docstring to standard out and exit with status 0.'''
    sys.stdout.write('%s\n' % (__doc__,))
    sys.exit(0)
508
def cmdline():
    '''
    Parse options and arguments from command line and run merge().

    Exits with status 1 on unknown options, on a wrong number of input
    files, on conflicting options and when merge() raises
    MsgmergeError. Exits with status 0 after --help or --version.
    '''
    advice = 'Try `%(name)s --help\' for more information.'

    def fail(reason):
        # The program name is expanded separately, so a '%' inside
        # <reason> cannot break the formatting.
        names = globals()
        sys.stdout.write('%s: %s\n%s\n' % ('%(name)s' % names, reason,
                                           advice % names))
        sys.exit(1)

    try:
        # suffix and backup take a value, hence the trailing '='.
        long_opt = ['help', 'version', 'update', 'output-file=',
                    'quiet', 'silent', 'docstrings', 'suffix=', 'backup=']
        opts, args = getopt.getopt(sys.argv[1:], 'hVUo:qD', long_opt)
    except getopt.error as msg:
        fail(msg)

    option = Options(cmdline = True)
    for opt, arg in opts:
        if opt in ['-h', '--help']:
            help()
        elif opt in ['-V', '--version']:
            sys.stdout.write('%(name)s %(__version__)s' % globals() + '\n')
            sys.exit(0)
        elif opt in ['-o', '--output-file']:
            option.outfile = arg
        elif opt in ['-U', '--update']:
            option.update = True
        elif opt in ['-q', '--silent', '--quiet']:
            option.verbose = False
        elif opt in ['-D', '--docstrings']:
            option.docstrings = True
        elif opt in ['--suffix']:
            option.suffix = arg
        elif opt in ['--backup']:
            option.backup = arg

    # Sanity checks; a later check overrides the message of an earlier one.
    warn = False
    if option.update and option.outfile:
        warn = '--update and --output-file are mutually exclusive.'
    if len(args) == 0:
        warn = 'no input files given.'
    elif len(args) == 1 or len(args) > 2:
        warn = 'exactly 2 input files required.'
    if warn:
        fail(warn)

    if option.update:
        option.outfile = args[0]
    elif not option.outfile:
        option.outfile = '-'

    defs, refs = args

    try:
        merge(defs, refs, option = option)
    except MsgmergeError as err:
        sys.stdout.write('%(name)s: ' % globals() + '%s' % err + '\n')
        sys.exit(1)
564
def io(iofile, mode = 'rU'):
    '''
    Wrapper around open(). In a read mode the first three characters
    are read and the file is rewound unless they equal the UTF-8 byte
    order mark. Returns the open file object; an IOError is converted
    to MsgmergeError.
    '''
    try:
        fo = open(iofile, mode)
        if 'r' in mode:
            leading = fo.read(3)
            if leading != codecs.BOM_UTF8:
                # No BOM to skip: start again from the beginning.
                fo.seek(0)
    except IOError as msg:
        raise MsgmergeError('error while opening file: %s: %s.' %
                            (msg.args[1], iofile))
    return fo
576
def backup(infile):
    '''
    Handle backup of files in update mode.

    NOTE(review): this looks like an unfinished stub. It only derives
    the backup file name from <infile> and the SIMPLE_BACKUP_SUFFIX
    environment variable (default "~"); nothing is copied or written
    and nothing is returned.
    '''
    environ = os.environ
    environ.get('VERSION_CONTROL', '') # looked up, result unused
    backup_file = '%s%s' % (infile, environ.get('SIMPLE_BACKUP_SUFFIX', '~'))
583
def changes(new, old):
    '''
    Three-way compare of the existing file content <old> (a list of
    lines that still carry their line endings) with the entries <new>
    joined by newlines. Returns 0 when both texts are equal, otherwise
    -1 or 1.
    '''
    before = ''.join(old)
    after = '\n'.join(new)
    return (before > after) - (before < after)
586
def write(matches, outfile):
    '''
    Write the list <matches>, joined by newlines, to file <outfile>.
    The name `-' selects standard out. A real file is opened through
    io() and closed again before returning, so the data is flushed to
    disk even when the write fails half way.
    '''
    text = '\n'.join(matches)
    if outfile == '-':
        sys.stdout.write(text)
        return
    fd = io(outfile, 'w')
    try:
        fd.write(text)
    finally:
        fd.close()
594
def merge(def_file, ref_file, update = True, outfile = '-',
          docstrings = True, suffix = '~', backup = True,
          verbose = True, **kwds):
    '''
    Merge po file <def_file> with pot file <ref_file> . If <update> is
    set to True then only update if there are changes to the po
    file. Set outfile to write updated po file to an another file. Set
    to `-\' for writing to standard out. If docstrings is False
    docstrings flag will removed. Set verbose to False to suppress
    progress indicators. <kwds> is used to pass options from the
    command line interface.

    The effective settings are stored in the global <option>: either
    the object passed as keyword argument "option" or a new Options
    instance built from the other parameters. MsgmergeError raised
    while parsing or matching is passed on to the caller.
    '''
    global option
    option = kwds.get('option', Options(update = update,
                                        outfile = outfile,
                                        docstrings = docstrings,
                                        suffix = suffix,
                                        backup = backup,
                                        verbose = verbose))
    def_msgs = parse(def_file, 'msgstr')
    ref_msgs = parse(ref_file, 'msgid')
    if verbose and not __name__ == '__main__':
        sys.stderr.write('Merging %s with %s\n' % (ref_file, def_file))
    updated_lines = match(def_msgs, ref_msgs)
    if option.verbose:
        sys.stderr.write(' done.\n')
    if not option.update:
        write(updated_lines, option.outfile)
        return
    # Update mode: read the current content (closing the handle again)
    # and rewrite the po file only when the merged result differs.
    current = io(def_file)
    try:
        existing_lines = current.readlines()
    finally:
        current.close()
    if changes(updated_lines, existing_lines):
        write(updated_lines, def_file)
625
def merge_dir(directory, pot = False, include = [], exclude = [],
              verbose = True):
    '''
    Tries to merge a directory of po files. Uses simple glob to find
    po files and pot file. The parameter <pot> can be used to specify
    the pot file in the directory. If the list <include> is given only
    files in this list is merged. Use the list <exclude> to exclude
    files to be merged. This function is only useful if po files and
    pot file are in the same directory. Set <verbose> to get
    information when running.

    Raises MsgmergeError when no pot file or more than one pot file is
    found, or when no po file is found. A MsgmergeError raised while
    merging a single po file is reported on stderr and the remaining
    files are still processed. <include> and <exclude> are only read,
    never modified.
    '''
    if directory[-1] == '/':
        directory = os.path.dirname(directory)
    if pot:
        pot = os.path.basename(pot)
    else:
        found = glob.glob('%s/*.pot' % directory)
        if not found:
            raise MsgmergeError('No pot file found.')
        elif len(found) > 1:
            raise MsgmergeError('More than one pot file found: %s.' % found)
        pot = os.path.basename(found[0])

    if not include:
        pos = glob.glob('%s/*po' % directory)
        # A single po file is a perfectly valid directory content.
        if not pos:
            raise MsgmergeError('No po file(s) found.')
        pos = [ os.path.basename(po) for po in pos ]
    else:
        pos = [ os.path.basename(po) for po in include ]

    for po in exclude:
        if po in pos:
            pos.remove(po)

    template = '%s/%s'
    ref_path = template % (directory, pot)
    for po in pos:
        po_path = template % (directory, po)
        try:
            merge(po_path, ref_path, update = True, verbose = verbose,
                  outfile = po_path)
        except MsgmergeError as err:
            if verbose:
                sys.stderr.write('%s Not updated.\n' % err)
            else:
                sys.stderr.write('%s %s not updated.\n' % (err, po))
673
674		if __name__ == '__main__':
675		cmdline()

trunk/scripts/build/pygettext.py
r253614	r253615
1		#! /usr/bin/env python
2		# -*- coding: iso-8859-1 -*-
3		# Originally written by Barry Warsaw <barry@zope.com>
4		#
5		# Minimally patched to make it even more xgettext compatible
6		# by Peter Funk <pf@artcom-gmbh.de>
7		#
8		# 2002-11-22 Jürgen Hermann <jh@web.de>
9		# Added checks that _() only contains string literals, and
10		# command line args are resolved to module lists, i.e. you
11		# can now pass a filename, a module or package name, or a
12		# directory (including globbing chars, important for Win32).
13		# Made docstring fit in 80 chars wide displays using pydoc.
14		#
15
16		# for selftesting
17		try:
18		import fintl
19		_ = fintl.gettext
20		except ImportError:
21		_ = lambda s: s
22
23		__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
24
25		Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
26		internationalization of C programs. Most of these tools are independent of
27		the programming language and can be used from within Python programs.
28		Martin von Loewis' work[1] helps considerably in this regard.
29
30		There's one problem though; xgettext is the program that scans source code
31		looking for message strings, but it groks only C (or C++). Python
32		introduces a few wrinkles, such as dual quoting characters, triple quoted
33		strings, and raw strings. xgettext understands none of this.
34
35		Enter pygettext, which uses Python's standard tokenize module to scan
36		Python source code, generating .pot files identical to what GNU xgettext[2]
37		generates for C and C++ code. From there, the standard GNU tools can be
38		used.
39
40		A word about marking Python strings as candidates for translation. GNU
41		xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
42		and gettext_noop. But those can be a lot of text to include all over your
43		code. C and C++ have a trick: they use the C preprocessor. Most
44		internationalized C source includes a #define for gettext() to _() so that
45		what has to be written in the source is much less. Thus these are both
46		translatable strings:
47
48		gettext("Translatable String")
49		_("Translatable String")
50
51		Python of course has no preprocessor so this doesn't work so well. Thus,
52		pygettext searches only for _() by default, but see the -k/--keyword flag
53		below for how to augment this.
54
55		[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
56		[2] http://www.gnu.org/software/gettext/gettext.html
57
58		NOTE: pygettext attempts to be option and feature compatible with GNU
59		xgettext where ever possible. However some options are still missing or are
60		not fully implemented. Also, xgettext's use of command line switches with
61		option arguments is broken, and in these cases, pygettext just defines
62		additional switches.
63
64		Usage: pygettext [options] inputfile ...
65
66		Options:
67
68		-a
69		--extract-all
70		Extract all strings.
71
72		-d name
73		--default-domain=name
74		Rename the default output file from messages.pot to name.pot.
75
76		-E
77		--escape
78		Replace non-ASCII characters with octal escape sequences.
79
80		-D
81		--docstrings
82		Extract module, class, method, and function docstrings. These do
83		not need to be wrapped in _() markers, and in fact cannot be for
84		Python to consider them docstrings. (See also the -X option).
85
86		-h
87		--help
88		Print this help message and exit.
89
90		-k word
91		--keyword=word
92		Keywords to look for in addition to the default set, which are:
93		%(DEFAULTKEYWORDS)s
94
95		You can have multiple -k flags on the command line.
96
97		-K
98		--no-default-keywords
99		Disable the default set of keywords (see above). Any keywords
100		explicitly added with the -k/--keyword option are still recognized.
101
102		--no-location
103		Do not write filename/lineno location comments.
104
105		-n
106		--add-location
107		Write filename/lineno location comments indicating where each
108		extracted string is found in the source. These lines appear before
109		each msgid. The style of comments is controlled by the -S/--style
110		option. This is the default.
111
112		-o filename
113		--output=filename
114		Rename the default output file from messages.pot to filename. If
115		filename is `-' then the output is sent to standard out.
116
117		-p dir
118		--output-dir=dir
119		Output files will be placed in directory dir.
120
121		-S stylename
122		--style stylename
123		Specify which style to use for location comments. Two styles are
124		supported:
125
126		Solaris # File: filename, line: line-number
127		GNU #: filename:line
128
129		The style name is case insensitive. GNU style is the default.
130
131		-v
132		--verbose
133		Print the names of the files being processed.
134
135		-V
136		--version
137		Print the version of pygettext and exit.
138
139		-w columns
140		--width=columns
141		Set width of output to columns.
142
143		-x filename
144		--exclude-file=filename
145		Specify a file that contains a list of strings that are not be
146		extracted from the input files. Each string to be excluded must
147		appear on a line by itself in the file.
148
149		-X filename
150		--no-docstrings=filename
151		Specify a file that contains a list of files (one per line) that
152		should not have their docstrings extracted. This is only useful in
153		conjunction with the -D option above.
154
155		If `inputfile' is -, standard input is read.
156		""")
157
158		import os
159		import imp
160		import sys
161		import glob
162		import time
163		import getopt
164		import token
165		import tokenize
166		import operator
167
168		from umit.pm.core.const import PM_VERSION
169
170		__version__ = '1.5'
171
172		default_keywords = ['_']
173		DEFAULTKEYWORDS = ', '.join(default_keywords)
174
175		EMPTYSTRING = ''
176
177
178
179		# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
180		# there.
181		pot_header = _('''\
182		# PacketManipulator catalog.
183		# Copyright (C) 2009 Adriano Montero Marques
184		# Francesco Piccinno <stack.box@gmail.com>, 2009
185		#
186		msgid ""
187		msgstr ""
188		"Project-Id-Version: PacketManipulator %(pm_version)s\\n"
189		"POT-Creation-Date: %(time)s\\n"
190		"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
191		"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
192		"Language-Team: LANGUAGE <LL@li.org>\\n"
193		"MIME-Version: 1.0\\n"
194		"Content-Type: text/plain; charset=UTF-8\\n"
195		"Content-Transfer-Encoding: 8bit\\n"
196		"Generated-By: pygettext.py %(version)s\\n"
197
198		''')
199
200
def usage(code, msg=''):
    """Write the help text to stderr and exit with status <code>.

    The help text is the module docstring with %(...)s placeholders
    filled in from the module globals. A non-empty <msg> is written on
    a line of its own after the help text.
    """
    err = sys.stderr
    err.write('%s\n' % (__doc__ % globals(),))
    if msg:
        err.write('%s\n' % (msg,))
    sys.exit(code)
206
207
208
escapes = []

def make_escapes(pass_iso8859):
    """Fill the global table <escapes> with one output string per
    character code 0..255.

    With <pass_iso8859> true, characters whose code modulo 128 lies in
    32..126 are kept as they are; otherwise only codes 32..126 are
    kept. Every other character becomes a three digit octal escape.
    Backslash, tab, carriage return, newline and the double quote
    always get their C style escapes.

    The table is rebuilt on every call (the list object itself is kept
    and updated in place), so calling this function again replaces the
    previous table instead of appending to it.
    """
    global escapes
    if pass_iso8859:
        # Allow iso-8859 characters to pass through so that e.g. 'msgid
        # "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we
        # escape any character outside the 32..126 range.
        mod = 128
    else:
        mod = 256
    table = []
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            table.append(chr(i))
        else:
            table.append("\\%03o" % i)
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    escapes[:] = table
230
231
def escape(s):
    """Return <s> with every character replaced by its entry in the
    global <escapes> table (indexed by character code)."""
    global escapes
    return EMPTYSTRING.join([escapes[ord(char)] for char in s])
238
239
def safe_eval(s):
    """Evaluate the source text <s> of a string literal and return its
    value. Evaluation runs with an empty __builtins__ mapping and an
    empty local namespace."""
    # NOTE(review): this is still eval(); the callers in this file only
    # pass the text of tokenizer STRING tokens. Do not feed it anything
    # else without review.
    restricted_globals = {'__builtins__': {}}
    return eval(s, restricted_globals, {})
243
244
def normalize(s):
    """Convert the string <s> into the quoted, C style form used in .po
    files.

    A string without newlines becomes a single quoted and escaped
    string. Any other string becomes an empty string line followed by
    one quoted line per input line, each but the last ending in an
    escaped newline.
    """
    lines = s.split('\n')
    if len(lines) == 1:
        return '"' + escape(s) + '"'
    if not lines[-1]:
        # The text ended in a newline: drop the empty tail and give the
        # newline back to the (new) last line so it gets escaped too.
        del lines[-1]
        lines[-1] = lines[-1] + '\n'
    escaped = [escape(line) for line in lines]
    return '""\n"' + '\\n"\n"'.join(escaped) + '"'
260
261
def containsAny(str, set):
    """Return True when at least one of the chars in 'set' occurs in
    'str', False otherwise."""
    return any(char in str for char in set)
265
266
267		def _visit_pyfiles(list, dirname, names):
268		"""Helper for getFilesForName()."""
269		# get extension for python source files
270		if not globals().has_key('_py_ext'):
271		global _py_ext
272		_py_ext = [triple[0] for triple in imp.get_suffixes()
273		if triple[2] == imp.PY_SOURCE][0]
274
275		# don't recurse into CVS directories
276		if 'CVS' in names:
277		names.remove('CVS')
278
279		# add all *.py files to list
280		list.extend(
281		[os.path.join(dirname, file) for file in names
282		if os.path.splitext(file)[1] == _py_ext]
283		)
284
285
286		def _get_modpkg_path(dotted_name, pathlist=None):
287		"""Get the filesystem path for a module or a package.
288
289		Return the file system path to a file for a module, and to a directory for
290		a package. Return None if the name is not found, or is a builtin or
291		extension module.
292		"""
293		# split off top-most name
294		parts = dotted_name.split('.', 1)
295
296		if len(parts) > 1:
297		# we have a dotted path, import top-level package
298		try:
299		file, pathname, description = imp.find_module(parts[0], pathlist)
300		if file: file.close()
301		except ImportError:
302		return None
303
304		# check if it's indeed a package
305		if description[2] == imp.PKG_DIRECTORY:
306		# recursively handle the remaining name parts
307		pathname = _get_modpkg_path(parts[1], [pathname])
308		else:
309		pathname = None
310		else:
311		# plain name
312		try:
313		file, pathname, description = imp.find_module(
314		dotted_name, pathlist)
315		if file:
316		file.close()
317		if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
318		pathname = None
319		except ImportError:
320		pathname = None
321
322		return pathname
323
324
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.

    A name that does not exist on disk is expanded as a glob pattern
    when it contains one of the chars *?[], otherwise it is looked up
    as a module or package name. An empty list is returned when nothing
    is found.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            collected = []
            for hit in glob.glob(name):
                collected.extend(getFilesForName(hit))
            return collected

        # try to find module or package
        name = _get_modpkg_path(name)
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        collected = []
        os.path.walk(name, _visit_pyfiles, collected)
        return collected

    if os.path.exists(name):
        # a single file
        return [name]

    return []
353
354
class TokenEater:
    """Token callback that collects translatable strings.

    An instance is called once per token. It works as a small state
    machine: the current state is the bound method stored in __state.
    Collected messages are kept in __messages, a dict that maps each
    message to a dict of {(filename, lineno): isdocstring}.
    """

    def __init__(self, options):
        self.__options = options
        self.__messages = {}
        self.__state = self.__waiting
        self.__data = []
        self.__lineno = -1
        self.__freshmodule = 1
        self.__curfile = None

    def __call__(self, ttype, tstring, stup, etup, line):
        # dispatch to the current state; only the start row is needed
        start_row = stup[0]
        self.__state(ttype, tstring, start_row)

    def __waiting(self, ttype, tstring, lineno):
        opts = self.__options
        is_name = (ttype == tokenize.NAME)
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                elif ttype not in (tokenize.COMMENT, tokenize.NL):
                    self.__freshmodule = 0
                return
            # class or function docstring?
            if is_name and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if is_name and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        # ignore anything until we see the colon
        if ttype == tokenize.OP and tstring == ':':
            self.__state = self.__suitedocstring

    def __suitedocstring(self, ttype, tstring, lineno):
        # ignore any intervening noise
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
            return
        noise = (tokenize.NEWLINE, tokenize.INDENT, tokenize.COMMENT)
        if ttype not in noise:
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings. Record the
            # line number of the first line of the strings and update the list
            # of messages seen. Reset state for the next batch. If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
            return
        if ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
            return
        ignorable = [tokenize.COMMENT, token.INDENT, token.DEDENT,
                     token.NEWLINE, tokenize.NL]
        if ttype not in ignorable:
            # warn if we see anything else than STRING or whitespace
            details = {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }
            warning = _(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % details
            sys.stderr.write(warning + '\n')
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        if lineno is None:
            lineno = self.__lineno
        if msg in self.__options.toexclude:
            return
        entry = (self.__curfile, lineno)
        self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        """Remember <filename> as the file being scanned and expect a
        module docstring again."""
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        """Write the pot header and all collected messages to <fp>."""
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
        # The time stamp in the header doesn't have the same format as that
        # generated by xgettext...
        fp.write(pot_header % {'time': timestamp, 'version': __version__,
                               'pm_version': PM_VERSION} + '\n')
        # Sort the entries. First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            locations = tuple(sorted(v.keys()))
            reverse.setdefault(locations, []).append((k, v))
        for rkey in sorted(reverse.keys()):
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so. This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                isdocstring = 0
                if sum(v.values()):
                    isdocstring = 1
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples. We want to sort the entries in v first by
                # file name and then by line number.
                v = sorted(v.keys())
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        fp.write(_(
                            '# File: %(filename)s, line: %(lineno)d') % d
                            + '\n')
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceeds 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            fp.write(locline + '\n')
                            locline = "#:" + s
                    if len(locline) > 2:
                        fp.write(locline + '\n')
                if isdocstring:
                    fp.write('#, docstring' + '\n')
                fp.write('msgid %s\n' % normalize(k))
                fp.write('msgstr ""\n' + '\n')
505
506
507
def main():
    """Command line entry point.

    Parses the options from sys.argv, expands the input arguments to a
    list of files, runs every file through a TokenEater and writes the
    resulting catalog to the selected output (standard out for `-').
    Exits through usage() on invalid options and with status 1 when
    the exclude file cannot be read.
    """
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             # --no-docstrings takes a file name, hence the trailing '='
             'docstrings', 'no-docstrings=',
             ])
    except getopt.error as msg:
        usage(1, msg)

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0 # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            sys.stdout.write(
                _('pygettext.py (xgettext for Python) %s') % __version__
                + '\n')
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            fp = open(arg)
            try:
                for line in fp:
                    # Strip only the line terminator, so a last line
                    # without one keeps its final character.
                    options.nodocstrings[line.rstrip('\n')] = 1
            finally:
                fp.close()

    # calculate escapes: -E asks for non-ASCII characters to be escaped,
    # so characters may only pass through when it is NOT given.
    make_escapes(not options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            fp = open(options.excludefilename)
            # NOTE(review): readlines() keeps the trailing newline of every
            # line, so only messages that end in a newline can match.
            options.toexclude = fp.readlines()
            fp.close()
        except IOError:
            sys.stderr.write(_(
                "Can't read --exclude-file: %s") % options.excludefilename
                + '\n')
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                sys.stdout.write(_('Reading standard input') + '\n')
            fp = sys.stdin
            closep = 0
        else:
            if options.verbose:
                sys.stdout.write(_('Working on %s') % filename + '\n')
            fp = open(filename)
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokenize.tokenize(fp.readline, eater)
            except tokenize.TokenError as e:
                sys.stderr.write('%s: %s, line %d, column %d\n' % (
                    e.args[0], filename, e.args[1][0], e.args[1][1]))
        finally:
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()
664
665
666		if __name__ == '__main__':
667		main()
668		# some more test strings
669		_(u'a unicode string')
670		# this one creates a warning
671		_('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
672		_('more' 'than' 'one' 'string')

https://github.com/mamedev/mame/commit/be7cc43b0845591b2a993731509bbf95e4705b17

199869 Revisions