MAME SVN History

199869 Revisions

r45102 Sunday 21st February, 2016 at 13:16:28 UTC by Miodrag Milanović
Add other python tools for gettext replacement (nw)

[scripts/build]

check_po.py* msgfmt.py msgmerge.py* pygettext.py*

trunk/scripts/build/check_po.py
r0	r253614
	1	#! /usr/bin/env python
	2	#
	3	# check_po - a gramps tool to check validity of po files
	4	#
	5	# Copyright (C) 2006-2006 Kees Bakker
	6	#
	7	# This program is free software; you can redistribute it and/or modify
	8	# it under the terms of the GNU General Public License as published by
	9	# the Free Software Foundation; either version 2 of the License, or
	10	# (at your option) any later version.
	11	#
	12	# This program is distributed in the hope that it will be useful,
	13	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	# GNU General Public License for more details.
	16	#
	17	# You should have received a copy of the GNU General Public License
	18	# along with this program; if not, write to the Free Software
	19	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	20
	21	#
	22	# TODO
	23	#
	24	# * Check for HTML text in msgstr when there is none in msgid
	25	# * Check for matching HTML tag/endtag in msgstr
	26	#
	27
	28	# Adapted for Umit by Guilherme Polo, original file:
	29	# https://gramps.svn.sourceforge.net/svnroot/gramps/branches/gramps22/po/check_po
	30
	31	import re
	32	import sys
	33	from optparse import OptionParser
	34
	35	APP = "Umit"
	36
	37	all_total = {}
	38	all_fuzzy = {}
	39	all_untranslated = {}
	40	all_percent_s = {}
	41	all_named_s = {}
	42	all_bnamed_s = {}
	43	all_context = {}
	44	all_coverage = {}
	45	all_template_coverage = {}
	46
	47	def strip_quotes(st):
	48	if len(st) >= 2 and st[0] == '"' and st[len(st)-1] == '"':
	49	st = st.strip()[1:-1]
	50	return st
	51
	52	# This is a base class for all checks
	53	class Check:
	54	def __init__( self ):
	55	self.msgs = []
	56	def diag( self ):
	57	if len( self.msgs ):
	58	print
	59	print self.diag_header
	60	for m in self.msgs:
	61	m.diag()
	62	def summary( self ):
	63	print "%-20s%d" % ( self.summary_text, len(self.msgs) )
	64
	65	class Check_fmt( Check ):
	66	def __init__( self, fmt ):
	67	Check.__init__( self )
	68	self.diag_header = "-------- %s mismatches --------------" % fmt
	69	self.summary_text = "%s mismatches:" % fmt
	70	self.fmt = fmt
	71	def process( self, msg ):
	72	msgid = msg.msgid
	73	msgstr = msg.msgstr
	74	cnt1 = msgid.count( self.fmt )
	75	cnt2 = msgstr.count( self.fmt )
	76	if cnt1 != cnt2:
	77	self.msgs.append( msg )
	78
	79	class Check_named_fmt( Check ):
	80	# A pattern to find all %()
	81	find_named_fmt_pat = re.compile('% $ \w+ $ \d* \D', re.VERBOSE)
	82
	83	def __init__( self ):
	84	Check.__init__( self )
	85	self.diag_header = "-------- %() name mismatches --------------"
	86	self.summary_text = "%() name mismatches:"
	87	def process( self, msg ):
	88	msgid = msg.msgid
	89	msgstr = msg.msgstr
	90	# Same number of named formats?
	91	fmts1 = self.find_named_fmt_pat.findall( msgid )
	92	fmts2 = self.find_named_fmt_pat.findall( msgstr )
	93	if len( fmts1 ) != len( fmts2 ):
	94	self.msgs.append( msg )
	95	else:
	96	# Do we have the same named formats?
	97	fmts1.sort()
	98	fmts2.sort()
	99	if fmts1 != fmts2:
	100	self.msgs.append( msg )
	101
	102	class Check_missing_sd( Check ):
	103	# A pattern to find %() without s or d
	104	# Here is a command to use for testing
	105	# print re.compile('% $ \w+ $ \d* (\D\|$)', re.VERBOSE).findall( '%(event_name)s: %(place)s%(endnotes)s. ' )
	106	find_named_fmt_pat2 = re.compile('% $ \w+ $ \d* (\D\|$)', re.VERBOSE)
	107
	108	def __init__( self ):
	109	Check.__init__( self )
	110	self.diag_header = "-------- %() without 's' or 'd' mismatches --------------"
	111	self.summary_text = "%() missing s/d:"
	112	def process( self, msg ):
	113	msgstr = msg.msgstr
	114	fmts = self.find_named_fmt_pat2.findall( msgstr )
	115	for f in fmts:
	116	if not f in ('s', 'd'):
	117	self.msgs.append( msg )
	118	break
	119
	120	class Check_runaway( Check ):
	121	def __init__( self ):
	122	Check.__init__( self )
	123	self.diag_header = "-------- Runaway context in translation ---------"
	124	self.summary_text = "Runaway context:"
	125	def process( self, msg ):
	126	msgid = msg.msgid
	127	msgstr = msg.msgstr
	128
	129	# Runaway context. In the translated part we only to see
	130	# the translation of the word after the \|
	131	if msgid.count('\|') > 0 and msgstr.count('\|') > 0 and msgid != msgstr:
	132	self.msgs.append( msg )
	133
	134	class Check_xml_chars( Check ):
	135	# Special XML characters
	136	# It is not allowed to have a quote, an ampersand or an angle bracket
	137	xml_chars_pat = re.compile( r'(?<=\W) > \| " \| & (?!(quot\|nbsp\|gt\|amp);)', re.VERBOSE )
	138
	139	def __init__( self ):
	140	Check.__init__( self )
	141	self.diag_header = "-------- unescaped XML special characters ---------"
	142	self.summary_text = "XML special chars:"
	143	def process( self, msg ):
	144	msgid = msg.msgid
	145	msgstr = msg.msgstr
	146
	147	# XML errors
	148	# Only look at messages in the tips.xml
	149	if msg.is_tips_xml:
	150	if self.xml_chars_pat.search( msgstr ):
	151	self.msgs.append( msg )
	152
	153	class Check_last_char( Check ):
	154	def __init__( self ):
	155	Check.__init__( self )
	156	self.diag_header = "-------- last character not identical ---------"
	157	self.summary_text = "Last character:"
	158	def process( self, msg ):
	159	msgid = msg.msgid
	160	msgstr = msg.msgstr
	161
	162	# Last character of msgid? White space? Period?
	163	if msg.is_fuzzy:
	164	return
	165
	166	msgid_last = msgid[-1:]
	167	msgstr_last = msgstr[-1:]
	168	if msgid_last.isspace() != msgstr_last.isspace():
	169	self.msgs.append( msg )
	170	elif (msgid_last == '.') != (msgstr_last == '.'):
	171	self.msgs.append( msg )
	172
	173	class Check_shortcut_trans( Check ):
	174	def __init__( self ):
	175	Check.__init__( self )
	176	self.diag_header = "-------- shortcut key in translation ---------"
	177	self.summary_text = "Shortcut in msgstr:"
	178	def process( self, msg ):
	179	msgid = msg.msgid
	180	msgstr = msg.msgstr
	181
	182	if msgid.count('_') == 0 and msgstr.count('_') > 0:
	183	self.msgs.append( msg )
	184
	185	class Msgid:
	186	fuzzy_pat = re.compile( 'fuzzy' )
	187	tips_xml_pat = re.compile( r'tips\.xml' )
	188	def __init__( self, msgnr, lineno ):
	189	self._msgid = []
	190	self._msgstr = []
	191	self.msgid = ''
	192	self.msgstr = ''
	193	self._cmnt = []
	194	self.nr = msgnr
	195	self.lineno = lineno
	196	self.is_fuzzy = 0
	197	self.is_tips_xml = 0
	198
	199	def diag( self ):
	200	if 1:
	201	print
	202	print "msg nr: %d, lineno: %d%s" % ( self.nr, self.lineno, self.is_fuzzy and " (fuzzy)" or "" )
	203	sys.stdout.write( ''.join( self._msgid ) )
	204	sys.stdout.write( ''.join( self._msgstr ) )
	205	else:
	206	# Compatible with the old check_po
	207	print "%d '%s' : '%s'" % ( self.lineno, self.msgid, self.msgstr )
	208
	209	def add_msgid( self, line, lineno ):
	210	self._msgid.append( line )
	211	line = re.sub( r'msgid\s+', '', line )
	212	line = line.strip()
	213	if line[0] != '"' or line[-1:] != '"':
	214	print "ERROR at line %d: Missing quote." % lineno
	215	line = strip_quotes( line )
	216	self.msgid += line
	217
	218	def add_msgstr( self, line, lineno ):
	219	self._msgstr.append( line )
	220	line = re.sub( r'msgstr\s+', '', line )
	221	line = line.strip()
	222	if line[0] != '"' or line[-1:] != '"':
	223	print "ERROR at line %d: Missing quote." % lineno
	224	line = strip_quotes( line )
	225	self.msgstr += line
	226
	227	def add_cmnt( self, line ):
	228	self._cmnt.append( line )
	229	if not self.is_fuzzy and self.fuzzy_pat.search( line ):
	230	self.is_fuzzy = 1
	231	if not self.is_tips_xml and self.tips_xml_pat.search( line ):
	232	self.is_tips_xml = 1
	233
	234	def read_msgs( fname ):
	235	empty_pat = re.compile( r'^ \s* $', re.VERBOSE )
	236	comment_pat = re.compile( r'\#', re.VERBOSE )
	237	msgid_pat = re.compile( r'msgid \s+ "', re.VERBOSE )
	238	msgstr_pat = re.compile( r'msgstr \s+ "', re.VERBOSE )
	239	str_pat = re.compile( r'"', re.VERBOSE )
	240	old_pat = re.compile( r'\#~ \s+ ', re.VERBOSE )
	241
	242	msgnr = 0 # This is the message number of the next message to read. The first real message is 1.
	243	f = open( fname )
	244	lines = f.readlines()
	245
	246	# parse it like a statemachine
	247	NONE = 0 # Nothing detected, yet
	248	CMNT = 1 # Inside comment part
	249	MSGID = 2 # Inside msgid part
	250	MSGSTR = 3 # Inside msgstr part
	251	STR = 4 # A continuation string
	252	OLD = 5 # An old pattern with #~
	253
	254	state = NONE
	255	msg = None
	256	msgs = []
	257
	258	for ix in range( len(lines) ): # Use line numbers for messages
	259	line = lines[ix]
	260	lineno = ix + 1
	261
	262	m = empty_pat.match( line )
	263	if m:
	264	continue # Empty lines are not interesting
	265
	266	# What's the next state?
	267	if old_pat.match( line ):
	268	next_state = OLD
	269	elif comment_pat.match( line ):
	270	next_state = CMNT
	271	elif msgid_pat.match( line ):
	272	next_state = MSGID
	273	elif msgstr_pat.match( line ):
	274	next_state = MSGSTR
	275	elif str_pat.match( line ):
	276	next_state = STR
	277	else:
	278	print 'WARNING: Unexpected input at %(fname)s:%(lineno)d' % vars()
	279	next_state = NONE
	280
	281	#print "%(state)d->%(next_state)d\t%(line)s" % vars()
	282	if state == NONE:
	283	# expect msgid or comment or old stuff
	284	if next_state == CMNT:
	285	state = CMNT
	286	msg = Msgid( msgnr, lineno ) # Start with an empty new item
	287	msgnr += 1
	288	msgs.append( msg )
	289	msg.add_cmnt( line )
	290
	291	elif next_state == MSGID:
	292	state = MSGID
	293	msg = Msgid( msgnr, lineno ) # Start with an empty new item
	294	msgnr += 1
	295	msgs.append( msg )
	296	msg.add_msgid( line, lineno )
	297
	298	elif next_state == MSGSTR:
	299	print 'WARNING: Wild msgstr at %(fname)s:%(lineno)d' % vars()
	300	state = MSGSTR
	301	msg = Msgid( msgnr, lineno ) # Start with an empty new item
	302	msgnr += 1
	303	msgs.append( msg )
	304	msg.add_msgstr( line, lineno )
	305
	306	elif next_state == STR:
	307	print 'WARNING: Wild string at %(fname)s:%(lineno)d' % vars()
	308
	309	elif next_state == OLD:
	310	pass # Just skip
	311
	312	elif state == CMNT:
	313	if next_state == CMNT:
	314	if msg:
	315	msg.add_cmnt( line )
	316	else:
	317	# Note. We may need to do something about these comments
	318	# Skip for now
	319	pass
	320
	321	elif next_state == MSGID:
	322	state = MSGID
	323	if not msg:
	324	msg = Msgid( msgnr, lineno ) # Start with an empty new item
	325	msgnr += 1
	326	msgs.append( msg )
	327	msg.add_msgid( line, lineno )
	328
	329	elif next_state == MSGSTR:
	330	print 'WARNING: Wild msgstr at %(fname)s:%(lineno)d' % vars()
	331	state = MSGSTR
	332	msg = Msgid( msgnr, lineno ) # Start with an empty new item
	333	msgnr += 1
	334	msgs.append( msg )
	335	msg.add_msgstr( line, lineno )
	336
	337	elif next_state == STR:
	338	print 'WARNING: Wild string at %(fname)s:%(lineno)d' % vars()
	339
	340	elif next_state == OLD:
	341	msg = None
	342	pass # Just skip
	343
	344	elif state == MSGID:
	345	if next_state == CMNT:
	346	# Hmmm. A comment here?
	347	print 'WARNING: Unexpted comment at %(fname)s:%(lineno)d' % vars()
	348
	349	elif next_state == MSGID:
	350	raise Exception( 'Unexpected msgid at %(fname)s:%(lineno)d' % vars() )
	351
	352	elif next_state == MSGSTR:
	353	state = MSGSTR
	354	msg.add_msgstr( line, lineno )
	355
	356	elif next_state == STR:
	357	msg.add_msgid( line, lineno )
	358
	359	elif next_state == OLD:
	360	msg = None
	361	pass # Just skip
	362
	363	elif state == MSGSTR:
	364	if next_state == CMNT:
	365	# A comment probably starts a new item
	366	state = CMNT
	367	msg = Msgid( msgnr, lineno )
	368	msgnr += 1
	369	msgs.append( msg )
	370	msg.add_cmnt( line )
	371
	372	elif next_state == MSGID:
	373	state = MSGID
	374	msg = Msgid( msgnr, lineno )
	375	msgnr += 1
	376	msgs.append( msg )
	377	msg.add_msgid( line, lineno )
	378
	379	elif next_state == MSGSTR:
	380	raise Exception( 'Unexpected msgstr at %(fname)s:%(lineno)d' % vars() )
	381
	382	elif next_state == STR:
	383	msg.add_msgstr( line, lineno )
	384
	385	elif next_state == OLD:
	386	msg = None
	387	pass # Just skip
	388
	389	else:
	390	raise Exception( 'Unexpected state in po parsing (state = %d)' % state )
	391
	392	# Strip items with just comments. (Can this happen?)
	393	msgs1 = []
	394	for m in msgs:
	395	if not m.msgid and not m.msgstr:
	396	#print "INFO: No msgid or msgstr at %s:%s" % ( fname, m.lineno )
	397	pass
	398	else:
	399	msgs1.append( m )
	400	msgs = msgs1
	401	return msgs
	402
	403	def analyze_msgs( options, fname, msgs, nr_templates = None, nth = 0 ):
	404	nr_fuzzy = 0
	405	nr_untranslated = 0
	406
	407	checks = []
	408	checks.append( Check_fmt( '%s' ) )
	409	checks.append( Check_fmt( '%d' ) )
	410	checks.append( Check_named_fmt() )
	411	checks.append( Check_missing_sd() )
	412	checks.append( Check_runaway() )
	413	checks.append( Check_xml_chars() )
	414	checks.append( Check_last_char() )
	415	checks.append( Check_shortcut_trans() )
	416
	417	for msg in msgs:
	418	msgid = msg.msgid
	419	msgstr = msg.msgstr
	420	#print
	421	#print "msgid: %(msgid)s" % vars()
	422	#print "msgstr: %(msgstr)s" % vars()
	423
	424	if not msgstr:
	425	nr_untranslated += 1
	426	continue
	427
	428	if msg.is_fuzzy:
	429	nr_fuzzy += 1
	430	if options.skip_fuzzy:
	431	continue
	432
	433	for c in checks:
	434	c.process( msg )
	435
	436	nr_msgs = len(msgs)
	437	if nth > 0:
	438	print
	439	print "====================================="
	440	print "%-20s%s" % ( "File:", fname )
	441	print "%-20s%d" % ( "Template total:", nr_templates )
	442	print "%-20s%d" % ( "PO total:", nr_msgs )
	443	print "%-20s%d" % ( "Fuzzy:", nr_fuzzy )
	444	print "%-20s%d" % ( "Untranslated:", nr_untranslated )
	445
	446	for c in checks:
	447	c.summary()
	448
	449	po_coverage = (1.0 - (float(nr_untranslated) / float(nr_msgs))) * 100
	450	print "%-20s%5.2f%%" % ( "PO Coverage:", po_coverage )
	451
	452	template_coverage = po_coverage * float(nr_msgs) / float(nr_templates)
	453	print "%-20s%5.2f%%" % ( "Template Coverage:", template_coverage )
	454
	455	if not options.only_summary:
	456	for c in checks:
	457	c.diag()
	458
	459	def main(args):
	460	if len(sys.argv) < 2:
	461	print "Error: Especify the umit.pot file path"
	462	sys.exit(1)
	463
	464	parser = OptionParser(description="This program validates a PO file for "
	465	"%s." % APP, usage='%prog [options] po-file...' )
	466
	467	parser.add_option("", "--skip-fuzzy",
	468	action="store_true", dest="skip_fuzzy", default=False,
	469	help="skip fuzzies")
	470
	471	parser.add_option("-s", "--only-summary",
	472	action="store_true", dest="only_summary", default=False,
	473	help="only give the summary")
	474
	475	options, args = parser.parse_args()
	476
	477	try:
	478	pot_msgs = read_msgs(sys.argv[1])
	479	nr_templates = len(pot_msgs)
	480	nth = 0
	481	for fname in args:
	482	msgs = read_msgs(fname)
	483	analyze_msgs(options, fname, msgs, nr_templates, nth)
	484	nth += 1
	485
	486	except Exception, e:
	487	print e
	488
	489	if __name__ == "__main__":
	490	main(sys.argv)

trunk/scripts/build/msgfmt.py
r253613	r253614
1	1	#! /usr/bin/env python
2	2	# -*- coding: iso-8859-1 -*-
3	3	# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
	4	#
	5	# Changelog: (Guilherme Polo)
	6	# 2008-04-11
	7	# - Support for files with BOM UTF8 mark.
	8	#
	9	# 2008-04-10
	10	# - Support for fuzzy strings in output.
	11	# - Bumped to version 1.1.1
4	12
5	13	"""Generate binary message catalog from textual translation description.
6	14
r253613	r253614
16	24	Specify the output file to write to. If omitted, output will go to a
17	25	file named filename.mo (based off the input file name).
18	26
	27	-f
	28	--use-fuzzy
	29	Use fuzzy entries in output
	30
19	31	-h
20	32	--help
21	33	Print this message and exit.
r253613	r253614
23	35	-V
24	36	--version
25	37	Display version information and exit.
	38
	39	Before using the -f (fuzzy) option, read this:
	40	http://www.finesheer.com:8457/cgi-bin/info2html?(gettext)Fuzzy%20Entries&lang=en
26	41	"""
27	42
28	43	import sys
r253613	r253614
30	45	import getopt
31	46	import struct
32	47	import array
	48	import codecs
33	49
34		__version__ = "1.1"
	50	__version__ = "1.1.1"
35	51
36	52	MESSAGES = {}
37	53
38	54
39
40	55	def usage(code, msg=''):
41	56	print >> sys.stderr, __doc__
42	57	if msg:
r253613	r253614
44	59	sys.exit(code)
45	60
46	61
47
48		def add(id, str, fuzzy):
49		"Add a non-fuzzy translation to the dictionary."
	62	def add(id, str, fuzzy, use_fuzzy):
	63	"Add a translation to the dictionary."
50	64	global MESSAGES
51		if not fuzzy and str:
	65	if (not fuzzy or use_fuzzy) and str:
52	66	MESSAGES[id] = str
53	67
54	68
55
56	69	def generate():
57	70	"Return the generated output."
58	71	global MESSAGES
r253613	r253614
95	108	return output
96	109
97	110
98
99		def make(filename, outfile):
	111	def make(filename, outfile, use_fuzzy):
100	112	ID = 1
101	113	STR = 2
102	114
r253613	r253614
110	122
111	123	try:
112	124	lines = open(infile).readlines()
	125	if lines[0].startswith(codecs.BOM_UTF8):
	126	lines[0] = lines[0][len(codecs.BOM_UTF8):]
113	127	except IOError, msg:
114	128	print >> sys.stderr, msg
115	129	sys.exit(1)
r253613	r253614
123	137	lno += 1
124	138	# If we get a comment line after a msgstr, this is a new entry
125	139	if l[0] == '#' and section == STR:
126		add(msgid, msgstr, fuzzy)
	140	add(msgid, msgstr, fuzzy, use_fuzzy)
127	141	section = None
128	142	fuzzy = 0
129	143	# Record a fuzzy mark
r253613	r253614
133	147	if l[0] == '#':
134	148	continue
135	149	# Now we are in a msgid section, output previous section
136		if l.startswith('msgid') ~~and not l.startswith('msgid_plural')~~:
	150	if l.startswith('msgid'):
137	151	if section == STR:
138		add(msgid, msgstr, fuzzy)
	152	add(msgid, msgstr, fuzzy, use_fuzzy)
139	153	section = ID
140	154	l = l[5:]
141	155	msgid = msgstr = ''
142		is_plural = False
143		# This is a message with plural forms
144		elif l.startswith('msgid_plural'):
145		if section != ID:
146		print >> sys.stderr, 'msgid_plural not preceeded by msgid on %s:%d' %\
147		(infile, lno)
148		sys.exit(1)
149		l = l[12:]
150		msgid += '\0' # separator of singular and plural
151		is_plural = True
152	156	# Now we are in a msgstr section
153	157	elif l.startswith('msgstr'):
154	158	section = STR
155		if l.startswith('msgstr['):
156		if not is_plural:
157		print >> sys.stderr, 'plural without msgid_plural on %s:%d' %\
158		(infile, lno)
159		sys.exit(1)
160		l = l.split(']', 1)[1]
161		if msgstr:
162		msgstr += '\0' # Separator of the various plural forms
163		else:
164		if is_plural:
165		print >> sys.stderr, 'indexed msgstr required for plural on %s:%d' %\
166		(infile, lno)
167		sys.exit(1)
168		l = l[6:]
	159	l = l[6:]
169	160	# Skip empty lines
170	161	l = l.strip()
171	162	if not l:
r253613	r253614
183	174	sys.exit(1)
184	175	# Add last entry
185	176	if section == STR:
186		add(msgid, msgstr, fuzzy)
	177	add(msgid, msgstr, fuzzy, use_fuzzy)
187	178
188	179	# Compute output
189	180	output = generate()
r253613	r253614
194	185	print >> sys.stderr, msg
195	186
196	187
197
198	188	def main():
199	189	try:
200		opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
201		['help', 'version', 'output-file='])
	190	opts, args = getopt.getopt(sys.argv[1:], 'hVo:f',
	191	['help', 'version', 'output-file=', 'use-fuzzy'])
202	192	except getopt.error, msg:
203	193	usage(1, msg)
204	194
205	195	outfile = None
	196	use_fuzzy = False
206	197	# parse options
207	198	for opt, arg in opts:
208	199	if opt in ('-h', '--help'):
r253613	r253614
210	201	elif opt in ('-V', '--version'):
211	202	print >> sys.stderr, "msgfmt.py", __version__
212	203	sys.exit(0)
	204	elif opt in ('-f', '--use-fuzzy'):
	205	use_fuzzy = True
213	206	elif opt in ('-o', '--output-file'):
214	207	outfile = arg
215	208	# do it
r253613	r253614
219	212	return
220	213
221	214	for filename in args:
222		make(filename, outfile)
	215	make(filename, outfile, use_fuzzy)
223	216
224	217
225	218	if __name__ == '__main__':

trunk/scripts/build/msgmerge.py
r0	r253614
	1	#! /usr/bin/env python
	2	# -*- coding: iso-8859-1 -*-
	3	#
	4	# Copyright Terje Røsten <terjeros@phys.ntnu.no> Nov. 2003.
	5	#
	6	'''Merge two Uniforum style .po files together.
	7
	8	This is an implementation (not complete) in Python of the GNU
	9	msgmerge(1) program. It can be used on the command line (or as a Python
	10	module).
	11
	12	Usage: msgmerge.py [OPTIONS] def.po ref.pot
	13
	14	The def.po file is an existing PO file with translations. The ref.pot
	15	file is the last created PO file with up-to-date source references but
	16	old translations, or a PO Template file.
	17
	18	Options:
	19	-U, --update update def.po,
	20	do nothing if def.po is already up to date.
	21	-o, --output-file=FILE write output to file FILE. Output is written to
	22	stdout if set to - or if the option is not present.
	23	-D, --docstrings don\'t remove docstring flag.
	24	-h, --help display help text and exit.
	25	-V, --version display version and exit.
	26	-q, --quiet, --silent suppress progress indicators.
	27	'''
	28	from __future__ import generators
	29
	30	if not __name__ == '__main__':
	31	__doc__ += '''\
	32
	33	When used as module the interesting functions are merge() and
	34	merge_dir().
	35
	36	The merge() function does the same as the command line version, and
	37	the arguments are as follows. The first argument is the def.po file,
	38	then the ref.pot file. The third argument controls whether do work in
	39	update mode or not, then the next argument sets the output file. Set
	40	the next argument to False to remove docstring flags. The last
	41	argument can be used to suppress progress indicators. The default is
	42	to work in update mode with progress indicators.
	43
	44	Example:
	45	merge("def.po", "ref.pot")
	46	merge the files def.po and ref.pot and write output to def.po if
	47	there are any changes.
	48	merge("def.po", "red.pot", docstrings = False, verbose = False,
	49	update = False, outfile = "-")
	50	merge the files def.po and ref.pot and write output to stdout,
	51	remove docstring flag and be quiet.
	52
	53	The merge_dir() function is useful when merging a directory of po
	54	files. The only required argument is the name of the directory with po
	55	files and the pot file. It will use simple glob to find the files. The
	56	second argument can be used to specify the pot file (in the
	57	directory). Third argument is a list of po files (then globbing will
	58	not be used) and the next argument is list of filename to exclude. The
	59	last argument can be used to suppress progress indicators. Docstring
	60	flag will not be removed.
	61
	62	Example:
	63	merge_dir("po")
	64	merge (and update) all po files in directory po with the single pot
	65	file in the same directory.
	66
	67	The module raises the MsgmergeError exception in case of error.
	68	'''
	69	__revision__ = '$Id: msgmerge.py,v 1.41 2003/11/18 19:10:42 terjeros Exp $'
	70	__version__ = '0.1'
	71	name = 'msgmerge.py'
	72
	73	__all__ = [ 'merge', 'merge_dir', 'MsgmergeError' ]
	74
	75	import sys
	76	import re
	77	import string
	78	import getopt
	79	import difflib
	80	import glob
	81	import os.path
	82	import codecs
	83
	84	try:
	85	True, False
	86	except NameError:
	87	True, False = 1, 0
	88
	89	class Msgs:
	90	'''Class to hold information about messages.'''
	91	width = 80
	92	file = ''
	93	def __init__(self, msgid, msgstr, flag, lno, entry, **kwds):
	94	self.id = msgid
	95	self.str = msgstr
	96	self.cmt = kwds.get('cmt', '')
	97	self.ref = kwds.get('ref', '')
	98	self.autocmt = kwds.get('autocmt', '')
	99	self.flag = flag
	100	self.entry = entry
	101	self.lno = lno
	102	self.count = 0
	103	def wash(self):
	104	self.id = wash(self.id, width = self.width,
	105	filename = self.file, lno = self.lno)
	106	self.str = wash(self.str, 'msgstr', width = self.width,
	107	filename = self.file, lno = self.lno)
	108	def used(self):
	109	self.count += 1
	110	def get_clean_id(self):
	111	return self.id.replace('msgid "','', 1)
	112	def obsolete(self):
	113	self.width -= len('#~ ')
	114	self.wash()
	115	t = [ '#~ %s\n' % s for s in self.id.splitlines() ]
	116	self.id = ''.join(t)
	117	t = [ '#~ %s\n' % s for s in self.str.splitlines() ]
	118	self.str = ''.join(t)
	119
	120	class Options:
	121	'''Class to hold options'''
	122	def __init__(self, cmdline = False, **kwds):
	123	if not cmdline:
	124	self.update = kwds.get('update', True)
	125	self.outfile = kwds.get('outfile', '-')
	126	self.docstrings = kwds.get('docstrings', True)
	127	self.verbose = kwds.get('verbose', False)
	128	self.suffix = kwds.get('suffix', '~')
	129	self.backup = kwds.get('backup', True)
	130	else:
	131	self.update = False
	132	self.outfile = False
	133	self.docstrings = False
	134	self.verbose = True
	135	self.suffix = '~'
	136	self.backup = True
	137
	138	class MsgmergeError(Exception):
	139	'''Exception class for msgmerge'''
	140
	141	def gen(lines):
	142	'''
	143	Generator which returns a line (with the obsolete prefix removed)
	144	from the list of lines in <lines>, the line number is also
	145	returned.
	146	'''
	147	lno = 0
	148	for l in lines:
	149	lno += 1
	150	yield l.replace('#~ ', '', 1), lno
	151	yield l, lno
	152
	153	def slurp(s, g, sign):
	154	'''
	155	The string returned from iterator <g>\'s next() method is added to
	156	the string <s> if string returned is beginning with the string
	157	<sign>. The return value is the first returned string which do not
	158	start with <sign>, the line number, the iterator <g> and the
	159	(possibly) updated string <s>.
	160	'''
	161	l, lno = g.next()
	162	while l.startswith(sign) or (sign == '# ' and l.strip() == '#'):
	163	s += l
	164	l, lno = g.next()
	165	return l, lno, g, s
	166
	167	def splitted_fit(chunk, line, width, break_always, break_after_space):
	168	'''
	169	Check if string <chunk> can be splitted by newline to fit into
	170	string <line> with width smaller than <width>. The return value is
	171	a tuple where the first element is the part of chunk which fits
	172	and the second element is the rest of chunk.
	173	'''
	174	ret = '', chunk
	175	l = len(chunk)
	176	for i in range(l - 1, -1, -1):
	177	if chunk[i] in break_always and len(chunk[0:i] + line) <= width:
	178	ret = chunk[0:i], chunk[i:]
	179	break
	180	elif chunk[i] in break_after_space and i and chunk[i-1].strip() == '':
	181	ret = chunk[0:i], chunk[i:]
	182	break
	183	elif chunk[i] == '\\' and len(chunk[i:]) > 1 and chunk[i+1] == '"' \
	184	and len(chunk[0:i] + line) <= width:
	185	ret = chunk[0:i], chunk[i:]
	186	break
	187	return ret
	188
	189	def wrap(msg, width):
	190	'''
	191	Accept a list <msg> of strings to wrap, each string is wrapped to
	192	width <width> and surrounded with a pair of ". The return value is
	193	a string with these wrapped strings joined together with newlines.
	194	'''
	195	if msg.isspace() or not msg:
	196	return '"%s"' % msg
	197
	198	# \ and " is here, but " is special in po files.
	199	break_always = '$%+({['
	200	# XXX what about: « © » ¦ § etc?
	201	break_after_space = '_-=^`~\'<\|>&*#@'
	202	enders = '.:,;!?/])}\|%-'
	203	extra = string.punctuation
	204	for c in enders:
	205	extra = extra.replace(c, '')
	206	escaped = { 'enders' : re.escape(enders),
	207	'extra' : re.escape(extra) }
	208	regex = r'([\w%(extra)s][\s%(enders)s)]+[\s%(enders)s])' % escaped
	209	r = re.compile(regex, re.UNICODE)
	210	msg = [ m for m in r.split(msg) if not m == '']
	211
	212	lines = []
	213	line = msg.pop(0)
	214
	215	# Handle \n on end of line
	216	if len(msg) > 1 and msg[-1] == 'n' and len(msg[-2]) > 0 \
	217	and msg[-2][-1] == '\\':
	218	msg[-2] += msg[-1]
	219	msg.pop()
	220	# Do not allow a single \n on a line
	221	if len(msg) > 2 and msg[-1] == '\\n':
	222	msg[-2] += msg[-1]
	223	msg.pop()
	224
	225	for m in msg:
	226	if len(line) > width or len(m) > width or len(line + m) > width:
	227	fit, rest = splitted_fit(m, line, width, break_always,
	228	break_after_space)
	229	line += fit
	230	lines.append(line)
	231	line = rest
	232	else:
	233	line += m
	234	lines.append(line)
	235	lines = [ '"%s"' % l for l in lines ]
	236	return '\n'.join(lines)
	237
	238	def normalize(lines):
	239	'''
	240	Normalize <lines>: e.g "\n\nText\n\n" becomes:
	241	"\n"
	242	"\n"
	243	"Text\n"
	244	"\n"
	245	'''
	246	if 0 < lines.find('\\n') < len(lines) - 3:
	247	if lines[-3:] == '\\n"':
	248	lines = lines[:-3].replace('\\n','\\n"\n"').replace('""\n','') \
	249	+ '\\n"'
	250	else:
	251	lines = lines.replace('\\n','\\n"\n"').replace('""\n','')
	252	return lines
	253
	254	def wash(msg, idx = 'msgid', width = 80, **kwds):
	255	'''
	256	Do washing on the msgstr or msgid fields. Wrap the text to fit in
	257	width <width>. <msg> is a list of lines that makes up the field.
	258	<idx> indicate msgid or msgstr, <width> holds the width. <filename>
	259	and <lno> (line number) is picked up from <kwds>.
	260	Returns the washed field as a string.
	261	'''
	262	msg = normalize(msg)
	263	lines = msg.splitlines()
	264	size = len(lines)
	265	if size > 1 or len(msg) > width:
	266	washed = []
	267	# The first line is special
	268	m = re.match('^%s "(.*)"$' % (idx, ), lines[0])
	269	if not m:
	270	print lines[0]
	271	kwds['lno'] -= size + 1
	272	raise MsgmergeError('parse error: %(filename)s:%(lno)s.'
	273	% kwds)
	274	washed.append(m.group(1))
	275	if m.group(1).endswith(r'\n'):
	276	washed.append('')
	277	i = 0
	278	for line in lines[1:]:
	279	m = re.match('^"(\s.)"$', line)
	280	i += 1
	281	if not m:
	282	print line
	283	kwds['lno'] -= size - i + 1
	284	raise MsgmergeError('parse error: %(filename)s:%(lno)s.'
	285	% kwds)
	286	washed[-1] += m.group(1)
	287	if m.group(1).endswith(r'\n'):
	288	washed.append('')
	289	if washed[0] == '':
	290	washed.pop(0)
	291	if washed[-1] == '':
	292	washed.pop()
	293
	294	washed = [ wrap(w, width - 3) for w in washed ] # " and \n removed.
	295
	296	# One line or multiline
	297	if len(washed) == 1 and len('%s %s\n' % (idx, washed[0])) < width:
	298	washed = '%s %s\n' % (idx, washed[0])
	299	else:
	300	washed = '%s ""\n%s\n' % (idx, '\n'.join(washed))
	301	else:
	302	washed = msg
	303
	304	return washed
	305
	306	def parse(filename, entry):
	307	'''
	308	Parse po or pot file with name <filename>. Set the variable
	309	<entry> to msgid/msgstr to indicate pot/po file. The return value
	310	is a dict with msgid (washed) as key and Msgs instances as
	311	values.
	312	'''
	313	lines = io(filename).readlines()
	314	Msgs.file = filename
	315	messages = {}
	316	last = len(lines)
	317	g = gen(lines)
	318	cmt = autocmt = ref = flag = ''
	319	msgid = False
	320	lno = 0
	321	while not lno == last:
	322	l, lno = g.next()
	323	if l.startswith('# '):
	324	l, lno, g, cmt = slurp(l, g, '# ')
	325	if l.startswith('#.'):
	326	l, lno, g, autocmt = slurp(l, g, '#.')
	327	if l.startswith('#:'):
	328	l, lno, g, ref = slurp(l, g, '#:')
	329	if l.startswith('#,'):
	330	l, lno, g, flag = slurp(l, g, '#,')
	331	if l.startswith('msgid'):
	332	l, lno, g, msgid = slurp(l, g, '"')
	333	if l.startswith('msgstr'):
	334	l, lno, g, msgstr = slurp(l, g, '"')
	335
	336	if not lno == last and not l.strip() == '':
	337	raise MsgmergeError('parse error: %s:%s.' % (filename, lno))
	338
	339	if msgid and entry == 'msgstr':
	340	idx = wash(msgid, filename = filename, lno = lno)
	341	messages[idx] = Msgs(msgid, msgstr, flag, lno, entry, cmt = cmt)
	342	msgid = False; msgstr = cmt = autocmt = ref = flag = ''
	343	elif msgid and entry == 'msgid':
	344	idx = wash(msgid, filename = filename, lno = lno)
	345	messages[idx] = Msgs(msgid, msgstr, flag, lno, entry,
	346	autocmt = autocmt, ref = ref)
	347	msgid = False; msgstr = cmt = autocmt = ref = flag = ''
	348
	349	for m in messages.values():
	350	m.wash()
	351	return messages
	352
	def fuzzy_match(pot, defs):
	    '''
	    Try to find the best difflib match between the id of Msgs object
	    <pot> and the Msgs in the dict <defs>.

	    A candidate must score a ratio above limit - 0.01 (0.59) and above
	    every candidate seen before it.  Entries with an empty translation
	    and the header entry are never considered.  Return value is the
	    Msgs object in <defs> with the highest ratio, False is returned if
	    no suitable Msgs is found.
	    '''
	    limit = 0.6
	    l, po = limit - 0.01, False
	    # Clean each id once; assumes get_clean_id() has no side effects
	    # -- TODO confirm against the Msgs class.
	    wanted = pot.get_clean_id()
	    # NOTE(review): difflib calls the junk predicate with single
	    # characters, so comparing against the two character string ' "'
	    # never marks anything as junk.  Left as is to keep the computed
	    # ratios unchanged.
	    s = difflib.SequenceMatcher(lambda x: x == ' "', '', wanted)
	    len2 = len(wanted)
	    for candidate in defs.values():
	        if candidate.str == 'msgstr ""\n': # Empty translation
	            continue
	        if candidate.id == 'msgid ""\n': # Empty msgid (header)
	            continue
	        cleaned = candidate.get_clean_id()
	        len1 = len(cleaned)
	        if len2 > 2 * len1 or len1 > 1.5 * len2: # Simple and fast tests first
	            continue
	        s.set_seq1(cleaned)
	        if s.quick_ratio() < l:
	            continue
	        r = s.ratio() # This is expensive
	        if r > l:
	            l, po = r, candidate
	    return po
	379
	def flags(po, pot, fuzzy = False, obs = False):
	    '''
	    Build the flag line for an entry out of the flag fields of the
	    Msgs objects <po> and <pot>.

	    With <fuzzy> set the flags of <po> are ignored and fuzzy is added.
	    With <obs> set the c-format, python-format and docstring flags are
	    dropped.  The docstring flag is also dropped unless the global
	    option.docstrings is set.  Returns the combined flag line, or an
	    empty string when there is nothing to flag.
	    '''
	    global option
	    if not (po.flag or pot.flag or fuzzy):
	        return ''

	    if fuzzy:
	        combined = '%s, %s' % ('#, fuzzy', pot.flag.strip())
	    else:
	        combined = '%s, %s' % (po.flag.strip(), pot.flag.strip())

	    # Keep the first occurrence of every non-empty field.
	    fields = []
	    for field in combined.split(', '):
	        if field and field not in fields:
	            fields.append(field)

	    unwanted = []
	    if not option.docstrings:
	        unwanted.append('docstring')
	    if obs:
	        unwanted.extend(['c-format', 'python-format', 'docstring'])
	    for name in unwanted:
	        if name in fields:
	            fields.remove(name)

	    # Put fuzzy first, that is right behind the leading '#'.
	    if 'fuzzy' in fields:
	        at = fields.index('fuzzy')
	        if not at == 1:
	            fields[1], fields[at] = fields[at], fields[1]

	    # A single field is just the '#' marker: no flags are left.
	    if len(fields) == 1:
	        return ''
	    return ', '.join(fields) + '\n'
	421
	def add(pot, po, fuzzy = False):
	    '''
	    Build a new entry from the Msgs objects <pot> and <po>.

	    The comment and translation come from <po>; the automatic
	    comment, reference and id come from <pot>; the flag line comes
	    from flags().  If <fuzzy> is true, flags() ignores the flag field
	    of <po>.  Returns the entry as one multiline string.
	    '''
	    pieces = (
	        po.cmt,
	        pot.autocmt,
	        pot.ref,
	        flags(po, pot, fuzzy = fuzzy),
	        pot.id,
	        po.str,
	    )
	    return ''.join(pieces)
	436
	def header(pot, defs):
	    '''
	    Update the POT-Creation-Date in the po header entry.

	    <pot> is the header entry of the pot file, <defs> the dict of po
	    entries.  The creation date found in <pot> is written into the
	    str field of the one entry in <defs> that has the empty msgid,
	    and that entry is returned.

	    Raises MsgmergeError when <defs> does not hold exactly one header
	    entry, or when either header lacks a POT-Creation-Date field.
	    '''
	    try:
	        [po] = [ d for d in defs.values() if d.id == 'msgid ""\n' ]
	    except ValueError:
	        raise MsgmergeError('Error: did not find header in po file.')

	    # Group 1 is everything up to the date, group 2 the date itself and
	    # group 3 the end of that line plus the rest of the header.
	    r = re.compile(r'(.*^"POT-Creation-Date:\s+)(.*?)(\\n"$.*)',
	                   re.MULTILINE | re.DOTALL)
	    m = r.match(pot.str)
	    if not m:
	        raise MsgmergeError(
	            'Error: did not find POT-Creation-Date field in pot file.')

	    new_date = m.group(2)

	    # A function is used instead of a template string: the date starts
	    # with a digit, which a template would read as part of the group
	    # number or as an octal escape.
	    def put_date(found):
	        return found.group(1) + new_date + found.group(3)

	    updated, count = r.subn(put_date, po.str)
	    if not count == 1:
	        raise MsgmergeError(
	            'Error: did not find POT-Creation-Date field in po file.')
	    # Keep the substituted text; it used to be computed and dropped,
	    # which left the old date in place.
	    po.str = updated
	    return po
	459
	def match(defs, refs):
	    '''
	    Try to match Msgs objects in <refs> with Msgs objects in
	    <defs>. The return value is a list with po entries.

	    The entries of <refs> are handled in order of their line number;
	    the first one is taken to be the header.  Every other entry is
	    looked up in <defs> by id, then through fuzzy_match(), and is
	    added with an empty translation when neither finds anything.
	    Unused entries of <defs> are appended through obsolete().

	    Raises MsgmergeError when <refs> is empty.
	    '''
	    global option
	    matches = []
	    empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')
	    # Sort on the line number alone, so two Msgs objects are never
	    # compared with each other and the builtin sorted() stays usable.
	    ordered = sorted(refs.values(), key = lambda ref: ref.lno)
	    if not ordered:
	        raise MsgmergeError('Error: did not find header in pot file.')
	    po = header(ordered[0], defs) # Header entry
	    matches.append(add(empty, po))
	    po.used()
	    for pot in ordered[1:]:
	        if option.verbose:
	            sys.stderr.write('.')
	        po = defs.get(pot.id, False) # Perfect match
	        if po:
	            matches.append(add(pot, po))
	            po.used()
	            pot.used()
	            continue
	        po = fuzzy_match(pot, defs) # Fuzzy match
	        if po:
	            matches.append(add(pot, po, fuzzy = True))
	            po.used()
	            pot.used()
	            continue
	        matches.append(add(pot, empty)) # No match

	    obsolete(defs, matches)
	    return matches
	491
	def obsolete(defs, matches):
	    '''
	    Handle obsolete translations.

	    Every entry of <defs> whose count is 0 and whose translation is
	    not empty is flagged through flags(..., obs = True), has its
	    obsolete() method called and is appended to <matches>, in order
	    of line number.
	    '''
	    leftovers = []
	    for d in defs.values():
	        if d.count == 0 and not d.str == 'msgstr ""\n':
	            leftovers.append((d.lno, d))
	    leftovers.sort()
	    empty = Msgs('msgid ""\n', 'msgstr ""\n', '', -1, 'str')
	    for pair in leftovers:
	        o = pair[1]
	        o.flag = flags(o, empty, obs = True)
	        o.obsolete()
	        matches.append('%s%s%s' % (o.flag, o.id, o.str))
	503
	def help():
	    '''Print the module docstring and exit with status 0.'''
	    # Parenthesised single argument: valid as a Python 2 print
	    # statement and as a Python 3 function call.
	    print(__doc__)
	    sys.exit(0)
	508
	def cmdline():
	    '''
	    Parse options and arguments from command line and run merge().

	    Prints a message and exits with status 1 on unknown options, on a
	    wrong number of input files, when --update is combined with
	    --output-file, and when merge() raises MsgmergeError.
	    '''
	    advice = 'Try `%(name)s --help\' for more information.'
	    try:
	        # --suffix and --backup take a value (it is stored below), so
	        # they need the trailing '='.
	        long_opt = ['help', 'version', 'update', 'output-file=',
	                    'quiet', 'silent', 'docstrings', 'suffix=', 'backup=']
	        opts, args = getopt.getopt(sys.argv[1:], 'hVUo:qD', long_opt)
	    except getopt.error as msg:
	        print('%s: %s\n%s' % ('%(name)s', msg, advice) % globals())
	        sys.exit(1)

	    option = Options(cmdline = True)
	    for opt, arg in opts:
	        if opt in ['-h', '--help']:
	            help()
	        elif opt in ['-V', '--version']:
	            print('%(name)s %(__version__)s' % globals())
	            sys.exit(0)
	        elif opt in ['-o', '--output-file']:
	            option.outfile = arg
	        elif opt in ['-U', '--update']:
	            option.update = True
	        elif opt in ['-q', '--silent', '--quiet']:
	            option.verbose = False
	        elif opt in ['-D', '--docstrings']:
	            option.docstrings = True
	        elif opt in ['--suffix']:
	            option.suffix = arg
	        elif opt in ['--backup']:
	            option.backup = arg

	    # Sanity checks
	    warn = False
	    if option.update and option.outfile:
	        warn = '--update and --output-file are mutually exclusive.'
	    if len(args) == 0:
	        warn = 'no input files given.'
	    elif len(args) == 1 or len(args) > 2:
	        warn = 'exactly 2 input files required.'
	    if warn:
	        print('%s: %s\n%s' % ('%(name)s', warn, advice) % globals())
	        sys.exit(1)

	    # In update mode the po file itself is rewritten; otherwise the
	    # result goes to standard out unless a file was named.
	    if option.update:
	        option.outfile = args[0]
	    elif not option.outfile:
	        option.outfile = '-'

	    defs, refs = args

	    try:
	        merge(defs, refs, option = option)
	    except MsgmergeError as err:
	        print('%(name)s: ' % globals() + '%s' % err)
	        sys.exit(1)
	564
	def io(iofile, mode = 'rU'):
	    '''
	    Wrapper around open().

	    Opens <iofile> with <mode> and returns the file object.  In read
	    modes the first three characters are compared with the UTF-8
	    byte order mark and the file is rewound unless they are one, so a
	    leading BOM is skipped.

	    Raises MsgmergeError when opening or reading fails with IOError.
	    '''
	    fo = None
	    try:
	        fo = open(iofile, mode)
	        if 'r' in mode and fo.read(3) != codecs.BOM_UTF8:
	            fo.seek(0)
	    except IOError as msg:
	        # Do not leak the handle when the probing read is what failed.
	        if fo is not None:
	            fo.close()
	        # strerror is the text msg[1] gave on Python 2 and also exists
	        # on Python 3, where exceptions cannot be indexed.
	        raise MsgmergeError('error while opening file: %s: %s.' %
	                            (msg.strerror, iofile))
	    return fo
	576
	def backup(infile):
	    '''
	    Handle backup of files in update mode.

	    NOTE(review): this looks like an unfinished stub.  It reads two
	    environment variables and builds a backup file name, but nothing
	    is copied and nothing is returned.
	    '''
	    # The VERSION_CONTROL value is looked up and ignored.
	    os.environ.get('VERSION_CONTROL', '')
	    backup_file = '%s%s' % (infile,
	                            os.environ.get('SIMPLE_BACKUP_SUFFIX', '~'))
	583
	def changes(new, old):
	    '''
	    Compare the entries <new> with the lines <old> of a file.

	    <old> is joined without a separator and <new> with newlines, the
	    same way write() joins it.  Returns 0 when both texts are equal,
	    -1 when the old text sorts first and 1 when it sorts last.
	    '''
	    before = ''.join(old)
	    after = '\n'.join(new)
	    # Portable spelling of cmp(before, after); the builtin cmp() only
	    # exists on Python 2.
	    return (before > after) - (before < after)
	586
	def write(matches, outfile):
	    '''
	    Write the list <matches> to file <outfile>.

	    The entries are joined with newlines.  An <outfile> of '-' means
	    standard out; any other name is opened through io() and closed
	    again once the text is written.
	    '''
	    text = '\n'.join(matches)
	    if outfile == '-':
	        sys.stdout.write(text)
	        return
	    fd = io(outfile, 'w')
	    try:
	        fd.write(text)
	    finally:
	        # Closing flushes the data; the handle used to be left open.
	        fd.close()
	594
	def merge(def_file, ref_file, update = True, outfile = '-',
	          docstrings = True, suffix = '~', backup = True,
	          verbose = True, **kwds):
	    '''
	    Merge po file <def_file> with pot file <ref_file>.

	    If <update> is set to True the po file is rewritten, and only if
	    the merged text differs from it.  Otherwise the result goes to
	    <outfile>, where '-' means standard out.  If <docstrings> is
	    False the docstring flag is removed.  Set <verbose> to False to
	    suppress progress indicators.  <kwds> is used to pass options
	    from the command line interface: an 'option' keyword replaces the
	    Options object built from the other arguments.
	    '''
	    global option
	    option = kwds.get('option', Options(update = update,
	                                        outfile = outfile,
	                                        docstrings = docstrings,
	                                        suffix = suffix,
	                                        backup = backup,
	                                        verbose = verbose))
	    def_msgs = parse(def_file, 'msgstr')
	    ref_msgs = parse(ref_file, 'msgid')
	    if verbose and not __name__ == '__main__':
	        sys.stderr.write('Merging %s with %s\n' % (ref_file, def_file))
	    updated_lines = match(def_msgs, ref_msgs)
	    if option.verbose:
	        sys.stderr.write(' done.\n')
	    if not option.update:
	        write(updated_lines, option.outfile)
	    else:
	        # Read the current po file and close it again before it may
	        # be reopened for writing.
	        current = io(def_file)
	        try:
	            old_lines = current.readlines()
	        finally:
	            current.close()
	        if changes(updated_lines, old_lines):
	            write(updated_lines, def_file)
	625
	def merge_dir(directory, pot = False, include = [], exclude = [],
	              verbose = True):
	    '''
	    Tries to merge a directory of po files.

	    Uses simple glob to find po files and pot file.  The parameter
	    <pot> can be used to specify the pot file in the directory.  If
	    the list <include> is given only the files in this list are
	    merged.  Use the list <exclude> to exclude files from being
	    merged.  This function is only useful if po files and pot file
	    are in the same directory.  Set <verbose> to get information when
	    running.

	    Raises MsgmergeError when no pot file, more than one pot file or
	    no po file is found.  A MsgmergeError raised while merging one po
	    file is reported on standard error and the next file is tried.
	    '''
	    # <include> and <exclude> are only read, never changed, so the
	    # shared default lists are harmless.
	    if directory.endswith('/'):
	        directory = os.path.dirname(directory)
	    if pot:
	        pot = os.path.basename(pot)
	    else:
	        pot = glob.glob('%s/*.pot' % directory)
	        if not pot:
	            raise MsgmergeError('No pot file found.')
	        elif len(pot) > 1:
	            raise MsgmergeError('More than one pot file found: %s.' % pot)
	        pot = os.path.basename(pot[0])

	    if not include:
	        pos = glob.glob('%s/*po' % directory)
	        # A single po file is enough; the old test asked for two.
	        if not pos:
	            raise MsgmergeError('No po file(s) found.')
	        pos = [ os.path.basename(po) for po in pos ]
	    else:
	        pos = [ os.path.basename(po) for po in include ]

	    for po in exclude:
	        if po in pos:
	            pos.remove(po)

	    template = '%s/%s'
	    for po in pos:
	        try:
	            merge(template % (directory, po), template % (directory, pot),
	                  update = True, verbose = verbose,
	                  outfile = template % (directory, po))
	        except MsgmergeError as err:
	            if verbose:
	                sys.stderr.write('%s Not updated.\n' % err)
	            else:
	                sys.stderr.write('%s %s not updated.\n' % (err, po))
	673
	# Run the command line interface when this file is executed as a script.
	674	if __name__ == '__main__':
	675	cmdline()

trunk/scripts/build/pygettext.py
r0	r253614
	1	#! /usr/bin/env python
	2	# -- coding: iso-8859-1 --
	3	# Originally written by Barry Warsaw <barry@zope.com>
	4	#
	5	# Minimally patched to make it even more xgettext compatible
	6	# by Peter Funk <pf@artcom-gmbh.de>
	7	#
	8	# 2002-11-22 Jürgen Hermann <jh@web.de>
	9	# Added checks that _() only contains string literals, and
	10	# command line args are resolved to module lists, i.e. you
	11	# can now pass a filename, a module or package name, or a
	12	# directory (including globbing chars, important for Win32).
	13	# Made docstring fit in 80 chars wide displays using pydoc.
	14	#
	15
	16	# for selftesting
	# _() is fintl.gettext when the fintl module can be imported.
	17	try:
	18	import fintl
	19	_ = fintl.gettext
	# Otherwise _() returns its argument unchanged.
	20	except ImportError:
	21	_ = lambda s: s
	22
	23	__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
	24
	25	Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
	26	internationalization of C programs. Most of these tools are independent of
	27	the programming language and can be used from within Python programs.
	28	Martin von Loewis' work[1] helps considerably in this regard.
	29
	30	There's one problem though; xgettext is the program that scans source code
	31	looking for message strings, but it groks only C (or C++). Python
	32	introduces a few wrinkles, such as dual quoting characters, triple quoted
	33	strings, and raw strings. xgettext understands none of this.
	34
	35	Enter pygettext, which uses Python's standard tokenize module to scan
	36	Python source code, generating .pot files identical to what GNU xgettext[2]
	37	generates for C and C++ code. From there, the standard GNU tools can be
	38	used.
	39
	40	A word about marking Python strings as candidates for translation. GNU
	41	xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
	42	and gettext_noop. But those can be a lot of text to include all over your
	43	code. C and C++ have a trick: they use the C preprocessor. Most
	44	internationalized C source includes a #define for gettext() to _() so that
	45	what has to be written in the source is much less. Thus these are both
	46	translatable strings:
	47
	48	gettext("Translatable String")
	49	_("Translatable String")
	50
	51	Python of course has no preprocessor so this doesn't work so well. Thus,
	52	pygettext searches only for _() by default, but see the -k/--keyword flag
	53	below for how to augment this.
	54
	55	[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
	56	[2] http://www.gnu.org/software/gettext/gettext.html
	57
	58	NOTE: pygettext attempts to be option and feature compatible with GNU
	59	xgettext where ever possible. However some options are still missing or are
	60	not fully implemented. Also, xgettext's use of command line switches with
	61	option arguments is broken, and in these cases, pygettext just defines
	62	additional switches.
	63
	64	Usage: pygettext [options] inputfile ...
	65
	66	Options:
	67
	68	-a
	69	--extract-all
	70	Extract all strings.
	71
	72	-d name
	73	--default-domain=name
	74	Rename the default output file from messages.pot to name.pot.
	75
	76	-E
	77	--escape
	78	Replace non-ASCII characters with octal escape sequences.
	79
	80	-D
	81	--docstrings
	82	Extract module, class, method, and function docstrings. These do
	83	not need to be wrapped in _() markers, and in fact cannot be for
	84	Python to consider them docstrings. (See also the -X option).
	85
	86	-h
	87	--help
	88	Print this help message and exit.
	89
	90	-k word
	91	--keyword=word
	92	Keywords to look for in addition to the default set, which are:
	93	%(DEFAULTKEYWORDS)s
	94
	95	You can have multiple -k flags on the command line.
	96
	97	-K
	98	--no-default-keywords
	99	Disable the default set of keywords (see above). Any keywords
	100	explicitly added with the -k/--keyword option are still recognized.
	101
	102	--no-location
	103	Do not write filename/lineno location comments.
	104
	105	-n
	106	--add-location
	107	Write filename/lineno location comments indicating where each
	108	extracted string is found in the source. These lines appear before
	109	each msgid. The style of comments is controlled by the -S/--style
	110	option. This is the default.
	111
	112	-o filename
	113	--output=filename
	114	Rename the default output file from messages.pot to filename. If
	115	filename is `-' then the output is sent to standard out.
	116
	117	-p dir
	118	--output-dir=dir
	119	Output files will be placed in directory dir.
	120
	121	-S stylename
	122	--style stylename
	123	Specify which style to use for location comments. Two styles are
	124	supported:
	125
	126	Solaris # File: filename, line: line-number
	127	GNU #: filename:line
	128
	129	The style name is case insensitive. GNU style is the default.
	130
	131	-v
	132	--verbose
	133	Print the names of the files being processed.
	134
	135	-V
	136	--version
	137	Print the version of pygettext and exit.
	138
	139	-w columns
	140	--width=columns
	141	Set width of output to columns.
	142
	143	-x filename
	144	--exclude-file=filename
	145	Specify a file that contains a list of strings that are not be
	146	extracted from the input files. Each string to be excluded must
	147	appear on a line by itself in the file.
	148
	149	-X filename
	150	--no-docstrings=filename
	151	Specify a file that contains a list of files (one per line) that
	152	should not have their docstrings extracted. This is only useful in
	153	conjunction with the -D option above.
	154
	155	If `inputfile' is -, standard input is read.
	156	""")
	157
	158	import os
	159	import imp
	160	import sys
	161	import glob
	162	import time
	163	import getopt
	164	import token
	165	import tokenize
	166	import operator
	167
	168	from umit.pm.core.const import PM_VERSION
	169
	# Reported by -V/--version and written into the Generated-By header line.
	170	__version__ = '1.5'
	171
	# Function names whose string arguments are extracted; main() adds these
	# to the -k keywords and empties the list for -K.
	172	default_keywords = ['_']
	# Substituted for %(DEFAULTKEYWORDS)s when usage() prints __doc__.
	173	DEFAULTKEYWORDS = ', '.join(default_keywords)
	174
	# Separator used to join the characters or string pieces of a message.
	175	EMPTYSTRING = ''
	176
	177
	178
	179	# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
	180	# there.
	# TokenEater.write() fills in %(pm_version)s, %(time)s and %(version)s.
	181	pot_header = _('''\
	182	# PacketManipulator catalog.
	183	# Copyright (C) 2009 Adriano Montero Marques
	184	# Francesco Piccinno <stack.box@gmail.com>, 2009
	185	#
	186	msgid ""
	187	msgstr ""
	188	"Project-Id-Version: PacketManipulator %(pm_version)s\\n"
	189	"POT-Creation-Date: %(time)s\\n"
	190	"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
	191	"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
	192	"Language-Team: LANGUAGE <LL@li.org>\\n"
	193	"MIME-Version: 1.0\\n"
	194	"Content-Type: text/plain; charset=UTF-8\\n"
	195	"Content-Transfer-Encoding: 8bit\\n"
	196	"Generated-By: pygettext.py %(version)s\\n"
	197
	198	''')
	199
	200
	def usage(code, msg=''):
	    """Print the help text, and <msg> if given, to stderr; exit with <code>.

	    The module docstring is filled in from the module globals, which
	    supplies the %(DEFAULTKEYWORDS)s placeholder.
	    """
	    # Explicit writes replace the Python 2 only `print >>' statement.
	    sys.stderr.write('%s\n' % (__doc__ % globals(),))
	    if msg:
	        # msg may be an exception object, so let %s convert it.
	        sys.stderr.write('%s\n' % (msg,))
	    sys.exit(code)
	206
	207
	208
	# Escape table used by escape(): escapes[n] is the text written for the
	# character with ordinal n.  Filled in by make_escapes().
	escapes = []

	def make_escapes(pass_iso8859):
	    """Fill the global escape table with its 256 entries.

	    With <pass_iso8859> true the characters 160..254 are kept as they
	    are; otherwise everything outside 32..126 becomes an octal escape.
	    Backslash, tab, carriage return, newline and the double quote
	    always get their C style escape.
	    """
	    global escapes
	    if pass_iso8859:
	        # Allow iso-8859 characters to pass through so that e.g. 'msgid
	        # "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we
	        # escape any character outside the 32..126 range.
	        mod = 128
	    else:
	        mod = 256
	    table = []
	    for i in range(256):
	        if 32 <= (i % mod) <= 126:
	            table.append(chr(i))
	        else:
	            table.append("\\%03o" % i)
	    table[ord('\\')] = '\\\\'
	    table[ord('\t')] = '\\t'
	    table[ord('\r')] = '\\r'
	    table[ord('\n')] = '\\n'
	    table[ord('\"')] = '\\"'
	    # Replace the contents instead of appending: a second call used to
	    # leave the 256 entries of the first call in front.
	    escapes[:] = table
	231
	def escape(s):
	    """Return <s> with every character replaced by its escape table entry."""
	    global escapes
	    return EMPTYSTRING.join([escapes[ord(char)] for char in s])
	238
	239
	def safe_eval(s):
	    """Evaluate the string literal source <s> and return its value."""
	    # unwrap quotes, safely
	    # NOTE(review): this is still eval(); an empty __builtins__ only
	    # hides the builtin names.  The callers visible in this file pass
	    # STRING tokens produced by tokenize.
	    no_builtins = {'__builtins__':{}}
	    no_locals = {}
	    return eval(s, no_builtins, no_locals)
	243
	244
	def normalize(s):
	    """Return <s> as a quoted, escaped string in .po file syntax.

	    A string without newlines becomes one quoted string.  Anything
	    else becomes an empty string followed by one quoted string per
	    line.
	    """
	    # This converts the various Python string types into a format that is
	    # appropriate for .po files, namely much closer to C style.
	    pieces = s.split('\n')
	    if len(pieces) == 1:
	        return '"' + escape(s) + '"'
	    if not pieces[-1]:
	        # The text ended in a newline: hand it back to the last real
	        # line, so it is escaped together with that line.
	        del pieces[-1]
	        pieces[-1] = pieces[-1] + '\n'
	    escaped = [escape(piece) for piece in pieces]
	    return '""\n"' + '\\n"\n"'.join(escaped) + '"'
	260
	261
	def containsAny(str, set):
	    """Check whether 'str' contains ANY of the chars in 'set'"""
	    # any() stops at the first hit instead of building the whole list.
	    return any(c in str for c in set)
	265
	266
	def _visit_pyfiles(list, dirname, names):
	    """Helper for getFilesForName().

	    Appends to <list> the path of every entry of <names> that has the
	    Python source extension, joined to <dirname>.  'CVS' is removed
	    from <names> in place.
	    """
	    global _py_ext
	    # get extension for python source files; looked up once and kept
	    # in a module global.  `in' replaces dict.has_key(), which Python 3
	    # does not have.
	    if '_py_ext' not in globals():
	        _py_ext = [triple[0] for triple in imp.get_suffixes()
	                   if triple[2] == imp.PY_SOURCE][0]

	    # don't recurse into CVS directories
	    if 'CVS' in names:
	        names.remove('CVS')

	    # add all *.py files to list
	    list.extend(
	        [os.path.join(dirname, file) for file in names
	         if os.path.splitext(file)[1] == _py_ext]
	    )
	284
	285
	def _get_modpkg_path(dotted_name, pathlist=None):
	    """Get the filesystem path for a module or a package.

	    Return the file system path to a file for a module, and to a directory for
	    a package. Return None if the name is not found, or is a builtin or
	    extension module.
	    """
	    # split off top-most name
	    pieces = dotted_name.split('.', 1)

	    if len(pieces) == 1:
	        # plain name: accept Python source files and packages only
	        try:
	            handle, location, info = imp.find_module(dotted_name, pathlist)
	            if handle:
	                handle.close()
	            if info[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
	                location = None
	        except ImportError:
	            location = None
	        return location

	    # we have a dotted path, look up the top-level package first
	    try:
	        handle, location, info = imp.find_module(pieces[0], pathlist)
	        if handle:
	            handle.close()
	    except ImportError:
	        return None

	    # the remaining name parts can only live inside a package
	    if not info[2] == imp.PKG_DIRECTORY:
	        return None
	    return _get_modpkg_path(pieces[1], [location])
	323
	324
	def getFilesForName(name):
	    """Get a list of module files for a filename, a module or package name,
	    or a directory.

	    A name that does not exist on disk is expanded as a glob pattern
	    when it holds glob characters, and looked up as a module or
	    package name otherwise.  Returns an empty list when nothing is
	    found.
	    """
	    if not os.path.exists(name):
	        # check for glob chars
	        if containsAny(name, "*?[]"):
	            found = []
	            for path in glob.glob(name):
	                found.extend(getFilesForName(path))
	            return found

	        # try to find module or package
	        name = _get_modpkg_path(name)
	        if not name:
	            return []

	    if os.path.isdir(name):
	        # find all python files in directory; os.walk() replaces
	        # os.path.walk(), which only exists on Python 2
	        found = []
	        for dirpath, dirnames, filenames in os.walk(name):
	            names = dirnames + filenames
	            _visit_pyfiles(found, dirpath, names)
	            # do not descend into directories the visitor removed
	            dirnames[:] = [d for d in dirnames if d in names]
	        return found
	    elif os.path.exists(name):
	        # a single file
	        return [name]

	    return []
	353
	354
	class TokenEater:
	    """Collect translatable strings from a stream of tokenize tokens.

	    An instance is called once per token.  It is a small state
	    machine: the current state is a bound method that receives the
	    token type, the token string and the line number.  The messages
	    found are written as a .pot catalog by write().
	    """

	    def __init__(self, options):
	        self.__options = options
	        # message -> {(filename, lineno): isdocstring}
	        self.__messages = {}
	        self.__state = self.__waiting
	        self.__data = []
	        self.__lineno = -1
	        self.__freshmodule = 1
	        self.__curfile = None

	    def __call__(self, ttype, tstring, stup, etup, line):
	        # dispatch
	##        import token
	##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
	##              'tstring:', tstring
	        self.__state(ttype, tstring, stup[0])

	    def __waiting(self, ttype, tstring, lineno):
	        opts = self.__options
	        # Do docstring extractions, if enabled
	        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
	            # module docstring?
	            if self.__freshmodule:
	                if ttype == tokenize.STRING:
	                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
	                    self.__freshmodule = 0
	                elif ttype not in (tokenize.COMMENT, tokenize.NL):
	                    self.__freshmodule = 0
	                return
	            # class docstring?
	            if ttype == tokenize.NAME and tstring in ('class', 'def'):
	                self.__state = self.__suiteseen
	                return
	        if ttype == tokenize.NAME and tstring in opts.keywords:
	            self.__state = self.__keywordseen

	    def __suiteseen(self, ttype, tstring, lineno):
	        # ignore anything until we see the colon
	        if ttype == tokenize.OP and tstring == ':':
	            self.__state = self.__suitedocstring

	    def __suitedocstring(self, ttype, tstring, lineno):
	        # ignore any intervening noise
	        if ttype == tokenize.STRING:
	            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
	            self.__state = self.__waiting
	        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
	                           tokenize.COMMENT):
	            # there was no class docstring
	            self.__state = self.__waiting

	    def __keywordseen(self, ttype, tstring, lineno):
	        if ttype == tokenize.OP and tstring == '(':
	            self.__data = []
	            self.__lineno = lineno
	            self.__state = self.__openseen
	        else:
	            self.__state = self.__waiting

	    def __openseen(self, ttype, tstring, lineno):
	        if ttype == tokenize.OP and tstring == ')':
	            # We've seen the last of the translatable strings.  Record the
	            # line number of the first line of the strings and update the list
	            # of messages seen.  Reset state for the next batch.  If there
	            # were no strings inside _(), then just ignore this entry.
	            if self.__data:
	                self.__addentry(EMPTYSTRING.join(self.__data))
	            self.__state = self.__waiting
	        elif ttype == tokenize.STRING:
	            self.__data.append(safe_eval(tstring))
	        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
	                           token.NEWLINE, tokenize.NL]:
	            # warn if we see anything else than STRING or whitespace
	            warning = _(
	                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
	                ) % {
	                'token': tstring,
	                'file': self.__curfile,
	                'lineno': self.__lineno
	                }
	            sys.stderr.write(warning + '\n')
	            self.__state = self.__waiting

	    def __addentry(self, msg, lineno=None, isdocstring=0):
	        if lineno is None:
	            lineno = self.__lineno
	        if not msg in self.__options.toexclude:
	            entry = (self.__curfile, lineno)
	            self.__messages.setdefault(msg, {})[entry] = isdocstring

	    def set_filename(self, filename):
	        """Start a new input file: remember its name, expect a module docstring."""
	        self.__curfile = filename
	        self.__freshmodule = 1

	    def write(self, fp):
	        """Write the header and all collected messages to <fp> as a .pot catalog."""
	        options = self.__options
	        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
	        # The time stamp in the header doesn't have the same format as that
	        # generated by xgettext...
	        fp.write(pot_header % {'time': timestamp, 'version': __version__,
	                               'pm_version': PM_VERSION} + '\n')
	        # Sort the entries.  First sort each particular entry's keys, then
	        # sort all the entries by their first item.
	        reverse = {}
	        for k, v in self.__messages.items():
	            keys = sorted(v.keys())
	            reverse.setdefault(tuple(keys), []).append((k, v))
	        for rkey in sorted(reverse.keys()):
	            # Sort on the message text only: it is unique, and this
	            # keeps the location dicts out of the comparison.
	            rentries = sorted(reverse[rkey], key=lambda pair: pair[0])
	            for k, v in rentries:
	                isdocstring = 0
	                # If the entry was gleaned out of a docstring, then add a
	                # comment stating so.  This is to aid translators who may wish
	                # to skip translating some unimportant docstrings.
	                if any(v.values()):
	                    isdocstring = 1
	                # k is the message string, v is a dictionary-set of (filename,
	                # lineno) tuples.  We want to sort the entries in v first by
	                # file name and then by line number.
	                v = sorted(v.keys())
	                if not options.writelocations:
	                    pass
	                # location comments are different b/w Solaris and GNU:
	                elif options.locationstyle == options.SOLARIS:
	                    for filename, lineno in v:
	                        d = {'filename': filename, 'lineno': lineno}
	                        fp.write(_(
	                            '# File: %(filename)s, line: %(lineno)d') % d
	                            + '\n')
	                elif options.locationstyle == options.GNU:
	                    # fit as many locations on one line, as long as the
	                    # resulting line length doesn't exceeds 'options.width'
	                    locline = '#:'
	                    for filename, lineno in v:
	                        d = {'filename': filename, 'lineno': lineno}
	                        s = _(' %(filename)s:%(lineno)d') % d
	                        if len(locline) + len(s) <= options.width:
	                            locline = locline + s
	                        else:
	                            fp.write(locline + '\n')
	                            locline = "#:" + s
	                    if len(locline) > 2:
	                        fp.write(locline + '\n')
	                if isdocstring:
	                    fp.write('#, docstring\n')
	                fp.write('msgid %s\n' % normalize(k))
	                fp.write('msgstr ""\n\n')
	505
	506
	507
	def main():
	    """Command line entry point: parse options, scan the inputs, write the catalog."""
	    global default_keywords
	    try:
	        opts, args = getopt.getopt(
	            sys.argv[1:],
	            'ad:DEhk:Kno:p:S:Vvw:x:X:',
	            ['extract-all', 'default-domain=', 'escape', 'help',
	             'keyword=', 'no-default-keywords',
	             'add-location', 'no-location', 'output=', 'output-dir=',
	             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
	             # takes a file name, like its short form -X
	             'docstrings', 'no-docstrings=',
	             ])
	    except getopt.error as msg:
	        usage(1, msg)

	    # for holding option values
	    class Options:
	        # constants
	        GNU = 1
	        SOLARIS = 2
	        # defaults
	        extractall = 0 # FIXME: currently this option has no effect at all.
	        escape = 0
	        keywords = []
	        outpath = ''
	        outfile = 'messages.pot'
	        writelocations = 1
	        locationstyle = GNU
	        verbose = 0
	        width = 78
	        excludefilename = ''
	        docstrings = 0
	        nodocstrings = {}

	    options = Options()
	    locations = {'gnu' : options.GNU,
	                 'solaris' : options.SOLARIS,
	                 }

	    # parse options
	    for opt, arg in opts:
	        if opt in ('-h', '--help'):
	            usage(0)
	        elif opt in ('-a', '--extract-all'):
	            options.extractall = 1
	        elif opt in ('-d', '--default-domain'):
	            options.outfile = arg + '.pot'
	        elif opt in ('-E', '--escape'):
	            options.escape = 1
	        elif opt in ('-D', '--docstrings'):
	            options.docstrings = 1
	        elif opt in ('-k', '--keyword'):
	            options.keywords.append(arg)
	        elif opt in ('-K', '--no-default-keywords'):
	            default_keywords = []
	        elif opt in ('-n', '--add-location'):
	            options.writelocations = 1
	        elif opt in ('--no-location',):
	            options.writelocations = 0
	        elif opt in ('-S', '--style'):
	            options.locationstyle = locations.get(arg.lower())
	            if options.locationstyle is None:
	                usage(1, _('Invalid value for --style: %s') % arg)
	        elif opt in ('-o', '--output'):
	            options.outfile = arg
	        elif opt in ('-p', '--output-dir'):
	            options.outpath = arg
	        elif opt in ('-v', '--verbose'):
	            options.verbose = 1
	        elif opt in ('-V', '--version'):
	            print(_('pygettext.py (xgettext for Python) %s') % __version__)
	            sys.exit(0)
	        elif opt in ('-w', '--width'):
	            try:
	                options.width = int(arg)
	            except ValueError:
	                usage(1, _('--width argument must be an integer: %s') % arg)
	        elif opt in ('-x', '--exclude-file'):
	            options.excludefilename = arg
	        elif opt in ('-X', '--no-docstrings'):
	            fp = open(arg)
	            try:
	                for line in fp:
	                    # Strip the newline only; a last line without one
	                    # used to lose its final character.
	                    options.nodocstrings[line.rstrip('\n')] = 1
	            finally:
	                fp.close()

	    # calculate escapes: -E asks for octal escapes, so 8-bit characters
	    # may only pass through when it was NOT given
	    make_escapes(not options.escape)

	    # calculate all keywords
	    options.keywords.extend(default_keywords)

	    # initialize list of strings to exclude
	    # NOTE(review): readlines() keeps the trailing newline of every
	    # line, so only messages that end in a newline can match -- confirm
	    # whether that is intended.
	    if options.excludefilename:
	        try:
	            fp = open(options.excludefilename)
	            try:
	                options.toexclude = fp.readlines()
	            finally:
	                fp.close()
	        except IOError:
	            sys.stderr.write(_(
	                "Can't read --exclude-file: %s") % options.excludefilename
	                + '\n')
	            sys.exit(1)
	    else:
	        options.toexclude = []

	    # resolve args to module lists
	    expanded = []
	    for arg in args:
	        if arg == '-':
	            expanded.append(arg)
	        else:
	            expanded.extend(getFilesForName(arg))
	    args = expanded

	    # slurp through all the files
	    eater = TokenEater(options)
	    for filename in args:
	        if filename == '-':
	            if options.verbose:
	                print(_('Reading standard input'))
	            fp = sys.stdin
	            closep = 0
	        else:
	            if options.verbose:
	                print(_('Working on %s') % filename)
	            fp = open(filename)
	            closep = 1
	        try:
	            eater.set_filename(filename)
	            try:
	                # generate_tokens() has the same interface on Python 2
	                # and 3; each token is a 5-item sequence matching the
	                # arguments the eater is called with.
	                for tok in tokenize.generate_tokens(fp.readline):
	                    eater(*tok)
	            except tokenize.TokenError as e:
	                sys.stderr.write('%s: %s, line %d, column %d\n' % (
	                    e.args[0], filename, e.args[1][0], e.args[1][1]))
	        finally:
	            if closep:
	                fp.close()

	    # write the output
	    if options.outfile == '-':
	        fp = sys.stdout
	        closep = 0
	    else:
	        if options.outpath:
	            options.outfile = os.path.join(options.outpath, options.outfile)
	        fp = open(options.outfile, 'w')
	        closep = 1
	    try:
	        eater.write(fp)
	    finally:
	        if closep:
	            fp.close()
	664
	665
	# Run the extractor when this file is executed as a script.
	666	if __name__ == '__main__':
	667	main()
	# The _() calls below are sample input for running the script on itself.
	668	# some more test strings
	669	_(u'a unicode string')
	670	# this one creates a warning
	671	_('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
	672	_('more' 'than' 'one' 'string')

https://github.com/mamedev/mame/commit/78ada55150f7ba54fb91bf7a695505f368ff3a22

199869 Revisions