Source code for horizon.utils.babel_extract_angular
# -*- encoding: UTF-8 -*-
# Copyright 2015, Rackspace, US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import re
from six.moves import html_parser
# Regex to find filter translation expressions: a single- or double-quoted
# string literal piped through the ``translate`` filter inside ``{$ ... $}``
# delimiters, e.g. {$ 'some text' | translate $}.
# Group 1 captures the string literal *including* its surrounding quotes;
# groups 2 and 3 only hold the last character matched inside each quoting
# style and are not used by the parser below.
filter_regex = re.compile(
    r"""{\$\s*('([^']|\\')+'|"([^"]|\\")+")\s*\|\s*translate\s*\$}"""
)

# browser innerHTML decodes some html entities automatically, so when
# we extract the msgid and want to match what Javascript sees, we need
# to leave some entities alone, but decode all the rest. Add entries
# to HTML_ENTITY_DECODED as necessary.
#
# HTML_ENTITY_PASSTHROUGH: entity names kept in the msgid as "&name;".
# HTML_ENTITY_DECODED: entity names replaced by the mapped character.
HTML_ENTITY_PASSTHROUGH = {'amp', 'gt', 'lt'}
HTML_ENTITY_DECODED = {
    'reg': u'®',
    'times': u'×'
}
class AngularGettextHTMLParser(html_parser.HTMLParser):
    """Parse HTML to find Angular translate directives.

    Three forms of translation are recognised:

    ``<translate>content</translate>``
        The element content is recorded as a message.

    ``<p translate>content</p>``
        Any element carrying a ``translate`` attribute; handled as above.

    ``{$ 'content' | translate $}``
        A quoted string piped through the ``translate`` filter, looked for
        in text and attribute values outside a translate element. Only
        bare string literals are matched (no expression handling).

    On a translate element, a ``translate-plural`` attribute supplies the
    plural form (the message is then recorded as ``ngettext``) and a
    ``translate-comment`` attribute attaches a translator comment.

    Results accumulate in ``self.strings`` as
    ``(line, function_name, message, comments)`` tuples; ``message`` is a
    ``(singular, plural)`` tuple for ``ngettext`` and a plain string for
    ``gettext``.

    NOTE(review): earlier documentation stated that "{$ expression $}"
    inside a message becomes "%(expression)". This class records the
    markup verbatim, so any such conversion would have to happen
    elsewhere -- confirm.
    """

    # HTML void elements: they have no content and no end tag, so they must
    # never be waited on when balancing the tags inside a message.
    _VOID_ELEMENTS = frozenset((
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    ))

    def __init__(self):
        try:
            # Keep character/entity references as separate events so that
            # handle_entityref/handle_charref decide how each is recorded.
            super(AngularGettextHTMLParser, self).__init__(
                convert_charrefs=False
            )
        except TypeError:
            # handle HTMLParser not being a type on Python 2
            html_parser.HTMLParser.__init__(self)
        self.in_translate = False   # inside a translate element?
        self.inner_tags = []        # open tags nested inside the message
        self.data = ''              # message text collected so far
        self.strings = []           # extracted (line, func, msg, comments)
        self.line = 0               # line of the most recent start tag
        self.plural = False         # current message has a plural form
        self.plural_form = ''
        self.comments = []          # translator comments for the message

    def handle_starttag(self, tag, attrs):
        """Open a translate element, record an inner tag, or scan attrs.

        :param tag: lower-cased tag name.
        :param attrs: list of ``(name, value)`` pairs; ``value`` is None
                      for an attribute written without a value.
        """
        self.line = self.getpos()[0]
        attr_names = [name for name, _ in attrs]
        if tag == 'translate' or 'translate' in attr_names:
            self.in_translate = True
            # Reset both plural markers so that a previous plural message
            # cannot leak into this one.
            self.plural = False
            self.plural_form = ''
            for name, value in attrs:
                if name == 'translate-plural':
                    self.plural = True
                    self.plural_form = value
                if name == 'translate-comment':
                    self.comments.append(value)
        elif self.in_translate:
            # Markup nested in the message is part of the msgid.
            self.data += self._render_start_tag(tag, attrs)
            self.inner_tags.append(tag)
        else:
            # Outside a message: look for {$ '...' | translate $} filters
            # in attribute values.
            for _, value in attrs:
                if not value:
                    continue
                for match in filter_regex.findall(value):
                    # match[0] is the quoted literal; strip the quotes.
                    self.strings.append(
                        (self.line, u'gettext', match[0][1:-1], [])
                    )

    @staticmethod
    def _render_start_tag(tag, attrs):
        """Rebuild the source text of a start tag nested in a message."""
        parts = [tag]
        for name, value in attrs:
            # A valueless attribute (e.g. "disabled") is reported as None;
            # render it with an empty value instead of the text "None".
            parts.append('%s="%s"' % (name, '' if value is None else value))
        return '<%s>' % ' '.join(parts)

    def handle_data(self, data):
        """Collect message text, or scan free text for translate filters."""
        if self.in_translate:
            self.data += data
        else:
            for match in filter_regex.findall(data):
                self.strings.append(
                    (self.line, u'gettext', match[0][1:-1], [])
                )

    def handle_entityref(self, name):
        """Record a named entity reference (``&name;``) inside a message.

        Names in HTML_ENTITY_PASSTHROUGH are kept as written, names in
        HTML_ENTITY_DECODED are replaced by the mapped character, and any
        other name is decoded with the standard library (or kept verbatim
        when it is not a known entity) instead of raising KeyError.
        """
        if not self.in_translate:
            return
        if name in HTML_ENTITY_PASSTHROUGH:
            self.data += '&%s;' % name
        elif name in HTML_ENTITY_DECODED:
            self.data += HTML_ENTITY_DECODED[name]
        else:
            # NOTE(review): HTML serialisation re-escapes U+00A0 as
            # "&nbsp;", so 'nbsp' probably belongs in
            # HTML_ENTITY_PASSTHROUGH -- confirm.
            self.data += self._decode_entity(name)

    @staticmethod
    def _decode_entity(name):
        """Return the text for ``&name;``, unchanged if the name is unknown."""
        try:
            from html import unescape
        except ImportError:
            # Python 2 has no html.unescape; the parser offers the same.
            unescape = html_parser.HTMLParser().unescape
        return unescape('&%s;' % name)

    def handle_charref(self, name):
        """Keep a numeric character reference (``&#NNN;``) as written."""
        if self.in_translate:
            self.data += '&#%s;' % name

    def handle_comment(self, comment):
        """Keep an HTML comment that appears inside a message."""
        if self.in_translate:
            self.data += '<!--%s-->' % comment

    def handle_endtag(self, tag):
        """Close an inner tag, or finish the message and record it."""
        if not self.in_translate:
            return
        void = self._VOID_ELEMENTS
        # A void element written without a close ("<br>") is still on the
        # stack; discard it so it cannot swallow this end tag and leave
        # the parser stuck inside the message.
        while (self.inner_tags and self.inner_tags[-1] in void and
               self.inner_tags[-1] != tag):
            self.inner_tags.pop()
        if self.inner_tags:
            closed = self.inner_tags.pop()
            # Void elements ("<br/>") have no end tag to write back.
            if closed not in void:
                self.data += "</%s>" % closed
            return
        # No inner tag left open: this end tag closes the translate
        # element itself.
        if self.plural_form:
            messages = (
                self.data.strip(),
                self.plural_form
            )
            func_name = u'ngettext'
        else:
            messages = self.data.strip()
            func_name = u'gettext'
        self.strings.append(
            (self.line, func_name, messages, self.comments)
        )
        self.in_translate = False
        self.data = ''
        # Rebind rather than clear: the recorded tuple keeps the old list.
        self.comments = []