Skip to content

Commit 4dfd49f

Browse files
committed
Use ast instead of eval for string extraction
This is safer (as we don't actually execute anything) and allows us to parse f-strings too. Closes #769 (supersedes it). Refs #715 (doesn't add an error yet, but doesn't crash on f-strings).
1 parent c7d04e8 commit 4dfd49f

File tree

2 files changed

+49
-9
lines changed

2 files changed

+49
-9
lines changed

babel/messages/extract.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
:copyright: (c) 2013-2022 by the Babel Team.
1616
:license: BSD, see LICENSE for more details.
1717
"""
18-
18+
import ast
1919
import os
2020
from os.path import relpath
2121
import sys
@@ -487,14 +487,9 @@ def extract_python(fileobj, keywords, comment_tags, options):
487487
if nested:
488488
funcname = value
489489
elif tok == STRING:
490-
# Unwrap quotes in a safe manner, maintaining the string's
491-
# encoding
492-
# https://sourceforge.net/tracker/?func=detail&atid=355470&
493-
# aid=617979&group_id=5470
494-
code = compile('# coding=%s\n%s' % (str(encoding), value),
495-
'<string>', 'eval', future_flags)
496-
value = eval(code, {'__builtins__': {}}, {})
497-
buf.append(value)
490+
val = _parse_python_string(value, encoding, future_flags)
491+
if val is not None:
492+
buf.append(val)
498493
elif tok == OP and value == ',':
499494
if buf:
500495
messages.append(''.join(buf))
@@ -516,6 +511,29 @@ def extract_python(fileobj, keywords, comment_tags, options):
516511
funcname = value
517512

518513

514+
def _parse_python_string(value, encoding, future_flags):
515+
# Unwrap quotes in a safe manner, maintaining the string's encoding
516+
# https://sourceforge.net/tracker/?func=detail&atid=355470&aid=617979&group_id=5470
517+
code = compile(
518+
f'# coding={str(encoding)}\n{value}',
519+
'<string>',
520+
'eval',
521+
ast.PyCF_ONLY_AST | future_flags,
522+
)
523+
if not isinstance(code, ast.Expression):
524+
return None
525+
body = code.body
526+
if isinstance(body, ast.Str):
527+
return body.s
528+
if isinstance(body, ast.JoinedStr): # f-string
529+
if all(isinstance(node, ast.Str) for node in body.values):
530+
return ''.join(node.s for node in body.values)
531+
if all(isinstance(node, ast.Constant) for node in body.values):
532+
return ''.join(str(node.value) for node in body.values)
533+
# TODO: could raise an error or warning when not all nodes are constants
534+
return None
535+
536+
519537
def extract_javascript(fileobj, keywords, comment_tags, options):
520538
"""Extract messages from JavaScript source code.
521539

tests/messages/test_extract.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,3 +528,25 @@ def test_future(self):
528528
messages = list(extract.extract('python', buf,
529529
extract.DEFAULT_KEYWORDS, [], {}))
530530
assert messages[0][1] == u'\xa0'
531+
532+
def test_f_strings(self):
    # f-strings built purely from constant parts must be extracted like
    # ordinary literals; one containing an interpolation must be skipped.
    source = BytesIO(br"""
t1 = _('foobar')
t2 = _(f'spameggs' f'feast') # should be extracted; constant parts only
t2 = _(f'spameggs' 'kerroshampurilainen') # should be extracted (mixing f with no f)
t3 = _(f'spameggs {t1}') # should not be extracted
""")
    extracted = list(
        extract.extract('python', source, extract.DEFAULT_KEYWORDS, [], {})
    )
    assert len(extracted) == 3
    plain, joined_f, mixed = extracted
    assert plain[1] == u'foobar'
    assert joined_f[1] == u'spameggsfeast'
    assert mixed[1] == u'spameggskerroshampurilainen'
544+
545+
def test_f_strings_non_utf8(self):
    # The source declares latin-1 via a coding cookie, so the raw bytes in
    # the f-strings must be decoded with that encoding when extracted.
    source = BytesIO(b"""
# -- coding: latin-1 --
t2 = _(f'\xe5\xe4\xf6' f'\xc5\xc4\xd6')
""")
    extracted = list(
        extract.extract('python', source, extract.DEFAULT_KEYWORDS, [], {})
    )
    assert len(extracted) == 1
    (only,) = extracted
    assert only[1] == u'åäöÅÄÖ'

0 commit comments

Comments
 (0)