Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# http://editorconfig.org

root = true

[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true

[*.{py,rst,ini}]
indent_style = space
indent_size = 4

[*.{html,json,yml}]
indent_style = space
indent_size = 2

[*.md]
trim_trailing_whitespace = false
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ build
.coverage
dist
.idea
Pipfile
Pipfile.lock

# docs
docs/_*
2 changes: 2 additions & 0 deletions docs/customize.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ Other editable attributes

* :py:obj:`~nameparser.config.Constants.string_format` - controls output from `str()`
* :py:obj:`~nameparser.config.Constants.empty_attribute_default` - value returned by empty attributes, defaults to empty string
* :py:obj:`~nameparser.config.Constants.capitalize_name` - If set, applies :py:meth:`~nameparser.parser.HumanName.capitalize` to each :py:class:`~nameparser.parser.HumanName` instance after it is parsed.
* :py:obj:`~nameparser.config.Constants.force_mixed_case_capitalization` - If set, forces the capitalization of mixed case strings when :py:meth:`~nameparser.parser.HumanName.capitalize` is called.



Expand Down
30 changes: 27 additions & 3 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,8 @@ Capitalization Support

The HumanName class can try to guess the correct capitalization of names
entered in all upper or lower case. By default, it will not adjust
the case of names entered in mixed case. To run capitalization on all names
pass the parameter `force=True`.

the case of names entered in mixed case. To force capitalization of a
mixed-case `HumanName` instance, pass the parameter `force=True` to `capitalize()`.

Capitalize the name.

Expand All @@ -94,6 +93,31 @@ pass the parameter `force=True`.
>>> str(name)
'Shirley MacLaine'

To apply capitalization to all `HumanName` instances, set
:py:attr:`~nameparser.config.Constants.capitalize_name` to `True`.

.. doctest:: capitalize_name
:options: +NORMALIZE_WHITESPACE

>>> from nameparser.config import CONSTANTS
>>> CONSTANTS.capitalize_name = True
>>> name = HumanName("bob v. de la macdole-eisenhower phd")
>>> str(name)
'Bob V. de la MacDole-Eisenhower Ph.D.'

To force the capitalization of mixed case strings on all `HumanName` instances,
set :py:attr:`~nameparser.config.Constants.force_mixed_case_capitalization` to `True`.

.. doctest:: force_mixed_case_capitalization
:options: +NORMALIZE_WHITESPACE

>>> from nameparser.config import CONSTANTS
>>> CONSTANTS.force_mixed_case_capitalization = True
>>> name = HumanName('Shirley Maclaine')
>>> name.capitalize()
>>> str(name)
'Shirley MacLaine'


Nickname Handling
------------------
Expand Down
33 changes: 31 additions & 2 deletions nameparser/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,37 @@ class Constants(object):
'John'

"""


capitalize_name = False
"""
If set, applies :py:meth:`~nameparser.parser.HumanName.capitalize` to
:py:class:`~nameparser.parser.HumanName` instance.

.. doctest::

>>> from nameparser.config import CONSTANTS
>>> CONSTANTS.capitalize_name = True
>>> name = HumanName("bob v. de la macdole-eisenhower phd")
>>> str(name)
'Bob V. de la MacDole-Eisenhower Ph.D.'

"""
force_mixed_case_capitalization = False
"""
If set, forces the capitalization of mixed case strings when
:py:meth:`~nameparser.parser.HumanName.capitalize` is called.

.. doctest::

>>> from nameparser.config import CONSTANTS
>>> CONSTANTS.force_mixed_case_capitalization = True
>>> name = HumanName('Shirley Maclaine')
>>> name.capitalize()
>>> str(name)
'Shirley MacLaine'

"""


def __init__(self,
prefixes=PREFIXES,
suffix_acronyms=SUFFIX_ACRONYMS,
Expand Down
10 changes: 5 additions & 5 deletions nameparser/config/capitalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
from __future__ import unicode_literals

CAPITALIZATION_EXCEPTIONS = (
('ii' ,'II'),
('iii','III'),
('iv' ,'IV'),
('md' ,'M.D.'),
('phd','Ph.D.'),
('ii', 'II'),
('iii', 'III'),
('iv', 'IV'),
('md', 'M.D.'),
('phd', 'Ph.D.'),
)
"""
Any pieces that are not capitalized by capitalizing the first letter.
Expand Down
2 changes: 1 addition & 1 deletion nameparser/config/prefixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#: Name pieces that appear before a last name. Prefixes join to the piece
#: that follows them to make one new piece. They can be chained together, e.g.
#: "von der" and "de la". Because they only appear in middle or last names,
#: they also signifiy that all following name pieces should be in the same name
#: they also signify that all following name pieces should be in the same name
#: part, for example, "von" will be joined to all following pieces that are not
#: prefixes or suffixes, allowing recognition of double last names when they
#: appear after a prefix. So in "pennie von bergen wessels MD", "von" will
Expand Down
27 changes: 21 additions & 6 deletions nameparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ def pre_process(self):
This method happens at the beginning of the :py:func:`parse_full_name`
before any other processing of the string aside from unicode
normalization, so it's a good place to do any custom handling in a
subclass. Runs :py:func:`parse_nicknames` and py:func:`squash_emoji`.
subclass. Runs :py:func:`parse_nicknames` and :py:func:`squash_emoji`.

"""
self.fix_phd()
Expand All @@ -397,9 +397,11 @@ def pre_process(self):
def post_process(self):
"""
This happens at the end of the :py:func:`parse_full_name` after
all other processing has taken place. Runs :py:func:`handle_firstnames`.
all other processing has taken place. Runs :py:func:`handle_firstnames`
and :py:func:`handle_capitalization`.
"""
self.handle_firstnames()
self.handle_capitalization()

def fix_phd(self):
_re = self.C.regexes.phd
Expand Down Expand Up @@ -675,9 +677,9 @@ def join_on_conjunctions(self, pieces, additional_parts_count=0):
:param list pieces: name pieces strings after split on spaces
:param int additional_parts_count:
:return: new list with piece next to conjunctions merged into one piece
with spaces in it.
with spaces in it.
:rtype: list

"""
length = len(pieces) + additional_parts_count
# don't join on conjunctions if there's only 2 parts
Expand Down Expand Up @@ -833,14 +835,16 @@ def cap_piece(self, piece, attribute):
replacement = lambda m: self.cap_word(m.group(0), attribute)
return self.C.regexes.word.sub(replacement, piece)

def capitalize(self, force=False):
def capitalize(self, force=None):
"""
The HumanName class can try to guess the correct capitalization of name
entered in all upper or lower case. By default, it will not adjust the
case of names entered in mixed case. To run capitalization on all names
pass the parameter `force=True`.

:param bool force: force capitalization of strings that include mixed case
:param bool force: Forces capitalization of mixed case strings. This
parameter overrides rules set within
:py:class:`~nameparser.config.CONSTANTS`.

**Usage**

Expand All @@ -861,10 +865,21 @@ def capitalize(self, force=False):

"""
name = u(self)
force = self.C.force_mixed_case_capitalization \
if force is None else force

if not force and not (name == name.upper() or name == name.lower()):
return
self.title_list = self.cap_piece(self.title , 'title').split(' ')
self.first_list = self.cap_piece(self.first , 'first').split(' ')
self.middle_list = self.cap_piece(self.middle, 'middle').split(' ')
self.last_list = self.cap_piece(self.last , 'last').split(' ')
self.suffix_list = self.cap_piece(self.suffix, 'suffix').split(', ')

def handle_capitalization(self):
    """
    Apply the capitalization behavior configured on
    :py:class:`~nameparser.config.CONSTANTS`.

    Calls :py:func:`capitalize` only when ``self.C.capitalize_name`` is
    truthy; otherwise the parsed name is left untouched.
    """
    # Guard clause: nothing to do unless the constants opt in.
    if not self.C.capitalize_name:
        return
    self.capitalize()
32 changes: 26 additions & 6 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

class HumanNameTestBase(unittest.TestCase):
def m(self, actual, expected, hn):
"""assertEquals with a better message and awareness of hn.C.empty_attribute_default"""
"""assertEqual with a better message and awareness of hn.C.empty_attribute_default"""
expected = expected or hn.C.empty_attribute_default
try:
self.assertEqual(actual, expected, "'%s' != '%s' for '%s'\n%r" % (
Expand All @@ -50,7 +50,7 @@ def m(self, actual, expected, hn):
hn
))
except UnicodeDecodeError:
self.assertEquals(actual, expected)
self.assertEqual(actual, expected)


class HumanNamePythonTests(HumanNameTestBase):
Expand All @@ -62,8 +62,6 @@ def test_utf8(self):

def test_string_output(self):
hn = HumanName("de la Véña, Jüan")
print(hn)
print(repr(hn))

def test_escaped_utf8_bytes(self):
hn = HumanName(b'B\xc3\xb6ck, Gerald')
Expand Down Expand Up @@ -1267,7 +1265,7 @@ class ConstantsCustomization(HumanNameTestBase):
def test_add_title(self):
hn = HumanName("Te Awanui-a-Rangi Black", constants=None)
start_len = len(hn.C.titles)
self.assert_(start_len > 0)
self.assertTrue(start_len > 0)
hn.C.titles.add('te')
self.assertEqual(start_len + 1, len(hn.C.titles))
hn.parse_full_name()
Expand All @@ -1278,7 +1276,7 @@ def test_add_title(self):
def test_remove_title(self):
hn = HumanName("Hon Solo", constants=None)
start_len = len(hn.C.titles)
self.assert_(start_len > 0)
self.assertTrue(start_len > 0)
hn.C.titles.remove('hon')
self.assertEqual(start_len - 1, len(hn.C.titles))
hn.parse_full_name()
Expand Down Expand Up @@ -2090,6 +2088,28 @@ def test_formatting_constants_attribute(self):
self.assertEqual(u(hn), "TEST2")
CONSTANTS.string_format = _orig

def test_capitalize_name_constants_attribute(self):
    """Names are capitalized on parse when ``CONSTANTS.capitalize_name`` is set."""
    from nameparser.config import CONSTANTS
    # CONSTANTS is shared module-level state: remember the prior value and
    # restore it even if the assertion fails, so other tests are unaffected.
    _orig = CONSTANTS.capitalize_name
    CONSTANTS.capitalize_name = True
    try:
        hn = HumanName("bob v. de la macdole-eisenhower phd")
        self.assertEqual(str(hn), "Bob V. de la MacDole-Eisenhower Ph.D.")
    finally:
        CONSTANTS.capitalize_name = _orig

def test_force_mixed_case_capitalization_constants_attribute(self):
    """``capitalize()`` adjusts mixed-case input when the force constant is set."""
    from nameparser.config import CONSTANTS
    # CONSTANTS is shared module-level state: remember the prior value and
    # restore it even if the assertion fails, so other tests are unaffected.
    _orig = CONSTANTS.force_mixed_case_capitalization
    CONSTANTS.force_mixed_case_capitalization = True
    try:
        hn = HumanName('Shirley Maclaine')
        hn.capitalize()
        self.assertEqual(str(hn), "Shirley MacLaine")
    finally:
        CONSTANTS.force_mixed_case_capitalization = _orig

def test_capitalize_name_and_force_mixed_case_capitalization_constants_attributes(self):
    """With both constants set, mixed-case input is capitalized on parse."""
    from nameparser.config import CONSTANTS
    # Both flags live on the shared CONSTANTS object. Save and restore them
    # so they do not stay enabled for every test that runs afterwards.
    _orig_capitalize = CONSTANTS.capitalize_name
    _orig_force = CONSTANTS.force_mixed_case_capitalization
    CONSTANTS.capitalize_name = True
    CONSTANTS.force_mixed_case_capitalization = True
    try:
        hn = HumanName('Shirley Maclaine')
        self.assertEqual(str(hn), "Shirley MacLaine")
    finally:
        CONSTANTS.capitalize_name = _orig_capitalize
        CONSTANTS.force_mixed_case_capitalization = _orig_force

def test_quote_nickname_formating(self):
hn = HumanName("Rev John A. Kenneth Doe III (Kenny)")
hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'"
Expand Down