Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,41 @@ Don't want to include nicknames in your output? No problem. Just omit that keywo
'Dr. Juan de la Vega'


Initials Support
----------------

The HumanName class can try to get the correct representation of initials.
Initials can be tricky as different format usages exist.
If you want to exclude one of the name parts from the initials, you can customize the initials format by changing
:py:attr:`~nameparser.config.Constants.initials_format`.
Three attributes exist for the format: `first`, `middle` and `last`.

.. doctest:: initials format

>>> from nameparser.config import CONSTANTS
>>> CONSTANTS.initials_format = "{first} {middle}"
>>> HumanName("Doe, John A. Kenneth, Jr.").initials()
'J. A. K.'
>>> HumanName("Doe, John A. Kenneth, Jr.", initials_format="{last}, {first}").initials()
'D., J.'


Furthermore, the delimiter for the string output can be set through:
:py:attr:`~nameparser.config.Constants.initials_delimiter`

.. doctest:: initials delimiter

>>> HumanName("Doe, John A. Kenneth, Jr.", initials_delimiter=";").initials()
"J; A; K;"
>>> from nameparser.config import CONSTANTS
>>> CONSTANTS.initials_delimiter = "."
>>> HumanName("Doe, John A. Kenneth, Jr.", initials_format="{first}{middle}{last}").initials()
"J.A.K.D."

If you want to receive a list representation of the initials, you can use :py:meth:`~nameparser.HumanName.initials_list`.
This function is unaffected by :py:attr:`~nameparser.config.Constants.initials_format`.

.. doctest:: list format

>>> HumanName("Doe, John A. Kenneth, Jr.", initials_delimiter=";").initials_list()
["J", "A", "K", "D"]

90 changes: 54 additions & 36 deletions nameparser/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,35 +49,37 @@

DEFAULT_ENCODING = 'UTF-8'


class SetManager(Set):
'''
Easily add and remove config variables per module or instance. Subclass of
``collections.abc.Set``.

Only special functionality beyond that provided by set() is
to normalize constants for comparison (lower case, no periods)
when they are add()ed and remove()d and allow passing multiple
string arguments to the :py:func:`add()` and :py:func:`remove()` methods.

'''

def __init__(self, elements):
self.elements = set(elements)

def __call__(self):
return self.elements

def __repr__(self):
return "SetManager({})".format(self.elements) # used for docs
return "SetManager({})".format(self.elements) # used for docs

def __iter__(self):
return iter(self.elements)

def __contains__(self, value):
return value in self.elements

def __len__(self):
return len(self.elements)

def next(self):
return self.__next__()

Expand All @@ -89,7 +91,7 @@ def __next__(self):
c = self.count
self.count = c + 1
return getattr(self, self.elements[c]) or next(self)

def add_with_encoding(self, s, encoding=None):
"""
Add the lower case and no-period version of the string to the set. Pass an
Expand All @@ -111,7 +113,7 @@ def add(self, *strings):
"""
[self.add_with_encoding(s) for s in strings]
return self

def remove(self, *strings):
"""
Remove the lower case and no-period version of the string arguments from the set.
Expand All @@ -126,10 +128,11 @@ class TupleManager(dict):
A dictionary with dot.notation access. Subclass of ``dict``. Makes the tuple constants
more friendly.
'''

def __getattr__(self, attr):
return self.get(attr)
__setattr__= dict.__setitem__
__delattr__= dict.__delitem__
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__

def __getstate__(self):
return dict(self)
Expand All @@ -140,6 +143,7 @@ def __setstate__(self, state):
def __reduce__(self):
return (TupleManager, (), self.__getstate__())


class Constants(object):
"""
An instance of this class hold all of the configuration constants for the parser.
Expand All @@ -163,11 +167,23 @@ class Constants(object):
:param regexes:
:py:attr:`regexes` wrapped with :py:class:`TupleManager`.
"""

string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"
"""
The default string format use for all new `HumanName` instances.
"""

initials_format = "{first} {middle} {last}"
"""
The default initials format used for all new `HumanName` instances.
"""

initials_delimiter = "."
"""
The default initials delimiter used for all new `HumanName` instances.
Will be used to add a delimiter between each initial.
"""

empty_attribute_default = ''
"""
Default return value for empty attributes.
Expand All @@ -183,6 +199,7 @@ class Constants(object):
'John'

"""

capitalize_name = False
"""
If set, applies :py:meth:`~nameparser.parser.HumanName.capitalize` to
Expand All @@ -197,6 +214,7 @@ class Constants(object):
'Bob V. de la MacDole-Eisenhower Ph.D.'

"""

force_mixed_case_capitalization = False
"""
If set, forces the capitalization of mixed case strings when
Expand All @@ -213,27 +231,26 @@ class Constants(object):

"""


def __init__(self,
prefixes=PREFIXES,
suffix_acronyms=SUFFIX_ACRONYMS,
suffix_not_acronyms=SUFFIX_NOT_ACRONYMS,
titles=TITLES,
first_name_titles=FIRST_NAME_TITLES,
conjunctions=CONJUNCTIONS,
capitalization_exceptions=CAPITALIZATION_EXCEPTIONS,
regexes=REGEXES
):
self.prefixes = SetManager(prefixes)
self.suffix_acronyms = SetManager(suffix_acronyms)
def __init__(self,
prefixes=PREFIXES,
suffix_acronyms=SUFFIX_ACRONYMS,
suffix_not_acronyms=SUFFIX_NOT_ACRONYMS,
titles=TITLES,
first_name_titles=FIRST_NAME_TITLES,
conjunctions=CONJUNCTIONS,
capitalization_exceptions=CAPITALIZATION_EXCEPTIONS,
regexes=REGEXES
):
self.prefixes = SetManager(prefixes)
self.suffix_acronyms = SetManager(suffix_acronyms)
self.suffix_not_acronyms = SetManager(suffix_not_acronyms)
self.titles = SetManager(titles)
self.first_name_titles = SetManager(first_name_titles)
self.conjunctions = SetManager(conjunctions)
self.titles = SetManager(titles)
self.first_name_titles = SetManager(first_name_titles)
self.conjunctions = SetManager(conjunctions)
self.capitalization_exceptions = TupleManager(capitalization_exceptions)
self.regexes = TupleManager(regexes)
self.regexes = TupleManager(regexes)
self._pst = None

@property
def suffixes_prefixes_titles(self):
if not self._pst:
Expand All @@ -242,15 +259,16 @@ def suffixes_prefixes_titles(self):

def __repr__(self):
return "<Constants() instance>"

def __setstate__(self, state):
self.__init__(state)

def __getstate__(self):
attrs = [x for x in dir(self) if not x.startswith('_')]
return dict([(a,getattr(self, a)) for a in attrs])
return dict([(a, getattr(self, a)) for a in attrs])


#: A module-level instance of the :py:class:`Constants()` class.
#: A module-level instance of the :py:class:`Constants()` class.
#: Provides a common instance for the module to share
#: to easily adjust configuration for the entire module.
#: See `Customizing the Parser with Your Own Configuration <customize.html>`_.
Expand Down
Loading