Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion identify/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@
'phtml': {'text', 'php'},
'pl': {'text', 'perl'},
'plantuml': {'text', 'plantuml'},
'plist': {'text', 'plist'},
'png': {'binary', 'image', 'png'},
'po': {'text', 'pofile'},
'pp': {'text', 'puppet'},
Expand Down Expand Up @@ -161,6 +160,9 @@
'zip': {'binary', 'zip'},
'zsh': {'text', 'shell', 'zsh'},
}
# Extensions whose files may be either text or binary on disk.  Entries here
# deliberately carry no 'text' / 'binary' tag: that tag is decided later by
# inspecting the file contents (see the check in identify.tags_from_path).
# Keys must not also appear in EXTENSIONS.
EXTENSIONS_NEED_BINARY_CHECK = {
'plist': {'plist'},
}

NAMES = {
'.babelrc': {'text', 'json', 'babelrc'},
Expand Down
6 changes: 6 additions & 0 deletions identify/identify.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

# Build the set of every tag that can be reported: start from the basic
# file-type / mode / encoding tags, then fold in the tags of each lookup table.
ALL_TAGS = {DIRECTORY, SYMLINK, FILE, EXECUTABLE, NON_EXECUTABLE, TEXT, BINARY}
ALL_TAGS.update(*extensions.EXTENSIONS.values())
# Extensions that may be text or binary contribute their non-encoding tags.
ALL_TAGS.update(*extensions.EXTENSIONS_NEED_BINARY_CHECK.values())
ALL_TAGS.update(*extensions.NAMES.values())
ALL_TAGS.update(*interpreters.INTERPRETERS.values())
# Freeze the result so it is immutable from here on.
ALL_TAGS = frozenset(ALL_TAGS)
Expand Down Expand Up @@ -54,6 +55,9 @@ def tags_from_path(path):
if len(shebang) > 0:
tags.update(tags_from_interpreter(shebang[0]))

# some extensions can be both binary and text
# see EXTENSIONS_NEED_BINARY_CHECK
if not {TEXT, BINARY} & tags:
if file_is_text(path):
tags.add(TEXT)
else:
Expand All @@ -80,6 +84,8 @@ def tags_from_filename(filename):
ext = ext[1:].lower()
if ext in extensions.EXTENSIONS:
ret.update(extensions.EXTENSIONS[ext])
elif ext in extensions.EXTENSIONS_NEED_BINARY_CHECK:
ret.update(extensions.EXTENSIONS_NEED_BINARY_CHECK[ext])

return ret

Expand Down
13 changes: 13 additions & 0 deletions tests/extensions_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,16 @@
def test_extensions_have_binary_or_text(extension):
tags = extensions.EXTENSIONS[extension]
assert len({'text', 'binary'} & tags) == 1, tags


@pytest.mark.parametrize(
    'extension',
    extensions.EXTENSIONS_NEED_BINARY_CHECK,
)
def test_need_binary_check_do_not_specify_text_binary(extension):
    """Entries needing a content check must not pre-declare text/binary."""
    declared = extensions.EXTENSIONS_NEED_BINARY_CHECK[extension]
    encoding_tags = declared & {'text', 'binary'}
    assert not encoding_tags, declared


def test_mutually_exclusive_check_types():
    """No extension may be listed in both extension lookup tables."""
    plain = set(extensions.EXTENSIONS)
    needs_check = extensions.EXTENSIONS_NEED_BINARY_CHECK
    # Iterating a dict yields its keys, so this compares extension names.
    assert plain.isdisjoint(needs_check)
39 changes: 39 additions & 0 deletions tests/identify_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ def test_all_tags_includes_basic_ones():
assert 'directory' in identify.ALL_TAGS


def test_all_tags_contains_each_type():
    """ALL_TAGS includes a sample tag from every source of tags."""
    sample_tags = (
        'xml',  # extension
        'plist',  # extension, needs binary check
        'dockerfile',  # by file convention
        'python3',  # by shebang
    )
    for tag in sample_tags:
        assert tag in identify.ALL_TAGS


def test_tags_from_path_does_not_exist(tmpdir):
x = tmpdir.join('foo')
with pytest.raises(ValueError):
Expand Down Expand Up @@ -73,6 +80,35 @@ def test_tags_from_path_binary(tmpdir):
}


def test_tags_from_path_plist_binary(tmpdir):
    """A binary-format property list on disk is tagged plist + binary."""
    payload = (
        b'bplist00\xd1\x01\x02_\x10\x0fLast Login NameWDefault\x08\x0b\x1d\x00'
        b'\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00'
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00%'
    )
    plist_file = tmpdir.join('t.plist')
    plist_file.write_binary(payload)

    expected = {'file', 'plist', 'binary', 'non-executable'}
    assert identify.tags_from_path(plist_file.strpath) == expected


def test_tags_from_path_plist_text(tmpdir):
    """An XML-format property list on disk is tagged plist + text."""
    xml_document = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'
        '<plist version="1.0">\n'
        '<dict>\n'
        '\t<key>Last Login Name</key>\n'
        '\t<string>Default</string>\n'
        '</dict>\n'
        '</plist>\n'
    )
    plist_file = tmpdir.join('t.plist')
    plist_file.write(xml_document)

    expected = {'file', 'plist', 'text', 'non-executable'}
    assert identify.tags_from_path(plist_file.strpath) == expected


@pytest.mark.parametrize(
('filename', 'expected'),
(
Expand All @@ -85,6 +121,9 @@ def test_tags_from_path_binary(tmpdir):
('mod/test.py', {'text', 'python'}),
('mod/Dockerfile', {'text', 'dockerfile'}),

# does not set binary / text
('f.plist', {'plist'}),

# case of extension should be ignored
('f.JPG', {'binary', 'image', 'jpeg'}),
# but case of name checks should still be honored
Expand Down