Skip to content

Commit 7db98f4

Browse files
committed
initial commit
0 parents  commit 7db98f4

8 files changed

Lines changed: 313 additions & 0 deletions

File tree

README

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
2+
= python-magic =
3+
4+
Adam Hupp <adam at hupp.org>
5+
6+
Distributed under the PSF License: http://www.python.org/psf/license/
7+
8+
python-magic is a simple wrapper for libmagic. libmagic identifies
9+
file types according to their headers. It is the core of the Unix
10+
"file" command.
11+
12+
13+
= Installation =
14+
15+
This module depends on libmagic to run. It is pure Python and loads
16+
libmagic through ctypes, so nothing needs to be compiled to build it.
17+
18+
To build and install run:
19+
20+
# python setup.py install
21+
22+
23+
= Example Usage =
24+
25+
>>> import magic
26+
>>> m = magic.Magic()
27+
>>> m.from_file("testdata/test.pdf")
28+
'PDF document, version 1.2'
29+
>>> m.from_buffer(open("testdata/test.pdf").read(1024))
30+
'PDF document, version 1.2'
31+
32+
# For MIME types
33+
>>> mime = magic.Magic(mime=True)
34+
>>> mime.from_file("testdata/test.pdf")
35+
'application/pdf'
36+
>>>

magic.py

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
"""
2+
magic is a wrapper around the libmagic file identification library.
3+
4+
See README for more information.
5+
6+
Usage:
7+
8+
>>> import magic
9+
>>> magic.from_file("testdata/test.pdf")
10+
'PDF document, version 1.2'
11+
>>> magic.from_file("testdata/test.pdf", mime=True)
12+
'application/pdf'
13+
>>> magic.from_buffer(open("testdata/test.pdf").read(1024))
14+
'PDF document, version 1.2'
15+
>>>
16+
17+
18+
"""
19+
20+
import os.path
21+
import ctypes
22+
import ctypes.util
23+
24+
from ctypes import c_char_p, c_int, c_size_t, c_void_p
25+
26+
class MagicException(Exception):
    """Error raised by this module when libmagic reports a failure."""
27+
28+
class Magic:
    """
    Magic is a wrapper around the libmagic C library.

    Each instance owns one libmagic cookie, opened in __init__ and
    released in __del__.
    """

    def __init__(self, mime=False, magic_file=None):
        """
        Create a new libmagic wrapper.

        mime - if True, mimetypes are returned instead of textual descriptions
        magic_file - use a magic database other than the system default

        raises MagicException if libmagic cannot allocate a cookie or
        reports an error while loading the database
        """
        flags = MAGIC_NONE
        if mime:
            flags |= MAGIC_MIME

        # magic_open returns NULL (None through ctypes) on failure; handing a
        # NULL cookie to the other libmagic calls is not safe, so stop here.
        self.cookie = magic_open(flags)
        if self.cookie is None:
            raise MagicException("magic_open failed: "
                                 "could not allocate a libmagic cookie")

        magic_load(self.cookie, magic_file)

    def from_buffer(self, buf):
        """
        Identify the contents of `buf`
        """
        return magic_buffer(self.cookie, buf)

    def from_file(self, filename):
        """
        Identify the contents of file `filename`
        raises IOError if the file does not exist
        """
        if not os.path.exists(filename):
            raise IOError("File does not exist: " + filename)

        return magic_file(self.cookie, filename)

    def __del__(self):
        """
        Release the libmagic cookie, if one was ever opened.
        """
        # __init__ may have failed before (or while) assigning the cookie.
        cookie = getattr(self, 'cookie', None)
        if cookie is None:
            return

        # Forget the cookie first so it can never be closed twice.
        self.cookie = None
        try:
            magic_close(cookie)
        except Exception:
            # A finalizer must not raise or write to stdout; during
            # interpreter shutdown magic_close itself may already be gone.
            pass
73+
74+
75+
_magic_mime = None
76+
_magic = None
77+
78+
def _get_magic_mime():
    """Return the shared MIME-mode Magic instance, creating it on first use."""
    global _magic_mime
    _magic_mime = _magic_mime or Magic(mime=True)
    return _magic_mime
83+
84+
def _get_magic():
    """Return the shared textual-description Magic instance, creating it on first use."""
    global _magic
    _magic = _magic or Magic()
    return _magic
89+
90+
def _get_magic_type(mime):
    """Pick the shared Magic instance: MIME mode if `mime` is true, textual otherwise."""
    getter = _get_magic_mime if mime else _get_magic
    return getter()
95+
96+
def from_file(filename, mime=False):
    """
    Identify the file at `filename` using a shared Magic instance.

    mime - if True, the MIME type is returned instead of a textual description
    """
    return _get_magic_type(mime).from_file(filename)
99+
100+
def from_buffer(buffer, mime=False):
    """
    Identify the contents of `buffer` using a shared Magic instance.

    mime - if True, the MIME type is returned instead of a textual description
    """
    return _get_magic_type(mime).from_buffer(buffer)
103+
104+
105+
106+
107+
# Locate the shared library by name.  find_library returns None when libmagic
# is not installed, and CDLL(None) does not fail cleanly (on POSIX it can
# silently open the running process instead), so report the real problem
# at import time.
_libmagic_path = ctypes.util.find_library('magic')
if _libmagic_path is None:
    raise ImportError('failed to find libmagic.  Check your installation')
libmagic = ctypes.CDLL(_libmagic_path)

# The libmagic cookie is handled as an opaque pointer.
magic_t = ctypes.c_void_p
110+
111+
def errorcheck(result, func, args):
    """
    ctypes errcheck hook shared by the libmagic bindings.

    Asks magic_error for the pending message on the cookie (args[0]).
    Raises MagicException with that message if there is one; otherwise
    returns `result` unchanged.
    """
    message = magic_error(args[0])
    if message is None:
        return result
    raise MagicException(message)
117+
118+
# ctypes prototypes for the libmagic entry points.  Each binding lists its
# argument types, then its return type, then (where set) the errcheck hook.

# Takes the flag bits and returns a new cookie.
magic_open = libmagic.magic_open
magic_open.argtypes = [c_int]
magic_open.restype = magic_t

# Takes a cookie; no return value.
magic_close = libmagic.magic_close
magic_close.argtypes = [magic_t]
magic_close.restype = None

# Returns the pending error message for a cookie, or None if there is none.
magic_error = libmagic.magic_error
magic_error.argtypes = [magic_t]
magic_error.restype = c_char_p

magic_errno = libmagic.magic_errno
magic_errno.argtypes = [magic_t]
magic_errno.restype = c_int

# Identify a file by name; errorcheck turns libmagic errors into exceptions.
magic_file = libmagic.magic_file
magic_file.argtypes = [magic_t, c_char_p]
magic_file.restype = c_char_p
magic_file.errcheck = errorcheck

# Raw buffer binding (cookie, data pointer, length); callers go through the
# magic_buffer() wrapper, which supplies the length.
_magic_buffer = libmagic.magic_buffer
_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
_magic_buffer.restype = c_char_p
_magic_buffer.errcheck = errorcheck
144+
145+
146+
def magic_buffer(cookie, buf):
    """Identify the data in `buf`, passing its length to libmagic alongside it."""
    size = len(buf)
    return _magic_buffer(cookie, buf, size)
148+
149+
150+
# Load a magic database for a cookie; errorcheck turns libmagic errors into
# exceptions.
magic_load = libmagic.magic_load
magic_load.argtypes = [magic_t, c_char_p]
magic_load.restype = c_int
magic_load.errcheck = errorcheck

# The remaining bindings are declared but carry no errcheck hook.
magic_setflags = libmagic.magic_setflags
magic_setflags.argtypes = [magic_t, c_int]
magic_setflags.restype = c_int

magic_check = libmagic.magic_check
magic_check.argtypes = [magic_t, c_char_p]
magic_check.restype = c_int

magic_compile = libmagic.magic_compile
magic_compile.argtypes = [magic_t, c_char_p]
magic_compile.restype = c_int
166+
167+
168+
169+
# Flag bits for magic_open(); combine them with bitwise OR.
MAGIC_NONE = 0x000000               # No flags
MAGIC_DEBUG = 0x000001              # Turn on debugging
MAGIC_SYMLINK = 0x000002            # Follow symlinks
MAGIC_COMPRESS = 0x000004           # Check inside compressed files
MAGIC_DEVICES = 0x000008            # Look at the contents of devices
MAGIC_MIME = 0x000010               # Return a mime string
MAGIC_CONTINUE = 0x000020           # Return all matches
MAGIC_CHECK = 0x000040              # Print warnings to stderr
MAGIC_PRESERVE_ATIME = 0x000080     # Restore access time on exit
MAGIC_RAW = 0x000100                # Don't translate unprintable chars
MAGIC_ERROR = 0x000200              # Handle ENOENT etc as real errors

# Switches that disable individual checks.
MAGIC_NO_CHECK_COMPRESS = 0x001000  # Don't check for compressed files
MAGIC_NO_CHECK_TAR = 0x002000       # Don't check for tar files
MAGIC_NO_CHECK_SOFT = 0x004000      # Don't check magic entries
MAGIC_NO_CHECK_APPTYPE = 0x008000   # Don't check application type
MAGIC_NO_CHECK_ELF = 0x010000       # Don't check for elf details
MAGIC_NO_CHECK_ASCII = 0x020000     # Don't check for ascii files
MAGIC_NO_CHECK_TROFF = 0x040000     # Don't check ascii/troff
MAGIC_NO_CHECK_FORTRAN = 0x080000   # Don't check ascii/fortran
MAGIC_NO_CHECK_TOKENS = 0x100000    # Don't check ascii/tokens

setup.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from setuptools import setup, Extension
2+
#from distutils.core import setup, Extension
3+
4+
# Package metadata.  The module is a single pure-Python file (magic.py), so
# it is listed under py_modules and no extension needs to be built.
setup(name='python-magic',
      description='File type identification using libmagic',
      author='Adam Hupp',
      # Same address the README gives as "adam at hupp.org".
      author_email='adam@hupp.org',
      url="http://hupp.org/adam/hg/python-magic",
      version='0.1',
      py_modules=['magic'],
      long_description="""This module uses ctypes to access the libmagic file type
identification library. It makes use of the local magic database and
supports both textual and MIME-type output.
""",
      keywords="mime magic file",
      license="PSF",
      )

test.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
2+
import os.path
3+
import unittest
4+
import random
5+
from StringIO import StringIO
6+
from os import path
7+
from magic import Magic, MagicException
8+
9+
# Sample files under testdata/, one tuple per file:
# (file name, expected textual description, expected MIME type).
testfile = [
    (
        "magic.pyc",
        "python 2.4 byte-compiled",
        "application/octet-stream",
    ),
    (
        "test.pdf",
        "PDF document, version 1.2",
        "application/pdf",
    ),
    (
        "test.gz",
        'gzip compressed data, was "test", from Unix, last modified: '
        'Sat Jun 28 18:32:52 2008',
        "application/x-gzip",
    ),
    (
        "text.txt",
        "ASCII text",
        "text/plain; charset=us-ascii",
    ),
]
16+
17+
18+
class TestMagic(unittest.TestCase):
    """
    Check Magic against the sample files listed in `testfile`.

    Runs in textual-description mode; a subclass can set `mime` to True to
    run the same cases against MIME output.
    """

    mime = False

    def setUp(self):
        self.m = Magic(mime=self.mime)

    def testFileTypes(self):
        """Each sample is identified the same way from a buffer and from its path."""
        for filename, desc, mime in testfile:
            filename = path.join(path.dirname(__file__),
                                 "testdata",
                                 filename)
            if self.mime:
                target = mime
            else:
                target = desc

            # The samples include binary data (PDF, gzip, .pyc), so read in
            # binary mode, and close the handle instead of leaking it.
            sample = open(filename, "rb")
            try:
                head = sample.read(1024)
            finally:
                sample.close()

            self.assertEqual(target, self.m.from_buffer(head))
            self.assertEqual(target, self.m.from_file(filename), filename)

    def testErrors(self):
        """Missing files and missing magic databases are reported as exceptions."""
        self.assertRaises(IOError, self.m.from_file, "nonexistent")
        self.assertRaises(MagicException, Magic, magic_file="noneexistent")

        # Point the MAGIC environment variable at a missing database, and
        # put the environment back even if the assertion fails so that
        # later tests are not affected.
        previous = os.environ.get('MAGIC')
        os.environ['MAGIC'] = '/nonexistetn'
        try:
            self.assertRaises(MagicException, Magic)
        finally:
            if previous is None:
                del os.environ['MAGIC']
            else:
                os.environ['MAGIC'] = previous
45+
46+
class TestMagicMime(TestMagic):
    """Run every TestMagic case again with the wrapper in MIME mode."""

    mime = True
48+
49+
# Run the whole suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
51+

testdata/magic.pyc

1.75 KB
Binary file not shown.

testdata/test.gz

40 Bytes
Binary file not shown.

testdata/test.pdf

2 KB
Binary file not shown.

testdata/text.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Hello, World!
2+

0 commit comments

Comments
 (0)