|
| 1 | +""" |
| 2 | +magic is a wrapper around the libmagic file identification library. |
| 3 | +
|
| 4 | +See README for more information. |
| 5 | +
|
| 6 | +Usage: |
| 7 | +
|
| 8 | +>>> import magic |
| 9 | +>>> magic.from_file("testdata/test.pdf") |
| 10 | +'PDF document, version 1.2' |
| 11 | +>>> magic.from_file("testdata/test.pdf", mime=True) |
| 12 | +'application/pdf' |
| 13 | +>>> magic.from_buffer(open("testdata/test.pdf").read(1024)) |
| 14 | +'PDF document, version 1.2' |
| 15 | +>>> |
| 16 | +
|
| 17 | +
|
| 18 | +""" |
| 19 | + |
| 20 | +import os.path |
| 21 | +import ctypes |
| 22 | +import ctypes.util |
| 23 | + |
| 24 | +from ctypes import c_char_p, c_int, c_size_t, c_void_p |
| 25 | + |
class MagicException(Exception):
    """Error raised by this module when libmagic reports a failure."""
| 27 | + |
class Magic:
    """
    Magic is a wrapper around the libmagic C library.

    Each instance owns one libmagic cookie, opened in __init__ and closed
    again when the instance is destroyed.
    """

    def __init__(self, mime=False, magic_file=None):
        """
        Create a new libmagic wrapper.

        mime - if True, mimetypes are returned instead of textual descriptions
        magic_file - use a magic database other than the system default

        raises MagicException if no cookie can be opened or the database
        cannot be loaded
        """
        # Assigned first so that __del__ is safe even if opening fails below.
        self.cookie = None

        flags = MAGIC_NONE
        if mime:
            flags |= MAGIC_MIME

        # magic_open has a pointer restype, so ctypes hands back None for NULL.
        cookie = magic_open(flags)
        if cookie is None:
            raise MagicException("magic_open failed: could not create a libmagic cookie")
        self.cookie = cookie

        # Note: the `magic_file` parameter shadows the module-level
        # magic_file() binding inside this method; here it is the database
        # path (None selects the default database).
        magic_load(self.cookie, magic_file)

    def from_buffer(self, buf):
        """
        Identify the contents of `buf`
        """
        return magic_buffer(self.cookie, buf)

    def from_file(self, filename):
        """
        Identify the contents of file `filename`
        raises IOError if the file does not exist
        """

        if not os.path.exists(filename):
            raise IOError("File does not exist: " + filename)

        return magic_file(self.cookie, filename)

    def __del__(self):
        """
        Release the libmagic cookie, if one was opened.
        """
        # The attribute is missing when __init__ was bypassed, and None when
        # opening the cookie failed or it was already released.
        cookie = getattr(self, "cookie", None)
        if cookie is None:
            return
        self.cookie = None
        try:
            magic_close(cookie)
        except Exception:
            # Deliberately best-effort: a destructor cannot propagate errors,
            # and during interpreter shutdown the module globals used here
            # may already have been cleared.
            pass
| 73 | + |
| 74 | + |
# Shared Magic instances, created on first use by the accessors below.
_magic_mime = None
_magic = None


def _get_magic_mime():
    """Return the shared mimetype-reporting Magic instance, creating it if needed."""
    global _magic_mime
    if _magic_mime:
        return _magic_mime
    _magic_mime = Magic(mime=True)
    return _magic_mime


def _get_magic():
    """Return the shared description-reporting Magic instance, creating it if needed."""
    global _magic
    if _magic:
        return _magic
    _magic = Magic()
    return _magic
| 89 | + |
def _get_magic_type(mime):
    """Pick the shared Magic instance: the mimetype one if `mime` is true, else the default one."""
    return _get_magic_mime() if mime else _get_magic()
| 95 | + |
def from_file(filename, mime=False):
    """
    Identify the contents of file `filename` using a shared Magic instance.

    mime - if True, the mimetype-reporting instance is used
    """
    return _get_magic_type(mime).from_file(filename)
| 99 | + |
def from_buffer(buffer, mime=False):
    """
    Identify the contents of `buffer` using a shared Magic instance.

    mime - if True, the mimetype-reporting instance is used
    """
    return _get_magic_type(mime).from_buffer(buffer)
| 103 | + |
| 104 | + |
| 105 | + |
| 106 | + |
# Locate the shared library by name.  find_library() returns None when it is
# not installed; passing None on to CDLL would not fail here but only later,
# with a confusing AttributeError on the first symbol lookup, so stop now
# with a clear message instead.
_libmagic_path = ctypes.util.find_library('magic')
if _libmagic_path is None:
    raise ImportError('failed to find libmagic.  Check your installation')

libmagic = ctypes.CDLL(_libmagic_path)

# The libmagic cookie is handled as an opaque pointer.
magic_t = ctypes.c_void_p
| 110 | + |
def errorcheck(result, func, args):
    """
    ctypes errcheck hook for the libmagic bindings.

    Queries magic_error() with the first call argument (the cookie) and
    raises MagicException carrying that message if one is set; otherwise
    the call's result is passed through unchanged.
    """
    message = magic_error(args[0])
    if message is None:
        return result
    raise MagicException(message)
| 117 | + |
# --- cookie lifecycle -------------------------------------------------------

# magic_open(flags) -> cookie
magic_open = libmagic.magic_open
magic_open.argtypes = [c_int]
magic_open.restype = magic_t

# magic_close(cookie) -> nothing
magic_close = libmagic.magic_close
magic_close.argtypes = [magic_t]
magic_close.restype = None

# --- error reporting --------------------------------------------------------

# magic_error(cookie) -> message string, or None for a NULL result
magic_error = libmagic.magic_error
magic_error.argtypes = [magic_t]
magic_error.restype = c_char_p

# magic_errno(cookie) -> int
magic_errno = libmagic.magic_errno
magic_errno.argtypes = [magic_t]
magic_errno.restype = c_int

# --- identification (both are routed through errorcheck) ---------------------

# magic_file(cookie, filename) -> string
magic_file = libmagic.magic_file
magic_file.argtypes = [magic_t, c_char_p]
magic_file.restype = c_char_p
magic_file.errcheck = errorcheck

# Raw magic_buffer(cookie, data, length) -> string; kept private because the
# public magic_buffer() wrapper supplies the length itself.
_magic_buffer = libmagic.magic_buffer
_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
_magic_buffer.restype = c_char_p
_magic_buffer.errcheck = errorcheck
| 144 | + |
| 145 | + |
def magic_buffer(cookie, buf):
    """Call the raw libmagic magic_buffer binding on `buf`, passing len(buf) as its length."""
    size = len(buf)
    return _magic_buffer(cookie, buf, size)
| 148 | + |
| 149 | + |
# --- database handling and flags ---------------------------------------------

# magic_load(cookie, filename) -> int; routed through errorcheck
magic_load = libmagic.magic_load
magic_load.argtypes = [magic_t, c_char_p]
magic_load.restype = c_int
magic_load.errcheck = errorcheck

# magic_setflags(cookie, flags) -> int
magic_setflags = libmagic.magic_setflags
magic_setflags.argtypes = [magic_t, c_int]
magic_setflags.restype = c_int

# magic_check(cookie, filename) -> int
magic_check = libmagic.magic_check
magic_check.argtypes = [magic_t, c_char_p]
magic_check.restype = c_int

# magic_compile(cookie, filename) -> int
magic_compile = libmagic.magic_compile
magic_compile.argtypes = [magic_t, c_char_p]
magic_compile.restype = c_int
| 167 | + |
| 168 | + |
# Flag values for magic_open() / magic_setflags().  Every flag other than
# MAGIC_NONE is a single bit, so they are written as shifts; combine with `|`.

MAGIC_NONE = 0                        # No flags

# General behaviour
MAGIC_DEBUG = 1 << 0                  # Turn on debugging
MAGIC_SYMLINK = 1 << 1                # Follow symlinks
MAGIC_COMPRESS = 1 << 2               # Check inside compressed files
MAGIC_DEVICES = 1 << 3                # Look at the contents of devices
MAGIC_MIME = 1 << 4                   # Return a mime string
MAGIC_CONTINUE = 1 << 5               # Return all matches
MAGIC_CHECK = 1 << 6                  # Print warnings to stderr
MAGIC_PRESERVE_ATIME = 1 << 7         # Restore access time on exit
MAGIC_RAW = 1 << 8                    # Don't translate unprintable chars
MAGIC_ERROR = 1 << 9                  # Handle ENOENT etc as real errors

# Checks that can be switched off
MAGIC_NO_CHECK_COMPRESS = 1 << 12     # Don't check for compressed files
MAGIC_NO_CHECK_TAR = 1 << 13          # Don't check for tar files
MAGIC_NO_CHECK_SOFT = 1 << 14         # Don't check magic entries
MAGIC_NO_CHECK_APPTYPE = 1 << 15      # Don't check application type
MAGIC_NO_CHECK_ELF = 1 << 16          # Don't check for elf details
MAGIC_NO_CHECK_ASCII = 1 << 17        # Don't check for ascii files
MAGIC_NO_CHECK_TROFF = 1 << 18        # Don't check ascii/troff
MAGIC_NO_CHECK_FORTRAN = 1 << 19      # Don't check ascii/fortran
MAGIC_NO_CHECK_TOKENS = 1 << 20       # Don't check ascii/tokens
0 commit comments