-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpdf-image-extractor.py
55 lines (46 loc) · 1.9 KB
/
pdf-image-extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""
Extract images from PDF without resampling or altering.
Adapted from work by Sylvain Pelissier
http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-resampling-in-python
"""
import sys
from PIL import Image
import PyPDF2
# Zero-based index of the page that is scanned when no page is given on the
# command line. 3 (i.e. the fourth page) is the value this script has always
# used, so it stays the default for backward compatibility.
DEFAULT_PAGE_INDEX = 3


def _pil_mode(image):
    """Return the PIL mode used to rebuild *image* from raw pixel data.

    ``/DeviceRGB`` maps to ``"RGB"`` and ``/DeviceGray`` to ``"L"``. Any
    other colour space, or a missing ``/ColorSpace`` entry, falls back to
    ``"P"``.
    """
    if "/ColorSpace" not in image:
        return "P"
    color_space = image["/ColorSpace"]
    # Explicit comparisons rather than a dict lookup: the entry is not
    # necessarily a plain (hashable) name.
    if color_space == "/DeviceRGB":
        return "RGB"
    if color_space == "/DeviceGray":
        return "L"
    return "P"


def _effective_filter(image):
    """Return the filter that determines the format of the image data.

    ``/Filter`` may hold a single name or an array of names applied in
    order; in the latter case the last entry is the one that describes the
    data left after decoding. Returns ``None`` when no filter is present.
    """
    if "/Filter" not in image:
        return None
    filters = image["/Filter"]
    if isinstance(filters, (list, tuple)):
        return filters[-1] if filters else None
    return filters


def _write_bytes(path, data):
    """Write *data* to *path* unchanged, closing the file even on error."""
    with open(path, "wb") as out:
        out.write(data)


def _save_image(name, image):
    """Save one image XObject into the current directory.

    The file is named after *name* without its first character (the leading
    ``/`` of the PDF name). Returns ``True`` when a file was written and
    ``False`` when the image was skipped; skips are reported on stderr.
    """
    size = (image["/Width"], image["/Height"])
    data = image.getData()
    stem = name[1:]
    filter_name = _effective_filter(image)

    if filter_name is None or filter_name == "/FlateDecode":
        mode = _pil_mode(image)
        try:
            picture = Image.frombytes(mode, size, data)
        except ValueError as err:
            # The data does not match mode/size (e.g. an unsupported colour
            # space or bit depth). Skip this image instead of aborting the
            # extraction of the remaining ones.
            sys.stderr.write(
                "Skipping {}: cannot rebuild pixels as mode {} ({})\n".format(
                    name, mode, err
                )
            )
            return False
        picture.save(stem + ".png")
        return True

    # For these filters the stream data is written to disk as-is.
    if filter_name == "/DCTDecode":
        extension = ".jpg"
    elif filter_name == "/JPXDecode":
        extension = ".jp2"
    elif filter_name == "/CCITTFaxDecode":
        extension = ".tiff"
    else:
        sys.stderr.write(
            "Skipping {}: unsupported filter {}\n".format(name, filter_name)
        )
        return False
    _write_bytes(stem + extension, data)
    return True


def _extract_page_images(page):
    """Save every image XObject of *page*; return how many images it has.

    The count includes images that were found but had to be skipped.
    """
    if "/Resources" not in page or "/XObject" not in page["/Resources"]:
        return 0
    x_objects = page["/Resources"]["/XObject"].getObject()
    found = 0
    for name in x_objects:
        image = x_objects[name]
        if image["/Subtype"] != "/Image":
            continue
        found += 1
        _save_image(name, image)
    return found


def main(argv=None):
    """Extract the images of one PDF page into the current directory.

    Args:
        argv: Command-line arguments including the program name; defaults
            to ``sys.argv``. Expected form is ``prog input_file`` with an
            optional trailing zero-based ``page_index`` (default
            ``DEFAULT_PAGE_INDEX``).

    Returns:
        Process exit status: 0 on success (including when the page has no
        image), 1 on a usage error or an out-of-range page index.
    """
    argv = sys.argv if argv is None else argv
    usage = "\nUsage: python {} input_file [page_index]\n".format(
        argv[0] if argv else "pdf-image-extractor.py"
    )
    if len(argv) not in (2, 3):
        print(usage)
        return 1

    pdf = argv[1]
    page_index = DEFAULT_PAGE_INDEX
    if len(argv) == 3:
        try:
            page_index = int(argv[2])
        except ValueError:
            print(usage)
            return 1

    # The reader pulls objects from the stream while they are accessed, so
    # all extraction has to happen while the file is still open.
    with open(pdf, "rb") as stream:
        reader = PyPDF2.PdfFileReader(stream)
        try:
            page = reader.getPage(page_index)
        except IndexError:
            sys.stderr.write(
                "Page index {} is out of range for {}\n".format(page_index, pdf)
            )
            return 1
        found = _extract_page_images(page)

    if not found:
        print("No image found.")
    return 0


if __name__ == "__main__":
    sys.exit(main())