forked from mukeshrathore/SmartDoc-Python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
infocus.py
45 lines (36 loc) · 1.15 KB
/
infocus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# -*- coding: utf-8 -*-
"""Metadata_Extraction.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1W5rAU5kYgk923jxA40_RLYdgmrsikyh_
"""
# PDF Metadata Extractor
# !pip install pikepdf
# !pip install pypdf
# !pip install PyPDF2
import pikepdf
from PyPDF2 import PdfReader
def _metadata_text(value):
    """Return a plain-string form of a document-information value."""
    try:
        return str(value)
    except NotImplementedError:
        # NOTE(review): some pikepdf releases appear to refuse str() for
        # object types without a text form -- confirm. repr() always works.
        return repr(value)


def Extract_MetaData(path):
    """Print and return the document-information metadata of a PDF.

    Opens the PDF at ``path`` with pikepdf, prints every entry of its
    document-information dictionary and, when a ``/Keywords`` entry is
    present, prints the keywords on their own line.

    Args:
        path: Location of the PDF file to inspect.

    Returns:
        A dict mapping each document-information key to the string form
        of its value (empty when the PDF carries no such entries).
    """
    metadata = {}
    # The context manager closes the PDF even if printing raises.
    with pikepdf.Pdf.open(path) as pdf:
        docinfo = pdf.docinfo
        for key, value in docinfo.items():
            print((key, ":", value))
            # Convert while the PDF is still open; the result is returned
            # after the document has been closed.
            metadata[str(key)] = _metadata_text(value)

        # Keyword extraction: read from the same docinfo that was just
        # checked, so the keywords always belong to the PDF at ``path``.
        if '/Keywords' in docinfo:
            print("\n")
            print('The keywords in this pdf are: '
                  + _metadata_text(docinfo['/Keywords']))
    return metadata
# Read the metadata of a hard-coded sample PDF with PyPDF2 and print it.
# NOTE: these statements run when the module is loaded and leave
# ``mypath``, ``pdf_toread`` and ``pdf_info`` behind as module-level names.
from PyPDF2 import PdfFileReader  # not used below; PdfReader is what is called
# mypath = r'/usr/src/Tax_and_legal_form.pdf'
mypath = r'c:/Users/mukes/Downloads/SmartDoc/Tax_and_legal_form.pdf'
# Hand PdfReader the path itself rather than an open() handle: no file
# object is created here, so none is left unclosed.
pdf_toread = PdfReader(mypath)
pdf_info = pdf_toread.metadata
print(pdf_info)
if __name__ == "__main__":
    # Script entry point: extract the metadata of the sample PDF and save
    # the string form of the result to Output.txt.
    # Test case 1:
    # path = r'/usr/src/Tax_and_legal_form.pdf'
    path = r'c:/Users/mukes/Downloads/SmartDoc/Tax_and_legal_form.pdf'
    # Run the extraction before opening the output file, so a failure does
    # not leave behind a freshly truncated, empty Output.txt.
    result = str(Extract_MetaData(path))
    # ``with`` guarantees the file is flushed and closed even if the write fails.
    with open("Output.txt", "w") as text_file:
        text_file.write(result)