1+ # from bs4 import BeautifulSoup
2+ # import requests
3+ # import requests.exceptions
4+ # from urllib.parse import urlsplit
5+ # from collections import deque
6+ # import re
7+
8+ # # a queue of urls to be crawled
9+ # new_urls = deque(['https://mail.google.com/mail/u/1/#inbox'])
10+
11+ # # a set of urls that we have already crawled
12+ # processed_urls = set()
13+
14+ # # a set of crawled emails
15+ # emails = set()
16+
17+ # # process urls one by one until we exhaust the queue
18+ # while len(new_urls):
19+
20+ # # move next url from the queue to the set of processed urls
21+ # url = new_urls.popleft()
22+ # processed_urls.add(url)
23+
24+ # # extract base url to resolve relative links
25+ # parts = urlsplit(url)
26+ # base_url = "{0.scheme}://{0.netloc}".format(parts)
27+ # path = url[:url.rfind('/')+1] if '/' in parts.path else url
28+
29+ # # get url's content
30+ # print("Processing %s" % url)
31+ # try:
32+ # response = requests.get(url)
33+ # except (requests.exceptions.MissingSchema, requests.exceptions.ConnectionError):
34+ # # ignore pages with errors
35+ # continue
36+
37+ # # extract all email addresses and add them into the resulting set
38+ # new_emails = set(re.findall(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", response.text, re.I))
39+ # emails.update(new_emails)
40+
41+ # # create a beautiful soup for the html document
42+ # soup = BeautifulSoup(response.text)
43+
44+ # # find and process all the anchors in the document
45+ # for anchor in soup.find_all("a"):
46+ # # extract link url from the anchor
47+ # link = anchor.attrs["href"] if "href" in anchor.attrs else ''
48+ # # resolve relative links
49+ # if link.startswith('/'):
50+ # link = base_url + link
51+ # elif not link.startswith('http'):
52+ # link = path + link
53+ # # add the new url to the queue if it was not enqueued nor processed yet
54+ # if not link in new_urls and not link in processed_urls:
55+ # new_urls.append(link)
56+
57+ import imaplib
58+ import email
59+ from email .header import decode_header
60+ import HTMLParser
61+
62+
63+ # Parser instance used by decodeHeader to unescape XML/HTML entities in header text.
64+ # NOTE(review): HTMLParser is the Python-2 module name; this import fails on
64+ # Python 3, where html.unescape() is the replacement — confirm target version.
64+ _parser = HTMLParser .HTMLParser ()
65+
def decodeHeader(value):
    """Decode a MIME-encoded (RFC 2047) header value into readable text.

    Some mail agents wrap the encoded-word in literal double quotes, which
    confuses email.header.decode_header, so the quotes are stripped first.
    Only the first decoded fragment is used (same as the original behaviour).
    HTML/XML entities (e.g. ``&amp;``) are unescaped in the result.
    """
    # Local import: stdlib replacement for the Python-2-only
    # HTMLParser.HTMLParser().unescape() used at module level.
    import html

    if value.startswith('"=?'):
        value = value.replace('"', '')

    decoded, encoding = decode_header(value)[0]
    # decode_header may return bytes even when no charset was declared
    # (the original would then crash unescaping bytes); fall back to utf-8
    # and never raise on malformed input.
    if isinstance(decoded, bytes):
        decoded = decoded.decode(encoding or 'utf-8', errors='replace')

    return html.unescape(decoded)
75+
def listLastInbox(top=10):
    """Yield header dicts for the newest *top* INBOX messages sent by Firebase.

    Each yielded item is a dict with 'subject', 'from' and 'date' keys,
    decoded to readable text via decodeHeader.

    SECURITY: the original hard-coded a Gmail address and plaintext password
    in the source — a credential leak.  Credentials are now read from the
    GMAIL_USER / GMAIL_PASSWORD environment variables; never commit secrets.
    """
    import os

    mailbox = imaplib.IMAP4_SSL('imap.gmail.com')
    mailbox.login(os.environ['GMAIL_USER'], os.environ['GMAIL_PASSWORD'])

    try:
        selected = mailbox.select('INBOX')
        # explicit check instead of assert (asserts are stripped under -O)
        if selected[0] != 'OK':
            raise RuntimeError('could not select INBOX: %r' % (selected,))
        messageCount = int(selected[1][0])

        # Walk newest-first, but never below message 1 — the mailbox may
        # contain fewer than *top* messages.
        for i in range(messageCount, max(messageCount - top, 0), -1):
            response = mailbox.fetch(str(i), '(RFC822)')[1]
            for part in response:
                if not isinstance(part, tuple):
                    continue
                # fetch returns raw bytes in Python 3 -> message_from_bytes
                # (message_from_string would fail on a bytes payload)
                message = email.message_from_bytes(part[1])
                sender = decodeHeader(message['from'] or '')
                # The original dict comprehension yielded empty {} dicts for
                # every non-matching message; skip them instead.
                if "Firebase" not in sender:
                    continue
                yield {h: decodeHeader(message[h] or '')
                       for h in ('subject', 'from', 'date')}
    finally:
        # runs even when the caller abandons the generator early
        mailbox.logout()
95+
96+
if __name__ == '__main__':
    # Print each matching message as a '-'-separated block of HEADER: value lines.
    separator = '-' * 40
    for message in listLastInbox():
        print(separator)
        for header_name, header_value in message.items():
            print(u'{0:8s}: {1}'.format(header_name.upper(), header_value))
102+
103+
104+
105+
106+
107+
108+ # def scramble(s1, s2):
109+ # # your code here
110+ # # print(list(s1),list(s2))
111+ # l = len(s2)
112+ # s1 = list(s1)
113+ # s2 = list(s2)
114+ # count = 0
115+ # for i in s2:
116+ # if i in s1:
117+ # count = count+1
118+ # # print(list(s1))
119+ # s1.remove(i)
120+ # # print(list(s1))
121+ # # if count == l:
122+ # # return True
123+ # # else:
124+ # # return False
125+ # print(count,l,len(s1))
126+
127+ # scramble('scriptjava','javascript')
# 0 commit comments  (GitHub page residue — not code)