Skip to content

Commit 748a28f

Browse files
committed
First Commit
1 parent 0a04abc commit 748a28f

File tree

3 files changed

+67
-0
lines changed

3 files changed

+67
-0
lines changed

links.py

Lines changed: 29 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,29 @@
1+
#!/usr/bin/env python
2+
3+
from BeautifulSoup import BeautifulSoup
4+
#import urllib2
5+
import re # Module for RegEx
6+
import yaml
7+
import cfscrape
8+
9+
def getLinks(url):
    """Fetch *url* through a Cloudflare-aware scraper and return a list.

    The first element is a fixed header string; the rest are the href
    values of every anchor on the page whose href starts with http:// or
    https:// (absolute links only).
    """
    # cfscrape solves Cloudflare's anti-bot challenge before handing back HTML.
    session = cfscrape.create_scraper()
    page_html = session.get(url).content
    parsed = BeautifulSoup(page_html)
    # Only anchors whose href is an absolute http/https URL.
    anchors = parsed.findAll('a', attrs={'href': re.compile("^(http|https)://")})
    return ['Full Web Page Internal & External links'] + [a.get('href') for a in anchors]
21+
22+
## Output printed in result.yml file
# Serialize the scraped link list as YAML; encoding='utf-8' plus
# allow_unicode=True keeps non-ASCII hrefs readable instead of escaped,
# and default_flow_style=False emits one entry per line (block style).
with open('result.yml', 'w') as yaml_file:
    yaml.safe_dump(getLinks('https://www.example.com'), yaml_file,
                   default_flow_style=False, encoding='utf-8',
                   allow_unicode=True)

# Parenthesized single-argument print behaves identically on Python 2 and 3;
# the bare `print "done"` statement is a SyntaxError on Python 3.
print("done")

og.py

Lines changed: 19 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,19 @@
1+
#!/usr/bin/env python
"""Fetch a page and print its Open Graph image URL (og:image), if present."""

#import urllib2
from BeautifulSoup import BeautifulSoup
import cfscrape

# Get the text at the set URL.
# cfscrape solves Cloudflare's anti-bot challenge before returning the HTML.
scraper = cfscrape.create_scraper()

url = "https://example.com"
cfurl = scraper.get(url).content
#bs = BeautifulSoup(urllib2.urlopen(url))
bs = BeautifulSoup(cfurl)

# Open Graph image is declared as <meta property="og:image" content="...">.
metatag = bs.find("meta", {"property": "og:image"})
if metatag is not None:
    # Parenthesized single-argument print runs on both Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print(metatag["content"])
else:
    print("This page has no Open Graph meta image tag")

weblink.py

Lines changed: 19 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,19 @@
1+
#!/usr/bin/env python
"""Print every absolute http/https link found on a hard-coded page."""

from BeautifulSoup import BeautifulSoup
#import urllib2
import cfscrape
import re

#html_page = urllib2.urlopen("https://example.com")

# Get the text at the set URL.
# cfscrape solves Cloudflare's anti-bot challenge before returning the HTML.
scraper = cfscrape.create_scraper()

url = "https://example.com"
cfurl = scraper.get(url).content
soup = BeautifulSoup(cfurl)

# Only anchors whose href is an absolute http/https URL.
for link in soup.findAll('a', attrs={'href': re.compile("^(http|https)://")}):
    ## Print Output
    # Parenthesized single-argument print runs on both Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print(link.get('href'))

0 commit comments

Comments
 (0)