Skip to content

Commit

Permalink
pylint check_https.py
Browse files: browse the repository at this point in the history
  • Loading branch information
rasa committed Jun 20, 2017
1 parent be557d7 commit b8866c3
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 45 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number | Diff line number | Diff line change
Expand Up @@ -46,3 +46,5 @@ Temporary Items

archive/
*.failed
tmp/
wip/
96 changes: 51 additions & 45 deletions check_https.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,24 +14,24 @@
import os
import ssl
import sys
# buggy!

from six.moves.urllib.parse import urlsplit, urlunsplit # pylint: disable=import-error

import requests
import certifi
import urllib3
import urllib3.contrib.pyopenssl

USE_URLLIB3 = True

from jsoncomment import JsonComment

from six.moves.urllib.parse import urlsplit, urlunsplit # pylint: disable=import-error
import jsoncomment

urllib3.contrib.pyopenssl.inject_into_urllib3()

UA = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
UAS = {
'sourceforge.net': 'Wget/1.19.1 (mingw32)'
}
SF_UA = "Scoop/1.0 (+http://scoop.sh/) (Windows NT 6.1; WOW64)"

UAS = {'sourceforge.net': SF_UA}

USE_URLLIB3 = True

class CheckURLs(object):
""" @todo docstring me """
Expand Down Expand Up @@ -61,8 +61,9 @@ def get_scheme(self, url):
return parts[0]

def get_ua(self, url):
""" @todo docstring me """
parts = list(urlsplit(url))
logging.debug('parts=%s', parts)
# logging.debug('parts=%s', parts)
for regex in UAS:
if re.search(re.escape(regex), parts[1], re.I):
return UAS[regex]
Expand All @@ -84,18 +85,19 @@ def get(self, url, key='', whine=True):
retries = urllib3.util.retry.Retry(connect=1, read=1)
http = urllib3.PoolManager(
retries=retries,
cert_reqs=ssl.CERT_REQUIRED,
cert_reqs=ssl.CERT_REQUIRED,
ca_certs=certifi.where())

if not self.is_http_or_https(url):
logging.debug('%s: %s: %s', self.file, 'not_httpx', url)
logging.debug('%s %s: %s', key, 'not http or https', url)
return False

try:
data = None
logging.debug('%s: Retrieving %s', self.file, url)
logging.debug('%s: Retrieving %s', key, url)
ua = self.get_ua(url)
headers={'User-Agent': ua}
logging.debug('%s: User agent: %s', key, ua)
headers = {'User-Agent': ua}
if USE_URLLIB3:
r = http.request('GET', url, headers=headers)
status = r.status
Expand All @@ -104,16 +106,15 @@ def get(self, url, key='', whine=True):
r = requests.get(url)
status = r.status_code
data = r.content
if whine and status < 200 or status > 299:
logging.warning('%s: %s: %s', self.file, status, url)
if whine and (status < 200 or status > 299):
logging.warning('%s: Error %s: %s', key, status, url)
return False
if not self.check_exists:
logging.info('%s: %s: %s: %s', self.file, status, key, url)
logging.debug('%s: Status %s: %s', key, status, url)
return data
except Exception as e:
logging.debug('%s: %s: %s: %s', self.file, type(e).__name__, key, url)
if type(e).__name__ in ssl_errors:
return False
logging.warning('%s: Exception %s: %s: %s', key, type(e).__name__, url)
logging.exception(e)
return False

Expand All @@ -129,7 +130,7 @@ def check_url(self, url, key, hash='', desc=''):

if desc:
key += '.' + desc
logging.debug('%s: key=%s hash=%s url=%s', self.file, key, hash, url)
# logging.debug('%s: %s (%s)', key, url, hash)
if not hash and self.is_https(url) and not self.check_exists:
return False

Expand All @@ -152,41 +153,43 @@ def check_url(self, url, key, hash='', desc=''):
content = self.get(new_url, key)

if not content:
logging.debug('%s: No content for %s', self.file, new_url)
logging.debug('%s: No content for %s', key, new_url)
return False

if self.check_hash and hash:
logging.debug('%s: Verifying hash %s', self.file, hash)
logging.debug('%s: Verifying hash %s', key, hash)
m = re.search(r':([^:]+)$', hash)
if m:
hash = m.group(1).strip()
hashlen = len(hash)
if hashlen not in hashmap:
logging.error('%s: Unknown hash type %s: %s', self.file, hashlen, hash)
logging.error('%s: Unknown hash type %s: %s', key,
hashlen, hash)
else:
h = hashlib.new(hashmap[hashlen])
h.update(content)
chash = h.hexdigest().lower()
if chash == hash.lower():
logging.debug('%s: Hashes match: %s', self.file, chash)
logging.debug('%s: Hashes match: %s', key, chash)
else:
logging.warning(
'%s: Found %s, expected %s', self.file, chash, hash)
logging.warning('%s: Found %s, expected %s', key,
chash, hash)
self.data = re.sub(hash, chash, self.data)

if new_url == url:
return ''

old_data = self.data

logging.debug('%s: Changing\n%s to\n%s', self.file, url, new_url)
logging.debug('%s: Changing\n%s to\n%s', key, url, new_url)
self.data = re.sub(re.escape(url), new_url, self.data)

if self.data != old_data:
logging.debug('%s: Returning %s', self.file, self.get_scheme(new_url))
logging.debug('%s: Returning %s', key,
self.get_scheme(new_url))
return self.get_scheme(new_url)

logging.debug('%s: Returning %s', self.file, '')
logging.debug('%s: Returning %s', key, '')
return ''

def check_urls(self, url_or_list, key, hash='', desc=''):
Expand Down Expand Up @@ -230,6 +233,9 @@ def _fix_scheme(self, url, key, scheme='https', desc=''):
if new_url != url:
self.data = re.sub(re.escape(url), new_url, self.data)

if self.data != old_data:
logging.debug('%s: Changing %s to %s', key, url, new_url)

return self.data != old_data

def _fix_schemes(self, url_or_list, key, scheme='https', desc=''):
Expand All @@ -242,7 +248,7 @@ def _fix_schemes(self, url_or_list, key, scheme='https', desc=''):

return updated

logging.debug('scheme=%s', scheme)
# logging.debug('scheme=%s', scheme)
return self._fix_scheme(url_or_list, key, scheme, desc)

def fix_schemes(self, j, key, scheme='https', desc=''):
Expand Down Expand Up @@ -271,18 +277,18 @@ def run(self):
dir_name = '.'

self.logger = logging.getLogger()
self.logger.setLevel(logging.INFO)
self.logger.setLevel(logging.DEBUG)

logger2 = logging.getLogger('urllib3')
logger2.setLevel(logging.CRITICAL)

parser = JsonComment(json)
parser = jsoncomment.JsonComment(json)

mask = dir_name + '/' + filespec
logging.info("==> Processing dir %s", mask)
for file in glob.glob(mask):
self.file = os.path.basename(file)
logging.debug("--> Processing file %s", file)
logging.info("--> Processing file %s", file)
with io.open(file, 'r', encoding='utf-8') as f:
self.data = f.read()
orig_data = self.data
Expand All @@ -300,29 +306,29 @@ def run(self):
if isinstance(j['checkver'], dict):
scheme = self.process(j['checkver'], 'github')
if 'architecture' in j:
scheme = self.process(j['architecture'], '32bit',
'', 'architecture')
scheme = self.process(j['architecture'], '32bit', '',
'architecture')
if isinstance(scheme, list):
scheme = scheme[0]
if scheme:
if 'autoupdate' in j:
if 'architecture' in j['autoupdate']:
self.fix_schemes(
j['autoupdate']['architecture'], '32bit',
scheme, 'autoupdate.architecture')
self.fix_schemes(j['autoupdate']['architecture'],
'32bit', scheme,
'autoupdate.architecture')

scheme = self.process(j['architecture'], '64bit',
'', 'architecture')
scheme = self.process(j['architecture'], '64bit', '',
'architecture')
if isinstance(scheme, list):
scheme = scheme[0]
if scheme:
if 'autoupdate' in j:
if 'architecture' in j['autoupdate']:
self.fix_schemes(
j['autoupdate']['architecture'], '64bit',
scheme, 'autoupdate.architecture')
self.fix_schemes(j['autoupdate']['architecture'],
'64bit', scheme,
'autoupdate.architecture')
if self.data != orig_data:
logging.info("Updating %s" % file)
logging.info("Updating %s", file)
if os.path.isfile(file + '.bak'):
os.remove(file + '.bak')
os.rename(file, file + '.bak')
Expand Down

0 comments on commit b8866c3

Please sign in to comment.