@@ -18,6 +18,7 @@ import subprocess
1818import sys
1919import time
2020import platform
21+ PY2 = False
2122try :
2223 # python 3
2324 from urllib .parse import urlparse
2627 from urllib .error import HTTPError , URLError
2728 from urllib .request import urlopen
2829 from urllib .request import Request
30+ from urllib .request import HTTPRedirectHandler
31+ from urllib .request import build_opener
2932except ImportError :
3033 # python 2
34+ PY2 = True
3135 from urlparse import urlparse
3236 from urllib import quote as urlquote
3337 from urllib import urlencode
3438 from urllib2 import HTTPError , URLError
3539 from urllib2 import urlopen
3640 from urllib2 import Request
41+ from urllib2 import HTTPRedirectHandler
42+ from urllib2 import build_opener
3743
3844from github_backup import __version__
3945
@@ -308,6 +314,10 @@ def parse_args():
308314 dest = 'include_releases' ,
309315 help = 'include release information, not including assets or binaries'
310316 )
317+ parser .add_argument ('--assets' ,
318+ action = 'store_true' ,
319+ dest = 'include_assets' ,
320+ help = 'include assets alongside release information; only applies if including releases' )
311321 return parser .parse_args ()
312322
313323
@@ -537,6 +547,39 @@ def _request_url_error(template, retry_timeout):
537547 return False
538548
539549
class S3HTTPRedirectHandler(HTTPRedirectHandler):
    """
    A subclassed redirect handler for downloading Github assets from S3.

    urllib will add the Authorization header to the redirected request to S3,
    which will result in a 400, so the header is removed on redirect.
    """

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """
        Build the redirected request and strip its Authorization header.

        The arguments are passed unchanged to
        ``HTTPRedirectHandler.redirect_request``. Whatever that call returns
        is returned to the caller, minus any ``Authorization`` header.
        """
        # Calling the base class explicitly works both when
        # HTTPRedirectHandler is an old-style class (Python 2) and when it is
        # a new-style class (Python 3), so no version branch is needed.
        request = HTTPRedirectHandler.redirect_request(
            self, req, fp, code, msg, headers, newurl)
        if request is not None:
            # pop() with a default: the original request may not have carried
            # an Authorization header, and that must not raise KeyError.
            request.headers.pop('Authorization', None)
        return request
565+
566+
def download_file(url, path, auth):
    """
    Download ``url`` to ``path``, streaming the body in 16 KiB chunks.

    :param url: URL to fetch; requested with
        ``Accept: application/octet-stream``.
    :param path: destination file, opened in binary write mode (an existing
        file is truncated).
    :param auth: bytes appended to ``b'Basic '`` to form the Authorization
        header (presumably the base64 credentials produced by the caller's
        ``get_auth`` -- confirm). When ``None``, no Authorization header is
        sent instead of failing on the bytes concatenation.
    """
    request = Request(url)
    request.add_header('Accept', 'application/octet-stream')
    if auth is not None:
        request.add_header('Authorization', 'Basic '.encode('ascii') + auth)
    opener = build_opener(S3HTTPRedirectHandler)
    response = opener.open(request)

    chunk_size = 16 * 1024
    # try/finally rather than a with-statement on the response: it closes the
    # connection on every path without relying on the response object being a
    # context manager.
    try:
        with open(path, 'wb') as f:
            # read() returns an empty bytes object at end of stream.
            for chunk in iter(lambda: response.read(chunk_size), b''):
                f.write(chunk)
    finally:
        response.close()
581+
582+
540583def get_authenticated_user (args ):
541584 template = 'https://{0}/user' .format (get_github_api_host (args ))
542585 data = retrieve_data (args , template , single_request = True )
@@ -705,7 +748,8 @@ def backup_repositories(args, output_directory, repositories):
705748 backup_hooks (args , repo_cwd , repository , repos_template )
706749
707750 if args .include_releases or args .include_everything :
708- backup_releases (args , repo_cwd , repository , repos_template )
751+ backup_releases (args , repo_cwd , repository , repos_template ,
752+ include_assets = args .include_assets or args .include_everything )
709753
710754 if args .incremental :
711755 open (last_update_path , 'w' ).write (last_update )
@@ -888,7 +932,7 @@ def backup_hooks(args, repo_cwd, repository, repos_template):
888932 log_info ("Unable to read hooks, skipping" )
889933
890934
891- def backup_releases (args , repo_cwd , repository , repos_template ):
935+ def backup_releases (args , repo_cwd , repository , repos_template , include_assets = False ):
892936 repository_fullname = repository ['full_name' ]
893937
894938 # give release files somewhere to live & log intent
@@ -898,17 +942,22 @@ def backup_releases(args, repo_cwd, repository, repos_template):
898942
899943 query_args = {}
900944
901- _release_template = '{0}/{1}/releases' .format (repos_template , repository_fullname )
902- _releases = retrieve_data (args , _release_template , query_args = query_args )
945+ release_template = '{0}/{1}/releases' .format (repos_template , repository_fullname )
946+ releases = retrieve_data (args , release_template , query_args = query_args )
903947
904948 # for each release, store it
905- log_info ('Saving {0} releases to disk' .format (len (_releases )))
906- for release in _releases :
949+ log_info ('Saving {0} releases to disk' .format (len (releases )))
950+ for release in releases :
907951 release_name = release ['tag_name' ]
908952 output_filepath = os .path .join (release_cwd , '{0}.json' .format (release_name ))
909953 with codecs .open (output_filepath , 'w+' , encoding = 'utf-8' ) as f :
910954 json_dump (release , f )
911955
956+ if include_assets :
957+ assets = retrieve_data (args , release ['assets_url' ])
958+ for asset in assets :
959+ download_file (asset ['url' ], os .path .join (release_cwd , asset ['name' ]), get_auth (args ))
960+
912961
913962def fetch_repository (name ,
914963 remote_url ,
0 commit comments