@@ -16,6 +16,7 @@ import sys
1616import time
1717import urllib
1818import urllib2
19+ import codecs
1920
2021from github_backup import __version__
2122
@@ -116,6 +117,7 @@ def parse_args():
116117 parser .add_argument ('-F' , '--fork' , action = 'store_true' , dest = 'fork' , help = 'include forked repositories' )
117118 parser .add_argument ('--prefer-ssh' , action = 'store_true' , help = 'Clone repositories using SSH instead of HTTPS' )
118119 parser .add_argument ('-v' , '--version' , action = 'version' , version = '%(prog)s ' + __version__ )
120+ parser .add_argument ('--write-unicode' , action = 'store_true' , dest = 'write_unicode' , help = 'preserve unicode characters in the output files' )
119121 return parser .parse_args ()
120122
121123
@@ -317,8 +319,12 @@ def backup_repositories(args, output_directory, repositories):
317319 if args .include_issue_events or args .include_everything :
318320 issues [number ]['event_data' ] = retrieve_data (args , events_template .format (number ))
319321
320- with open ('{0}/{1}.json' .format (issue_cwd , number ), 'w' ) as issue_file :
321- json .dump (issue , issue_file , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
322+ if args .write_unicode :
323+ with codecs .open ('{0}/{1}.json' .format (issue_cwd , number ), 'w' , encoding = 'utf-8' ) as issue_file :
324+ json .dump (issue , issue_file , ensure_ascii = False , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
325+ else :
326+ with open ('{0}/{1}.json' .format (issue_cwd , number ), 'w' ) as issue_file :
327+ json .dump (issue , issue_file , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
322328
323329 if args .include_pulls or args .include_everything :
324330 if args .skip_existing and os .path .isdir ('{0}/pulls/.git' .format (repo_cwd )):
@@ -350,8 +357,12 @@ def backup_repositories(args, output_directory, repositories):
350357 if args .include_pull_commits or args .include_everything :
351358 pulls [number ]['commit_data' ] = retrieve_data (args , commits_template .format (number ))
352359
353- with open ('{0}/{1}.json' .format (pulls_cwd , number ), 'w' ) as pull_file :
354- json .dump (pull , pull_file , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
360+ if args .write_unicode :
361+ with codecs .open ('{0}/{1}.json' .format (pulls_cwd , number ), 'w' , encoding = 'utf-8' ) as pull_file :
362+ json .dump (pull , pull_file , ensure_ascii = False , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
363+ else :
364+ with open ('{0}/{1}.json' .format (pulls_cwd , number ), 'w' ) as pull_file :
365+ json .dump (pull , pull_file , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
355366
356367 if args .include_milestones or args .include_everything :
357368 if args .skip_existing and os .path .isdir ('{0}/milestones/.git' .format (repo_cwd )):
@@ -375,8 +386,12 @@ def backup_repositories(args, output_directory, repositories):
375386
376387 log_info ('Saving {0} milestones to disk' .format (len (milestones .keys ())))
377388 for number , milestone in milestones .iteritems ():
378- with open ('{0}/{1}.json' .format (milestone_cwd , number ), 'w' ) as milestone_file :
379- json .dump (milestone , milestone_file , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
389+ if args .write_unicode :
390+ with codecs .open ('{0}/{1}.json' .format (milestone_cwd , number ), 'w' , encoding = 'utf-8' ) as milestone_file :
391+ json .dump (milestone , milestone_file , ensure_ascii = False , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
392+ else :
393+ with open ('{0}/{1}.json' .format (milestone_cwd , number ), 'w' ) as milestone_file :
394+ json .dump (milestone , milestone_file , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
380395
381396 if args .include_labels or args .include_everything :
382397 if args .skip_existing and os .path .isdir ('{0}/labels/.git' .format (repo_cwd )):
@@ -391,8 +405,12 @@ def backup_repositories(args, output_directory, repositories):
391405 labels = retrieve_data (args , _label_template , query_args = {})
392406
393407 log_info ('Saving {0} labels to disk' .format (len (labels )))
394- with open ('{0}/labels.json' .format (label_cwd ), 'w' ) as label_file :
395- json .dump (labels , label_file , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
408+ if args .write_unicode :
409+ with codecs .open ('{0}/labels.json' .format (label_cwd ), 'w' , encoding = 'utf-8' ) as label_file :
410+ json .dump (labels , label_file , ensure_ascii = False , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
411+ else :
412+ with open ('{0}/labels.json' .format (label_cwd ), 'w' ) as label_file :
413+ json .dump (labels , label_file , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
396414
397415
398416def fetch_repository (name , remote_url , local_dir , skip_existing = False ):
@@ -426,8 +443,12 @@ def backup_account(args, output_directory):
426443 starred_template = "https://{0}/users/{1}/starred"
427444 starred = retrieve_data (args , starred_template .format (get_github_api_host (args ), args .user ))
428445 log_info ('Writing {0} starred repositories' .format (len (starred )))
429- with open ('{0}/starred.json' .format (account_cwd ), 'w' ) as starred_file :
430- json .dump (starred , starred_file , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
446+ if args .write_unicode :
447+ with codecs .open ('{0}/starred.json' .format (account_cwd ), 'w' , encoding = 'utf-8' ) as starred_file :
448+ json .dump (starred , starred_file , ensure_ascii = False , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
449+ else :
450+ with open ('{0}/starred.json' .format (account_cwd ), 'w' ) as starred_file :
451+ json .dump (starred , starred_file , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
431452
432453 if args .include_watched or args .include_everything :
433454 if not args .skip_existing or not os .path .exists ('{0}/watched.json' .format (account_cwd )):
@@ -437,8 +457,12 @@ def backup_account(args, output_directory):
437457 watched_template = "https://{0}/users/{1}/subscriptions"
438458 watched = retrieve_data (args , watched_template .format (get_github_api_host (args ), args .user ))
439459 log_info ('Writing {0} watched repositories' .format (len (watched )))
440- with open ('{0}/watched.json' .format (account_cwd ), 'w' ) as watched_file :
441- json .dump (watched , watched_file , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
460+ if args .write_unicode :
461+ with codecs .open ('{0}/watched.json' .format (account_cwd ), 'w' , encoding = 'utf-8' ) as watched_file :
462+ json .dump (watched , watched_file , ensure_ascii = False , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
463+ else :
464+ with open ('{0}/watched.json' .format (account_cwd ), 'w' ) as watched_file :
465+ json .dump (watched , watched_file , sort_keys = True , indent = 4 , separators = (',' , ': ' ))
442466
443467
444468def main ():
0 commit comments