Skip to content

Commit ab4b28c

Browse files
committed
Preserve Unicode characters in the output file
Added a `--write-unicode` option to preserve Unicode characters in the output files
1 parent 6feb409 commit ab4b28c

1 file changed

Lines changed: 35 additions & 12 deletions

File tree

bin/github-backup

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import sys
1616
import time
1717
import urllib
1818
import urllib2
19+
import codecs
1920

2021
from github_backup import __version__
2122

@@ -116,6 +117,7 @@ def parse_args():
116117
parser.add_argument('-F', '--fork', action='store_true', dest='fork', help='include forked repositories')
117118
parser.add_argument('--prefer-ssh', action='store_true', help='Clone repositories using SSH instead of HTTPS')
118119
parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__)
120+
parser.add_argument('--write-unicode', action='store_true', dest='write_unicode', help='preserve unicode characters in the output files')
119121
return parser.parse_args()
120122

121123

@@ -317,8 +319,13 @@ def backup_repositories(args, output_directory, repositories):
317319
if args.include_issue_events or args.include_everything:
318320
issues[number]['event_data'] = retrieve_data(args, events_template.format(number))
319321

320-
with open('{0}/{1}.json'.format(issue_cwd, number), 'w') as issue_file:
321-
json.dump(issue, issue_file, sort_keys=True, indent=4, separators=(',', ': '))
322+
if args.write_unicode:
323+
with codecs.open('{0}/{1}.json'.format(issue_cwd, number), 'w', encoding='utf-8') as issue_file:
324+
json.dump(issue, issue_file, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ': '))
325+
else:
326+
with open('{0}/{1}.json'.format(issue_cwd, number), 'w') as issue_file:
327+
json.dump(issue, issue_file, sort_keys=True, indent=4, separators=(',', ': '))
328+
322329

323330
if args.include_pulls or args.include_everything:
324331
if args.skip_existing and os.path.isdir('{0}/pulls/.git'.format(repo_cwd)):
@@ -350,8 +357,12 @@ def backup_repositories(args, output_directory, repositories):
350357
if args.include_pull_commits or args.include_everything:
351358
pulls[number]['commit_data'] = retrieve_data(args, commits_template.format(number))
352359

353-
with open('{0}/{1}.json'.format(pulls_cwd, number), 'w') as pull_file:
354-
json.dump(pull, pull_file, sort_keys=True, indent=4, separators=(',', ': '))
360+
if args.write_unicode:
361+
with codecs.open('{0}/{1}.json'.format(pulls_cwd, number), 'w', encoding='utf-8') as pull_file:
362+
json.dump(pull, pull_file, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ': '))
363+
else:
364+
with open('{0}/{1}.json'.format(pulls_cwd, number), 'w') as pull_file:
365+
json.dump(pull, pull_file, sort_keys=True, indent=4, separators=(',', ': '))
355366

356367
if args.include_milestones or args.include_everything:
357368
if args.skip_existing and os.path.isdir('{0}/milestones/.git'.format(repo_cwd)):
@@ -375,8 +386,11 @@ def backup_repositories(args, output_directory, repositories):
375386

376387
log_info('Saving {0} milestones to disk'.format(len(milestones.keys())))
377388
for number, milestone in milestones.iteritems():
378-
with open('{0}/{1}.json'.format(milestone_cwd, number), 'w') as milestone_file:
379-
json.dump(milestone, milestone_file, sort_keys=True, indent=4, separators=(',', ': '))
389+
if args.write_unicode:
390+
with codecs.open('{0}/{1}.json'.format(milestone_cwd, number), 'w', encoding='utf-8') as milestone_file:
391+
json.dump(milestone, milestone_file, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ': '))
392+
with open('{0}/{1}.json'.format(milestone_cwd, number), 'w') as milestone_file:
393+
json.dump(milestone, milestone_file, sort_keys=True, indent=4, separators=(',', ': '))
380394

381395
if args.include_labels or args.include_everything:
382396
if args.skip_existing and os.path.isdir('{0}/labels/.git'.format(repo_cwd)):
@@ -391,8 +405,11 @@ def backup_repositories(args, output_directory, repositories):
391405
labels = retrieve_data(args, _label_template, query_args={})
392406

393407
log_info('Saving {0} labels to disk'.format(len(labels)))
394-
with open('{0}/labels.json'.format(label_cwd), 'w') as label_file:
395-
json.dump(labels, label_file, sort_keys=True, indent=4, separators=(',', ': '))
408+
if args.write_unicode:
409+
with codecs.open('{0}/labels.json'.format(label_cwd), 'w', encoding='utf-8') as label_file:
410+
json.dump(labels, label_file, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ': '))
411+
with open('{0}/labels.json'.format(label_cwd), 'w') as label_file:
412+
json.dump(labels, label_file, sort_keys=True, indent=4, separators=(',', ': '))
396413

397414

398415
def fetch_repository(name, remote_url, local_dir, skip_existing=False):
@@ -426,8 +443,11 @@ def backup_account(args, output_directory):
426443
starred_template = "https://{0}/users/{1}/starred"
427444
starred = retrieve_data(args, starred_template.format(get_github_api_host(args), args.user))
428445
log_info('Writing {0} starred repositories'.format(len(starred)))
429-
with open('{0}/starred.json'.format(account_cwd), 'w') as starred_file:
430-
json.dump(starred, starred_file, sort_keys=True, indent=4, separators=(',', ': '))
446+
if args.write_unicode:
447+
with codecs.open('{0}/starred.json'.format(account_cwd), 'w', encoding='utf-8') as starred_file:
448+
json.dump(starred, starred_file, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ': '))
449+
with open('{0}/starred.json'.format(account_cwd), 'w') as starred_file:
450+
json.dump(starred, starred_file, sort_keys=True, indent=4, separators=(',', ': '))
431451

432452
if args.include_watched or args.include_everything:
433453
if not args.skip_existing or not os.path.exists('{0}/watched.json'.format(account_cwd)):
@@ -437,8 +457,11 @@ def backup_account(args, output_directory):
437457
watched_template = "https://{0}/users/{1}/subscriptions"
438458
watched = retrieve_data(args, watched_template.format(get_github_api_host(args), args.user))
439459
log_info('Writing {0} watched repositories'.format(len(watched)))
440-
with open('{0}/watched.json'.format(account_cwd), 'w') as watched_file:
441-
json.dump(watched, watched_file, sort_keys=True, indent=4, separators=(',', ': '))
460+
if args.write_unicode:
461+
with codecs.open('{0}/watched.json'.format(account_cwd), 'w', encoding='utf-8') as watched_file:
462+
json.dump(watched, watched_file, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ': '))
463+
with open('{0}/watched.json'.format(account_cwd), 'w') as watched_file:
464+
json.dump(watched, watched_file, sort_keys=True, indent=4, separators=(',', ': '))
442465

443466

444467
def main():

0 commit comments

Comments
 (0)