Skip to content

Commit

Permalink
Add private helpers to simplify Blogger importer (#390)
Browse files Browse the repository at this point in the history
Merge pull request 390
  • Loading branch information
ashmaroli authored Apr 24, 2021
1 parent 5218e0d commit c4215bc
Showing 1 changed file with 69 additions and 48 deletions.
117 changes: 69 additions & 48 deletions lib/jekyll-import/importers/blogger.rb
Original file line number Diff line number Diff line change
Expand Up @@ -124,49 +124,49 @@ def tag_start(tag, attrs)
when "category"
if @in_entry_elem
if attrs["scheme"] == "http://www.blogger.com/atom/ns#"
@in_entry_elem[:meta][:category] = [] unless @in_entry_elem[:meta][:category]
@in_entry_elem[:meta][:category] << attrs["term"]
element_meta[:category] ||= []
element_meta[:category] << attrs["term"]
elsif attrs["scheme"] == "http://schemas.google.com/g/2005#kind"
kind = attrs["term"]
kind.sub!(Regexp.new("^http://schemas\\.google\\.com/blogger/2008/kind\\#"), "")
@in_entry_elem[:meta][:kind] = kind
element_meta[:kind] = kind
end
end
when "content"
@in_entry_elem[:meta][:content_type] = attrs["type"] if @in_entry_elem
element_meta[:content_type] = attrs["type"] if @in_entry_elem
when "link"
if @in_entry_elem
if attrs["rel"] == "alternate" && attrs["type"] == "text/html"
@in_entry_elem[:meta][:original_url] = attrs["href"]
elsif attrs["rel"] == "replies" && attrs["type"] == "text/html"
@in_entry_elem[:meta][:original_url] = attrs["href"].sub(%r!\#comment-form$!, "") unless @in_entry_elem[:meta][:original_url]
if @in_entry_elem && attrs["type"] == "text/html"
if attrs["rel"] == "alternate"
element_meta[:original_url] = attrs["href"]
elsif attrs["rel"] == "replies"
element_meta[:original_url] ||= attrs["href"].sub(%r!\#comment-form$!, "")
end
end
when "media:thumbnail"
@in_entry_elem[:meta][:thumbnail] = attrs["url"] if @in_entry_elem
element_meta[:thumbnail] = attrs["url"] if @in_entry_elem
when "thr:in-reply-to"
@in_entry_elem[:meta][:post_id] = attrs["ref"] if @in_entry_elem
element_meta[:post_id] = attrs["ref"] if @in_entry_elem
end
end

def text(text)
if @in_entry_elem
case @tag_bread.last
when "content"
@in_entry_elem[:body] = text
when "id"
@in_entry_elem[:meta][:id] = text
element_meta[:id] = text
when "published"
@in_entry_elem[:meta][:published] = text
element_meta[:published] = text
when "updated"
@in_entry_elem[:meta][:updated] = text
element_meta[:updated] = text
when "title"
@in_entry_elem[:meta][:title] = text
when "content"
@in_entry_elem[:body] = text
element_meta[:title] = text
when "name"
@in_entry_elem[:meta][:author] = text if @tag_bread[-2..-1] == %w(author name)
element_meta[:author] = text if @tag_bread[-2..-1] == %w(author name)
when "app:draft"
if @tag_bread[-2..-1] == %w(app:control app:draft)
@in_entry_elem[:meta][:draft] = true if text == "yes"
element_meta[:draft] = true if text == "yes"
end
end
end
Expand All @@ -177,12 +177,12 @@ def tag_end(tag)
when "entry"
raise "nest entry element" unless @in_entry_elem

if @in_entry_elem[:meta][:kind] == "post"
if element_meta[:kind] == "post"
post_data = post_data_from_in_entry_elem_info

if post_data
target_dir = "_posts"
target_dir = "_drafts" if @in_entry_elem[:meta][:draft]
target_dir = "_drafts" if element_meta[:draft]

FileUtils.mkdir_p(target_dir)

Expand All @@ -195,7 +195,7 @@ def tag_end(tag)
f << post_data[:body]
end
end
elsif @in_entry_elem[:meta][:kind] == "comment" && @comments
elsif element_meta[:kind] == "comment" && @comments
post_data = post_data_from_in_entry_elem_info

if post_data
Expand All @@ -221,19 +221,19 @@ def tag_end(tag)
end

def post_data_from_in_entry_elem_info
if @in_entry_elem.nil? || !@in_entry_elem.key?(:meta) || !@in_entry_elem[:meta].key?(:kind)
if @in_entry_elem.nil? || !@in_entry_elem.key?(:meta) || !element_meta.key?(:kind)
nil
elsif @in_entry_elem[:meta][:kind] == "post"
timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime("%Y-%m-%d")
if @in_entry_elem[:meta][:original_url]
original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
elsif element_meta[:kind] == "post"
timestamp = Time.parse(element_meta[:published]).strftime("%Y-%m-%d")
if element_meta[:original_url]
original_uri = URI.parse(element_meta[:original_url])
original_path = original_uri.path.to_s
filename = format("%s-%s", timestamp, File.basename(original_path, File.extname(original_path)))

@original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
elsif @in_entry_elem[:meta][:draft]
elsif element_meta[:draft]
# Drafts don't have published urls
name = @in_entry_elem[:meta][:title]
name = element_meta[:title]
filename = if name.nil?
timestamp
else
Expand All @@ -245,15 +245,18 @@ def post_data_from_in_entry_elem_info

header = {
"layout" => "post",
"title" => @in_entry_elem[:meta][:title],
"date" => @in_entry_elem[:meta][:published],
"author" => @in_entry_elem[:meta][:author],
"tags" => @in_entry_elem[:meta][:category],
"title" => element_meta[:title],
"date" => element_meta[:published],
"author" => element_meta[:author],
"tags" => element_meta[:category],
}
header["modified_time"] = @in_entry_elem[:meta][:updated] if @in_entry_elem[:meta][:updated] && @in_entry_elem[:meta][:updated] != @in_entry_elem[:meta][:published]
header["thumbnail"] = @in_entry_elem[:meta][:thumbnail] if @in_entry_elem[:meta][:thumbnail]
header["blogger_id"] = @in_entry_elem[:meta][:id] if @leave_blogger_info
header["blogger_orig_url"] = @in_entry_elem[:meta][:original_url] if @leave_blogger_info && @in_entry_elem[:meta][:original_url]
header["modified_time"] = element_modified_time
header["thumbnail"] = element_meta[:thumbnail] if element_meta[:thumbnail]

if @leave_blogger_info
header["blogger_id"] = element_meta[:id]
header["blogger_orig_url"] = element_meta[:original_url] if element_meta[:original_url]
end

body = @in_entry_elem[:body]

Expand All @@ -262,12 +265,12 @@ def post_data_from_in_entry_elem_info
body.gsub!(%r!{%!, '{{ "{%" }}') if %r!{%!.match?(body)

{ :filename => filename, :header => header, :body => body }
elsif @in_entry_elem[:meta][:kind] == "comment"
timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime("%Y-%m-%d")
if @in_entry_elem[:meta][:original_url]
elsif element_meta[:kind] == "comment"
timestamp = Time.parse(element_meta[:published]).strftime("%Y-%m-%d")
if element_meta[:original_url]
@comment_seq ||= 1

original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
original_uri = URI.parse(element_meta[:original_url])
original_path = original_uri.path.to_s
filename = format("%s-%s-%s", timestamp, File.basename(original_path, File.extname(original_path)), @comment_seq)

Expand All @@ -279,14 +282,17 @@ def post_data_from_in_entry_elem_info
end

header = {
"date" => @in_entry_elem[:meta][:published],
"author" => @in_entry_elem[:meta][:author],
"blogger_post_id" => @in_entry_elem[:meta][:post_id],
"date" => element_meta[:published],
"author" => element_meta[:author],
"blogger_post_id" => element_meta[:post_id],
}
header["modified_time"] = @in_entry_elem[:meta][:updated] if @in_entry_elem[:meta][:updated] && @in_entry_elem[:meta][:updated] != @in_entry_elem[:meta][:published]
header["thumbnail"] = @in_entry_elem[:meta][:thumbnail] if @in_entry_elem[:meta][:thumbnail]
header["blogger_id"] = @in_entry_elem[:meta][:id] if @leave_blogger_info
header["blogger_orig_url"] = @in_entry_elem[:meta][:original_url] if @leave_blogger_info && @in_entry_elem[:meta][:original_url]
header["modified_time"] = element_modified_time
header["thumbnail"] = element_meta[:thumbnail] if element_meta[:thumbnail]

if @leave_blogger_info
header["blogger_id"] = element_meta[:id]
header["blogger_orig_url"] = element_meta[:original_url] if element_meta[:original_url]
end

body = @in_entry_elem[:body]

Expand All @@ -297,6 +303,21 @@ def post_data_from_in_entry_elem_info
{ :filename => filename, :header => header, :body => body }
end
end

private

# Returns the metadata hash of the entry element currently being parsed.
#
# Both the entry hash (@in_entry_elem) and its :meta sub-hash are created on
# demand, so the result is always a Hash that can be read or written directly.
# Note that calling this while @in_entry_elem is unset creates an empty entry
# hash as a side effect.
def element_meta
  entry = (@in_entry_elem ||= {})
  entry[:meta] ||= {}
end

# Returns the entry's :updated value when it is set and differs from the
# :published value; returns nil otherwise.
def element_modified_time
  meta = element_meta
  updated = meta[:updated]

  updated if updated && updated != meta[:published]
end
end
end
end
Expand Down

0 comments on commit c4215bc

Please sign in to comment.