Skip to content
This repository has been archived by the owner on Dec 21, 2023. It is now read-only.

Commit

Permalink
Rewrite import feature (mastodon#21054)
Browse files Browse the repository at this point in the history
  • Loading branch information
ClearlyClaire authored May 2, 2023
1 parent 0ad2413 commit 32a030d
Show file tree
Hide file tree
Showing 40 changed files with 2,053 additions and 107 deletions.
2 changes: 2 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ Metrics/AbcSize:
Metrics/BlockLength:
CountAsOne: ['array', 'hash', 'heredoc', 'method_call']
Exclude:
- 'config/routes.rb'
- 'lib/mastodon/*_cli.rb'
- 'lib/tasks/*.rake'
- 'app/models/concerns/account_associations.rb'
Expand Down Expand Up @@ -130,6 +131,7 @@ Metrics/ClassLength:
- 'app/services/activitypub/process_account_service.rb'
- 'app/services/activitypub/process_status_update_service.rb'
- 'app/services/backup_service.rb'
- 'app/services/bulk_import_service.rb'
- 'app/services/delete_account_service.rb'
- 'app/services/fan_out_on_write_service.rb'
- 'app/services/fetch_link_card_service.rb'
Expand Down
2 changes: 2 additions & 0 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,7 @@ RSpec/LetSetup:
- 'spec/controllers/following_accounts_controller_spec.rb'
- 'spec/controllers/oauth/authorized_applications_controller_spec.rb'
- 'spec/controllers/oauth/tokens_controller_spec.rb'
- 'spec/controllers/settings/imports_controller_spec.rb'
- 'spec/lib/activitypub/activity/delete_spec.rb'
- 'spec/lib/vacuum/preview_cards_vacuum_spec.rb'
- 'spec/models/account_spec.rb'
Expand All @@ -755,6 +756,7 @@ RSpec/LetSetup:
- 'spec/services/activitypub/process_collection_service_spec.rb'
- 'spec/services/batched_remove_status_service_spec.rb'
- 'spec/services/block_domain_service_spec.rb'
- 'spec/services/bulk_import_service_spec.rb'
- 'spec/services/delete_account_service_spec.rb'
- 'spec/services/import_service_spec.rb'
- 'spec/services/notify_service_spec.rb'
Expand Down
90 changes: 78 additions & 12 deletions app/controllers/settings/imports_controller.rb
Original file line number Diff line number Diff line change
@@ -1,31 +1,97 @@
# frozen_string_literal: true

require 'csv'

# Settings UI for CSV imports (follows, blocks, mutes, domain blocks, bookmarks).
#
# Flow: `create` stores the upload as an unconfirmed BulkImport, `show` displays
# it for review, `confirm` schedules it for processing and `destroy` discards it.
# `failures` exports the rows still attached to a finished import as CSV.
class Settings::ImportsController < Settings::BaseController
  # Only unconfirmed imports can be reviewed, confirmed or discarded
  before_action :set_bulk_import, only: [:show, :confirm, :destroy]
  before_action :set_recent_imports, only: [:index]

  # Download filename used by `failures`, per import type
  TYPE_TO_FILENAME_MAP = {
    following: 'following_accounts_failures.csv',
    blocking: 'blocked_accounts_failures.csv',
    muting: 'muted_accounts_failures.csv',
    domain_blocking: 'blocked_domains_failures.csv',
    bookmarks: 'bookmarks_failures.csv',
  }.freeze

  # CSV header row written by `failures`, per import type (`false` means no header row)
  TYPE_TO_HEADERS_MAP = {
    following: ['Account address', 'Show boosts', 'Notify on new posts', 'Languages'],
    blocking: false,
    muting: ['Account address', 'Hide notifications'],
    domain_blocking: false,
    bookmarks: false,
  }.freeze

  # Upload form, rendered alongside the list of recent imports
  def index
    @import = Form::Import.new(current_account: current_account)
  end

  # Review page for an unconfirmed import (record loaded by `set_bulk_import`)
  def show; end

  # Sends a CSV built from the rows of one of the current account's finished imports.
  # Raises ActiveRecord::RecordNotFound (via `find`) if no such finished import exists.
  def failures
    @bulk_import = current_account.bulk_imports.where(state: :finished).find(params[:id])

    respond_to do |format|
      format.csv do
        type     = @bulk_import.type.to_sym
        filename = TYPE_TO_FILENAME_MAP[type]
        headers  = TYPE_TO_HEADERS_MAP[type]

        export_data = CSV.generate(headers: headers, write_headers: true) do |csv|
          @bulk_import.rows.find_each do |row|
            case type
            when :following
              csv << [row.data['acct'], row.data.fetch('show_reblogs', true), row.data.fetch('notify', false), row.data['languages']&.join(', ')]
            when :blocking
              csv << [row.data['acct']]
            when :muting
              csv << [row.data['acct'], row.data.fetch('hide_notifications', true)]
            when :domain_blocking
              csv << [row.data['domain']]
            when :bookmarks
              csv << [row.data['uri']]
            end
          end
        end

        send_data export_data, filename: filename
      end
    end
  end

  # Marks the reviewed import as scheduled and enqueues its background worker
  def confirm
    @bulk_import.update!(state: :scheduled)
    BulkImportWorker.perform_async(@bulk_import.id)
    redirect_to settings_imports_path, notice: I18n.t('imports.success')
  end

  # Validates and stores the uploaded file, then sends the user to the review page
  def create
    @import = Form::Import.new(import_params.merge(current_account: current_account))

    if @import.save
      redirect_to settings_import_path(@import.bulk_import.id)
    else
      # We need to set recent imports as we are displaying the index again
      set_recent_imports
      render :index
    end
  end

  # Discards an unconfirmed import
  def destroy
    @bulk_import.destroy!
    redirect_to settings_imports_path
  end

  private

  def import_params
    params.require(:form_import).permit(:data, :type, :mode)
  end

  def set_bulk_import
    @bulk_import = current_account.bulk_imports.where(state: :unconfirmed).find(params[:id])
  end

  def set_recent_imports
    @recent_imports = current_account.bulk_imports.reorder(id: :desc).limit(10)
  end
end
18 changes: 18 additions & 0 deletions app/lib/vacuum/imports_vacuum.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# frozen_string_literal: true

class Vacuum::ImportsVacuum
  # Removes stale bulk imports: first the ones that were never confirmed,
  # then every import past the retention window.
  def perform
    clean_unconfirmed_imports!
    clean_old_imports!
  end

  private

  # Unconfirmed imports created at least 10 minutes ago
  def clean_unconfirmed_imports!
    stale_unconfirmed = BulkImport.where(state: :unconfirmed).where('created_at <= ?', 10.minutes.ago)
    delete_in_batches(stale_unconfirmed)
  end

  # Imports created at least a week ago, whatever their state
  def clean_old_imports!
    expired = BulkImport.where('created_at <= ?', 1.week.ago)
    delete_in_batches(expired)
  end

  # Drops any ordering from the relation and issues `delete_all` batch by batch
  def delete_in_batches(relation)
    relation.reorder(nil).in_batches.delete_all
  end
end
53 changes: 53 additions & 0 deletions app/models/bulk_import.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# frozen_string_literal: true

# == Schema Information
#
# Table name: bulk_imports
#
# id :bigint(8) not null, primary key
# type :integer not null
# state :integer not null
# total_items :integer default(0), not null
# imported_items :integer default(0), not null
# processed_items :integer default(0), not null
# finished_at :datetime
# overwrite :boolean default(FALSE), not null
# likely_mismatched :boolean default(FALSE), not null
# original_filename :string default(""), not null
# account_id :bigint(8) not null
# created_at :datetime not null
# updated_at :datetime not null
#
class BulkImport < ApplicationRecord
  # `type` is a regular enum column here, not a single-table-inheritance discriminator
  self.inheritance_column = false

  belongs_to :account
  has_many :rows, class_name: 'BulkImportRow', inverse_of: :bulk_import, dependent: :delete_all

  enum type: {
    following: 0,
    blocking: 1,
    muting: 2,
    domain_blocking: 3,
    bookmarks: 4,
  }

  enum state: {
    unconfirmed: 0,
    scheduled: 1,
    in_progress: 2,
    finished: 3,
  }

  validates :type, presence: true

  class << self
    # Records that one more item of the given import has been processed
    # (and, when `imported` is true, successfully imported), then flags the
    # import as finished once every item has been processed.
    def progress!(bulk_import_id, imported: false)
      counters = [:processed_items]
      counters << :imported_items if imported

      # `increment_counter` performs each increment atomically in the database
      counters.each do |counter|
        BulkImport.increment_counter(counter, bulk_import_id) # rubocop:disable Rails/SkipsModelValidations
      end

      # The increments being atomic, concurrent access to the record below is benign
      record = BulkImport.find(bulk_import_id)
      return unless record.processed_items == record.total_items

      record.update!(state: :finished, finished_at: Time.now.utc)
    end
  end
end
15 changes: 15 additions & 0 deletions app/models/bulk_import_row.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# frozen_string_literal: true

# == Schema Information
#
# Table name: bulk_import_rows
#
# id :bigint(8) not null, primary key
# bulk_import_id :bigint(8) not null
# data :jsonb
# created_at :datetime not null
# updated_at :datetime not null
#
class BulkImportRow < ApplicationRecord
# A single entry of a bulk import; its payload lives in the `data` jsonb column
# listed in the schema annotation above.
belongs_to :bulk_import
end
3 changes: 3 additions & 0 deletions app/models/concerns/account_associations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -68,5 +68,8 @@ module AccountAssociations

# Account statuses cleanup policy
has_one :statuses_cleanup_policy, class_name: 'AccountStatusesCleanupPolicy', inverse_of: :account, dependent: :destroy

# Imports
has_many :bulk_imports, inverse_of: :account, dependent: :delete_all
end
end
151 changes: 151 additions & 0 deletions app/models/form/import.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# frozen_string_literal: true

require 'csv'

# A non-ActiveRecord helper class for CSV uploads.
# Handles saving contents to database.
class Form::Import
  include ActiveModel::Model

  MODES = %i(merge overwrite).freeze

  FILE_SIZE_LIMIT = 20.megabytes
  ROWS_PROCESSING_LIMIT = 20_000

  # CSV columns kept for each import type. The first header of each list is the
  # one a file must contain to be accepted for that type.
  EXPECTED_HEADERS_BY_TYPE = {
    following: ['Account address', 'Show boosts', 'Notify on new posts', 'Languages'],
    blocking: ['Account address'],
    muting: ['Account address', 'Hide notifications'],
    domain_blocking: ['#domain'],
    bookmarks: ['#uri'],
  }.freeze

  KNOWN_FIRST_HEADERS = EXPECTED_HEADERS_BY_TYPE.values.map(&:first).uniq.freeze

  # Maps CSV column headers to the keys used in the stored row data
  ATTRIBUTE_BY_HEADER = {
    'Account address' => 'acct',
    'Show boosts' => 'show_reblogs',
    'Notify on new posts' => 'notify',
    'Languages' => 'languages',
    'Hide notifications' => 'hide_notifications',
    '#domain' => 'domain',
    '#uri' => 'uri',
  }.freeze

  # Raised internally when the uploaded file yields no header row at all
  class EmptyFileError < StandardError; end

  attr_accessor :current_account, :data, :type, :overwrite, :bulk_import

  validates :type, presence: true
  validates :data, presence: true
  validate :validate_data

  # Best guess of the import type, from type-specific CSV headers first and
  # from the uploaded file's name otherwise. Returns nil when nothing matches.
  def guessed_type
    headers = csv_data.headers
    return :muting if headers.include?('Hide notifications')
    return :following if ['Show boosts', 'Notify on new posts', 'Languages'].any? { |header| headers.include?(header) }

    filename = data.original_filename
    return if filename.nil?

    return :following if filename.start_with?('follows', 'following_accounts')
    return :blocking if filename.start_with?('blocks', 'blocked_accounts')
    return :muting if filename.start_with?('mutes', 'muted_accounts')
    return :domain_blocking if filename.start_with?('domain_blocks', 'blocked_domains')
    return :bookmarks if filename.start_with?('bookmarks')

    nil
  end

  # Whether the uploaded CSV file seems to correspond to a different import type than the one selected
  def likely_mismatched?
    guessed_type.present? && guessed_type != type.to_sym
  end

  # Validates the upload, then creates an unconfirmed BulkImport and its rows
  # in a single transaction. Returns false when the form is invalid; otherwise
  # returns the result of the final `update` call.
  def save
    return false unless valid?

    ApplicationRecord.transaction do
      now = Time.now.utc
      @bulk_import = current_account.bulk_imports.create(type: type, overwrite: overwrite || false, state: :unconfirmed, original_filename: data.original_filename, likely_mismatched: likely_mismatched?)
      # NOTE(review): a header-only file yields no rows here; confirm how `insert_all`
      # behaves with an empty list on the Rails version in use.
      nb_items = BulkImportRow.insert_all(parsed_rows.map { |row| { bulk_import_id: bulk_import.id, data: row, created_at: now, updated_at: now } }).length # rubocop:disable Rails/SkipsModelValidations
      @bulk_import.update(total_items: nb_items)
    end
  end

  def mode
    overwrite ? :overwrite : :merge
  end

  # Accepts a string or symbol; anything other than "overwrite" (including nil) means merge
  def mode=(str)
    self.overwrite = str.to_s == 'overwrite'
  end

  private

  # Header a file must contain for the selected type, also used as the sole
  # column name for header-less files. nil when the type is missing or unknown.
  def default_csv_header
    EXPECTED_HEADERS_BY_TYPE[type&.to_sym]&.first
  end

  # Memoized CSV reader over the uploaded file.
  # Raises EmptyFileError when no header row could be read.
  def csv_data
    return @csv_data if defined?(@csv_data)

    csv_converter = lambda do |field, field_info|
      case field_info.header
      when 'Show boosts', 'Notify on new posts', 'Hide notifications'
        ActiveModel::Type::Boolean.new.cast(field)
      when 'Languages'
        field&.split(',')&.map(&:strip)&.presence
      when 'Account address'
        # NOTE(review): unlike 'Languages', this is not nil-safe; confirm whether an
        # empty cell can reach the converter as nil.
        field.strip.gsub(/\A@/, '')
      when '#domain', '#uri'
        field.strip
      else
        field
      end
    end

    @csv_data = CSV.open(data.path, encoding: 'UTF-8', skip_blanks: true, headers: true, converters: csv_converter)
    @csv_data.take(1) # Ensure the headers are read
    raise EmptyFileError if @csv_data.headers == true

    # The first line is not a recognized header row: treat the file as header-less
    # and read its single column under the default header for the selected type.
    unless KNOWN_FIRST_HEADERS.include?(@csv_data.headers&.first)
      @csv_data.close # release the first handle before reopening the file
      @csv_data = CSV.open(data.path, encoding: 'UTF-8', skip_blanks: true, headers: [default_csv_header], converters: csv_converter)
    end

    @csv_data
  end

  # Memoized number of rows, counted only slightly past ROWS_PROCESSING_LIMIT:
  # enough for the validation to tell the limit has been exceeded.
  def csv_row_count
    return @csv_row_count if defined?(@csv_row_count)

    csv_data.rewind
    @csv_row_count = csv_data.take(ROWS_PROCESSING_LIMIT + 2).count
  end

  # Rows reduced to the columns expected for the selected type, with headers
  # translated to their stored attribute names.
  def parsed_rows
    csv_data.rewind

    expected_headers = EXPECTED_HEADERS_BY_TYPE[type.to_sym]

    csv_data.take(ROWS_PROCESSING_LIMIT + 1).map do |row|
      row.to_h.slice(*expected_headers).transform_keys { |key| ATTRIBUTE_BY_HEADER[key] }
    end
  end

  def validate_data
    # A missing file or type is already reported by the presence validators
    return if data.nil? || type.blank?
    return errors.add(:data, I18n.t('imports.errors.too_large')) if data.size > FILE_SIZE_LIMIT
    # An unknown type has no default header and can never match the file
    return errors.add(:data, I18n.t('imports.errors.incompatible_type')) if default_csv_header.nil? || !csv_data.headers.include?(default_csv_header)

    errors.add(:data, I18n.t('imports.errors.over_rows_processing_limit', count: ROWS_PROCESSING_LIMIT)) if csv_row_count > ROWS_PROCESSING_LIMIT

    if type.to_sym == :following
      base_limit = FollowLimitValidator.limit_for_account(current_account)
      limit = base_limit
      # When merging, existing follows count against the limit
      limit -= current_account.following_count unless overwrite
      errors.add(:data, I18n.t('users.follow_limit_reached', limit: base_limit)) if csv_row_count > limit
    end
  rescue CSV::MalformedCSVError => e
    errors.add(:data, I18n.t('imports.errors.invalid_csv_file', error: e.message))
  rescue EmptyFileError
    errors.add(:data, I18n.t('imports.errors.empty'))
  end
end
Loading

0 comments on commit 32a030d

Please sign in to comment.