Browse Source

Add `tootctl media remove-orphans` (#12568)

master^2
Eugen Rochko 4 years ago
committed by GitHub
parent
commit
f3d232381d
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 90 additions and 1 deletions
  1. +12
    -0
      app/models/media_attachment.rb
  2. +1
    -1
      config/initializers/paperclip.rb
  3. +77
    -0
      lib/mastodon/media_cli.rb

+ 12
- 0
app/models/media_attachment.rb View File

@@ -167,6 +167,18 @@ class MediaAttachment < ApplicationRecord
audio? || video?
end

# Tells whether +other_file_name+ refers to this attachment's stored file or
# to one of its processed styles.
#
# A name counts as a variant when it equals +file_file_name+ exactly, or when
# its extension is one of the formats declared by the attachment's styles and
# its base name (without extension) equals the base name of +file_file_name+.
#
# @param other_file_name [String] file name to compare (no directory part)
# @return [Boolean]
def variant?(other_file_name)
  return true if file_file_name == other_file_name

  # With no stored file name there is nothing to derive a base name from, and
  # File.extname / File.basename raise TypeError when handed nil.
  return false if file_file_name.to_s.empty?

  formats = file.styles.values.map(&:format).compact
  return false if formats.empty?

  extension = File.extname(other_file_name)
  return false unless formats.include?(extension.delete('.'))

  own_base_name = File.basename(file_file_name, File.extname(file_file_name))
  File.basename(other_file_name, extension) == own_base_name
end

# Returns the attachment's shortcode as its parameter representation
# (Rails calls +to_param+ on a record when it needs a URL segment for it).
def to_param
shortcode
end


+ 1
- 1
config/initializers/paperclip.rb View File

@@ -89,7 +89,7 @@ else
# Filesystem storage defaults. The on-disk path is rooted at
# PAPERCLIP_ROOT_PATH (default: <rails_root>/public/system) and is built with
# File.join so the separators are handled consistently. The hash previously
# listed `path:` twice; Ruby only keeps the last duplicate key, so the
# overridden string-concatenation entry has been dropped.
Paperclip::Attachment.default_options.merge!(
  storage: :filesystem,
  use_timestamp: true,
  path: File.join(ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')), ':class', ':attachment', ':id_partition', ':style', ':filename'),
  url: ENV.fetch('PAPERCLIP_ROOT_URL', '/system') + '/:class/:attachment/:id_partition/:style/:filename',
)
end

+ 77
- 0
lib/mastodon/media_cli.rb View File

@@ -44,6 +44,83 @@ module Mastodon
say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true)
end

option :start_after
option :dry_run, type: :boolean, default: false
desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
long_desc <<~LONG_DESC
  Scans file storage for files that do not belong to existing media attachments. Because this operation
  requires iterating over every single file individually, it will be slow.

  Please mind that some storage providers charge for the necessary API requests to list objects.
LONG_DESC
# Walks every stored object under media_attachments/files and deletes those
# that have no matching MediaAttachment row, or whose file name is not a
# variant of that row's file (see MediaAttachment#variant?).
#
# Options:
#   --start-after  S3 only: key after which listing starts.
#   --dry-run      Count and report orphans without deleting anything.
#
# Supports the :s3 and :filesystem Paperclip storage drivers; exits with
# status 1 for :fog.
def remove_orphans
  progress        = create_progress_bar(nil)
  reclaimed_bytes = 0
  removed         = 0
  dry_run         = options[:dry_run] ? ' (DRY RUN)' : ''

  case Paperclip::Attachment.default_options[:storage]
  when :s3
    paperclip_instance = MediaAttachment.new.file
    s3_interface       = paperclip_instance.s3_interface
    bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
    last_key           = options[:start_after]

    # Page through the bucket 1000 keys at a time, resuming after the last
    # key seen in the previous page.
    loop do
      objects = bucket.objects(start_after: last_key, prefix: 'media_attachments/files/').limit(1000).map { |x| x }

      break if objects.empty?

      last_key = objects.last.key

      # Load all attachments referenced by this page in a single query.
      # The id is the concatenation of the partition segments between the
      # two-segment prefix and the trailing "<style>/<filename>".
      attachments_map = MediaAttachment.where(id: objects.map { |object| object.key.split('/')[2..-2].join.to_i }).each_with_object({}) { |attachment, map| map[attachment.id] = attachment }

      objects.each do |object|
        attachment_id = object.key.split('/')[2..-2].join.to_i
        filename      = object.key.split('/').last

        progress.increment

        next unless attachments_map[attachment_id].nil? || !attachments_map[attachment_id].variant?(filename)

        reclaimed_bytes += object.size
        removed += 1
        object.delete unless options[:dry_run]
        progress.log("Found and removed orphan: #{object.key}")
      end
    end
  when :fog
    say('The fog storage driver is not supported for this operation at this time', :red)
    exit(1)
  when :filesystem
    require 'find'

    # Must read the same variable the Paperclip initializer uses for its
    # `path:` option (PAPERCLIP_ROOT_PATH); otherwise a customised storage
    # root would be ignored and the wrong directory scanned.
    root_path   = ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)
    root_prefix = "#{root_path}#{File::SEPARATOR}"

    Find.find(File.join(root_path, 'media_attachments', 'files')) do |path|
      next if File.directory?(path)

      # Strip only the leading root; `sub` touches the first occurrence, so a
      # repeat of the root string deeper in the path is left intact.
      key           = path.sub(root_prefix, '')
      segments      = key.split(File::SEPARATOR)
      attachment_id = segments[2..-2].join.to_i
      filename      = segments.last
      attachment    = MediaAttachment.find_by(id: attachment_id)

      progress.increment

      next unless attachment.nil? || !attachment.variant?(filename)

      reclaimed_bytes += File.size(path)
      removed += 1
      File.delete(path) unless options[:dry_run]
      progress.log("Found and removed orphan: #{key}")
    end
  end

  # The total is unknown up front, so pin it to the number of files visited
  # before finishing the bar.
  progress.total = progress.progress
  progress.finish

  say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
end

option :account, type: :string
option :domain, type: :string
option :status, type: :numeric


Loading…
Cancel
Save