Below is a fairly simple Ruby script that lists the files uploaded to an Amazon S3 bucket and inserts each new file’s information into a MySQL database.

#!/usr/local/bin/ruby

require 'rubygems'
require 'aws/s3'
require 'mysql'
require 'lockfile'

# Sync S3 upload listings into the MySQL `files` table.
#
# The whole run is guarded by a lock file (:retries => 0), so a second
# invocation that cannot take the lock prints a message instead of running
# concurrently. Every S3 object under the configured prefix whose
# "name - modified" pair is not already present in the `files` table is
# inserted as a new row.
begin
  Lockfile.new('/tmp/scraper.lock', :retries => 0) do # Take the lock; fail immediately if held
    my = Mysql::new("localhost", "nfssupport", "password", "upload_files") # MySQL connection
    st = nil # Prepared INSERT, created once S3 has been listed

    begin
      # Index the rows already in the DB by "FileName - FileModified" so the
      # membership test below is a hash lookup rather than an array scan.
      known = {}
      my.query("Select FileName, FileModified from files").each_hash do |row|
        known["#{row['FileName']} - #{row['FileModified']}"] = true
      end

      AWS::S3::Base.establish_connection!( # Create the S3 connection
        :access_key_id     => 'access_key_id',
        :secret_access_key => 'secret_access_key'
      )
      files = AWS::S3::Bucket.objects( # List the S3 objects under the upload prefix
        'domain-tld',
        :prefix => 'uploads/user@domain.tld/uuid'
      )

      # Prepare the INSERT once and reuse it for every new file.
      st = my.prepare(
        "insert into files (FileName, FileDate, FileModified, FileLink, FileDescription, FileSize) " \
        "VALUES (?, ?, ?, ?, ?, ?)"
      )

      files.each do |file| # For each S3 object
        about = file.about
        last_modified = about['last-modified']

        # NOTE(review): Time.parse is provided by the 'time' stdlib extension;
        # presumably it is loaded by one of the required gems - confirm.
        file_date = Time.parse(last_modified).strftime('%Y%m%d')
        # Fourth '/'-separated component of the key, minus a leading "<digits>-".
        file_name = file.key.split('/', 4).last.gsub(/^(\d+\-)/, '')
        # Everything after the fourth space-separated field of last-modified.
        # NOTE(review): for an HTTP-date header this looks like only the
        # time-of-day part, so the dedupe key would ignore the date - confirm
        # this is intended.
        file_modified = last_modified.split(' ', 5).last

        next if known.key?("#{file_name} - #{file_modified}") # Already in DB

        st.execute(
          file_name,
          file_date,
          file_modified,
          "https://uploads.domain.com/download/#{file.key.split('/', 3).last}",
          file.metadata['x-amz-meta-description'],
          about['content-length']
        ) # Insert it
      end
    ensure
      # Release DB resources even if S3 or an insert raised.
      st.close if st
      my.close
    end
  end
rescue Lockfile::MaxTriesLockError
  puts "Scraper is already running!"
end