#!/usr/local/bin/ruby
#
# Below is a pretty simple Ruby script for parsing files uploaded to an Amazon
# S3 bucket and inserting each file's information into a MySQL database.
#
# Flow:
#   1. Take a lock file so that only one copy of the scraper runs at a time.
#   2. Load the "FileName - FileModified" pairs already stored in MySQL.
#   3. List the S3 objects under the configured prefix.
#   4. Insert a row for every S3 object whose pair is not in the database yet.

require 'rubygems'
require 'aws/s3'
require 'mysql'
require 'lockfile'
require 'set'
require 'time' # Time.parse is defined by the stdlib "time" extension

LOCK_PATH     = '/tmp/scraper.lock'
BUCKET_NAME   = 'domain-tld'
BUCKET_PREFIX = 'uploads/user@domain.tld/uuid'
DOWNLOAD_BASE = 'https://uploads.domain.com/download'
INSERT_SQL    = 'insert into files (FileName, FileDate, FileModified, FileLink, ' \
                'FileDescription, FileSize) VALUES (?, ?, ?, ?, ?, ?)'

begin
  # The block runs while the lock is held. With :retries => 0 a lock that
  # cannot be taken surfaces as Lockfile::MaxTriesLockError (rescued below).
  Lockfile.new(LOCK_PATH, :retries => 0) do
    my = Mysql::new("localhost", "nfssupport", "password", "upload_files")
    insert = nil

    begin
      # Keys already present in the DB, in the same "name - modified" form
      # that is built for each S3 object below. A Set gives O(1) lookups.
      known = Set.new
      my.query("Select FileName, FileModified from files").each_hash do |row|
        known << "#{row['FileName']} - #{row['FileModified']}"
      end

      AWS::S3::Base.establish_connection!(
        :access_key_id     => 'access_key_id',
        :secret_access_key => 'secret_access_key'
      )
      files = AWS::S3::Bucket.objects(BUCKET_NAME, :prefix => BUCKET_PREFIX)

      files.each do |file|
        about         = file.about
        last_modified = about['last-modified']

        # YYYYMMDD date stamp derived from the object's last-modified header.
        file_date = Time.parse(last_modified).strftime('%Y%m%d')
        # Fourth path segment onwards, minus a leading "<digits>-" prefix.
        file_name = file.key.split('/', 4).last.sub(/\A\d+-/, '')
        # Everything after the fourth space-separated field of the header.
        file_modified = last_modified.split(' ', 5).last

        next if known.include?("#{file_name} - #{file_modified}")

        # Prepare lazily and only once; the statement is reused for every
        # insert and closed in the ensure clause below.
        insert ||= my.prepare(INSERT_SQL)
        insert.execute(
          file_name,
          file_date,
          file_modified,
          "#{DOWNLOAD_BASE}/#{file.key.split('/', 3).last}",
          file.metadata['x-amz-meta-description'],
          about['content-length']
        )
      end
    ensure
      # Release DB resources even when S3 or an insert raises.
      insert.close if insert
      my.close
    end
  end
rescue Lockfile::MaxTriesLockError
  puts "Scraper is already running!"
end