about summary refs log tree commit diff
path: root/email/eml2mbox/eml2mbox.rb
diff options
context:
space:
mode:
Diffstat (limited to 'email/eml2mbox/eml2mbox.rb')
-rwxr-xr-x email/eml2mbox/eml2mbox.rb 265
1 files changed, 265 insertions, 0 deletions
diff --git a/email/eml2mbox/eml2mbox.rb b/email/eml2mbox/eml2mbox.rb
new file mode 100755
index 0000000..1fc7bca
--- /dev/null
+++ b/email/eml2mbox/eml2mbox.rb
@@ -0,0 +1,265 @@
+#!/usr/bin/ruby
+#============================================================================================#
+# eml2mbox.rb v0.08 #
+# Last updated: Jan 23, 2004 #
+# #
+# Converts a bunch of eml files into one mbox file. #
+# #
+# Usage: [ruby] eml2mbox.rb [-c] [-l] [-s] [-yz] [emlpath [trgtmbx]] #
+# Switches: #
+# -c Remove CRs (^M) appearing at end of lines (Unix) #
+# -l Remove LFs appearing at beginning of lines (old Mac) - not tested #
+# -s Don't use standard mbox postmark formatting (for From_ line) #
+# This will force the use of original From and Date found in mail headers. #
+# Not recommended, unless you really have problems importing emls. #
+# -yz Use this to force the order of the year and timezone in date in the From_ #
+# line from the default [year][timezone] to [timezone][year]. #
+# emlpath - Path of dir with eml files. Defaults to the current dir if not specified #
+# trgtmbx - Name of the target mbox file. Defaults to "archive.mbox" in 'emlpath' #
+# #
+# Ruby homepage: http://www.ruby-lang.org/en/ #
+# Unix mailbox format: http://www.broobles.com/eml2mbox/mbox.html #
+# This script : http://www.broobles.com/eml2mbox #
+# #
+#============================================================================================#
+# Licence: #
+# #
+# This script is free software; you can redistribute it and/or modify it under the terms of #
+# the GNU Lesser General Public License as published by the Free Software Foundation; #
+# either version 2.1 of the License, or (at your option) any later version. #
+# #
+# You should have received a copy of the GNU Lesser General Public License along with this #
+# script; if not, please visit http://www.gnu.org/copyleft/gpl.html for more information. #
+#============================================================================================#
+
+require "parsedate"
+
+include ParseDate
+
+#=======================================================#
+# Class that encapsulates the processing file in memory #
+#=======================================================#
+
+class FileInMemory # Buffers the lines of one eml file and builds the mbox From_ line from its From:/Date: headers
+
+ ZoneOffset = { # timezone abbreviation => numeric offset string used in the From_ line
+ # Standard zones by RFC 2822
+ 'UTC' => '0000',
+ 'UT' => '0000', 'GMT' => '0000',
+ 'EST' => '-0500', 'EDT' => '-0400',
+ 'CST' => '-0600', 'CDT' => '-0500',
+ 'MST' => '-0700', 'MDT' => '-0600',
+ 'PST' => '-0800', 'PDT' => '-0700',
+ }
+
+ def initialize() # Starts with an empty buffer and no sender/date captured yet
+ @lines = Array.new
+ @counter = 1 # next free index in @lines; the 0 position is kept for the From_ line
+ @from = nil # from part of the From_ line (set by addLine)
+ @date = nil # date part of the From_ line (set by addLine or getProcessedLines)
+ end
+
+ def addLine(line) # Stores one raw input line, capturing the sender and date headers on the way
+ # Once the sender has been captured, any line starting with "From" gets a '>' added to its beginning
+ line = line.sub(/From/, '>From') if line =~ /^From/ and @from!=nil
+
+ # If the line is the first "From: ...@..." header, save it as the sender (without the line break)
+ if line =~ /^From:\s.*@/ and @from==nil
+ @from = line.sub(/From:/,'From')
+ @from = @from.chop # Remove the trailing line break (chop treats "\r\n" as one unit)
+ @from = standardizeFrom(@from) unless $switches["noStandardFromLine"] # -s keeps the header text as found
+ end
+
+ # Get the date from the first "Date:" header
+ if $switches["noStandardFromLine"]
+ # Don't parse the content of the Date header; only the "Date: " prefix is removed
+ @date = line.sub(/Date:\s/,'') if line =~ /^Date:\s/ and @date==nil
+ else
+ if line =~ /^Date:\s/ and @date==nil
+ # Parse content of the Date header and convert to the mbox standard for the From_ line
+ @date = line.sub(/Date:\s/,'')
+ year, month, day, hour, minute, second, timezone, wday = parsedate(@date) # wday is not used below
+ # A zone that does not look like a numeric offset is looked up by name; names missing from ZoneOffset give nil
+ unless timezone =~ /[+|-]\d*/
+ timezone=ZoneOffset[timezone]
+ end
+ time = Time.gm(year,month,day,hour,minute,second) # built from the header's clock fields; the zone is only appended as text
+ @date = formMboxDate(time,timezone)
+ end
+ end
+
+ # Now add the line (after the optional CR/LF fixes) to the array
+ line = fixLineEndings(line)
+ @lines[@counter]=line
+ @counter+=1
+ end
+
+ # Forms the first line (from + date) and returns all the lines.
+ # Returns nil when no usable From: header was seen by addLine.
+ def getProcessedLines()
+ if @from != nil
+ # Add from and date to the first line
+ if @date==nil
+ puts "WARN: Failed to extract date. Will use current time in the From_ line"
+ @date=formMboxDate(Time.now,nil)
+ end
+ @lines[0] = @from + " " + @date
+
+ @lines[0] = fixLineEndings(@lines[0])
+ @lines[@counter] = "" # empty entry appended after the buffered lines
+ return @lines
+ end
+ # else fall through: the method's value is nil
+ end
+
+ # Applies the CR/LF fixes selected by the removeCRs / removeLFs switches
+ def fixLineEndings(line)
+ line = removeCR(line) if $switches["removeCRs"];
+ line = removeLF(line) if $switches["removeLFs"];
+ return line
+ end
+
+ # emls usually have CR+LF (DOS) line endings; on Unix that leaves a hanging CR before the LF.
+ # Removes the next-to-last character of the line when it is a CR (0xD).
+ # NOTE(review): line[-2]==0xD compares against an Integer, which is Ruby 1.8 String#[] behaviour - confirm on newer Rubies
+ def removeCR(line)
+ line = line[0..-3]+line[-1..-1] if line[-2]==0xD
+ return line
+ end
+
+ # Counterpart for Macs that use CR as a line break: removes the last char when it is a LF (0xA).
+ # NOTE(review): same Integer comparison as in removeCR - confirm on newer Rubies
+ def removeLF(line)
+ line = line[0..-2] if line[-1]==0xA
+ return line
+ end
+
+end
+
+#================#
+# Helper methods #
+#================#
+
+# Converts: 'From "some one <aa@aa.aa>" <aa@aa.aa>' -> 'From aa@aa.aa'
+def standardizeFrom(fromLine) # returns the rewritten line, or fromLine unchanged when '<' or '>' is missing
+ # Get indexes of last "<" and ">" in line
+ openIndex = fromLine.rindex('<')
+ closeIndex = fromLine.rindex('>')
+ if openIndex!=nil and closeIndex!=nil
+ fromLine = fromLine[0..4]+fromLine[openIndex+1..closeIndex-1] # first 5 chars ("From ") + text between the last '<' and the last '>'
+ end
+ # else leave as it is - it is either already well formed or is invalid
+ return fromLine
+end
+
+# Returns a mbox postmark formatted date string for the given Time.
+# If timezone is nil, it is skipped; otherwise its string form is inserted next to the year.
+# mbox date format used is described here:
+# http://www.broobles.com/eml2mbox/mbox.html
+def formMboxDate(time,timezone)
+ if timezone==nil
+ return time.strftime("%a %b %d %H:%M:%S %Y")
+ else
+ if $switches["zoneYearOrder"] # true when -yz was given
+ return time.strftime("%a %b %d %H:%M:%S "+timezone.to_s+" %Y") # timezone before the year
+ else
+ return time.strftime("%a %b %d %H:%M:%S %Y "+timezone.to_s) # year before the timezone (default)
+ end
+ end
+end
+
+
+# Extracts all leading switches (arguments starting with '-') from the command line and returns
+# a hashmap with valid switch names as keys and booleans as values (unset switches read as false).
+# Moves real params to the beginning of the ARGV array
+def extractSwitches()
+ switches = Hash.new(false) # All switches (values) default to false
+ i=0
+ while (ARGV[i]=~ /^-/) # while arguments are switches
+ if ARGV[i]=="-c"
+ switches["removeCRs"] = true
+ puts "\nWill fix lines ending with a CR"
+ elsif ARGV[i]=="-l"
+ switches["removeLFs"] = true
+ puts "\nWill fix lines beggining with a LF"
+ elsif ARGV[i]=="-s"
+ switches["noStandardFromLine"] = true
+ puts "\nWill use From and Date from mail headers in From_ line"
+ elsif ARGV[i]=="-yz"
+ switches["zoneYearOrder"] = true
+ puts "\nTimezone will be placed before the year in From_ line"
+ else
+ puts "\nUnknown switch: "+ARGV[i]+". Ignoring."
+ end
+ i = i+1
+ end
+ # Copy the first two non-switch arguments (nil when absent) to the beginning of the array
+ ARGV[0] = ARGV[i]
+ ARGV[1] = ARGV[i+1]
+ return switches
+end
+
+#===============#
+# Main #
+#===============#
+
+ $switches = extractSwitches() # global switch table read by FileInMemory and formMboxDate
+
+ # Extract specified directory with emls and the target archive (if any)
+ emlDir = "." # default if not specified
+ emlDir = ARGV[0] if ARGV[0]!=nil
+ mboxArchive = emlDir+"/archive.mbox" # default if not specified
+ mboxArchive = ARGV[1] if ARGV[1] != nil
+
+ # Show specified settings
+ puts "\nSpecified dir : "+emlDir
+ puts "Specified file: "+mboxArchive+"\n"
+
+ # Check that the dir exists and make it the working directory
+ if FileTest.directory?(emlDir)
+ Dir.chdir(emlDir) # NOTE(review): mboxArchive is opened after this chdir, so the default emlDir+"/archive.mbox" looks wrong for a relative emlDir - confirm
+ else
+ puts "\n["+emlDir+"] is not a directory (might not exist). Please specify a valid dir"
+ exit(0) # exit status is 0 even though nothing was converted
+ end
+
+ # Check if destination file exists. If yes allow user to select an option.
+ canceled = false
+ if FileTest.exist?(mboxArchive)
+ print "\nFile ["+mboxArchive+"] exists! Please select: [A]ppend [O]verwrite [C]ancel (default) "
+ sel = STDIN.gets.chomp
+ if sel == 'A' or sel == 'a'
+ aFile = File.new(mboxArchive, "a");
+ elsif sel == 'O' or sel == 'o'
+ aFile = File.new(mboxArchive, "w");
+ else
+ canceled = true # any other answer cancels; aFile is never opened
+ end
+ else
+ # File doesn't exist, open for writing
+ aFile = File.new(mboxArchive, "w");
+ end
+
+ if not canceled
+ puts
+ files = Dir["*.eml"] # matched in the current directory, i.e. emlDir after the chdir above
+ if files.size == 0
+ puts "No *.eml files in this directory. mbox file not created."
+ aFile.close
+ File.delete(mboxArchive) # NOTE(review): this also deletes a pre-existing archive the user chose to append to or overwrite - confirm intent
+ exit(0)
+ end
+ # For each .eml file in the specified directory: buffer its lines, then write them to the archive
+ files.each() do |x|
+ puts "Processing file: "+x
+ thisFile = FileInMemory.new()
+ File.open(x).each {|item| thisFile.addLine(item) } # NOTE(review): the opened File is not closed explicitly
+ lines = thisFile.getProcessedLines
+ if lines == nil
+ puts "WARN: File ["+x+"] doesn't seem to have a regular From: line. Not included in mbox"
+ else
+ lines.each {|line| aFile.puts line}
+ end
+ end
+ aFile.close
+ end