📄 mail_to_db.pl
字号:
{
        $type = "in_reply_to";
        s/^\s*(<.*>)\s*/$1/;
        $values{$type} = $1;
      }
      elsif (/^Date: (.*)/i)
      {
        date_parser($1, \%values, $file_name);
        $type = "rubbish";
      }
      # Catch those fields that we don't or can't handle (yet)
      elsif (/^[\w\W-]+:/)
      {
        $type = "rubbish";
      }
      elsif ($_ eq "")
      {
        $type = "message";
        $values{$type} = "";
      }
      else
      {
        s/^\s*/ /;
        if ($type eq 'message_id' || $type eq 'in_reply_to')
        {
          s/^\s*(<.*>)\s*/$1/;
        }
        $values{$type} .= $_;
      }
    }
    elsif ($check != 0 && $_ ne "") # in case of forwarded messages
    {
      $values{$type} .= "\n" . $_;
      $check--;
    }
    elsif (/^From .* \d\d:\d\d:\d\d\s\d\d\d\d/ ||
           /^From .* \d\d\d\d\s\d\d:\d\d:\d\d/)
    {
      $values{'hash'} = checksum("$values{'message'}");
      update_table($dbh, $file_name, \%values);
      %values = ();
      $type = "";
      $check = 0;
    }
    elsif (/-* forwarded message .*-*/i) # in case of forwarded messages
    {
      $values{$type} .= "\n" . $_;
      $check++;
      $mail_forwarded++;
    }
    else
    {
      $values{$type} .= "\n" . $_;
    }
  }
  if (defined($values{'message'}))
  {
    $values{'hash'} = checksum("$values{'message'}");
    update_table($dbh, $file_name, \%values);
  }
}

####
#### get date and timezone
####
#### Parses the text of a 'Date:' header and stores two entries in the
#### hash referenced by $values:
####   'date'      - "<year>-<month>-<day> <hh:mm><:ss>", where <month> is
####                 looked up in the file-level %months table (presumably
####                 month name => number; confirm where %months is defined)
####                 and ":00" is used when the header has no seconds.
####   'time_zone' - whatever follows the time, or undef if nothing does.
####
#### Arguments: $date_raw  - header text after "Date: "
####            $values    - hash reference that receives the result
####            $file_name - inbox file name, used in diagnostics only
####            $tmp       - ignored on input; kept so existing callers
####                         passing a fourth argument keep working
####
#### If the header cannot be parsed or the month is unknown, both entries
#### are set to "" (after optional diagnostics), or the program exits with
#### status 1 when $opt_stop_on_error is set.
####

sub date_parser
{
  my ($date_raw, $values, $file_name, $tmp) = @_;
  my ($day, $month_name, $year, $hh_mm, $seconds, $time_zone);

  # If you ever need to change this test, be especially careful with
  # the timezone; it may be just a number (-0600), or just a name (EET), or
  # both (-0600 (EET), or -0600 (EET GMT)), or without parenthesis: GMT.
  # You probably should use a 'greedy' regexp in the end
  #
  # The match is done in list context so that a failed match yields
  # undefined fields. Reading $1..$6 afterwards would not be safe: after a
  # failed match they still hold the captures of an earlier successful one.
  ($day, $month_name, $year, $hh_mm, $seconds, $time_zone) =
    ($date_raw =~
     /^\D*(\d{1,2})\s+(\w+)\s+(\d{2,4})\s+(\d+:\d+)(:\d+)?\s*(\S+.*)?/);

  # $day is defined exactly when the pattern matched; testing it first also
  # keeps an undefined key from being used on %months.
  if (!defined($day) || !defined($months{$month_name}))
  {
    if ($opt_debug || $opt_stop_on_error)
    {
      my @shown = map { defined($_) ? $_ : "" }
                      ($day, $month_name, $year, $hh_mm, $seconds);
      my $month_shown = "";

      if (defined($month_name) && defined($months{$month_name}))
      {
        $month_shown = $months{$month_name};
      }
      print "FAILED: date_parser: 1: $shown[0] 2: $shown[1] 3: $shown[2] " .
            "4: $shown[3] 5: $shown[4]\n";
      print "months{2}: $month_shown\n";
      print "date_raw: $date_raw\n";
      print "Inbox filename: $file_name\n";
    }
    exit(1) if ($opt_stop_on_error);
    $values->{'date'} = "";
    $values->{'time_zone'} = "";
    return;
  }

  $tmp = $year . "-" . $months{$month_name} . "-" . "$day $hh_mm";
  $tmp .= defined($seconds) ? $seconds : ":00";
  $values->{'date'} = $tmp;
  print "INSERTING DATE: $tmp\n" if ($opt_debug);
  $values->{'time_zone'} = $time_zone;
}

####
#### Insert to table
####
#### Cleans up the message text of one parsed mail and inserts the mail
#### into the my_mail table, updating the file-level $mail_* counters.
####
#### Arguments: $dbh       - database handle; its quote() and do() methods
####                         are used
####            $file_name - stored in the 'file' column
####            $values    - hash reference with the parsed header fields,
####                         'message' and 'hash'; 'message' is overwritten
####                         with the cleaned text
####
#### A mail is skipped (and only counted) when it has no subject or no
#### 'to' field, when the cleaned message is longer than
#### $opt_max_mail_size, when it has no 'from' field, or when the cleaned
#### message is empty. With $opt_test the query is printed instead of
#### executed. Dies if the insert fails for any reason other than a
#### duplicate entry.
####

sub update_table
{
  my ($dbh, $file_name, $values) = @_;
  my ($q, $tail, $message, $changed, $date, $sql_value, @columns, @row);

  if (!defined($values->{'subject'}) || !defined($values->{'to'}))
  {
    $mail_no_subject_f++;
    return;                     # Ignore these
  }

  $message = $values->{'message'};
  $message =~ s/^\s*//;         # removes whitespaces from the beginning

  # Repeat the cleanup until no entry of @remove_tail matches any more;
  # some mails may have duplicated messages. The entries of @remove_tail
  # are applied as regular expressions, so they are deliberately not
  # quoted here.
  do
  {
    $message =~ s/[\s\n>]*$//;  # removes whitespaces and '>' from the end
    $values->{'message'} = $message;
    foreach $tail (@remove_tail)
    {
      $message =~ s/$tail//;
    }
    $changed = ($message ne $values->{'message'});
    if ($changed)
    {
      $message =~ s/\s*$//;     # removes whitespaces from the end
      $mail_fixed++;
    }
  } while ($changed);

  # Don't insert mails bigger than $opt_max_mail_size
  if (length($message) > $opt_max_mail_size)
  {
    $mail_too_big++;
    return;
  }
  # Don't insert mails without 'From' field
  if (!defined($values->{'from'}) || $values->{'from'} eq "")
  {
    $mail_no_from_f++;
    return;
  }
  # Don't insert mails without the 'message'. This is checked before the
  # dry-run branch so that --test reports the same result a real run would.
  if ($message eq "")
  {
    $mail_no_txt_f++;
    return;
  }

  # Quoted value of a header field, or SQL NULL if the field is missing.
  $sql_value = sub
  {
    my ($field) = @_;
    return defined($values->{$field}) ? $dbh->quote($values->{$field})
                                      : "NULL";
  };

  # A mail without a usable Date header is stored with an empty date.
  $date = defined($values->{'date'}) ? $values->{'date'} : "";

  @columns = ('mail_id', 'message_id', 'in_reply_to', 'date', 'time_zone',
              'mail_from', 'reply', 'mail_to', 'cc', 'sbj', 'txt', 'file',
              'hash');
  @row = ("NULL",
          $sql_value->('message_id'),
          $sql_value->('in_reply_to'),
          "'" . $date . "'",
          $sql_value->('time_zone'),
          $sql_value->('from'),
          $sql_value->('reply'),
          $sql_value->('to'),
          $sql_value->('cc'),
          $dbh->quote($values->{'subject'}),
          $dbh->quote($message),
          $dbh->quote($file_name),
          "'" . $values->{'hash'} . "'");
  $q = "INSERT INTO my_mail (" . join(",", @columns) . ") VALUES (" .
       join(",", @row) . ")";

  if ($opt_test)
  {
    print "$q\n";
    $mail_inserted++;
  }
  elsif ($dbh->do($q))
  {
    $mail_inserted++;
  }
  # This should never happen. This means that the above q failed,
  # but it wasn't because of a duplicate mail entry
  elsif ($DBI::errstr !~ /Duplicate entry /)
  {
    die "FATAL: Got error :$DBI::errstr\nAttempted query was: $q\n";
  }
  else
  {
    $mail_duplicates++;
    print "Duplicate mail: query: $q\n" if ($opt_debug);
  }
  return;
}

####
#### In case you have two identical messages we wanted to identify them
#### and remove additionals; We do this by calculating a hash number of the
#### message and ignoring messages with the same from, date and hash.
#### This function calculates a simple 32 bit hash value for the message.
####
#### Argument: $txt - the message text
#### Returns:  an integer between 0 and 2**31 - 1 (0 for an empty text)
####

sub checksum
{
  my ($txt) = @_;
  my ($crc, $pos);

  $crc = 0;
  for $pos (0 .. length($txt) - 1)
  {
    # Shift left, add the character code, and add 1 when bit 30 of the
    # previous value was set; then keep only the low 31 bits.
    $crc = (($crc << 1) + ord(substr($txt, $pos, 1))) +
           (($crc & (1 << 30)) ? 1 : 0);
    $crc &= ((1 << 31) - 1);
  }
  return $crc;
}

####
#### my_which is used, because we can't assume that every system has the
#### which -command. my_which can take only one argument at a time.
#### Return values: requested system command with the first found path,
#### or undefined, if not found.
####
#### If $command itself names an executable file it is returned unchanged.
#### An empty PATH element, including the one after a trailing ':', stands
#### for the current directory.
####

sub my_which
{
  my ($command) = @_;
  my (@paths, $path, $candidate);

  return $command if (-f $command && -x $command);

  # The -1 limit keeps trailing empty fields, which split() would
  # otherwise drop; a missing PATH is treated as an empty search path.
  @paths = defined($ENV{'PATH'}) ? split(':', $ENV{'PATH'}, -1) : ();
  foreach $path (@paths)
  {
    $candidate = ($path eq "" ? "." : $path) . "/$command";
    return $candidate if (-f $candidate && -x $candidate);
  }
  return undef;
}

####
#### usage and version
####
#### Prints the version line (when $opt_version is set) or the full help
#### text, then exits with status 0.
#### Argument: $VER - version string to display
####

sub usage
{
  my ($VER) = @_;

  if ($opt_version)
  {
    print "$progname version $VER\n";
  }
  else
  {
    print <<EOF;
$progname version $VER

Description: Insert mails from inbox file(s) into a table. This program can
read group [mail_to_db] from the my.cnf file. You may want to have db
and table set there at least.

Usage: $progname [options] file1 [file2 file3 ...]
or:    $progname [options] --create [file1 file2...]
or:    cat inbox | $progname [options] --stdin

The last example can be used to read mails from standard input and can
useful when inserting mails to database via a program 'on-the-fly'.
The filename will be 'READ-FROM-STDIN' in this case.

Options:
--help             Show this help and exit.
--version          Show the version number and exit.
--debug            Print some extra information during the run.
--host=...         Hostname to be used.
--port=#           TCP/IP port to be used with connection.
--socket=...       MySQL UNIX socket to be used with connection.
--db=...           Database to be used.
--user=...         Username for connecting.
--password=...     Password for the user.
--stdin            Read mails from stdin.
--max_mail_size=#  Maximum size of a mail in bytes.
                   Beware of the downside letting this variable be too big;
                   you may easily end up inserting a lot of attached
                   binary files (like MS Word documents etc), which take
                   space, make the database slower and are not really
                   searchable anyway. (Default $opt_max_mail_size)
--create           Create the mails table. This can be done with the first run.
--test             Dry run. Print the queries and the result as it would be.
--no_path          When inserting the file name, leave out any paths of
                   the name.
--stop_on_error    Stop the run, if an unexpected, but not fatal error occurs
                   during the run. Without this option some fields may get
                   unwanted values. --debug will also report about these.
EOF
  }
  exit(0);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -