/* * ---------------------------------------------------------------------------------- * * Copyright (c) 2009 by John Fitzgibbon. * * Permission to use, copy, modify, and distribute this software for any purpose * with or without fee is hereby granted, provided that the above copyright notice, * this permission notice and the following disclaimer appear in all copies. * * THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, * OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS * SOFTWARE. * * ---------------------------------------------------------------------------------- * * dbxrecover: an Outlook Express 5/6 message recovery utility * * Version: 1.0 * Date: 2009-06-05 * * Description: * * This program attempts to analyse and recover mail messages from corrupt, * (or valid), Outlook Express DBX files, writing the recovered mail to * stdout in UNIX mbox file format. I wrote this program to recover some * old work email from OE5 dbx files that refused to convert using kmail's * mail import features. * * Note that this program ignores the normal DBX file indexes and headers. * It looks for possible mail messages within the file data. Since this * approach can be error prone, it is only recommended if normal Outlook * Express conversion and/or recovery options have failed. The author takes * no responsibility for losses arising from the use of this program. * * This program uses fairly standard C, so it should compile on a variety * of platforms without problems. For GCC, the compile command would be: * * $ gcc -o dbxrecover dbxrecover.c * * Once compiled, run the program with no parameters for more detailed help. * * ---------------------------------------------------------------------------------- */ #include #include #include struct msg_hdr { unsigned int off; unsigned int flags; unsigned int len; unsigned int next; }; static unsigned int msg_count = 0; static unsigned int max_msg_len = 0; static char *msgbuf = NULL; static unsigned int msgbuf_size = 0; static int quiet = 0; static void extract_mail(const char *filename, long offset) { FILE *fp = fopen(filename,"rb"); unsigned int msg_len = 0; if (fp == NULL) { fprintf(stderr,"ERROR: unable to open %s\n", filename); return; } while (offset) { struct msg_hdr hdr; if (fseek(fp, offset, SEEK_SET)) { fprintf(stderr,"%sappears to be invalid or corrupt, (unable to access offset %8.8x in %s)\n", quiet ? "message " : "", offset, filename); goto done; } if (fread(&hdr, sizeof(struct msg_hdr), 1, fp) != 1) { fprintf(stderr,"%sappears to be invalid or corrupt, (cannot read message header at offset %8.8x in %s)\n", quiet ? "message " : "", offset, filename); goto done; } if (hdr.off != (unsigned int)offset) { fprintf(stderr,"%sappears to be invalid or corrupt, (offset mismatch in message header: expected:%8.8x, read:%8.8x in %s)\n", quiet ? "message " : "", offset, hdr.off, filename); goto done; } if (msgbuf && (msg_len + hdr.len < msgbuf_size)) { if (fread(msgbuf + msg_len, hdr.len, 1, fp) != 1) { fprintf(stderr,"%sappears to be invalid or corrupt, (cannot read message data at offset %8.8x in %s)\n", quiet ? "message " : "", offset, filename); goto done; } } msg_len += hdr.len; offset = (long)(hdr.next); } if (!quiet) fprintf(stderr,"appears to be a valid message of length %lu\n", msg_len); msg_count++; if (msgbuf && (msg_len <= msgbuf_size - 4)) { unsigned int extra_len = 0; char *cp = msgbuf; char *msgstart; while (*cp != ':') cp++; if (!strncmp(cp, ": from ", 7)) { cp += 5; } cp -= 3; msgstart = cp; *cp = 'F'; cp++; *cp = 'r'; cp++; *cp = 'o'; cp++; *cp = 'm'; cp++; *cp = ' '; cp++; do { while ((cp < msgbuf + msg_len - 3) && (*cp != '\r') && (*cp != '\n')) cp++; if (((*(cp+1) == '\r') || (*(cp+1) == '\n')) && ((*(cp+2) == ' ') || (*(cp+2) == '\t'))) { *cp = ' '; *(cp+1) = ' '; *(cp+2) = ' '; } } while ((cp < msgbuf + msg_len - 3) && (*cp == ' ')); while (cp < msgbuf + msg_len - 6) { if (((*cp == '\r') || (*cp == '\n')) && !strncmp(cp+1,"From ",5)) { *(cp+1) = 'f'; } cp++; } if ((*(msgbuf+msg_len-2) != '\r') || (*(msgbuf+msg_len-1) != '\n')) { *(msgbuf+msg_len) = '\r'; *(msgbuf+msg_len+1) = '\n'; msg_len += 2; extra_len += 2; } if ((*(msgbuf+msg_len-4) != '\r') || (*(msgbuf+msg_len-3) != '\n')) { *(msgbuf+msg_len) = '\r'; *(msgbuf+msg_len+1) = '\n'; msg_len += 2; extra_len += 2; } fwrite(msgstart, msg_len-(msgstart-msgbuf), 1, stdout); msg_len -= extra_len; } done: if (msg_len+4 > max_msg_len) { max_msg_len = msg_len+4; } fclose(fp); } int main(int argc, char *argv[]) { int i; int force = 0; if (argc < 2) { fprintf(stderr,"\n"); fprintf(stderr,"dbxrecover: an Outlook Express 5/6 message recovery utility.\n"); fprintf(stderr,"\n"); fprintf(stderr,"Version: 1.0\n"); fprintf(stderr,"Date: 2009-06-05\n"); fprintf(stderr,"\n"); fprintf(stderr,"This program attempts to analyse and recover mail messages from corrupt, (or valid),\n"); fprintf(stderr,"Outlook Express DBX files, writing the recovered mail to a standard UNIX mbox file.\n"); fprintf(stderr,"\n"); fprintf(stderr,"Note that this program ignores the normal DBX file indexes and headers. It looks for\n"); fprintf(stderr,"possible mail messages within the file data. Since this approach can be error prone,\n"); fprintf(stderr,"it is only recommended if normal OE conversion and/or recovery options have failed.\n"); fprintf(stderr,"\n"); fprintf(stderr,"Usage:\n"); fprintf(stderr,"\n"); fprintf(stderr,"dbxrecover [dbx file names]\n"); fprintf(stderr," - or - \n"); fprintf(stderr,"dbxrecover -s [max message size] [dbx file names] > [output mbox file]\n"); fprintf(stderr,"\n"); fprintf(stderr,"The first format analyses the specified file(s), listing recoverable message details.\n"); fprintf(stderr,"The second format attempts to recover messages and writes them to the specified mbox file.\n"); fprintf(stderr,"\n"); fprintf(stderr,"Note that the final line of the analysis will tell you the maximum message size encountered.\n"); fprintf(stderr,"You can use this value as the -s parameter in the recovery phase to recover all messages, or\n"); fprintf(stderr,"you can use a smaller value to recover only messages that do not exceed the specified size.\n"); fprintf(stderr,"\n"); fprintf(stderr,"You can include a -q option, (\"quiet\"), to disable listing of individual message details.\n"); fprintf(stderr,"\n"); fprintf(stderr,"You can also specify -f to force analysis/recovery, though this is not recommended.\n"); fprintf(stderr,"The force option ignores the file signature. This could cause unpredictable results\n"); fprintf(stderr,"because you are probably not dealing with a valid Outlook Express DBX message store.\n"); fprintf(stderr,"\n"); exit(1); } i = 1; while ((argc > i) && (argv[i][0] == '-') && ((argv[i][1] == 'f') || (argv[i][1] == 'q') || (argv[i][1] == 's')) && (argv[i][2] == '\0')) { switch (argv[i][1]) { case 'f': i++; force = 1; break; case 'q': i++; quiet = 1; break; case 's': i++; if (argc > i) { msgbuf_size = atol(argv[i]); i++; if (msgbuf_size < 1024) { fprintf(stderr,"ERROR: bad message buffer size %lu, (minimum = 1024)\n", msgbuf_size); exit(1); } msgbuf = (char *)malloc(msgbuf_size); if (msgbuf == NULL) { fprintf(stderr,"ERROR: failed to allocate memory for a message buffer of size %lu\n", msgbuf_size); exit(1); } } break; default: i++; break; } } for ( ; i 0) { char *c = buf; while (c < (buf+readcount)) { if (((*c == 'R') || (*c == 'D')) && (!strncmp(c, "Delivered-To: ", 14) || !strncmp(c, "Return-Path: ", 13) || !strncmp(c, "Received: ", 10))) { if (c - buf >= 16) { unsigned int *phdr = (unsigned int *)c; phdr-=4; if (*phdr == offset + (c-buf) - 16) { if (!quiet) fprintf(stderr,"possible message starting at offset %8.8x...", offset + (c-buf) - 16, *phdr, *(phdr+1), *(phdr+2), *(phdr+3)); extract_mail(argv[i], offset + (c-buf) - 16); } } } c+=4; } if (readcount >= 16) { memcpy(buf, buf+readcount-16, 16); } offset += c - buf; } } else if ((signature[0]==0x36464D4A) && (signature[1]==0x00010003)) { fprintf(stderr,"%s appears to be an OE4 file, (not recoverable)\n", argv[i]); } else if ((signature[0]==0xFE12ADCF) && (signature[1]==0x6F74FDC6) && (signature[2]==0x11D1E366) && (signature[3]==0xC0004E9A)) { fprintf(stderr,"%s appears to be an OE5/6 folder file, (no messages to recover)\n", argv[i]); } else { fprintf(stderr,"%s has an unknown signature, (%8.8x %8.8x %8.8x %8.8x), and cannot be recovered\n", argv[i], signature[0], signature[1], signature[2], signature[3]); } } fclose(fp); } } fprintf(stderr,"%lu recoverable messages, with a maximum message length of %lu bytes\n", msg_count, max_msg_len); return 0; }