#!/usr/bin/python

import re
import os
import sys
from email import message_from_string
from mailbox import UnixMailbox

import sqlite

def find_mailboxes(dir):
    for dirpath, dirnames, filenames in os.walk(dir):
        for filename in filenames:
            match = re.match(r'[a-z][a-z-]*[a-z]', filename)

            if match:
                yield (match.group(0), os.path.join(dirpath, filename))

class MessageIndex:
    def __init__(self, path):
        self.path = path

        if not os.path.exists(path):
            connection = sqlite.connect(path)
            cursor = connection.cursor()

            cursor.execute('''
                create table mailbox (
                    "group" string,
                    path string)
                ''')

            cursor.execute('''
                create table mailbox_entry (
                    path string,
                    "index" integer,
                    message_id string,
                    offset integer)
                ''')

            cursor.execute('''
                create table message (
                    message_id string,
                    subject string,
                    "from" string,
                    date string,
                    "references" string,
                    lines integer,
                    bytes integer)
                ''')

            connection.commit()
            connection.close()

        self.connection = sqlite.connect(path)
        self.cursor = self.connection.cursor()

    def has_message(self, message_id):
        self.cursor.execute(
            'select * from message where message_id = %s', message_id)

        return self.cursor.rowcount > 0

    def _add_message(self, message):
        message_string = str(message)
        lines = len(message_string.splitlines())
        bytes = len(message_string)

        self.cursor.execute(
            '''insert into message values (%s, %s, %s, %s, %s, %s, %s)''',
            message['message-id'],
            message['subject'],
            message['from'],
            message['date'],
            message['references'],
            lines,
            bytes)

    def has_entry(self, path, message_id):
        self.cursor.execute('''
            select * from mailbox_entry where
                message_id = %s and
                path = %s
            ''', message_id, path)

        return self.cursor.rowcount > 0

    def _add_entry(self, path, index, message_id, offset):
        self.cursor.execute(
            '''insert into mailbox_entry values (%s, %s, %s, %d)''',
            path, index, message_id, offset)

    def _add_mailbox(self, group, path):
        self.cursor.execute(
            'insert into mailbox values (%s, %s)', group, path)

    def group_paths(self, group):
        self.cursor.execute(
            'select path from mailbox where "group" = %s', group)
        return sorted(row[0] for row in self.cursor.fetchall())

    def update(self, mailboxes):
        # Update mailbox table.

        self.cursor.execute('delete from mailbox')

        for group, path in mailboxes:
            self._add_mailbox(group, path)

        # Update mailbox_entry and message tables.

        for group in self.groups():
            index = 0

            for path in self.group_paths(group):
                fp = file(path)
                offset = 0
                index = self.group_message_count(group)
                mailbox = UnixMailbox(fp, message_from_file)

                for message in mailbox:
                    message_id = message['message-id']

                    if not self.has_message(message_id):
                        self._add_message(message)

                    if not self.has_entry(path, message_id):
                        self._add_entry(path, index, message_id, offset)

                    offset = fp.tell()
                    index += 1

        self.connection.commit()

    def groups(self):
        self.cursor.execute('select distinct "group" from mailbox')
        return [row[0] for row in self.cursor.fetchall()]

    def group_message_count(self, group):
        self.cursor.execute('''
            select count(*) from mailbox_entry, mailbox where
                mailbox_entry.path = mailbox.path and
                mailbox."group" = %s
            ''', group)
        return self.cursor.fetchone()[0]

    def _message(self, message_id):
        self.cursor.execute(
            'select * from message where message_id = %s', message_id)
        return self.cursor.fetchone()

    def _message_object(self, path, offset):
        fp = file(path)
        fp.seek(offset)
        lines = [fp.readline()]

        while True:
            line = fp.readline()

            if line.startswith('From '):
                break
            else:
                lines.append(line)

        message = message_from_string(''.join(lines))
        fp.close()
        return message

    def entry(self, group, index):
        self.cursor.execute('''
            select * from mailbox_entry, mailbox where
                mailbox_entry.path = mailbox.path and
                mailbox."group" = %s and
                mailbox_entry."index" = %s
            ''', group, index)
        return self.cursor.fetchone()

    def message_overview(self, message_id, header_names):
        headers = []
        message = self._message(message_id)
        (message_subject, message_from, message_date, message_references,
            message_lines, message_bytes) = message[1:]
        by_name = {
            'Message-ID': message_id,
            'Subject': message_subject,
            'From': message_from,
            'Date': message_date,
            'References': message_references,
            'Lines': message_lines,
            'Bytes': message_bytes,
            }

        return [by_name.get(name, 0) for name in header_names]

    def message_by_index(self, group, index):
        self.cursor.execute('''
            select mailbox_entry.* from mailbox_entry, mailbox where
                mailbox_entry."index" = %s and
                mailbox_entry.path = mailbox.path and
                mailbox."group" = %s
            ''', index, group)
        entry = self.cursor.fetchone()
        if entry is None:
            raise IndexError(index)
        path, index, message_id, offset = entry
        message = self._message_object(path, offset)
        return (message_id, message)

    def message_by_id(self, group, message_id):
        self.cursor.execute('''
            select * from mailbox_entry, mailbox where
                mailbox_entry.message_id = %s and
                mailbox_entry.path = mailbox.path and
                mailbox."group" = %s
            ''', message_id, group)
        entry = self.cursor.fetchone()
        if entry is None:
            raise KeyError(index)
        path, index, message_id, offset = entry
        message = self._message_object(path, offset)
        return (index, message)

if __name__ == '__main__':
    index_path, mailbox_path = sys.argv[1:]
    index = Index(index_path)
    index.update(find_mailboxes(mailbox_path))

