# Current File : //opt/dedrads/mailparse/eximparse.py
#!/opt/imh-python/bin/python3
import hashlib
import re
import json
import os

# Matches a message-arrival ("<=") line and captures: date, time (fractional
# seconds are matched but not captured), message id, envelope sender, the
# T="..." subject, the address inside "from <...>" and the first
# whitespace-delimited token after "for".
# NOTE(review): the bracketed number after the timestamp is presumably the exim
# process id, and the fractional seconds / T= / "from <...>" fields presumably
# depend on the server's log_selector settings -- confirm against exim config.
pattern_in = re.compile(
    # Timestamp: YYYY-MM-DD HH:MM:SS.fraction
    r'^(?P<date>\d{4}-\d{2}-\d{2}) '
    r'(?P<time>\d{2}:\d{2}:\d{2})\.\d+\s+'
    # "[digits] <msgid> <= <envelope sender>", then skip lazily to the subject
    r'\[\d+\] (?P<msgid>\S+) <= (?P<env_sender>\S+).*?'
    r'T="(?P<subject>.*?)"\s+'
    r'from\s+<(?P<sender>[^>]+)>\s+'
    # Only the first token after "for" is captured
    r'for\s+(?P<recipient>\S+)'
)

# Matches a delivery ("=>") line and captures the message id, the first token
# after "=>" (stored by parse_exim as "delivered_to") and the value of the
# R= field, which parse_exim treats as the router name.
pattern_out = re.compile(
    # Timestamp is required but not captured here
    r'^\d{4}-\d{2}-\d{2} '
    r'\d{2}:\d{2}:\d{2}\.\d+ '
    r'\[\d+\] (?P<msgid>\S+) => '
    r'(?P<recipient>\S+).*?'
    # \b keeps "R=" from matching as the tail of a longer field name
    r'\bR=(?P<router>\S+)'
)

# Arrival records keyed by message id; filled by parse_exim from "<=" lines and
# looked up when a matching "=>" line is found. Lives only in memory.
msgid_map = {}
# Log file that parse_exim reads.
exim_log = "/var/log/exim_mainlog"
# JSON state file holding the saved read offset and first-line hash.
offset_file = "/opt/dedrads/mailparse/log_offsets.json"
# Output file; parse_exim appends one JSON object per line.
output_log = "/opt/dedrads/mailparse/out/messages.json"

def load_offset(json_file):
    """Load the saved parser state from a JSON file.

    Args:
        json_file: Path of the JSON state file.

    Returns:
        The decoded JSON object as a dict. An empty dict is returned when the
        file does not exist, is not valid UTF-8, is not valid JSON, or holds
        valid JSON that is not an object (e.g. a list or a number), so callers
        can always use ``.get()`` on the result and start from scratch.
    """
    try:
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError, UnicodeDecodeError):
        # Missing or corrupt state: behave as if nothing was saved yet.
        return {}

    # A state file containing e.g. "[]" or "0" decodes fine but is unusable.
    return data if isinstance(data, dict) else {}

def save_offset(json_file, offsets):
    """Atomically write the parser state to a JSON file.

    The data is serialised first and written to a temporary file next to
    the target, which is then renamed over the target. A failure at any
    point therefore leaves the previous state file untouched instead of
    truncated or half-written.

    Args:
        json_file: Path of the JSON state file to (re)write.
        offsets: JSON-serialisable dict of state values.

    Raises:
        TypeError, ValueError: If ``offsets`` cannot be serialised to JSON.
        OSError: If the temporary file cannot be written or renamed.
    """
    # Serialise before touching the filesystem so bad data cannot clobber
    # the existing state.
    payload = json.dumps(offsets, indent=4)

    # The pid keeps two overlapping runs from sharing one temp file.
    tmp_file = "{}.{}.tmp".format(json_file, os.getpid())
    try:
        with open(tmp_file, 'w', encoding='utf-8') as f:
            f.write(payload)
        os.replace(tmp_file, json_file)
    except OSError:
        # Best-effort cleanup; the original error is what matters.
        try:
            os.remove(tmp_file)
        except OSError:
            pass
        raise

def parse_exim():
    """Parse new exim log lines and append outbound deliveries to the output.

    Reads ``exim_log`` from the offset saved in ``offset_file``. Every "<="
    line matched by ``pattern_in`` is remembered in ``msgid_map``; every "=>"
    line matched by ``pattern_out`` whose router is ``remoteserver_route`` and
    whose message id is in ``msgid_map`` is written to ``output_log`` as one
    JSON object per line (the arrival fields plus ``delivered_to`` and
    ``outbound_line``). The new offset and the first-line hash are then saved.

    The scan restarts from the beginning of the log when the MD5 of its first
    line differs from the saved one, or when the saved offset is larger than
    the file or is not a non-negative integer.

    The log is read in binary mode in a single pass so that the saved offset
    is a true byte count and each line is seen exactly once. A final line
    without a trailing newline is left for the next run.

    Note: ``msgid_map`` is kept only in memory, so a "=>" line is matched only
    against "<=" lines read by this process.

    Returns:
        None.

    Raises:
        OSError: If the log, the output file or the state file cannot be
            opened or written (e.g. ``FileNotFoundError`` for a missing log).
    """
    offsets = load_offset(offset_file)
    last_position = offsets.get(exim_log, 0)
    previous_first_line = offsets.get('exim_start_hash', 0)

    # A hand-edited or damaged state file must not make seek() blow up.
    if (isinstance(last_position, bool)
            or not isinstance(last_position, int)
            or last_position < 0):
        last_position = 0

    with open(exim_log, "rb") as f:
        # Fingerprint the first line to detect a replaced (rotated) log.
        first_line = f.readline().decode("utf-8", errors="replace").strip()
        current_first_line_hash = hashlib.md5(
            first_line.encode("utf-8")
        ).hexdigest()

        # Size of the file actually opened, not whatever the path names now.
        log_size = os.fstat(f.fileno()).st_size

        # Start at 0 if the first-line hashes differ or the log shrank.
        if (previous_first_line != current_first_line_hash
                or last_position > log_size):
            last_position = 0
            offsets['exim_start_hash'] = current_first_line_hash

        out_dir = os.path.dirname(output_log)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        with open(output_log, "a", encoding="utf-8") as out:
            f.seek(last_position)
            for raw_line in f:
                # exim may still be writing the last line; pick it up whole
                # on the next run instead of losing its second half.
                if not raw_line.endswith(b"\n"):
                    break
                last_position += len(raw_line)
                line = raw_line.decode("utf-8", errors="replace")

                # "<=" line: remember the arrival details for this msgid.
                m = pattern_in.search(line)
                if m:
                    msgid = m.group("msgid")
                    msgid_map[msgid] = {
                        "msgid": msgid,
                        "date": m.group("date"),
                        "time": m.group("time"),
                        "envelope_sender": m.group("env_sender"),
                        "subject": m.group("subject"),
                        "from": m.group("sender"),
                        "original_recipient": m.group("recipient"),
                    }
                    continue

                # "=>" line: emit a record for outgoing deliveries only.
                m = pattern_out.search(line)
                if not m:
                    continue

                msgid = m.group("msgid")
                # router=remoteserver_route are the outgoing messages
                if (msgid in msgid_map
                        and m.group("router") == "remoteserver_route"):
                    result = msgid_map[msgid].copy()
                    result["delivered_to"] = m.group("recipient")
                    result["outbound_line"] = line.strip()
                    out.write(json.dumps(result) + "\n")

    # Persist the offset only after the output file has been closed, so a
    # failed write is retried on the next run rather than skipped.
    offsets[exim_log] = last_position
    save_offset(offset_file, offsets)

# Run a single parsing pass when the file is executed as a script.
if __name__ == "__main__":
    parse_exim()