# -*- Mode: Python -*-

# I need to pull the "Date/Time Original" out of AVI files.
# specifically, I have mostly Canon files with an IDIT chunk.

# https://en.wikipedia.org/wiki/Resource_Interchange_File_Format

import struct
import sys

#W = sys.stderr.write

# chunk ids whose payload is a 4-byte form type followed by nested chunks.
sub_tags = ('RIFF', 'LIST')

def read_chunk (f):
    """Yield parse events for the RIFF chunk at the current position of f.

    f must be a seekable binary file object positioned at a chunk header:
    a 4-byte ASCII id followed by a 4-byte little-endian length.

    Events are (id, kind, payload) tuples:

      (form, '{', None)             a RIFF/LIST container opens
      (form, '}', None)             that container closes
      (id, 'data', bytes)           leaf chunk of at most 64 bytes, read inline
      (id, 'poslen', (pos, length)) larger leaf chunk, skipped; pos is the
                                    file offset of its payload

    For leaf chunks the file position is already past the chunk (and its
    pad byte) when the event is yielded.

    If the header cannot be read in full (end of file, truncated file)
    nothing is yielded; an enclosing container then stops reading children
    and still emits its closing '}' event.

    Raises UnicodeDecodeError if a chunk id is not ASCII.
    """
    id_data = f.read (4)
    len_data = f.read (4)
    if len (id_data) < 4 or len (len_data) < 4:
        # ran off the end of the file: there is no chunk here.
        return
    ch_id = id_data.decode ('ascii')
    (ch_len,) = struct.unpack ('<L', len_data)
    #W ('%s %r %d\n' % (ch_id, len_data, ch_len))
    if ch_id in sub_tags:
        pos = f.tell()
        sub_id = f.read(4).decode ('ascii')
        yield (sub_id, '{', None)
        end = pos + ch_len
        while f.tell() < end:
            got_child = False
            for sub_data in read_chunk (f):
                got_child = True
                yield sub_data
            if not got_child:
                # the declared length runs past the end of the file;
                # without this check the loop would never terminate.
                break
        yield (sub_id, '}', None)
    else:
        # odd-sized payloads are followed by one pad byte.
        padded = ch_len % 2
        if ch_len <= 64:
            data = f.read (ch_len)
            if padded:
                # skip the pad byte (its value is ignored).
                f.read (1)
            yield (ch_id, 'data', data)
        else:
            pos = f.tell()
            f.seek (pos + ch_len + padded)
            yield (ch_id, 'poslen', (pos, ch_len))

# this will print a tree dump of the structure of the entire file.
def walk (path):
    """Write an indented tree dump of the chunk structure of path to stdout.

    path names a RIFF file; it is opened in binary mode and closed again
    before returning.  Containers are printed as '<id> {' ... '} // <id>',
    small chunks with the repr of their payload, and large chunks with
    their length and file offset.

    Raises ValueError if read_chunk produces an unknown event kind.
    """
    W = sys.stdout.write
    depth = 0
    # 'with' closes the file even if parsing raises part-way through.
    with open (path, 'rb') as f:
        for ch_id, info, data in read_chunk (f):
            prefix = depth * '  '
            if info == '{':
                W ('%s%4s {\n' % (prefix, ch_id))
                depth += 1
            elif info == '}':
                # the closing brace lines up with the opening line.
                depth -= 1
                prefix = depth * '  '
                W ('%s } // %4s\n' % (prefix, ch_id))
            elif info == 'poslen':
                dpos, dlen = data
                W ('%s%4s %d at %d\n' % (prefix, ch_id, dlen, dpos))
            elif info == 'data':
                W ('%s%4s %r\n' % (prefix, ch_id, data))
            else:
                raise ValueError (info)

# this will walk through the file looking for a specific branch.
# chunk ids are compared with their space padding stripped, so the
# IDIT tag is found with the keys ('AVI', 'hdrl', 'IDIT').
def find (path, keys):
    """Return the text payload of the chunk reached by following keys.

    path names a RIFF file.  keys is a sequence of chunk ids, e.g.
    ('AVI', 'hdrl', 'IDIT'); ids are compared with surrounding whitespace
    stripped on both sides.  The keys are matched one after another
    against the chunks in file order; nesting depth is not checked, so a
    key only has to appear somewhere after the previous one.

    The payload of the chunk matching the last key is returned with
    leading/trailing NUL and newline bytes removed, decoded as ASCII.
    Returns None if keys is empty or no matching chunk is found.

    Raises UnicodeDecodeError if the payload is not ASCII.
    """
    wanted = [key.strip() for key in keys]
    if not wanted:
        return None
    last = len (wanted) - 1
    idx = 0
    # 'with' closes the file on every exit path, including the early return.
    with open (path, 'rb') as f:
        for ch_id, info, data in read_chunk (f):
            # a container's closing event repeats its id; it must not
            # consume a second key with the same name.
            if info == '}' or ch_id.strip() != wanted[idx]:
                continue
            if idx < last:
                idx += 1
                continue
            if info == 'data':
                raw = data
            elif info == 'poslen':
                # chunks over 64 bytes are reported by position only;
                # fetch the payload now.  The walk is abandoned right
                # after, so moving the file position is harmless.
                dpos, dlen = data
                f.seek (dpos)
                raw = f.read (dlen)
            else:
                # a container has no payload of its own; keep looking.
                continue
            return raw.strip (b'\x00\n').decode ('ascii')
    return None

# date formats seen in my sample set:
# 'SUN FEB 19 15:33:46 2006'      CanonMVI02
# '2003/09/09/ 09:20  '           CASIO EX-Z3
# '2001/ 9/10  12:00           '  Photosmart 612
# 'Sat Nov 08 11:56:58 2003'      CanonMVI01

if __name__ == '__main__':
    # command line driver: look up one tag per file, or dump the whole
    # chunk tree when no lookup option is given.
    import argparse
    parser = argparse.ArgumentParser (description='AVI metadata extractor')
    parser.add_argument ('-f', '--find', help="find specific tag")
    parser.add_argument (
        '-d', '--date', action='store_true',
        help="find Date/Time Original (AVI/hdrl/IDIT)",
    )
    parser.add_argument (
        '-m', '--model', action='store_true',
        help="find camera model (AVI/hdrl/INFO/ISFT)",
    )
    parser.add_argument ('paths', help="AVI files", metavar="AVI", type=str, nargs='+')
    options = parser.parse_args()

    # pick the tag path once; precedence is --find, then --date, then --model.
    if options.find:
        wanted = options.find.split ('/')
    elif options.date:
        wanted = ('AVI', 'hdrl', 'IDIT')
    elif options.model:
        wanted = ('AVI', 'hdrl', 'INFO', 'ISFT')
    else:
        wanted = None

    for path in options.paths:
        if wanted is None:
            sys.stdout.write ('file %s:\n' % (path,))
            walk (path)
        else:
            sys.stdout.write ("%r, %r\n" % (find (path, wanted), path))
