#! /usr/bin/python

""" ver42ver510compare

Compare the number of level 1a files created by two different versions
  and report the differences.

Created on 2017-04-12

@author: Bill Barrett

"""

import os
import re

# The official location for AIM CIPS data
AIM_DATA_DIRECTORY = '/aim/data/cips'
# The pattern for the public data subdirectories.  The general form is kept
# commented out below; the active pattern matches only south_2015.
#AIM_SEASON_YEAR_PATTERN = re.compile(r'^(nor|sou)th_20[01]\d$')
AIM_SEASON_YEAR_PATTERN = re.compile(r'^south_2015$')
# The four AIM CIPS cameras
CAMERAS = ['mx', 'my', 'px', 'py']
# The AIM CIPS data versions
OLD_VERSION = '04.20'
NEW_VERSION = '05.20'
DATA_VERSIONS = [OLD_VERSION, NEW_VERSION]
# Level 1a file names look like
# cips_sci_1a_orbit_49228_2016-119_cam_mx_v05.10_r02.nc.gz
# The dots of the ".nc.gz" suffix are escaped so that they match literal
# dots only, not any character.
FILE_PATTERN = re.compile(r'''.*cips_sci_1a_orbit_
                              (?P<orbit>\d{5})                         # orbit number
                              _(20[01]\d\-[0-3]\d{2})                  # date
                              _cam_[mp][xy]_v0[45]\.\d{2}_r0\d\.nc\.gz$
''', re.VERBOSE)
# Any file name containing "prelim" followed later by ".nc"
PRELIM_PATTERN = re.compile(r'.*prelim.*\.nc.*$')


def main(data_directories):
    """Compare the level 1a files of the two data versions and report.

    For every camera directory returned by ``get_directories``, the files
    of each version in ``DATA_VERSIONS`` are listed and the number of
    recognised orbits is printed.  Afterwards the file names of the orbits
    found in only one of the two versions are printed: first those present
    only in ``OLD_VERSION``, then those present only in ``NEW_VERSION``.

    Parameters
    ----------
    data_directories : list
        The list of directories to be searched

    """
    for directory in get_directories(data_directories):
        versions_and_files = {}
        for version in DATA_VERSIONS:
            version_directory = directory + '/ver_' + version
            orbits_and_filenames = _collect_orbit_files(version_directory)
            print('{0} {1}'.format(version_directory, len(orbits_and_filenames)))
            versions_and_files[version] = orbits_and_filenames
        old_files = versions_and_files[OLD_VERSION]
        new_files = versions_and_files[NEW_VERSION]
        # Membership is tested against the dictionaries themselves, which is
        # a hash lookup rather than a scan of a list of keys.
        for orbit, filename in old_files.items():
            if orbit not in new_files:
                print(filename)
        for orbit, filename in new_files.items():
            if orbit not in old_files:
                print(filename)


def _collect_orbit_files(version_directory):
    """Map each orbit number to a file name found under one version directory.

    Parameters
    ----------
    version_directory : str
        A ``.../ver_XX.XX`` directory, expected to contain exactly one
        subdirectory that holds the data files

    Returns
    -------
    dict
        Orbit number (string) -> file name.  Empty when the directory is
        missing or has no subdirectory; a 'PROBLEMS with' line is printed
        in those cases, and also when more than one subdirectory exists.

    """
    # Only the old version is checked for existence when the camera
    # directories are selected, so the new version may be absent here.
    if not os.path.isdir(version_directory):
        print('PROBLEMS with {0}'.format(version_directory))
        return {}
    subdirectories = os.listdir(version_directory)
    if len(subdirectories) != 1:
        print('PROBLEMS with {0}'.format(version_directory))
    if not subdirectories:
        # Nothing to list; report zero orbits instead of failing on [0].
        return {}
    orbits_and_filenames = {}
    for filename in os.listdir(version_directory + '/' + subdirectories[0]):
        match = FILE_PATTERN.match(filename)
        if match is not None:
            # A later file for the same orbit replaces the earlier one.
            orbits_and_filenames[match.group('orbit')] = filename
        elif PRELIM_PATTERN.match(filename) is None:
            # Files matching PRELIM_PATTERN are skipped without a message.
            print('Bad file name {0}'.format(filename))
    return orbits_and_filenames


def get_directories(data_directories):
    """Find the camera directories that contain old-version data.

    For each given subdirectory of ``AIM_DATA_DIRECTORY`` and each camera in
    ``CAMERAS``, the path ``<subdirectory>/level_1a/<camera>`` is kept when
    it has a ``ver_<OLD_VERSION>`` directory inside it.

    Parameters
    ----------
    data_directories : list
        The list of directories to be searched

    Returns
    -------
    list
        The selected camera paths, ordered by subdirectory and then camera

    """
    level_1a_roots = [
        AIM_DATA_DIRECTORY + '/' + season + '/level_1a/'
        for season in data_directories
    ]
    candidates = [
        root + camera
        for root in level_1a_roots
        for camera in CAMERAS
    ]
    # Keep only the camera paths for which the old version actually exists
    return [
        path
        for path in candidates
        if os.path.isdir(path + '/ver_' + OLD_VERSION)
    ]


if __name__ == '__main__':
    # Get the list of directories to be searched, and then process them.
    #
    # Of all the entries in AIM_DATA_DIRECTORY only those whose names match
    # the season/year pattern are data directories.  They are sorted because
    # it makes the data easier to work with if this is done in order.
    data_directories = sorted(
        entry
        for entry in os.listdir(AIM_DATA_DIRECTORY)
        if AIM_SEASON_YEAR_PATTERN.match(entry) is not None
    )
    main(data_directories)