#! /usr/bin/python
"""
ver42ver510compare

Compare the number of level 1a files created by two different versions
and report the differences.

Created on 2017-04-12

@author: Bill Barrett
"""
import os
import re

# The official location for AIM CIPS data
AIM_DATA_DIRECTORY = '/aim/data/cips'

# The pattern for the public data subdirectories.  The commented-out form
# matches every north/south season; the active form limits the comparison
# to a single season.
#AIM_SEASON_YEAR_PATTERN = re.compile(r'^(nor|sou)th_20[01]\d$')
AIM_SEASON_YEAR_PATTERN = re.compile(r'^south_2015$')

# The four AIM CIPS cameras
CAMERAS = ['mx', 'my', 'px', 'py']

# The AIM CIPS data versions
OLD_VERSION = '04.20'
NEW_VERSION = '05.20'
DATA_VERSIONS = [OLD_VERSION, NEW_VERSION]

# Example file name:
# cips_sci_1a_orbit_49228_2016-119_cam_mx_v05.10_r02.nc.gz
# The orbit number is captured in the named group 'orbit', which main()
# uses as the key when comparing the two versions.
FILE_PATTERN = re.compile(r'''.*cips_sci_1a_orbit_
    (?P<orbit>\d{5})            # orbit number
    _(20[01]\d\-[0-3]\d{2})     # date
    _cam_[mp][xy]_v0[45]\.\d{2}_r0\d\.nc\.gz$
    ''', re.VERBOSE)

# Preliminary files are recognised only so that they can be skipped
# without being reported as badly named.
PRELIM_PATTERN = re.compile(r'.*prelim.*\.nc.*$')


def _collect_orbits(version_directory):
    """Map each orbit number found under a version directory to a file name.

    Parameters
    ----------
    version_directory : str
        A '<camera path>/ver_<version>' directory.  It is expected to hold
        exactly one subdirectory, which in turn holds the level 1a files.

    Returns
    -------
    dict
        Orbit number (str) -> file name.  When several files share an
        orbit number, the last one listed wins.  Empty when the directory
        is missing or has no subdirectory.
    """
    if not os.path.isdir(version_directory):
        print('PROBLEMS with {0}'.format(version_directory))
        return {}

    subdirectories = os.listdir(version_directory)
    if len(subdirectories) != 1:
        print('PROBLEMS with {0}'.format(version_directory))
    if not subdirectories:
        # Nothing to list; the problem has already been reported above.
        return {}

    # With more than one subdirectory the first one listed is still used,
    # after the warning above.
    file_directory = os.path.join(version_directory, subdirectories[0])

    orbits_and_filenames = {}
    for filename in os.listdir(file_directory):
        match = FILE_PATTERN.match(filename)
        if match is not None:
            orbits_and_filenames[match.group('orbit')] = filename
        elif PRELIM_PATTERN.match(filename) is None:
            # Neither a level 1a file nor a preliminary file.
            print('Bad file name {0}'.format(filename))
    return orbits_and_filenames


def _report_missing(present, other):
    """Print every file in `present` whose orbit is absent from `other`.

    Both arguments are orbit -> file name dictionaries.  Orbits are visited
    in sorted order so that the report is reproducible.
    """
    for orbit in sorted(present):
        if orbit not in other:
            print(present[orbit])


def main(data_directories, base_directory=None):
    """The main driver.

    For every camera directory that has data for the old version, count
    the orbits present in each version, then print the files whose orbit
    exists in only one of the two versions (old-only first, then new-only).

    Parameters
    ----------
    data_directories : list
        The list of directories to be searched
    base_directory : str, optional
        The root of the data tree.  Defaults to AIM_DATA_DIRECTORY.
    """
    for directory in get_directories(data_directories, base_directory):
        versions_and_files = {}
        for version in DATA_VERSIONS:
            version_directory = os.path.join(directory, 'ver_' + version)
            orbits_and_filenames = _collect_orbits(version_directory)
            print('{0} {1}'.format(version_directory,
                                   len(orbits_and_filenames)))
            versions_and_files[version] = orbits_and_filenames

        old_files = versions_and_files[OLD_VERSION]
        new_files = versions_and_files[NEW_VERSION]
        _report_missing(old_files, new_files)
        _report_missing(new_files, old_files)


def get_directories(data_directories, base_directory=None):
    """Find the camera directories that contain old-version data.

    Parameters
    ----------
    data_directories : list
        The list of directories to be searched
    base_directory : str, optional
        The root of the data tree.  Defaults to AIM_DATA_DIRECTORY.

    Returns
    -------
    list
        The '<base>/<subdirectory>/level_1a/<camera>' paths that have a
        'ver_<OLD_VERSION>' subdirectory, in input order and then in
        CAMERAS order.
    """
    if base_directory is None:
        base_directory = AIM_DATA_DIRECTORY

    test_directories = []
    for subdirectory in data_directories:
        for camera in CAMERAS:
            camera_path = os.path.join(base_directory, subdirectory,
                                       'level_1a', camera)
            old_version_path = os.path.join(camera_path,
                                            'ver_' + OLD_VERSION)
            if os.path.isdir(old_version_path):
                test_directories.append(camera_path)
    return test_directories


if __name__ == '__main__':
    # Get the list of directories to be searched, and then process them.
    # Only the subdirectories of AIM_DATA_DIRECTORY that match the season
    # pattern are data directories; processing them in sorted order makes
    # the output easier to work with.
    season_directories = sorted(
        name for name in os.listdir(AIM_DATA_DIRECTORY)
        if AIM_SEASON_YEAR_PATTERN.match(name) is not None
    )
    main(season_directories)