#!/data_systems/opt/bin/python3
"""
rsync_cips_to_hampton

A script to use rsync to push AIM CIPS data to Hampton University.
The python code calls a bash shell script to handle the actual data transfer.
This is defined by the variable 'DATA_TRANSFER_SCRIPT'.

Because the Hampton server times out on large data transfers, the levels
with a large number of files are broken into subunits.

The requirements and hence the design and goals are in a state of flux.
The current goals are:
1) To move the approved ver 05.20 rev 05 PMC seasons to Hampton University.
   This list is expected to change.
2) To move current data over on a daily basis as it is approved for the
   level 2 and higher PMC data
3) To update the level 1A and 1P data on a regular basis
4) To be flexible enough that the version 04.20 rev 06 data can be added.
5) To be flexible enough that the RAA data can be added.

Created June 2019

@author: Bill Barrett
"""
import os.path
import time
import datetime
from collections import namedtuple
import traceback

from aimpi_shell_execution import AimPiShellExecution
from season_and_version_utils import SeasonAndVersionUtils

# The user name for the remote (Hampton) system
REMOTE_USER = 'aim_cips'

# The server name for the remote (Hampton) system
REMOTE_SERVER = 'aim.hamptonu.edu'

# The base directory on the remote (Hampton) system
# NOTE: this is correct for PMC data ONLY, as this script does not currently
# handle RAA
REMOTE_BASE_DIR = '/mnt/home/aim_cips/interim_archive/cips/data/PMC'

# The base directory on the local (AIM CIPS) system
SOURCE_BASE_DIR = '/aim/data/cips'

# The directory and subdirectories where the approved data lists are found
APPROVED_DATA_DIRECTORY = 'file_listings'
APPROVED_DATA_SUBDIRECTORIES = [
    'orbit_strips',  # PMC level 2
    'daisies'        # PMC level 3a
]

# The fully vetted version 05.20 rev 05 seasons.
# Uncomment a season to include it in the transfer.
APPROVED_VERSION_0520_REV_05_SEASONS = [
    # 'north_2007',
    # 'north_2008',
    # 'north_2009',
    # 'north_2010',
    # 'north_2011',
    # 'north_2012',
    # 'north_2013',
    # 'north_2014',
    # 'north_2015',
    # 'north_2016',
    # 'north_2018',
    # 'north_2019',
    'north_2020',
    # 'south_2007',
    # 'south_2008',
    # 'south_2009',
    # 'south_2010',
    # 'south_2011',
    # 'south_2012',
    # 'south_2013',
    # 'south_2014',
    # 'south_2015',
    # 'south_2016',
    # 'south_2018',
    # 'south_2019',
    # 'south_2020',
]

# The following seasons have 3d data
LEVEL_3D_SEASONS = [
    'north_2007', 'north_2008', 'north_2009',
    'south_2007', 'south_2008', 'south_2009'
]

SEASONS = ['north', 'south']

# Each AIM CIPS data level has both a version and a revision
VersionAndRevision = namedtuple('VersionAndRevision', 'version revision')

# Uncomment a level to include it in the transfer.
LEVEL_VERSION_AND_REVISION = {
    'level_1a': VersionAndRevision('05.20', '01'),
    # 'level_1p': VersionAndRevision('05.20', '04'),
    # 'level_2_pre': VersionAndRevision('05.20', '05'),
    'level_2': VersionAndRevision('05.20', '05'),
    ### 'raa/level_2a': VersionAndRevision('01.10', '04'),  # This script doesn't handle RAA right now.
    ### 'raa/level_2b': VersionAndRevision('01.10', '04'),  # This script doesn't handle RAA right now.
    # 'level_3': VersionAndRevision('05.20', '05'),
    'level_3b': VersionAndRevision('05.20', '05'),
    'level_3c': VersionAndRevision('05.20', '05'),
    'level_3d': VersionAndRevision('05.20', '05'),
    'level_3e': VersionAndRevision('05.20', '05')
}

# Level 1A data is broken out by camera
CAMERAS = ['mx', 'px', 'my', 'py']

# Level 1p has the following subdirectories
LEVEL_1P_SUBDIRECTORIES = ['bias_maps', 'min_1a', 'orbit_catalog',
                           'rn_daily_map']

# Level 2 pre has the following subdirectories
# NOTE(review): 'rayleigh_detect' used to be listed twice, which would rsync
# the same directory twice. The duplicate was dropped; confirm that the second
# entry was not meant to be a different directory.
LEVEL_2_PRE_SUBDIRECTORIES = ['daily_mls_bias', 'geo_bin',
                              'high_sza_mdl_errors', 'hist_widths',
                              'nadir_rayleigh', 'rayleigh_detect',
                              'stats_grid', 'residual_dmb_errors']

# The following data types are found in PMC level 2 data
LEVEL_2_TYPES = ['alb.png', 'cat.nc.gz', 'cld.nc.gz', 'etc.nc.gz',
                 'frq.nc.gz', 'iwc.png', 'psf.nc.gz', 'rad.png']

# shell script to do the actual rsync
DATA_TRANSFER_SCRIPT = '/aim/sds/common/scripts/rsync_cips_to_hampton.sh'

# The year AIM was launched
FIRST_AIM_YEAR = 2007


def _log(message):
    """Print message prefixed with a 'YYYY-MM-DD HH:MM:SS' local timestamp."""
    print(time.strftime('%Y-%m-%d %H:%M:%S') + ' ' + message)


class RsyncCipsToHampton:
    """
    Use rsync to push AIM CIPS data to Hampton University.

    The class is a context manager: all of the transfers are performed when
    the 'with' block is entered.
    """

    def __init__(self, approved_directory_path):
        """
        Validate the approved-listing directory and work out which years
        and which in-progress season apply to this run.

        Parameters
        ----------
        approved_directory_path : string
            the path where the approved data listings will be found

        Raises
        ------
        AssertionError
            if approved_directory_path is not an existing directory
        """
        # Raised explicitly (rather than via 'assert') so the check is still
        # performed under 'python -O'; the exception type is unchanged.
        if not os.path.isdir(approved_directory_path):
            raise AssertionError(
                '{} is not a valid directory path'.format(
                    approved_directory_path))
        self.approved_directory_path = approved_directory_path

        today = datetime.datetime.now()
        # Every year from launch through the current year, as strings
        self.years = [str(year)
                      for year in range(FIRST_AIM_YEAR, today.year + 1)]

        # Year and day-of-year, e.g. '2020-153'
        processing_date = today.strftime('%Y-%j')
        # Presumably a '<season>_<year>' string such as 'north_2020', since it
        # is compared against that form in __enter__ -- verify against
        # SeasonAndVersionUtils.get_season.
        self.season_year = SeasonAndVersionUtils.get_season(processing_date)

    def __enter__(self):
        """
        Walk through the years, seasons, and levels, transferring the data
        from the AIM CIPS server to the remote server.

        Returns
        -------
        RsyncCipsToHampton
            this instance, so that 'with ... as name' binds a usable object
        """
        for year in self.years:
            for season in SEASONS:
                season_year = '{0}_{1}'.format(season, year)
                if season_year == self.season_year:
                    # TODO: the season matching today's date is skipped
                    continue
                if season_year in APPROVED_VERSION_0520_REV_05_SEASONS:
                    self.transfer_season(season_year)
        return self

    def __exit__(self, exception_type, exception_value, traceback_info):
        """
        Necessary if __enter__ is defined. Prints message on error exit.

        Returns
        -------
        bool
            True for a normal exit, False if the exception_type is not None
            (returning False lets the exception propagate to the caller)
        """
        if exception_type is not None:
            print(time.strftime("%Y-%m-%d %H:%M:%S") +
                  ' type: {0}, value: {1}'.format(exception_type,
                                                  exception_value))
            traceback.print_exception(exception_type, exception_value,
                                      traceback_info)
            return False  # pass the exception through
        return True

    def transfer_season(self, season_year):
        """
        Transfer every configured data level for one season.

        Parameters
        ----------
        season_year : string
            season and year, e.g. 'north_2018'
        """
        # Cycle through the various levels and data types
        for level, version_and_revision in sorted(
                LEVEL_VERSION_AND_REVISION.items()):
            version = 'ver_' + version_and_revision.version
            revision = 'rev_' + version_and_revision.revision

            if level == 'level_1a':
                # level 1a has separate directories for each of the CIPS
                # cameras
                for camera in CAMERAS:
                    level_camera = '{0}/{1}'.format(level, camera)
                    self.rsync_files(
                        season_year, level_camera, version, revision)
            elif level == 'level_1p':
                # level_1p is broken into a series of subdirectories
                for subdirectory in LEVEL_1P_SUBDIRECTORIES:
                    rev_and_sub = '{0}/{1}'.format(revision, subdirectory)
                    self.rsync_files(
                        season_year, level, version, rev_and_sub)
            elif 'level_2_pre' in level:
                # level_2_pre is broken into a series of subdirectories
                for subdirectory in LEVEL_2_PRE_SUBDIRECTORIES:
                    rev_and_sub = '{0}/{1}'.format(revision, subdirectory)
                    self.rsync_files(
                        season_year, level, version, rev_and_sub)
            elif level == 'level_2':
                # heuristically it has been determined that due to the number
                # of files involved, transferring the entire level 2 directory
                # will usually fail. So the transfer is done one file type
                # at a time
                for file_type in LEVEL_2_TYPES:
                    self.rsync_files(
                        season_year, level, version, revision,
                        file_type=file_type)
            elif 'raa' in level:
                # NOTE TEMPORARILY SKIPPING RAA
                continue
            else:
                # levels 3, 3b, 3c, 3d, and 3e
                # Only some seasons have level 3d data
                if (level == 'level_3d'
                        and season_year not in LEVEL_3D_SEASONS):
                    continue
                self.rsync_files(season_year, level, version, revision)

    @staticmethod
    def rsync_files(season_and_year, level, version, revision,
                    file_type=None):
        """
        Transfer one source directory (optionally restricted to one file
        type) to the remote system.

        The source directory must exist locally; otherwise a message is
        printed and nothing is transferred. The matching remote directory is
        created with 'mkdir -p' over ssh before DATA_TRANSFER_SCRIPT is run.

        Parameters
        ----------
        season_and_year : string
            season and year, e.g. 'north_2018'
        level : string
            CIPS data level, e.g. 'level_1p', 'level_1a/mx', 'raa/level_2b'
        version : string
            CIPS data version directory, e.g. 'ver_05.20'
        revision : string
            CIPS data revision directory, optionally followed by a
            subdirectory, e.g. 'rev_05' or 'rev_04/bias_maps'
        file_type : string, optional
            a file type, e.g. 'alb.png', 'cat.nc.gz'
        """
        # path base is used both at LASP and Hampton as part of the path
        path_base = '{0}/{1}/{2}/{3}'.format(
            season_and_year, level, version, revision)
        lasp_dir = os.path.join(SOURCE_BASE_DIR, path_base)

        # Does the source directory exist?
        if not os.path.isdir(lasp_dir):
            _log('{} not found, continuing'.format(lasp_dir))
            return

        # For rsync a directory on the sending side should end with '/'
        lasp_dir += '/'

        # Create the target directory on the remote system if necessary
        target_full_path = os.path.join(REMOTE_BASE_DIR, path_base)
        mkdir_command = 'mkdir -p {}'.format(target_full_path)
        _log(mkdir_command)
        status, output = AimPiShellExecution.execute_ssh_command(
            REMOTE_USER, REMOTE_SERVER, mkdir_command)
        if status != 0:
            # Best effort: report the failure but still attempt the rsync
            output = output.replace('\r', '')
            _log('remote mkdir failed: status = {0}, {1}'.format(
                status, output))

        # If an optional file type was specified, append it to restrict the
        # transfer
        if file_type is not None:
            lasp_dir += '*_{}'.format(file_type)

        _log('beginning rsync for {}'.format(lasp_dir))
        rsync_command = '{0} {1} {2}'.format(
            DATA_TRANSFER_SCRIPT, lasp_dir, target_full_path)
        _log(rsync_command)
        status, output = AimPiShellExecution.tokenize_and_execute_command(
            rsync_command)
        # status, output = 0, ''
        output = output.replace('\r', '')
        if status != 0:
            _log('status = {0}'.format(status))
        # Echo the non-empty lines of the transfer script's output
        for line in output.split('\n'):
            if line:
                print(line)
        _log('rsync complete for {}'.format(lasp_dir))


if __name__ == '__main__':
    # Make the class executable
    APPROVED_DIRECTORY_PATH = os.path.join(SOURCE_BASE_DIR,
                                           APPROVED_DATA_DIRECTORY)
    with RsyncCipsToHampton(APPROVED_DIRECTORY_PATH) as CIPS_TO_HAMPTON:
        pass