#!/data_systems/opt/bin/python3
"""
rsync_cips_to_hampton

A script to use rsync to push AIM CIPS data to Hampton University.
The python code calls a bash shell script to handle the actual data
transfer. This is defined by the variable 'DATA_TRANSFER_SCRIPT'

Because the Hampton server times out on large data transfers, the levels
with a large number of files are broken into subunits.

The requirements and hence the design and goals are in a state of flux.
The current goals are:
    1) To move the approved ver 05.20 rev 05 PMC seasons to Hampton
       University. This list is expected to change.
    2) To move current data over on a daily basis as it is approved for
       the level 2 and higher PMC data
    3) To update the level 1A and 1P data on a regular basis
    4) To be flexible enough that the version 04.20 rev 06 data can be added.
    5) To be flexible enough that the RAA data can be added.

Created June 2019

@author: Bill Barrett
"""

import os.path
import time
import datetime
import traceback
import re
import fnmatch
from collections import namedtuple
from os import listdir, path

from aimpi_shell_execution import AimPiShellExecution
from season_and_version_utils import SeasonAndVersionUtils

# The user name for the remote (Hampton) system
REMOTE_USER = 'aim_cips'

# The server name for the remote (Hampton) system
REMOTE_SERVER = 'aim.hamptonu.edu'

# The base directory on the remote (Hampton) system
REMOTE_BASE_DIR = '/mnt/home/aim_cips/interim_archive/cips/data'

# The base directory on the local (AIM CIPS) system
SOURCE_BASE_DIR = '/aim/data/cips'

# The directory and subdirectories where the approved data lists are found
APPROVED_DATA_DIRECTORY = 'file_listings/daisies'

# Full local path of the approved data listings. Defined at module level so
# that the class also works when this file is imported rather than executed.
APPROVED_DIRECTORY_PATH = os.path.join(SOURCE_BASE_DIR, APPROVED_DATA_DIRECTORY)

# PMC level 3a
# The fully vetted version 05.20 rev 05 seasons
APPROVED_VERSION_0520_REV_05_SEASONS = [
    # 'north_2007',
    # 'north_2008',
    # 'north_2009',
    # 'north_2010',
    # 'north_2011',
    # 'north_2012',
    # 'north_2013',
    # 'north_2014',
    # 'north_2015',
    # 'north_2016',
    # 'north_2018',
    'north_2019',
    # 'north_2020',
    # 'south_2007',
    # 'south_2008',
    # 'south_2009',
    # 'south_2010',
    # 'south_2011',
    # 'south_2012',
    # 'south_2013',
    # 'south_2014',
    # 'south_2015',
    # 'south_2016',
    # 'south_2018',
    # 'south_2019'
]

# The following seasons have 3d data
# LEVEL_3D_SEASONS = [
#     'north_2007',
#     'north_2008',
#     'north_2009',
#     'south_2007',
#     'south_2008',
#     'south_2009'
# ]

SEASONS = ['north', 'south']

# Each AIM CIPS data level has both a version and a revision
VersionAndRevision = namedtuple('VersionAndRevision', 'version revision')

LEVEL_VERSION_AND_REVISION = {
    'level_3': VersionAndRevision('05.20', '05'),
}

# The year AIM was launched
FIRST_AIM_YEAR = 2007

# shell script to do the actual rsync
DATA_TRANSFER_SCRIPT = '/aim/sds/common/scripts/rsync_cips_to_hampton.sh'

# File extensions transferred for every approved base name
_TRANSFER_EXTENSIONS = ('nc.gz', 'web.png', 'thumb.png', 'png')

# each line should look something like:
#     cips_sci_3a_2020-322_v05.20_r05
# NOTE(review): the group names below were reconstructed (the names were
# missing, which made the pattern fail to compile); nothing in this file
# reads them, so confirm they match any external expectations.
APPROVED_FILE_PATTERN = re.compile(r"""^cips_sci_3a_
    (?P<year>20[0-2]\d)\-
    (?P<doy>[0-3]\d{2})_
    v(?P<version>0\d\.\d{2})_
    r(?P<revision>05)
    .*$""", re.VERBOSE)


def _log(message):
    """Print message prefixed with the local time as 'YYYY-MM-DD HH:MM:SS '."""
    print(time.strftime('%Y-%m-%d %H:%M:%S') + ' ' + message)


class RsyncCipsToHampton:
    """
    Use rsync to push AIM CIPS data to Hampton University.

    The transfers are started from __enter__, so the class is meant to be
    driven by a 'with' statement (see the __main__ section of this file).
    """

    def __init__(self, approved_directory_path):
        """
        Record where the approved data listings live and work out which
        years and which season are relevant for this run.

        Parameters
        ----------
        approved_directory_path : string
            the path where the approved data listings will be found

        Raises
        ------
        AssertionError
            if approved_directory_path is not an existing directory
            (note: the check is skipped when Python runs with -O)
        """
        assert os.path.isdir(approved_directory_path), \
            '{} is not a valid directory path'.format(approved_directory_path)
        self.approved_directory_path = approved_directory_path

        today = datetime.datetime.now()

        # Every year from launch through the current year, as strings
        self.years = [str(year) for year in range(FIRST_AIM_YEAR, today.year + 1)]

        # presumably get_season returns a 'north_2019' style string, since it
        # is compared against such strings in __enter__ - confirm against
        # SeasonAndVersionUtils
        processing_date = today.strftime('%Y-%j')
        self.season_year = SeasonAndVersionUtils.get_season(processing_date)

    def __enter__(self):
        """
        Walk through the years, seasons, and levels, transferring the data from
        the AIM CIPS server to the remote server

        Returns
        -------
        RsyncCipsToHampton
            this instance, so 'with ... as name' binds a usable object
        """
        for year in self.years:
            for season in SEASONS:
                season_year = '{0}_{1}'.format(season, year)
                if season_year == self.season_year:
                    # Todo: the season reported by get_season for today's
                    # date is skipped for now
                    continue
                if season_year in APPROVED_VERSION_0520_REV_05_SEASONS:
                    self.transfer_season(season_year)
        return self

    def __exit__(self, exception_type, exception_value, traceback_info):
        """
        Necessary if __enter__ is defined. Prints message on error exit.

        Returns
        -------
        bool
            True for a normal exit, False if the exception_type is not None
            (so the exception keeps propagating after being reported)
        """
        if exception_type is not None:
            _log('type: {0}, value: {1}'.format(exception_type, exception_value))
            traceback.print_exception(exception_type, exception_value, traceback_info)
            return False  # Comment to pass exception through
        return True

    def transfer_season(self, season_year):
        """Transfer every configured data level for one season

        Parameters
        ----------
        season_year : string
            season and year, e.g. 'north_2018'
        """
        # Cycle through the various levels and data types
        for level, version_and_revision in sorted(LEVEL_VERSION_AND_REVISION.items()):
            version = 'ver_' + version_and_revision.version
            revision = 'rev_' + version_and_revision.revision
            RsyncCipsToHampton.scp_files(
                season_year, level, version, revision,
                approved_directory_path=self.approved_directory_path)

    @staticmethod
    def scp_files(season_and_year, level, version, revision, file_type=None,
                  approved_directory_path=None):
        """The function that handles the actual data transfer

        Despite the name, the files are pushed with the rsync wrapper script
        named by DATA_TRANSFER_SCRIPT, one command per approved file and
        extension.

        Parameters
        ----------
        season_and_year : string
            season and year, e.g. 'north_2018'
        level : string
            CIPS data level, e.g. 'level_1p', 'level_1a/mx', 'raa/level_2b'
        version : string
            CIPS data version directory name, e.g. 'ver_05.20'
        revision : string
            CIPS data revision directory name, e.g. 'rev_05'
        file_type : string, optional
            a file type ,e.g. 'alb.png', 'alb.nc.gz'
        approved_directory_path : string, optional
            directory holding the approved file listings; defaults to the
            module level APPROVED_DIRECTORY_PATH
        """
        if approved_directory_path is None:
            approved_directory_path = APPROVED_DIRECTORY_PATH

        # Set source path: LASP path
        # path base is used both at LASP and Hampton as part of the path
        path_base_lasp = '{0}/{1}/{2}/{3}'.format(
            season_and_year, level, version, revision)
        lasp_dir = os.path.join(SOURCE_BASE_DIR, path_base_lasp)

        # Set target path: Hampton server path
        # On the Hampton server, level_3 is referred to as level_3a
        if level == 'level_3':
            level = 'level_3a'
        # Note: hard coding 'PMC' here will not work for RAA, but that's not
        # currently implemented in this script
        path_base_hampton = 'PMC/{0}/{1}/{2}/{3}'.format(
            season_and_year, level, version, revision)
        target_full_path = os.path.join(REMOTE_BASE_DIR, path_base_hampton)

        # Does the source directory exist?
        if not os.path.isdir(lasp_dir):
            _log('{} not found, continuing'.format(lasp_dir))
            return

        # For rsync a directory on the sending side should end with '/'
        lasp_dir += '/'

        # Create the target directory on the remote system if necessary
        mkdir_command = 'mkdir -p {}'.format(target_full_path)
        _log(mkdir_command)
        status, output = AimPiShellExecution.execute_ssh_command(
            REMOTE_USER, REMOTE_SERVER, mkdir_command)
        if status != 0:
            # Report the failure but still attempt the transfers
            output = output.replace('\r', '')
            _log('remote mkdir failed: status = {0}, {1}'.format(status, output))

        # If an optional file type was specified, append it to restrict the transfer
        if file_type is not None:
            lasp_dir += '*_{}'.format(file_type)
        _log('beginning transfers for {}'.format(lasp_dir))

        # Get the file listing
        # File names for the approved file lists are like
        # 'remote_daisy_listing_south_2020.txt'
        approved_file_listing_path = os.path.join(
            approved_directory_path,
            'remote_daisy_listing_{}.txt'.format(season_and_year))

        with open(approved_file_listing_path, 'r') as listing:
            for raw_line in listing:
                # strip() also removes a '\r' left by Windows line endings,
                # which would otherwise end up inside the rsync path
                basename = raw_line.strip()
                if not basename:
                    continue
                if not APPROVED_FILE_PATTERN.match(basename):
                    _log('bad line in {0} : {1}'.format(
                        approved_file_listing_path, basename))
                    continue

                # Transfer the base name with each of the known extensions
                for ext in _TRANSFER_EXTENSIONS:
                    path_to_copy = os.path.join(
                        lasp_dir, '{}.{}'.format(basename, ext))
                    print("path_to_copy:---->", path_to_copy)
                    rsync_command = '{0} {1} {2}'.format(
                        DATA_TRANSFER_SCRIPT, path_to_copy, target_full_path)
                    _log(rsync_command)
                    status, output = AimPiShellExecution.tokenize_and_execute_command(
                        rsync_command)
                    if status != 0:
                        _log('status = {0}'.format(status))
                    # Echo whatever the transfer script printed, skipping
                    # empty lines
                    for line in output.replace('\r', '').split('\n'):
                        if line:
                            print(line)

        _log('transfer complete for {}'.format(lasp_dir))


if __name__ == '__main__':
    # Make the class executable: all of the work happens inside __enter__
    with RsyncCipsToHampton(APPROVED_DIRECTORY_PATH) as CIPS_TO_HAMPTON:
        pass