#!/data_systems/opt/bin/python3
"""
aim3_transfer_cips_raa_to_hampton

A script to use rsync to push AIM CIPS RAA data to Hampton University.
Because the Hampton server times out on large data transfers,
the levels with a large number of files are broken into subunits.

NOTE: the current implementation copies files one at a time with ``scp -p``
(see ``TransferCipsRaaToHampton.process_matched_line``), not rsync.

The requirements and hence the design and goals are in a state of flux.
The current goals are:
    1) To move the approved ver 01.10 rev 05 RAA years to Hampton University.

Created JMay 2020

@author: Bill Barrett
"""
import datetime
import fnmatch
import os.path
import re
import time
import traceback
from os import listdir, path

from aimpi_shell_execution import AimPiShellExecution
# from season_and_version_utils import SeasonAndVersionUtils

# The user name for the remote (Hampton) system
REMOTE_USER = 'aim_cips'
# The server name for the remote (Hampton) system
REMOTE_SERVER = 'aim.hamptonu.edu'
# The base directory on the remote (Hampton) system
REMOTE_BASE_DIR = '/mnt/home/aim_cips/interim_archive/cips/data/RAA'
# The base directory on the local (AIM CIPS) system
SOURCE_BASE_DIR = '/aim3/user_data/aimpro/L2AB_Renamed/'
# The directory and subdirectories where the approved data lists are found
APPROVED_DATA_DIRECTORY = '/aim/data/cips/file_listings/raa_orbit_images'

SEASONS = ['north', 'south']
RAA_LEVELS = ['level_2a', 'level_2b']

# Orbits numbered at or below this value are never copied.
# NOTE(review): the reason for this cut-off is not documented in this file
# (it may be a resume point from an earlier run) -- confirm before changing.
LAST_SKIPPED_ORBIT = 532
# A progress line is printed for orbits whose number is a multiple of this.
PROGRESS_REPORT_INTERVAL = 100

# File names for the vetted files are like 'remote_raa_orbit_image_listing_2007.txt'
# The dot is escaped and the end anchored so that only '.txt' listings match.
VETTED_FILE_LIST_PATTERN = re.compile(
    r'remote_raa_orbit_image_listing_20[0-2]\d\.txt$')

# each line should look something like:
# cips_raa_2b_orbit_59663_2018-079_v01.10_r05
# The group names are the ones read with .group(...) throughout this file.
VETTED_ORBIT_PATTERN = re.compile(r"""^cips_raa_2b_orbit_
                                  (?P<orbit_number>\d{5})_
                                  (?P<year>20[0-2]\d)\-
                                  (?P<day_of_year>[0-3]\d{2})_
                                  v(?P<version>0\d\.\d{2})_
                                  r(?P<revision>\d{2})
                                  $""", re.VERBOSE)


class TransferCipsRaaToHampton:
    """
    Transfer AIM CIPS RAA data to Hampton University.

    The transfer runs when the object is entered as a context manager:
    ``with TransferCipsRaaToHampton(directory):`` reads every vetted listing
    found in ``directory`` and copies the files of each listed orbit.
    """

    def __init__(self, approved_directory_path):
        """
        Collect the vetted listing files and reset the transfer state.

        Parameters
        ----------
        approved_directory_path : string
            the path where the approved data listings will be found

        Raises
        ------
        AssertionError
            if approved_directory_path is not an existing directory
        """
        # NOTE(review): assert is stripped under ``python -O``; it is kept so
        # the exception type seen by existing callers does not change.
        assert os.path.isdir(approved_directory_path), \
            '{} is not a valid directory path'.format(approved_directory_path)
        self.approved_directory_path = approved_directory_path
        # Listing file names (not full paths), processed in sorted order
        self.vetted_files = sorted(
            name for name in listdir(self.approved_directory_path)
            if VETTED_FILE_LIST_PATTERN.match(name))
        # Set the current month to a null value so that the first
        # valid month found will trigger new processing
        self.current_month = ''
        # same for the directory on the Hampton server
        self.remote_directory = ''

    def __enter__(self):
        """
        Walk through the vetted listings, transferring the data from the
        AIM CIPS server to the remote server.

        Returns
        -------
        self : TransferCipsRaaToHampton
            so that ``with ... as name`` binds the instance
        """
        try:
            self._transfer_vetted_files()
        except Exception as error:
            # Python does not call __exit__ when __enter__ itself raises, so
            # report the failure here before letting it propagate.
            self.__exit__(type(error), error, error.__traceback__)
            raise
        return self

    def __exit__(self, exception_type, exception_value, traceback_info):
        """
        Necessary if __enter__ is defined. Prints message on error exit.

        Returns
        -------
        bool
            True for a normal exit; False if exception_type is not None,
            which lets the exception propagate to the caller
        """
        if exception_type is not None:
            print(time.strftime("%Y-%m-%d %H:%M:%S") +
                  ' type: {0}, value: {1}'.format(exception_type,
                                                  exception_value))
            traceback.print_exception(exception_type, exception_value,
                                      traceback_info)
            return False
        return True

    def _transfer_vetted_files(self):
        """
        Read every vetted listing line by line and process each line that
        matches VETTED_ORBIT_PATTERN; other lines are reported and skipped.
        """
        for vetted_name in self.vetted_files:
            vetted_file = path.join(self.approved_directory_path, vetted_name)
            with open(vetted_file, 'r') as vetted:
                for line in vetted:
                    vetted_orbit = VETTED_ORBIT_PATTERN.match(line)
                    if vetted_orbit:
                        self.process_matched_line(vetted_orbit)
                    else:
                        print('bad line in {0} : {1}'.format(vetted_file, line))

    def process_matched_line(self, vetted_orbit):
        """
        Process a line from the vetted files list that has matched
        the pattern VETTED_ORBIT_PATTERN: locate the orbit's files locally,
        make sure the remote directory exists and copy the files with scp.

        Processing of the line stops at the first failed remote command.

        Parameters
        ----------
        vetted_orbit : object
            the result of VETTED_ORBIT_PATTERN.match()

        Returns
        -------
        None
        """
        # The file data has year and day of year
        # Need to get the month from that for RAA data
        year = vetted_orbit.group('year')
        day_of_year = int(vetted_orbit.group('day_of_year'))
        file_date = datetime.datetime(int(year), 1, 1) + \
            datetime.timedelta(days=day_of_year - 1)
        # The pattern admits days such as 000 or 399, which land in a
        # different year; such a line cannot be mapped to a month directory.
        if file_date.year != int(year):
            print('bad day of year {0} for year {1} orbit: {2}'.format(
                vetted_orbit.group('day_of_year'), year,
                vetted_orbit.group('orbit_number')))
            return
        month = file_date.strftime('%m')
        orbit_number = int(vetted_orbit.group('orbit_number'))

        for level in RAA_LEVELS:
            # level_2b is deliberately not transferred at present
            if level == 'level_2b':
                continue
            orbit_files = TransferCipsRaaToHampton.get_files_for_transfer(
                year, month, level, vetted_orbit)
            if not orbit_files:
                continue

            # A change in month (or year, level, version, revision) means a
            # change in where to put the data.  Comparing the full path
            # rather than only the month avoids reusing a stale directory
            # when the same month number recurs in another year.
            remote_directory = \
                TransferCipsRaaToHampton.get_remote_directory_name(
                    year, month, level, vetted_orbit)
            if remote_directory != self.remote_directory:
                for orbit_file in orbit_files:
                    print(orbit_file)
                if not self._make_remote_directory(remote_directory):
                    # State is left untouched so the next orbit retries.
                    return
                self.current_month = month
                self.remote_directory = remote_directory

            if orbit_number <= LAST_SKIPPED_ORBIT:
                continue
            for orbit_file in orbit_files:
                if (orbit_number % PROGRESS_REPORT_INTERVAL) == 0:
                    print(orbit_file)
                command = 'scp -p {0} {1}@{2}:{3}/'.format(
                    orbit_file, REMOTE_USER, REMOTE_SERVER,
                    self.remote_directory)
                return_code, output = \
                    AimPiShellExecution.tokenize_and_execute_command(command)
                if return_code != 0:
                    print('{0} return_code: {1}'.format(command, return_code))
                    print(output)
                    return

    @staticmethod
    def _make_remote_directory(remote_directory):
        """
        Create remote_directory (and any missing parents) on the remote
        server with ``mkdir -p`` run over ssh.

        Parameters
        ----------
        remote_directory : string
            the remote directory path

        Returns
        -------
        bool
            True if the remote command reported return code 0, else False
            (the command, return code and output are printed on failure)
        """
        command = 'mkdir -p {}'.format(remote_directory)
        return_code, output = AimPiShellExecution.execute_ssh_command(
            REMOTE_USER, REMOTE_SERVER, command)
        if return_code != 0:
            print('{0} return_code: {1}'.format(command, return_code))
            print(output)
            return False
        return True

    @staticmethod
    def get_files_for_transfer(year, month, level, vetted_orbit):
        """
        Given the information regarding the year, month, level, etc.
        find the local files belonging to the orbit.  The monthly directory
        is searched first, then the seasonal directories; the files of the
        first directory that holds any match are returned.

        Parameters
        ----------
        year : string
            a four digit year
        month : string
            a two digit month (01-12) with leading zeros
        level : string
            a valid RAA data level
        vetted_orbit : object
            the result of VETTED_ORBIT_PATTERN.match()

        Returns
        -------
        orbit_files : list
            full path of files matching pattern including directory,
            empty if nothing was found
        """
        version_directory = 'ver_' + vetted_orbit.group('version')
        revision_directory = 'rev_' + vetted_orbit.group('revision')
        orbit_pattern = '*' + vetted_orbit.group('orbit_number') + '*'

        # first see if there is something like
        # '<SOURCE_BASE_DIR>/raa_2019/03/level_2a/ver_01.10/rev_05/'
        level_directories = [
            os.path.join(SOURCE_BASE_DIR, 'raa_' + year, month, level)]

        # Now is the more difficult part: pulling RAA data from seasonal
        # directories, like '<SOURCE_BASE_DIR>/north_2007/level_2b/...'
        # NOTE(review): the original comment showed a 'raa' component between
        # the season and the level; the code has never added one -- confirm
        # against the actual directory layout.
        day_of_year = int(vetted_orbit.group('day_of_year'))
        if day_of_year <= 80:
            # Early in the year the southern season started the year before
            seasons = ['south_' + str(int(year) - 1), 'north_' + year]
        elif day_of_year <= 267:
            seasons = ['north_' + year, 'south_' + year]
        else:
            seasons = ['south_' + year, 'north_' + year]
        level_directories.extend(
            os.path.join(SOURCE_BASE_DIR, season, level) for season in seasons)

        for level_directory in level_directories:
            aim_server_directory = os.path.join(
                level_directory, version_directory, revision_directory)
            if not os.path.isdir(aim_server_directory):
                continue
            orbit_files = TransferCipsRaaToHampton.files_to_be_transferred(
                aim_server_directory, orbit_pattern)
            if orbit_files:
                return orbit_files
        return []

    @staticmethod
    def get_remote_directory_name(year, month, level, vetted_orbit):
        """
        Given the information regarding the year, month, level, etc.
        produce a meaningful remote directory path that will look something like
        '/mnt/home/aim_cips/interim_archive/cips/data/RAA/raa_2019/03/level_2b/ver_01.10/rev_05'

        Parameters
        ----------
        year : string
            a four digit year
        month : string
            a two digit month (01-12) with leading zeros
        level : string
            a valid RAA data level
        vetted_orbit : object
            the result of VETTED_ORBIT_PATTERN.match()

        Returns
        -------
        remote_directory : string
            the remote directory path
        """
        return os.path.join(
            REMOTE_BASE_DIR, 'raa_' + year, month, level,
            'ver_' + vetted_orbit.group('version'),
            'rev_' + vetted_orbit.group('revision'))

    @staticmethod
    def files_to_be_transferred(aim_server_directory, orbit_pattern):
        """
        List the files in a directory that match a pattern, leaving out
        any file whose name contains 'prelim'.

        Parameters
        ----------
        aim_server_directory : string
            directory to be searched for files
        orbit_pattern : string
            shell-style wildcard pattern (as used by fnmatch, not a regular
            expression) that file names must match

        Returns
        -------
        orbit_files : list
            full path of files matching pattern including directory,
            sorted by file name
        """
        matching_names = fnmatch.filter(
            os.listdir(aim_server_directory), orbit_pattern)
        # os.listdir returns names in arbitrary order; sort for a
        # reproducible transfer order
        return [os.path.join(aim_server_directory, name)
                for name in sorted(matching_names)
                if 'prelim' not in name]

    def transfer_season(self, season_year):
        """
        The function that handles the actual data transfer.

        Not implemented: this method has no body beyond its documentation
        and returns None.

        Parameters
        ----------
        season_year : string
            season and year, e.g. 'north_2018'
        """
        # Cycle through the various levels and data types


if __name__ == '__main__':
    # Make the class executable
    # NOTE(review): APPROVED_DATA_DIRECTORY is absolute, so os.path.join
    # discards SOURCE_BASE_DIR and the result is APPROVED_DATA_DIRECTORY
    # itself -- confirm that this is the intended location.
    APPROVED_DIRECTORY_PATH = os.path.join(SOURCE_BASE_DIR,
                                           APPROVED_DATA_DIRECTORY)
    # All of the work happens while the context manager is entered
    with TransferCipsRaaToHampton(APPROVED_DIRECTORY_PATH) as CIPS_RAA_TO_HAMPTON:
        pass