Source code for pepys_admin.merge

from datetime import datetime
from getpass import getuser

from geoalchemy2.shape import to_shape
from sqlalchemy.orm import undefer
from sqlalchemy.orm.session import make_transient
from tabulate import tabulate
from tqdm import tqdm

from pepys_admin.utils import (
    create_statistics_from_ids,
    get_name_for_obj,
    make_query_for_unique_cols_or_all,
    print_names_added,
    statistics_to_table_data,
)
from pepys_import.core.formats.location import Location
from pepys_import.core.store.db_status import TableTypes
from pepys_import.utils.sqlalchemy_utils import get_primary_key_for_table
from pepys_import.utils.table_name_utils import table_name_to_class_name



class MergeDatabases:
    def __init__(self, master_store, slave_store):
        self.master_store = master_store
        self.slave_store = slave_store
        self.ref_statistics = dict()
        self.meta_statistics = dict()
        self.measurement_statistics = dict()
        self.merge_change_id = None

    def merge_all_reference_tables(self):
        """Merges all reference tables from the slave_store into the master_store."""
        self.master_store.setup_table_type_mapping()
        reference_table_objects = self.master_store.meta_classes[TableTypes.REFERENCE]

        reference_table_names = [obj.__name__ for obj in reference_table_objects]

        # Put the GeometryType table at the front of the list, so that it gets
        # done first - as GeometrySubType depends on it
        reference_table_names.remove("GeometryType")
        reference_table_names.insert(0, "GeometryType")

        added_names = {}

        print("Merging reference tables")
        for ref_table in tqdm(reference_table_names):
            id_results = self.merge_reference_table(ref_table)
            self.update_synonyms_table(id_results["modified"])
            self.update_logs_table(id_results["modified"])
            self.ref_statistics[ref_table] = create_statistics_from_ids(id_results)
            if len(id_results["added"]) > 0:
                added_names[ref_table] = [d["name"] for d in id_results["added"]]

        return added_names

    def merge_reference_table(self, table_object_name):
        """Merges a reference table (table_object_name should be the singular name of the table
        object, such as PlatformType) from the slave_store into the master_store.
        """
        # Until we added the HelpText table, all the reference tables had a name field
        # but the HelpText table has an id field instead (but it is unique and has the same
        # characteristics as a name field)
        if table_object_name == "HelpText":
            name_field = "id"
        else:
            name_field = "name"

        # Get references to the table from the master and slave DataStores
        master_table = getattr(self.master_store.db_classes, table_object_name)
        slave_table = getattr(self.slave_store.db_classes, table_object_name)

        primary_key = get_primary_key_for_table(master_table)

        # Keep track of each ID and what its status is
        ids_already_there = []
        ids_added = []
        ids_modified = []

        with self.slave_store.session_scope():
            with self.master_store.session_scope():
                slave_entries = (
                    self.slave_store.session.query(slave_table).options(undefer("*")).all()
                )

                for slave_entry in slave_entries:
                    guid = getattr(slave_entry, primary_key)

                    results = (
                        self.master_store.session.query(master_table)
                        .filter(getattr(master_table, primary_key) == guid)
                        .all()
                    )

                    n_results = len(results)

                    if n_results == 0:
                        search_by_name_results = (
                            self.master_store.session.query(master_table)
                            .filter(
                                getattr(master_table, name_field)
                                == getattr(slave_entry, name_field)
                            )
                            .all()
                        )
                        n_name_results = len(search_by_name_results)

                        if n_name_results == 0:
                            ids_added.append(
                                {"id": guid, "name": getattr(slave_entry, name_field)}
                            )
                            self.slave_store.session.expunge(slave_entry)
                            make_transient(slave_entry)
                            self.master_store.session.merge(slave_entry)
                        elif n_name_results == 1:
                            ids_modified.append(
                                {
                                    "from": guid,
                                    "to": getattr(search_by_name_results[0], primary_key),
                                    "name": getattr(slave_entry, name_field),
                                    # Data can never be changed here, because there's only one
                                    # field (name) and that's what we search by
                                    "data_changed": False,
                                }
                            )
                            setattr(
                                slave_entry,
                                primary_key,
                                getattr(search_by_name_results[0], primary_key),
                            )
                            self.slave_store.session.add(slave_entry)
                            self.slave_store.session.commit()
                        else:  # pragma: no cover
                            assert (
                                False
                            ), "Fatal assertion error: multiple entries in master reference table with same name"
                    elif n_results == 1:
                        ids_already_there.append(
                            {"id": guid, "name": getattr(slave_entry, name_field)}
                        )
                    else:  # pragma: no cover
                        assert (
                            False
                        ), "Fatal assertion error: multiple entries in master reference table with same GUID"

        return {
            "already_there": ids_already_there,
            "added": ids_added,
            "modified": ids_modified,
        }

    def merge_all_metadata_tables(self):
        """Merge *most* metadata tables from the slave_store into the master_store, using the
        merge_change_id as the change_id for any modifications occurring as part of the merge.

        Note: this does not merge the Datafile, Synonym, Log or Change tables - these are
        handled separately.
        """
        self.master_store.setup_table_type_mapping()
        metadata_table_objects = self.master_store.meta_classes[TableTypes.METADATA]

        metadata_table_names = [obj.__name__ for obj in metadata_table_objects]

        # Crude ordering system for now (TODO: Improve)
        # Put Platform first, then Sensor, then rest of them
        metadata_table_names.remove("Platform")
        metadata_table_names.remove("Sensor")
        metadata_table_names.insert(0, "Sensor")
        metadata_table_names.insert(0, "Platform")

        # Remove various entries for now - deal with those separately later
        metadata_table_names.remove("Datafile")
        metadata_table_names.remove("Log")
        metadata_table_names.remove("Change")
        metadata_table_names.remove("Synonym")
        metadata_table_names.remove("Extraction")

        added_names = {}

        print("Merging metadata tables")
        for met_table in tqdm(metadata_table_names):
            id_results = self.merge_metadata_table(met_table)
            self.update_synonyms_table(id_results["modified"])
            self.update_logs_table(id_results["modified"])
            self.meta_statistics[met_table] = create_statistics_from_ids(id_results)
            if len(id_results["added"]) > 0:
                added_names[met_table] = [d["name"] for d in id_results["added"]]

        return added_names

    def update_master_from_slave_entry(self, master_entry, slave_entry):
        """Updates the entry in master with any fields that are set on slave but not on master
        (ie. optional fields like trigraph that may be left blank), or if the slave privacy is
        higher (ie. more secure) than in the entry on master.

        Returns True if the entry has been modified.
        """
        column_names = [col.name for col in master_entry.__table__.columns.values()]

        primary_key = get_primary_key_for_table(master_entry)

        modified = False

        # Loop through all fields on master entry
        for col_name in column_names:
            # If field is missing on master entry
            if getattr(master_entry, col_name) is None:
                # Look to see if it has a value in the slave entry
                if getattr(slave_entry, col_name) is not None:
                    # Set it on the master entry, and note that we've modified the entry
                    setattr(master_entry, col_name, getattr(slave_entry, col_name))
                    # Create a Log entry to say that we changed this attribute
                    self.master_store.add_to_logs(
                        table=master_entry.__table__.name,
                        row_id=getattr(master_entry, primary_key),
                        field=col_name,
                        change_id=self.merge_change_id,
                    )
                    # Note that we modified it, so we can update in DB if necessary
                    modified = True

        if hasattr(master_entry, "privacy"):
            master_privacy = master_entry.privacy.level
            slave_privacy = slave_entry.privacy.level

            # If master privacy has a level less than the slave privacy, then update with the
            # slave privacy
            if master_privacy < slave_privacy:
                master_entry.privacy_id = slave_entry.privacy_id
                modified = True

        if modified:
            self.master_store.session.add(master_entry)
            self.master_store.session.commit()

        return modified

    def merge_metadata_table(self, table_object_name):
        """Merge the specified metadata table (table_object_name should be the singular name for
        the table, such as "Platform") from the slave_store into the master_store.

        The merge_change_id is used as the change_id for any modifications that occur because of
        the merge (these modifications would happen if an optional value is set on the slave but
        not on the master, and it is therefore copied across).
        """
        # Get references to the table from the master and slave DataStores
        master_table = getattr(self.master_store.db_classes, table_object_name)
        slave_table = getattr(self.slave_store.db_classes, table_object_name)

        primary_key = get_primary_key_for_table(master_table)

        # Keep track of each ID and what its status is
        ids_already_there = []
        ids_added = []
        ids_modified = []

        with self.slave_store.session_scope():
            with self.master_store.session_scope():
                # Get all entries in this table in the slave database
                slave_entries = (
                    self.slave_store.session.query(slave_table).options(undefer("*")).all()
                )

                for slave_entry in slave_entries:
                    guid = getattr(slave_entry, primary_key)

                    # Find all entries with this GUID in the same table in the master database
                    results = (
                        self.master_store.session.query(master_table)
                        .filter(getattr(master_table, primary_key) == guid)
                        .all()
                    )

                    n_results = len(results)

                    if n_results == 0:
                        # The GUID isn't present in the master database
                        # This means this record wasn't originally taken from the master db
                        # but both the master and slave dbs may have had the same entry added
                        # with the same details - so we need to check whether there is an entry
                        # with the same values
                        search_by_all_fields_results = make_query_for_unique_cols_or_all(
                            master_table, slave_entry, self.master_store.session
                        ).all()
                        n_all_field_results = len(search_by_all_fields_results)

                        if n_all_field_results == 0:
                            # We can't find an entry which matches in the master db,
                            # so this is a new entry from the slave which needs copying over
                            ids_added.append({"id": guid, "name": get_name_for_obj(slave_entry)})
                            self.slave_store.session.expunge(slave_entry)
                            make_transient(slave_entry)
                            self.master_store.session.merge(slave_entry)
                        elif n_all_field_results == 1:
                            # We found an entry that matches in the master db, but it'll have a
                            # different GUID - so update the GUID in the slave database and let
                            # it propagate so we can copy over other tables later and
                            # all the foreign key integrity will work
                            setattr(
                                slave_entry,
                                primary_key,
                                getattr(search_by_all_fields_results[0], primary_key),
                            )
                            self.slave_store.session.add(slave_entry)
                            self.slave_store.session.commit()

                            # We also need to compare the fields of the slave entry and the
                            # master entry and update any master fields that are currently None
                            # with values from the slave entry
                            was_modified = self.update_master_from_slave_entry(
                                search_by_all_fields_results[0],
                                slave_entry,
                            )

                            ids_modified.append(
                                {
                                    "from": guid,
                                    "to": getattr(search_by_all_fields_results[0], primary_key),
                                    "name": get_name_for_obj(slave_entry),
                                    "data_changed": was_modified,
                                }
                            )
                        else:  # pragma: no cover
                            assert (
                                False
                            ), "Fatal assertion error: multiple entries in master metadata table with same name"
                    elif n_results == 1:
                        # The GUID is in the master db - so the record must also be there
                        # (as GUIDs are unique)
                        ids_already_there.append(
                            {"id": guid, "name": get_name_for_obj(slave_entry)}
                        )
                    else:  # pragma: no cover
                        # We should never get here: the GUID should always appear in the master
                        # database either zero or one times, never more
                        assert (
                            False
                        ), "Fatal assertion error: multiple entries in master metadata table with same GUID"

        return {
            "already_there": ids_already_there,
            "added": ids_added,
            "modified": ids_modified,
        }

    def update_synonyms_table(self, modified_ids):
        """Updates the Synonyms table in the slave_store for entries which have had their GUID
        modified when they were merged with the master_store.

        This occurs in the situation where there are entries in both master_store and slave_store
        with the same details, and therefore the slave GUID for that entry is updated to match
        the master GUID. A list of information about those entries is passed to this function,
        and their original IDs ('from_id') are searched in the Synonyms table and updated
        (to 'to_id') if found.
        """
        with self.slave_store.session_scope():
            # For each modified ID
            for details in modified_ids:
                from_id = details["from"]
                to_id = details["to"]

                # Search for it in the Synonyms table
                results = (
                    self.slave_store.session.query(self.slave_store.db_classes.Synonym)
                    .filter(self.slave_store.db_classes.Synonym.entity == from_id)
                    .all()
                )

                if len(results) > 0:
                    # If it exists, then modify the old ID to the new ID
                    for result in results:
                        result.entity = to_id

                    # Commit changes
                    self.slave_store.session.add_all(results)
                    self.slave_store.session.commit()

    def update_logs_table(self, modified_ids):
        """Updates the Logs table in the slave_store for entries which have had their GUID
        modified when they were merged with the master_store.

        This occurs in the situation where there are entries in both master_store and slave_store
        with the same details, and therefore the slave GUID for that entry is updated to match
        the master GUID. A list of information about those entries is passed to this function,
        and their original IDs ('from_id') are searched in the Logs table and updated
        (to 'to_id') if found.
        """
        with self.slave_store.session_scope():
            # For each modified ID
            for details in modified_ids:
                from_id = details["from"]
                to_id = details["to"]

                # Search for it in the Logs table
                results = (
                    self.slave_store.session.query(self.slave_store.db_classes.Log)
                    .filter(self.slave_store.db_classes.Log.id == from_id)
                    .all()
                )

                if len(results) > 0:
                    # If it exists, then modify the old ID to the new ID
                    for result in results:
                        result.id = to_id

                    # Commit changes
                    self.slave_store.session.add_all(results)
                    self.slave_store.session.commit()

    @staticmethod
    def split_list(lst, n=100):
        """Yield successive n-sized chunks from lst."""
        for i in range(0, len(lst), n):
            yield lst[i : i + n]

    @staticmethod
    def rows_to_list_of_dicts(results):
        """Converts a list of rows returned from a SQLAlchemy query into a list of dicts.

        The obvious way to do this would be to look in the __table__ attribute of the row and get
        the column names, and then extract those values from the row. However, this will not work
        for the Measurement tables, as the attributes of the class have different names to the
        column names. For example, the column name is "speed" but the attribute name is "_speed",
        and a property (with getter and setter methods) is used to convert between the two.
        The bulk_insert_mappings method doesn't use the Table object, so doesn't use the
        properties to do the conversion. Therefore, we need to have *all* of the attributes of
        this class, including the attributes starting with _. However, we don't want the
        SQLAlchemy internal attributes, or the 'dunder' methods that start with a __. Therefore,
        this function excludes those, but keeps all others.

        We also need to process the location field to make sure it is in WKT format so the
        database can understand it.

        Note: This will currently fail for any table with a generic geometry field in it
        (ie. the geometry1 table), but this is not used currently.
        """
        dict_results = []

        attributes_to_use = None

        for row in results:
            if attributes_to_use is None:
                attributes_to_use = [
                    attrib for attrib in dir(row) if not attrib.startswith("__")
                ]
                attributes_to_remove = [
                    "_decl_class_registry",
                    "_sa_class_manager",
                    "_sa_instance_state",
                ]
                for attribute_name in attributes_to_remove:
                    if attribute_name in attributes_to_use:
                        attributes_to_use.remove(attribute_name)

            d = {key: getattr(row, key) for key in attributes_to_use}

            # Deal with the location field, making sure it gets converted to WKT so it can be
            # inserted into the db
            if "location" in d and d["location"] is not None:
                d["_location"] = d["location"].to_wkt()

            # Deal with the geometry table where we have a generic geometry in the table
            # If it is a Location object then convert to WKT. If not, then convert the
            # WKB geometry field to a WKT field, going via Shapely
            if "geometry" in d:
                if isinstance(d["geometry"], Location):
                    d["_geometry"] = d["geometry"].to_wkt()
                else:
                    shply_geom = to_shape(d["geometry"])
                    d["_geometry"] = "SRID=4326;" + shply_geom.wkt

            dict_results.append(d)

        return dict_results

    def merge_measurement_table(self, table_object_name, added_datafile_ids):
        """Merge the specified measurement table (specified as the object name, so singular) from
        the slave_store into the master_store, copying across entries which have a source_id in
        added_datafile_ids.
        """
        # We don't need to do a 'merge' as such for the measurement tables. Instead we just need
        # to add the measurement entries for datafiles which hadn't already been imported into
        # the master database.
        # These come from the 'added' list of IDs from the datafile merging function and are
        # passed to this function
        master_table = getattr(self.master_store.db_classes, table_object_name)
        slave_table = getattr(self.slave_store.db_classes, table_object_name)

        to_add = []

        with self.slave_store.session_scope():
            with self.master_store.session_scope():
                # Split the IDs list up into 100 at a time, as otherwise the SQL query could get
                # longer than SQLite or Postgres allows - as it'll have a full UUID string in it
                # for each datafile ID
                print(f"Merging measurement table {table_object_name}")
                for datafile_ids_chunk in tqdm(self.split_list(added_datafile_ids)):
                    # Search for all slave measurement table entries with IDs in this list
                    results = (
                        self.slave_store.session.query(slave_table)
                        .filter(slave_table.source_id.in_(datafile_ids_chunk))
                        .options(undefer("*"))
                        .all()
                    )

                    # Convert the rows to a list of dicts, taking into account
                    # the location field, and the properties used in the table classes
                    dict_results = self.rows_to_list_of_dicts(results)

                    to_add.extend(dict_results)

                self.master_store.session.bulk_insert_mappings(master_table, to_add)

        return len(to_add)

    def merge_all_measurement_tables(self, added_datafile_ids):
        """Copies across all entries in all measurement tables that have a source_id in the list
        of added_datafile_ids.

        Must be run *after* reference and metadata tables have been merged.
        """
        self.master_store.setup_table_type_mapping()
        measurement_table_objects = self.master_store.meta_classes[TableTypes.MEASUREMENT]

        measurement_table_names = [obj.__name__ for obj in measurement_table_objects]

        n_added = {}

        print("Merging measurement tables")
        for measurement_table_name in tqdm(measurement_table_names):
            n_added[measurement_table_name] = self.merge_measurement_table(
                measurement_table_name, added_datafile_ids
            )

        return n_added

    def prepare_merge_logs(self):
        """Works out which Log and Change entries need copying from the slave_store to the
        master_store, by checking which entries refer to something that is actually in the master
        database.

        Must be run *after* all other merging is complete.
        """
        # Get references to the table from the master and slave DataStores
        master_table = self.master_store.db_classes.Log
        slave_table = self.slave_store.db_classes.Log

        # Keep track of logs that need to be added to master
        logs_to_add = []

        # Keep track of unique change IDs that need to be copied across
        changes_to_add = set()

        with self.slave_store.session_scope():
            with self.master_store.session_scope():
                # Get all entries in this table in the slave database
                slave_entries = (
                    self.slave_store.session.query(slave_table).options(undefer("*")).all()
                )

                print("Preparing to merge Logs and Changes")
                for slave_entry in tqdm(slave_entries):
                    guid = slave_entry.log_id

                    # Find all entries with this GUID in the same table in the master database
                    results = (
                        self.master_store.session.query(master_table)
                        .filter(master_table.log_id == guid)
                        .all()
                    )

                    n_results = len(results)

                    if n_results == 0:
                        # The GUID isn't present in the master database
                        # We now need to check whether the Log entry refers to an entry that
                        # actually exists in the master database (as we've done all other copying
                        # by now)
                        class_name = table_name_to_class_name(slave_entry.table)
                        referenced_table = getattr(self.master_store.db_classes, class_name)
                        pri_key_field = get_primary_key_for_table(referenced_table)
                        referenced_table_pri_key = getattr(referenced_table, pri_key_field)
                        id_to_match = slave_entry.id

                        query = self.master_store.session.query(referenced_table).filter(
                            referenced_table_pri_key == id_to_match
                        )
                        id_results = query.all()

                        if len(id_results) == 1:
                            # The Log's id entry DOES refer to something that exists in master
                            # Therefore put it in a list to be copied over
                            logs_to_add.append(slave_entry.log_id)
                            changes_to_add.add(slave_entry.change_id)
                    elif n_results == 1:
                        # The GUID is in the master db - so the record must also be there
                        # (as GUIDs are unique)
                        pass
                    else:  # pragma: no cover
                        # We should never get here: the GUID should always appear in the master
                        # database either zero or one times, never more
                        assert (
                            False
                        ), "Fatal assertion error: multiple entries in master Logs table with same GUID"

        return logs_to_add, changes_to_add

    def add_changes(self, changes_to_add):
        """Copies the Change entries with the specified ids in changes_to_add from the
        slave_store to the master_store.
        """
        to_add = []

        with self.slave_store.session_scope():
            with self.master_store.session_scope():
                # Split the IDs list up into 100 at a time, as otherwise the SQL query could get
                # longer than SQLite or Postgres allows - as it'll have a full UUID string in it
                # for each change ID
                print("Merging Changes")
                for change_ids_chunk in tqdm(self.split_list(list(changes_to_add))):
                    # Search for all slave Change entries with IDs in this list
                    results = (
                        self.slave_store.session.query(self.slave_store.db_classes.Change)
                        .filter(
                            self.slave_store.db_classes.Change.change_id.in_(change_ids_chunk)
                        )
                        .options(undefer("*"))
                        .all()
                    )

                    # Convert the rows to a list of dicts
                    dict_results = self.rows_to_list_of_dicts(results)

                    to_add.extend(dict_results)

                self.master_store.session.bulk_insert_mappings(
                    self.master_store.db_classes.Change, to_add
                )

    def add_logs(self, logs_to_add):
        """Copies the Log entries with the specified ids in logs_to_add from the slave_store to
        the master_store.
        """
        to_add = []

        with self.slave_store.session_scope():
            with self.master_store.session_scope():
                # Split the IDs list up into 100 at a time, as otherwise the SQL query could get
                # longer than SQLite or Postgres allows - as it'll have a full UUID string in it
                # for each log ID
                print("Merging Logs")
                for log_ids_chunk in tqdm(self.split_list(logs_to_add)):
                    # Search for all slave Log entries with IDs in this list
                    results = (
                        self.slave_store.session.query(self.slave_store.db_classes.Log)
                        .filter(self.slave_store.db_classes.Log.log_id.in_(log_ids_chunk))
                        .options(undefer("*"))
                        .all()
                    )

                    # Convert the rows to a list of dicts
                    dict_results = self.rows_to_list_of_dicts(results)

                    to_add.extend(dict_results)

                self.master_store.session.bulk_insert_mappings(
                    self.master_store.db_classes.Log, to_add
                )

    def merge_logs_and_changes(self):
        """Merges the Logs and Changes tables from the slave_store into the master_store.

        Must be run *after* all other merging is complete.
        """
        # Prepare to merge the logs by working out which ones need
        # adding, and which changes need adding
        logs_to_add, changes_to_add = self.prepare_merge_logs()

        # Add the change entries
        self.add_changes(changes_to_add)

        # Add the log entries
        self.add_logs(logs_to_add)

    def merge_extractions(self, added_datafile_ids):
        """Copies across Extraction entries whose datafile_id is in added_datafile_ids from the
        slave_store to the master_store.
        """
        to_add = []

        with self.slave_store.session_scope():
            with self.master_store.session_scope():
                print("Merging Extractions")
                for datafile_ids_chunk in tqdm(self.split_list(added_datafile_ids)):
                    # Search for all slave Extraction entries with IDs in this list
                    results = (
                        self.slave_store.session.query(self.slave_store.db_classes.Extraction)
                        .filter(
                            self.slave_store.db_classes.Extraction.datafile_id.in_(
                                datafile_ids_chunk
                            )
                        )
                        .options(undefer("*"))
                        .all()
                    )

                    # Convert the rows to a list of dicts
                    dict_results = self.rows_to_list_of_dicts(results)

                    to_add.extend(dict_results)

                self.master_store.session.bulk_insert_mappings(
                    self.master_store.db_classes.Extraction, to_add
                )

    def merge_all_tables(self):
        """Does a full merge, taking all data from the slave_store database and merging it into
        the master_store database. At the end of merging, print some summary tables with merge
        statistics and lists of new objects added.

        Both master_store and slave_store can be connected to either Postgres or SQLite
        databases.

        The overall outline of the merge is that we first merge the reference tables and most of
        the metadata tables, then merge the Synonyms and Datafiles tables, then copy across the
        relevant entries from the measurement tables, before finally filtering and copying the
        Logs and Changes tables.

        In general, "merging" here means comparing the two databases and dealing with these
        specific situations:

        a) The exact same entry exists in both databases, with the same GUID. This will occur
        when this entry was exported from the master database to the slave database. This is
        counted as 'already present', and nothing is done to change it.

        b) An entry exists in the slave database which isn't present in the master database.
        This will occur when the entry is added to the slave database after the two databases
        have been separated. This entry will be added to the master database, and counted as an
        item 'added'.

        c) An entry exists in the slave database with the same details as in the master database,
        but with a different GUID. The details that are compared to see if the entry is the same
        are the name field for a reference table, the fields defined in a unique constraint for
        any other table (eg. `name` and `host` for Sensors), or all fields if a unique constraint
        isn't defined. If optional fields are present in the slave database that are not present
        in the entry in the master database, then these are copied across. If the privacy value
        in the slave database is higher (ie. more secure) than the entry in the master database,
        then the privacy is updated to match the slave value. This entry counts as a 'modified'
        entry, as the GUID in the slave database is modified to match the GUID in the master, so
        that other objects can be copied without failing foreign key integrity checks. This does
        NOT mean that the data for the entry has been modified - this is only the case if the
        `data_changed` field is set to True in the resulting list of ids.

        The measurement tables (States, Contacts etc) aren't merged as such: new entries are just
        copied across. This is done based on the entries in the Datafiles table: we keep track of
        the IDs of all datafile entries that are added to the master from the slave, and then
        just copy the measurement data that has come from those datafiles.

        Logs and Changes are merged at the end, so that we can check each Log entry to see if it
        refers to something that actually exists in the master database, and only copy it if it
        does.

        The results from the merge_all_* functions consist of:

        - A list of 'added names': a dictionary with keys for each table merged, and values of a
          list of all the new 'names' that have been added (the 'name' is either the name field,
          if it exists, or the 'reference' or 'synonym' field if it doesn't).

        - A list of statistics: a dictionary with keys for each table merged, and values of a
          dictionary with counts of items 'already there', 'modified' and 'added'.
        """
        # Create a Change for this merge
        with self.master_store.session_scope():
            self.merge_change_id = self.master_store.add_to_changes(
                user=getuser(),
                modified=datetime.utcnow(),
                reason=f"Merging from database {self.slave_store.db_name}",
            ).change_id

        # Merge the reference tables first
        ref_added_names = self.merge_all_reference_tables()

        # Merge all the metadata tables, excluding the complicated ones
        meta_added_names = self.merge_all_metadata_tables()

        # Merge the synonyms table now we've merged all the reference and metadata tables
        syn_ids = self.merge_metadata_table("Synonym")
        # Get the list of added names, and add the statistics to the meta_statistics list
        syn_added_names = [d["name"] for d in syn_ids["added"]]
        self.meta_statistics["Synonyms"] = create_statistics_from_ids(syn_ids)

        # Merge the Datafiles table, keeping track of the IDs that changed
        df_ids = self.merge_metadata_table("Datafile")
        # Get the list of added names, and add the statistics to the meta_statistics list
        df_added_names = [d["name"] for d in df_ids["added"]]
        self.meta_statistics["Datafiles"] = create_statistics_from_ids(df_ids)

        # Merge the measurement tables, only merging measurements that come from one of the
        # datafiles that has been added
        self.measurement_statistics = self.merge_all_measurement_tables(
            [d["id"] for d in df_ids["added"]]
        )

        # Merge the Logs and Changes tables, only merging ones which still match something in
        # the new db
        self.merge_logs_and_changes()

        # Merge the Extractions table, only merging those that match a Datafile that has been
        # added
        self.merge_extractions([d["id"] for d in df_ids["added"]])

        print("Statistics:\n")
        print("Reference tables:")
        print(
            tabulate(
                statistics_to_table_data(self.ref_statistics),
                headers=["Table", "Already present", "Added", "Modified"],
                tablefmt="grid",
            )
        )

        print("\nMetadata tables:")
        print(
            tabulate(
                statistics_to_table_data(self.meta_statistics),
                headers=["Table", "Already present", "Added", "Modified"],
                tablefmt="grid",
            )
        )

        print("\nMeasurement tables:")
        print(
            tabulate(
                list(self.measurement_statistics.items()),
                headers=["Table", "Added"],
                tablefmt="grid",
            )
        )

        all_added_names = {**ref_added_names, **meta_added_names}

        if len(syn_added_names) > 0:
            all_added_names["Synonyms"] = syn_added_names

        if len(df_added_names) > 0:
            all_added_names["Datafiles"] = df_added_names

        print_names_added(all_added_names)