Source code for pepys_import.cli

import argparse
import os
from importlib import reload

from prompt_toolkit import prompt
from sqlalchemy.inspection import inspect

import config
from pepys_import.core.store.data_store import DataStore
from pepys_import.file.file_processor import FileProcessor
from pepys_import.resolvers.command_line_resolver import CommandLineResolver
from pepys_import.resolvers.default_resolver import DefaultResolver
from pepys_import.utils.data_store_utils import is_schema_created
from pepys_import.utils.error_handling import handle_database_errors
from pepys_import.utils.text_formatting_utils import (
    custom_print_formatted_text,
    format_command,
    format_error_message,
)

FILE_PATH = os.path.abspath(__file__)
DIRECTORY_PATH = os.path.dirname(FILE_PATH)
DEFAULT_DATABASE = ":memory:"


[docs]def main():  # pragma: no cover
    # Parse arguments
    parser = argparse.ArgumentParser()
    path_help = "The path to import data from (The default value is the directory of the script)"
    archive_help = " Instruction to archive (move) imported files to designated archive folder"
    db_help = (
        "SQLite database file to use (overrides config file database settings). "
        "Use `:memory:` for temporary in-memory instance"
    )
    resolver_help = (
        "Resolver to use for unknown entities. Valid values: 'default' (resolves "
        "using static default values), 'command-line' (resolves using interactive command-line interface, "
        "default option)"
    )
    training_help = (
        "Uses training mode, where all interactions take place with a training database located "
        "in the user's home folder. No actions will affect the database configured in the Pepys config file."
    )
    validation_help = "Skip the validation steps"
    parser.add_argument("--path", help=path_help, required=False, default=DIRECTORY_PATH)
    parser.add_argument(
        "--archive",
        dest="archive",
        help=archive_help,
        action="store_true",
        default=False,
    )
    parser.add_argument("--resolver", help=resolver_help, required=False, default="command-line")

    # Make --training and --db mutually exclusive, as --training automatically specifies the db
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "--training", help=training_help, dest="training", default=False, action="store_true"
    )
    group.add_argument("--db", help=db_help, required=False, default=None)

    parser.add_argument(
        "--skip-validation",
        help=validation_help,
        required=False,
        action="store_true",
        default=False,
    )
    args = parser.parse_args()
    process(
        path=args.path,
        archive=args.archive,
        db=args.db,
        resolver=args.resolver,
        skip_validation=args.skip_validation,
        training=args.training,
    )


[docs]def process(
    path=DIRECTORY_PATH,
    archive=False,
    db=None,
    resolver="command-line",
    training=False,
    skip_validation=None,
):
    if resolver == "command-line":
        resolver_obj = CommandLineResolver()
    elif resolver == "default":
        resolver_obj = DefaultResolver()
    else:
        custom_print_formatted_text(
            format_error_message(f"Invalid option '{resolver}' for --resolver.")
        )
        return

    if training:
        set_up_training_mode()

    # Reload the config file in case we're in a long-running process because of pytest and
    # the config file details have changed since the last test
    reload(config)

    if db is None:
        data_store = DataStore(
            db_username=config.DB_USERNAME,
            db_password=config.DB_PASSWORD,
            db_host=config.DB_HOST,
            db_port=config.DB_PORT,
            db_name=config.DB_NAME,
            db_type=config.DB_TYPE,
            missing_data_resolver=resolver_obj,
            error_on_db_version_mismatch=True,
        )
    elif type(db) is dict:
        data_store = DataStore(
            db_username=db["username"],
            db_password=db["password"],
            db_host=db["host"],
            db_port=db["port"],
            db_name=db["name"],
            db_type=db["type"],
            missing_data_resolver=resolver_obj,
            error_on_db_version_mismatch=True,
        )
    else:
        data_store = DataStore(
            db_username="",
            db_password="",
            db_host="",
            db_port=0,
            db_name=db,
            db_type="sqlite",
            missing_data_resolver=resolver_obj,
            error_on_db_version_mismatch=True,
        )
    if not is_schema_created(data_store.engine, data_store.db_type):
        # The number of tables don't match the expected number of tables, so check
        # whether the number of tables is actually zero. If so, initialise, if not
        # then stop and give error.
        inspector = inspect(data_store.engine)
        table_names = inspector.get_table_names()
        if len(table_names) == 0:
            data_store.initialise()
        else:
            print(
                f"The number of tables in the database ({len(table_names)}) does not match the expected number of tables.\n"
                "Please run database migration."
            )
    with data_store.session_scope():
        if data_store.is_empty():
            data_store.populate_reference()
            data_store.populate_metadata()

    processor = FileProcessor(
        archive=archive,
        skip_validation=skip_validation,
        archive_path=config.ARCHIVE_PATH,
        local_parsers=config.LOCAL_PARSERS,
    )
    processor.load_importers_dynamically()

    try:
        with handle_database_errors():
            processor.process(path, data_store, True)
    except SystemExit:
        pass

    if training:
        answer = prompt(format_command("Would you like to reset the training database? (y/N) "))
        if answer.upper() == "Y":
            if os.path.exists(config.DB_NAME):
                os.remove(config.DB_NAME)


[docs]def set_up_training_mode():
    training_data_folder = os.path.expanduser(os.path.join("~", "Pepys_Training_Data"))
    # Training database will be located in user's home folder
    db_path = os.path.join(training_data_folder, "pepys_training_database.db")

    print("#" * 80)
    print(" " * 28 + "Running in Training Mode" + " " * 28)
    print("")
    print("Changes are only made to a local training database (see full path below)")
    print("#" * 80)

    if os.path.exists(db_path):
        # Training db already exists, ask if we want to clear it
        answer = prompt(format_command("Would you like to reset the training database? (y/N) "))
        if answer.upper() == "Y":
            os.remove(db_path)

    if not os.path.exists(training_data_folder):
        os.mkdir(training_data_folder)

    config_file_path = os.path.join(training_data_folder, "pepys_training_config.ini")

    archive_folder = os.path.join(training_data_folder, "output")

    config_contents = f"""[database]
database_username =
database_password =
database_host =
database_port = 0
database_name = {db_path}
database_type = sqlite

[archive]
path = {archive_folder}"""

    with open(config_file_path, "w") as f:
        f.write(config_contents)

    os.environ["PEPYS_CONFIG_FILE"] = config_file_path

    # If the database doesn't already exist, then import some example data
    # if not os.path.exists(db_path):


if __name__ == "__main__":
    main()