Source code for pepys_import.file.highlighter.highlighter

from tqdm import tqdm

from pepys_import.file.highlighter.support.line import Line

from ...utils.text_formatting_utils import custom_print_formatted_text, format_error_message
from .support.char import Char
from .support.export import export_report
from .support.token import SubToken


[docs]class HighlightedFile:
    """
    class that can load/tokenize a datafile, record changes to the file,
    then export a highlighted version of the file that indicates extraction
    """

    def __init__(self, filename: str, number_of_lines=None, datafile=None):
        """
        Constructor for this object
        Args:
            filename (str): The name of the file to be parsed/reported upon
            number_of_lines(int) Number of lines that should be shown
                   in the output (all lines if None)
        """
        self.chars = []
        self.filename = filename
        self.dict_color = {}
        self.number_of_lines = number_of_lines
        self.datafile = datafile

        self.importer_highlighting_levels = {}

[docs]    def reinitialise(self, filename, datafile):
        """Re-initialise the highlighted file object, to set a new filename as the source
        of data to be highlighted.

        Designed to be called with a new filename from within `_load_this_file` in an importer
        if the original file needs transforming before being processed and highlighted."""
        self.__init__(filename=filename, datafile=datafile)
        self.datafile = datafile

[docs]    def chars_debug(self):
        """
        Debug method, to check contents of chars
        """
        self.fill_char_array_if_needed()
        return self.chars

[docs]    def lines(self):
        """
        Slice the file into lines and return a list of Line objects
        """
        if self.number_of_lines is None:
            return self.not_limited_lines()
        elif self.number_of_lines <= 0:
            custom_print_formatted_text(
                format_error_message("Non-positive number of lines. Please provide positive number")
            )
            exit(1)
        else:
            return self.limited_lines()

[docs]    def export(self, filename: str, include_key=False):
        """
        Provide highlighted summary for this file
        Args:
        filename (str): The name of the destination for the HTML output
        include_key (bool): Whether to include a key at the bottom of the output
        showing what each colour refers to
        """
        if len(self.chars) > 0:
            export_report(filename, self.chars, self.dict_color, include_key)

[docs]    def limited_contents(self):
        with open(self.filename, "r") as file:
            whole_file_contents = file.read()

        lines_list = whole_file_contents.splitlines()

        lines_list = lines_list[0 : self.number_of_lines]
        limited_contents = "\n".join(str(e) for e in lines_list)

        return limited_contents, lines_list

[docs]    def limited_lines(self):
        """
        Return a list of Line objects for each line in the file,
        producing only self.number_of_lines objects (to limit length
        of output for very large files)
        """

        limited_contents, lines_list = self.limited_contents()

        lines = self.create_lines(limited_contents, lines_list)
        return lines

[docs]    def not_limited_lines(self):
        """
        Return a list of Line objects for each line in the file
        """
        with open(self.filename, "r") as file:
            file_contents = file.read()

        lines_list = file_contents.splitlines()
        lines = self.create_lines(file_contents, lines_list)

        return lines

[docs]    def fill_char_array_if_needed(self):
        if len(self.chars) > 0:
            # Char array already filled, so no need to do anything
            return

        if self.number_of_lines is None:
            with open(self.filename, "r") as file:
                file_contents = file.read()
        elif self.number_of_lines <= 0:
            raise ValueError("Non-positive number of lines. Please provide positive number")
        else:
            file_contents, _ = self.limited_contents()

        # We need to store the contents of the file as bytes so that
        # the record call on an XML element can convert from bytes offsets to character offsets
        with open(self.filename, "rb") as f:
            self.file_byte_contents = f.read()

        # Initialise the char index (self.chars), with one Char entry for
        # each character in the file. (Note: a reference to this char array is
        # given to each SubToken)
        # We do this as a list comprehension as it's more efficient, but we have to
        # add it to list that already exists in self.chars, as references have already
        # been made to this list
        self.chars += [Char(c) for c in tqdm(file_contents)]

[docs]    def set_usages_for_slice(self, start, end, usage):
        for i in range(start, end):
            self.chars[i].usages.append(usage)

        return (start, end)

[docs]    def create_lines(self, file_contents, lines_list):
        """
        Create individual Line objects
        for each line, with appropriate references to the character array
        """
        # Keeps track of which character in the file a line starts on
        line_start_counter = 0
        lines = []

        # For each line in the file create a Line object with a SubToken
        # object as its child, keeping track of the length of the line
        # and which character of the file this line starts on
        for this_line in lines_list:
            line_length = len(this_line)
            line_span = (0, len(this_line))
            # Create SubToken object to keep track of the line length, the line itself
            # the start character of the line in the file, and a reference to the overall
            # list of characters
            sub_token = SubToken(line_span, this_line, int(line_start_counter), self.chars)
            new_l = Line([sub_token], self)
            lines.append(new_l)
            # Update the starting character of the line ready for next time
            line_start_counter += line_length + 1

        return lines