import argparse
import collections
import logging
import os.path
import pathlib
import shutil
import stat
import subprocess
import sys
import tarfile
import textwrap
import typing
import types

import debian.deb822

from deb_build_artifact_gather.chroot import (
    BindmountAccessToBuildSystemDescriptor,
    HostWasBuildSystem,
    BuildSystemDescriptor,
)
from deb_build_artifact_gather.rules import RULES, ENV_VARS, VARIABLES
from deb_build_artifact_gather.tags import Tag
from deb_build_artifact_gather.util import program_name, _error, setup_logging


class FileContainer:
    """Base class for destinations that receive collected build artifacts.

    Subclasses implement :meth:`add_path`. The shared :meth:`collect_pattern`
    expands a pattern via a build system descriptor and hands every match
    that has not been seen before to :meth:`add_path`.

    Instances are context managers; the base implementation holds no
    resources and never suppresses exceptions.
    """

    def __init__(self, logger: logging.Logger) -> None:
        self.logger = logger

    def __enter__(self) -> typing.Self:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: types.TracebackType | None,
    ) -> bool:
        # Returning False lets any exception from the ``with`` body propagate.
        return False

    def add_path(self, raw_match: str, normalised_path: str, fs_match: str) -> bool:
        """Store one matched file; return whether it was actually stored.

        :param raw_match: The match as reported by the glob expansion.
        :param normalised_path: Relative path to use inside the container.
        :param fs_match: Path through which the file can be read.
        """
        raise NotImplementedError

    def collect_pattern(
        self,
        bsd: BuildSystemDescriptor,
        directory: str,
        glob_or_basename: str,
        seen_paths: set[str],
    ) -> bool:
        """Expand one pattern and store every match not already seen.

        :param bsd: Descriptor used to expand the glob.
        :param directory: Directory the pattern is relative to.
        :param glob_or_basename: Glob or plain file name to look for.
        :param seen_paths: File system paths already handled; updated in place.
        :return: True if :meth:`add_path` stored at least one match.
        """
        log = self.logger
        pattern_name = f"{directory.rstrip('/')}/{glob_or_basename}"
        log.debug(f"Expanding {pattern_name}")
        matches = list(bsd.expand_glob(directory, glob_or_basename))
        if not matches:
            log.debug(f"No matches found for {pattern_name}")
            return False

        stored_any = False
        for raw_path, fs_path in matches:
            if fs_path in seen_paths:
                continue
            seen_paths.add(fs_path)
            # Absolute matches are filed under "root", everything else under
            # "unpacked-source".
            if os.path.isabs(raw_path):
                top_level, relative = "root", raw_path.lstrip("/")
            else:
                top_level, relative = "unpacked-source", raw_path
            relative = _clean_dir(relative)
            target = os.path.join(top_level, relative) if relative else top_level
            if self.add_path(raw_path, target, fs_path):
                stored_any = True
        return stored_any


class WriteToDirectory(FileContainer):
    """File container that copies collected files into a directory tree."""

    def __init__(self, logger: logging.Logger, output_path: str) -> None:
        super().__init__(logger)
        self.output_path = output_path

    def add_path(self, path: str, normalised_path: str, fs_path: str) -> bool:
        """Copy ``fs_path`` to ``normalised_path`` below the output directory.

        Only regular files (as reported by ``lstat``) are copied.

        :return: True if the file was copied, False if it was skipped.
        """
        source_mode = os.lstat(fs_path).st_mode
        if not stat.S_ISREG(source_mode):
            return False

        destination = os.path.join(self.output_path, normalised_path)
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        try:
            shutil.copyfile(fs_path, destination, follow_symlinks=False)
        except IsADirectoryError:
            self.logger.info(f"Ignoring {fs_path}: it is a directory")
            return False
        except shutil.SpecialFileError:
            self.logger.info(f"Ignoring {fs_path}: it is not a regular file")
            return False
        else:
            self.logger.info(f"Extracted {fs_path} into {destination}")
            return True


class WriteToTarFile(FileContainer):
    """File container that writes collected files into a tarball.

    The compression is derived from the output file name: ``.tar`` is written
    directly, while ``.tar.gz`` and ``.tar.xz`` are piped through an external
    ``gzip`` / ``xz`` process. The archive is opened lazily when the first
    file is added and is finalised in ``__exit__``.
    """

    # Seconds the compressor gets to finish once its input has been closed,
    # and seconds it gets to react to SIGTERM before SIGKILL is sent.
    _COMPRESSOR_EXIT_TIMEOUT = 120
    _COMPRESSOR_TERMINATE_TIMEOUT = 5

    def __init__(self, logger: logging.Logger, output_file: str) -> None:
        """
        :raises ValueError: If ``output_file`` has an unsupported extension.
        """
        super().__init__(logger)
        self.output_file = output_file
        self._compression_command = self._derive_compression_command()
        self._tar_fd: tarfile.TarFile | None = None
        self._compress_proc: subprocess.Popen[bytes] | None = None
        self._created_directories = set[str]()
        self._closeables: list[typing.IO[typing.Any]] = []

    def _derive_compression_command(self) -> list[str] | None:
        """Return the compressor command line, or None for a plain ``.tar``."""
        if self.output_file.endswith(".tar.gz"):
            return ["gzip", "-9n", "--rsyncable"]
        elif self.output_file.endswith(".tar.xz"):
            return ["xz", "-6", "--quiet", "--no-warn", "--no-adjust"]
        elif self.output_file.endswith(".tar"):
            return None
        raise ValueError(f"Unsupported output file name {self.output_file}")

    def _open_tar_fd(self) -> tarfile.TarFile:
        """Return the tar writer, creating it (and the compressor) on first use."""
        tar_fd = self._tar_fd
        if tar_fd is not None:
            return tar_fd
        compress_command = self._compression_command
        if compress_command is not None:
            fd = open(self.output_file, "wb")
            # Registered before starting the compressor so that the file is
            # closed in __exit__ even if Popen fails.
            self._closeables.append(fd)
            compress_proc = subprocess.Popen(
                args=compress_command,
                stdin=subprocess.PIPE,
                stdout=fd,
            )
            self._compress_proc = compress_proc
            # Stream mode ("w|") since a pipe is not seekable.
            new_tar_fd = tarfile.open(
                mode="w|",
                fileobj=compress_proc.stdin,
                format=tarfile.GNU_FORMAT,
                errorlevel=1,
            )
        else:
            new_tar_fd = tarfile.open(
                name=self.output_file,
                mode="w",
                format=tarfile.GNU_FORMAT,
                errorlevel=1,
            )
        self._tar_fd = new_tar_fd
        return new_tar_fd

    def add_path(self, path: str, normalised_path: str, fs_path: str) -> bool:
        """Add the regular file ``fs_path`` to the tarball as ``normalised_path``.

        Missing parent directory entries are added first. Ownership of every
        entry is normalised to root:root.

        :return: True if the file was added, False if it was skipped because
          it is not a regular file.
        """
        st = os.lstat(fs_path)
        if not stat.S_ISREG(st.st_mode):
            return False
        # Ensure the path starts with `./`. This ensures we create a proper root directory
        # as a side effect.
        if not normalised_path.startswith("./"):
            normalised_path = f"./{normalised_path}"

        directories_missing = []
        parent_dir = os.path.dirname(normalised_path)

        while parent_dir and parent_dir not in self._created_directories:
            directories_missing.append(parent_dir)
            parent_dir = os.path.dirname(parent_dir)

        tar_fd = self._open_tar_fd()

        # Outermost directory first, so every entry follows its parent.
        for directory_to_create in reversed(directories_missing):
            dir_tar_info = tar_fd.tarinfo(f"{directory_to_create}/")
            dir_tar_info.size = 0
            dir_tar_info.type = tarfile.DIRTYPE
            # A fresh TarInfo defaults to mode 0644; a directory needs the
            # execute bits to be traversable once extracted.
            dir_tar_info.mode = 0o755
            dir_tar_info.uname = "root"
            dir_tar_info.uid = 0
            dir_tar_info.gname = "root"
            dir_tar_info.gid = 0
            tar_fd.addfile(dir_tar_info)
            self._created_directories.add(directory_to_create)

        tar_info = tar_fd.gettarinfo(name=fs_path, arcname=normalised_path)
        # Normalise owner/group. The UID used inside the container is unlikely to work
        # outside the container anyhow.
        tar_info.uname = "root"
        tar_info.uid = 0
        tar_info.gname = "root"
        tar_info.gid = 0
        with open(fs_path, "rb") as fd:
            tar_fd.addfile(tar_info, fileobj=fd)
        return True

    def _wait_for_compressor(
        self,
        compress_proc: "subprocess.Popen[bytes]",
    ) -> int | None:
        """Wait for the compressor to exit, escalating to SIGTERM then SIGKILL.

        ``Popen.wait`` signals a timeout by raising ``TimeoutExpired``, so the
        escalation is driven by that exception.
        """
        try:
            return compress_proc.wait(self._COMPRESSOR_EXIT_TIMEOUT)
        except subprocess.TimeoutExpired:
            self.logger.warning("Sending SIGTERM to compression process.")
            compress_proc.terminate()
        try:
            return compress_proc.wait(self._COMPRESSOR_TERMINATE_TIMEOUT)
        except subprocess.TimeoutExpired:
            self.logger.warning("Sending SIGKILL to compression process.")
            compress_proc.kill()
        # Reap the killed process so that its return code becomes available.
        return compress_proc.wait()

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: types.TracebackType | None,
    ) -> bool:
        """Finalise the tarball and report failures via ``_error``.

        Closes the tar writer, closes the compressor's input, waits for the
        compressor and finally closes the output file. Exceptions raised in
        the ``with`` body are never suppressed.
        """
        tar_fd = self._tar_fd
        compress_proc = self._compress_proc
        retval: int | None = None
        pending_error: BaseException | None = None
        if tar_fd is not None:
            try:
                tar_fd.close()
            except Exception as e:
                self.logger.warning(f"Error while writing tarball (tar_fd): {str(e)}")
                pending_error = pending_error if pending_error else e
        if compress_proc is not None:
            try:
                # tarfile does not close a file object it was handed, so the
                # pipe must be closed here; otherwise the compressor never
                # sees end-of-input and never exits.
                if compress_proc.stdin is not None:
                    compress_proc.stdin.close()
            except Exception as e:
                self.logger.warning(
                    f"Error while writing tarball (compression process): {str(e)}"
                )
                pending_error = pending_error if pending_error else e
            try:
                retval = self._wait_for_compressor(compress_proc)
            except Exception as e:
                self.logger.warning(
                    f"Error while writing tarball (compression process): {str(e)}"
                )
                pending_error = pending_error if pending_error else e

        for closeable in self._closeables:
            try:
                closeable.close()
            except Exception as e:
                self.logger.warning(
                    f"Error while writing tarball (closeable): {str(e)}"
                )
                pending_error = pending_error if pending_error else e
        if pending_error:
            _error("Failed to write the tarball (see above warnings)")
        if retval:
            cmd = " ".join(self._compression_command or [])
            _error(
                f"Failed to write the tarball. The compress process {cmd!r} exited with code {retval}"
            )
        return False


def _package_provided_packages() -> list[str]:
    """Return the collection patterns declared in ``debian/control``.

    The patterns come from the ``Build-Time-Aux-Artifacts`` field of the first
    stanza, falling back to ``X-Build-Time-Aux-Artifacts``. Each non-empty
    line of the field is one pattern.

    :return: The patterns with surrounding whitespace removed; an empty list
      if the file is missing, contains no stanzas, or the field is absent or
      empty.
    """
    dctrl_path = pathlib.Path("debian/control")
    if not dctrl_path.is_file():
        return []
    # Pass an open file object, which iter_paragraphs is documented to accept,
    # rather than relying on it understanding a pathlib.Path.
    with dctrl_path.open(encoding="utf-8") as dctrl_fd:
        stanzas = list(debian.deb822.Deb822.iter_paragraphs(dctrl_fd))
    if not stanzas:
        return []
    source_stanza = stanzas[0]
    value = ""
    for prefix in ("", "X-"):
        value = source_stanza.get(f"{prefix}Build-Time-Aux-Artifacts", "")
        if value:
            break
    # Continuation lines of a multi-line field keep their leading whitespace,
    # so each line is stripped individually; otherwise every pattern after the
    # first would start with a space and never match.
    return [line.strip() for line in value.splitlines() if line.strip()]


def _extraction_mode_host_fs_path(
    logger: logging.Logger,
    args: argparse.Namespace,
) -> None:
    """Collect build artifacts from the current source tree and exit.

    The current working directory must be the unpacked source tree (it must
    contain ``debian/changelog``). Artifacts are written to
    ``args.output_path``: a directory if the path ends with ``/``, otherwise a
    tarball. When ``args.chroot_dir`` resolves to something other than ``/``,
    the build system is accessed through that directory.

    The process exits with status 0 if any path matched, otherwise with
    ``args.exit_code_on_empty``.
    """
    if not os.path.isfile("debian/changelog"):
        _error(
            "Current working directory must be the path to the unpacked source tree where the build happened"
        )
    output_path: str | None = args.output_path
    if output_path is None:
        _error("The --output-path parameter is mandatory")
    if os.path.exists(output_path):
        _error(
            f"The output path exists ({args.output_path!r}). Please choose another path or delete it first"
        )

    static_extra_patterns = _package_provided_packages()

    # A trailing slash selects directory output; anything else must be a
    # supported tarball name.
    if output_path.endswith("/"):
        container = WriteToDirectory(logger, output_path)
    else:
        try:
            container = WriteToTarFile(logger, output_path)
        except ValueError:
            _error(
                "Unsupported output path: End with `/` to make a directory or `.tar.{gz,xz}` for a tarball"
            )

    try:
        chroot_dir = (
            os.path.realpath(args.chroot_dir, strict=True) if args.chroot_dir else None
        )
    except FileNotFoundError:
        _error(f"The provided --chroot path {args.chroot_dir!r} does not exist")
    except OSError as e:
        _error(f"Could not resolve {args.chroot_dir}: {str(e)}")

    if chroot_dir is not None and chroot_dir != "/":
        bsd = BindmountAccessToBuildSystemDescriptor(
            chroot_dir,
            os.getcwd(),
        )
    else:
        bsd = HostWasBuildSystem()

    environ = bsd.environ()

    logger.info("Environment variables considered:")
    for k in ENV_VARS:
        v = environ.get(k)
        if v is None:
            logger.info(f"  * {k} (<unset>)")
        else:
            logger.info(f"  * {k}={v!r}")
    ipc_dir = environ.get("DEB_ARTIFACTS_EXTRACTION_IPC_DIR")
    if ipc_dir is not None:
        variable_definitions = collections.defaultdict(set)
        for var, value in bsd.collect_variable_definitions(ipc_dir):
            variable_definitions[var.variable_name].add(value)
        collection_patterns = list(bsd.collect_extra_patterns(ipc_dir))
    else:
        variable_definitions = {}
        collection_patterns = []
        logger.warning(
            "Assuming no variables since DEB_ARTIFACTS_EXTRACTION_IPC_DIR was unset,"
        )

    logger.info("Variables:")
    for k, vdef in VARIABLES.items():
        default_value = vdef.default_value
        vs = variable_definitions.get(k)
        values = []
        if default_value is not None:
            values.append(default_value)
        if vs:
            values.extend(vs)
        if not values:
            logger.info(f"  * {k} (<unset>)")
        elif len(values) == 1:
            v = values[0]
            logger.info(f"  * {k}={v!r}")
        else:
            logger.info(f"  * {k} had multiple definitions:")
            for v in sorted(values):
                logger.info(f"     {v!r}")

    if collection_patterns:
        logger.info("Collection patterns from build tools:")
        for pattern in collection_patterns:
            logger.info(f"  * {pattern!r}")
    else:
        logger.info(
            "Collection patterns from build tools: None provided via DEB_ARTIFACTS_EXTRACTION_IPC_DIR"
        )

    if static_extra_patterns:
        logger.info("Collection patterns from (X-)Build-Time-Aux-Artifacts:")
        for pattern in static_extra_patterns:
            logger.info(f"  * {pattern!r}")
        collection_patterns.extend(static_extra_patterns)
    else:
        logger.info("Collection patterns from (X-)Build-Time-Aux-Artifacts: None")
    selection_tags = args.rule_tags
    if selection_tags:
        # Each --collect-tag value may itself be a comma-separated list.
        allowed_tags = {Tag(t.strip()) for ts in selection_tags for t in ts.split(",")}
        rules = [r for r in RULES if not allowed_tags.isdisjoint(r.tags)]
    else:
        rules = RULES

    seen_paths = set[str]()

    # Use the container as a context manager so that its __exit__ runs and
    # the output is finalised before the process exits. Without this a
    # tarball would be left without its end-of-archive data.
    with container:
        for rule in rules:
            for directory in rule.relative_to_directory.possible_matches(
                {k: sorted(v) for k, v in variable_definitions.items()},
                environ,
            ):
                container.collect_pattern(
                    bsd,
                    directory,
                    rule.glob_or_basename,
                    seen_paths,
                )

        for raw_pattern in collection_patterns:
            directory, pattern = _split_dir(raw_pattern)
            container.collect_pattern(
                bsd,
                directory,
                pattern,
                seen_paths,
            )

    # Exit only after the container has been closed.
    if seen_paths:
        logger.info(f"Collection complete. Results available at {output_path}")
        sys.exit(0)
    else:
        logger.info("Collection complete. No paths matched the rules")
        sys.exit(args.exit_code_on_empty)


def _collect_pattern(
    logger: logging.Logger,
    output_path: str,
    bsd: BuildSystemDescriptor,
    directory: str,
    glob_or_basename: str,
    seen_paths: set[str],
) -> bool:
    """Expand one pattern and copy every new match below ``output_path``.

    Absolute matches are stored under ``root/``, all other matches under
    ``unpacked-source/``. Symlinks are followed when copying.

    :param logger: Logger used for progress messages.
    :param output_path: Directory that receives the copies.
    :param bsd: Descriptor used to expand the glob.
    :param directory: Directory the pattern is relative to.
    :param glob_or_basename: Glob or plain file name to look for.
    :param seen_paths: File system paths already handled; updated in place.
    :return: True if at least one file was actually copied.
    """
    pattern_name = f"{directory.rstrip('/')}/{glob_or_basename}"
    logger.debug(f"Expanding {pattern_name}")
    results = list(bsd.expand_glob(directory, glob_or_basename))
    if not results:
        logger.debug(f"No matches found for {pattern_name}")
        return False
    collected_any = False
    for match, fs_match in results:
        if fs_match in seen_paths:
            continue
        seen_paths.add(fs_match)
        if os.path.isabs(match):
            segment = "root"
            match = match.lstrip("/")
        else:
            segment = "unpacked-source"
        cleaned_match = _clean_dir(match)
        if cleaned_match:
            segment = os.path.join(segment, cleaned_match)
        full_output_path = os.path.join(output_path, segment)
        os.makedirs(os.path.dirname(full_output_path), exist_ok=True)
        try:
            shutil.copyfile(fs_match, full_output_path, follow_symlinks=True)
        except IsADirectoryError:
            logger.info(f"Ignoring {fs_match}: it is a directory")
            continue
        except shutil.SpecialFileError:
            logger.info(f"Ignoring {fs_match}: it is not a regular file")
            continue
        logger.info(f"Extracted {fs_match} into {full_output_path}")
        # Only a completed copy counts; skipped matches must not make the
        # function report that something was collected.
        collected_any = True
    return collected_any


def _split_dir(glob_pattern: str) -> tuple[str, str]:
    """Split a raw pattern into its anchor directory and the cleaned pattern.

    Patterns starting with ``/`` are anchored at ``/``; all others at ``.``.
    """
    anchor = "/" if glob_pattern.startswith("/") else "."
    # For a relative pattern lstrip("/") is a no-op, so one expression
    # covers both cases.
    return anchor, _clean_dir(glob_pattern.lstrip("/"))


def _clean_dir(dir_path: str) -> str:
    """Strip every leading ``./`` from ``dir_path``.

    A remainder of exactly ``.`` is returned as the empty string.
    """
    marker = "./"
    offset = 0
    # Advance past each consecutive "./" prefix instead of re-slicing.
    while dir_path.startswith(marker, offset):
        offset += len(marker)
    remainder = dir_path[offset:]
    return "" if remainder == "." else remainder


def _parse_args() -> None:
    """Parse the command line, set up logging and run the selected handler.

    The logging level starts at INFO, moves one level per ``--verbose`` /
    ``--quiet`` occurrence and is clamped to the DEBUG..ERROR range.
    """
    description_text = textwrap.dedent(
        """\
    The `deb-build-artifact-gather` program is a Debian build support tool.

    It is aimed at collecting various artifacts from the build tree such as
    logs from intermediate steps such as the upstream build system, ICE logs,
    etc.
    """
    )
    epilog_text = textwrap.dedent(
        """\

    Bug tracker: https://salsa.debian.org/debian/deb-build-artifact-gather/-/issues
    """
    )

    root_parser = argparse.ArgumentParser(
        description=description_text,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        allow_abbrev=False,
        epilog=epilog_text,
        prog=program_name(),
    )

    # Global verbosity switches.
    root_parser.add_argument(
        "--verbose",
        "-v",
        action="count",
        default=0,
        dest="verbosity",
        help="Increase verbosity (can be repeated)",
    )
    root_parser.add_argument(
        "--quiet",
        action="count",
        default=0,
        dest="quietness",
        help="Decrease verbosity (can be repeated)",
    )

    commands = root_parser.add_subparsers(
        title="command",
        dest="command",
        required=True,
        help="The command to run",
    )

    # The "collect-into" command and its options.
    collect_parser = commands.add_parser("collect-into", help="Collect artifacts")
    collect_parser.add_argument(
        "output_path",
        metavar="output-path",
        help="Where the collected artifacts should be stored (path/to/dir/ or path/to/tar.{gz,xz})",
    )
    collect_parser.add_argument(
        "--collect-tag",
        action="append",
        default=[],
        dest="rule_tags",
        help="Only collect paths matching this tag",
    )
    collect_parser.add_argument(
        "--exit-code-on-empty-collection",
        action="store",
        type=int,
        default=2,
        dest="exit_code_on_empty",
        help="Exit code to use if nothing was collected",
    )
    collect_parser.set_defaults(
        handler=_extraction_mode_host_fs_path,
        chroot_dir=None,
        output_path=None,
    )

    # Optional extraction mode nested below "collect-into".
    extraction_modes = collect_parser.add_subparsers(
        title="extraction-mode",
        help="The extraction-mode method to use",
    )
    chroot_mode_parser = extraction_modes.add_parser("from-chroot-dir")
    chroot_mode_parser.add_argument(
        "chroot_dir",
        metavar="chroot-dir",
        default=None,
        help="The build used the provided directory as its root directory. Affects collections paths that are absolute",
    )

    args = root_parser.parse_args()

    # Logging levels are spaced 10 apart, hence the multiplication.
    net_verbosity = args.verbosity - args.quietness
    requested_level = logging.INFO - net_verbosity * 10
    clamped_level = max(logging.DEBUG, min(requested_level, logging.ERROR))

    logger = setup_logging(initial_logging_level=clamped_level)
    args.handler(logger, args)


def main() -> None:
    """Entry point: delegate to ``_parse_args``, which parses the command line and runs the handler."""
    _parse_args()


# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
