diff --git a/.isort.cfg b/.isort.cfg
index 5bc2f237603d937d42e4db424ed6e105ca2d0c45..075196758037f883400e99e11037996b5419a90c 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -8,4 +8,4 @@ line_length = 88
 
 default_section=FIRSTPARTY
 known_first_party = arkindex,arkindex_common
-known_third_party =PIL,apistar,gnupg,pytest,requests,setuptools,tenacity,yaml
+known_third_party =PIL,apistar,gitlab,gnupg,pytest,requests,setuptools,sh,tenacity,yaml
diff --git a/arkindex_worker/git.py b/arkindex_worker/git.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a84acb4465e5064712919293c32667e4f406f72
--- /dev/null
+++ b/arkindex_worker/git.py
@@ -0,0 +1,317 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import shutil
+import time
+from datetime import datetime
+from pathlib import Path
+
+import gitlab
+import sh
+
+from arkindex_worker import logger
+
+NOTHING_TO_COMMIT_MSG = "nothing to commit, working tree clean"
+
+
+class GitlabHelper:
+    """Helper class to save files to GitLab repository"""
+
+    def __init__(self, project_id, gitlab_url, gitlab_token, branch):
+        """
+        Create the Gitlab client and fetch the project.
+
+        :param project_id: the id of the gitlab project
+        :param gitlab_url: gitlab server url
+        :param gitlab_token: gitlab private token of user with permission to accept merge requests
+        :param branch: name of the branch to where the exported branch will be merged
+        """
+        self.project_id = project_id
+        self.gitlab_url = gitlab_url
+        self.gitlab_token = str(gitlab_token).strip()
+        self.branch = branch
+
+        logger.info("Creating a Gitlab client")
+        self._api = gitlab.Gitlab(self.gitlab_url, private_token=self.gitlab_token)
+        self.project = self._api.projects.get(self.project_id)
+
+    def merge(self, branch_name, title):
+        """
+        Create a merge request from branch_name to self.branch and try to merge it.
+
+        :param branch_name: name of the source branch of the merge request
+        :param title: title of the merge request
+        :return: True if the merge succeeded, False if Gitlab refused the merge
+        """
+        logger.info(f"Creating a merge request for {branch_name}")
+        mr = self.project.mergerequests.create(
+            {
+                "source_branch": branch_name,
+                "target_branch": self.branch,
+                "title": title,
+            }
+        )
+        logger.info("Attempting to merge")
+        try:
+            mr.merge()
+            logger.info("Merge successful")
+            return True
+        except gitlab.GitlabMRClosedError as e:
+            logger.error(f"Merge was not successful: {e}")
+            return False
+
+
+def make_backup(path):
+    """
+    Create a backup file in the same directory with timestamp as suffix ".bak_{timestamp}"
+
+    :param path: file to be backed up
+    :raises ValueError: if the file to be backed up does not exist
+    """
+    path = Path(path)
+    if not path.exists():
+        raise ValueError(f"No file to backup! File not found: {path}")
+    # timestamp with milliseconds
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
+    backup_path = Path(str(path) + f".bak_{timestamp}")
+    shutil.copy(path, backup_path)
+    logger.info(f"Made a backup {backup_path}")
+
+
+def prepare_git_key(
+    private_key,
+    known_hosts,
+    private_key_path="~/.ssh/id_ed25519",
+    known_hosts_path="~/.ssh/known_hosts",
+):
+    """
+    Prepare the git keys (put them in to the correct place) so that git could be used.
+    Fixes some whitespace problems that come from arkindex secrets store (Django admin).
+
+    Also creates a backup of the previous keys if they exist, to avoid losing the
+    original keys of the developers.
+
+    :param private_key: git private key contents
+    :param known_hosts: git known_hosts contents
+    :param private_key_path: path where to put the private key
+    :param known_hosts_path: path where to put the known_hosts
+    """
+    # secrets admin UI seems to strip the trailing whitespace
+    # but git requires the key file to have a new line at the end
+    # for some reason uses CRLF line endings, but git doesn't like that
+    private_key = private_key.replace("\r", "") + "\n"
+    known_hosts = known_hosts.replace("\r", "") + "\n"
+
+    private_key_path = Path(private_key_path).expanduser()
+    known_hosts_path = Path(known_hosts_path).expanduser()
+
+    # only back up the existing files when they are about to be changed
+    if private_key_path.exists() and private_key_path.read_text() != private_key:
+        make_backup(private_key_path)
+
+    if known_hosts_path.exists() and known_hosts_path.read_text() != known_hosts:
+        make_backup(known_hosts_path)
+
+    # the target directories might not exist yet (no ~/.ssh on a fresh machine)
+    private_key_path.parent.mkdir(mode=0o700, parents=True, exist_ok=True)
+    known_hosts_path.parent.mkdir(mode=0o700, parents=True, exist_ok=True)
+
+    # private key must be private, otherwise git will fail
+    # restrict the permissions before writing the contents,
+    # so that the key is never readable by other users
+    # expecting octal for permissions
+    private_key_path.touch(mode=0o600, exist_ok=True)
+    private_key_path.chmod(0o600)
+    private_key_path.write_text(private_key)
+    known_hosts_path.write_text(known_hosts)
+
+    logger.info(f"Private key size after: {private_key_path.stat().st_size}")
+    logger.info(f"Known size after: {known_hosts_path.stat().st_size}")
+
+
+class GitHelper:
+    """
+    A helper class for running git commands
+
+    At the beginning of the workflow call `run_clone_in_background`.
+    When all the files are ready to be added to git then call
+    `save_files` to move the files in to the git repository
+    and try to push them.
+
+    Pseudo code example:
+    in worker.configure() configure the git helper and start the cloning:
+    ```
+    gitlab = GitlabHelper(...)
+    workflow_id = os.environ["ARKINDEX_PROCESS_ID"]
+    prepare_git_key(...)
+    self.git_helper = GitHelper(workflow_id=workflow_id, gitlab_helper=gitlab, ...)
+    self.git_helper.run_clone_in_background()
+    ```
+
+    at the end of the workflow (at the end of worker.run()) push the files to git:
+    ```
+    self.git_helper.save_files(self.out_dir)
+    ```
+    """
+
+    def __init__(
+        self,
+        repo_url,
+        git_dir,
+        export_path,
+        workflow_id,
+        gitlab_helper: GitlabHelper,
+        git_clone_wait_period=1,
+    ):
+        """
+        Store the settings and create the directory where the repository is cloned.
+
+        :param repo_url: the url of the git repository where the export will be pushed
+        :param git_dir: the directory where to clone the git repository
+        :param export_path: the path inside the git repository where to put the exported files
+        :param workflow_id: the process id to see the workflow graph in the frontend
+        :param gitlab_helper: helper for gitlab
+        :param git_clone_wait_period: check if clone has finished every N seconds at the end of the workflow
+        """
+        logger.info("Creating git helper")
+        self.repo_url = repo_url
+        self.git_dir = Path(git_dir)
+        self.export_path = self.git_dir / export_path
+        self.workflow_id = workflow_id
+        self.gitlab_helper = gitlab_helper
+        self.git_clone_wait_period = git_clone_wait_period
+        # state of the background clone, filled in by _clone_done
+        self.is_clone_finished = False
+        self.cmd = None
+        self.success = None
+        self.exit_code = None
+
+        self.git_dir.mkdir(parents=True, exist_ok=True)
+        # run git commands outside of the repository (no need to change dir)
+        self._git = sh.git.bake("-C", self.git_dir)
+
+    def _clone_done(self, cmd, success, exit_code):
+        """
+        Method that is called when git clone has finished in the background
+
+        :param cmd: the finished clone command
+        :param success: whether the clone command succeeded
+        :param exit_code: the exit code of the clone command
+        """
+        logger.info("Finishing cloning")
+        self.cmd = cmd
+        self.success = success
+        self.exit_code = exit_code
+        # set the flag last: _wait_for_clone_to_finish reads self.success
+        # as soon as the flag is set
+        self.is_clone_finished = True
+        if not success:
+            logger.error(f"Clone failed: {cmd} : {success} : {exit_code}")
+        logger.info("Cloning finished")
+
+    def run_clone_in_background(self):
+        """
+        Clones the git repository in the background in to the self.git_dir directory.
+
+        `self.is_clone_finished` can be used to know whether the cloning has finished
+        or not.
+
+        :return: the running clone command
+        """
+        logger.info(f"Starting clone {self.repo_url} in background")
+        cmd = sh.git.clone(
+            self.repo_url, self.git_dir, _bg=True, _done=self._clone_done
+        )
+        logger.info(f"Continuing clone {self.repo_url} in background")
+        return cmd
+
+    def _wait_for_clone_to_finish(self):
+        """
+        Block until the background clone has finished.
+
+        :raises ValueError: if the clone was not successful
+        """
+        logger.info("Checking if cloning has finished..")
+        while not self.is_clone_finished:
+            time.sleep(self.git_clone_wait_period)
+        logger.info("Cloning has finished")
+
+        if not self.success:
+            logger.error("Clone was not a success")
+            logger.error(f"Clone error exit code: {str(self.exit_code)}")
+            raise ValueError("Clone was not a success")
+
+    def save_files(self, export_out_dir: Path):
+        """
+        Move files in export_out_dir to the cloned git repository
+        and try to merge the created files if possible.
+
+        :param export_out_dir: the directory containing the exported files
+        :raises ValueError: if the clone was not successful
+        :raises sh.ErrorReturnCode: if a git command fails
+        """
+        self._wait_for_clone_to_finish()
+
+        # move exported files to git directory
+        file_count = self._move_files_to_git(export_out_dir)
+
+        # add files to a new branch
+        branch_name = f"workflow_{self.workflow_id}"
+        self._git.checkout("-b", branch_name)
+        self._git.add("-A")
+        try:
+            self._git.commit("-m", f"Exported files from workflow: {self.workflow_id}")
+        except sh.ErrorReturnCode as e:
+            if NOTHING_TO_COMMIT_MSG in str(e.stdout):
+                logger.warning("Nothing to commit (no changes)")
+                return
+            logger.error(f"Commit failed:: {e}")
+            # bare raise keeps the original traceback
+            raise
+
+        # count the number of lines in the output
+        wc_cmd_out = str(
+            sh.wc(self._git.show("--stat", "--name-status", "--oneline", "HEAD"), "-l")
+        )
+        # -1 because the of the git command header
+        files_committed = int(wc_cmd_out.strip()) - 1
+        logger.info(f"Committed {files_committed} files")
+        if file_count != files_committed:
+            logger.warning(
+                f"Of {file_count} added files only {files_committed} were committed"
+            )
+
+        self._git.push("-u", "origin", "HEAD")
+
+        if self.gitlab_helper:
+            self.gitlab_helper.merge(branch_name, f"Merge {branch_name}")
+        else:
+            logger.info(
+                "No gitlab_helper defined, not trying to merge the pushed branch"
+            )
+
+    def _move_files_to_git(self, export_out_dir: Path) -> int:
+        """
+        Move all files in the export_out_dir to the git repository
+        while keeping the same directory structure
+
+        Files that already exist in the git repository are left untouched.
+
+        :param export_out_dir: the directory containing the exported files
+        :return: the number of files that were moved
+        """
+        file_count = 0
+        for file in export_out_dir.rglob("*.*"):
+            # the pattern also matches directories that have a dot in their name
+            if not file.is_file():
+                continue
+            rel_file_path = file.relative_to(export_out_dir)
+            out_file = self.export_path / rel_file_path
+            if not out_file.exists():
+                out_file.parent.mkdir(parents=True, exist_ok=True)
+                # rename does not work if the source and destination are not on the same mounts
+                # it will give an error: "OSError: [Errno 18] Invalid cross-device link:"
+                shutil.copy(file, out_file)
+                file.unlink()
+                file_count += 1
+        logger.info(f"Moved {file_count} files")
+        return file_count
diff --git a/requirements.txt b/requirements.txt
index e3bc00f71221517b3d4a57eb5366c3e732e5c8ed..9547fd6fdb08211ad48d212ca45dee071b2d505f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
 arkindex-client==1.0.4
 Pillow==7.2.0
+python-gitlab==2.5.0
 python-gnupg==0.4.6
+sh==1.14.0
 tenacity==6.2.0
diff --git a/tests/conftest.py b/tests/conftest.py
index c71746f634c54d5085ad041be780096283b3ba02..fd91f7015ca1776c1089cf7d02d51e63735f843e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,6 +7,7 @@ from pathlib import Path
 import pytest
 
 from arkindex.mock import MockApiClient
+from arkindex_worker.git import GitHelper, GitlabHelper
 from arkindex_worker.worker import ElementsWorker
 
 FIXTURES_DIR = Path(__file__).resolve().parent / "data"
@@ -116,3 +117,29 @@ def fake_dummy_worker():
     worker = ElementsWorker()
     worker.api_client = api_client
     return worker
+
+
+@pytest.fixture
+def fake_git_helper(mocker):
+    gitlab_helper = mocker.MagicMock()
+    return GitHelper(
+        "repo_url",
+        "/tmp/git_test/foo/",
+        "/tmp/test/path/",
+        "tmp_workflow_id",
+        gitlab_helper,
+    )
+
+
+@pytest.fixture
+def fake_gitlab_helper_factory():
+    # have to set up the responses, before creating the client
+    def run():
+        return GitlabHelper(
+            "balsac_exporter/balsac-exported-xmls-testing",
+            "https://gitlab.com",
+            "<GITLAB_TOKEN>",
+            "gitlab_branch",
+        )
+
+    return run
diff --git a/tests/test_git.py b/tests/test_git.py
new file mode 100644
index 0000000000000000000000000000000000000000..744b89a8594f60292c211473e194d09f73365a58
--- /dev/null
+++ b/tests/test_git.py
@@ -0,0 +1,186 @@
+# -*- coding: utf-8 -*-
+from pathlib import Path
+
+import pytest
+
+from arkindex_worker.git import GitlabHelper
+
+
+def test_clone_done(fake_git_helper):
+    assert not fake_git_helper.is_clone_finished
+    fake_git_helper._clone_done(None, None, None)
+    assert fake_git_helper.is_clone_finished
+
+
+def test_clone(fake_git_helper):
+    command = fake_git_helper.run_clone_in_background()
+    cmd_str = " ".join(list(map(str, command.cmd)))
+
+    assert "git" in cmd_str
+    assert "clone" in cmd_str
+
+
+def _get_fn_name_from_call(call):
+    # call.add(2, 3) => "add"
+    return str(call)[len("call.") :].split("(")[0]
+
+
+def test_save_files(fake_git_helper, mocker):
+    mocker.patch("sh.wc", return_value=2)
+    fake_git_helper._git = mocker.MagicMock()
+    fake_git_helper.is_clone_finished = True
+    fake_git_helper.success = True
+
+    fake_git_helper.save_files(Path("/tmp/test_1234/tmp/"))
+
+    expected_calls = ["checkout", "add", "commit", "show", "push"]
+    actual_calls = list(map(_get_fn_name_from_call, fake_git_helper._git.mock_calls))
+
+    assert actual_calls == expected_calls
+    assert fake_git_helper.gitlab_helper.merge.call_count == 1
+
+
+def test_save_files__fail_with_failed_clone(fake_git_helper, mocker):
+    mocker.patch("sh.wc", return_value=2)
+    fake_git_helper._git = mocker.MagicMock()
+    fake_git_helper.is_clone_finished = True
+
+    with pytest.raises(Exception) as execinfo:
+        fake_git_helper.save_files(Path("/tmp/test_1234/tmp/"))
+
+    assert execinfo.value.args[0] == "Clone was not a success"
+
+
+def test_move_files_to_git__skips_directories(fake_git_helper, tmp_path):
+    export_dir = tmp_path / "export"
+    # a directory with a dot in its name also matches the "*.*" pattern
+    (export_dir / "v1.0").mkdir(parents=True)
+    (export_dir / "v1.0" / "page.xml").write_text("<xml/>")
+    fake_git_helper.export_path = tmp_path / "repo"
+
+    file_count = fake_git_helper._move_files_to_git(export_dir)
+
+    assert file_count == 1
+    assert (tmp_path / "repo" / "v1.0" / "page.xml").read_text() == "<xml/>"
+    assert not (export_dir / "v1.0" / "page.xml").exists()
+
+
+def test_merge(mocker):
+    api = mocker.MagicMock()
+    project = mocker.MagicMock()
+    api.projects.get.return_value = project
+    merge_request = mocker.MagicMock()
+    project.mergerequests.create.return_value = merge_request
+    mocker.patch("gitlab.Gitlab", return_value=api)
+
+    gitlab_helper = GitlabHelper("project_id", "url", "token", "branch")
+
+    success = gitlab_helper.merge("source", "merge title")
+
+    assert success
+    assert project.mergerequests.create.call_count == 1
+    assert merge_request.merge.call_count == 1
+
+
+def test_merge_request(responses, fake_gitlab_helper_factory):
+    project_id = 21259233
+    merge_request_id = 7
+    source_branch = "new_branch"
+    target_branch = "master"
+    mr_title = "merge request title"
+
+    responses.add(
+        responses.GET,
+        "https://gitlab.com/api/v4/projects/balsac_exporter%2Fbalsac-exported-xmls-testing",
+        json={
+            "id": project_id,
+            # several fields omitted
+        },
+    )
+
+    responses.add(
+        responses.POST,
+        f"https://gitlab.com/api/v4/projects/{project_id}/merge_requests",
+        json={
+            "id": 107,
+            "iid": merge_request_id,
+            "project_id": project_id,
+            "title": mr_title,
+            "target_branch": target_branch,
+            "source_branch": source_branch,
+            # several fields omitted
+        },
+    )
+
+    responses.add(
+        responses.PUT,
+        f"https://gitlab.com/api/v4/projects/{project_id}/merge_requests/{merge_request_id}/merge",
+        json={
+            "iid": merge_request_id,
+            "state": "merged",
+            # several fields omitted
+        },
+    )
+
+    # the responses are defined in the same order as they are expected to be called
+    expected_http_methods = [r.method for r in responses._matches]
+    expected_urls = [r.url for r in responses._matches]
+
+    gitlab_helper = fake_gitlab_helper_factory()
+
+    success = gitlab_helper.merge(source_branch, mr_title)
+
+    assert success
+    assert len(responses.calls) == 3
+    assert [c.request.method for c in responses.calls] == expected_http_methods
+    assert [c.request.url for c in responses.calls] == expected_urls
+
+
+def test_merge_request_fail(responses, fake_gitlab_helper_factory):
+    project_id = 21259233
+    merge_request_id = 7
+    source_branch = "new_branch"
+    target_branch = "master"
+    mr_title = "merge request title"
+
+    responses.add(
+        responses.GET,
+        "https://gitlab.com/api/v4/projects/balsac_exporter%2Fbalsac-exported-xmls-testing",
+        json={
+            "id": project_id,
+            # several fields omitted
+        },
+    )
+
+    responses.add(
+        responses.POST,
+        f"https://gitlab.com/api/v4/projects/{project_id}/merge_requests",
+        json={
+            "id": 107,
+            "iid": merge_request_id,
+            "project_id": project_id,
+            "title": mr_title,
+            "target_branch": target_branch,
+            "source_branch": source_branch,
+            # several fields omitted
+        },
+    )
+
+    responses.add(
+        responses.PUT,
+        f"https://gitlab.com/api/v4/projects/{project_id}/merge_requests/{merge_request_id}/merge",
+        json={"error": "Method not allowed"},
+        status=405,
+    )
+
+    # the responses are defined in the same order as they are expected to be called
+    expected_http_methods = [r.method for r in responses._matches]
+    expected_urls = [r.url for r in responses._matches]
+
+    gitlab_helper = fake_gitlab_helper_factory()
+    success = gitlab_helper.merge(source_branch, mr_title)
+
+    assert not success
+    assert len(responses.calls) == 3
+    assert [c.request.method for c in responses.calls] == expected_http_methods
+    assert [c.request.url for c in responses.calls] == expected_urls