From 80e464b220e1e01b01396dc5e6b73e19f2345ddc Mon Sep 17 00:00:00 2001 From: Martin Maarand <maarand@teklia.com> Date: Mon, 1 Feb 2021 12:45:46 +0000 Subject: [PATCH] support chunks in git export; rebase before merging to avoid merging manually --- arkindex_worker/git.py | 120 ++++++++++-- tests/test_git.py | 435 +++++++++++++++++++++++++++++++++++------ 2 files changed, 475 insertions(+), 80 deletions(-) diff --git a/arkindex_worker/git.py b/arkindex_worker/git.py index 3a84acb4..4f4f8e41 100644 --- a/arkindex_worker/git.py +++ b/arkindex_worker/git.py @@ -6,51 +6,130 @@ from datetime import datetime from pathlib import Path import gitlab +import requests import sh from arkindex_worker import logger NOTHING_TO_COMMIT_MSG = "nothing to commit, working tree clean" +MR_HAS_CONFLICTS_ERROR_CODE = 406 class GitlabHelper: """Helper class to save files to GitLab repository""" - def __init__(self, project_id, gitlab_url, gitlab_token, branch): + def __init__( + self, + project_id, + gitlab_url, + gitlab_token, + branch, + rebase_wait_period=1, + delete_source_branch=True, + max_rebase_tries=10, + ): """ - :param project_id: the id of the gitlab project :param gitlab_url: gitlab server url :param gitlab_token: gitlab private token of user with permission to accept merge requests :param branch: name of the branch to where the exported branch will be merged + :param rebase_wait_period: seconds to wait between each poll to check whether rebase has finished + :param delete_source_branch: should delete the source branch after merging? + :param max_rebase_tries: max number of tries to rebase when merging before giving up """ self.project_id = project_id self.gitlab_url = gitlab_url self.gitlab_token = str(gitlab_token).strip() self.branch = branch + self.rebase_wait_period = rebase_wait_period + self.delete_source_branch = delete_source_branch + self.max_rebase_tries = max_rebase_tries logger.info("Creating a Gitlab client") self._api = gitlab.Gitlab(self.gitlab_url, private_token=self.gitlab_token) self.project = self._api.projects.get(self.project_id) + self.is_rebase_finished = False - def merge(self, branch_name, title): - """Create a merge request and try to merge""" + def merge(self, branch_name, title) -> bool: + """ + Create a merge request and try to merge. + Always rebase first to avoid conflicts from MRs made in parallel + :param branch_name: source branch name + :param title: title of the merge request + :return: was the branch successfully merged? + """ + mr = None + # always rebase first, because other workers might have merged already + for i in range(self.max_rebase_tries): + logger.info(f"Trying to merge, try nr: {i}") + try: + if mr is None: + mr = self._create_merge_request(branch_name, title) + + mr.rebase() + rebase_success = self._wait_for_rebase_to_finish(mr.iid) + if not rebase_success: + logger.error("Rebase failed, won't be able to merge!") + return False + + mr.merge(should_remove_source_branch=self.delete_source_branch) + logger.info("Merge successful") + return True + except gitlab.GitlabMRClosedError as e: + if e.response_code == MR_HAS_CONFLICTS_ERROR_CODE: + logger.info("Merge failed, trying to rebase and merge again.") + continue + else: + logger.error(f"Merge was not successful: {e}") + return False + except gitlab.GitlabError as e: + logger.error(f"Gitlab error: {e}") + if 400 <= e.response_code < 500: + # 4XX errors shouldn't be fixed by retrying + raise e + except requests.exceptions.ConnectionError as e: + logger.error(f"Server connection error, will wait and retry: {e}") + time.sleep(self.rebase_wait_period) + + return False + + def _create_merge_request(self, branch_name, title): logger.info(f"Creating a merge request for {branch_name}") + # retry_transient_error will retry the request on 50X errors + # https://github.com/python-gitlab/python-gitlab/blob/265dbbdd37af88395574564aeb3fd0350288a18c/gitlab/__init__.py#L539 mr = self.project.mergerequests.create( { "source_branch": branch_name, "target_branch": self.branch, "title": title, - } + }, ) - logger.info("Attempting to merge") - try: - mr.merge() - logger.info("Merge successful") + return mr + + def _get_merge_request(self, merge_request_id, include_rebase_in_progress=True): + return self.project.mergerequests.get( + merge_request_id, include_rebase_in_progress=include_rebase_in_progress + ) + + def _wait_for_rebase_to_finish(self, merge_request_id) -> bool: + """ + Poll the merge request until it has finished rebasing + :param merge_request_id: + :return: rebase finished successfully? + """ + + logger.info("Checking if rebase has finished..") + self.is_rebase_finished = False + while not self.is_rebase_finished: + time.sleep(self.rebase_wait_period) + mr = self._get_merge_request(merge_request_id) + self.is_rebase_finished = not mr.rebase_in_progress + if mr.merge_error is None: + logger.info("Rebase has finished") return True - except gitlab.GitlabMRClosedError as e: - logger.error(f"Merge was not successful: {e}") - return False + + logger.error(f"Rebase failed: {mr.merge_error}") + return False def make_backup(path): @@ -220,12 +299,19 @@ class GitHelper: # move exported files to git directory file_count = self._move_files_to_git(export_out_dir) + # use timestamp to avoid branch name conflicts with multiple chunks + current_timestamp = datetime.isoformat(datetime.now()) + # ":" is not allowed in a branch name + branch_timestamp = current_timestamp.replace(":", ".") # add files to a new branch - branch_name = f"workflow_{self.workflow_id}" + branch_name = f"workflow_{self.workflow_id}_{branch_timestamp}" self._git.checkout("-b", branch_name) self._git.add("-A") try: - self._git.commit("-m", f"Exported files from workflow: {self.workflow_id}") + self._git.commit( + "-m", + f"Exported files from workflow: {self.workflow_id} at {current_timestamp}", + ) except sh.ErrorReturnCode as e: if NOTHING_TO_COMMIT_MSG in str(e.stdout): logger.warning("Nothing to commit (no changes)") @@ -249,7 +335,11 @@ class GitHelper: self._git.push("-u", "origin", "HEAD") if self.gitlab_helper: - self.gitlab_helper.merge(branch_name, f"Merge {branch_name}") + try: + self.gitlab_helper.merge(branch_name, f"Merge {branch_name}") + except Exception as e: + logger.error(f"Merge failed: {e}") + raise e else: logger.info( "No gitlab_helper defined, not trying to merge the pushed branch" diff --git a/tests/test_git.py b/tests/test_git.py index 744b89a8..4b10cddd 100644 --- a/tests/test_git.py +++ b/tests/test_git.py @@ -2,9 +2,39 @@ from pathlib import Path import pytest +from gitlab import GitlabCreateError, GitlabError +from requests import ConnectionError from arkindex_worker.git import GitlabHelper +PROJECT_ID = 21259233 +MERGE_REQUEST_ID = 7 +SOURCE_BRANCH = "new_branch" +TARGET_BRANCH = "master" +MR_TITLE = "merge request title" +CREATE_MR_RESPONSE_JSON = { + "id": 107, + "iid": MERGE_REQUEST_ID, + "project_id": PROJECT_ID, + "title": MR_TITLE, + "target_branch": TARGET_BRANCH, + "source_branch": SOURCE_BRANCH, + # several fields omitted +} + + +@pytest.fixture +def fake_responses(responses): + responses.add( + responses.GET, + "https://gitlab.com/api/v4/projects/balsac_exporter%2Fbalsac-exported-xmls-testing", + json={ + "id": PROJECT_ID, + # several fields omitted + }, + ) + return responses + def test_clone_done(fake_git_helper): assert not fake_git_helper.is_clone_finished @@ -61,6 +91,9 @@ def test_merge(mocker): gitlab_helper = GitlabHelper("project_id", "url", "token", "branch") + gitlab_helper._wait_for_rebase_to_finish = mocker.MagicMock() + gitlab_helper._wait_for_rebase_to_finish.return_value = True + success = gitlab_helper.merge("source", "merge title") assert success @@ -68,105 +101,377 @@ def test_merge(mocker): assert merqe_request.merge.call_count == 1 -def test_merge_request(responses, fake_gitlab_helper_factory): - project_id = 21259233 - merge_request_id = 7 - source_branch = "new_branch" - target_branch = "master" - mr_title = "merge request title" +def test_merge__rebase_failed(mocker): + api = mocker.MagicMock() + project = mocker.MagicMock() + api.projects.get.return_value = project + merqe_request = mocker.MagicMock() + project.mergerequests.create.return_value = merqe_request + mocker.patch("gitlab.Gitlab", return_value=api) - responses.add( - responses.GET, - "https://gitlab.com/api/v4/projects/balsac_exporter%2Fbalsac-exported-xmls-testing", + gitlab_helper = GitlabHelper("project_id", "url", "token", "branch") + + gitlab_helper._wait_for_rebase_to_finish = mocker.MagicMock() + gitlab_helper._wait_for_rebase_to_finish.return_value = False + + success = gitlab_helper.merge("source", "merge title") + + assert not success + assert project.mergerequests.create.call_count == 1 + assert merqe_request.merge.call_count == 0 + + +def test_wait_for_rebase_to_finish(fake_responses, fake_gitlab_helper_factory): + get_mr_url = f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests/{MERGE_REQUEST_ID}?include_rebase_in_progress=True" + + fake_responses.add( + fake_responses.GET, + get_mr_url, json={ - "id": project_id, - # several fields omitted + "rebase_in_progress": True, + "merge_error": None, }, ) - responses.add( - responses.POST, - f"https://gitlab.com/api/v4/projects/{project_id}/merge_requests", + fake_responses.add( + fake_responses.GET, + get_mr_url, json={ - "id": 107, - "iid": merge_request_id, - "project_id": project_id, - "title": mr_title, - "target_branch": target_branch, - "source_branch": source_branch, - # several fields omitted + "rebase_in_progress": True, + "merge_error": None, }, ) - responses.add( - responses.PUT, - f"https://gitlab.com/api/v4/projects/{project_id}/merge_requests/{merge_request_id}/merge", + fake_responses.add( + fake_responses.GET, + get_mr_url, + json={ + "rebase_in_progress": False, + "merge_error": None, + }, + ) + + gitlab_helper = fake_gitlab_helper_factory() + + success = gitlab_helper._wait_for_rebase_to_finish(MERGE_REQUEST_ID) + + assert success + assert len(fake_responses.calls) == 4 + assert gitlab_helper.is_rebase_finished + + +def test_wait_for_rebase_to_finish__fail_connection_error( + fake_responses, fake_gitlab_helper_factory +): + get_mr_url = f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests/{MERGE_REQUEST_ID}?include_rebase_in_progress=True" + + fake_responses.add( + fake_responses.GET, + get_mr_url, + body=ConnectionError(), + ) + + gitlab_helper = fake_gitlab_helper_factory() + + with pytest.raises(ConnectionError): + gitlab_helper._wait_for_rebase_to_finish(MERGE_REQUEST_ID) + + assert len(fake_responses.calls) == 2 + assert not gitlab_helper.is_rebase_finished + + +def test_wait_for_rebase_to_finish__fail_server_error( + fake_responses, fake_gitlab_helper_factory +): + get_mr_url = f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests/{MERGE_REQUEST_ID}?include_rebase_in_progress=True" + + fake_responses.add( + fake_responses.GET, + get_mr_url, + body="Service Unavailable", + status=503, + ) + + gitlab_helper = fake_gitlab_helper_factory() + + with pytest.raises(GitlabError): + gitlab_helper._wait_for_rebase_to_finish(MERGE_REQUEST_ID) + + assert len(fake_responses.calls) == 2 + assert not gitlab_helper.is_rebase_finished + + +def test_merge_request(fake_responses, fake_gitlab_helper_factory, mocker): + fake_responses.add( + fake_responses.POST, + f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests", + json=CREATE_MR_RESPONSE_JSON, + ) + + fake_responses.add( + fake_responses.PUT, + f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests/{MERGE_REQUEST_ID}/rebase", + json={}, + ) + + fake_responses.add( + fake_responses.PUT, + f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests/{MERGE_REQUEST_ID}/merge?should_remove_source_branch=True", json={ - "iid": merge_request_id, + "iid": MERGE_REQUEST_ID, "state": "merged", # several fields omitted }, ) - # the responses are defined in the same order as they are expected to be called - expected_http_methods = [r.method for r in responses._matches] - expected_urls = [r.url for r in responses._matches] + # the fake_responses are defined in the same order as they are expected to be called + expected_http_methods = [r.method for r in fake_responses._matches] + expected_urls = [r.url for r in fake_responses._matches] gitlab_helper = fake_gitlab_helper_factory() + gitlab_helper._wait_for_rebase_to_finish = mocker.MagicMock() + gitlab_helper._wait_for_rebase_to_finish.return_value = True - success = gitlab_helper.merge(source_branch, mr_title) + success = gitlab_helper.merge(SOURCE_BRANCH, MR_TITLE) assert success - assert len(responses.calls) == 3 - assert [c.request.method for c in responses.calls] == expected_http_methods - assert [c.request.url for c in responses.calls] == expected_urls + assert len(fake_responses.calls) == 4 + assert [c.request.method for c in fake_responses.calls] == expected_http_methods + assert [c.request.url for c in fake_responses.calls] == expected_urls -def test_merge_request_fail(responses, fake_gitlab_helper_factory): - project_id = 21259233 - merge_request_id = 7 - source_branch = "new_branch" - target_branch = "master" - mr_title = "merge request title" +def test_merge_request_fail(fake_responses, fake_gitlab_helper_factory, mocker): + fake_responses.add( + fake_responses.POST, + f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests", + json=CREATE_MR_RESPONSE_JSON, + ) - responses.add( - responses.GET, - "https://gitlab.com/api/v4/projects/balsac_exporter%2Fbalsac-exported-xmls-testing", + fake_responses.add( + fake_responses.PUT, + f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests/{MERGE_REQUEST_ID}/rebase", + json={}, + ) + + fake_responses.add( + fake_responses.PUT, + f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests/{MERGE_REQUEST_ID}/merge?should_remove_source_branch=True", + json={"error": "Method not allowed"}, + status=405, + ) + + # the fake_responses are defined in the same order as they are expected to be called + expected_http_methods = [r.method for r in fake_responses._matches] + expected_urls = [r.url for r in fake_responses._matches] + + gitlab_helper = fake_gitlab_helper_factory() + gitlab_helper._wait_for_rebase_to_finish = mocker.MagicMock() + gitlab_helper._wait_for_rebase_to_finish.return_value = True + + success = gitlab_helper.merge(SOURCE_BRANCH, MR_TITLE) + + assert not success + assert len(fake_responses.calls) == 4 + assert [c.request.method for c in fake_responses.calls] == expected_http_methods + assert [c.request.url for c in fake_responses.calls] == expected_urls + + +def test_merge_request__success_after_errors( + fake_responses, fake_gitlab_helper_factory +): + fake_responses.add( + fake_responses.POST, + f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests", + json=CREATE_MR_RESPONSE_JSON, + ) + + rebase_url = f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests/{MERGE_REQUEST_ID}/rebase" + + fake_responses.add( + fake_responses.PUT, + rebase_url, + json={"rebase_in_progress": True}, + ) + + get_mr_url = f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests/{MERGE_REQUEST_ID}?include_rebase_in_progress=True" + + fake_responses.add( + fake_responses.GET, + get_mr_url, + body="Service Unavailable", + status=503, + ) + + fake_responses.add( + fake_responses.PUT, + rebase_url, + json={"rebase_in_progress": True}, + ) + + fake_responses.add( + fake_responses.GET, + get_mr_url, + body=ConnectionError(), + ) + + fake_responses.add( + fake_responses.PUT, + rebase_url, + json={"rebase_in_progress": True}, + ) + + fake_responses.add( + fake_responses.GET, + get_mr_url, json={ - "id": project_id, - # several fields omitted + "rebase_in_progress": True, + "merge_error": None, }, ) - responses.add( - responses.POST, - f"https://gitlab.com/api/v4/projects/{project_id}/merge_requests", + fake_responses.add( + fake_responses.GET, + get_mr_url, json={ - "id": 107, - "iid": merge_request_id, - "project_id": project_id, - "title": mr_title, - "target_branch": target_branch, - "source_branch": source_branch, + "rebase_in_progress": False, + "merge_error": None, + }, + ) + + fake_responses.add( + fake_responses.PUT, + f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests/{MERGE_REQUEST_ID}/merge?should_remove_source_branch=True", + json={ + "iid": MERGE_REQUEST_ID, + "state": "merged", # several fields omitted }, ) - responses.add( - responses.PUT, - f"https://gitlab.com/api/v4/projects/{project_id}/merge_requests/{merge_request_id}/merge", - json={"error": "Method not allowed"}, - status=405, + # the fake_responses are defined in the same order as they are expected to be called + expected_http_methods = [r.method for r in fake_responses._matches] + expected_urls = [r.url for r in fake_responses._matches] + + gitlab_helper = fake_gitlab_helper_factory() + + success = gitlab_helper.merge(SOURCE_BRANCH, MR_TITLE) + + assert success + assert len(fake_responses.calls) == 10 + assert [c.request.method for c in fake_responses.calls] == expected_http_methods + assert [c.request.url for c in fake_responses.calls] == expected_urls + + +def test_merge_request__fail_bad_request(fake_responses, fake_gitlab_helper_factory): + fake_responses.add( + fake_responses.POST, + f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests", + json=CREATE_MR_RESPONSE_JSON, + ) + + rebase_url = f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests/{MERGE_REQUEST_ID}/rebase" + + fake_responses.add( + fake_responses.PUT, + rebase_url, + json={"rebase_in_progress": True}, ) - # the responses are defined in the same order as they are expected to be called - expected_http_methods = [r.method for r in responses._matches] - expected_urls = [r.url for r in responses._matches] + get_mr_url = f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests/{MERGE_REQUEST_ID}?include_rebase_in_progress=True" + + fake_responses.add( + fake_responses.GET, + get_mr_url, + body="Bad Request", + status=400, + ) + + # the fake_responses are defined in the same order as they are expected to be called + expected_http_methods = [r.method for r in fake_responses._matches] + expected_urls = [r.url for r in fake_responses._matches] gitlab_helper = fake_gitlab_helper_factory() - success = gitlab_helper.merge(source_branch, mr_title) - assert not success - assert len(responses.calls) == 3 - assert [c.request.method for c in responses.calls] == expected_http_methods - assert [c.request.url for c in responses.calls] == expected_urls + with pytest.raises(GitlabError): + gitlab_helper.merge(SOURCE_BRANCH, MR_TITLE) + + assert len(fake_responses.calls) == 4 + assert [c.request.method for c in fake_responses.calls] == expected_http_methods + assert [c.request.url for c in fake_responses.calls] == expected_urls + + +def test_create_merge_request__no_retry_5xx_error( + fake_responses, fake_gitlab_helper_factory +): + request_url = f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests" + + fake_responses.add( + fake_responses.POST, + request_url, + body="Service Unavailable", + status=503, + ) + + # the fake_responses are defined in the same order as they are expected to be called + expected_http_methods = [r.method for r in fake_responses._matches] + expected_urls = [r.url for r in fake_responses._matches] + + gitlab_helper = fake_gitlab_helper_factory() + + with pytest.raises(GitlabCreateError): + gitlab_helper.project.mergerequests.create( + { + "source_branch": "branch", + "target_branch": gitlab_helper.branch, + "title": "MR title", + } + ) + + assert len(fake_responses.calls) == 2 + assert [c.request.method for c in fake_responses.calls] == expected_http_methods + assert [c.request.url for c in fake_responses.calls] == expected_urls + + +def test_create_merge_request__retry_5xx_error( + fake_responses, fake_gitlab_helper_factory +): + request_url = f"https://gitlab.com/api/v4/projects/{PROJECT_ID}/merge_requests?retry_transient_errors=True" + + fake_responses.add( + fake_responses.POST, + request_url, + body="Service Unavailable", + status=503, + ) + + fake_responses.add( + fake_responses.POST, + request_url, + body="Service Unavailable", + status=503, + ) + + fake_responses.add( + fake_responses.POST, + request_url, + json=CREATE_MR_RESPONSE_JSON, + ) + + # the fake_responses are defined in the same order as they are expected to be called + expected_http_methods = [r.method for r in fake_responses._matches] + expected_urls = [r.url for r in fake_responses._matches] + + gitlab_helper = fake_gitlab_helper_factory() + + gitlab_helper.project.mergerequests.create( + { + "source_branch": "branch", + "target_branch": gitlab_helper.branch, + "title": "MR title", + }, + retry_transient_errors=True, + ) + + assert len(fake_responses.calls) == 4 + assert [c.request.method for c in fake_responses.calls] == expected_http_methods + assert [c.request.url for c in fake_responses.calls] == expected_urls -- GitLab