diff --git a/.github/workflows/system.yml b/.github/workflows/system.yml index fa9eb70..43f478e 100644 --- a/.github/workflows/system.yml +++ b/.github/workflows/system.yml @@ -25,19 +25,13 @@ jobs: - name: Get tests run: uv run commit0 get-tests simpy - name: Test - env: - MODAL_TOKEN_ID: ${{secrets.MODAL_TOKEN_ID}} - MODAL_TOKEN_SECRET: ${{secrets.MODAL_TOKEN_SECRET}} run: | - uv run commit0 test simpy tests/test_event.py::test_succeed --reference --rebuild - uv run commit0 test simpy tests/test_event.py::test_succeed --reference + uv run commit0 test simpy tests/test_event.py::test_succeed --reference --rebuild --backend local + uv run commit0 test simpy tests/test_event.py::test_succeed --reference --backend local - name: Evaluate - env: - MODAL_TOKEN_ID: ${{secrets.MODAL_TOKEN_ID}} - MODAL_TOKEN_SECRET: ${{secrets.MODAL_TOKEN_SECRET}} run: | - uv run commit0 evaluate --reference --rebuild - uv run commit0 evaluate --reference + uv run commit0 evaluate --reference --rebuild --backend local + uv run commit0 evaluate --reference --backend local - name: Lint run: uv run commit0 lint commit0/harness/ - name: Save diff --git a/commit0/harness/spec.py b/commit0/harness/spec.py index 7a71118..e879cb8 100644 --- a/commit0/harness/spec.py +++ b/commit0/harness/spec.py @@ -114,9 +114,13 @@ def make_repo_script_list(self) -> list[str]: base_commit = self.instance["base_commit"] setup_commands = [ - f"git clone -o origin https://github.com/{repo} {self.repo_directory}", + # Use --depth 1 for shallow clone to prevent agents from accessing + # git history and exploiting it to retrieve original implementations + f"git clone --depth 1 -o origin https://github.com/{repo} {self.repo_directory}", f"chmod -R 777 {self.repo_directory}", # So nonroot user can run tests f"cd {self.repo_directory}", + # Fetch both commits needed: env_setup_commit for setup and base_commit for later reset + f"git fetch --depth 1 origin {env_setup_commit} {base_commit}", f"git reset --hard {env_setup_commit}", # Remove the remote so the agent won't see newer commits. "git remote remove origin", @@ -217,10 +221,15 @@ def make_repo_script_list(self) -> list[str]: if version < 7: specs["python"] = 3.7 + base_commit = self.instance["base_commit"] setup_commands = [ - f"git clone -o origin https://github.com/{repo} {self.repo_directory}", + # Use --depth 1 for shallow clone to prevent agents from accessing + # git history and exploiting it to retrieve original implementations + f"git clone --depth 1 -o origin https://github.com/{repo} {self.repo_directory}", f"chmod -R 777 {self.repo_directory}", # So nonroot user can run tests f"cd {self.repo_directory}", + # Fetch base_commit needed for eval script reset + f"git fetch --depth 1 origin {base_commit}", # Remove the remote so the agent won't see newer commits. "git remote remove origin", f"uv venv --python {specs['python']}",