diff --git a/package-lock.json b/package-lock.json index 5aa28e8..dee28f5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1717,6 +1717,15 @@ "node": ">=18" } }, + "node_modules/yargs-parser": { + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, "packages/safe-chain": { "name": "@aikidosec/safe-chain", "version": "1.0.0", @@ -1728,7 +1737,8 @@ "node-forge": "1.3.1", "npm-registry-fetch": "18.0.2", "ora": "8.2.0", - "semver": "7.7.2" + "semver": "7.7.2", + "yargs-parser": "^21.1.1" }, "bin": { "aikido-bun": "bin/aikido-bun.js", diff --git a/packages/safe-chain/package.json b/packages/safe-chain/package.json index ee95737..08d8a19 100644 --- a/packages/safe-chain/package.json +++ b/packages/safe-chain/package.json @@ -32,6 +32,7 @@ "license": "AGPL-3.0-or-later", "description": "The Aikido Safe Chain wraps around the [npm cli](https://github.com/npm/cli), [npx](https://github.com/npm/cli/blob/latest/docs/content/commands/npx.md), [yarn](https://yarnpkg.com/), [pnpm](https://pnpm.io/), [pnpx](https://pnpm.io/cli/dlx), [bun](https://bun.sh/), and [bunx](https://bun.sh/docs/cli/bunx) to provide extra checks before installing new packages. This tool will detect when a package contains malware and prompt you to exit, preventing npm, npx, yarn, pnpm, pnpx, bun, or bunx from downloading or running the malware.", "dependencies": { + "yargs-parser": "^21.1.1", "chalk": "5.4.1", "https-proxy-agent": "7.0.6", "make-fetch-happen": "14.0.3", diff --git a/packages/safe-chain/src/packagemanager/pip/runPipCommand.js b/packages/safe-chain/src/packagemanager/pip/runPipCommand.js index 969363b..c4a1ac6 100644 --- a/packages/safe-chain/src/packagemanager/pip/runPipCommand.js +++ b/packages/safe-chain/src/packagemanager/pip/runPipCommand.js @@ -2,21 +2,77 @@ import { ui } from "../../environment/userInteraction.js"; import { safeSpawn } from "../../utils/safeSpawn.js"; import { mergeSafeChainProxyEnvironmentVariables } from "../../registryProxy/registryProxy.js"; import { getCaCertPath } from "../../registryProxy/certUtils.js"; +import { knownPipRegistries } from "../../registryProxy/parsePackageFromUrl.js"; +import yargsParser from "yargs-parser"; + +function extractHostsFromPipArgs(args) { + function hostFromString(input) { + if (typeof input !== "string" || input.length === 0) return undefined; + try { + const u = new URL(input); + return u.hostname || undefined; + } catch { + // ignore: not a valid absolute URL + } + // Try adding a scheme if it's a schemeless URL-like value + try { + const u2 = new URL(`https://${input}`); + return u2.hostname || undefined; + } catch { + // ignore: not a valid schemeless URL either + } + return undefined; + } + + const parsed = yargsParser(args, { + configuration: { + "short-option-groups": true, + "camel-case-expansion": false, + "dot-notation": false, + "duplicate-arguments-array": true, + "flatten-duplicate-arrays": false, + "greedy-arrays": false, + "unknown-options-as-args": true, + }, + }); + const toArray = (v) => (v == null ? [] : Array.isArray(v) ? v : [v]); + const candidateUrls = [ + ...toArray(parsed.i), + ...toArray(parsed["index-url"]), + ...toArray(parsed["extra-index-url"]), + ...toArray(parsed["find-links"]), + ...toArray(parsed._).filter( + (a) => typeof a === "string" && (a.startsWith("https://") || a.startsWith("http://")) + ), + ]; + const hosts = new Set(); + for (const u of candidateUrls) { + const h = hostFromString(u); + if (h) hosts.add(h); + } + return Array.from(hosts); +} + export async function runPip(command, args) { try { const env = mergeSafeChainProxyEnvironmentVariables(process.env); + // Re-introduce conditional --cert injection: only for known registries (MITM). + // No global env overrides for Python trust. + const hosts = extractHostsFromPipArgs(args); + const allKnown = hosts.length === 0 + ? true // No explicit sources => default PyPI (known) -> MITM + : hosts.every((h) => knownPipRegistries.includes(h)); - // If the user already provided --cert, respect their choice and do not override. - // Support both "--cert " and "--cert=" forms. - const hasUserCert = args.some((a) => { - if (a === "--cert") return true; - return typeof a === "string" && a.startsWith("--cert="); - }); + // Respect user-provided --cert: detect both "--cert " and "--cert=" + const hasUserCert = args.some( + (a) => a === "--cert" || (typeof a === "string" && a.startsWith("--cert=")) + ); - // By default, pass --cert with our CA so pip trusts our MITM for known registries. - // Note: pip treats --cert as the CA bundle to use for TLS (it does not merge with system CAs). - const finalArgs = hasUserCert ? [...args] : [...args, "--cert", getCaCertPath()]; + let finalArgs = [...args]; + if (allKnown && !hasUserCert) { + finalArgs = [...args, "--cert", getCaCertPath()]; + } const result = await safeSpawn(command, finalArgs, { stdio: "inherit", diff --git a/packages/safe-chain/src/packagemanager/pip/runPipCommand.spec.js b/packages/safe-chain/src/packagemanager/pip/runPipCommand.spec.js index f9ef43d..1aecc30 100644 --- a/packages/safe-chain/src/packagemanager/pip/runPipCommand.spec.js +++ b/packages/safe-chain/src/packagemanager/pip/runPipCommand.spec.js @@ -43,7 +43,7 @@ describe("runPipCommand --cert handling", () => { mock.reset(); }); - it("should append --cert with our CA path to pip args", async () => { + it("should append --cert with our CA path to pip args by default (PyPI)", async () => { const res = await runPip("pip3", ["install", "requests"]); assert.strictEqual(res.status, 0); @@ -59,6 +59,10 @@ describe("runPipCommand --cert handling", () => { // Original args should be preserved before --cert assert.strictEqual(capturedArgs.args[0], "install"); assert.strictEqual(capturedArgs.args[1], "requests"); + + // No Python CA env overrides expected + assert.strictEqual(capturedArgs.options.env.REQUESTS_CA_BUNDLE, undefined); + assert.strictEqual(capturedArgs.options.env.SSL_CERT_FILE, undefined); }); it("should not override user-provided --cert ", async () => { @@ -72,6 +76,9 @@ describe("runPipCommand --cert handling", () => { assert.strictEqual(certIndices.length, 1, "should not inject an extra --cert"); const userPath = capturedArgs.args[certIndices[0] + 1]; assert.strictEqual(userPath, "/tmp/user-ca.pem", "should preserve user-provided cert path"); + // No Python CA env overrides expected + assert.strictEqual(capturedArgs.options.env.REQUESTS_CA_BUNDLE, undefined); + assert.strictEqual(capturedArgs.options.env.SSL_CERT_FILE, undefined); }); it("should not override user-provided --cert=", async () => { @@ -83,5 +90,37 @@ describe("runPipCommand --cert handling", () => { assert.ok(hasInline, "should keep inline --cert="); const injectedIndex = capturedArgs.args.indexOf("--cert"); assert.strictEqual(injectedIndex, -1, "should not inject separate --cert when inline is provided"); + // No Python CA env overrides expected + assert.strictEqual(capturedArgs.options.env.REQUESTS_CA_BUNDLE, undefined); + assert.strictEqual(capturedArgs.options.env.SSL_CERT_FILE, undefined); + }); + + it("should inject --cert when explicit index is a known PyPI host", async () => { + const res = await runPip("pip3", ["install", "requests", "--index-url", "https://pypi.org/simple"]); + assert.strictEqual(res.status, 0); + const idx = capturedArgs.args.indexOf("--cert"); + assert.ok(idx >= 0, "--cert should be present for known registries"); + }); + + it("should NOT inject --cert when index points to an unknown external mirror (tunneled)", async () => { + const res = await runPip("pip3", [ + "install", + "certifi", + "--index-url", + "https://pypi.tuna.tsinghua.edu.cn/simple", + ]); + assert.strictEqual(res.status, 0); + const idx = capturedArgs.args.indexOf("--cert"); + assert.strictEqual(idx, -1, "--cert should be omitted for tunneled external hosts"); + }); + + it("should NOT inject --cert when installing from a direct external URL", async () => { + const res = await runPip("pip3", [ + "install", + "https://example.com/pkg-1.0.0-py3-none-any.whl", + ]); + assert.strictEqual(res.status, 0); + const idx = capturedArgs.args.indexOf("--cert"); + assert.strictEqual(idx, -1, "--cert should be omitted for direct external URLs"); }); }); diff --git a/test/e2e/pip.e2e.spec.js b/test/e2e/pip.e2e.spec.js index 05bdde9..153eca5 100644 --- a/test/e2e/pip.e2e.spec.js +++ b/test/e2e/pip.e2e.spec.js @@ -262,4 +262,29 @@ describe("E2E: pip coverage", () => { ); }); + it(`pip3 can install from alternate PyPI mirror (tunneled, not MITM)`, async () => { + const shell = await container.openShell("zsh"); + // Use Tsinghua PyPI mirror which is NOT in knownPipRegistries + // This tests tunneled HTTPS with --cert containing only Safe Chain CA + // If the CA bundle doesn't include public roots, this will fail with CERTIFICATE_VERIFY_FAILED + const result = await shell.runCommand('pip3 install --break-system-packages --index-url https://pypi.tuna.tsinghua.edu.cn/simple certifi'); + + assert.ok( + result.output.includes("no malicious packages found."), + `Output did not include expected text. Output was:\n${result.output}` + ); + + // Should succeed if CA bundle properly handles tunneled hosts + assert.ok( + result.output.includes("Successfully installed") || result.output.includes("Requirement already satisfied"), + `Installation from PyPI mirror failed. This may indicate --cert CA bundle lacks public roots. Output was:\n${result.output}` + ); + + // Should NOT contain certificate verification errors + assert.ok( + !result.output.match(/SSL|certificate verify failed|CERTIFICATE_VERIFY_FAILED/i), + `Should not have SSL/certificate errors for tunneled hosts. Output was:\n${result.output}` + ); + }); + });