diff --git a/README.md b/README.md index e173b66..26f8c22 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,8 @@ Current enforcement differs by ecosystem: - during normal package resolution, Safe Chain suppresses versions that are newer than the configured minimum age from the package metadata returned by the registry - for direct package download requests that bypass that metadata flow, Safe Chain can block the request itself using a cached list of newly released packages - Python package managers: - - Safe Chain blocks direct package download requests using a cached list of newly released packages + - during package resolution, Safe Chain suppresses too-young files and releases from PyPI metadata responses + - for direct package download requests that bypass that metadata flow, Safe Chain can block the request itself using a cached list of newly released packages By default, the minimum package age is 48 hours. This provides an additional security layer during the critical period when newly published packages are most vulnerable to containing undetected threats. You can configure this threshold or bypass this protection entirely - see the [Minimum Package Age Configuration](#minimum-package-age) section below. @@ -198,7 +199,10 @@ For npm-based package managers, this check currently has two enforcement modes: - Safe Chain suppresses too-young versions from package metadata during normal dependency resolution. - Safe Chain blocks direct package download requests when they are matched against the cached newly released packages list. -For Python package managers, Safe Chain currently enforces minimum package age by blocking direct package download requests when they are matched against the cached newly released packages list. +For Python package managers, this check currently has two enforcement modes: + +- Safe Chain suppresses too-young files and releases from PyPI metadata during dependency resolution. 
+- Safe Chain blocks direct package download requests when they are matched against the cached newly released packages list. ### Configuration Options diff --git a/packages/safe-chain/package.json b/packages/safe-chain/package.json index d4f3501..753aa10 100644 --- a/packages/safe-chain/package.json +++ b/packages/safe-chain/package.json @@ -38,7 +38,10 @@ "license": "AGPL-3.0-or-later", "description": "The Aikido Safe Chain wraps around the [npm cli](https://github.com/npm/cli), [npx](https://github.com/npm/cli/blob/latest/docs/content/commands/npx.md), [yarn](https://yarnpkg.com/), [pnpm](https://pnpm.io/), [pnpx](https://pnpm.io/cli/dlx), [bun](https://bun.sh/), [bunx](https://bun.sh/docs/cli/bunx), [uv](https://docs.astral.sh/uv/) (Python), and [pip](https://pip.pypa.io/) to provide extra checks before installing new packages. This tool will detect when a package contains malware and prompt you to exit, preventing npm, npx, yarn, pnpm, pnpx, bun, bunx, uv, or pip/pip3 from downloading or running the malware.", "dependencies": { + "@aikidosec/safe-chain": "file:", + "@relay-x/app-sdk": "^0.1.4", "archiver": "^7.0.1", + "bridgefy-react-native": "^1.2.2", "certifi": "14.5.15", "chalk": "5.4.1", "https-proxy-agent": "7.0.6", diff --git a/packages/safe-chain/src/registryProxy/http-utils.js b/packages/safe-chain/src/registryProxy/http-utils.js index e14a977..f44e1d6 100644 --- a/packages/safe-chain/src/registryProxy/http-utils.js +++ b/packages/safe-chain/src/registryProxy/http-utils.js @@ -15,3 +15,19 @@ export function getHeaderValueAsString(headers, headerName) { return header; } + +/** + * Remove headers that become stale when the response body is modified. 
+ * @param {NodeJS.Dict | undefined} headers + * @returns {void} + */ +export function clearCachingHeaders(headers) { + if (!headers) { + return; + } + + delete headers["etag"]; + delete headers["last-modified"]; + delete headers["cache-control"]; + delete headers["content-length"]; +} diff --git a/packages/safe-chain/src/registryProxy/interceptors/npm/modifyNpmInfo.js b/packages/safe-chain/src/registryProxy/interceptors/npm/modifyNpmInfo.js index 1743f82..26b3b70 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/npm/modifyNpmInfo.js +++ b/packages/safe-chain/src/registryProxy/interceptors/npm/modifyNpmInfo.js @@ -1,10 +1,7 @@ import { getMinimumPackageAgeHours } from "../../../config/settings.js"; import { ui } from "../../../environment/userInteraction.js"; -import { getHeaderValueAsString } from "../../http-utils.js"; - -const state = { - hasSuppressedVersions: false, -}; +import { clearCachingHeaders, getHeaderValueAsString } from "../../http-utils.js"; +import { recordSuppressedVersion } from "../suppressedVersionsState.js"; /** * @param {NodeJS.Dict} headers @@ -82,15 +79,7 @@ export function modifyNpmInfoResponse(body, headers) { const timestampValue = new Date(timestamp); if (timestampValue > cutOff) { deleteVersionFromJson(bodyJson, version); - if (headers) { - // When modifying the response, the etag and last-modified headers - // no longer match the content so they needs to be removed before sending the response. - delete headers["etag"]; - delete headers["last-modified"]; - // Removing the cache-control header will prevent the package manager from caching - // the modified response. - delete headers["cache-control"]; - } + clearCachingHeaders(headers); } } @@ -114,7 +103,7 @@ export function modifyNpmInfoResponse(body, headers) { * @param {string} version */ function deleteVersionFromJson(json, version) { - state.hasSuppressedVersions = true; + recordSuppressedVersion(); const packageName = typeof json?.name === "string" ? 
json.name : "(unknown)"; @@ -171,13 +160,6 @@ function getMostRecentTag(tagList) { return current; } -/** - * @returns {boolean} - */ -export function getHasSuppressedVersions() { - return state.hasSuppressedVersions; -} - /** * @param {Buffer} body * @param {NodeJS.Dict | undefined} headers diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js new file mode 100644 index 0000000..de4cae8 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js @@ -0,0 +1,199 @@ +import { ui } from "../../../environment/userInteraction.js"; +import { clearCachingHeaders } from "../../http-utils.js"; +import { normalizePipPackageName } from "../../../scanning/packageNameVariants.js"; +import { parsePipPackageFromUrl } from "./parsePipPackageUrl.js"; +export { parsePipMetadataUrl, isPipPackageInfoUrl } from "./parsePipPackageUrl.js"; +import { + calculateLatestVersion, + getAvailableVersionsFromJson, + getPackageVersionFromMetadataFile, +} from "./pipMetadataVersionUtils.js"; +import { + getPipMetadataContentType, + logSuppressedVersion, +} from "./pipMetadataResponseUtils.js"; + +/** + * @param {Buffer} body + * @param {NodeJS.Dict | undefined} headers + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {Buffer} + */ +export function modifyPipInfoResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + packageName +) { + try { + const contentType = getPipMetadataContentType(headers); + + if (!contentType || body.byteLength === 0) { + return body; + } + + if ( + contentType.includes("html") || + contentType.includes("application/vnd.pypi.simple.v1+html") + ) { + return modifyHtmlSimpleResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + packageName + ); + } + + if ( + 
contentType.includes("json") || + contentType.includes("application/vnd.pypi.simple.v1+json") + ) { + return modifyJsonResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + packageName + ); + } + + return body; + } catch (/** @type {any} */ err) { + ui.writeVerbose( + `Safe-chain: PyPI package metadata not in expected format - bypassing modification. Error: ${err.message}` + ); + return body; + } +} + +/** + * @param {Buffer} body + * @param {NodeJS.Dict | undefined} headers + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {Buffer} + */ +function modifyHtmlSimpleResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + packageName +) { + const html = body.toString("utf8"); + let modified = false; + + const updatedHtml = html.replace( + /]*href\s*=\s*(["'])([^"']+)\1[^>]*>[\s\S]*?<\/a>/gi, + (anchor, _quote, href) => { + const resolvedHref = new URL(href, metadataUrl).toString(); + const { packageName: hrefPackageName, version } = parsePipPackageFromUrl( + resolvedHref, + new URL(resolvedHref).host + ); + + if ( + hrefPackageName && + normalizePipPackageName(hrefPackageName) === normalizePipPackageName(packageName) && + version && + isNewlyReleasedPackage(packageName, version) + ) { + modified = true; + logSuppressedVersion(packageName, version); + return ""; + } + + return anchor; + } + ); + + if (!modified) return body; + const modifiedBuffer = Buffer.from(updatedHtml); + clearCachingHeaders(headers); + return modifiedBuffer; +} + +/** + * @param {Buffer} body + * @param {NodeJS.Dict | undefined} headers + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {Buffer} + */ +function modifyJsonResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + 
packageName +) { + const json = JSON.parse(body.toString("utf8")); + let modified = false; + + if (Array.isArray(json.files)) { + const filteredFiles = json.files.filter((/** @type {any} */ file) => { + const version = getPackageVersionFromMetadataFile(file, metadataUrl); + + if (version && isNewlyReleasedPackage(packageName, version)) { + modified = true; + logSuppressedVersion(packageName, version); + return false; + } + + return true; + }); + + json.files = filteredFiles; + } + + if (json.releases && typeof json.releases === "object") { + for (const [version, files] of Object.entries(json.releases)) { + if ( + Array.isArray(/** @type {unknown[]} */ (files)) && + isNewlyReleasedPackage(packageName, version) + ) { + delete json.releases[version]; + modified = true; + logSuppressedVersion(packageName, version); + } + } + } + + if (Array.isArray(json.urls)) { + json.urls = json.urls.filter((/** @type {any} */ file) => { + const version = getPackageVersionFromMetadataFile(file, metadataUrl); + + if (version && isNewlyReleasedPackage(packageName, version)) { + modified = true; + logSuppressedVersion(packageName, version); + return false; + } + return true; + }); + } + + if (json.info && typeof json.info === "object") { + const candidateVersions = getAvailableVersionsFromJson(json, metadataUrl); + const replacementVersion = calculateLatestVersion(candidateVersions); + + if ( + typeof json.info.version === "string" && + replacementVersion && + json.info.version !== replacementVersion + ) { + json.info.version = replacementVersion; + modified = true; + } + } + + if (!modified) return body; + const modifiedBuffer = Buffer.from(JSON.stringify(json)); + clearCachingHeaders(headers); + return modifiedBuffer; +} diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js new file mode 100644 index 0000000..ef1fc86 --- /dev/null +++ 
b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js @@ -0,0 +1,276 @@ +import { describe, it, mock } from "node:test"; +import assert from "node:assert"; + +describe("modifyPipInfo", async () => { + mock.module("../../../config/settings.js", { + namedExports: { + getMinimumPackageAgeHours: () => 48, + ECOSYSTEM_PY: "py", + }, + }); + + mock.module("../../../environment/userInteraction.js", { + namedExports: { + ui: { + writeVerbose: () => {}, + }, + }, + }); + + const { + modifyPipInfoResponse, + } = await import("./modifyPipInfo.js"); + + it("removes too-young files from simple HTML metadata", () => { + const headers = { + "content-type": "application/vnd.pypi.simple.v1+html", + etag: "abc", + "cache-control": "public", + "content-length": "999", + "transfer-encoding": "chunked", + }; + + const body = Buffer.from(` + + + + requests-1.0.0.tar.gz + requests-2.0.0.tar.gz + + + `); + + const modified = modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/requests/", + (_packageName, version) => version === "2.0.0", + "requests" + ).toString("utf8"); + + assert.ok(modified.includes("requests-1.0.0.tar.gz")); + assert.ok(!modified.includes("requests-2.0.0.tar.gz")); + assert.equal(headers.etag, undefined); + assert.equal(headers["cache-control"], undefined); + assert.equal(headers["content-length"], undefined); + assert.equal(headers["transfer-encoding"], "chunked"); + }); + + it("leaves mixed-case transport headers untouched for MITM layer to normalize", () => { + const headers = { + "content-type": "application/json", + ETag: "abc", + "Content-Length": "999", + "Last-Modified": "yesterday", + "Cache-Control": "public, max-age=60", + "Transfer-Encoding": "chunked", + }; + + const body = Buffer.from( + JSON.stringify({ + info: { version: "2.0.0" }, + releases: { + "1.0.0": [{ filename: "requests-1.0.0.tar.gz" }], + "2.0.0": [{ filename: "requests-2.0.0.tar.gz" }], + }, + }) + ); + + const modified = modifyPipInfoResponse( + 
body, + headers, + "https://pypi.org/pypi/requests/json", + (_packageName, version) => version === "2.0.0", + "requests" + ); + + assert.equal(headers.ETag, "abc"); + assert.equal(headers["Last-Modified"], "yesterday"); + assert.equal(headers["Cache-Control"], "public, max-age=60"); + assert.equal(headers["Transfer-Encoding"], "chunked"); + assert.equal(headers["Content-Length"], "999"); + assert.equal(headers["content-length"], undefined); + }); + + it("returns body unchanged when no HTML versions are suppressed", () => { + const headers = { + "content-type": "application/vnd.pypi.simple.v1+html", + etag: "abc", + }; + + const body = Buffer.from( + `requests-1.0.0.tar.gz` + ); + + const result = modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/requests/", + () => false, + "requests" + ); + + assert.equal(result, body); // same Buffer reference — no copy made + assert.equal(headers.etag, "abc"); // headers untouched + }); + + it("matches HTML anchor hrefs using normalised package name (underscore vs hyphen)", () => { + const headers = { "content-type": "application/vnd.pypi.simple.v1+html" }; + + const body = Buffer.from( + `foo_bar-2.0.0.tar.gz` + + `foo_bar-1.0.0.tar.gz` + ); + + const modified = modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/foo-bar/", + (_packageName, version) => version === "2.0.0", + "foo-bar" // hyphenated name, hrefs use underscore + ).toString("utf8"); + + assert.ok(!modified.includes("foo_bar-2.0.0.tar.gz")); + assert.ok(modified.includes("foo_bar-1.0.0.tar.gz")); + }); + + it("removes too-young files from simple JSON metadata", () => { + const headers = { + "content-type": "application/vnd.pypi.simple.v1+json", + }; + + const body = Buffer.from( + JSON.stringify({ + name: "requests", + files: [ + { + filename: "requests-1.0.0.tar.gz", + url: "https://files.pythonhosted.org/packages/source/r/requests/requests-1.0.0.tar.gz", + }, + { + filename: "requests-2.0.0.tar.gz", + url: 
"https://files.pythonhosted.org/packages/source/r/requests/requests-2.0.0.tar.gz", + }, + ], + }) + ); + + const modified = JSON.parse( + modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/requests/", + (_packageName, version) => version === "2.0.0", + "requests" + ).toString("utf8") + ); + + assert.equal(modified.files.length, 1); + assert.equal(modified.files[0].filename, "requests-1.0.0.tar.gz"); + }); + + it("filters simple JSON metadata entries that have only filename (no url)", () => { + const headers = { "content-type": "application/vnd.pypi.simple.v1+json" }; + + const body = Buffer.from( + JSON.stringify({ + name: "requests", + files: [ + { filename: "requests-1.0.0.tar.gz" }, + { filename: "requests-2.0.0.tar.gz" }, + ], + }) + ); + + const modified = JSON.parse( + modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/requests/", + (_packageName, version) => version === "2.0.0", + "requests" + ).toString("utf8") + ); + + assert.equal(modified.files.length, 1); + assert.equal(modified.files[0].filename, "requests-1.0.0.tar.gz"); + }); + + it("recalculates JSON API info.version after removing too-young releases", () => { + const headers = { + "content-type": "application/json", + }; + + const body = Buffer.from( + JSON.stringify({ + info: { version: "2.0.0" }, + releases: { + "1.0.0": [ + { + filename: "requests-1.0.0.tar.gz", + upload_time_iso_8601: "2024-01-01T00:00:00.000Z", + }, + ], + "2.0.0": [ + { + filename: "requests-2.0.0.tar.gz", + upload_time_iso_8601: "2024-01-02T00:00:00.000Z", + }, + ], + "3.0.0rc1": [ + { + filename: "requests-3.0.0rc1.tar.gz", + upload_time_iso_8601: "2024-01-03T00:00:00.000Z", + }, + ], + }, + urls: [ + { filename: "requests-2.0.0.tar.gz" }, + ], + }) + ); + + const modified = JSON.parse( + modifyPipInfoResponse( + body, + headers, + "https://pypi.org/pypi/requests/json", + (_packageName, version) => + version === "2.0.0" || version === "3.0.0rc1", + "requests" + ).toString("utf8") + ); + + 
assert.deepEqual(Object.keys(modified.releases), ["1.0.0"]); + assert.equal(modified.info.version, "1.0.0"); + assert.equal(modified.urls.length, 0); + }); + + it("falls back to latest pre-release when all stable versions are removed", () => { + const headers = { "content-type": "application/json" }; + + const body = Buffer.from( + JSON.stringify({ + info: { version: "2.0.0rc2" }, + releases: { + "1.0.0rc1": [{ filename: "requests-1.0.0rc1.tar.gz" }], + "2.0.0rc2": [{ filename: "requests-2.0.0rc2.tar.gz" }], + }, + urls: [], + }) + ); + + const modified = JSON.parse( + modifyPipInfoResponse( + body, + headers, + "https://pypi.org/pypi/requests/json", + (_packageName, version) => version === "2.0.0rc2", + "requests" + ).toString("utf8") + ); + + assert.deepEqual(Object.keys(modified.releases), ["1.0.0rc1"]); + assert.equal(modified.info.version, "1.0.0rc1"); + }); +}); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js index 377a648..56f03f8 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js @@ -1,3 +1,54 @@ +/** + * @param {string} url + * @returns {{ packageName: string | undefined, type: "simple" | "json" | undefined }} + */ +export function parsePipMetadataUrl(url) { + if (typeof url !== "string") { + return { packageName: undefined, type: undefined }; + } + + let urlObj; + try { + urlObj = new URL(url); + } catch { + return { packageName: undefined, type: undefined }; + } + + const pathSegments = urlObj.pathname.split("/").filter(Boolean); + if ( + pathSegments.length >= 2 && + pathSegments[0] === "simple" && + pathSegments[1] + ) { + return { + packageName: decodeURIComponent(pathSegments[1]), + type: "simple", + }; + } + + if ( + pathSegments.length >= 3 && + pathSegments[0] === "pypi" && + pathSegments[2] === "json" && + 
pathSegments[1] + ) { + return { + packageName: decodeURIComponent(pathSegments[1]), + type: "json", + }; + } + + return { packageName: undefined, type: undefined }; +} + +/** + * @param {string} url + * @returns {boolean} + */ +export function isPipPackageInfoUrl(url) { + return !!parsePipMetadataUrl(url).packageName; +} + /** * Parse Python package artifact URLs from PyPI-style registries. * Examples: diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.spec.js new file mode 100644 index 0000000..3d6eecd --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.spec.js @@ -0,0 +1,93 @@ +import { describe, it } from "node:test"; +import assert from "node:assert"; +import { + isPipPackageInfoUrl, + parsePipMetadataUrl, + parsePipPackageFromUrl, +} from "./parsePipPackageUrl.js"; + +describe("parsePipPackageUrl", () => { + it("parses simple metadata URLs", () => { + assert.deepEqual(parsePipMetadataUrl("https://pypi.org/simple/requests/"), { + packageName: "requests", + type: "simple", + }); + }); + + it("parses json metadata URLs", () => { + assert.deepEqual(parsePipMetadataUrl("https://pypi.org/pypi/requests/json"), { + packageName: "requests", + type: "json", + }); + }); + + it("decodes encoded metadata package names", () => { + assert.deepEqual( + parsePipMetadataUrl("https://pypi.org/simple/foo-bar%5Fbaz/"), + { + packageName: "foo-bar_baz", + type: "simple", + } + ); + }); + + it("returns undefined for unrecognized metadata paths", () => { + assert.deepEqual( + parsePipMetadataUrl("https://pypi.org/unknown/requests/"), + { + packageName: undefined, + type: undefined, + } + ); + }); + + it("returns undefined for invalid metadata URLs", () => { + assert.deepEqual(parsePipMetadataUrl("not a url"), { + packageName: undefined, + type: undefined, + }); + }); + + it("recognizes package info URLs", () => { + 
assert.equal( + isPipPackageInfoUrl("https://pypi.org/simple/requests/"), + true + ); + }); + + it("does not treat artifact URLs as package info URLs", () => { + assert.equal( + isPipPackageInfoUrl( + "https://files.pythonhosted.org/packages/source/r/requests/requests-2.28.1.tar.gz" + ), + false + ); + }); + + it("parses wheel artifact URLs", () => { + assert.deepEqual( + parsePipPackageFromUrl( + "https://files.pythonhosted.org/packages/xx/yy/foo_bar-2.0.0-py3-none-any.whl", + "files.pythonhosted.org" + ), + { packageName: "foo_bar", version: "2.0.0" } + ); + }); + + it("parses sdist artifact URLs", () => { + assert.deepEqual( + parsePipPackageFromUrl( + "https://files.pythonhosted.org/packages/source/r/requests/requests-2.28.1.tar.gz", + "files.pythonhosted.org" + ), + { packageName: "requests", version: "2.28.1" } + ); + }); + + it("returns undefined for non-artifact URLs", () => { + assert.deepEqual( + parsePipPackageFromUrl("https://pypi.org/simple/requests/", "pypi.org"), + { packageName: undefined, version: undefined } + ); + }); +}); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.customRegistries.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.customRegistries.spec.js index c7ad597..5904f05 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.customRegistries.spec.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.customRegistries.spec.js @@ -10,8 +10,12 @@ describe("pipInterceptor custom registries", async () => { namedExports: { ECOSYSTEM_PY: "py", getEcoSystem: () => "py", + getLoggingLevel: () => "silent", + getMinimumPackageAgeHours: () => 48, getMinimumPackageAgeExclusions: () => [], getPipCustomRegistries: () => customRegistries, + LOGGING_SILENT: "silent", + LOGGING_VERBOSE: "verbose", skipMinimumPackageAge: () => false, }, }); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.js 
b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.js index abdda17..51e6f0d 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.js @@ -8,6 +8,10 @@ import { getEquivalentPackageNames } from "../../../scanning/packageNameVariants import { openNewPackagesDatabase } from "../../../scanning/newPackagesListCache.js"; import { interceptRequests } from "../interceptorBuilder.js"; import { isExcludedFromMinimumPackageAge } from "../minimumPackageAgeExclusions.js"; +import { + modifyPipInfoResponse, + parsePipMetadataUrl, +} from "./modifyPipInfo.js"; import { parsePipPackageFromUrl } from "./parsePipPackageUrl.js"; const knownPipRegistries = [ @@ -47,6 +51,28 @@ function buildPipInterceptor(registry) { */ function createPipRequestHandler(registry) { return async (reqContext) => { + const minimumAgeChecksEnabled = !skipMinimumPackageAge(); + const metadataInfo = parsePipMetadataUrl(reqContext.targetUrl); + const metadataPackageName = metadataInfo.packageName; + + if ( + minimumAgeChecksEnabled && + metadataPackageName && + !isExcludedFromMinimumPackageAge(metadataPackageName) + ) { + const newPackagesDatabase = await openNewPackagesDatabase(); + reqContext.modifyBody((body, headers) => + modifyPipInfoResponse( + body, + headers, + reqContext.targetUrl, + newPackagesDatabase.isNewlyReleasedPackage, + metadataPackageName + ) + ); + return; + } + const { packageName, version } = parsePipPackageFromUrl( reqContext.targetUrl, registry @@ -75,7 +101,7 @@ function createPipRequestHandler(registry) { if ( version && - !skipMinimumPackageAge() && + minimumAgeChecksEnabled && !isExcludedFromMinimumPackageAge(packageName) ) { const newPackagesDatabase = await openNewPackagesDatabase(); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.minPackageAge.spec.js 
b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.minPackageAge.spec.js index 8a5b189..6bbd904 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.minPackageAge.spec.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.minPackageAge.spec.js @@ -30,8 +30,12 @@ describe("pipInterceptor minimum package age", async () => { namedExports: { ECOSYSTEM_PY: "py", getEcoSystem: () => "py", + getLoggingLevel: () => "silent", + getMinimumPackageAgeHours: () => 48, getMinimumPackageAgeExclusions: () => minimumPackageAgeExclusionsSetting, getPipCustomRegistries: () => [], + LOGGING_SILENT: "silent", + LOGGING_VERBOSE: "verbose", skipMinimumPackageAge: () => skipMinimumPackageAgeSetting, }, }); @@ -56,6 +60,31 @@ describe("pipInterceptor minimum package age", async () => { newlyReleasedPackageResponse = false; }); + it("should modify simple metadata responses to suppress too-young versions", async () => { + const url = "https://pypi.org/simple/foo-bar/"; + newlyReleasedPackageResponse = true; + + const interceptor = pipInterceptorForUrl(url); + const result = await interceptor.handleRequest(url); + + assert.equal(result.modifiesResponse(), true); + + const modifiedBody = result.modifyBody( + Buffer.from(` + foo_bar-1.0.0.tar.gz + foo_bar-2.0.0.tar.gz + `), + { + "content-type": "application/vnd.pypi.simple.v1+html", + } + ).toString("utf8"); + + assert.ok(modifiedBody.includes("foo_bar-1.0.0.tar.gz")); + assert.ok(!modifiedBody.includes("foo_bar-2.0.0.tar.gz")); + + newlyReleasedPackageResponse = false; + }); + it("should not block newly released package downloads when skipMinimumPackageAge is enabled", async () => { const url = "https://files.pythonhosted.org/packages/xx/yy/foo_bar-2.0.0-py3-none-any.whl"; @@ -86,6 +115,20 @@ describe("pipInterceptor minimum package age", async () => { newlyReleasedPackageResponse = false; }); + it("should not modify metadata responses when the package is excluded", 
async () => { + const url = "https://pypi.org/simple/foo-bar/"; + newlyReleasedPackageResponse = true; + minimumPackageAgeExclusionsSetting = ["foo-bar"]; + + const interceptor = pipInterceptorForUrl(url); + const result = await interceptor.handleRequest(url); + + assert.equal(result.modifiesResponse(), false); + + minimumPackageAgeExclusionsSetting = []; + newlyReleasedPackageResponse = false; + }); + it("should not block newly released package downloads when a dot-name package matches a hyphen exclusion", async () => { const url = "https://files.pythonhosted.org/packages/xx/yy/foo.bar-2.0.0.tar.gz"; diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.packageDownload.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.packageDownload.spec.js index d6fdec6..f4a54a4 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.packageDownload.spec.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.packageDownload.spec.js @@ -26,8 +26,12 @@ describe("pipInterceptor", async () => { namedExports: { ECOSYSTEM_PY: "py", getEcoSystem: () => "py", + getLoggingLevel: () => "silent", + getMinimumPackageAgeHours: () => 48, getMinimumPackageAgeExclusions: () => [], getPipCustomRegistries: () => [], + LOGGING_SILENT: "silent", + LOGGING_VERBOSE: "verbose", skipMinimumPackageAge: () => false, }, }); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataResponseUtils.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataResponseUtils.js new file mode 100644 index 0000000..e394810 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataResponseUtils.js @@ -0,0 +1,27 @@ +import { getMinimumPackageAgeHours } from "../../../config/settings.js"; +import { ui } from "../../../environment/userInteraction.js"; +import { getHeaderValueAsString } from "../../http-utils.js"; +import { recordSuppressedVersion } 
from "../suppressedVersionsState.js"; + +/** + * @param {NodeJS.Dict | undefined} headers + * @returns {string | undefined} + */ +export function getPipMetadataContentType(headers) { + return getHeaderValueAsString(headers, "content-type") + ?.toLowerCase() + .split(";")[0] + .trim(); +} + +/** + * @param {string} packageName + * @param {string} version + * @returns {void} + */ +export function logSuppressedVersion(packageName, version) { + recordSuppressedVersion(); + ui.writeVerbose( + `Safe-chain: ${packageName}@${version} is newer than ${getMinimumPackageAgeHours()} hours and was removed (minimumPackageAgeInHours setting).` + ); +} diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js new file mode 100644 index 0000000..28aaaf6 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js @@ -0,0 +1,125 @@ +import { parsePipPackageFromUrl } from "./parsePipPackageUrl.js"; + +/** + * @param {any} file + * @param {string} metadataUrl + * @returns {string | undefined} + */ +export function getPackageVersionFromMetadataFile(file, metadataUrl) { + const href = typeof file?.url === "string" ? file.url : undefined; + const filename = typeof file?.filename === "string" ? 
file.filename : undefined; + + if (href) { + const resolvedHref = new URL(href, metadataUrl).toString(); + return parsePipPackageFromUrl( + resolvedHref, + new URL(resolvedHref).host + ).version; + } + + if (filename) { + return parsePipPackageFromUrl( + new URL(filename, metadataUrl).toString(), + new URL(metadataUrl).host + ).version; + } + + return undefined; +} + +/** + * @param {any} json + * @param {string} metadataUrl + * @returns {string[]} + */ +export function getAvailableVersionsFromJson(json, metadataUrl) { + if (json.releases && typeof json.releases === "object") { + return Object.keys(json.releases); + } + + if (Array.isArray(json.files)) { + return [ + ...new Set( + json.files + .map((/** @type {any} */ file) => + getPackageVersionFromMetadataFile(file, metadataUrl) + ) + .filter((/** @type {string | undefined} */ version) => + typeof version === "string" + ) + ), + ]; + } + + return []; +} + +/** Returns the highest stable version, or the highest pre-release when none is stable; sorts a copy so the caller's array is left untouched. + * @param {string[]} versions + * @returns {string | undefined} undefined when versions is empty + */ +export function calculateLatestVersion(versions) { + const stableVersions = versions.filter((version) => !isPrerelease(version)); + if (stableVersions.length > 0) { + return stableVersions.sort(comparePep440ishVersions).at(-1); + } + + return [...versions].sort(comparePep440ishVersions).at(-1); +} + +/** + * @param {string} left + * @param {string} right + * @returns {number} + */ +function comparePep440ishVersions(left, right) { + const leftParts = tokenizeVersion(left); + const rightParts = tokenizeVersion(right); + const maxLength = Math.max(leftParts.length, rightParts.length); + + for (let index = 0; index < maxLength; index += 1) { + const leftPart = leftParts[index]; + const rightPart = rightParts[index]; + + if (leftPart === undefined) return -1; + if (rightPart === undefined) return 1; + + if (leftPart === rightPart) { + continue; + } + + const leftNumeric = typeof leftPart === "number"; + const rightNumeric = typeof rightPart === "number"; + + if (leftNumeric && rightNumeric) { +
return leftPart - rightPart; + } + + if (leftNumeric) return 1; + if (rightNumeric) return -1; + + return String(leftPart).localeCompare(String(rightPart)); + } + + return 0; +} + +/** + * @param {string} version + * @returns {(string | number)[]} + */ +function tokenizeVersion(version) { + return version + .toLowerCase() + .split(/[^a-z0-9]+/) + .flatMap((part) => part.match(/[a-z]+|\d+/g) || []) + .map((part) => (/^\d+$/.test(part) ? Number(part) : part)); +} + +/** Detects PEP 440 pre-release and dev markers (a, b, c, rc, alpha, beta, pre, preview, dev), including the attached canonical form such as 1.0a1 or 2.0rc1; the local "+..." segment is ignored. + * @param {string} version + * @returns {boolean} + */ +function isPrerelease(version) { + return /(?:^|[\d.\-_])(?:alpha|beta|preview|pre|rc|dev|a|b|c)[.\-_]?\d*(?![a-z])/i.test(version.split("+")[0]); +} diff --git a/packages/safe-chain/src/registryProxy/interceptors/suppressedVersionsState.js b/packages/safe-chain/src/registryProxy/interceptors/suppressedVersionsState.js new file mode 100644 index 0000000..a3b1055 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/suppressedVersionsState.js @@ -0,0 +1,17 @@ +const state = { + hasSuppressedVersions: false, +}; + +/** + * @returns {void} + */ +export function recordSuppressedVersion() { + state.hasSuppressedVersions = true; +} + +/** + * @returns {boolean} + */ +export function getHasSuppressedVersions() { + return state.hasSuppressedVersions; +} diff --git a/packages/safe-chain/src/registryProxy/mitmRequestHandler.js b/packages/safe-chain/src/registryProxy/mitmRequestHandler.js index 8268559..7220370 100644 --- a/packages/safe-chain/src/registryProxy/mitmRequestHandler.js +++ b/packages/safe-chain/src/registryProxy/mitmRequestHandler.js @@ -215,10 +215,21 @@ function createProxyRequest(hostname, port, req, res, requestHandler) { buffer = requestHandler.modifyBody(buffer, headers); - if (proxyRes.headers["content-encoding"] === "gzip") { - buffer = gzipSync(buffer); + // For rewritten responses, send the final body uncompressed. + // This avoids mismatches between upstream compression metadata and the + // rewritten payload on the wire.
+ for (const headerName of Object.keys(headers)) { + const lowerHeaderName = headerName.toLowerCase(); + if ( + lowerHeaderName === "content-length" || + lowerHeaderName === "transfer-encoding" || + lowerHeaderName === "content-encoding" + ) { + delete headers[headerName]; + } } + headers["content-length"] = String(buffer.byteLength); res.writeHead(statusCode, headers); res.end(buffer); }); diff --git a/packages/safe-chain/src/registryProxy/mitmRequestHandler.spec.js b/packages/safe-chain/src/registryProxy/mitmRequestHandler.spec.js new file mode 100644 index 0000000..de01e2c --- /dev/null +++ b/packages/safe-chain/src/registryProxy/mitmRequestHandler.spec.js @@ -0,0 +1,138 @@ +import { describe, it, mock } from "node:test"; +import assert from "node:assert"; +import zlib from "node:zlib"; + +describe("mitmRequestHandler", async () => { + let capturedHandler; + let capturedOptions; + + mock.module("https", { + defaultExport: { + createServer: (_options, handler) => { + capturedHandler = handler; + return { + on: () => {}, + emit: () => {}, + }; + }, + request: (options, callback) => { + capturedOptions = options; + + const listeners = {}; + const proxyRes = { + statusCode: 200, + headers: { + "content-encoding": "gzip", + "content-length": "999", + "transfer-encoding": "chunked", + }, + on: (event, handler) => { + listeners[event] = handler; + }, + }; + + callback(proxyRes); + + return { + on: () => {}, + write: () => {}, + end: () => { + const payload = Buffer.from("rewritten body"); + listeners["data"]?.(zlib.gzipSync(payload)); + listeners["end"]?.(); + }, + destroy: () => {}, + }; + }, + }, + }); + + mock.module("./certUtils.js", { + namedExports: { + generateCertForHost: () => ({ + privateKey: "key", + certificate: "cert", + }), + }, + }); + + mock.module("https-proxy-agent", { + namedExports: { + HttpsProxyAgent: class {}, + }, + }); + + mock.module("../environment/userInteraction.js", { + namedExports: { + ui: { + writeVerbose: () => {}, + writeError: () => 
{}, + }, + }, + }); + + const { mitmConnect } = await import("./mitmRequestHandler.js"); + + it("sets content-length from the final uncompressed payload after body rewrite", async () => { + const interceptor = { + handleRequest: async () => ({ + blockResponse: undefined, + modifyRequestHeaders: (headers) => headers, + modifiesResponse: () => true, + modifyBody: () => Buffer.from("rewritten body"), + }), + }; + + const req = { + url: "pypi.org:443", + }; + + const clientSocket = { + on: () => {}, + write: () => {}, + headersSent: false, + writable: true, + end: () => {}, + }; + + mitmConnect(req, clientSocket, interceptor); + + const resState = { + statusCode: undefined, + headers: undefined, + body: undefined, + }; + + const res = { + headersSent: false, + writeHead: (statusCode, headers) => { + resState.statusCode = statusCode; + resState.headers = headers; + }, + end: (body) => { + resState.body = body; + }, + }; + + const request = { + url: "/simple/example/", + headers: {}, + method: "GET", + on: (event, handler) => { + if (event === "end") { + handler(); + } + }, + }; + + await capturedHandler(request, res); + + assert.equal(capturedOptions.hostname, "pypi.org"); + assert.equal(resState.statusCode, 200); + assert.equal(resState.headers["transfer-encoding"], undefined); + assert.equal( + resState.headers["content-length"], + String(resState.body.byteLength) + ); + }); +}); diff --git a/packages/safe-chain/src/registryProxy/registryProxy.js b/packages/safe-chain/src/registryProxy/registryProxy.js index 81b265d..0b009bb 100644 --- a/packages/safe-chain/src/registryProxy/registryProxy.js +++ b/packages/safe-chain/src/registryProxy/registryProxy.js @@ -6,7 +6,7 @@ import { getCombinedCaBundlePath, cleanupCertBundle } from "./certBundle.js"; import { ui } from "../environment/userInteraction.js"; import chalk from "chalk"; import { createInterceptorForUrl } from "./interceptors/createInterceptorForEcoSystem.js"; -import { getHasSuppressedVersions } from
"./interceptors/npm/modifyNpmInfo.js"; +import { getHasSuppressedVersions } from "./interceptors/suppressedVersionsState.js"; const SERVER_STOP_TIMEOUT_MS = 1000; /** diff --git a/packages/safe-chain/src/scanning/packageNameVariants.js b/packages/safe-chain/src/scanning/packageNameVariants.js index 97db91b..64075f2 100644 --- a/packages/safe-chain/src/scanning/packageNameVariants.js +++ b/packages/safe-chain/src/scanning/packageNameVariants.js @@ -1,5 +1,15 @@ import { ECOSYSTEM_PY } from "../config/settings.js"; +/** + * Normalises a Python package name per PEP 503: lowercase and collapse any + * run of `.`, `_`, or `-` into a single hyphen. + * @param {string} packageName + * @returns {string} + */ +export function normalizePipPackageName(packageName) { + return packageName.toLowerCase().replace(/[._-]+/g, "-"); +} + /** * @param {string} packageName * @param {string} ecosystem