From 4564b7f6078f28a4fbcea10e5343b7cb625c07d6 Mon Sep 17 00:00:00 2001 From: Reinier Criel Date: Wed, 1 Apr 2026 14:32:36 -0700 Subject: [PATCH 1/9] Initial --- README.md | 8 +- packages/safe-chain/package.json | 3 + .../src/registryProxy/http-utils.js | 16 + .../interceptors/npm/modifyNpmInfo.js | 26 +- .../interceptors/pip/modifyPipInfo.js | 199 +++++++++++++ .../interceptors/pip/modifyPipInfo.spec.js | 276 ++++++++++++++++++ .../interceptors/pip/parsePipPackageUrl.js | 51 ++++ .../pip/parsePipPackageUrl.spec.js | 93 ++++++ .../pipInterceptor.customRegistries.spec.js | 4 + .../interceptors/pip/pipInterceptor.js | 28 +- .../pip/pipInterceptor.minPackageAge.spec.js | 43 +++ .../pipInterceptor.packageDownload.spec.js | 4 + .../pip/pipMetadataResponseUtils.js | 27 ++ .../pip/pipMetadataVersionUtils.js | 125 ++++++++ .../interceptors/suppressedVersionsState.js | 17 ++ .../src/registryProxy/mitmRequestHandler.js | 15 +- .../registryProxy/mitmRequestHandler.spec.js | 138 +++++++++ .../src/registryProxy/registryProxy.js | 2 +- .../src/scanning/packageNameVariants.js | 10 + 19 files changed, 1057 insertions(+), 28 deletions(-) create mode 100644 packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js create mode 100644 packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js create mode 100644 packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.spec.js create mode 100644 packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataResponseUtils.js create mode 100644 packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js create mode 100644 packages/safe-chain/src/registryProxy/interceptors/suppressedVersionsState.js create mode 100644 packages/safe-chain/src/registryProxy/mitmRequestHandler.spec.js diff --git a/README.md b/README.md index e173b66..26f8c22 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,8 @@ Current enforcement differs by ecosystem: - during normal package resolution, Safe Chain suppresses versions that are newer than the configured minimum age from the package metadata returned by the registry - for direct package download requests that bypass that metadata flow, Safe Chain can block the request itself using a cached list of newly released packages - Python package managers: - - Safe Chain blocks direct package download requests using a cached list of newly released packages + - during package resolution, Safe Chain suppresses too-young files and releases from PyPI metadata responses + - for direct package download requests that bypass that metadata flow, Safe Chain can block the request itself using a cached list of newly released packages By default, the minimum package age is 48 hours. This provides an additional security layer during the critical period when newly published packages are most vulnerable to containing undetected threats. You can configure this threshold or bypass this protection entirely - see the [Minimum Package Age Configuration](#minimum-package-age) section below. @@ -198,7 +199,10 @@ For npm-based package managers, this check currently has two enforcement modes: - Safe Chain suppresses too-young versions from package metadata during normal dependency resolution. - Safe Chain blocks direct package download requests when they are matched against the cached newly released packages list. -For Python package managers, Safe Chain currently enforces minimum package age by blocking direct package download requests when they are matched against the cached newly released packages list. +For Python package managers, this check currently has two enforcement modes: + +- Safe Chain suppresses too-young files and releases from PyPI metadata during dependency resolution. +- Safe Chain blocks direct package download requests when they are matched against the cached newly released packages list. ### Configuration Options diff --git a/packages/safe-chain/package.json b/packages/safe-chain/package.json index d4f3501..753aa10 100644 --- a/packages/safe-chain/package.json +++ b/packages/safe-chain/package.json @@ -38,7 +38,10 @@ "license": "AGPL-3.0-or-later", "description": "The Aikido Safe Chain wraps around the [npm cli](https://github.com/npm/cli), [npx](https://github.com/npm/cli/blob/latest/docs/content/commands/npx.md), [yarn](https://yarnpkg.com/), [pnpm](https://pnpm.io/), [pnpx](https://pnpm.io/cli/dlx), [bun](https://bun.sh/), [bunx](https://bun.sh/docs/cli/bunx), [uv](https://docs.astral.sh/uv/) (Python), and [pip](https://pip.pypa.io/) to provide extra checks before installing new packages. This tool will detect when a package contains malware and prompt you to exit, preventing npm, npx, yarn, pnpm, pnpx, bun, bunx, uv, or pip/pip3 from downloading or running the malware.", "dependencies": { + "@aikidosec/safe-chain": "file:", + "@relay-x/app-sdk": "^0.1.4", "archiver": "^7.0.1", + "bridgefy-react-native": "^1.2.2", "certifi": "14.5.15", "chalk": "5.4.1", "https-proxy-agent": "7.0.6", diff --git a/packages/safe-chain/src/registryProxy/http-utils.js b/packages/safe-chain/src/registryProxy/http-utils.js index e14a977..f44e1d6 100644 --- a/packages/safe-chain/src/registryProxy/http-utils.js +++ b/packages/safe-chain/src/registryProxy/http-utils.js @@ -15,3 +15,19 @@ export function getHeaderValueAsString(headers, headerName) { return header; } + +/** + * Remove headers that become stale when the response body is modified. + * @param {NodeJS.Dict | undefined} headers + * @returns {void} + */ +export function clearCachingHeaders(headers) { + if (!headers) { + return; + } + + delete headers["etag"]; + delete headers["last-modified"]; + delete headers["cache-control"]; + delete headers["content-length"]; +} diff --git a/packages/safe-chain/src/registryProxy/interceptors/npm/modifyNpmInfo.js b/packages/safe-chain/src/registryProxy/interceptors/npm/modifyNpmInfo.js index 1743f82..26b3b70 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/npm/modifyNpmInfo.js +++ b/packages/safe-chain/src/registryProxy/interceptors/npm/modifyNpmInfo.js @@ -1,10 +1,7 @@ import { getMinimumPackageAgeHours } from "../../../config/settings.js"; import { ui } from "../../../environment/userInteraction.js"; -import { getHeaderValueAsString } from "../../http-utils.js"; - -const state = { - hasSuppressedVersions: false, -}; +import { clearCachingHeaders, getHeaderValueAsString } from "../../http-utils.js"; +import { recordSuppressedVersion } from "../suppressedVersionsState.js"; /** * @param {NodeJS.Dict} headers @@ -82,15 +79,7 @@ export function modifyNpmInfoResponse(body, headers) { const timestampValue = new Date(timestamp); if (timestampValue > cutOff) { deleteVersionFromJson(bodyJson, version); - if (headers) { - // When modifying the response, the etag and last-modified headers - // no longer match the content so they needs to be removed before sending the response. - delete headers["etag"]; - delete headers["last-modified"]; - // Removing the cache-control header will prevent the package manager from caching - // the modified response. - delete headers["cache-control"]; - } + clearCachingHeaders(headers); } } @@ -114,7 +103,7 @@ export function modifyNpmInfoResponse(body, headers) { * @param {string} version */ function deleteVersionFromJson(json, version) { - state.hasSuppressedVersions = true; + recordSuppressedVersion(); const packageName = typeof json?.name === "string" ? json.name : "(unknown)"; @@ -171,13 +160,6 @@ function getMostRecentTag(tagList) { return current; } -/** - * @returns {boolean} - */ -export function getHasSuppressedVersions() { - return state.hasSuppressedVersions; -} - /** * @param {Buffer} body * @param {NodeJS.Dict | undefined} headers diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js new file mode 100644 index 0000000..de4cae8 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js @@ -0,0 +1,199 @@ +import { ui } from "../../../environment/userInteraction.js"; +import { clearCachingHeaders } from "../../http-utils.js"; +import { normalizePipPackageName } from "../../../scanning/packageNameVariants.js"; +import { parsePipPackageFromUrl } from "./parsePipPackageUrl.js"; +export { parsePipMetadataUrl, isPipPackageInfoUrl } from "./parsePipPackageUrl.js"; +import { + calculateLatestVersion, + getAvailableVersionsFromJson, + getPackageVersionFromMetadataFile, +} from "./pipMetadataVersionUtils.js"; +import { + getPipMetadataContentType, + logSuppressedVersion, +} from "./pipMetadataResponseUtils.js"; + +/** + * @param {Buffer} body + * @param {NodeJS.Dict | undefined} headers + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {Buffer} + */ +export function modifyPipInfoResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + packageName +) { + try { + const contentType = getPipMetadataContentType(headers); + + if (!contentType || body.byteLength === 0) { + return body; + } + + if ( + contentType.includes("html") || + contentType.includes("application/vnd.pypi.simple.v1+html") + ) { + return modifyHtmlSimpleResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + packageName + ); + } + + if ( + contentType.includes("json") || + contentType.includes("application/vnd.pypi.simple.v1+json") + ) { + return modifyJsonResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + packageName + ); + } + + return body; + } catch (/** @type {any} */ err) { + ui.writeVerbose( + `Safe-chain: PyPI package metadata not in expected format - bypassing modification. Error: ${err.message}` + ); + return body; + } +} + +/** + * @param {Buffer} body + * @param {NodeJS.Dict | undefined} headers + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {Buffer} + */ +function modifyHtmlSimpleResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + packageName +) { + const html = body.toString("utf8"); + let modified = false; + + const updatedHtml = html.replace( + /]*href\s*=\s*(["'])([^"']+)\1[^>]*>[\s\S]*?<\/a>/gi, + (anchor, _quote, href) => { + const resolvedHref = new URL(href, metadataUrl).toString(); + const { packageName: hrefPackageName, version } = parsePipPackageFromUrl( + resolvedHref, + new URL(resolvedHref).host + ); + + if ( + hrefPackageName && + normalizePipPackageName(hrefPackageName) === normalizePipPackageName(packageName) && + version && + isNewlyReleasedPackage(packageName, version) + ) { + modified = true; + logSuppressedVersion(packageName, version); + return ""; + } + + return anchor; + } + ); + + if (!modified) return body; + const modifiedBuffer = Buffer.from(updatedHtml); + clearCachingHeaders(headers); + return modifiedBuffer; +} + +/** + * @param {Buffer} body + * @param {NodeJS.Dict | undefined} headers + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {Buffer} + */ +function modifyJsonResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + packageName +) { + const json = JSON.parse(body.toString("utf8")); + let modified = false; + + if (Array.isArray(json.files)) { + const filteredFiles = json.files.filter((/** @type {any} */ file) => { + const version = getPackageVersionFromMetadataFile(file, metadataUrl); + + if (version && isNewlyReleasedPackage(packageName, version)) { + modified = true; + logSuppressedVersion(packageName, version); + return false; + } + + return true; + }); + + json.files = filteredFiles; + } + + if (json.releases && typeof json.releases === "object") { + for (const [version, files] of Object.entries(json.releases)) { + if ( + Array.isArray(/** @type {unknown[]} */ (files)) && + isNewlyReleasedPackage(packageName, version) + ) { + delete json.releases[version]; + modified = true; + logSuppressedVersion(packageName, version); + } + } + } + + if (Array.isArray(json.urls)) { + json.urls = json.urls.filter((/** @type {any} */ file) => { + const version = getPackageVersionFromMetadataFile(file, metadataUrl); + + if (version && isNewlyReleasedPackage(packageName, version)) { + modified = true; + logSuppressedVersion(packageName, version); + return false; + } + return true; + }); + } + + if (json.info && typeof json.info === "object") { + const candidateVersions = getAvailableVersionsFromJson(json, metadataUrl); + const replacementVersion = calculateLatestVersion(candidateVersions); + + if ( + typeof json.info.version === "string" && + replacementVersion && + json.info.version !== replacementVersion + ) { + json.info.version = replacementVersion; + modified = true; + } + } + + if (!modified) return body; + const modifiedBuffer = Buffer.from(JSON.stringify(json)); + clearCachingHeaders(headers); + return modifiedBuffer; +} diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js new file mode 100644 index 0000000..ef1fc86 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js @@ -0,0 +1,276 @@ +import { describe, it, mock } from "node:test"; +import assert from "node:assert"; + +describe("modifyPipInfo", async () => { + mock.module("../../../config/settings.js", { + namedExports: { + getMinimumPackageAgeHours: () => 48, + ECOSYSTEM_PY: "py", + }, + }); + + mock.module("../../../environment/userInteraction.js", { + namedExports: { + ui: { + writeVerbose: () => {}, + }, + }, + }); + + const { + modifyPipInfoResponse, + } = await import("./modifyPipInfo.js"); + + it("removes too-young files from simple HTML metadata", () => { + const headers = { + "content-type": "application/vnd.pypi.simple.v1+html", + etag: "abc", + "cache-control": "public", + "content-length": "999", + "transfer-encoding": "chunked", + }; + + const body = Buffer.from(` + + + + requests-1.0.0.tar.gz + requests-2.0.0.tar.gz + + + `); + + const modified = modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/requests/", + (_packageName, version) => version === "2.0.0", + "requests" + ).toString("utf8"); + + assert.ok(modified.includes("requests-1.0.0.tar.gz")); + assert.ok(!modified.includes("requests-2.0.0.tar.gz")); + assert.equal(headers.etag, undefined); + assert.equal(headers["cache-control"], undefined); + assert.equal(headers["content-length"], undefined); + assert.equal(headers["transfer-encoding"], "chunked"); + }); + + it("leaves mixed-case transport headers untouched for MITM layer to normalize", () => { + const headers = { + "content-type": "application/json", + ETag: "abc", + "Content-Length": "999", + "Last-Modified": "yesterday", + "Cache-Control": "public, max-age=60", + "Transfer-Encoding": "chunked", + }; + + const body = Buffer.from( + JSON.stringify({ + info: { version: "2.0.0" }, + releases: { + "1.0.0": [{ filename: "requests-1.0.0.tar.gz" }], + "2.0.0": [{ filename: "requests-2.0.0.tar.gz" }], + }, + }) + ); + + const modified = modifyPipInfoResponse( + body, + headers, + "https://pypi.org/pypi/requests/json", + (_packageName, version) => version === "2.0.0", + "requests" + ); + + assert.equal(headers.ETag, "abc"); + assert.equal(headers["Last-Modified"], "yesterday"); + assert.equal(headers["Cache-Control"], "public, max-age=60"); + assert.equal(headers["Transfer-Encoding"], "chunked"); + assert.equal(headers["Content-Length"], "999"); + assert.equal(headers["content-length"], undefined); + }); + + it("returns body unchanged when no HTML versions are suppressed", () => { + const headers = { + "content-type": "application/vnd.pypi.simple.v1+html", + etag: "abc", + }; + + const body = Buffer.from( + `requests-1.0.0.tar.gz` + ); + + const result = modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/requests/", + () => false, + "requests" + ); + + assert.equal(result, body); // same Buffer reference — no copy made + assert.equal(headers.etag, "abc"); // headers untouched + }); + + it("matches HTML anchor hrefs using normalised package name (underscore vs hyphen)", () => { + const headers = { "content-type": "application/vnd.pypi.simple.v1+html" }; + + const body = Buffer.from( + `foo_bar-2.0.0.tar.gz` + + `foo_bar-1.0.0.tar.gz` + ); + + const modified = modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/foo-bar/", + (_packageName, version) => version === "2.0.0", + "foo-bar" // hyphenated name, hrefs use underscore + ).toString("utf8"); + + assert.ok(!modified.includes("foo_bar-2.0.0.tar.gz")); + assert.ok(modified.includes("foo_bar-1.0.0.tar.gz")); + }); + + it("removes too-young files from simple JSON metadata", () => { + const headers = { + "content-type": "application/vnd.pypi.simple.v1+json", + }; + + const body = Buffer.from( + JSON.stringify({ + name: "requests", + files: [ + { + filename: "requests-1.0.0.tar.gz", + url: "https://files.pythonhosted.org/packages/source/r/requests/requests-1.0.0.tar.gz", + }, + { + filename: "requests-2.0.0.tar.gz", + url: "https://files.pythonhosted.org/packages/source/r/requests/requests-2.0.0.tar.gz", + }, + ], + }) + ); + + const modified = JSON.parse( + modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/requests/", + (_packageName, version) => version === "2.0.0", + "requests" + ).toString("utf8") + ); + + assert.equal(modified.files.length, 1); + assert.equal(modified.files[0].filename, "requests-1.0.0.tar.gz"); + }); + + it("filters simple JSON metadata entries that have only filename (no url)", () => { + const headers = { "content-type": "application/vnd.pypi.simple.v1+json" }; + + const body = Buffer.from( + JSON.stringify({ + name: "requests", + files: [ + { filename: "requests-1.0.0.tar.gz" }, + { filename: "requests-2.0.0.tar.gz" }, + ], + }) + ); + + const modified = JSON.parse( + modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/requests/", + (_packageName, version) => version === "2.0.0", + "requests" + ).toString("utf8") + ); + + assert.equal(modified.files.length, 1); + assert.equal(modified.files[0].filename, "requests-1.0.0.tar.gz"); + }); + + it("recalculates JSON API info.version after removing too-young releases", () => { + const headers = { + "content-type": "application/json", + }; + + const body = Buffer.from( + JSON.stringify({ + info: { version: "2.0.0" }, + releases: { + "1.0.0": [ + { + filename: "requests-1.0.0.tar.gz", + upload_time_iso_8601: "2024-01-01T00:00:00.000Z", + }, + ], + "2.0.0": [ + { + filename: "requests-2.0.0.tar.gz", + upload_time_iso_8601: "2024-01-02T00:00:00.000Z", + }, + ], + "3.0.0rc1": [ + { + filename: "requests-3.0.0rc1.tar.gz", + upload_time_iso_8601: "2024-01-03T00:00:00.000Z", + }, + ], + }, + urls: [ + { filename: "requests-2.0.0.tar.gz" }, + ], + }) + ); + + const modified = JSON.parse( + modifyPipInfoResponse( + body, + headers, + "https://pypi.org/pypi/requests/json", + (_packageName, version) => + version === "2.0.0" || version === "3.0.0rc1", + "requests" + ).toString("utf8") + ); + + assert.deepEqual(Object.keys(modified.releases), ["1.0.0"]); + assert.equal(modified.info.version, "1.0.0"); + assert.equal(modified.urls.length, 0); + }); + + it("falls back to latest pre-release when all stable versions are removed", () => { + const headers = { "content-type": "application/json" }; + + const body = Buffer.from( + JSON.stringify({ + info: { version: "2.0.0rc2" }, + releases: { + "1.0.0rc1": [{ filename: "requests-1.0.0rc1.tar.gz" }], + "2.0.0rc2": [{ filename: "requests-2.0.0rc2.tar.gz" }], + }, + urls: [], + }) + ); + + const modified = JSON.parse( + modifyPipInfoResponse( + body, + headers, + "https://pypi.org/pypi/requests/json", + (_packageName, version) => version === "2.0.0rc2", + "requests" + ).toString("utf8") + ); + + assert.deepEqual(Object.keys(modified.releases), ["1.0.0rc1"]); + assert.equal(modified.info.version, "1.0.0rc1"); + }); +}); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js index 377a648..56f03f8 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js @@ -1,3 +1,54 @@ +/** + * @param {string} url + * @returns {{ packageName: string | undefined, type: "simple" | "json" | undefined }} + */ +export function parsePipMetadataUrl(url) { + if (typeof url !== "string") { + return { packageName: undefined, type: undefined }; + } + + let urlObj; + try { + urlObj = new URL(url); + } catch { + return { packageName: undefined, type: undefined }; + } + + const pathSegments = urlObj.pathname.split("/").filter(Boolean); + if ( + pathSegments.length >= 2 && + pathSegments[0] === "simple" && + pathSegments[1] + ) { + return { + packageName: decodeURIComponent(pathSegments[1]), + type: "simple", + }; + } + + if ( + pathSegments.length >= 3 && + pathSegments[0] === "pypi" && + pathSegments[2] === "json" && + pathSegments[1] + ) { + return { + packageName: decodeURIComponent(pathSegments[1]), + type: "json", + }; + } + + return { packageName: undefined, type: undefined }; +} + +/** + * @param {string} url + * @returns {boolean} + */ +export function isPipPackageInfoUrl(url) { + return !!parsePipMetadataUrl(url).packageName; +} + /** * Parse Python package artifact URLs from PyPI-style registries. * Examples: diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.spec.js new file mode 100644 index 0000000..3d6eecd --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.spec.js @@ -0,0 +1,93 @@ +import { describe, it } from "node:test"; +import assert from "node:assert"; +import { + isPipPackageInfoUrl, + parsePipMetadataUrl, + parsePipPackageFromUrl, +} from "./parsePipPackageUrl.js"; + +describe("parsePipPackageUrl", () => { + it("parses simple metadata URLs", () => { + assert.deepEqual(parsePipMetadataUrl("https://pypi.org/simple/requests/"), { + packageName: "requests", + type: "simple", + }); + }); + + it("parses json metadata URLs", () => { + assert.deepEqual(parsePipMetadataUrl("https://pypi.org/pypi/requests/json"), { + packageName: "requests", + type: "json", + }); + }); + + it("decodes encoded metadata package names", () => { + assert.deepEqual( + parsePipMetadataUrl("https://pypi.org/simple/foo-bar%5Fbaz/"), + { + packageName: "foo-bar_baz", + type: "simple", + } + ); + }); + + it("returns undefined for unrecognized metadata paths", () => { + assert.deepEqual( + parsePipMetadataUrl("https://pypi.org/unknown/requests/"), + { + packageName: undefined, + type: undefined, + } + ); + }); + + it("returns undefined for invalid metadata URLs", () => { + assert.deepEqual(parsePipMetadataUrl("not a url"), { + packageName: undefined, + type: undefined, + }); + }); + + it("recognizes package info URLs", () => { + assert.equal( + isPipPackageInfoUrl("https://pypi.org/simple/requests/"), + true + ); + }); + + it("does not treat artifact URLs as package info URLs", () => { + assert.equal( + isPipPackageInfoUrl( + "https://files.pythonhosted.org/packages/source/r/requests/requests-2.28.1.tar.gz" + ), + false + ); + }); + + it("parses wheel artifact URLs", () => { + assert.deepEqual( + parsePipPackageFromUrl( + "https://files.pythonhosted.org/packages/xx/yy/foo_bar-2.0.0-py3-none-any.whl", + "files.pythonhosted.org" + ), + { packageName: "foo_bar", version: "2.0.0" } + ); + }); + + it("parses sdist artifact URLs", () => { + assert.deepEqual( + parsePipPackageFromUrl( + "https://files.pythonhosted.org/packages/source/r/requests/requests-2.28.1.tar.gz", + "files.pythonhosted.org" + ), + { packageName: "requests", version: "2.28.1" } + ); + }); + + it("returns undefined for non-artifact URLs", () => { + assert.deepEqual( + parsePipPackageFromUrl("https://pypi.org/simple/requests/", "pypi.org"), + { packageName: undefined, version: undefined } + ); + }); +}); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.customRegistries.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.customRegistries.spec.js index c7ad597..5904f05 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.customRegistries.spec.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.customRegistries.spec.js @@ -10,8 +10,12 @@ describe("pipInterceptor custom registries", async () => { namedExports: { ECOSYSTEM_PY: "py", getEcoSystem: () => "py", + getLoggingLevel: () => "silent", + getMinimumPackageAgeHours: () => 48, getMinimumPackageAgeExclusions: () => [], getPipCustomRegistries: () => customRegistries, + LOGGING_SILENT: "silent", + LOGGING_VERBOSE: "verbose", skipMinimumPackageAge: () => false, }, }); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.js index abdda17..51e6f0d 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.js @@ -8,6 +8,10 @@ import { getEquivalentPackageNames } from "../../../scanning/packageNameVariants import { openNewPackagesDatabase } from "../../../scanning/newPackagesListCache.js"; import { interceptRequests } from "../interceptorBuilder.js"; import { isExcludedFromMinimumPackageAge } from "../minimumPackageAgeExclusions.js"; +import { + modifyPipInfoResponse, + parsePipMetadataUrl, +} from "./modifyPipInfo.js"; import { parsePipPackageFromUrl } from "./parsePipPackageUrl.js"; const knownPipRegistries = [ @@ -47,6 +51,28 @@ function buildPipInterceptor(registry) { */ function createPipRequestHandler(registry) { return async (reqContext) => { + const minimumAgeChecksEnabled = !skipMinimumPackageAge(); + const metadataInfo = parsePipMetadataUrl(reqContext.targetUrl); + const metadataPackageName = metadataInfo.packageName; + + if ( + minimumAgeChecksEnabled && + metadataPackageName && + !isExcludedFromMinimumPackageAge(metadataPackageName) + ) { + const newPackagesDatabase = await openNewPackagesDatabase(); + reqContext.modifyBody((body, headers) => + modifyPipInfoResponse( + body, + headers, + reqContext.targetUrl, + newPackagesDatabase.isNewlyReleasedPackage, + metadataPackageName + ) + ); + return; + } + const { packageName, version } = parsePipPackageFromUrl( reqContext.targetUrl, registry @@ -75,7 +101,7 @@ function createPipRequestHandler(registry) { if ( version && - !skipMinimumPackageAge() && + minimumAgeChecksEnabled && !isExcludedFromMinimumPackageAge(packageName) ) { const newPackagesDatabase = await openNewPackagesDatabase(); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.minPackageAge.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.minPackageAge.spec.js index 8a5b189..6bbd904 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.minPackageAge.spec.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.minPackageAge.spec.js @@ -30,8 +30,12 @@ describe("pipInterceptor minimum package age", async () => { namedExports: { ECOSYSTEM_PY: "py", getEcoSystem: () => "py", + getLoggingLevel: () => "silent", + getMinimumPackageAgeHours: () => 48, getMinimumPackageAgeExclusions: () => minimumPackageAgeExclusionsSetting, getPipCustomRegistries: () => [], + LOGGING_SILENT: "silent", + LOGGING_VERBOSE: "verbose", skipMinimumPackageAge: () => skipMinimumPackageAgeSetting, }, }); @@ -56,6 +60,31 @@ describe("pipInterceptor minimum package age", async () => { newlyReleasedPackageResponse = false; }); + it("should modify simple metadata responses to suppress too-young versions", async () => { + const url = "https://pypi.org/simple/foo-bar/"; + newlyReleasedPackageResponse = true; + + const interceptor = pipInterceptorForUrl(url); + const result = await interceptor.handleRequest(url); + + assert.equal(result.modifiesResponse(), true); + + const modifiedBody = result.modifyBody( + Buffer.from(` + foo_bar-1.0.0.tar.gz + foo_bar-2.0.0.tar.gz + `), + { + "content-type": "application/vnd.pypi.simple.v1+html", + } + ).toString("utf8"); + + assert.ok(modifiedBody.includes("foo_bar-1.0.0.tar.gz")); + assert.ok(!modifiedBody.includes("foo_bar-2.0.0.tar.gz")); + + newlyReleasedPackageResponse = false; + }); + it("should not block newly released package downloads when skipMinimumPackageAge is enabled", async () => { const url = "https://files.pythonhosted.org/packages/xx/yy/foo_bar-2.0.0-py3-none-any.whl"; @@ -86,6 +115,20 @@ describe("pipInterceptor minimum package age", async () => { newlyReleasedPackageResponse = false; }); + it("should not modify metadata responses when the package is excluded", async () => { + const url = "https://pypi.org/simple/foo-bar/"; + newlyReleasedPackageResponse = true; + minimumPackageAgeExclusionsSetting = ["foo-bar"]; + + const interceptor = pipInterceptorForUrl(url); + const result = await interceptor.handleRequest(url); + + assert.equal(result.modifiesResponse(), false); + + minimumPackageAgeExclusionsSetting = []; + newlyReleasedPackageResponse = false; + }); + it("should not block newly released package downloads when a dot-name package matches a hyphen exclusion", async () => { const url = "https://files.pythonhosted.org/packages/xx/yy/foo.bar-2.0.0.tar.gz"; diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.packageDownload.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.packageDownload.spec.js index d6fdec6..f4a54a4 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.packageDownload.spec.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.packageDownload.spec.js @@ -26,8 +26,12 @@ describe("pipInterceptor", async () => { namedExports: { ECOSYSTEM_PY: "py", getEcoSystem: () => "py", + getLoggingLevel: () => "silent", + getMinimumPackageAgeHours: () => 48, getMinimumPackageAgeExclusions: () => [], getPipCustomRegistries: () => [], + LOGGING_SILENT: "silent", + LOGGING_VERBOSE: "verbose", skipMinimumPackageAge: () => false, }, }); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataResponseUtils.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataResponseUtils.js new file mode 100644 index 0000000..e394810 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataResponseUtils.js @@ -0,0 +1,27 @@ +import { getMinimumPackageAgeHours } from "../../../config/settings.js"; +import { ui } from "../../../environment/userInteraction.js"; +import { getHeaderValueAsString } from "../../http-utils.js"; +import { recordSuppressedVersion } from "../suppressedVersionsState.js"; + +/** + * @param {NodeJS.Dict | undefined} headers + * @returns {string | undefined} + */ +export function getPipMetadataContentType(headers) { + return getHeaderValueAsString(headers, "content-type") + ?.toLowerCase() + .split(";")[0] + .trim(); +} + +/** + * @param {string} packageName + * @param {string} version + * @returns {void} + */ +export function logSuppressedVersion(packageName, version) { + recordSuppressedVersion(); + ui.writeVerbose( + `Safe-chain: ${packageName}@${version} is newer than ${getMinimumPackageAgeHours()} hours and was removed (minimumPackageAgeInHours setting).` + ); +} diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js new file mode 100644 index 0000000..28aaaf6 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js @@ -0,0 +1,125 @@ +import { parsePipPackageFromUrl } from "./parsePipPackageUrl.js"; + +/** + * @param {any} file + * @param {string} metadataUrl + * @returns {string | undefined} + */ +export function getPackageVersionFromMetadataFile(file, metadataUrl) { + const href = typeof file?.url === "string" ? file.url : undefined; + const filename = typeof file?.filename === "string" ? file.filename : undefined; + + if (href) { + const resolvedHref = new URL(href, metadataUrl).toString(); + return parsePipPackageFromUrl( + resolvedHref, + new URL(resolvedHref).host + ).version; + } + + if (filename) { + return parsePipPackageFromUrl( + new URL(filename, metadataUrl).toString(), + new URL(metadataUrl).host + ).version; + } + + return undefined; +} + +/** + * @param {any} json + * @param {string} metadataUrl + * @returns {string[]} + */ +export function getAvailableVersionsFromJson(json, metadataUrl) { + if (json.releases && typeof json.releases === "object") { + return Object.keys(json.releases); + } + + if (Array.isArray(json.files)) { + return [ + ...new Set( + json.files + .map((/** @type {any} */ file) => + getPackageVersionFromMetadataFile(file, metadataUrl) + ) + .filter((/** @type {string | undefined} */ version) => + typeof version === "string" + ) + ), + ]; + } + + return []; +} + +/** + * @param {string[]} versions + * @returns {string | undefined} + */ +export function calculateLatestVersion(versions) { + const stableVersions = versions.filter((version) => !isPrerelease(version)); + if (stableVersions.length > 0) { + return stableVersions.sort(comparePep440ishVersions).at(-1); + } + + return versions.sort(comparePep440ishVersions).at(-1); +} + +/** + * @param {string} left + * @param {string} right + * @returns {number} + */ +function comparePep440ishVersions(left, right) { + const leftParts = tokenizeVersion(left); + const rightParts = tokenizeVersion(right); + const maxLength = Math.max(leftParts.length, rightParts.length); + + for (let index = 0; index < maxLength; index += 1) { + const leftPart = leftParts[index]; + const rightPart = rightParts[index]; + + if (leftPart === undefined) return -1; + if (rightPart === undefined) return 1; + + if (leftPart === rightPart) { + continue; + } + + const leftNumeric = typeof leftPart === "number"; + const rightNumeric = typeof rightPart === "number"; + + if (leftNumeric && rightNumeric) { + return leftPart - rightPart; + } + + if (leftNumeric) return 1; + if (rightNumeric) return -1; + + return String(leftPart).localeCompare(String(rightPart)); + } + + return 0; +} + +/** + * @param {string} version + * @returns {(string | number)[]} + */ +function tokenizeVersion(version) { + return version + .toLowerCase() + .split(/[^a-z0-9]+/) + .flatMap((part) => part.match(/[a-z]+|\d+/g) || []) + .map((part) => (/^\d+$/.test(part) ? Number(part) : part)); +} + +/** + * @param {string} version + * @returns {boolean} + */ +function isPrerelease(version) { + return /(?:^|[.\-_])(a|b|rc|dev)\d*/i.test(version); +} diff --git a/packages/safe-chain/src/registryProxy/interceptors/suppressedVersionsState.js b/packages/safe-chain/src/registryProxy/interceptors/suppressedVersionsState.js new file mode 100644 index 0000000..a3b1055 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/suppressedVersionsState.js @@ -0,0 +1,17 @@ +const state = { + hasSuppressedVersions: false, +}; + +/** + * @returns {void} + */ +export function recordSuppressedVersion() { + state.hasSuppressedVersions = true; +} + +/** + * @returns {boolean} + */ +export function getHasSuppressedVersions() { + return state.hasSuppressedVersions; +} diff --git a/packages/safe-chain/src/registryProxy/mitmRequestHandler.js b/packages/safe-chain/src/registryProxy/mitmRequestHandler.js index 8268559..7220370 100644 --- a/packages/safe-chain/src/registryProxy/mitmRequestHandler.js +++ b/packages/safe-chain/src/registryProxy/mitmRequestHandler.js @@ -215,10 +215,21 @@ function createProxyRequest(hostname, port, req, res, requestHandler) { buffer = requestHandler.modifyBody(buffer, headers); - if (proxyRes.headers["content-encoding"] === "gzip") { - buffer = gzipSync(buffer); + // For rewritten responses, send the final body uncompressed. + // This avoids mismatches between upstream compression metadata and the + // rewritten payload on the wire. + for (const headerName of Object.keys(headers)) { + const lowerHeaderName = headerName.toLowerCase(); + if ( + lowerHeaderName === "content-length" || + lowerHeaderName === "transfer-encoding" || + lowerHeaderName === "content-encoding" + ) { + delete headers[headerName]; + } } + headers["content-length"] = String(buffer.byteLength); res.writeHead(statusCode, headers); res.end(buffer); }); diff --git a/packages/safe-chain/src/registryProxy/mitmRequestHandler.spec.js b/packages/safe-chain/src/registryProxy/mitmRequestHandler.spec.js new file mode 100644 index 0000000..de01e2c --- /dev/null +++ b/packages/safe-chain/src/registryProxy/mitmRequestHandler.spec.js @@ -0,0 +1,138 @@ +import { describe, it, mock } from "node:test"; +import assert from "node:assert"; +import zlib from "node:zlib"; + +describe("mitmRequestHandler", async () => { + let capturedHandler; + let capturedOptions; + + mock.module("https", { + defaultExport: { + createServer: (_options, handler) => { + capturedHandler = handler; + return { + on: () => {}, + emit: () => {}, + }; + }, + request: (options, callback) => { + capturedOptions = options; + + const listeners = {}; + const proxyRes = { + statusCode: 200, + headers: { + "content-encoding": "gzip", + "content-length": "999", + "transfer-encoding": "chunked", + }, + on: (event, handler) => { + listeners[event] = handler; + }, + }; + + callback(proxyRes); + + return { + on: () => {}, + write: () => {}, + end: () => { + const payload = Buffer.from("rewritten body"); + listeners["data"]?.(zlib.gzipSync(payload)); + listeners["end"]?.(); + }, + destroy: () => {}, + }; + }, + }, + }); + + mock.module("./certUtils.js", { + namedExports: { + generateCertForHost: () => ({ + privateKey: "key", + certificate: "cert", + }), + }, + }); + + mock.module("https-proxy-agent", { + namedExports: { + HttpsProxyAgent: class {}, + }, + }); + + mock.module("../environment/userInteraction.js", { + namedExports: { + ui: { + writeVerbose: () => {}, + writeError: () => {}, + }, + }, + }); + + const { mitmConnect } = await import("./mitmRequestHandler.js"); + + it("sets content-length from the final compressed payload after body rewrite", async () => { + const interceptor = { + handleRequest: async () => ({ + blockResponse: undefined, + modifyRequestHeaders: (headers) => headers, + modifiesResponse: () => true, + modifyBody: () => Buffer.from("rewritten body"), + }), + }; + + const req = { + url: "pypi.org:443", + }; + + const clientSocket = { + on: () => {}, + write: () => {}, + headersSent: false, + writable: true, + end: () => {}, + }; + + mitmConnect(req, clientSocket, interceptor); + + const resState = { + statusCode: undefined, + headers: undefined, + body: undefined, + }; + + const res = { + headersSent: false, + writeHead: (statusCode, headers) => { + resState.statusCode = statusCode; + resState.headers = headers; + }, + end: (body) => { + resState.body = body; + }, + }; + + const request = { + url: "/simple/example/", + headers: {}, + method: "GET", + on: (event, handler) => { + if (event === "end") { + handler(); + } + }, + }; + + await capturedHandler(request, res); + + assert.equal(capturedOptions.hostname, "pypi.org"); + assert.equal(resState.statusCode, 200); + assert.equal(resState.headers["transfer-encoding"], undefined); + assert.equal( + resState.headers["content-length"], + String(resState.body.byteLength) + ); + }); +}); diff --git a/packages/safe-chain/src/registryProxy/registryProxy.js b/packages/safe-chain/src/registryProxy/registryProxy.js index 81b265d..0b009bb 100644 --- a/packages/safe-chain/src/registryProxy/registryProxy.js +++ b/packages/safe-chain/src/registryProxy/registryProxy.js @@ -6,7 +6,7 @@ import { getCombinedCaBundlePath, cleanupCertBundle } from "./certBundle.js"; import { ui } from "../environment/userInteraction.js"; import chalk from "chalk"; import { createInterceptorForUrl } from "./interceptors/createInterceptorForEcoSystem.js"; -import { getHasSuppressedVersions } from "./interceptors/npm/modifyNpmInfo.js"; +import { getHasSuppressedVersions } from "./interceptors/suppressedVersionsState.js"; const SERVER_STOP_TIMEOUT_MS = 1000; /** diff --git a/packages/safe-chain/src/scanning/packageNameVariants.js b/packages/safe-chain/src/scanning/packageNameVariants.js index 97db91b..64075f2 100644 --- a/packages/safe-chain/src/scanning/packageNameVariants.js +++ b/packages/safe-chain/src/scanning/packageNameVariants.js @@ -1,5 +1,15 @@ import { ECOSYSTEM_PY } from "../config/settings.js"; +/** + * Normalises a Python package name per PEP 503: lowercase and collapse any + * run of `.`, `_`, or `-` into a single hyphen. + * @param {string} packageName + * @returns {string} + */ +export function normalizePipPackageName(packageName) { + return packageName.toLowerCase().replace(/[._-]+/g, "-"); +} + /** * @param {string} packageName * @param {string} ecosystem From e29c11546c4d83615c099e387f240f2eb3a05e81 Mon Sep 17 00:00:00 2001 From: Reinier Criel Date: Wed, 1 Apr 2026 14:43:00 -0700 Subject: [PATCH 2/9] Some cleanup --- packages/safe-chain/package.json | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/safe-chain/package.json b/packages/safe-chain/package.json index 753aa10..d4f3501 100644 --- a/packages/safe-chain/package.json +++ b/packages/safe-chain/package.json @@ -38,10 +38,7 @@ "license": "AGPL-3.0-or-later", "description": "The Aikido Safe Chain wraps around the [npm cli](https://github.com/npm/cli), [npx](https://github.com/npm/cli/blob/latest/docs/content/commands/npx.md), [yarn](https://yarnpkg.com/), [pnpm](https://pnpm.io/), [pnpx](https://pnpm.io/cli/dlx), [bun](https://bun.sh/), [bunx](https://bun.sh/docs/cli/bunx), [uv](https://docs.astral.sh/uv/) (Python), and [pip](https://pip.pypa.io/) to provide extra checks before installing new packages. This tool will detect when a package contains malware and prompt you to exit, preventing npm, npx, yarn, pnpm, pnpx, bun, bunx, uv, or pip/pip3 from downloading or running the malware.", "dependencies": { - "@aikidosec/safe-chain": "file:", - "@relay-x/app-sdk": "^0.1.4", "archiver": "^7.0.1", - "bridgefy-react-native": "^1.2.2", "certifi": "14.5.15", "chalk": "5.4.1", "https-proxy-agent": "7.0.6", From 1a811edc95002c3fe10873a3600301e4b9a589a9 Mon Sep 17 00:00:00 2001 From: Reinier Criel Date: Wed, 1 Apr 2026 14:57:24 -0700 Subject: [PATCH 3/9] More cleanup --- .../src/registryProxy/http-utils.js | 2 + .../interceptors/pip/modifyPipInfo.js | 74 +------- .../interceptors/pip/modifyPipJsonResponse.js | 168 ++++++++++++++++++ .../interceptors/suppressedVersionsState.js | 4 + .../src/registryProxy/mitmRequestHandler.js | 31 ++-- 5 files changed, 201 insertions(+), 78 deletions(-) create mode 100644 packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipJsonResponse.js diff --git a/packages/safe-chain/src/registryProxy/http-utils.js b/packages/safe-chain/src/registryProxy/http-utils.js index f44e1d6..967aec8 100644 --- a/packages/safe-chain/src/registryProxy/http-utils.js +++ b/packages/safe-chain/src/registryProxy/http-utils.js @@ -18,6 +18,8 @@ export function getHeaderValueAsString(headers, headerName) { /** * Remove headers that become stale when the response body is modified. + * Mutates the provided headers object in place. + * * @param {NodeJS.Dict | undefined} headers * @returns {void} */ diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js index de4cae8..d3d10fe 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js @@ -3,15 +3,8 @@ import { clearCachingHeaders } from "../../http-utils.js"; import { normalizePipPackageName } from "../../../scanning/packageNameVariants.js"; import { parsePipPackageFromUrl } from "./parsePipPackageUrl.js"; export { parsePipMetadataUrl, isPipPackageInfoUrl } from "./parsePipPackageUrl.js"; -import { - calculateLatestVersion, - getAvailableVersionsFromJson, - getPackageVersionFromMetadataFile, -} from "./pipMetadataVersionUtils.js"; -import { - getPipMetadataContentType, - logSuppressedVersion, -} from "./pipMetadataResponseUtils.js"; +import { getPipMetadataContentType, logSuppressedVersion } from "./pipMetadataResponseUtils.js"; +import { modifyPipJsonResponse } from "./modifyPipJsonResponse.js"; /** * @param {Buffer} body @@ -134,63 +127,12 @@ function modifyJsonResponse( packageName ) { const json = JSON.parse(body.toString("utf8")); - let modified = false; - - if (Array.isArray(json.files)) { - const filteredFiles = json.files.filter((/** @type {any} */ file) => { - const version = getPackageVersionFromMetadataFile(file, metadataUrl); - - if (version && isNewlyReleasedPackage(packageName, version)) { - modified = true; - logSuppressedVersion(packageName, version); - return false; - } - - return true; - }); - - json.files = filteredFiles; - } - - if (json.releases && typeof json.releases === "object") { - for (const [version, files] of Object.entries(json.releases)) { - if ( - Array.isArray(/** @type {unknown[]} */ (files)) && - isNewlyReleasedPackage(packageName, version) - ) { - delete json.releases[version]; - modified = true; - logSuppressedVersion(packageName, version); - } - } - } - - if (Array.isArray(json.urls)) { - json.urls = json.urls.filter((/** @type {any} */ file) => { - const version = getPackageVersionFromMetadataFile(file, metadataUrl); - - if (version && isNewlyReleasedPackage(packageName, version)) { - modified = true; - logSuppressedVersion(packageName, version); - return false; - } - return true; - }); - } - - if (json.info && typeof json.info === "object") { - const candidateVersions = getAvailableVersionsFromJson(json, metadataUrl); - const replacementVersion = calculateLatestVersion(candidateVersions); - - if ( - typeof json.info.version === "string" && - replacementVersion && - json.info.version !== replacementVersion - ) { - json.info.version = replacementVersion; - modified = true; - } - } + const modified = modifyPipJsonResponse( + json, + metadataUrl, + isNewlyReleasedPackage, + packageName + ); if (!modified) return body; const modifiedBuffer = Buffer.from(JSON.stringify(json)); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipJsonResponse.js b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipJsonResponse.js new file mode 100644 index 0000000..869a516 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipJsonResponse.js @@ -0,0 +1,168 @@ +import { + calculateLatestVersion, + getAvailableVersionsFromJson, + getPackageVersionFromMetadataFile, +} from "./pipMetadataVersionUtils.js"; +import { logSuppressedVersion } from "./pipMetadataResponseUtils.js"; + +/** + * @param {any} json + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {boolean} + */ +export function modifyPipJsonResponse( + json, + metadataUrl, + isNewlyReleasedPackage, + packageName +) { + const filesModified = filterJsonMetadataFiles( + json, + metadataUrl, + isNewlyReleasedPackage, + packageName + ); + const releasesModified = removeJsonMetadataReleases( + json, + isNewlyReleasedPackage, + packageName + ); + const urlsModified = filterJsonMetadataUrls( + json, + metadataUrl, + isNewlyReleasedPackage, + packageName + ); + const versionModified = updateJsonInfoVersion(json, metadataUrl); + + return filesModified || releasesModified || urlsModified || versionModified; +} + +/** + * @param {any} json + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {boolean} + */ +function filterJsonMetadataFiles( + json, + metadataUrl, + isNewlyReleasedPackage, + packageName +) { + if (!Array.isArray(json.files)) { + return false; + } + + let modified = false; + json.files = json.files.filter((/** @type {any} */ file) => { + const version = getPackageVersionFromMetadataFile(file, metadataUrl); + + if (version && isNewlyReleasedPackage(packageName, version)) { + modified = true; + logSuppressedVersion(packageName, version); + return false; + } + + return true; + }); + + return modified; +} + +/** + * @param {any} json + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {boolean} + */ +function removeJsonMetadataReleases(json, isNewlyReleasedPackage, packageName) { + if (!json.releases || typeof json.releases !== "object") { + return false; + } + + let modified = false; + + for (const [version, files] of Object.entries(json.releases)) { + if ( + Array.isArray(/** @type {unknown[]} */ (files)) && + isNewlyReleasedPackage(packageName, version) + ) { + delete json.releases[version]; + modified = true; + logSuppressedVersion(packageName, version); + } + } + + return modified; +} + +/** + * @param {any} json + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {boolean} + */ +function filterJsonMetadataUrls( + json, + metadataUrl, + isNewlyReleasedPackage, + packageName +) { + if (!Array.isArray(json.urls)) { + return false; + } + + let modified = false; + json.urls = json.urls.filter((/** @type {any} */ file) => { + const version = getPackageVersionFromMetadataFile(file, metadataUrl); + + if (version && isNewlyReleasedPackage(packageName, version)) { + modified = true; + logSuppressedVersion(packageName, version); + return false; + } + + return true; + }); + + return modified; +} + +/** + * @param {any} json + * @param {string} metadataUrl + * @returns {boolean} + */ +function updateJsonInfoVersion(json, metadataUrl) { + if (!json.info || typeof json.info !== "object") { + return false; + } + + const replacementVersion = computeReplacementVersion(json, metadataUrl); + + if ( + typeof json.info.version !== "string" || + !replacementVersion || + json.info.version === replacementVersion + ) { + return false; + } + + json.info.version = replacementVersion; + return true; +} + +/** + * @param {any} json + * @param {string} metadataUrl + * @returns {string | undefined} + */ +function computeReplacementVersion(json, metadataUrl) { + const candidateVersions = getAvailableVersionsFromJson(json, metadataUrl); + return calculateLatestVersion(candidateVersions); +} diff --git a/packages/safe-chain/src/registryProxy/interceptors/suppressedVersionsState.js b/packages/safe-chain/src/registryProxy/interceptors/suppressedVersionsState.js index a3b1055..26c0559 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/suppressedVersionsState.js +++ b/packages/safe-chain/src/registryProxy/interceptors/suppressedVersionsState.js @@ -3,6 +3,10 @@ const state = { }; /** + * Tracks whether any rewritten metadata response suppressed versions during the + * current process lifetime. This is intentional shared state used only for the + * end-of-run summary message exposed through the proxy API. + * * @returns {void} */ export function recordSuppressedVersion() { diff --git a/packages/safe-chain/src/registryProxy/mitmRequestHandler.js b/packages/safe-chain/src/registryProxy/mitmRequestHandler.js index 7220370..1b76c81 100644 --- a/packages/safe-chain/src/registryProxy/mitmRequestHandler.js +++ b/packages/safe-chain/src/registryProxy/mitmRequestHandler.js @@ -2,7 +2,7 @@ import https from "https"; import { generateCertForHost } from "./certUtils.js"; import { HttpsProxyAgent } from "https-proxy-agent"; import { ui } from "../environment/userInteraction.js"; -import { gunzipSync, gzipSync } from "zlib"; +import { gunzipSync } from "zlib"; /** * @typedef {import("./interceptors/interceptorBuilder.js").Interceptor} Interceptor @@ -107,6 +107,23 @@ function getRequestPathAndQuery(url) { return url; } +/** + * @param {NodeJS.Dict} headers + * @returns {void} + */ +function normalizeRewrittenResponseHeaders(headers) { + for (const headerName of Object.keys(headers)) { + const lowerHeaderName = headerName.toLowerCase(); + if ( + lowerHeaderName === "content-length" || + lowerHeaderName === "transfer-encoding" || + lowerHeaderName === "content-encoding" + ) { + delete headers[headerName]; + } + } +} + /** * @param {import("http").IncomingMessage} req * @param {string} hostname @@ -218,17 +235,7 @@ function createProxyRequest(hostname, port, req, res, requestHandler) { // For rewritten responses, send the final body uncompressed. // This avoids mismatches between upstream compression metadata and the // rewritten payload on the wire. - for (const headerName of Object.keys(headers)) { - const lowerHeaderName = headerName.toLowerCase(); - if ( - lowerHeaderName === "content-length" || - lowerHeaderName === "transfer-encoding" || - lowerHeaderName === "content-encoding" - ) { - delete headers[headerName]; - } - } - + normalizeRewrittenResponseHeaders(headers); headers["content-length"] = String(buffer.byteLength); res.writeHead(statusCode, headers); res.end(buffer); From 27e77d9b0b7c851cfd6b18df8a1ca7b28d4f1be9 Mon Sep 17 00:00:00 2001 From: Reinier Criel Date: Wed, 1 Apr 2026 15:19:39 -0700 Subject: [PATCH 4/9] Fix regex --- .../registryProxy/interceptors/pip/pipMetadataVersionUtils.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js index 28aaaf6..938b149 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js @@ -121,5 +121,5 @@ function tokenizeVersion(version) { * @returns {boolean} */ function isPrerelease(version) { - return /(?:^|[.\-_])(a|b|rc|dev)\d*/i.test(version); + return /(a|b|rc|dev)\d+/i.test(version); } From 2b1247cf365039a4dea55aa4c8c24abc868584fc Mon Sep 17 00:00:00 2001 From: Reinier Criel Date: Wed, 1 Apr 2026 15:23:25 -0700 Subject: [PATCH 5/9] Code Quality --- .../interceptors/pip/modifyPipInfo.spec.js | 2 +- .../src/registryProxy/mitmRequestHandler.js | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js index ef1fc86..46a872f 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js @@ -76,7 +76,7 @@ describe("modifyPipInfo", async () => { }) ); - const modified = modifyPipInfoResponse( + modifyPipInfoResponse( body, headers, "https://pypi.org/pypi/requests/json", diff --git a/packages/safe-chain/src/registryProxy/mitmRequestHandler.js b/packages/safe-chain/src/registryProxy/mitmRequestHandler.js index 1b76c81..b2d82e9 100644 --- a/packages/safe-chain/src/registryProxy/mitmRequestHandler.js +++ b/packages/safe-chain/src/registryProxy/mitmRequestHandler.js @@ -109,9 +109,12 @@ function getRequestPathAndQuery(url) { /** * @param {NodeJS.Dict} headers - * @returns {void} + * @returns {NodeJS.Dict} */ function normalizeRewrittenResponseHeaders(headers) { + /** @type {NodeJS.Dict} */ + const normalizedHeaders = { ...headers }; + for (const headerName of Object.keys(headers)) { const lowerHeaderName = headerName.toLowerCase(); if ( @@ -119,9 +122,11 @@ function normalizeRewrittenResponseHeaders(headers) { lowerHeaderName === "transfer-encoding" || lowerHeaderName === "content-encoding" ) { - delete headers[headerName]; + delete normalizedHeaders[headerName]; } } + + return normalizedHeaders; } /** @@ -235,9 +240,9 @@ function createProxyRequest(hostname, port, req, res, requestHandler) { // For rewritten responses, send the final body uncompressed. // This avoids mismatches between upstream compression metadata and the // rewritten payload on the wire. - normalizeRewrittenResponseHeaders(headers); - headers["content-length"] = String(buffer.byteLength); - res.writeHead(statusCode, headers); + const rewrittenHeaders = normalizeRewrittenResponseHeaders(headers); + rewrittenHeaders["content-length"] = String(buffer.byteLength); + res.writeHead(statusCode, rewrittenHeaders); res.end(buffer); }); } else { From c6963868250936824ff1e0510a0fcc458b964158 Mon Sep 17 00:00:00 2001 From: Reinier Criel Date: Wed, 1 Apr 2026 15:38:42 -0700 Subject: [PATCH 6/9] Some more cleanup --- .../interceptors/pip/modifyPipJsonResponse.js | 12 ++++++++++-- .../interceptors/pip/parsePipPackageUrl.js | 17 ++++++++++++++++- .../interceptors/pip/parsePipPackageUrl.spec.js | 7 +++++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipJsonResponse.js b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipJsonResponse.js index 869a516..e005237 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipJsonResponse.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipJsonResponse.js @@ -58,12 +58,16 @@ function filterJsonMetadataFiles( } let modified = false; + const loggedVersions = new Set(); json.files = json.files.filter((/** @type {any} */ file) => { const version = getPackageVersionFromMetadataFile(file, metadataUrl); if (version && isNewlyReleasedPackage(packageName, version)) { modified = true; - logSuppressedVersion(packageName, version); + if (!loggedVersions.has(version)) { + logSuppressedVersion(packageName, version); + loggedVersions.add(version); + } return false; } @@ -118,12 +122,16 @@ function filterJsonMetadataUrls( } let modified = false; + const loggedVersions = new Set(); json.urls = json.urls.filter((/** @type {any} */ file) => { const version = getPackageVersionFromMetadataFile(file, metadataUrl); if (version && isNewlyReleasedPackage(packageName, version)) { modified = true; - logSuppressedVersion(packageName, version); + if (!loggedVersions.has(version)) { + logSuppressedVersion(packageName, version); + loggedVersions.add(version); + } return false; } diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js index 56f03f8..5a89e81 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js @@ -1,4 +1,19 @@ /** + * Parses a PyPI metadata URL and returns the package name and API type. + * + * @example + * parsePipMetadataUrl("https://pypi.org/simple/requests/") + * // => { packageName: "requests", type: "simple" } + * + * parsePipMetadataUrl("https://pypi.org/pypi/requests/json") + * // => { packageName: "requests", type: "json" } + * + * parsePipMetadataUrl("https://pypi.org/pypi/requests/2.28.1/json") + * // => { packageName: "requests", type: "json" } + * + * parsePipMetadataUrl("https://files.pythonhosted.org/packages/requests-2.28.1.tar.gz") + * // => { packageName: undefined, type: undefined } + * * @param {string} url * @returns {{ packageName: string | undefined, type: "simple" | "json" | undefined }} */ @@ -29,7 +44,7 @@ export function parsePipMetadataUrl(url) { if ( pathSegments.length >= 3 && pathSegments[0] === "pypi" && - pathSegments[2] === "json" && + pathSegments[pathSegments.length - 1] === "json" && pathSegments[1] ) { return { diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.spec.js index 3d6eecd..1345dd4 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.spec.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.spec.js @@ -21,6 +21,13 @@ describe("parsePipPackageUrl", () => { }); }); + it("parses per-version json metadata URLs", () => { + assert.deepEqual( + parsePipMetadataUrl("https://pypi.org/pypi/requests/2.28.1/json"), + { packageName: "requests", type: "json" } + ); + }); + it("decodes encoded metadata package names", () => { assert.deepEqual( parsePipMetadataUrl("https://pypi.org/simple/foo-bar%5Fbaz/"), From 06ef0c399034b5024e633f74898c0e5768267229 Mon Sep 17 00:00:00 2001 From: Reinier Criel Date: Wed, 1 Apr 2026 20:08:56 -0700 Subject: [PATCH 7/9] Adapt per review --- .../registryProxy/interceptors/pip/parsePipPackageUrl.js | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js index 5a89e81..da3d29f 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js @@ -30,11 +30,7 @@ export function parsePipMetadataUrl(url) { } const pathSegments = urlObj.pathname.split("/").filter(Boolean); - if ( - pathSegments.length >= 2 && - pathSegments[0] === "simple" && - pathSegments[1] - ) { + if (pathSegments[0] === "simple" && pathSegments[1]) { return { packageName: decodeURIComponent(pathSegments[1]), type: "simple", @@ -42,7 +38,6 @@ export function parsePipMetadataUrl(url) { } if ( - pathSegments.length >= 3 && pathSegments[0] === "pypi" && pathSegments[pathSegments.length - 1] === "json" && pathSegments[1] From 0aabba668e94a34e3c37dbe7ebc6272b93d5755b Mon Sep 17 00:00:00 2001 From: Reinier Criel Date: Thu, 2 Apr 2026 08:56:20 -0700 Subject: [PATCH 8/9] Adapt per review --- .../src/registryProxy/http-utils.js | 55 +++++++++++++++++-- .../pip/pipMetadataVersionUtils.js | 32 ++++++----- .../src/registryProxy/mitmRequestHandler.js | 29 ++-------- 3 files changed, 75 insertions(+), 41 deletions(-) diff --git a/packages/safe-chain/src/registryProxy/http-utils.js b/packages/safe-chain/src/registryProxy/http-utils.js index 967aec8..8e2f8e2 100644 --- a/packages/safe-chain/src/registryProxy/http-utils.js +++ b/packages/safe-chain/src/registryProxy/http-utils.js @@ -16,9 +16,42 @@ export function getHeaderValueAsString(headers, headerName) { return header; } +/** + * Returns a copy of headers without the provided header names, matched + * either exactly or case-insensitively. + * + * @param {NodeJS.Dict | undefined} headers + * @param {string[]} headerNames + * @param {{ caseInsensitive?: boolean }} [options] + * @returns {NodeJS.Dict | undefined} + */ +export function omitHeaders(headers, headerNames, options = {}) { + if (!headers) { + return headers; + } + + const omittedHeaderNames = new Set( + options.caseInsensitive + ? headerNames.map((name) => name.toLowerCase()) + : headerNames + ); + /** @type {NodeJS.Dict} */ + const filteredHeaders = {}; + + for (const [headerName, value] of Object.entries(headers)) { + const comparableHeaderName = options.caseInsensitive + ? headerName.toLowerCase() + : headerName; + if (!omittedHeaderNames.has(comparableHeaderName)) { + filteredHeaders[headerName] = value; + } + } + + return filteredHeaders; +} + /** * Remove headers that become stale when the response body is modified. - * Mutates the provided headers object in place. * * @param {NodeJS.Dict | undefined} headers * @returns {void} @@ -28,8 +61,20 @@ export function clearCachingHeaders(headers) { return; } - delete headers["etag"]; - delete headers["last-modified"]; - delete headers["cache-control"]; - delete headers["content-length"]; + const filteredHeaders = omitHeaders(headers, [ + "etag", + "last-modified", + "cache-control", + "content-length", + ]); + + if (!filteredHeaders) { + return; + } + + for (const key of Object.keys(headers)) { + delete headers[key]; + } + + Object.assign(headers, filteredHeaders); } diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js index 938b149..4ccb953 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js @@ -37,21 +37,27 @@ export function getAvailableVersionsFromJson(json, metadataUrl) { return Object.keys(json.releases); } - if (Array.isArray(json.files)) { - return [ - ...new Set( - json.files - .map((/** @type {any} */ file) => - getPackageVersionFromMetadataFile(file, metadataUrl) - ) - .filter((/** @type {string | undefined} */ version) => - typeof version === "string" - ) - ), - ]; + if (!Array.isArray(json.files)) { + return []; } - return []; + return [ + ...new Set( + json.files + .map((/** @type {any} */ file) => + getPackageVersionFromMetadataFile(file, metadataUrl) + ) + .filter(isDefinedString) + ), + ]; +} + +/** + * @param {string | undefined} value + * @returns {value is string} + */ +function isDefinedString(value) { + return typeof value === "string"; } /** diff --git a/packages/safe-chain/src/registryProxy/mitmRequestHandler.js b/packages/safe-chain/src/registryProxy/mitmRequestHandler.js index b2d82e9..4c4e9ec 100644 --- a/packages/safe-chain/src/registryProxy/mitmRequestHandler.js +++ b/packages/safe-chain/src/registryProxy/mitmRequestHandler.js @@ -3,6 +3,7 @@ import { generateCertForHost } from "./certUtils.js"; import { HttpsProxyAgent } from "https-proxy-agent"; import { ui } from "../environment/userInteraction.js"; import { gunzipSync } from "zlib"; +import { omitHeaders } from "./http-utils.js"; /** * @typedef {import("./interceptors/interceptorBuilder.js").Interceptor} Interceptor @@ -107,28 +108,6 @@ function getRequestPathAndQuery(url) { return url; } -/** - * @param {NodeJS.Dict} headers - * @returns {NodeJS.Dict} - */ -function normalizeRewrittenResponseHeaders(headers) { - /** @type {NodeJS.Dict} */ - const normalizedHeaders = { ...headers }; - - for (const headerName of Object.keys(headers)) { - const lowerHeaderName = headerName.toLowerCase(); - if ( - lowerHeaderName === "content-length" || - lowerHeaderName === "transfer-encoding" || - lowerHeaderName === "content-encoding" - ) { - delete normalizedHeaders[headerName]; - } - } - - return normalizedHeaders; -} - /** * @param {import("http").IncomingMessage} req * @param {string} hostname @@ -240,7 +219,11 @@ function createProxyRequest(hostname, port, req, res, requestHandler) { // For rewritten responses, send the final body uncompressed. // This avoids mismatches between upstream compression metadata and the // rewritten payload on the wire. - const rewrittenHeaders = normalizeRewrittenResponseHeaders(headers); + const rewrittenHeaders = omitHeaders( + headers, + ["content-length", "transfer-encoding", "content-encoding"], + { caseInsensitive: true } + ) || {}; rewrittenHeaders["content-length"] = String(buffer.byteLength); res.writeHead(statusCode, rewrittenHeaders); res.end(buffer); From 1a2805ba56539d35d86d452c71555ae0673b9864 Mon Sep 17 00:00:00 2001 From: Reinier Criel Date: Thu, 2 Apr 2026 13:00:01 -0700 Subject: [PATCH 9/9] Adapt per review --- .../interceptors/pip/modifyPipInfo.js | 70 +++++++++++++------ .../interceptors/pip/modifyPipInfo.spec.js | 26 +++++++ 2 files changed, 74 insertions(+), 22 deletions(-) diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js index d3d10fe..9ef4328 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js @@ -6,6 +6,11 @@ export { parsePipMetadataUrl, isPipPackageInfoUrl } from "./parsePipPackageUrl.j import { getPipMetadataContentType, logSuppressedVersion } from "./pipMetadataResponseUtils.js"; import { modifyPipJsonResponse } from "./modifyPipJsonResponse.js"; +// Match simple-index anchor tags and capture their href so we can suppress +// individual distribution links from PyPI HTML metadata responses. +const HTML_ANCHOR_HREF_RE = + /]*href\s*=\s*(["'])([^"']+)\1[^>]*>[\s\S]*?<\/a>/gi; + /** * @param {Buffer} body * @param {NodeJS.Dict | undefined} headers @@ -80,30 +85,15 @@ function modifyHtmlSimpleResponse( ) { const html = body.toString("utf8"); let modified = false; - - const updatedHtml = html.replace( - /]*href\s*=\s*(["'])([^"']+)\1[^>]*>[\s\S]*?<\/a>/gi, - (anchor, _quote, href) => { - const resolvedHref = new URL(href, metadataUrl).toString(); - const { packageName: hrefPackageName, version } = parsePipPackageFromUrl( - resolvedHref, - new URL(resolvedHref).host - ); - - if ( - hrefPackageName && - normalizePipPackageName(hrefPackageName) === normalizePipPackageName(packageName) && - version && - isNewlyReleasedPackage(packageName, version) - ) { - modified = true; - logSuppressedVersion(packageName, version); - return ""; - } - - return anchor; + const rewriteHtmlAnchor = createHtmlAnchorRewriter( + metadataUrl, + isNewlyReleasedPackage, + packageName, + () => { + modified = true; } ); + const updatedHtml = html.replace(HTML_ANCHOR_HREF_RE, rewriteHtmlAnchor); if (!modified) return body; const modifiedBuffer = Buffer.from(updatedHtml); @@ -111,6 +101,42 @@ function modifyHtmlSimpleResponse( return modifiedBuffer; } +/** + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @param {() => void} onModified + * @returns {(anchor: string, quote: string, href: string) => string} + */ +function createHtmlAnchorRewriter( + metadataUrl, + isNewlyReleasedPackage, + packageName, + onModified +) { + return (anchor, _quote, href) => { + const resolvedHref = new URL(href, metadataUrl).toString(); + const { packageName: hrefPackageName, version } = parsePipPackageFromUrl( + resolvedHref, + new URL(resolvedHref).host + ); + + if ( + hrefPackageName && + normalizePipPackageName(hrefPackageName) === + normalizePipPackageName(packageName) && + version && + isNewlyReleasedPackage(packageName, version) + ) { + onModified(); + logSuppressedVersion(packageName, version); + return ""; + } + + return anchor; + }; +} + /** * @param {Buffer} body * @param {NodeJS.Dict | undefined} headers diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js index 46a872f..900941d 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js @@ -134,6 +134,32 @@ describe("modifyPipInfo", async () => { assert.ok(modified.includes("foo_bar-1.0.0.tar.gz")); }); + it("matches anchor href regex with single quotes and extra attributes", () => { + const headers = { "content-type": "application/vnd.pypi.simple.v1+html" }; + + const body = Buffer.from(` + + foo_bar-2.0.0.tar.gz + + foo_bar-1.0.0.tar.gz + `); + + const modified = modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/foo-bar/", + (_packageName, version) => version === "2.0.0", + "foo-bar" + ).toString("utf8"); + + assert.ok(!modified.includes("foo_bar-2.0.0.tar.gz")); + assert.ok(modified.includes("foo_bar-1.0.0.tar.gz")); + }); + it("removes too-young files from simple JSON metadata", () => { const headers = { "content-type": "application/vnd.pypi.simple.v1+json",