diff --git a/README.md b/README.md index e173b66..3e73137 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,8 @@ Current enforcement differs by ecosystem: - during normal package resolution, Safe Chain suppresses versions that are newer than the configured minimum age from the package metadata returned by the registry - for direct package download requests that bypass that metadata flow, Safe Chain can block the request itself using a cached list of newly released packages - Python package managers: - - Safe Chain blocks direct package download requests using a cached list of newly released packages + - during package resolution, Safe Chain suppresses too-young files and releases from PyPI metadata responses + - for direct package download requests that bypass that metadata flow, Safe Chain can block the request itself using a cached list of newly released packages By default, the minimum package age is 48 hours. This provides an additional security layer during the critical period when newly published packages are most vulnerable to containing undetected threats. You can configure this threshold or bypass this protection entirely - see the [Minimum Package Age Configuration](#minimum-package-age) section below. @@ -198,7 +199,10 @@ For npm-based package managers, this check currently has two enforcement modes: - Safe Chain suppresses too-young versions from package metadata during normal dependency resolution. - Safe Chain blocks direct package download requests when they are matched against the cached newly released packages list. -For Python package managers, Safe Chain currently enforces minimum package age by blocking direct package download requests when they are matched against the cached newly released packages list. +For Python package managers, this check currently has two enforcement modes: + +- Safe Chain suppresses too-young files and releases from PyPI metadata during dependency resolution. +- Safe Chain blocks direct package download requests when they are matched against the cached newly released packages list. ### Configuration Options @@ -277,6 +281,41 @@ You can set custom registries through environment variable or config file. Both } ``` +## Malware List Base URL + +Configure Safe Chain to fetch malware databases and new packages lists from a custom mirror URL. This allows you to host your own copy of the Aikido malware database. + +### Configuration Options + +You can set the malware list base URL through multiple sources (in order of priority): + +1. **CLI Argument** (highest priority): + + ```shell + npm install express --safe-chain-malware-list-base-url=https://your-mirror.com + ``` + +2. **Environment Variable**: + + ```shell + export SAFE_CHAIN_MALWARE_LIST_BASE_URL=https://your-mirror.com + npm install express + ``` + +3. **Config File** (`~/.safe-chain/config.json`): + + ```json + { + "malwareListBaseUrl": "https://your-mirror.com" + } + ``` + +The base URL should point to a server that mirrors the structure of `https://malware-list.aikido.dev/`, including the following paths: +- `/malware_predictions.json` (JavaScript ecosystem malware database) +- `/malware_pypi.json` (Python ecosystem malware database) +- `/releases/npm.json` (JavaScript new packages list) +- `/releases/pypi.json` (Python new packages list) + # Usage in CI/CD You can protect your CI/CD pipelines from malicious packages by integrating Aikido Safe Chain into your build process. This ensures that any packages installed during your automated builds are checked for malware before installation. diff --git a/packages/safe-chain/src/api/aikido.js b/packages/safe-chain/src/api/aikido.js index 0ceec21..25babb9 100644 --- a/packages/safe-chain/src/api/aikido.js +++ b/packages/safe-chain/src/api/aikido.js @@ -3,17 +3,18 @@ import { getEcoSystem, ECOSYSTEM_JS, ECOSYSTEM_PY, + getMalwareListBaseUrl, } from "../config/settings.js"; import { ui } from "../environment/userInteraction.js"; -const malwareDatabaseUrls = { - [ECOSYSTEM_JS]: "https://malware-list.aikido.dev/malware_predictions.json", - [ECOSYSTEM_PY]: "https://malware-list.aikido.dev/malware_pypi.json", +const malwareDatabasePaths = { + [ECOSYSTEM_JS]: "malware_predictions.json", + [ECOSYSTEM_PY]: "malware_pypi.json", }; -const newPackagesListUrls = { - [ECOSYSTEM_JS]: "https://malware-list.aikido.dev/releases/npm.json", - [ECOSYSTEM_PY]: "https://malware-list.aikido.dev/releases/pypi.json", +const newPackagesListPaths = { + [ECOSYSTEM_JS]: "releases/npm.json", + [ECOSYSTEM_PY]: "releases/pypi.json", }; const DEFAULT_FETCH_RETRY_ATTEMPTS = 4; @@ -40,10 +41,11 @@ const DEFAULT_FETCH_RETRY_ATTEMPTS = 4; export async function fetchMalwareDatabase() { return retry(async () => { const ecosystem = getEcoSystem(); - const malwareDatabaseUrl = - malwareDatabaseUrls[ - /** @type {keyof typeof malwareDatabaseUrls} */ (ecosystem) - ]; + const baseUrl = getMalwareListBaseUrl(); + const path = malwareDatabasePaths[ + /** @type {keyof typeof malwareDatabasePaths} */ (ecosystem) + ]; + const malwareDatabaseUrl = `${baseUrl}/${path}`; const response = await fetch(malwareDatabaseUrl); if (!response.ok) { throw new Error( @@ -69,10 +71,11 @@ export async function fetchMalwareDatabase() { export async function fetchMalwareDatabaseVersion() { return retry(async () => { const ecosystem = getEcoSystem(); - const malwareDatabaseUrl = - malwareDatabaseUrls[ - /** @type {keyof typeof malwareDatabaseUrls} */ (ecosystem) - ]; + const baseUrl = getMalwareListBaseUrl(); + const path = malwareDatabasePaths[ + /** @type {keyof typeof malwareDatabasePaths} */ (ecosystem) + ]; + const malwareDatabaseUrl = `${baseUrl}/${path}`; const response = await fetch(malwareDatabaseUrl, { method: "HEAD", }); @@ -92,13 +95,15 @@ export async function fetchMalwareDatabaseVersion() { export async function fetchNewPackagesList() { return retry(async () => { const ecosystem = getEcoSystem(); - const url = - newPackagesListUrls[/** @type {keyof typeof newPackagesListUrls} */ (ecosystem)]; + const baseUrl = getMalwareListBaseUrl(); + const path = newPackagesListPaths[/** @type {keyof typeof newPackagesListPaths} */ (ecosystem)]; - if (!url) { + if (!path) { return { newPackagesList: [], version: undefined }; } + const url = `${baseUrl}/${path}`; + const response = await fetch(url); if (!response.ok) { throw new Error( @@ -124,13 +129,15 @@ export async function fetchNewPackagesList() { export async function fetchNewPackagesListVersion() { return retry(async () => { const ecosystem = getEcoSystem(); - const url = - newPackagesListUrls[/** @type {keyof typeof newPackagesListUrls} */ (ecosystem)]; + const baseUrl = getMalwareListBaseUrl(); + const path = newPackagesListPaths[/** @type {keyof typeof newPackagesListPaths} */ (ecosystem)]; - if (!url) { + if (!path) { return undefined; } + const url = `${baseUrl}/${path}`; + const response = await fetch(url, { method: "HEAD" }); if (!response.ok) { throw new Error( diff --git a/packages/safe-chain/src/api/aikido.spec.js b/packages/safe-chain/src/api/aikido.spec.js index 0c6c7d9..f41b9d2 100644 --- a/packages/safe-chain/src/api/aikido.spec.js +++ b/packages/safe-chain/src/api/aikido.spec.js @@ -22,6 +22,7 @@ describe("aikido API", async () => { getEcoSystem: () => ecosystem, ECOSYSTEM_JS: "js", ECOSYSTEM_PY: "py", + getMalwareListBaseUrl: () => "https://malware-list.aikido.dev", }, }); @@ -184,6 +185,15 @@ describe("aikido API", async () => { assert.deepStrictEqual(result.newPackagesList, []); assert.strictEqual(result.version, undefined); }); + + it("should return undefined version without fetching for unsupported ecosystems", async () => { + ecosystem = "ruby"; + + const result = await fetchNewPackagesListVersion(); + + assert.strictEqual(mockFetch.mock.calls.length, 0); + assert.strictEqual(result, undefined); + }); }); describe("fetchNewPackagesListVersion", () => { diff --git a/packages/safe-chain/src/config/cliArguments.js b/packages/safe-chain/src/config/cliArguments.js index 25013fb..918761c 100644 --- a/packages/safe-chain/src/config/cliArguments.js +++ b/packages/safe-chain/src/config/cliArguments.js @@ -1,12 +1,13 @@ import { ui } from "../environment/userInteraction.js"; /** - * @type {{loggingLevel: string | undefined, skipMinimumPackageAge: boolean | undefined, minimumPackageAgeHours: string | undefined}} + * @type {{loggingLevel: string | undefined, skipMinimumPackageAge: boolean | undefined, minimumPackageAgeHours: string | undefined, malwareListBaseUrl: string | undefined}} */ const state = { loggingLevel: undefined, skipMinimumPackageAge: undefined, minimumPackageAgeHours: undefined, + malwareListBaseUrl: undefined, }; const SAFE_CHAIN_ARG_PREFIX = "--safe-chain-"; @@ -20,6 +21,7 @@ export function initializeCliArguments(args) { state.loggingLevel = undefined; state.skipMinimumPackageAge = undefined; state.minimumPackageAgeHours = undefined; + state.malwareListBaseUrl = undefined; const safeChainArgs = []; const remainingArgs = []; @@ -35,6 +37,7 @@ export function initializeCliArguments(args) { setLoggingLevel(safeChainArgs); setSkipMinimumPackageAge(safeChainArgs); setMinimumPackageAgeHours(safeChainArgs); + setMalwareListBaseUrl(safeChainArgs); checkDeprecatedPythonFlag(args); return remainingArgs; } @@ -109,6 +112,26 @@ export function getMinimumPackageAgeHours() { return state.minimumPackageAgeHours; } +/** + * @param {string[]} args + * @returns {void} + */ +function setMalwareListBaseUrl(args) { + const argName = SAFE_CHAIN_ARG_PREFIX + "malware-list-base-url="; + + const value = getLastArgEqualsValue(args, argName); + if (value) { + state.malwareListBaseUrl = value; + } +} + +/** + * @returns {string | undefined} + */ +export function getMalwareListBaseUrl() { + return state.malwareListBaseUrl; +} + /** * @param {string[]} args * @param {string} flagName diff --git a/packages/safe-chain/src/config/configFile.js b/packages/safe-chain/src/config/configFile.js index e132c90..3fb0f21 100644 --- a/packages/safe-chain/src/config/configFile.js +++ b/packages/safe-chain/src/config/configFile.js @@ -10,6 +10,7 @@ import { getEcoSystem } from "./settings.js"; * We cannot trust the input and should add the necessary validations * @property {unknown | Number} scanTimeout * @property {unknown | Number} minimumPackageAgeHours + * @property {unknown | string} malwareListBaseUrl * @property {unknown | SafeChainRegistryConfiguration} npm * @property {unknown | SafeChainRegistryConfiguration} pip * @@ -84,6 +85,18 @@ export function getMinimumPackageAgeHours() { return undefined; } +/** + * Gets the malware list base URL from config file only + * @returns {string | undefined} + */ +export function getMalwareListBaseUrl() { + const config = readConfigFile(); + if (config.malwareListBaseUrl && typeof config.malwareListBaseUrl === "string") { + return config.malwareListBaseUrl; + } + return undefined; +} + /** * Gets the custom npm registries from the config file (format parsing only, no validation) * @returns {string[]} @@ -214,6 +227,7 @@ function readConfigFile() { const emptyConfig = { scanTimeout: undefined, minimumPackageAgeHours: undefined, + malwareListBaseUrl: undefined, npm: { customRegistries: undefined, }, diff --git a/packages/safe-chain/src/config/environmentVariables.js b/packages/safe-chain/src/config/environmentVariables.js index 6ed041f..932eff7 100644 --- a/packages/safe-chain/src/config/environmentVariables.js +++ b/packages/safe-chain/src/config/environmentVariables.js @@ -45,3 +45,13 @@ export function getMinimumPackageAgeExclusions() { return process.env.SAFE_CHAIN_MINIMUM_PACKAGE_AGE_EXCLUSIONS || process.env.SAFE_CHAIN_NPM_MINIMUM_PACKAGE_AGE_EXCLUSIONS; } + +/** + * Gets the malware list base URL from environment variable + * Expected format: full URL without trailing slash + * Example: "https://malware-list.aikido.dev" + * @returns {string | undefined} + */ +export function getMalwareListBaseUrl() { + return process.env.SAFE_CHAIN_MALWARE_LIST_BASE_URL; +} diff --git a/packages/safe-chain/src/config/settings.js b/packages/safe-chain/src/config/settings.js index b864bf9..47c98c4 100644 --- a/packages/safe-chain/src/config/settings.js +++ b/packages/safe-chain/src/config/settings.js @@ -1,6 +1,7 @@ import * as cliArguments from "./cliArguments.js"; import * as configFile from "./configFile.js"; import * as environmentVariables from "./environmentVariables.js"; +import { ui } from "../environment/userInteraction.js"; export const LOGGING_SILENT = "silent"; export const LOGGING_NORMAL = "normal"; @@ -198,3 +199,51 @@ export function getMinimumPackageAgeExclusions() { const allExclusions = [...envExclusions, ...configExclusions]; return [...new Set(allExclusions)]; } + +/** + * Gets the malware list base URL with priority: CLI argument > environment variable > config file > default + * @returns {string} + */ +export function getMalwareListBaseUrl() { + // Priority 1: CLI argument + const cliValue = cliArguments.getMalwareListBaseUrl(); + if (cliValue) { + const url = removeTrailingSlashes(cliValue); + ui.writeInformation(`Fetching malware lists from ${url} as defined by CLI argument --safe-chain-malware-list-base-url`); + return url; + } + + // Priority 2: Environment variable + const envValue = environmentVariables.getMalwareListBaseUrl(); + if (envValue) { + const url = removeTrailingSlashes(envValue); + ui.writeInformation(`Fetching malware lists from ${url} as defined by environment variable SAFE_CHAIN_MALWARE_LIST_BASE_URL`); + return url; + } + + // Priority 3: Config file + const configValue = configFile.getMalwareListBaseUrl(); + if (configValue) { + const url = removeTrailingSlashes(configValue); + ui.writeInformation(`Fetching malware lists from ${url} as defined by config file (malwareListBaseUrl)`); + return url; + } + + // Default + const url = removeTrailingSlashes("https://malware-list.aikido.dev"); + ui.writeInformation(`Fetching malware lists from ${url} (default)`); + return url; +} + +/** + * Removes trailing slashes from a URL-like string. + * @param {string} value + * @returns {string} + */ +function removeTrailingSlashes(value) { + if (!value || typeof value !== "string") { + return value; + } + + return value.replace(/\/+$/, ""); +} diff --git a/packages/safe-chain/src/config/settings.spec.js b/packages/safe-chain/src/config/settings.spec.js index 18b5156..48108c4 100644 --- a/packages/safe-chain/src/config/settings.spec.js +++ b/packages/safe-chain/src/config/settings.spec.js @@ -15,6 +15,7 @@ const { getNpmCustomRegistries, getPipCustomRegistries, getMinimumPackageAgeExclusions, + getMalwareListBaseUrl, setEcoSystem, ECOSYSTEM_JS, ECOSYSTEM_PY, @@ -534,3 +535,113 @@ describe("getMinimumPackageAgeExclusions", () => { assert.deepStrictEqual(exclusions, ["requests", "urllib3"]); }); }); + +describe("getMalwareListBaseUrl", () => { + let originalEnv; + const envVarName = "SAFE_CHAIN_MALWARE_LIST_BASE_URL"; + + beforeEach(() => { + originalEnv = process.env[envVarName]; + delete process.env[envVarName]; + // Reset CLI arguments state + initializeCliArguments([]); + }); + + afterEach(() => { + if (originalEnv !== undefined) { + process.env[envVarName] = originalEnv; + } else { + delete process.env[envVarName]; + } + configFileContent = undefined; + }); + + it("should return default URL when nothing is configured", () => { + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://malware-list.aikido.dev"); + }); + + it("should trim trailing slash from CLI argument", () => { + initializeCliArguments(["--safe-chain-malware-list-base-url=https://cli-mirror.com/"]); + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://cli-mirror.com"); + }); + + it("should trim trailing slash from environment variable", () => { + process.env[envVarName] = "https://env-mirror.com/"; + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://env-mirror.com"); + }); + + it("should trim trailing slash from config file value", () => { + configFileContent = JSON.stringify({ + malwareListBaseUrl: "https://config-mirror.com/", + }); + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://config-mirror.com"); + }); + + it("should return CLI argument value with highest priority", () => { + initializeCliArguments(["--safe-chain-malware-list-base-url=https://cli-mirror.com"]); + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://cli-mirror.com"); + }); + + it("should return environment variable value when no CLI argument", () => { + process.env[envVarName] = "https://env-mirror.com"; + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://env-mirror.com"); + }); + + it("should return config file value when no CLI or env", () => { + configFileContent = JSON.stringify({ + malwareListBaseUrl: "https://config-mirror.com", + }); + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://config-mirror.com"); + }); + + it("should prioritize CLI over environment variable", () => { + process.env[envVarName] = "https://env-mirror.com"; + initializeCliArguments(["--safe-chain-malware-list-base-url=https://cli-mirror.com"]); + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://cli-mirror.com"); + }); + + it("should prioritize environment variable over config file", () => { + process.env[envVarName] = "https://env-mirror.com"; + configFileContent = JSON.stringify({ + malwareListBaseUrl: "https://config-mirror.com", + }); + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://env-mirror.com"); + }); + + it("should prioritize CLI over config file", () => { + initializeCliArguments(["--safe-chain-malware-list-base-url=https://cli-mirror.com"]); + configFileContent = JSON.stringify({ + malwareListBaseUrl: "https://config-mirror.com", + }); + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://cli-mirror.com"); + }); +}); diff --git a/packages/safe-chain/src/registryProxy/http-utils.js b/packages/safe-chain/src/registryProxy/http-utils.js index e14a977..8e2f8e2 100644 --- a/packages/safe-chain/src/registryProxy/http-utils.js +++ b/packages/safe-chain/src/registryProxy/http-utils.js @@ -15,3 +15,66 @@ export function getHeaderValueAsString(headers, headerName) { return header; } + +/** + * Returns a copy of headers without the provided header names, matched + * either exactly or case-insensitively. + * + * @param {NodeJS.Dict | undefined} headers + * @param {string[]} headerNames + * @param {{ caseInsensitive?: boolean }} [options] + * @returns {NodeJS.Dict | undefined} + */ +export function omitHeaders(headers, headerNames, options = {}) { + if (!headers) { + return headers; + } + + const omittedHeaderNames = new Set( + options.caseInsensitive + ? headerNames.map((name) => name.toLowerCase()) + : headerNames + ); + /** @type {NodeJS.Dict} */ + const filteredHeaders = {}; + + for (const [headerName, value] of Object.entries(headers)) { + const comparableHeaderName = options.caseInsensitive + ? headerName.toLowerCase() + : headerName; + if (!omittedHeaderNames.has(comparableHeaderName)) { + filteredHeaders[headerName] = value; + } + } + + return filteredHeaders; +} + +/** + * Remove headers that become stale when the response body is modified. + * + * @param {NodeJS.Dict | undefined} headers + * @returns {void} + */ +export function clearCachingHeaders(headers) { + if (!headers) { + return; + } + + const filteredHeaders = omitHeaders(headers, [ + "etag", + "last-modified", + "cache-control", + "content-length", + ]); + + if (!filteredHeaders) { + return; + } + + for (const key of Object.keys(headers)) { + delete headers[key]; + } + + Object.assign(headers, filteredHeaders); +} diff --git a/packages/safe-chain/src/registryProxy/interceptors/npm/modifyNpmInfo.js b/packages/safe-chain/src/registryProxy/interceptors/npm/modifyNpmInfo.js index 1743f82..26b3b70 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/npm/modifyNpmInfo.js +++ b/packages/safe-chain/src/registryProxy/interceptors/npm/modifyNpmInfo.js @@ -1,10 +1,7 @@ import { getMinimumPackageAgeHours } from "../../../config/settings.js"; import { ui } from "../../../environment/userInteraction.js"; -import { getHeaderValueAsString } from "../../http-utils.js"; - -const state = { - hasSuppressedVersions: false, -}; +import { clearCachingHeaders, getHeaderValueAsString } from "../../http-utils.js"; +import { recordSuppressedVersion } from "../suppressedVersionsState.js"; /** * @param {NodeJS.Dict} headers @@ -82,15 +79,7 @@ export function modifyNpmInfoResponse(body, headers) { const timestampValue = new Date(timestamp); if (timestampValue > cutOff) { deleteVersionFromJson(bodyJson, version); - if (headers) { - // When modifying the response, the etag and last-modified headers - // no longer match the content so they needs to be removed before sending the response. - delete headers["etag"]; - delete headers["last-modified"]; - // Removing the cache-control header will prevent the package manager from caching - // the modified response. - delete headers["cache-control"]; - } + clearCachingHeaders(headers); } } @@ -114,7 +103,7 @@ export function modifyNpmInfoResponse(body, headers) { * @param {string} version */ function deleteVersionFromJson(json, version) { - state.hasSuppressedVersions = true; + recordSuppressedVersion(); const packageName = typeof json?.name === "string" ? json.name : "(unknown)"; @@ -171,13 +160,6 @@ function getMostRecentTag(tagList) { return current; } -/** - * @returns {boolean} - */ -export function getHasSuppressedVersions() { - return state.hasSuppressedVersions; -} - /** * @param {Buffer} body * @param {NodeJS.Dict | undefined} headers diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js new file mode 100644 index 0000000..9ef4328 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.js @@ -0,0 +1,167 @@ +import { ui } from "../../../environment/userInteraction.js"; +import { clearCachingHeaders } from "../../http-utils.js"; +import { normalizePipPackageName } from "../../../scanning/packageNameVariants.js"; +import { parsePipPackageFromUrl } from "./parsePipPackageUrl.js"; +export { parsePipMetadataUrl, isPipPackageInfoUrl } from "./parsePipPackageUrl.js"; +import { getPipMetadataContentType, logSuppressedVersion } from "./pipMetadataResponseUtils.js"; +import { modifyPipJsonResponse } from "./modifyPipJsonResponse.js"; + +// Match simple-index anchor tags and capture their href so we can suppress +// individual distribution links from PyPI HTML metadata responses. +const HTML_ANCHOR_HREF_RE = + /]*href\s*=\s*(["'])([^"']+)\1[^>]*>[\s\S]*?<\/a>/gi; + +/** + * @param {Buffer} body + * @param {NodeJS.Dict | undefined} headers + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {Buffer} + */ +export function modifyPipInfoResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + packageName +) { + try { + const contentType = getPipMetadataContentType(headers); + + if (!contentType || body.byteLength === 0) { + return body; + } + + if ( + contentType.includes("html") || + contentType.includes("application/vnd.pypi.simple.v1+html") + ) { + return modifyHtmlSimpleResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + packageName + ); + } + + if ( + contentType.includes("json") || + contentType.includes("application/vnd.pypi.simple.v1+json") + ) { + return modifyJsonResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + packageName + ); + } + + return body; + } catch (/** @type {any} */ err) { + ui.writeVerbose( + `Safe-chain: PyPI package metadata not in expected format - bypassing modification. Error: ${err.message}` + ); + return body; + } +} + +/** + * @param {Buffer} body + * @param {NodeJS.Dict | undefined} headers + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {Buffer} + */ +function modifyHtmlSimpleResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + packageName +) { + const html = body.toString("utf8"); + let modified = false; + const rewriteHtmlAnchor = createHtmlAnchorRewriter( + metadataUrl, + isNewlyReleasedPackage, + packageName, + () => { + modified = true; + } + ); + const updatedHtml = html.replace(HTML_ANCHOR_HREF_RE, rewriteHtmlAnchor); + + if (!modified) return body; + const modifiedBuffer = Buffer.from(updatedHtml); + clearCachingHeaders(headers); + return modifiedBuffer; +} + +/** + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @param {() => void} onModified + * @returns {(anchor: string, quote: string, href: string) => string} + */ +function createHtmlAnchorRewriter( + metadataUrl, + isNewlyReleasedPackage, + packageName, + onModified +) { + return (anchor, _quote, href) => { + const resolvedHref = new URL(href, metadataUrl).toString(); + const { packageName: hrefPackageName, version } = parsePipPackageFromUrl( + resolvedHref, + new URL(resolvedHref).host + ); + + if ( + hrefPackageName && + normalizePipPackageName(hrefPackageName) === + normalizePipPackageName(packageName) && + version && + isNewlyReleasedPackage(packageName, version) + ) { + onModified(); + logSuppressedVersion(packageName, version); + return ""; + } + + return anchor; + }; +} + +/** + * @param {Buffer} body + * @param {NodeJS.Dict | undefined} headers + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {Buffer} + */ +function modifyJsonResponse( + body, + headers, + metadataUrl, + isNewlyReleasedPackage, + packageName +) { + const json = JSON.parse(body.toString("utf8")); + const modified = modifyPipJsonResponse( + json, + metadataUrl, + isNewlyReleasedPackage, + packageName + ); + + if (!modified) return body; + const modifiedBuffer = Buffer.from(JSON.stringify(json)); + clearCachingHeaders(headers); + return modifiedBuffer; +} diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js new file mode 100644 index 0000000..900941d --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipInfo.spec.js @@ -0,0 +1,302 @@ +import { describe, it, mock } from "node:test"; +import assert from "node:assert"; + +describe("modifyPipInfo", async () => { + mock.module("../../../config/settings.js", { + namedExports: { + getMinimumPackageAgeHours: () => 48, + ECOSYSTEM_PY: "py", + }, + }); + + mock.module("../../../environment/userInteraction.js", { + namedExports: { + ui: { + writeVerbose: () => {}, + }, + }, + }); + + const { + modifyPipInfoResponse, + } = await import("./modifyPipInfo.js"); + + it("removes too-young files from simple HTML metadata", () => { + const headers = { + "content-type": "application/vnd.pypi.simple.v1+html", + etag: "abc", + "cache-control": "public", + "content-length": "999", + "transfer-encoding": "chunked", + }; + + const body = Buffer.from(` + + + + requests-1.0.0.tar.gz + requests-2.0.0.tar.gz + + + `); + + const modified = modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/requests/", + (_packageName, version) => version === "2.0.0", + "requests" + ).toString("utf8"); + + assert.ok(modified.includes("requests-1.0.0.tar.gz")); + assert.ok(!modified.includes("requests-2.0.0.tar.gz")); + assert.equal(headers.etag, undefined); + assert.equal(headers["cache-control"], undefined); + assert.equal(headers["content-length"], undefined); + assert.equal(headers["transfer-encoding"], "chunked"); + }); + + it("leaves mixed-case transport headers untouched for MITM layer to normalize", () => { + const headers = { + "content-type": "application/json", + ETag: "abc", + "Content-Length": "999", + "Last-Modified": "yesterday", + "Cache-Control": "public, max-age=60", + "Transfer-Encoding": "chunked", + }; + + const body = Buffer.from( + JSON.stringify({ + info: { version: "2.0.0" }, + releases: { + "1.0.0": [{ filename: "requests-1.0.0.tar.gz" }], + "2.0.0": [{ filename: "requests-2.0.0.tar.gz" }], + }, + }) + ); + + modifyPipInfoResponse( + body, + headers, + "https://pypi.org/pypi/requests/json", + (_packageName, version) => version === "2.0.0", + "requests" + ); + + assert.equal(headers.ETag, "abc"); + assert.equal(headers["Last-Modified"], "yesterday"); + assert.equal(headers["Cache-Control"], "public, max-age=60"); + assert.equal(headers["Transfer-Encoding"], "chunked"); + assert.equal(headers["Content-Length"], "999"); + assert.equal(headers["content-length"], undefined); + }); + + it("returns body unchanged when no HTML versions are suppressed", () => { + const headers = { + "content-type": "application/vnd.pypi.simple.v1+html", + etag: "abc", + }; + + const body = Buffer.from( + `requests-1.0.0.tar.gz` + ); + + const result = modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/requests/", + () => false, + "requests" + ); + + assert.equal(result, body); // same Buffer reference — no copy made + assert.equal(headers.etag, "abc"); // headers untouched + }); + + it("matches HTML anchor hrefs using normalised package name (underscore vs hyphen)", () => { + const headers = { "content-type": "application/vnd.pypi.simple.v1+html" }; + + const body = Buffer.from( + `foo_bar-2.0.0.tar.gz` + + `foo_bar-1.0.0.tar.gz` + ); + + const modified = modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/foo-bar/", + (_packageName, version) => version === "2.0.0", + "foo-bar" // hyphenated name, hrefs use underscore + ).toString("utf8"); + + assert.ok(!modified.includes("foo_bar-2.0.0.tar.gz")); + assert.ok(modified.includes("foo_bar-1.0.0.tar.gz")); + }); + + it("matches anchor href regex with single quotes and extra attributes", () => { + const headers = { "content-type": "application/vnd.pypi.simple.v1+html" }; + + const body = Buffer.from(` + + foo_bar-2.0.0.tar.gz + + foo_bar-1.0.0.tar.gz + `); + + const modified = modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/foo-bar/", + (_packageName, version) => version === "2.0.0", + "foo-bar" + ).toString("utf8"); + + assert.ok(!modified.includes("foo_bar-2.0.0.tar.gz")); + assert.ok(modified.includes("foo_bar-1.0.0.tar.gz")); + }); + + it("removes too-young files from simple JSON metadata", () => { + const headers = { + "content-type": "application/vnd.pypi.simple.v1+json", + }; + + const body = Buffer.from( + JSON.stringify({ + name: "requests", + files: [ + { + filename: "requests-1.0.0.tar.gz", + url: "https://files.pythonhosted.org/packages/source/r/requests/requests-1.0.0.tar.gz", + }, + { + filename: "requests-2.0.0.tar.gz", + url: "https://files.pythonhosted.org/packages/source/r/requests/requests-2.0.0.tar.gz", + }, + ], + }) + ); + + const modified = JSON.parse( + modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/requests/", + (_packageName, version) => version === "2.0.0", + "requests" + ).toString("utf8") + ); + + assert.equal(modified.files.length, 1); + assert.equal(modified.files[0].filename, "requests-1.0.0.tar.gz"); + }); + + it("filters simple JSON metadata entries that have only filename (no url)", () => { + const headers = { "content-type": "application/vnd.pypi.simple.v1+json" }; + + const body = Buffer.from( + JSON.stringify({ + name: "requests", + files: [ + { filename: "requests-1.0.0.tar.gz" }, + { filename: "requests-2.0.0.tar.gz" }, + ], + }) + ); + + const modified = JSON.parse( + modifyPipInfoResponse( + body, + headers, + "https://pypi.org/simple/requests/", + (_packageName, version) => version === "2.0.0", + "requests" + ).toString("utf8") + ); + + assert.equal(modified.files.length, 1); + assert.equal(modified.files[0].filename, "requests-1.0.0.tar.gz"); + }); + + it("recalculates JSON API info.version after removing too-young releases", () => { + const headers = { + "content-type": "application/json", + }; + + const body = Buffer.from( + JSON.stringify({ + info: { version: "2.0.0" }, + releases: { + "1.0.0": [ + { + filename: "requests-1.0.0.tar.gz", + upload_time_iso_8601: "2024-01-01T00:00:00.000Z", + }, + ], + "2.0.0": [ + { + filename: "requests-2.0.0.tar.gz", + upload_time_iso_8601: "2024-01-02T00:00:00.000Z", + }, + ], + "3.0.0rc1": [ + { + filename: "requests-3.0.0rc1.tar.gz", + upload_time_iso_8601: "2024-01-03T00:00:00.000Z", + }, + ], + }, + urls: [ + { filename: "requests-2.0.0.tar.gz" }, + ], + }) + ); + + const modified = JSON.parse( + modifyPipInfoResponse( + body, + headers, + "https://pypi.org/pypi/requests/json", + (_packageName, version) => + version === "2.0.0" || version === "3.0.0rc1", + "requests" + ).toString("utf8") + ); + + assert.deepEqual(Object.keys(modified.releases), ["1.0.0"]); + assert.equal(modified.info.version, "1.0.0"); + assert.equal(modified.urls.length, 0); + }); + + it("falls back to latest pre-release when all stable versions are removed", () => { + const headers = { "content-type": "application/json" }; + + const body = Buffer.from( + JSON.stringify({ + info: { version: "2.0.0rc2" }, + releases: { + "1.0.0rc1": [{ filename: "requests-1.0.0rc1.tar.gz" }], + "2.0.0rc2": [{ filename: "requests-2.0.0rc2.tar.gz" }], + }, + urls: [], + }) + ); + + const modified = JSON.parse( + modifyPipInfoResponse( + body, + headers, + "https://pypi.org/pypi/requests/json", + (_packageName, version) => version === "2.0.0rc2", + "requests" + ).toString("utf8") + ); + + assert.deepEqual(Object.keys(modified.releases), ["1.0.0rc1"]); + assert.equal(modified.info.version, "1.0.0rc1"); + }); +}); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipJsonResponse.js b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipJsonResponse.js new file mode 100644 index 0000000..e005237 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/modifyPipJsonResponse.js @@ -0,0 +1,176 @@ +import { + calculateLatestVersion, + getAvailableVersionsFromJson, + getPackageVersionFromMetadataFile, +} from "./pipMetadataVersionUtils.js"; +import { logSuppressedVersion } from "./pipMetadataResponseUtils.js"; + +/** + * @param {any} json + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {boolean} + */ +export function modifyPipJsonResponse( + json, + metadataUrl, + isNewlyReleasedPackage, + packageName +) { + const filesModified = filterJsonMetadataFiles( + json, + metadataUrl, + isNewlyReleasedPackage, + packageName + ); + const releasesModified = removeJsonMetadataReleases( + json, + isNewlyReleasedPackage, + packageName + ); + const urlsModified = filterJsonMetadataUrls( + json, + metadataUrl, + isNewlyReleasedPackage, + packageName + ); + const versionModified = updateJsonInfoVersion(json, metadataUrl); + + return filesModified || releasesModified || urlsModified || versionModified; +} + +/** + * @param {any} json + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {boolean} + */ +function filterJsonMetadataFiles( + json, + metadataUrl, + isNewlyReleasedPackage, + packageName +) { + if (!Array.isArray(json.files)) { + return false; + } + + let modified = false; + const loggedVersions = new Set(); + json.files = json.files.filter((/** @type {any} */ file) => { + const version = getPackageVersionFromMetadataFile(file, metadataUrl); + + if (version && isNewlyReleasedPackage(packageName, version)) { + modified = true; + if (!loggedVersions.has(version)) { + logSuppressedVersion(packageName, version); + loggedVersions.add(version); + } + return false; + } + + return true; + }); + + return modified; +} + +/** + * @param {any} json + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {boolean} + */ +function removeJsonMetadataReleases(json, isNewlyReleasedPackage, packageName) { + if (!json.releases || typeof json.releases !== "object") { + return false; + } + + let modified = false; + + for (const [version, files] of Object.entries(json.releases)) { + if ( + Array.isArray(/** @type {unknown[]} */ (files)) && + isNewlyReleasedPackage(packageName, version) + ) { + delete json.releases[version]; + modified = true; + logSuppressedVersion(packageName, version); + } + } + + return modified; +} + +/** + * @param {any} json + * @param {string} metadataUrl + * @param {(packageName: string | undefined, version: string | undefined) => boolean} isNewlyReleasedPackage + * @param {string} packageName + * @returns {boolean} + */ +function filterJsonMetadataUrls( + json, + metadataUrl, + isNewlyReleasedPackage, + packageName +) { + if (!Array.isArray(json.urls)) { + return false; + } + + let modified = false; + const loggedVersions = new Set(); + json.urls = json.urls.filter((/** @type {any} */ file) => { + const version = getPackageVersionFromMetadataFile(file, metadataUrl); + + if (version && isNewlyReleasedPackage(packageName, version)) { + modified = true; + if (!loggedVersions.has(version)) { + logSuppressedVersion(packageName, version); + loggedVersions.add(version); + } + return false; + } + + return true; + }); + + return modified; +} + +/** + * @param {any} json + * @param {string} metadataUrl + * @returns {boolean} + */ +function updateJsonInfoVersion(json, metadataUrl) { + if (!json.info || typeof json.info !== "object") { + return false; + } + + const replacementVersion = computeReplacementVersion(json, metadataUrl); + + if ( + typeof json.info.version !== "string" || + !replacementVersion || + json.info.version === replacementVersion + ) { + return false; + } + + json.info.version = replacementVersion; + return true; +} + +/** + * @param {any} json + * @param {string} metadataUrl + * @returns {string | undefined} + */ +function computeReplacementVersion(json, metadataUrl) { + const candidateVersions = getAvailableVersionsFromJson(json, metadataUrl); + return calculateLatestVersion(candidateVersions); +} diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js index 377a648..da3d29f 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.js @@ -1,3 +1,64 @@ +/** + * Parses a PyPI metadata URL and returns the package name and API type. + * + * @example + * parsePipMetadataUrl("https://pypi.org/simple/requests/") + * // => { packageName: "requests", type: "simple" } + * + * parsePipMetadataUrl("https://pypi.org/pypi/requests/json") + * // => { packageName: "requests", type: "json" } + * + * parsePipMetadataUrl("https://pypi.org/pypi/requests/2.28.1/json") + * // => { packageName: "requests", type: "json" } + * + * parsePipMetadataUrl("https://files.pythonhosted.org/packages/requests-2.28.1.tar.gz") + * // => { packageName: undefined, type: undefined } + * + * @param {string} url + * @returns {{ packageName: string | undefined, type: "simple" | "json" | undefined }} + */ +export function parsePipMetadataUrl(url) { + if (typeof url !== "string") { + return { packageName: undefined, type: undefined }; + } + + let urlObj; + try { + urlObj = new URL(url); + } catch { + return { packageName: undefined, type: undefined }; + } + + const pathSegments = urlObj.pathname.split("/").filter(Boolean); + if (pathSegments[0] === "simple" && pathSegments[1]) { + return { + packageName: decodeURIComponent(pathSegments[1]), + type: "simple", + }; + } + + if ( + pathSegments[0] === "pypi" && + pathSegments[pathSegments.length - 1] === "json" && + pathSegments[1] + ) { + return { + packageName: decodeURIComponent(pathSegments[1]), + type: "json", + }; + } + + return { packageName: undefined, type: undefined }; +} + +/** + * @param {string} url + * @returns {boolean} + */ +export function isPipPackageInfoUrl(url) { + return !!parsePipMetadataUrl(url).packageName; +} + /** * Parse Python package artifact URLs from PyPI-style registries. * Examples: diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.spec.js new file mode 100644 index 0000000..1345dd4 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/parsePipPackageUrl.spec.js @@ -0,0 +1,100 @@ +import { describe, it } from "node:test"; +import assert from "node:assert"; +import { + isPipPackageInfoUrl, + parsePipMetadataUrl, + parsePipPackageFromUrl, +} from "./parsePipPackageUrl.js"; + +describe("parsePipPackageUrl", () => { + it("parses simple metadata URLs", () => { + assert.deepEqual(parsePipMetadataUrl("https://pypi.org/simple/requests/"), { + packageName: "requests", + type: "simple", + }); + }); + + it("parses json metadata URLs", () => { + assert.deepEqual(parsePipMetadataUrl("https://pypi.org/pypi/requests/json"), { + packageName: "requests", + type: "json", + }); + }); + + it("parses per-version json metadata URLs", () => { + assert.deepEqual( + parsePipMetadataUrl("https://pypi.org/pypi/requests/2.28.1/json"), + { packageName: "requests", type: "json" } + ); + }); + + it("decodes encoded metadata package names", () => { + assert.deepEqual( + parsePipMetadataUrl("https://pypi.org/simple/foo-bar%5Fbaz/"), + { + packageName: "foo-bar_baz", + type: "simple", + } + ); + }); + + it("returns undefined for unrecognized metadata paths", () => { + assert.deepEqual( + parsePipMetadataUrl("https://pypi.org/unknown/requests/"), + { + packageName: undefined, + type: undefined, + } + ); + }); + + it("returns undefined for invalid metadata URLs", () => { + assert.deepEqual(parsePipMetadataUrl("not a url"), { + packageName: undefined, + type: undefined, + }); + }); + + it("recognizes package info URLs", () => { + assert.equal( + isPipPackageInfoUrl("https://pypi.org/simple/requests/"), + true + ); + }); + + it("does not treat artifact URLs as package info URLs", () => { + assert.equal( + isPipPackageInfoUrl( + "https://files.pythonhosted.org/packages/source/r/requests/requests-2.28.1.tar.gz" + ), + false + ); + }); + + it("parses wheel artifact URLs", () => { + assert.deepEqual( + parsePipPackageFromUrl( + "https://files.pythonhosted.org/packages/xx/yy/foo_bar-2.0.0-py3-none-any.whl", + "files.pythonhosted.org" + ), + { packageName: "foo_bar", version: "2.0.0" } + ); + }); + + it("parses sdist artifact URLs", () => { + assert.deepEqual( + parsePipPackageFromUrl( + "https://files.pythonhosted.org/packages/source/r/requests/requests-2.28.1.tar.gz", + "files.pythonhosted.org" + ), + { packageName: "requests", version: "2.28.1" } + ); + }); + + it("returns undefined for non-artifact URLs", () => { + assert.deepEqual( + parsePipPackageFromUrl("https://pypi.org/simple/requests/", "pypi.org"), + { packageName: undefined, version: undefined } + ); + }); +}); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.customRegistries.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.customRegistries.spec.js index c7ad597..5904f05 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.customRegistries.spec.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.customRegistries.spec.js @@ -10,8 +10,12 @@ describe("pipInterceptor custom registries", async () => { namedExports: { ECOSYSTEM_PY: "py", getEcoSystem: () => "py", + getLoggingLevel: () => "silent", + getMinimumPackageAgeHours: () => 48, getMinimumPackageAgeExclusions: () => [], getPipCustomRegistries: () => customRegistries, + LOGGING_SILENT: "silent", + LOGGING_VERBOSE: "verbose", skipMinimumPackageAge: () => false, }, }); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.js index abdda17..51e6f0d 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.js @@ -8,6 +8,10 @@ import { getEquivalentPackageNames } from "../../../scanning/packageNameVariants import { openNewPackagesDatabase } from "../../../scanning/newPackagesListCache.js"; import { interceptRequests } from "../interceptorBuilder.js"; import { isExcludedFromMinimumPackageAge } from "../minimumPackageAgeExclusions.js"; +import { + modifyPipInfoResponse, + parsePipMetadataUrl, +} from "./modifyPipInfo.js"; import { parsePipPackageFromUrl } from "./parsePipPackageUrl.js"; const knownPipRegistries = [ @@ -47,6 +51,28 @@ function buildPipInterceptor(registry) { */ function createPipRequestHandler(registry) { return async (reqContext) => { + const minimumAgeChecksEnabled = !skipMinimumPackageAge(); + const metadataInfo = parsePipMetadataUrl(reqContext.targetUrl); + const metadataPackageName = metadataInfo.packageName; + + if ( + minimumAgeChecksEnabled && + metadataPackageName && + !isExcludedFromMinimumPackageAge(metadataPackageName) + ) { + const newPackagesDatabase = await openNewPackagesDatabase(); + reqContext.modifyBody((body, headers) => + modifyPipInfoResponse( + body, + headers, + reqContext.targetUrl, + newPackagesDatabase.isNewlyReleasedPackage, + metadataPackageName + ) + ); + return; + } + const { packageName, version } = parsePipPackageFromUrl( reqContext.targetUrl, registry @@ -75,7 +101,7 @@ function createPipRequestHandler(registry) { if ( version && - !skipMinimumPackageAge() && + minimumAgeChecksEnabled && !isExcludedFromMinimumPackageAge(packageName) ) { const newPackagesDatabase = await openNewPackagesDatabase(); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.minPackageAge.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.minPackageAge.spec.js index 8a5b189..6bbd904 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.minPackageAge.spec.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.minPackageAge.spec.js @@ -30,8 +30,12 @@ describe("pipInterceptor minimum package age", async () => { namedExports: { ECOSYSTEM_PY: "py", getEcoSystem: () => "py", + getLoggingLevel: () => "silent", + getMinimumPackageAgeHours: () => 48, getMinimumPackageAgeExclusions: () => minimumPackageAgeExclusionsSetting, getPipCustomRegistries: () => [], + LOGGING_SILENT: "silent", + LOGGING_VERBOSE: "verbose", skipMinimumPackageAge: () => skipMinimumPackageAgeSetting, }, }); @@ -56,6 +60,31 @@ describe("pipInterceptor minimum package age", async () => { newlyReleasedPackageResponse = false; }); + it("should modify simple metadata responses to suppress too-young versions", async () => { + const url = "https://pypi.org/simple/foo-bar/"; + newlyReleasedPackageResponse = true; + + const interceptor = pipInterceptorForUrl(url); + const result = await interceptor.handleRequest(url); + + assert.equal(result.modifiesResponse(), true); + + const modifiedBody = result.modifyBody( + Buffer.from(` + foo_bar-1.0.0.tar.gz + foo_bar-2.0.0.tar.gz + `), + { + "content-type": "application/vnd.pypi.simple.v1+html", + } + ).toString("utf8"); + + assert.ok(modifiedBody.includes("foo_bar-1.0.0.tar.gz")); + assert.ok(!modifiedBody.includes("foo_bar-2.0.0.tar.gz")); + + newlyReleasedPackageResponse = false; + }); + it("should not block newly released package downloads when skipMinimumPackageAge is enabled", async () => { const url = "https://files.pythonhosted.org/packages/xx/yy/foo_bar-2.0.0-py3-none-any.whl"; @@ -86,6 +115,20 @@ describe("pipInterceptor minimum package age", async () => { newlyReleasedPackageResponse = false; }); + it("should not modify metadata responses when the package is excluded", async () => { + const url = "https://pypi.org/simple/foo-bar/"; + newlyReleasedPackageResponse = true; + minimumPackageAgeExclusionsSetting = ["foo-bar"]; + + const interceptor = pipInterceptorForUrl(url); + const result = await interceptor.handleRequest(url); + + assert.equal(result.modifiesResponse(), false); + + minimumPackageAgeExclusionsSetting = []; + newlyReleasedPackageResponse = false; + }); + it("should not block newly released package downloads when a dot-name package matches a hyphen exclusion", async () => { const url = "https://files.pythonhosted.org/packages/xx/yy/foo.bar-2.0.0.tar.gz"; diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.packageDownload.spec.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.packageDownload.spec.js index d6fdec6..f4a54a4 100644 --- a/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.packageDownload.spec.js +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipInterceptor.packageDownload.spec.js @@ -26,8 +26,12 @@ describe("pipInterceptor", async () => { namedExports: { ECOSYSTEM_PY: "py", getEcoSystem: () => "py", + getLoggingLevel: () => "silent", + getMinimumPackageAgeHours: () => 48, getMinimumPackageAgeExclusions: () => [], getPipCustomRegistries: () => [], + LOGGING_SILENT: "silent", + LOGGING_VERBOSE: "verbose", skipMinimumPackageAge: () => false, }, }); diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataResponseUtils.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataResponseUtils.js new file mode 100644 index 0000000..e394810 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataResponseUtils.js @@ -0,0 +1,27 @@ +import { getMinimumPackageAgeHours } from "../../../config/settings.js"; +import { ui } from "../../../environment/userInteraction.js"; +import { getHeaderValueAsString } from "../../http-utils.js"; +import { recordSuppressedVersion } from "../suppressedVersionsState.js"; + +/** + * @param {NodeJS.Dict | undefined} headers + * @returns {string | undefined} + */ +export function getPipMetadataContentType(headers) { + return getHeaderValueAsString(headers, "content-type") + ?.toLowerCase() + .split(";")[0] + .trim(); +} + +/** + * @param {string} packageName + * @param {string} version + * @returns {void} + */ +export function logSuppressedVersion(packageName, version) { + recordSuppressedVersion(); + ui.writeVerbose( + `Safe-chain: ${packageName}@${version} is newer than ${getMinimumPackageAgeHours()} hours and was removed (minimumPackageAgeInHours setting).` + ); +} diff --git a/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js new file mode 100644 index 0000000..4ccb953 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/pip/pipMetadataVersionUtils.js @@ -0,0 +1,131 @@ +import { parsePipPackageFromUrl } from "./parsePipPackageUrl.js"; + +/** + * @param {any} file + * @param {string} metadataUrl + * @returns {string | undefined} + */ +export function getPackageVersionFromMetadataFile(file, metadataUrl) { + const href = typeof file?.url === "string" ? file.url : undefined; + const filename = typeof file?.filename === "string" ? file.filename : undefined; + + if (href) { + const resolvedHref = new URL(href, metadataUrl).toString(); + return parsePipPackageFromUrl( + resolvedHref, + new URL(resolvedHref).host + ).version; + } + + if (filename) { + return parsePipPackageFromUrl( + new URL(filename, metadataUrl).toString(), + new URL(metadataUrl).host + ).version; + } + + return undefined; +} + +/** + * @param {any} json + * @param {string} metadataUrl + * @returns {string[]} + */ +export function getAvailableVersionsFromJson(json, metadataUrl) { + if (json.releases && typeof json.releases === "object") { + return Object.keys(json.releases); + } + + if (!Array.isArray(json.files)) { + return []; + } + + return [ + ...new Set( + json.files + .map((/** @type {any} */ file) => + getPackageVersionFromMetadataFile(file, metadataUrl) + ) + .filter(isDefinedString) + ), + ]; +} + +/** + * @param {string | undefined} value + * @returns {value is string} + */ +function isDefinedString(value) { + return typeof value === "string"; +} + +/** + * @param {string[]} versions + * @returns {string | undefined} + */ +export function calculateLatestVersion(versions) { + const stableVersions = versions.filter((version) => !isPrerelease(version)); + if (stableVersions.length > 0) { + return stableVersions.sort(comparePep440ishVersions).at(-1); + } + + return versions.sort(comparePep440ishVersions).at(-1); +} + +/** + * @param {string} left + * @param {string} right + * @returns {number} + */ +function comparePep440ishVersions(left, right) { + const leftParts = tokenizeVersion(left); + const rightParts = tokenizeVersion(right); + const maxLength = Math.max(leftParts.length, rightParts.length); + + for (let index = 0; index < maxLength; index += 1) { + const leftPart = leftParts[index]; + const rightPart = rightParts[index]; + + if (leftPart === undefined) return -1; + if (rightPart === undefined) return 1; + + if (leftPart === rightPart) { + continue; + } + + const leftNumeric = typeof leftPart === "number"; + const rightNumeric = typeof rightPart === "number"; + + if (leftNumeric && rightNumeric) { + return leftPart - rightPart; + } + + if (leftNumeric) return 1; + if (rightNumeric) return -1; + + return String(leftPart).localeCompare(String(rightPart)); + } + + return 0; +} + +/** + * @param {string} version + * @returns {(string | number)[]} + */ +function tokenizeVersion(version) { + return version + .toLowerCase() + .split(/[^a-z0-9]+/) + .flatMap((part) => part.match(/[a-z]+|\d+/g) || []) + .map((part) => (/^\d+$/.test(part) ? Number(part) : part)); +} + +/** + * @param {string} version + * @returns {boolean} + */ +function isPrerelease(version) { + return /(a|b|rc|dev)\d+/i.test(version); +} diff --git a/packages/safe-chain/src/registryProxy/interceptors/suppressedVersionsState.js b/packages/safe-chain/src/registryProxy/interceptors/suppressedVersionsState.js new file mode 100644 index 0000000..26c0559 --- /dev/null +++ b/packages/safe-chain/src/registryProxy/interceptors/suppressedVersionsState.js @@ -0,0 +1,21 @@ +const state = { + hasSuppressedVersions: false, +}; + +/** + * Tracks whether any rewritten metadata response suppressed versions during the + * current process lifetime. This is intentional shared state used only for the + * end-of-run summary message exposed through the proxy API. + * + * @returns {void} + */ +export function recordSuppressedVersion() { + state.hasSuppressedVersions = true; +} + +/** + * @returns {boolean} + */ +export function getHasSuppressedVersions() { + return state.hasSuppressedVersions; +} diff --git a/packages/safe-chain/src/registryProxy/mitmRequestHandler.js b/packages/safe-chain/src/registryProxy/mitmRequestHandler.js index 8268559..4c4e9ec 100644 --- a/packages/safe-chain/src/registryProxy/mitmRequestHandler.js +++ b/packages/safe-chain/src/registryProxy/mitmRequestHandler.js @@ -2,7 +2,8 @@ import https from "https"; import { generateCertForHost } from "./certUtils.js"; import { HttpsProxyAgent } from "https-proxy-agent"; import { ui } from "../environment/userInteraction.js"; -import { gunzipSync, gzipSync } from "zlib"; +import { gunzipSync } from "zlib"; +import { omitHeaders } from "./http-utils.js"; /** * @typedef {import("./interceptors/interceptorBuilder.js").Interceptor} Interceptor @@ -215,11 +216,16 @@ function createProxyRequest(hostname, port, req, res, requestHandler) { buffer = requestHandler.modifyBody(buffer, headers); - if (proxyRes.headers["content-encoding"] === "gzip") { - buffer = gzipSync(buffer); - } - - res.writeHead(statusCode, headers); + // For rewritten responses, send the final body uncompressed. + // This avoids mismatches between upstream compression metadata and the + // rewritten payload on the wire. + const rewrittenHeaders = omitHeaders( + headers, + ["content-length", "transfer-encoding", "content-encoding"], + { caseInsensitive: true } + ) || {}; + rewrittenHeaders["content-length"] = String(buffer.byteLength); + res.writeHead(statusCode, rewrittenHeaders); res.end(buffer); }); } else { diff --git a/packages/safe-chain/src/registryProxy/mitmRequestHandler.spec.js b/packages/safe-chain/src/registryProxy/mitmRequestHandler.spec.js new file mode 100644 index 0000000..de01e2c --- /dev/null +++ b/packages/safe-chain/src/registryProxy/mitmRequestHandler.spec.js @@ -0,0 +1,138 @@ +import { describe, it, mock } from "node:test"; +import assert from "node:assert"; +import zlib from "node:zlib"; + +describe("mitmRequestHandler", async () => { + let capturedHandler; + let capturedOptions; + + mock.module("https", { + defaultExport: { + createServer: (_options, handler) => { + capturedHandler = handler; + return { + on: () => {}, + emit: () => {}, + }; + }, + request: (options, callback) => { + capturedOptions = options; + + const listeners = {}; + const proxyRes = { + statusCode: 200, + headers: { + "content-encoding": "gzip", + "content-length": "999", + "transfer-encoding": "chunked", + }, + on: (event, handler) => { + listeners[event] = handler; + }, + }; + + callback(proxyRes); + + return { + on: () => {}, + write: () => {}, + end: () => { + const payload = Buffer.from("rewritten body"); + listeners["data"]?.(zlib.gzipSync(payload)); + listeners["end"]?.(); + }, + destroy: () => {}, + }; + }, + }, + }); + + mock.module("./certUtils.js", { + namedExports: { + generateCertForHost: () => ({ + privateKey: "key", + certificate: "cert", + }), + }, + }); + + mock.module("https-proxy-agent", { + namedExports: { + HttpsProxyAgent: class {}, + }, + }); + + mock.module("../environment/userInteraction.js", { + namedExports: { + ui: { + writeVerbose: () => {}, + writeError: () => {}, + }, + }, + }); + + const { mitmConnect } = await import("./mitmRequestHandler.js"); + + it("sets content-length from the final compressed payload after body rewrite", async () => { + const interceptor = { + handleRequest: async () => ({ + blockResponse: undefined, + modifyRequestHeaders: (headers) => headers, + modifiesResponse: () => true, + modifyBody: () => Buffer.from("rewritten body"), + }), + }; + + const req = { + url: "pypi.org:443", + }; + + const clientSocket = { + on: () => {}, + write: () => {}, + headersSent: false, + writable: true, + end: () => {}, + }; + + mitmConnect(req, clientSocket, interceptor); + + const resState = { + statusCode: undefined, + headers: undefined, + body: undefined, + }; + + const res = { + headersSent: false, + writeHead: (statusCode, headers) => { + resState.statusCode = statusCode; + resState.headers = headers; + }, + end: (body) => { + resState.body = body; + }, + }; + + const request = { + url: "/simple/example/", + headers: {}, + method: "GET", + on: (event, handler) => { + if (event === "end") { + handler(); + } + }, + }; + + await capturedHandler(request, res); + + assert.equal(capturedOptions.hostname, "pypi.org"); + assert.equal(resState.statusCode, 200); + assert.equal(resState.headers["transfer-encoding"], undefined); + assert.equal( + resState.headers["content-length"], + String(resState.body.byteLength) + ); + }); +}); diff --git a/packages/safe-chain/src/registryProxy/registryProxy.js b/packages/safe-chain/src/registryProxy/registryProxy.js index 81b265d..0b009bb 100644 --- a/packages/safe-chain/src/registryProxy/registryProxy.js +++ b/packages/safe-chain/src/registryProxy/registryProxy.js @@ -6,7 +6,7 @@ import { getCombinedCaBundlePath, cleanupCertBundle } from "./certBundle.js"; import { ui } from "../environment/userInteraction.js"; import chalk from "chalk"; import { createInterceptorForUrl } from "./interceptors/createInterceptorForEcoSystem.js"; -import { getHasSuppressedVersions } from "./interceptors/npm/modifyNpmInfo.js"; +import { getHasSuppressedVersions } from "./interceptors/suppressedVersionsState.js"; const SERVER_STOP_TIMEOUT_MS = 1000; /** diff --git a/packages/safe-chain/src/scanning/newPackagesDatabase.spec.js b/packages/safe-chain/src/scanning/newPackagesDatabase.spec.js index f363f27..32de737 100644 --- a/packages/safe-chain/src/scanning/newPackagesDatabase.spec.js +++ b/packages/safe-chain/src/scanning/newPackagesDatabase.spec.js @@ -51,6 +51,7 @@ mock.module("../config/settings.js", { namedExports: { getMinimumPackageAgeHours: () => minimumPackageAgeHours, getEcoSystem: () => ecosystem, + getMalwareListBaseUrl: () => "https://malware-list.aikido.dev", ECOSYSTEM_JS: "js", ECOSYSTEM_PY: "py", }, diff --git a/packages/safe-chain/src/scanning/newPackagesDatabaseBuilder.spec.js b/packages/safe-chain/src/scanning/newPackagesDatabaseBuilder.spec.js index 9670a9e..1424a20 100644 --- a/packages/safe-chain/src/scanning/newPackagesDatabaseBuilder.spec.js +++ b/packages/safe-chain/src/scanning/newPackagesDatabaseBuilder.spec.js @@ -8,6 +8,7 @@ mock.module("../config/settings.js", { namedExports: { getMinimumPackageAgeHours: () => minimumPackageAgeHours, getEcoSystem: () => ecosystem, + getMalwareListBaseUrl: () => "https://malware-list.aikido.dev", ECOSYSTEM_JS: "js", ECOSYSTEM_PY: "py", }, diff --git a/packages/safe-chain/src/scanning/newPackagesListCache.spec.js b/packages/safe-chain/src/scanning/newPackagesListCache.spec.js index 8616876..503a0cc 100644 --- a/packages/safe-chain/src/scanning/newPackagesListCache.spec.js +++ b/packages/safe-chain/src/scanning/newPackagesListCache.spec.js @@ -20,6 +20,7 @@ mock.module("../config/settings.js", { namedExports: { getEcoSystem: () => ecosystem, getMinimumPackageAgeHours: () => 24, + getMalwareListBaseUrl: () => "https://malware-list.aikido.dev", ECOSYSTEM_JS: "js", ECOSYSTEM_PY: "py", }, diff --git a/packages/safe-chain/src/scanning/packageNameVariants.js b/packages/safe-chain/src/scanning/packageNameVariants.js index 97db91b..64075f2 100644 --- a/packages/safe-chain/src/scanning/packageNameVariants.js +++ b/packages/safe-chain/src/scanning/packageNameVariants.js @@ -1,5 +1,15 @@ import { ECOSYSTEM_PY } from "../config/settings.js"; +/** + * Normalises a Python package name per PEP 503: lowercase and collapse any + * run of `.`, `_`, or `-` into a single hyphen. + * @param {string} packageName + * @returns {string} + */ +export function normalizePipPackageName(packageName) { + return packageName.toLowerCase().replace(/[._-]+/g, "-"); +} + /** * @param {string} packageName * @param {string} ecosystem