AikidoSec-safe-chain/packages/safe-chain/src/registryProxy/interceptors/pipInterceptor.js

132 lines
4.6 KiB
JavaScript

import { getPipCustomRegistries } from "../../config/settings.js";
import { isMalwarePackage } from "../../scanning/audit/index.js";
import { interceptRequests } from "./interceptorBuilder.js";
const knownPipRegistries = [
"files.pythonhosted.org",
"pypi.org",
"pypi.python.org",
"pythonhosted.org",
];
/**
* @param {string} url
* @returns {import("./interceptorBuilder.js").Interceptor | undefined}
*/
export function pipInterceptorForUrl(url) {
const customRegistries = getPipCustomRegistries();
const registries = [...knownPipRegistries, ...customRegistries];
const registry = registries.find((reg) => url.includes(reg));
if (registry) {
return buildPipInterceptor(registry);
}
return undefined;
}
/**
* @param {string} registry
* @returns {import("./interceptorBuilder.js").Interceptor | undefined}
*/
function buildPipInterceptor(registry) {
return interceptRequests(async (reqContext) => {
const { packageName, version } = parsePipPackageFromUrl(
reqContext.targetUrl,
registry
);
// Normalize underscores to hyphens for DB matching, as PyPI allows underscores in distribution names.
// Per python, packages that differ only by hyphen vs underscore are considered the same.
const hyphenName = packageName?.includes("_") ? packageName.replace(/_/g, "-") : packageName;
const isMalicious =
await isMalwarePackage(packageName, version)
|| await isMalwarePackage(hyphenName, version);
if (isMalicious) {
reqContext.blockMalware(packageName, version);
}
});
}
/**
* @param {string} url
* @param {string} registry
* @returns {{packageName: string | undefined, version: string | undefined}}
*/
function parsePipPackageFromUrl(url, registry) {
let packageName, version;
// Basic validation
if (!registry || typeof url !== "string") {
return { packageName, version };
}
// Quick sanity check on the URL + parse
let urlObj;
try {
urlObj = new URL(url);
} catch {
return { packageName, version };
}
// Get the last path segment (filename) and decode it (strip query & fragment automatically)
const lastSegment = urlObj.pathname.split("/").filter(Boolean).pop();
if (!lastSegment) {
return { packageName, version };
}
const filename = decodeURIComponent(lastSegment);
// Parse Python package downloads from PyPI/pythonhosted.org
// Example wheel: https://files.pythonhosted.org/packages/xx/yy/requests-2.28.1-py3-none-any.whl
// Example sdist: https://files.pythonhosted.org/packages/xx/yy/requests-2.28.1.tar.gz
// Wheel (.whl) and Poetry's preflight metadata (.whl.metadata)
// Examples:
// foo_bar-2.0.0-py3-none-any.whl
// foo_bar-2.0.0-py3-none-any.whl.metadata
const wheelExtRe = /\.whl(?:\.metadata)?$/;
const wheelExtMatch = filename.match(wheelExtRe);
if (wheelExtMatch) {
const base = filename.replace(wheelExtRe, "");
const firstDash = base.indexOf("-");
if (firstDash > 0) {
const dist = base.slice(0, firstDash); // may contain underscores
const rest = base.slice(firstDash + 1); // version + the rest of tags
const secondDash = rest.indexOf("-");
const rawVersion = secondDash >= 0 ? rest.slice(0, secondDash) : rest;
packageName = dist;
version = rawVersion;
// Reject "latest" as it's a placeholder, not a real version
// When version is "latest", this signals the URL doesn't contain actual version info
// Returning undefined allows the request (see registryProxy.js isAllowedUrl)
if (version === "latest" || !packageName || !version) {
return { packageName: undefined, version: undefined };
}
return { packageName, version };
}
}
// Source dist (sdist) and potential metadata sidecars (e.g., .tar.gz.metadata)
const sdistExtWithMetadataRe = /\.(tar\.gz|zip|tar\.bz2|tar\.xz)(\.metadata)?$/i;
const sdistExtMatch = filename.match(sdistExtWithMetadataRe);
if (sdistExtMatch) {
const base = filename.replace(sdistExtWithMetadataRe, "");
const lastDash = base.lastIndexOf("-");
if (lastDash > 0 && lastDash < base.length - 1) {
packageName = base.slice(0, lastDash);
version = base.slice(lastDash + 1);
// Reject "latest" as it's a placeholder, not a real version
// When version is "latest", this signals the URL doesn't contain actual version info
// Returning undefined allows the request (see registryProxy.js isAllowedUrl)
if (version === "latest" || !packageName || !version) {
return { packageName: undefined, version: undefined };
}
return { packageName, version };
}
}
// Unknown file type or invalid
return { packageName: undefined, version: undefined };
}