Only use the MITM proxy for pip packages

This commit is contained in:
Reinier Criel 2025-11-05 08:34:40 -08:00
parent 96d7c460fa
commit f0a3ae51db
8 changed files with 9 additions and 698 deletions

View file

@ -1,79 +1,15 @@
import { commandArgumentScanner } from "./dependencyScanner/commandArgumentScanner.js";
import { runPip } from "./runPipCommand.js";
import {
getPipCommandForArgs,
pipInstallCommand,
pipDownloadCommand,
pipWheelCommand,
} from "./utils/pipCommands.js";
/**
 * Builds the package-manager adapter for pip.
 *
 * pip traffic is guarded solely by the MITM proxy, so this adapter never
 * pre-scans command-line arguments: every command is reported as unsupported
 * for argument scanning and no dependency updates are derived from the args.
 *
 * @param {string} [command] First argument forwarded to `runPip` (defaults to "pip").
 * @returns {import("../currentPackageManager.js").PackageManager}
 */
export function createPipPackageManager(command = "pip") {
  return {
    runCommand: /** @param {string[]} args */ (args) => runPip(command, args),
    // For pip, rely solely on MITM proxy to detect/deny downloads from known registries.
    isSupportedCommand: () => false,
    getDependencyUpdatesForCommand: () => [],
  };
}
/**
 * Lookup table from pip subcommand name to the scanner used for it.
 *
 * @type {Record<string, import("./dependencyScanner/commandArgumentScanner.js").CommandArgumentScanner>}
 */
const commandScannerMapping = Object.fromEntries(
  [
    pipInstallCommand,
    pipDownloadCommand, // download also fetches packages from PyPI
    pipWheelCommand, // wheel downloads and builds packages
    // Other commands return null scanner by default
  ].map((pipCommand) => [pipCommand, commandArgumentScanner()])
);
/**
 * Produces a no-op scanner: it never asks for a scan and, if scanned anyway,
 * reports no packages.
 *
 * @returns {import("./dependencyScanner/commandArgumentScanner.js").CommandArgumentScanner}
 */
function nullScanner() {
  const shouldScan = () => false;
  const scan = () => [];
  return { shouldScan, scan };
}
/**
 * Picks the scanner registered for the pip subcommand found in `args`.
 *
 * Falls back to the no-op scanner when no subcommand is found or when the
 * subcommand has no entry of its own in `scanners`.
 *
 * @param {Record<string, import("./dependencyScanner/commandArgumentScanner.js").CommandArgumentScanner>} scanners
 * @param {string[]} args
 * @returns {import("./dependencyScanner/commandArgumentScanner.js").CommandArgumentScanner}
 */
function findDependencyScannerForCommand(scanners, args) {
  const command = getPipCommandForArgs(args);
  // Own-property check: a subcommand such as "constructor" or "toString" must
  // not resolve to an inherited Object.prototype member, which is not a scanner.
  const isRegistered =
    Boolean(command) && Object.prototype.hasOwnProperty.call(scanners, command);
  if (!isRegistered) {
    return nullScanner();
  }
  return scanners[command] || nullScanner();
}

View file

@ -19,10 +19,10 @@ test("createPipPackageManager", async (t) => {
await t.test("should support install, download, and wheel commands", () => {
const pm = createPipPackageManager();
assert.strictEqual(pm.isSupportedCommand(["install", "requests"]), true);
assert.strictEqual(pm.isSupportedCommand(["download", "requests"]), true);
assert.strictEqual(pm.isSupportedCommand(["wheel", "requests"]), true);
// With MITM-only approach, pip does not pre-scan by args
assert.strictEqual(pm.isSupportedCommand(["install", "requests"]), false);
assert.strictEqual(pm.isSupportedCommand(["download", "requests"]), false);
assert.strictEqual(pm.isSupportedCommand(["wheel", "requests"]), false);
});
await t.test("should not support uninstall and info commands", () => {
@ -35,12 +35,10 @@ test("createPipPackageManager", async (t) => {
await t.test("should extract packages from install command", () => {
const pm = createPipPackageManager();
// MITM-only: no dependency extraction from args
const result = pm.getDependencyUpdatesForCommand(["install", "requests==2.28.0"]);
assert.ok(Array.isArray(result));
assert.strictEqual(result.length, 1);
assert.strictEqual(result[0].name, "requests");
assert.strictEqual(result[0].version, "2.28.0");
assert.strictEqual(result.length, 0);
});
await t.test("should return empty array for unsupported commands", () => {

View file

@ -1,77 +0,0 @@
import { parsePackagesFromInstallArgs } from "../parsing/parsePackagesFromInstallArgs.js";
import { hasDryRunArg } from "../utils/pipCommands.js";
/**
* @typedef {Object} ScanResult
* @property {string} name
* @property {string} version
* @property {string} type
*/
/**
* @typedef {Object} ScannerOptions
* @property {boolean} [ignoreDryRun]
*/
/**
* @typedef {Object} CommandArgumentScanner
* @property {(args: string[]) => Promise<ScanResult[]> | ScanResult[]} scan
* @property {(args: string[]) => boolean} shouldScan
*/
/**
 * Factory for a scanner that derives package changes from pip command-line
 * arguments.
 *
 * @param {ScannerOptions} [options] When `ignoreDryRun` is set, `shouldScan`
 *   is answered without looking at the arguments.
 *
 * @returns {CommandArgumentScanner}
 */
export function commandArgumentScanner(options = {}) {
  const ignoreDryRun =
    options.ignoreDryRun === undefined ? false : options.ignoreDryRun;

  return {
    /** @param {string[]} args */
    shouldScan: (args) => shouldScanDependencies(args, ignoreDryRun),
    /**
     * @param {string[]} args
     * @returns {Promise<ScanResult[]> | ScanResult[]}
     */
    scan: (args) => scanDependencies(args),
  };
}
/**
 * Decides whether the arguments should be scanned: always when dry runs are
 * ignored, otherwise only when no dry-run flag is present.
 *
 * @param {string[]} args
 * @param {boolean} ignoreDryRun
 */
function shouldScanDependencies(args, ignoreDryRun) {
  if (ignoreDryRun) {
    return ignoreDryRun;
  }
  return !hasDryRunArg(args);
}
/**
 * Runs the argument-based scan by delegating to `checkChangesFromArgs`.
 *
 * @param {string[]} args
 * @returns {Promise<ScanResult[]> | ScanResult[]}
 */
function scanDependencies(args) {
  const changes = checkChangesFromArgs(args);
  return changes;
}
/**
 * Returns the packages named in the pip arguments, exactly as produced by
 * `parsePackagesFromInstallArgs`.
 *
 * @param {string[]} args
 * @returns {Promise<ScanResult[]> | ScanResult[]}
 */
export function checkChangesFromArgs(args) {
  // The parser already yields exact versions or "latest" with type "add",
  // so its output is passed through without further resolution.
  return parsePackagesFromInstallArgs(args);
}

View file

@ -1,144 +0,0 @@
import { test } from "node:test";
import assert from "node:assert";
import { commandArgumentScanner, checkChangesFromArgs } from "./commandArgumentScanner.js";
// The factory must hand back an object exposing both scanner methods.
test("commandArgumentScanner factory", async (t) => {
  await t.test("should create scanner with required interface", () => {
    const created = commandArgumentScanner();
    assert.ok(created);
    for (const method of ["shouldScan", "scan"]) {
      assert.strictEqual(typeof created[method], "function");
    }
  });
});
// shouldScan: dry runs are skipped unless the scanner was created with ignoreDryRun.
test("shouldScan", async (t) => {
  const cases = [
    {
      title: "should return true for normal install command",
      create: () => commandArgumentScanner(),
      args: ["install", "requests"],
      expected: true,
    },
    {
      title: "should return false for install with --dry-run",
      create: () => commandArgumentScanner(),
      args: ["install", "--dry-run", "requests"],
      expected: false,
    },
    {
      title: "should return true for install with --dry-run when ignoreDryRun is true",
      create: () => commandArgumentScanner({ ignoreDryRun: true }),
      args: ["install", "--dry-run", "requests"],
      expected: true,
    },
  ];

  for (const { title, create, args, expected } of cases) {
    await t.test(title, () => {
      const result = create().shouldScan(args);
      assert.strictEqual(result, expected);
    });
  }
});
// scan: only bare names and exact (== / ===) pins are reported, each as an "add".
test("scan", async (t) => {
  const add = (name, version) => ({ name, version, type: "add" });

  // Checks the length first, then every entry by position.
  const expectPackages = (actual, wanted) => {
    assert.strictEqual(actual.length, wanted.length);
    wanted.forEach((pkg, index) => {
      assert.deepEqual(actual[index], pkg);
    });
  };

  await t.test("should scan simple package installation", () => {
    const found = commandArgumentScanner().scan(["install", "requests"]);
    assert.ok(Array.isArray(found));
    expectPackages(found, [add("requests", "latest")]);
  });

  const cases = [
    {
      title: "should scan package with exact version",
      args: ["install", "requests==2.28.0"],
      wanted: [add("requests", "2.28.0")],
    },
    {
      title: "should scan multiple packages",
      args: ["install", "requests==2.28.0", "flask"],
      wanted: [add("requests", "2.28.0"), add("flask", "latest")],
    },
    {
      title: "should skip packages with range specifiers",
      args: ["install", "requests>=2.0.0", "flask==2.0.0"],
      wanted: [add("flask", "2.0.0")],
    },
    {
      title: "should skip flags with parameters",
      args: ["install", "-r", "requirements.txt", "requests==2.28.0"],
      wanted: [add("requests", "2.28.0")],
    },
    {
      title: "should handle === exact version specifier",
      args: ["install", "requests===2.28.0"],
      wanted: [add("requests", "2.28.0")],
    },
  ];

  for (const { title, args, wanted } of cases) {
    await t.test(title, () => {
      expectPackages(commandArgumentScanner().scan(args), wanted);
    });
  }
});
// checkChangesFromArgs is the exported helper behind scanner.scan.
test("checkChangesFromArgs helper", async (t) => {
  await t.test("should extract packages from args", () => {
    const updates = checkChangesFromArgs(["install", "requests==2.28.0", "flask"]);
    const wanted = [
      { name: "requests", version: "2.28.0", type: "add" },
      { name: "flask", version: "latest", type: "add" },
    ];
    assert.strictEqual(updates.length, wanted.length);
    wanted.forEach((pkg, index) => {
      assert.deepEqual(updates[index], pkg);
    });
  });

  await t.test("should handle empty args", () => {
    assert.deepStrictEqual(checkChangesFromArgs([]), []);
  });
});

View file

@ -1,179 +0,0 @@
/**
* @typedef {Object} PackageDetail
* @property {string} name
* @property {string} version
* @property {string} type
*/
/**
* @typedef {Object} PipOption
* @property {string} name
* @property {number} numberOfParameters
*/
/**
 * Extracts the packages named on a pip command line.
 *
 * `args` is expected to hold the pip arguments including the subcommand
 * (install, download, ...). The subcommand is the first positional argument;
 * options may precede it (e.g. `-v install requests`), so it is located by
 * position among the non-flag arguments rather than by index 0.
 *
 * Supported formats that will be returned:
 * - package_name (no version)
 * - package_name==version (exact version)
 * - package_name===version (exact version, PEP 440)
 *
 * Ranges: Because they don't specify an exact version, the following formats are skipped and we rely on the MITM scanner:
 * - package_name>=version
 * - package_name<=version
 * - package_name>version
 * - package_name<version
 * - package_name~=version
 * - package_name!=version
 * - git+https://... (VCS URLs)
 * - -r requirements.txt (handled by flag skipping)
 *
 * @param {string[]} args
 * @returns {PackageDetail[]}
 */
export function parsePackagesFromInstallArgs(args) {
  /** @type {PackageDetail[]} */
  const packages = [];
  let skipNext = false;
  let commandSeen = false;

  for (const arg of args) {
    // Value belonging to the preceding option (e.g. the file after -r)
    if (skipNext) {
      skipNext = false;
      continue;
    }

    // Skip flags and, for value-taking flags, the value that follows
    if (arg.startsWith("-")) {
      if (isPipOptionWithParameter(arg)) {
        skipNext = true;
      }
      continue;
    }

    // The first positional argument is the pip subcommand itself, not a
    // package, even when options were given in front of it.
    if (!commandSeen) {
      commandSeen = true;
      continue;
    }

    const parsed = parsePipSpec(arg);
    if (parsed) {
      packages.push({ ...parsed, type: "add" });
    }
  }

  return packages;
}
/**
 * pip flags that consume the following argument as their value.
 * Built once at module load so lookups do not rebuild the list on every call.
 */
const PIP_OPTIONS_WITH_PARAMETER = new Set([
  // Install options
  "-r",
  "--requirement",
  "-c",
  "--constraint",
  "-e",
  "--editable",
  "-t",
  "--target",
  "--platform",
  "--python-version",
  "--implementation",
  "--abi",
  "--root",
  "--prefix",
  "--src",
  "--upgrade-strategy",
  "--progress-bar",
  "--root-user-action",
  "--report",
  "--group",
  "-C",
  "--config-settings",
  "--global-option",
  "--no-binary",
  "--only-binary",
  // Download / wheel output directories
  "-d",
  "--dest",
  "-w",
  "--wheel-dir",
  // Package index options
  "-i",
  "--index-url",
  "--extra-index-url",
  "-f",
  "--find-links",
  // General options
  "--python",
  "--log",
  "--keyring-provider",
  "--proxy",
  "--retries",
  "--timeout",
  "--exists-action",
  "--trusted-host",
  "--cert",
  "--client-cert",
  "--cache-dir",
  "--use-feature",
  "--use-deprecated",
  "--resume-retries",
]);

/**
 * Tells whether a pip flag takes its value from the next argument.
 *
 * Only the exact flag spelling matches; the `--flag=value` form carries its
 * value inline and therefore returns false.
 *
 * @param {string} arg
 * @returns {boolean}
 */
function isPipOptionWithParameter(arg) {
  return PIP_OPTIONS_WITH_PARAMETER.has(arg);
}
/**
 * Parses one positional pip requirement into a name and an exact version.
 *
 * Returns `{ name, version: "latest" }` for a bare name, `{ name, version }`
 * for a single exact pin (`==` or `===`), and `null` for everything that does
 * not identify one concrete version: URLs, VCS references, local paths,
 * direct references (`name @ url`), ranges, wildcard pins (`==2.*`) and
 * multi-clause specifiers. Those cases are left to the MITM scanner.
 *
 * @param {string} spec
 * @returns {{ name: string, version: string } | null}
 */
function parsePipSpec(spec) {
  // Ignore obvious URLs and paths, rely on mitm scanner
  const lower = spec.toLowerCase();
  if (
    lower.startsWith("git+") ||
    lower.startsWith("hg+") ||
    lower.startsWith("svn+") ||
    lower.startsWith("bzr+") ||
    lower.startsWith("http:") ||
    lower.startsWith("https:") ||
    lower.startsWith("file:") ||
    spec.startsWith("./") ||
    spec.startsWith("../") ||
    spec.startsWith("/")
  ) {
    return null;
  }

  // Environment markers follow ';' and say nothing about the version,
  // so they must not leak into the name or version.
  const markerStart = spec.indexOf(";");
  const requirement = (markerStart >= 0 ? spec.slice(0, markerStart) : spec).trim();

  // Direct references ("name @ url") fetch from an arbitrary location
  if (requirement.includes("@")) {
    return null;
  }

  // Strip extras: package[extra1,extra2]
  const extrasStart = requirement.indexOf("[");
  const extrasEnd = extrasStart >= 0 ? requirement.indexOf("]", extrasStart) : -1;
  let base = requirement;
  if (extrasStart >= 0 && extrasEnd > extrasStart) {
    base = requirement.slice(0, extrasStart) + requirement.slice(extrasEnd + 1);
  }

  // Split on the first comparator; multi-constraint lists like ">=1,<2"
  // are detected through their first comparator as well.
  const comparatorRegex = /(===|==|!=|~=|>=|<=|<|>)/;
  const m = base.match(comparatorRegex);
  if (!m) {
    // No comparator => just a name, use "latest" as version
    const bareName = base.trim();
    return bareName ? { name: bareName, version: "latest" } : null;
  }

  const name = base.slice(0, m.index).trim();
  // Normalize whitespace inside the specifier, e.g. '== 2.28.0' -> '==2.28.0'
  const versionWithOperator = base.slice(m.index).replace(/\s+/g, "");

  // Only return packages with exact version specifiers (== or ===)
  // Skip range specifiers (<, >, <=, >=, ~=, !=) since they don't provide a specific version
  if (!versionWithOperator.startsWith("==")) {
    return null;
  }

  // Strip the == or === operator to get just the version number
  const version = versionWithOperator.replace(/^===?/, "");

  // A wildcard ("==2.*") or a further clause ("==2.0,!=2.0.1") is not one
  // concrete version; an empty name or version is not a usable result either.
  if (!name || !version || version.includes("*") || version.includes(",")) {
    return null;
  }

  return { name, version };
}

View file

@ -1,110 +0,0 @@
import { describe, it } from "node:test";
import assert from "node:assert";
import { parsePackagesFromInstallArgs } from "./parsePackagesFromInstallArgs.js";
// Table-driven suite: each row is [title, pip args, expected parser output].
describe("parsePackagesFromInstallArgs", () => {
  const add = (name, version) => ({ name, version, type: "add" });

  const cases = [
    ["should parse simple package name", ["install", "requests"], [add("requests", "latest")]],
    [
      "should parse package with version specifier",
      ["install", "requests==2.28.0"],
      [add("requests", "2.28.0")],
    ],
    ["should skip flags", ["install", "--upgrade", "requests"], [add("requests", "latest")]],
    [
      "should parse multiple packages",
      ["install", "requests", "flask", "django==4.0"],
      [add("requests", "latest"), add("flask", "latest"), add("django", "4.0")],
    ],
    [
      "should parse extras and strip them from name",
      ["install", "django[postgres]==4.2.1"],
      [add("django", "4.2.1")],
    ],
    ["should skip ranges", ["install", "requests>=2,<3"], []],
    [
      "should skip packages with range specifiers",
      [
        "install",
        "requests>=2.0.0",
        "flask>1.0",
        "django<=4.0",
        "numpy~=1.20",
        "scipy!=1.5.0",
        "pandas==1.3.0",
      ],
      // Only pandas with exact version (==) should be returned
      [add("pandas", "1.3.0")],
    ],
    [
      "should support === exact version specifier",
      ["install", "requests===2.28.0"],
      [add("requests", "2.28.0")],
    ],
    [
      "should skip VCS/URL/path)",
      [
        "install",
        "git+https://github.com/pallets/flask.git",
        "https://files.pythonhosted.org/packages/foo/bar.whl",
        "file:/tmp/pkg.whl",
        "./localpkg",
      ],
      [],
    ],
    ["should return empty array for no packages", ["install", "--help"], []],
    [
      "should skip all flags with parameters",
      [
        "install",
        "--target",
        "/tmp/target",
        "--platform",
        "linux",
        "--python-version",
        "3.9",
        "--index-url",
        "https://pypi.org/simple",
        "--trusted-host",
        "pypi.org",
        "requests==2.28.0",
        "--cache-dir",
        "/tmp/cache",
        "flask",
      ],
      [add("requests", "2.28.0"), add("flask", "latest")],
    ],
  ];

  for (const [title, args, expected] of cases) {
    it(title, () => {
      const result = parsePackagesFromInstallArgs(args);
      assert.deepEqual(result, expected);
    });
  }
});

View file

@ -1,30 +0,0 @@
// pip subcommand names, exported so other modules can refer to them symbolically.
export const pipInstallCommand = "install";
export const pipDownloadCommand = "download";
export const pipWheelCommand = "wheel";
/**
 * Finds the pip subcommand: the first argument that does not start with "-".
 *
 * @param {string[]} args
 * @returns {string | null} The subcommand, or null when `args` is missing,
 *   empty, or consists of flags only.
 */
export function getPipCommandForArgs(args) {
  if (!args) {
    return null;
  }

  // The first non-flag argument is the command
  const firstPositional = args.find((candidate) => !candidate.startsWith("-"));
  return firstPositional === undefined ? null : firstPositional;
}
/**
 * Reports whether the exact flag "--dry-run" appears among the arguments.
 *
 * @param {string[]} args
 * @returns {boolean}
 */
export function hasDryRunArg(args) {
  return args.includes("--dry-run");
}

View file

@ -1,83 +0,0 @@
import { test } from "node:test";
import assert from "node:assert";
import {
getPipCommandForArgs,
hasDryRunArg,
pipInstallCommand,
pipDownloadCommand,
pipWheelCommand,
} from "./pipCommands.js";
// Table-driven: each row is [title, args, expected subcommand].
test("getPipCommandForArgs", async (t) => {
  const cases = [
    ["should return null for empty args", [], null],
    ["should return null for null args", null, null],
    ["should return the first non-flag argument", ["install"], "install"],
    ["should skip flags and return command", ["-v", "--verbose", "install"], "install"],
    ["should return install command", ["install", "requests"], "install"],
    ["should return uninstall command", ["uninstall", "requests"], "uninstall"],
    ["should return null if only flags", ["--version", "-v"], null],
  ];

  for (const [title, args, expected] of cases) {
    await t.test(title, () => {
      assert.strictEqual(getPipCommandForArgs(args), expected);
    });
  }
});
// Table-driven: each row is [title, args, expected flag presence].
test("hasDryRunArg", async (t) => {
  const cases = [
    ["should return false for empty args", [], false],
    ["should return true if --dry-run is present", ["install", "--dry-run", "requests"], true],
    ["should return false if --dry-run is not present", ["install", "requests"], false],
    [
      "should return true for --dry-run with other flags",
      ["install", "-v", "--dry-run", "--upgrade", "requests"],
      true,
    ],
  ];

  for (const [title, args, expected] of cases) {
    await t.test(title, () => {
      assert.strictEqual(hasDryRunArg(args), expected);
    });
  }
});
// Pins the literal value of each exported subcommand constant.
test("command constants", async (t) => {
  const cases = [
    ["should have correct install command", pipInstallCommand, "install"],
    ["should have correct download command", pipDownloadCommand, "download"],
    ["should have correct wheel command", pipWheelCommand, "wheel"],
  ];

  for (const [title, actual, expected] of cases) {
    await t.test(title, () => {
      assert.strictEqual(actual, expected);
    });
  }
});