Add tests

This commit is contained in:
Reinier Criel 2025-10-22 14:51:44 -07:00
parent 1f707c1e13
commit fbb7e0f95f
10 changed files with 1934 additions and 22 deletions

View file

@ -2,6 +2,7 @@
import { main } from "../src/main.js";
import { initializePackageManager } from "../src/packagemanager/currentPackageManager.js";
import { setEcoSystem } from "../src/config/settings.js";
// Defaults
let packageManagerName = "pip";
@ -32,6 +33,9 @@ if (targetVersionMajor && String(targetVersionMajor).trim() === "3") {
console.log("** aikido-pip ** Final arguments (after processing):", argv);
// Set eco system
setEcoSystem("py");
initializePackageManager(packageManagerName);
var exitCode = await main(argv);

View file

@ -1,12 +1,20 @@
import fetch from "make-fetch-happen";
import { getEcoSystem } from "../config/settings.js";
const malwareDatabaseUrl =
"https://malware-list.aikido.dev/malware_predictions.json";
// Malware database location, keyed by the ecosystem identifier returned by getEcoSystem().
// The pip entry point selects the ecosystem with setEcoSystem("py"), so "py" has to be a key
// here; without it the lookup yields undefined and the fetch fails. "python" is kept as an
// alias so any caller that already uses the longer identifier keeps working.
const malwareDatabaseUrls = {
  js: "https://malware-list.aikido.dev/malware_predictions.json",
  py: "https://malware-list.aikido.dev/malware_predictions_python.json",
  python: "https://malware-list.aikido.dev/malware_predictions_python.json",
};
export async function fetchMalwareDatabase() {
const ecosystem = getEcoSystem() || "js";
if (ecosystem === "py") {
console.log("**aikido.js** Using 'python' ecosystem for malware database fetch");
}
const malwareDatabaseUrl = malwareDatabaseUrls[ecosystem];
const response = await fetch(malwareDatabaseUrl);
if (!response.ok) {
throw new Error(`Error fetching malware database: ${response.statusText}`);
throw new Error(`Error fetching ${ecosystem} malware database: ${response.statusText}`);
}
try {
@ -16,17 +24,23 @@ export async function fetchMalwareDatabase() {
version: response.headers.get("etag") || undefined,
};
} catch (error) {
throw new Error(`Error parsing malware database: ${error.message}`);
throw new Error(`Error parsing ${ecosystem} malware database: ${error.message}`);
}
}
export async function fetchMalwareDatabaseVersion() {
const ecosystem = getEcoSystem() || "js";
if (ecosystem === "py") {
console.log("**aikido.js** Using 'python' ecosystem for malware database fetch");
}
const malwareDatabaseUrl = malwareDatabaseUrls[ecosystem];
const response = await fetch(malwareDatabaseUrl, {
method: "HEAD",
});
if (!response.ok) {
throw new Error(
`Error fetching malware database version: ${response.statusText}`
`Error fetching ${ecosystem} malware database version: ${response.statusText}`
);
}
return response.headers.get("etag") || undefined;

View file

@ -12,3 +12,15 @@ export function getMalwareAction() {
export const MALWARE_ACTION_BLOCK = "block";
export const MALWARE_ACTION_PROMPT = "prompt";
// Module-level holder for the selected ecosystem; it starts out as JavaScript ("js").
const ecosystemSettings = { ecoSystem: "js" };

/**
 * Reads the ecosystem identifier currently stored in the module-level settings.
 * @returns {*} The last value passed to setEcoSystem(), or "js" if it was never called.
 */
export function getEcoSystem() {
  const { ecoSystem } = ecosystemSettings;
  return ecoSystem;
}

/**
 * Overwrites the stored ecosystem identifier.
 * @param {*} setting - Value to store; it is kept as-is, without validation.
 */
export function setEcoSystem(setting) {
  Object.assign(ecosystemSettings, { ecoSystem: setting });
}

View file

@ -1,15 +1,41 @@
export const knownRegistries = ["registry.npmjs.org", "registry.yarnpkg.com"];
import { parse } from "semver";
export const knownNpmRegistries = ["registry.npmjs.org"];
export const knownYarnRegistries = ["registry.yarnpkg.com"];
export const knownPipRegistries = ["files.pythonhosted.org", "pypi.org", "pypi.python.org", "pythonhosted.org"];
export function parsePackageFromUrl(url) {
let packageName, version, registry;
let registry;
for (const knownRegistry of knownRegistries) {
for (const knownRegistry of knownNpmRegistries) {
if (url.includes(knownRegistry)) {
registry = knownRegistry;
break;
return parseNpmYarnPackageFromUrl(url, registry);
}
}
for (const knownRegistry of knownPipRegistries) {
console.log("**parsePackageFromUrl.js** Checking pip registry:", knownRegistry);
if (url.includes(knownRegistry)) {
console.log("**parsePackageFromUrl.js** Matched pip registry:", knownRegistry);
registry = knownRegistry;
return parsePipPackageFromUrl(url, registry);
}
}
for (const knownRegistry of knownYarnRegistries) {
if (url.includes(knownRegistry)) {
registry = knownRegistry;
return parseNpmYarnPackageFromUrl(url, registry);
}
}
// If no known registry matched, return { packageName: undefined, version: undefined }
return { packageName: undefined, version: undefined };
}
function parseNpmYarnPackageFromUrl(url, registry) {
let packageName, version;
if (!registry || !url.endsWith(".tgz")) {
return { packageName, version };
}
@ -44,5 +70,73 @@ export function parsePackageFromUrl(url) {
}
}
console.log("**parsePackageFromUrl.js** Parsed package:", { packageName, version });
return { packageName, version };
}
/**
 * Extracts the distribution name and version from a pip artifact download URL.
 *
 * Only the last path segment (the filename) is inspected. Two layouts are recognised:
 *   - wheels (`.whl`): the name ends at the first dash, the version at the following dash;
 *   - source archives (`.tar.gz`, `.zip`, `.tar.bz2`, `.tar.xz`): name and version are
 *     split at the last dash.
 *
 * @param {string} url - Full request URL.
 * @param {string} registry - Registry the caller matched in the URL; only checked for presence.
 * @returns {{packageName: (string|undefined), version: (string|undefined)}}
 *   Both fields are undefined when the URL is not a string, cannot be parsed, has no
 *   decodable filename, is not a recognised artifact type, or names the version "latest".
 */
function parsePipPackageFromUrl(url, registry) {
  let packageName, version;

  // Basic validation
  if (!registry || typeof url !== "string") {
    console.log("**parsePackageFromUrl.js** Invalid registry or URL");
    return { packageName, version };
  }

  // Quick sanity check on the URL + parse
  let u;
  try {
    u = new URL(url);
  } catch {
    console.log("**parsePackageFromUrl.js** Malformed URL:", url);
    return { packageName, version };
  }

  // Last non-empty path segment is the filename; using pathname drops query and fragment.
  const lastSegment = u.pathname.split("/").filter(Boolean).pop();
  if (!lastSegment) {
    console.log("**parsePackageFromUrl.js** No filename in URL path:", url);
    return { packageName, version };
  }

  // The URL parser leaves malformed percent-escapes (e.g. "%zz") untouched, and
  // decodeURIComponent throws URIError on them. Treat such a URL like any other
  // unparseable one instead of letting the exception escape to the caller.
  let filename;
  try {
    filename = decodeURIComponent(lastSegment);
  } catch (error) {
    if (!(error instanceof URIError)) {
      throw error;
    }
    console.log("**parsePackageFromUrl.js** Undecodable filename in URL path:", url);
    return { packageName, version };
  }

  // Wheel (.whl)
  if (filename.endsWith(".whl")) {
    const base = filename.slice(0, -4); // remove ".whl"
    const firstDash = base.indexOf("-");
    if (firstDash > 0) {
      const dist = base.slice(0, firstDash); // may contain underscores
      const rest = base.slice(firstDash + 1); // version + the rest of tags
      const secondDash = rest.indexOf("-");
      const rawVersion = secondDash >= 0 ? rest.slice(0, secondDash) : rest;
      packageName = dist; // preserve underscores
      version = rawVersion;
      if (version === "latest" || !packageName || !version) {
        return { packageName: undefined, version: undefined };
      }
      console.log("**parsePackageFromUrl.js** Parsed package:", { packageName, version });
      return { packageName, version };
    }
  }

  // Source dist (sdist)
  const sdistExtMatch = filename.match(/\.(tar\.gz|zip|tar\.bz2|tar\.xz)$/i);
  if (sdistExtMatch) {
    const base = filename.slice(0, -sdistExtMatch[0].length);
    // Split at the LAST dash so dashes inside the name stay part of the name.
    const lastDash = base.lastIndexOf("-");
    if (lastDash > 0 && lastDash < base.length - 1) {
      packageName = base.slice(0, lastDash);
      version = base.slice(lastDash + 1);
      if (version === "latest" || !packageName || !version) {
        return { packageName: undefined, version: undefined };
      }
      console.log("**parsePackageFromUrl.js** Parsed package:", { packageName, version });
      return { packageName, version };
    }
  }

  // Unknown file type or invalid
  console.log("**parsePackageFromUrl.js** Unknown file type for URL:", url);
  return { packageName: undefined, version: undefined };
}

View file

@ -112,3 +112,81 @@ describe("parsePackageFromUrl", () => {
});
});
});
describe("parsePackageFromUrl - pip URLs", () => {
  // Builds one table row: a URL together with the parse result it must produce.
  const parsesTo = (url, packageName, version) => ({
    url,
    expected: { packageName, version },
  });
  // A URL that must not be recognised as a package download.
  const rejected = (url) => parsesTo(url, undefined, undefined);

  const cases = [
    // Valid pip URLs
    parsesTo("https://files.pythonhosted.org/packages/xx/yy/foobar-1.2.3.tar.gz", "foobar", "1.2.3"),
    parsesTo("https://pypi.org/packages/source/f/foobar/foobar-1.2.3.tar.gz", "foobar", "1.2.3"),
    parsesTo("https://pypi.org/packages/source/f/foo-bar/foo-bar-0.9.0.tar.gz", "foo-bar", "0.9.0"),
    parsesTo("https://pypi.org/packages/source/f/foo_bar/foo_bar-2.0.0-py3-none-any.whl", "foo_bar", "2.0.0"),
    parsesTo("https://files.pythonhosted.org/packages/xx/yy/foo_bar-2.0.0-py3-none-any.whl", "foo_bar", "2.0.0"),
    parsesTo("https://pypi.org/packages/source/f/foo.bar/foo.bar-1.0.0.tar.gz", "foo.bar", "1.0.0"),
    parsesTo("https://pypi.org/packages/source/f/foo_bar/foo_bar-2.0.0b1.tar.gz", "foo_bar", "2.0.0b1"),
    parsesTo("https://pypi.org/packages/source/f/foo_bar/foo_bar-2.0.0rc1.tar.gz", "foo_bar", "2.0.0rc1"),
    parsesTo("https://pypi.org/packages/source/f/foo_bar/foo_bar-2.0.0.post1.tar.gz", "foo_bar", "2.0.0.post1"),
    parsesTo("https://pypi.org/packages/source/f/foo_bar/foo_bar-2.0.0.dev1.tar.gz", "foo_bar", "2.0.0.dev1"),
    parsesTo("https://pypi.org/packages/source/f/foo_bar/foo_bar-2.0.0a1.tar.gz", "foo_bar", "2.0.0a1"),
    parsesTo("https://pypi.org/packages/source/f/foo_bar/foo_bar-2.0.0-cp38-cp38-manylinux1_x86_64.whl", "foo_bar", "2.0.0"),
    // Invalid pip URLs
    rejected("https://pypi.org/simple/"),
    rejected("https://pypi.org/project/foobar/"),
    rejected("https://files.pythonhosted.org/packages/xx/yy/foobar-latest.tar.gz"),
    rejected("https://pypi.org/packages/source/f/foo_bar/foo_bar-latest.tar.gz"),
  ];

  // One test per row; the title carries the 1-based row number and the URL.
  for (const [position, { url, expected }] of cases.entries()) {
    it(`should parse pip URL ${position + 1}: ${url}`, () => {
      assert.deepEqual(parsePackageFromUrl(url), expected);
    });
  }
});

View file

@ -4,7 +4,7 @@ import { mitmConnect } from "./mitmRequestHandler.js";
import { handleHttpProxyRequest } from "./plainHttpProxy.js";
import { getCaCertPath } from "./certUtils.js";
import { auditChanges } from "../scanning/audit/index.js";
import { knownRegistries, parsePackageFromUrl } from "./parsePackageFromUrl.js";
import { knownNpmRegistries, knownYarnRegistries, knownPipRegistries, parsePackageFromUrl } from "./parsePackageFromUrl.js";
import { ui } from "../environment/userInteraction.js";
import chalk from "chalk";
@ -108,10 +108,11 @@ function handleConnect(req, clientSocket, head) {
// CONNECT method is used for HTTPS requests
// It establishes a tunnel to the server identified by the request URL
if (knownRegistries.some((reg) => req.url.includes(reg))) {
// For npm and yarn registries, we want to intercept and inspect the traffic
// so we can block packages with malware
mitmConnect(req, clientSocket, isAllowedUrl);
console.log("**registryProxy.js** Handling CONNECT request for:", req.url);
if ((knownNpmRegistries.some((reg) => req.url.includes(reg)))
|| (knownYarnRegistries.some((reg) => req.url.includes(reg)))
|| (knownPipRegistries.some((reg) => req.url.includes(reg)))) {
mitmConnect(req, clientSocket, isAllowedUrl);
} else {
// For other hosts, just tunnel the request to the destination tcp socket
tunnelRequest(req, clientSocket, head);
@ -124,6 +125,7 @@ async function isAllowedUrl(url) {
// packageName and version are undefined when the URL is not a package download
// In that case, we can allow the request to proceed
if (!packageName || !version) {
console.log("**registryProxy.js** Non-package URL, allowing:", url);
return true;
}
@ -132,6 +134,7 @@ async function isAllowedUrl(url) {
]);
if (!auditResult.isAllowed) {
console.log("**registryProxy.js** Blocking malicious package:", { packageName, version, url });
state.blockedRequests.push({ packageName, version, url });
return false;
}

View file

@ -140,6 +140,59 @@ describe("registryProxy.mitm", () => {
// Same hostname should get the same certificate (fingerprint)
assert.strictEqual(cert1.fingerprint, cert2.fingerprint);
});
// --- Pip registry MITM and env var tests ---
// Checks that the merged proxy environment exposes the CA certificate path under each of
// PIP_CERT, REQUESTS_CA_BUNDLE and SSL_CERT_FILE.
it("should set pip CA trust environment variables", () => {
const envVars = mergeSafeChainProxyEnvironmentVariables([]);
const caPath = getCaCertPath();
assert.strictEqual(envVars.PIP_CERT, caPath);
assert.strictEqual(envVars.REQUESTS_CA_BUNDLE, caPath);
assert.strictEqual(envVars.SSL_CERT_FILE, caPath);
});
// The request tests below assert only that the response status is not 403 and that the body
// is a string; they do not check what the upstream host actually returned.
// NOTE(review): makeRegistryRequest appears to send the request to the named host through
// the proxy — confirm whether these tests need network access.
it("should intercept HTTPS requests to pypi.org for pip package", async () => {
const response = await makeRegistryRequest(
proxyHost,
proxyPort,
"pypi.org",
"/packages/source/f/foo_bar/foo_bar-2.0.0.tar.gz"
);
assert.notStrictEqual(response.statusCode, 403);
assert.ok(typeof response.body === "string");
});
// Same check for a wheel filename on files.pythonhosted.org.
it("should intercept HTTPS requests to files.pythonhosted.org for pip wheel", async () => {
const response = await makeRegistryRequest(
proxyHost,
proxyPort,
"files.pythonhosted.org",
"/packages/xx/yy/foo_bar-2.0.0-py3-none-any.whl"
);
assert.notStrictEqual(response.statusCode, 403);
assert.ok(typeof response.body === "string");
});
// Same check for a source archive whose version has an "a1" suffix.
it("should handle pip package with a1 version", async () => {
const response = await makeRegistryRequest(
proxyHost,
proxyPort,
"pypi.org",
"/packages/source/f/foo_bar/foo_bar-2.0.0a1.tar.gz"
);
assert.notStrictEqual(response.statusCode, 403);
assert.ok(typeof response.body === "string");
});
// A filename whose version part is "latest" must not be answered with 403.
it("should handle pip package with latest version (should not block)", async () => {
const response = await makeRegistryRequest(
proxyHost,
proxyPort,
"pypi.org",
"/packages/source/f/foo_bar/foo_bar-latest.tar.gz"
);
assert.notStrictEqual(response.statusCode, 403);
assert.ok(typeof response.body === "string");
});
});
async function makeRegistryRequest(proxyHost, proxyPort, targetHost, path) {

View file

@ -7,6 +7,7 @@ export async function auditChanges(changes) {
const allowedChanges = [];
const disallowedChanges = [];
console.log("**audit/index.js** Auditing changes:", changes);
var malwarePackages = await getPackagesWithMalware(
changes.filter(
(change) => change.type === "add" || change.type === "change"