Adapt per review

This commit is contained in:
Reinier Criel 2025-10-27 09:23:47 -07:00
parent 9dacf5cff3
commit 190607de92
27 changed files with 191 additions and 114 deletions

View file

@ -1,20 +1,26 @@
import { getEcoSystem, ECOSYSTEM_JS, ECOSYSTEM_PY } from "../config/settings.js";
export const knownJsRegistries = ["registry.npmjs.org","registry.yarnpkg.com"];
export const knownPipRegistries = ["files.pythonhosted.org", "pypi.org", "pypi.python.org", "pythonhosted.org"];
export function parsePackageFromUrl(url) {
const ecosystem = getEcoSystem();
let registry;
for (const knownRegistry of knownJsRegistries) {
if (url.includes(knownRegistry)) {
registry = knownRegistry;
return parseJsPackageFromUrl(url, registry);
// Only check registries that match the current ecosystem
if (ecosystem === ECOSYSTEM_JS) {
for (const knownRegistry of knownJsRegistries) {
if (url.includes(knownRegistry)) {
registry = knownRegistry;
return parseJsPackageFromUrl(url, registry);
}
}
}
for (const knownRegistry of knownPipRegistries) {
if (url.includes(knownRegistry)) {
registry = knownRegistry;
return parsePipPackageFromUrl(url, registry);
} else if (ecosystem === ECOSYSTEM_PY) {
for (const knownRegistry of knownPipRegistries) {
if (url.includes(knownRegistry)) {
registry = knownRegistry;
return parsePipPackageFromUrl(url, registry);
}
}
}
@ -70,21 +76,25 @@ function parsePipPackageFromUrl(url, registry) {
}
// Quick sanity check on the URL + parse
let u;
let urlObj;
try {
u = new URL(url);
urlObj = new URL(url);
} catch {
return { packageName, version};
}
// Get the last path segment (filename) and decode it (strip query & fragment automatically)
const lastSegment = u.pathname.split("/").filter(Boolean).pop();
const lastSegment = urlObj.pathname.split("/").filter(Boolean).pop();
if (!lastSegment){
return { packageName, version};
}
const filename = decodeURIComponent(lastSegment);
// Parse Python package downloads from PyPI/pythonhosted.org
// Example wheel: https://files.pythonhosted.org/packages/xx/yy/requests-2.28.1-py3-none-any.whl
// Example sdist: https://files.pythonhosted.org/packages/xx/yy/requests-2.28.1.tar.gz
// Wheel (.whl)
if (filename.endsWith(".whl")) {
const base = filename.slice(0, -4); // remove ".whl"
@ -96,6 +106,9 @@ function parsePipPackageFromUrl(url, registry) {
const rawVersion = secondDash >= 0 ? rest.slice(0, secondDash) : rest;
packageName = dist; // preserve underscores
version = rawVersion;
// Reject "latest" as it's a placeholder, not a real version
// When version is "latest", this signals the URL doesn't contain actual version info
// Returning undefined allows the request (see registryProxy.js isAllowedUrl)
if (version === "latest" || !packageName || !version) {
return { packageName: undefined, version: undefined };
}
@ -111,6 +124,9 @@ function parsePipPackageFromUrl(url, registry) {
if (lastDash > 0 && lastDash < base.length - 1) {
packageName = base.slice(0, lastDash);
version = base.slice(lastDash + 1);
// Reject "latest" as it's a placeholder, not a real version
// When version is "latest", this signals the URL doesn't contain actual version info
// Returning undefined allows the request (see registryProxy.js isAllowedUrl)
if (version === "latest" || !packageName || !version) {
return { packageName: undefined, version: undefined };
}

View file

@ -1,8 +1,13 @@
import { describe, it } from "node:test";
import { describe, it, beforeEach } from "node:test";
import assert from "node:assert";
import { parsePackageFromUrl } from "./parsePackageFromUrl.js";
import { setEcoSystem, ECOSYSTEM_JS, ECOSYSTEM_PY } from "../config/settings.js";
describe("parsePackageFromUrl", () => {
beforeEach(() => {
setEcoSystem(ECOSYSTEM_JS);
});
const testCases = [
// Regular packages
{
@ -114,6 +119,10 @@ describe("parsePackageFromUrl", () => {
});
describe("parsePackageFromUrl - pip URLs", () => {
beforeEach(() => {
setEcoSystem(ECOSYSTEM_PY);
});
const pipTestCases = [
// Valid pip URLs
{

View file

@ -49,6 +49,32 @@ describe("registryProxy.connectTunnel", () => {
socket.destroy();
});
it("should use destination's real certificate (not safe-chain's self-signed CA)", async () => {
const socket = await connectToProxy(proxyHost, proxyPort);
await establishHttpsTunnel(socket, "postman-echo.com", 443);
// Verifies that tunnel requests pass through the destination's real certificate
// without interception by the safe-chain MITM proxy.
const certInfo = await getTlsCertificateInfo(
socket,
new URL("https://postman-echo.com")
);
// Verify the certificate is NOT issued by our safe-chain CA
// Our self-signed CA would have issuer: "Safe-Chain Proxy CA"
assert.ok(certInfo.issuer !== undefined, "Certificate should have an issuer");
assert.ok(
!certInfo.issuer.includes("Safe-Chain"),
`Tunnel should use destination's real certificate, not safe-chain CA. Issuer: ${certInfo.issuer}`
);
// Verify it's a real certificate with proper hostname
assert.strictEqual(certInfo.subject.includes("postman-echo.com"), true,
`Certificate subject should include postman-echo.com, got: ${certInfo.subject}`);
socket.destroy();
});
describe("Error Handling", () => {
it("should return 502 Bad Gateway for invalid hostname", async () => {
const socket = await connectToProxy(proxyHost, proxyPort);
@ -141,7 +167,7 @@ function establishHttpsTunnel(socket, targetHost, targetPort) {
});
}
function sendHttpsRequestThroughTunnel(socket, verb, url) {
function sendHttpsRequestThroughTunnel(socket, verb, url, rejectUnauthorized = false) {
return new Promise((resolve, reject) => {
const tlsSocket = tls.connect(
{
@ -149,7 +175,7 @@ function sendHttpsRequestThroughTunnel(socket, verb, url) {
servername: url.hostname,
// Tests should focus on tunnel behavior, not system CA state;
// disable CA verification to avoid flakiness on machines without full roots.
rejectUnauthorized: false,
rejectUnauthorized: rejectUnauthorized,
},
() => {
tlsSocket.write(
@ -173,3 +199,35 @@ function sendHttpsRequestThroughTunnel(socket, verb, url) {
});
});
}
function getTlsCertificateInfo(socket, url) {
return new Promise((resolve, reject) => {
const tlsSocket = tls.connect(
{
socket: socket,
servername: url.hostname,
// Don't reject unauthorized to avoid system CA issues in CI
// We just want to inspect the certificate
rejectUnauthorized: false,
},
() => {
const cert = tlsSocket.getPeerCertificate();
// Extract issuer and subject information
const issuer = cert.issuer ?
Object.entries(cert.issuer).map(([k, v]) => `${k}=${v}`).join(", ") :
"unknown";
const subject = cert.subject ?
Object.entries(cert.subject).map(([k, v]) => `${k}=${v}`).join(", ") :
"unknown";
tlsSocket.end();
resolve({ issuer, subject });
}
);
tlsSocket.on("error", (err) => {
reject(err);
});
});
}

View file

@ -5,6 +5,7 @@ import { handleHttpProxyRequest } from "./plainHttpProxy.js";
import { getCaCertPath } from "./certUtils.js";
import { auditChanges } from "../scanning/audit/index.js";
import { knownJsRegistries, knownPipRegistries, parsePackageFromUrl } from "./parsePackageFromUrl.js";
import { getEcoSystem, ECOSYSTEM_JS, ECOSYSTEM_PY } from "../config/settings.js";
import { ui } from "../environment/userInteraction.js";
import chalk from "chalk";
@ -111,9 +112,18 @@ function handleConnect(req, clientSocket, head) {
// CONNECT method is used for HTTPS requests
// It establishes a tunnel to the server identified by the request URL
if ((knownJsRegistries.some((reg) => req.url.includes(reg)))
|| (knownPipRegistries.some((reg) => req.url.includes(reg)))) {
mitmConnect(req, clientSocket, isAllowedUrl);
const ecosystem = getEcoSystem();
const url = req.url || "";
let isKnownRegistry = false;
if (ecosystem === ECOSYSTEM_JS) {
isKnownRegistry = knownJsRegistries.some((reg) => url.includes(reg));
} else if (ecosystem === ECOSYSTEM_PY) {
isKnownRegistry = knownPipRegistries.some((reg) => url.includes(reg));
}
if (isKnownRegistry) {
mitmConnect(req, clientSocket, isAllowedUrl);
} else {
// For other hosts, just tunnel the request to the destination tcp socket
tunnelRequest(req, clientSocket, head);

View file

@ -7,6 +7,7 @@ import {
mergeSafeChainProxyEnvironmentVariables,
} from "./registryProxy.js";
import { getCaCertPath } from "./certUtils.js";
import { setEcoSystem, ECOSYSTEM_JS, ECOSYSTEM_PY } from "../config/settings.js";
import fs from "fs";
describe("registryProxy.mitm", () => {
@ -19,6 +20,8 @@ describe("registryProxy.mitm", () => {
const proxyUrl = new URL(envVars.HTTPS_PROXY);
proxyHost = proxyUrl.hostname;
proxyPort = parseInt(proxyUrl.port, 10);
// Default to JS ecosystem for JS registry tests
setEcoSystem(ECOSYSTEM_JS);
});
after(async () => {
@ -151,6 +154,8 @@ describe("registryProxy.mitm", () => {
});
it("should intercept HTTPS requests to pypi.org for pip package", async () => {
// Switch to Python ecosystem for pip registry MITM tests
setEcoSystem(ECOSYSTEM_PY);
const response = await makeRegistryRequest(
proxyHost,
proxyPort,
@ -162,6 +167,8 @@ describe("registryProxy.mitm", () => {
});
it("should intercept HTTPS requests to files.pythonhosted.org for pip wheel", async () => {
// Ensure Python ecosystem
setEcoSystem(ECOSYSTEM_PY);
const response = await makeRegistryRequest(
proxyHost,
proxyPort,
@ -173,6 +180,8 @@ describe("registryProxy.mitm", () => {
});
it("should handle pip package with a1 version", async () => {
// Ensure Python ecosystem
setEcoSystem(ECOSYSTEM_PY);
const response = await makeRegistryRequest(
proxyHost,
proxyPort,
@ -184,6 +193,8 @@ describe("registryProxy.mitm", () => {
});
it("should handle pip package with latest version (should not block)", async () => {
// Ensure Python ecosystem
setEcoSystem(ECOSYSTEM_PY);
const response = await makeRegistryRequest(
proxyHost,
proxyPort,