From 1abe5932adf3c20878e2f9c43fbb1205ddad62c5 Mon Sep 17 00:00:00 2001 From: 123Haynes <209302+123Haynes@users.noreply.github.com> Date: Tue, 31 Mar 2026 11:52:26 +0000 Subject: [PATCH] add a configuration option for custom malwaredb and newpackagelist urls. --- README.md | 35 ++++++++ packages/safe-chain/src/api/aikido.js | 41 +++++---- packages/safe-chain/src/api/aikido.spec.js | 1 + .../safe-chain/src/config/cliArguments.js | 25 +++++- packages/safe-chain/src/config/configFile.js | 14 +++ .../src/config/environmentVariables.js | 10 +++ packages/safe-chain/src/config/settings.js | 27 ++++++ .../safe-chain/src/config/settings.spec.js | 85 +++++++++++++++++++ 8 files changed, 219 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index e173b66..fad26af 100644 --- a/README.md +++ b/README.md @@ -277,6 +277,41 @@ You can set custom registries through environment variable or config file. Both } ``` +## Malware List Base URL + +Configure Safe Chain to fetch malware databases and new packages lists from a custom mirror URL. This allows you to host your own copy of the Aikido malware database. + +### Configuration Options + +You can set the malware list base URL through multiple sources (in order of priority): + +1. **CLI Argument** (highest priority): + + ```shell + npm install express --safe-chain-malware-list-base-url=https://your-mirror.com + ``` + +2. **Environment Variable**: + + ```shell + export SAFE_CHAIN_MALWARE_LIST_BASE_URL=https://your-mirror.com + npm install express + ``` + +3. **Config File** (`~/.safe-chain/config.json`): + + ```json + { + "malwareListBaseUrl": "https://your-mirror.com" + } + ``` + +The base URL should point to a server that mirrors the structure of `https://malware-list.aikido.dev/`, including the following paths: +- `/malware_predictions.json` (JavaScript ecosystem malware database) +- `/malware_pypi.json` (Python ecosystem malware database) +- `/releases/npm.json` (JavaScript new packages list) +- `/releases/pypi.json` (Python new packages list) + # Usage in CI/CD You can protect your CI/CD pipelines from malicious packages by integrating Aikido Safe Chain into your build process. This ensures that any packages installed during your automated builds are checked for malware before installation. diff --git a/packages/safe-chain/src/api/aikido.js b/packages/safe-chain/src/api/aikido.js index 0ceec21..91ed692 100644 --- a/packages/safe-chain/src/api/aikido.js +++ b/packages/safe-chain/src/api/aikido.js @@ -3,17 +3,18 @@ import { getEcoSystem, ECOSYSTEM_JS, ECOSYSTEM_PY, + getMalwareListBaseUrl, } from "../config/settings.js"; import { ui } from "../environment/userInteraction.js"; -const malwareDatabaseUrls = { - [ECOSYSTEM_JS]: "https://malware-list.aikido.dev/malware_predictions.json", - [ECOSYSTEM_PY]: "https://malware-list.aikido.dev/malware_pypi.json", +const malwareDatabasePaths = { + [ECOSYSTEM_JS]: "malware_predictions.json", + [ECOSYSTEM_PY]: "malware_pypi.json", }; -const newPackagesListUrls = { - [ECOSYSTEM_JS]: "https://malware-list.aikido.dev/releases/npm.json", - [ECOSYSTEM_PY]: "https://malware-list.aikido.dev/releases/pypi.json", +const newPackagesListPaths = { + [ECOSYSTEM_JS]: "releases/npm.json", + [ECOSYSTEM_PY]: "releases/pypi.json", }; const DEFAULT_FETCH_RETRY_ATTEMPTS = 4; @@ -40,10 +41,11 @@ const DEFAULT_FETCH_RETRY_ATTEMPTS = 4; export async function fetchMalwareDatabase() { return retry(async () => { const ecosystem = getEcoSystem(); - const malwareDatabaseUrl = - malwareDatabaseUrls[ - /** @type {keyof typeof malwareDatabaseUrls} */ (ecosystem) - ]; + const baseUrl = getMalwareListBaseUrl(); + const path = malwareDatabasePaths[ + /** @type {keyof typeof malwareDatabasePaths} */ (ecosystem) + ]; + const malwareDatabaseUrl = `${baseUrl}/${path}`; const response = await fetch(malwareDatabaseUrl); if (!response.ok) { throw new Error( @@ -69,10 +71,11 @@ export async function fetchMalwareDatabase() { export async function fetchMalwareDatabaseVersion() { return retry(async () => { const ecosystem = getEcoSystem(); - const malwareDatabaseUrl = - malwareDatabaseUrls[ - /** @type {keyof typeof malwareDatabaseUrls} */ (ecosystem) - ]; + const baseUrl = getMalwareListBaseUrl(); + const path = malwareDatabasePaths[ + /** @type {keyof typeof malwareDatabasePaths} */ (ecosystem) + ]; + const malwareDatabaseUrl = `${baseUrl}/${path}`; const response = await fetch(malwareDatabaseUrl, { method: "HEAD", }); @@ -92,8 +95,9 @@ export async function fetchMalwareDatabaseVersion() { export async function fetchNewPackagesList() { return retry(async () => { const ecosystem = getEcoSystem(); - const url = - newPackagesListUrls[/** @type {keyof typeof newPackagesListUrls} */ (ecosystem)]; + const baseUrl = getMalwareListBaseUrl(); + const path = newPackagesListPaths[/** @type {keyof typeof newPackagesListPaths} */ (ecosystem)]; + const url = `${baseUrl}/${path}`; if (!url) { return { newPackagesList: [], version: undefined }; @@ -124,8 +128,9 @@ export async function fetchNewPackagesList() { export async function fetchNewPackagesListVersion() { return retry(async () => { const ecosystem = getEcoSystem(); - const url = - newPackagesListUrls[/** @type {keyof typeof newPackagesListUrls} */ (ecosystem)]; + const baseUrl = getMalwareListBaseUrl(); + const path = newPackagesListPaths[/** @type {keyof typeof newPackagesListPaths} */ (ecosystem)]; + const url = `${baseUrl}/${path}`; if (!url) { return undefined; diff --git a/packages/safe-chain/src/api/aikido.spec.js b/packages/safe-chain/src/api/aikido.spec.js index 0c6c7d9..8b8d2dc 100644 --- a/packages/safe-chain/src/api/aikido.spec.js +++ b/packages/safe-chain/src/api/aikido.spec.js @@ -22,6 +22,7 @@ describe("aikido API", async () => { getEcoSystem: () => ecosystem, ECOSYSTEM_JS: "js", ECOSYSTEM_PY: "py", + getMalwareListBaseUrl: () => "https://malware-list.aikido.dev", }, }); diff --git a/packages/safe-chain/src/config/cliArguments.js b/packages/safe-chain/src/config/cliArguments.js index 25013fb..918761c 100644 --- a/packages/safe-chain/src/config/cliArguments.js +++ b/packages/safe-chain/src/config/cliArguments.js @@ -1,12 +1,13 @@ import { ui } from "../environment/userInteraction.js"; /** - * @type {{loggingLevel: string | undefined, skipMinimumPackageAge: boolean | undefined, minimumPackageAgeHours: string | undefined}} + * @type {{loggingLevel: string | undefined, skipMinimumPackageAge: boolean | undefined, minimumPackageAgeHours: string | undefined, malwareListBaseUrl: string | undefined}} */ const state = { loggingLevel: undefined, skipMinimumPackageAge: undefined, minimumPackageAgeHours: undefined, + malwareListBaseUrl: undefined, }; const SAFE_CHAIN_ARG_PREFIX = "--safe-chain-"; @@ -20,6 +21,7 @@ export function initializeCliArguments(args) { state.loggingLevel = undefined; state.skipMinimumPackageAge = undefined; state.minimumPackageAgeHours = undefined; + state.malwareListBaseUrl = undefined; const safeChainArgs = []; const remainingArgs = []; @@ -35,6 +37,7 @@ export function initializeCliArguments(args) { setLoggingLevel(safeChainArgs); setSkipMinimumPackageAge(safeChainArgs); setMinimumPackageAgeHours(safeChainArgs); + setMalwareListBaseUrl(safeChainArgs); checkDeprecatedPythonFlag(args); return remainingArgs; } @@ -109,6 +112,26 @@ export function getMinimumPackageAgeHours() { return state.minimumPackageAgeHours; } +/** + * @param {string[]} args + * @returns {void} + */ +function setMalwareListBaseUrl(args) { + const argName = SAFE_CHAIN_ARG_PREFIX + "malware-list-base-url="; + + const value = getLastArgEqualsValue(args, argName); + if (value) { + state.malwareListBaseUrl = value; + } +} + +/** + * @returns {string | undefined} + */ +export function getMalwareListBaseUrl() { + return state.malwareListBaseUrl; +} + /** * @param {string[]} args * @param {string} flagName diff --git a/packages/safe-chain/src/config/configFile.js b/packages/safe-chain/src/config/configFile.js index e132c90..3fb0f21 100644 --- a/packages/safe-chain/src/config/configFile.js +++ b/packages/safe-chain/src/config/configFile.js @@ -10,6 +10,7 @@ import { getEcoSystem } from "./settings.js"; * We cannot trust the input and should add the necessary validations * @property {unknown | Number} scanTimeout * @property {unknown | Number} minimumPackageAgeHours + * @property {unknown | string} malwareListBaseUrl * @property {unknown | SafeChainRegistryConfiguration} npm * @property {unknown | SafeChainRegistryConfiguration} pip * @@ -84,6 +85,18 @@ export function getMinimumPackageAgeHours() { return undefined; } +/** + * Gets the malware list base URL from config file only + * @returns {string | undefined} + */ +export function getMalwareListBaseUrl() { + const config = readConfigFile(); + if (config.malwareListBaseUrl && typeof config.malwareListBaseUrl === "string") { + return config.malwareListBaseUrl; + } + return undefined; +} + /** * Gets the custom npm registries from the config file (format parsing only, no validation) * @returns {string[]} @@ -214,6 +227,7 @@ function readConfigFile() { const emptyConfig = { scanTimeout: undefined, minimumPackageAgeHours: undefined, + malwareListBaseUrl: undefined, npm: { customRegistries: undefined, }, diff --git a/packages/safe-chain/src/config/environmentVariables.js b/packages/safe-chain/src/config/environmentVariables.js index 6ed041f..932eff7 100644 --- a/packages/safe-chain/src/config/environmentVariables.js +++ b/packages/safe-chain/src/config/environmentVariables.js @@ -45,3 +45,13 @@ export function getMinimumPackageAgeExclusions() { return process.env.SAFE_CHAIN_MINIMUM_PACKAGE_AGE_EXCLUSIONS || process.env.SAFE_CHAIN_NPM_MINIMUM_PACKAGE_AGE_EXCLUSIONS; } + +/** + * Gets the malware list base URL from environment variable + * Expected format: full URL without trailing slash + * Example: "https://malware-list.aikido.dev" + * @returns {string | undefined} + */ +export function getMalwareListBaseUrl() { + return process.env.SAFE_CHAIN_MALWARE_LIST_BASE_URL; +} diff --git a/packages/safe-chain/src/config/settings.js b/packages/safe-chain/src/config/settings.js index b864bf9..9171849 100644 --- a/packages/safe-chain/src/config/settings.js +++ b/packages/safe-chain/src/config/settings.js @@ -198,3 +198,30 @@ export function getMinimumPackageAgeExclusions() { const allExclusions = [...envExclusions, ...configExclusions]; return [...new Set(allExclusions)]; } + +/** + * Gets the malware list base URL with priority: CLI argument > environment variable > config file > default + * @returns {string} + */ +export function getMalwareListBaseUrl() { + // Priority 1: CLI argument + const cliValue = cliArguments.getMalwareListBaseUrl(); + if (cliValue) { + return cliValue; + } + + // Priority 2: Environment variable + const envValue = environmentVariables.getMalwareListBaseUrl(); + if (envValue) { + return envValue; + } + + // Priority 3: Config file + const configValue = configFile.getMalwareListBaseUrl(); + if (configValue) { + return configValue; + } + + // Default + return "https://malware-list.aikido.dev"; +} diff --git a/packages/safe-chain/src/config/settings.spec.js b/packages/safe-chain/src/config/settings.spec.js index 18b5156..64e1272 100644 --- a/packages/safe-chain/src/config/settings.spec.js +++ b/packages/safe-chain/src/config/settings.spec.js @@ -15,6 +15,7 @@ const { getNpmCustomRegistries, getPipCustomRegistries, getMinimumPackageAgeExclusions, + getMalwareListBaseUrl, setEcoSystem, ECOSYSTEM_JS, ECOSYSTEM_PY, @@ -534,3 +535,87 @@ describe("getMinimumPackageAgeExclusions", () => { assert.deepStrictEqual(exclusions, ["requests", "urllib3"]); }); }); + +describe("getMalwareListBaseUrl", () => { + let originalEnv; + const envVarName = "SAFE_CHAIN_MALWARE_LIST_BASE_URL"; + + beforeEach(() => { + originalEnv = process.env[envVarName]; + delete process.env[envVarName]; + // Reset CLI arguments state + initializeCliArguments([]); + }); + + afterEach(() => { + if (originalEnv !== undefined) { + process.env[envVarName] = originalEnv; + } else { + delete process.env[envVarName]; + } + configFileContent = undefined; + }); + + it("should return default URL when nothing is configured", () => { + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://malware-list.aikido.dev"); + }); + + it("should return CLI argument value with highest priority", () => { + initializeCliArguments(["--safe-chain-malware-list-base-url=https://cli-mirror.com"]); + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://cli-mirror.com"); + }); + + it("should return environment variable value when no CLI argument", () => { + process.env[envVarName] = "https://env-mirror.com"; + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://env-mirror.com"); + }); + + it("should return config file value when no CLI or env", () => { + configFileContent = JSON.stringify({ + malwareListBaseUrl: "https://config-mirror.com", + }); + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://config-mirror.com"); + }); + + it("should prioritize CLI over environment variable", () => { + process.env[envVarName] = "https://env-mirror.com"; + initializeCliArguments(["--safe-chain-malware-list-base-url=https://cli-mirror.com"]); + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://cli-mirror.com"); + }); + + it("should prioritize environment variable over config file", () => { + process.env[envVarName] = "https://env-mirror.com"; + configFileContent = JSON.stringify({ + malwareListBaseUrl: "https://config-mirror.com", + }); + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://env-mirror.com"); + }); + + it("should prioritize CLI over config file", () => { + initializeCliArguments(["--safe-chain-malware-list-base-url=https://cli-mirror.com"]); + configFileContent = JSON.stringify({ + malwareListBaseUrl: "https://config-mirror.com", + }); + + const url = getMalwareListBaseUrl(); + + assert.strictEqual(url, "https://cli-mirror.com"); + }); +});