unblocked-web / double-agent

A test suite of common scraper detection techniques. See how detectable your scraper stack is.
MIT License
135 stars 10 forks source link

make runner useable as a lib for other projects #62

Closed GlenDC closed 2 years ago

GlenDC commented 2 years ago

Closes #59

As part of the refactor I also cleaned up some code.

All this allows other projects to use the runner logic in whatever way they wish, without having to reinvent the wheel or fork any code. Here is an example:

import os from 'os';
import path from 'path';
import { mkdir, rm } from 'fs/promises';

import { downloadRunnerData } from '@double-agent/external-data/lib/downloadRunnerData';
import { writeUserAgentsToTest } from '@double-agent/runner/lib/collectUserAgentsToTest';
import { runAssignments } from '@double-agent/runner/lib/runAssignments';
import { analyzeAssignmentResults } from '@double-agent/runner/lib/analyzeAssignmentResults';
import { IRunnerFactory } from '@double-agent/runner/interfaces/runner';

interface TestOptions {
    dataDir?: string;
    concurrency?: number;
    browserIds?: string[];
}

const DEFAULT_BROWSER_IDS = [
    "chrome-89-0",
    "chrome-90-0",
    // "chrome-91-0",
    // "chrome-92-0",
    // "chrome-93-0",
    // "chrome-94-0",
    // "chrome-95-0",
    // "chrome-96-0",
    // "chrome-97-0",
    // "chrome-98-0"
];

async function configureTestAndAnalyzeStack(runnerFactory: IRunnerFactory, options?: TestOptions) {
    let dataDir = '.';
    let concurrency = os.cpus().length;
    let browserIds = DEFAULT_BROWSER_IDS;
    if (options) {
        if (options.dataDir) {
            dataDir = options.dataDir;
        }
        if (options.concurrency && options.concurrency > 0) {
            concurrency = options.concurrency;
        }
        if (options.browserIds) {
            browserIds = options.browserIds;
        }
    }
    if (options && options.dataDir) {
        dataDir = options.dataDir;
    }
    dataDir = `${dataDir}/data`;
    try {
        await rm(dataDir, { recursive: true });
    } catch(_) {};

    const probesDataDir = `${dataDir}/probes`;
    const tcpProbeBucketsPath = `${probesDataDir}/probe-buckets/tcp.json`;

    const userAgentsToTestFilePath = `${dataDir}/user-agents-to-test`;

    const assignmentsDataDir = `${dataDir}/assignments-data`;
    await mkdir(`${assignmentsDataDir}/individual`, { recursive: true });

    const resultsDir = `${dataDir}/results`;
    await mkdir(resultsDir, { recursive: true });

    // 1. download test data
    await downloadRunnerData(probesDataDir);

    // 2. write user agents to test
    const userAgentsConfig = { browserIds };
    await writeUserAgentsToTest(tcpProbeBucketsPath, userAgentsConfig, userAgentsToTestFilePath);

    // 3. run the tests for the given runner factory
    try {
        await runAssignments(
            runnerFactory,
            path.resolve(userAgentsToTestFilePath),
            path.resolve(assignmentsDataDir),
            { concurrency },
        );
    } catch (error) {
        console.error(`error caught in assignment runs: ${error}`);
    }

    // 4. analyze the results
    try {
        await analyzeAssignmentResults(probesDataDir, assignmentsDataDir, resultsDir);
    } catch (error) {
        console.error(`error occured during analyzing results: ${error}`);
    }
}

// Public surface of this module: the pipeline entry point (a runtime value)
// and its options shape (a type only, erased at compile time).
export { configureTestAndAnalyzeStack };
export type { TestOptions };