Skip to main content

Scanning Pipeline

The scanning pipeline handles background threat detection that can't happen in real-time, including phishing detection, marketplace counterfeit monitoring, and domain surveillance.

Overview

┌─────────────────┐
│  Product Sync   │
│  (Shopify API)  │
└────────┬────────┘
         │
         ▼
┌─────────────────┐
│ ProductScanJob  │
│    (Prisma)     │
└────────┬────────┘
         │ Workers claim jobs
         ▼
┌─────────────────────────────────────────────────────────────┐
│                      SCANNING WORKERS                       │
├─────────────────┬─────────────────┬─────────────────────────┤
│    Phishing     │   Marketplace   │    Domain Monitoring    │
│    Detection    │   Monitoring    │                         │
└────────┬────────┴─────────────────┴─────────────────────────┘
         │
         ▼
┌─────────────────┐
│ Threat Results  │
│    (Prisma)     │
└─────────────────┘

Job Claiming Pattern

Workers claim jobs to prevent duplicate processing:

// app/db/product-scan.ts
/**
 * Claims the oldest pending scan job for the given worker.
 *
 * The claim itself is a conditional update that only matches while the job is
 * still `'pending'`. A plain read followed by an unconditional update lets two
 * workers that read the same row both "claim" it; with the conditional update
 * exactly one of them wins and the others move on to the next candidate.
 *
 * @param workerId - Identifier stored in `claimedBy` on the claimed job.
 * @returns The claimed job (now `'processing'`), or `null` when there is no
 *   pending job or every attempt lost the race to another worker.
 */
export async function claimScanJob(workerId: string) {
  // Bounded so a worker that keeps losing races gives up and polls again later.
  const maxAttempts = 5;

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    // Find the oldest unclaimed job
    const candidate = await prisma.productScanJob.findFirst({
      where: { status: 'pending' },
      orderBy: { createdAt: 'asc' }
    });

    if (!candidate) return null;

    // Compare-and-set: matches zero rows if another worker claimed it first.
    const { count } = await prisma.productScanJob.updateMany({
      where: { id: candidate.id, status: 'pending' },
      data: {
        status: 'processing',
        claimedBy: workerId,
        claimedAt: new Date()
      }
    });

    if (count === 1) {
      // Re-read so the caller gets the row as stored after the claim.
      return prisma.productScanJob.findUnique({ where: { id: candidate.id } });
    }
  }

  return null;
}

Phishing Detection

Detects fake websites impersonating merchants.

Domain Variation Generation

// app/phishingDetection/domain-generator.ts
/**
 * Builds candidate look-alike domains for a merchant domain.
 *
 * The input is lower-cased and a leading `www.` is dropped before the first
 * label is taken as the brand name. The result combines whatever
 * `generateTypos` and `generateHomoglyphs` return for that name with
 * prefix/suffix variants on `.com` and the bare name on alternative TLDs.
 *
 * @param domain - Merchant domain, e.g. `"mystore.com"` or `"www.mystore.com"`.
 * @returns De-duplicated candidates, never including the merchant's own
 *   domain; empty when no name can be extracted from `domain`.
 */
export function generateDomainVariations(domain: string): string[] {
  // Without this, "www.mystore.com" would produce variations of "www".
  const ownDomain = domain.trim().toLowerCase().replace(/^www\./, '');
  const name = ownDomain.split('.')[0]; // e.g., "mystore" from "mystore.com"
  if (!name) return [];

  const variations: string[] = [];

  // Typosquatting
  variations.push(...generateTypos(name));

  // Homoglyphs (e.g., mystore → mystor3)
  variations.push(...generateHomoglyphs(name));

  // Common prefixes/suffixes
  const affixes = ['shop', 'store', 'official', 'buy', 'get', 'secure'];
  for (const affix of affixes) {
    variations.push(`${affix}${name}.com`);
    variations.push(`${name}${affix}.com`);
    variations.push(`${name}-${affix}.com`);
  }

  // Different TLDs. ".com" is listed so merchants on another TLD get it
  // checked; for ".com" merchants it is removed again by the filter below.
  const tlds = ['.com', '.net', '.org', '.co', '.shop', '.store', '.xyz'];
  for (const tld of tlds) {
    variations.push(`${name}${tld}`);
  }

  // Dedupe, and never report the merchant's own domain as a look-alike.
  return [...new Set(variations)].filter((candidate) => candidate !== ownDomain);
}

Phishing Analysis

// app/phishingDetection/analyzer.ts
/**
 * Scores how likely `domain` is a phishing copy of `merchantDomain`.
 *
 * Signals and weights: recent registration (20), WHOIS privacy (10), free SSL
 * certificate (10), content similarity above 0.7 (40), logo similarity above
 * 0.8 (30). `confidence` is the plain sum of the weights that fired, so it can
 * reach 110; the domain is reported as phishing from 60 upwards.
 *
 * @param domain - Candidate domain to inspect.
 * @param merchantDomain - Legitimate merchant domain to compare against.
 * @returns The verdict with its score and the names of the signals that fired.
 *   When `checkDNS` reports the domain as non-existent, the zeroed result is
 *   returned without running any further lookups.
 */
export async function analyzePhishingDomain(domain: string, merchantDomain: string) {
  // Explicitly typed: an untyped `signals: []` infers `never[]` under `strict`.
  const results: { isPhishing: boolean; confidence: number; signals: string[] } = {
    isPhishing: false,
    confidence: 0,
    signals: []
  };

  // 1. Check if domain exists
  const dnsExists = await checkDNS(domain);
  if (!dnsExists) return results;

  // 2-5. The remaining lookups do not depend on each other, so run them
  // together instead of paying for four round trips back to back.
  const [whois, ssl, similarity, logoSimilarity] = await Promise.all([
    getWhoisData(domain),
    checkSSL(domain),
    compareContent(domain, merchantDomain),
    compareLogo(domain, merchantDomain)
  ]);

  const addSignal = (signal: string, weight: number): void => {
    results.signals.push(signal);
    results.confidence += weight;
  };

  // Signals are recorded in a fixed order regardless of lookup completion order.
  if (whois.createdRecently) addSignal('recently_registered', 20);
  if (whois.privacyProtected) addSignal('whois_privacy', 10);
  if (ssl.isFree) addSignal('free_ssl', 10);
  if (similarity > 0.7) addSignal('content_copy', 40);
  if (logoSimilarity > 0.8) addSignal('logo_copy', 30);

  results.isPhishing = results.confidence >= 60;
  return results;
}

Storage

// app/db/phishing-detection.ts
/**
 * Stores the latest analysis of a candidate domain for a shop.
 *
 * Rows are addressed through the `shop_domain` compound key. A new row gets
 * `firstSeen` and `lastChecked` stamped with the current time; an existing row
 * has its verdict fields overwritten and only `lastChecked` refreshed.
 *
 * @param shop - Shop the candidate domain was checked for.
 * @param data - Analysis outcome; `isPhishing` maps to status `'active'`,
 *   otherwise `'safe'`.
 * @returns Whatever `prisma.phishingDomain.upsert` resolves to.
 */
export async function upsertPhishingDomain(shop: string, data: PhishingResult) {
  const { domain, confidence, signals } = data;

  // Fields written identically on both the create and the update path.
  const verdict = {
    status: data.isPhishing ? 'active' : 'safe',
    confidence,
    signals
  };

  return prisma.phishingDomain.upsert({
    where: { shop_domain: { shop, domain } },
    create: {
      shop,
      domain,
      ...verdict,
      firstSeen: new Date(),
      lastChecked: new Date()
    },
    update: {
      ...verdict,
      lastChecked: new Date()
    }
  });
}

Marketplace Monitoring

Detects counterfeit products on e-commerce platforms.

Supported Platforms

| Platform | API | Search Methods |
| --- | --- | --- |
| Amazon | SerpAPI | Text + Image |
| eBay | SerpAPI | Text + Image |
| Walmart | SerpAPI | Text |
| Etsy | SerpAPI | Text |
| Google Shopping | SerpAPI | Text + Image |
| Google Lens | SerpAPI | Image only |

Search Flow

// app/marketplaceMonitoring/scanner.ts
/**
 * Searches the marketplaces for listings of a product and returns the ones
 * that `analyzeResult` classifies as threats.
 *
 * Text searches use `product.title`; image searches use `product.imageUrl`.
 * Listings are analysed in search order (text hits first, then image hits).
 *
 * @param product - Merchant product to look for.
 * @returns Analyses whose `isThreat` flag is set; listings scored below the
 *   threat threshold are dropped.
 */
export async function scanMarketplace(product: ScannedProduct) {
  // The text and image searches are independent, so issue them all at once.
  const [textSearches, imageSearches] = await Promise.all([
    // 1. Text-based search
    Promise.all([
      searchAmazon(product.title),
      searchEbay(product.title),
      searchWalmart(product.title),
      searchGoogleShopping(product.title)
    ]),
    // 2. Image-based search (more accurate for counterfeits)
    Promise.all([
      searchGoogleLens(product.imageUrl),
      searchAmazonByImage(product.imageUrl)
    ])
  ]);

  // 3. Analyze results for threats
  const results = [];
  for (const result of [...textSearches, ...imageSearches].flat()) {
    const threat = await analyzeResult(result, product);
    // analyzeResult returns an analysis object for every listing, so the
    // object's truthiness says nothing; the verdict is in `isThreat`.
    if (threat?.isThreat) {
      results.push(threat);
    }
  }

  return results;
}

Threat Analysis

/** Outcome of scoring one marketplace listing against a merchant product. */
interface MarketplaceThreatAnalysis {
/** Whether the listing is treated as a threat; `analyzeResult` sets this to `confidence >= 50`. */
isThreat: boolean;
/** Threat category; `analyzeResult` fills it from `determineThreatType(signals)`. */
threatType: 'counterfeit' | 'gray_market' | 'unauthorized';
/** Sum of the weights of the signals that fired. */
confidence: number;
/** Names of the heuristics that fired, e.g. `'suspicious_price'`. */
signals: string[];
}

/**
 * Scores a single marketplace listing against the merchant's product.
 *
 * Weights: price under half the merchant price (30), seller rating under 3 or
 * fewer than 10 reviews (20), image similarity above 0.9 (40), title
 * similarity above 0.8 on a listing not marked as an official seller (25).
 * The listing counts as a threat once the summed weights reach 50.
 *
 * @param result - Listing returned by a marketplace search.
 * @param product - Merchant product the listing is compared with.
 * @returns The verdict, the category from `determineThreatType`, the summed
 *   score and the names of the signals that fired, in evaluation order.
 */
export async function analyzeResult(result: SearchResult, product: ScannedProduct): Promise<MarketplaceThreatAnalysis> {
  const fired: string[] = [];
  let score = 0;

  const flag = (signal: string, weight: number): void => {
    fired.push(signal);
    score += weight;
  };

  // Counterfeits are often listed far below the genuine price.
  const halfPrice = product.price * 0.5;
  if (result.price < halfPrice) flag('suspicious_price', 30);

  // Poorly rated or barely reviewed sellers.
  const weakSeller = result.sellerRating < 3 || result.sellerReviewCount < 10;
  if (weakSeller) flag('suspicious_seller', 20);

  // Listing photo that is all but identical to the merchant's.
  const imageScore = await compareImages(result.imageUrl, product.imageUrl);
  if (imageScore > 0.9) flag('exact_image_match', 40);

  // Copied title, ignored for official sellers.
  const titleScore = compareTitles(result.title, product.title);
  if (titleScore > 0.8 && !result.isOfficialSeller) flag('title_copy', 25);

  return {
    isThreat: score >= 50,
    threatType: determineThreatType(fired),
    confidence: score,
    signals: fired
  };
}

Image Similarity

// app/image-similarity/compare.ts
import sharp from 'sharp';
import { imageHash } from 'image-hash';

/**
 * Estimates how visually similar two remote images are.
 *
 * Both images are fetched and normalised by `downloadAndNormalize`, hashed
 * with `imageHash`, and the Hamming distance between the two hashes is mapped
 * onto a similarity score.
 *
 * NOTE(review): the published `image-hash` package exposes a callback-style
 * API; confirm that the imported `imageHash` really returns a promise when
 * called with a single argument.
 *
 * @param url1 - URL of the first image.
 * @param url2 - URL of the second image.
 * @returns `1 - distance / 64`, i.e. 1 for identical hashes.
 */
export async function compareImages(url1: string, url2: string): Promise<number> {
  // Fetch and normalise both images concurrently.
  const normalized = await Promise.all([url1, url2].map((url) => downloadAndNormalize(url)));

  // Perceptual hash of each normalised image.
  const [hash1, hash2] = await Promise.all(normalized.map((image) => imageHash(image)));

  const differingBits = hammingDistance(hash1, hash2);

  // Dividing by 64 presumably assumes 64-bit hashes; TODO confirm.
  return 1 - differingBits / 64;
}

/**
 * Downloads an image and reduces it to a 64x64 grayscale rendition.
 *
 * @param url - Location of the image to fetch.
 * @returns The resized, grayscaled image as produced by `sharp(...).toBuffer()`.
 * @throws Error when the server answers with a non-2xx status, so an error
 *   page is never handed to the image decoder or hashed as if it were the
 *   product image.
 */
async function downloadAndNormalize(url: string): Promise<Buffer> {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to download image ${url}: HTTP ${response.status}`);
  }

  const bytes = await response.arrayBuffer();

  // Normalize to 64x64 grayscale; 'fill' ignores the aspect ratio so every
  // image ends up with exactly the same dimensions.
  return sharp(Buffer.from(bytes))
    .resize(64, 64, { fit: 'fill' })
    .grayscale()
    .toBuffer();
}

Domain Monitoring

Monitors for new domains that might impersonate the merchant.

Certificate Transparency Logs

// app/domain-monitoring/ct-monitor.ts
/**
 * Records suspicious domains found in Certificate Transparency logs.
 *
 * Every domain returned by `queryCTLogs` is passed to `analyzeNewDomain`, one
 * at a time; each one judged suspicious is stored as a `domainVariant` row
 * with source `'ct_log'` and status `'pending_review'`.
 *
 * @param merchantDomain - Merchant domain to monitor; also stored as `shop`.
 */
export async function monitorCertificates(merchantDomain: string) {
  const candidates = await queryCTLogs(merchantDomain);

  for (const candidate of candidates) {
    const suspicious = await analyzeNewDomain(candidate, merchantDomain);
    if (!suspicious) continue;

    const record = {
      shop: merchantDomain,
      domain: candidate.name,
      source: 'ct_log',
      issueDate: candidate.notBefore,
      status: 'pending_review'
    };
    await prisma.domainVariant.create({ data: record });
  }
}

Worker Architecture

Background workers run on Fly.io:

// app/workers/scan-worker.ts
/**
 * Main loop of a background scan worker.
 *
 * Repeatedly claims a job via `claimScanJob`, runs it through `processJob` and
 * records the outcome on the job row. When no job is available it sleeps for
 * 30 seconds before polling again. The loop never returns on its own; it only
 * ends if claiming a job or sleeping throws.
 */
async function runWorker() {
  const workerId = generateWorkerId();

  while (true) {
    // Claim a job
    const job = await claimScanJob(workerId);

    if (!job) {
      // No jobs, wait and retry
      await sleep(30_000);
      continue;
    }

    try {
      // Process the job
      await processJob(job);

      // Mark complete
      await prisma.productScanJob.update({
        where: { id: job.id },
        data: { status: 'completed', completedAt: new Date() }
      });
    } catch (error: unknown) {
      // Anything can be thrown, not only Error instances.
      const message = error instanceof Error ? error.message : String(error);

      try {
        // Mark failed
        await prisma.productScanJob.update({
          where: { id: job.id },
          data: {
            status: 'failed',
            error: message,
            retryCount: { increment: 1 }
          }
        });
      } catch (recordError: unknown) {
        // Failing to record one job's failure must not stop the worker.
        console.error(`[${workerId}] could not record failure of job ${job.id}`, recordError);
      }
    }
  }
}

External APIs

SerpAPI Integration

// app/background-search/serp-client.ts
// SerpAPI key, read once from the environment when this module is loaded;
// undefined when the SERP_API_KEY variable is not set.
const SERP_API_KEY = process.env.SERP_API_KEY;

/**
 * Runs a text search on amazon.com through SerpAPI's `amazon` engine.
 *
 * @param query - Search text; URL-encoded before it is sent.
 * @returns The response's `organic_results`, or an empty array when the
 *   response carries none.
 * @throws Error when `SERP_API_KEY` is not configured or SerpAPI answers with
 *   a non-2xx status, so a failed search is never reported as "no results".
 */
export async function searchAmazon(query: string) {
  if (!SERP_API_KEY) {
    throw new Error('SERP_API_KEY is not set');
  }

  const response = await fetch(
    `https://serpapi.com/search.json?engine=amazon&amazon_domain=amazon.com&k=${encodeURIComponent(query)}&api_key=${SERP_API_KEY}`
  );

  if (!response.ok) {
    // The request URL is left out of the message on purpose: it contains the API key.
    throw new Error(`SerpAPI amazon search failed: HTTP ${response.status}`);
  }

  const data = await response.json();
  return data.organic_results || [];
}

/**
 * Runs a reverse image search through SerpAPI's `google_lens` engine.
 *
 * @param imageUrl - Publicly reachable URL of the image; URL-encoded before it
 *   is sent.
 * @returns The response's `visual_matches`, or an empty array when the
 *   response carries none.
 * @throws Error when `SERP_API_KEY` is not configured or SerpAPI answers with
 *   a non-2xx status, so a failed search is never reported as "no matches".
 */
export async function searchGoogleLens(imageUrl: string) {
  if (!SERP_API_KEY) {
    throw new Error('SERP_API_KEY is not set');
  }

  const response = await fetch(
    `https://serpapi.com/search.json?engine=google_lens&url=${encodeURIComponent(imageUrl)}&api_key=${SERP_API_KEY}`
  );

  if (!response.ok) {
    // The request URL is left out of the message on purpose: it contains the API key.
    throw new Error(`SerpAPI google_lens search failed: HTTP ${response.status}`);
  }

  const data = await response.json();
  return data.visual_matches || [];
}