Scanning Pipeline
The scanning pipeline handles background threat detection that cannot run in real time: phishing detection, marketplace counterfeit monitoring, and domain surveillance.
Overview
┌─────────────────┐
│ Product Sync │
│ (Shopify API) │
└────────┬────────┘
│
▼
┌─────────────────┐
│ ProductScanJob │
│ (Prisma) │
└────────┬────────┘
│ Workers claim jobs
▼
┌─────────────────────────────────────────────────────────────┐
│ SCANNING WORKERS │
├─────────────────┬─────────────────┬─────────────────────────┤
│ Phishing │ Marketplace │ Domain Monitoring │
│ Detection │ Monitoring │ │
└─────────────────┴─────────────────┴─────────────────────────┘
│
▼
┌─────────────────┐
│ Threat Results │
│ (Prisma) │
└─────────────────┘
Job Claiming Pattern
Workers claim jobs to prevent duplicate processing:
// app/db/product-scan.ts
/**
 * Claims the oldest pending scan job for a worker.
 *
 * The claim is a conditional write: the row is switched to `processing` only
 * if its status is still `pending` at write time. If another worker claimed
 * the same row between our read and our write, the write matches zero rows
 * and the next candidate is tried instead, so a job is never handed to two
 * workers.
 *
 * @param workerId - Identifier stored in `claimedBy` on the claimed job.
 * @returns The claimed job, or `null` when there is no pending job or every
 *   attempt lost the race to another worker.
 */
export async function claimScanJob(workerId: string) {
  // Bounded so a heavily contended queue cannot keep this call spinning;
  // the caller simply polls again later.
  const maxAttempts = 5;

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    // Find the oldest unclaimed job (only the id is needed for the claim).
    const candidate = await prisma.productScanJob.findFirst({
      where: { status: 'pending' },
      orderBy: { createdAt: 'asc' },
      select: { id: true }
    });
    if (!candidate) return null;

    // Compare-and-set: the status filter makes the claim atomic per row.
    const { count } = await prisma.productScanJob.updateMany({
      where: { id: candidate.id, status: 'pending' },
      data: {
        status: 'processing',
        claimedBy: workerId,
        claimedAt: new Date()
      }
    });

    if (count === 1) {
      return prisma.productScanJob.findUnique({ where: { id: candidate.id } });
    }
    // count === 0: someone else claimed it first; look for another job.
  }

  return null;
}
Phishing Detection
Detects fake websites impersonating merchants.
Domain Variation Generation
// app/phishingDetection/domain-generator.ts
/**
 * Builds a deduplicated list of look-alike candidates for a merchant domain.
 *
 * Candidates come from four sources: typos and homoglyphs of the first DNS
 * label, that label combined with common affixes under `.com`, and that label
 * under alternative TLDs. The merchant's own domain is never returned, even
 * when one of the expansions happens to reproduce it (e.g. `mystore.net`
 * combined with the `.net` TLD).
 *
 * @param domain - Merchant domain, e.g. `mystore.com`.
 * @returns Unique candidate names, excluding `domain` itself.
 */
export function generateDomainVariations(domain: string): string[] {
  const variations: string[] = [];
  const name = domain.split('.')[0]; // e.g., "mystore" from "mystore.com"

  // Typosquatting
  // NOTE(review): helper output is appended as-is; presumably these are
  // already full domain names — confirm against generateTypos/generateHomoglyphs.
  variations.push(...generateTypos(name));

  // Homoglyphs (e.g., mystore → mystor3)
  variations.push(...generateHomoglyphs(name));

  // Common prefixes/suffixes
  const affixes = ['shop', 'store', 'official', 'buy', 'get', 'secure'];
  for (const affix of affixes) {
    variations.push(`${affix}${name}.com`, `${name}${affix}.com`, `${name}-${affix}.com`);
  }

  // Different TLDs
  const tlds = ['.net', '.org', '.co', '.shop', '.store', '.xyz'];
  for (const tld of tlds) {
    variations.push(`${name}${tld}`);
  }

  // Dedupe, then drop the merchant's own domain (domain names are
  // case-insensitive, so compare in lower case).
  const own = domain.toLowerCase();
  return [...new Set(variations)].filter((candidate) => candidate.toLowerCase() !== own);
}
Phishing Analysis
// app/phishingDetection/analyzer.ts
/**
 * Scores how likely `domain` is a phishing copy of `merchantDomain`.
 *
 * A domain that does not resolve is reported as not phishing with zero
 * confidence. Otherwise each matching signal adds a fixed weight:
 * recent registration (20), WHOIS privacy (10), free SSL certificate (10),
 * content similarity above 0.7 (40) and logo similarity above 0.8 (30).
 * The total is capped at 100 and the domain is flagged when it reaches 60.
 *
 * @param domain - Candidate domain to inspect.
 * @param merchantDomain - Legitimate merchant domain to compare against.
 * @returns The verdict, the capped confidence score, and the names of the
 *   signals that fired, in the order listed above.
 */
export async function analyzePhishingDomain(domain: string, merchantDomain: string) {
  // Explicit type: an untyped `signals: []` is inferred as `never[]` under
  // strict mode and rejects every push below.
  const results: { isPhishing: boolean; confidence: number; signals: string[] } = {
    isPhishing: false,
    confidence: 0,
    signals: []
  };

  // 1. Check if domain exists
  const dnsExists = await checkDNS(domain);
  if (!dnsExists) return results;

  // 2-5. The remaining lookups do not depend on each other, so run them together.
  const [whois, ssl, similarity, logoSimilarity] = await Promise.all([
    getWhoisData(domain),
    checkSSL(domain),
    compareContent(domain, merchantDomain),
    compareLogo(domain, merchantDomain)
  ]);

  const addSignal = (name: string, weight: number): void => {
    results.signals.push(name);
    results.confidence += weight;
  };

  // 2. WHOIS analysis
  if (whois.createdRecently) addSignal('recently_registered', 20);
  if (whois.privacyProtected) addSignal('whois_privacy', 10);

  // 3. SSL certificate check
  if (ssl.isFree) addSignal('free_ssl', 10);

  // 4. Content similarity
  if (similarity > 0.7) addSignal('content_copy', 40);

  // 5. Logo/image similarity
  if (logoSimilarity > 0.8) addSignal('logo_copy', 30);

  // All weights together add up to 110; keep the score on a 0-100 scale.
  results.confidence = Math.min(results.confidence, 100);
  results.isPhishing = results.confidence >= 60;
  return results;
}
Storage
// app/db/phishing-detection.ts
/**
 * Stores the latest analysis of a domain for a shop.
 *
 * The record is looked up through the `shop_domain` key. A new record gets
 * `firstSeen` and `lastChecked` set to the current time; an existing record
 * keeps its `firstSeen` and only has status, confidence, signals and
 * `lastChecked` refreshed.
 *
 * @param shop - Shop the domain was analysed for.
 * @param data - Analysis result; `isPhishing` maps to status `active`,
 *   otherwise `safe`.
 * @returns The promise produced by the Prisma upsert.
 */
export async function upsertPhishingDomain(shop: string, data: PhishingResult) {
  const { domain, confidence, signals, isPhishing } = data;
  const status = isPhishing ? 'active' : 'safe';

  return prisma.phishingDomain.upsert({
    where: { shop_domain: { shop, domain } },
    create: {
      shop,
      domain,
      status,
      confidence,
      signals,
      firstSeen: new Date(),
      lastChecked: new Date()
    },
    update: { status, confidence, signals, lastChecked: new Date() }
  });
}
Marketplace Monitoring
Detects counterfeit products on e-commerce platforms.
Supported Platforms
| Platform | API | Search Methods |
|---|---|---|
| Amazon | SerpAPI | Text + Image |
| eBay | SerpAPI | Text + Image |
| Walmart | SerpAPI | Text |
| Etsy | SerpAPI | Text |
| Google Shopping | SerpAPI | Text + Image |
| Google Lens | SerpAPI | Image only |
Search Flow
// app/marketplaceMonitoring/scanner.ts
/**
 * Searches the marketplaces for listings of a product and returns the ones
 * that the analysis classifies as threats.
 *
 * Text searches (Amazon, eBay, Walmart, Google Shopping) use the product
 * title; image searches (Google Lens, Amazon by image) use the product image
 * URL. Every hit is passed to `analyzeResult`, and only analyses whose
 * `isThreat` flag is set are kept.
 *
 * @param product - Product to look for.
 * @returns Threat analyses, text-search hits first, then image-search hits.
 *   Rejects if any search or analysis rejects.
 */
export async function scanMarketplace(product: ScannedProduct) {
  // 1 + 2. Text and image searches are independent, so start them together.
  const [textSearches, imageSearches] = await Promise.all([
    Promise.all([
      searchAmazon(product.title),
      searchEbay(product.title),
      searchWalmart(product.title),
      searchGoogleShopping(product.title)
    ]),
    // Image-based search (more accurate for counterfeits)
    Promise.all([
      searchGoogleLens(product.imageUrl),
      searchAmazonByImage(product.imageUrl)
    ])
  ]);

  // 3. Analyze results for threats
  const results = [];
  for (const result of [...textSearches, ...imageSearches].flat()) {
    const threat = await analyzeResult(result, product);
    // The analysis object is returned for every listing, so it is always
    // truthy; the verdict lives in `isThreat`.
    if (threat?.isThreat) {
      results.push(threat);
    }
  }
  return results;
}
Threat Analysis
/** Verdict produced for a single marketplace listing. */
interface MarketplaceThreatAnalysis {
  /** True when `confidence` reached the threat threshold of 50. */
  isThreat: boolean;
  /** Category chosen by `determineThreatType` from the fired signals. */
  threatType: 'counterfeit' | 'gray_market' | 'unauthorized';
  /** Sum of the fired signal weights, capped at 100. */
  confidence: number;
  /** Names of the signals that fired, in evaluation order. */
  signals: string[];
}

/**
 * Scores a marketplace listing against the merchant's product.
 *
 * Signals and weights: price below half the product price (30), seller
 * rating below 3 or fewer than 10 reviews (20), image similarity above 0.9
 * (40), and title similarity above 0.8 on a non-official seller (25).
 * The image comparison is skipped when either side has no image URL, so a
 * listing without a picture is still scored on its other signals.
 *
 * @param result - Listing returned by a marketplace search.
 * @param product - Merchant product the listing is compared with.
 * @returns The analysis for the listing; `isThreat` is set at confidence >= 50.
 */
export async function analyzeResult(result: SearchResult, product: ScannedProduct): Promise<MarketplaceThreatAnalysis> {
  const signals: string[] = [];
  let confidence = 0;

  // Price analysis (counterfeits often much cheaper)
  if (result.price < product.price * 0.5) {
    signals.push('suspicious_price');
    confidence += 30;
  }

  // Seller analysis (new sellers, low ratings)
  if (result.sellerRating < 3 || result.sellerReviewCount < 10) {
    signals.push('suspicious_seller');
    confidence += 20;
  }

  // Image similarity — only when both images are available; comparing a
  // missing URL would fail the download and abort the whole analysis.
  if (result.imageUrl && product.imageUrl) {
    const imageSimilarity = await compareImages(result.imageUrl, product.imageUrl);
    if (imageSimilarity > 0.9) {
      signals.push('exact_image_match');
      confidence += 40;
    }
  }

  // Title similarity with brand removal
  const titleMatch = compareTitles(result.title, product.title);
  if (titleMatch > 0.8 && !result.isOfficialSeller) {
    signals.push('title_copy');
    confidence += 25;
  }

  // All weights together add up to 115; keep the score on a 0-100 scale.
  confidence = Math.min(confidence, 100);

  return {
    isThreat: confidence >= 50,
    threatType: determineThreatType(signals),
    confidence,
    signals
  };
}
Image Similarity
// app/image-similarity/compare.ts
import sharp from 'sharp';
import { imageHash } from 'image-hash';
/**
 * Estimates how visually similar two remote images are.
 *
 * Both images are fetched and normalized, hashed with `imageHash`, and the
 * Hamming distance between the two hashes is mapped onto a score where 1
 * means the hashes are identical.
 *
 * @param url1 - URL of the first image.
 * @param url2 - URL of the second image.
 * @returns `1 - distance / 64`.
 */
export async function compareImages(url1: string, url2: string): Promise<number> {
  // NOTE(review): assumes the hash is 64 bits wide, and that `imageHash`
  // can be awaited with a single buffer argument — confirm both against the
  // hashing helper actually in use.
  const HASH_BITS = 64;

  // Stage 1: fetch + normalize both images concurrently.
  const normalized = await Promise.all([
    downloadAndNormalize(url1),
    downloadAndNormalize(url2)
  ]);

  // Stage 2: perceptual hash of each normalized image.
  const hashes = await Promise.all([
    imageHash(normalized[0]),
    imageHash(normalized[1])
  ]);

  // Stage 3: differing bits -> similarity score.
  const differingBits = hammingDistance(hashes[0], hashes[1]);
  return 1 - differingBits / HASH_BITS;
}
/**
 * Downloads an image and converts it to a 64x64 grayscale buffer.
 *
 * @param url - Image URL to fetch.
 * @returns The normalized image bytes produced by sharp.
 * @throws Error when the server answers with a non-2xx status, so an error
 *   page is never handed to the image decoder.
 */
async function downloadAndNormalize(url: string): Promise<Buffer> {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to download image ${url}: ${response.status} ${response.statusText}`);
  }
  const buffer = await response.arrayBuffer();

  // Normalize to 64x64 grayscale; `fit: 'fill'` ignores the aspect ratio so
  // every image ends up with the same dimensions.
  return sharp(Buffer.from(buffer))
    .resize(64, 64, { fit: 'fill' })
    .grayscale()
    .toBuffer();
}
Domain Monitoring
Monitors for new domains that might impersonate the merchant.
Certificate Transparency Logs
// app/domain-monitoring/ct-monitor.ts
/**
 * Records suspicious look-alike domains found in Certificate Transparency logs.
 *
 * Every entry returned by `queryCTLogs` is checked with `analyzeNewDomain`;
 * entries judged suspicious are stored as `domainVariant` rows with source
 * `ct_log` and status `pending_review`. Entries are handled one at a time,
 * in the order returned.
 *
 * @param merchantDomain - Merchant domain to monitor; also stored as `shop`.
 */
export async function monitorCertificates(merchantDomain: string) {
  const candidates = await queryCTLogs(merchantDomain);

  for (const candidate of candidates) {
    const suspicious = await analyzeNewDomain(candidate, merchantDomain);
    if (!suspicious) continue;

    const record = {
      shop: merchantDomain,
      domain: candidate.name,
      source: 'ct_log',
      issueDate: candidate.notBefore,
      status: 'pending_review'
    };
    await prisma.domainVariant.create({ data: record });
  }
}
Worker Architecture
Background workers run on Fly.io:
// app/workers/scan-worker.ts
/**
 * Main loop of a scan worker: claim a job, process it, record the outcome.
 *
 * When no job is available the worker sleeps for 30 seconds and polls again.
 * A job that throws while being processed (or while being marked completed)
 * is marked `failed` with the error text and an incremented `retryCount`.
 * Errors from claiming a job or from writing the failure record are logged
 * and do not terminate the loop.
 */
async function runWorker() {
  const workerId = generateWorkerId();
  const idleDelayMs = 30_000;

  while (true) {
    // Claim a job; a claim error is treated like "no job right now" so one
    // failed query does not take the worker down.
    let job;
    try {
      job = await claimScanJob(workerId);
    } catch (error: unknown) {
      console.error('[scan-worker] failed to claim a job:', error);
      await sleep(idleDelayMs);
      continue;
    }

    if (!job) {
      // No jobs, wait and retry
      await sleep(idleDelayMs);
      continue;
    }

    try {
      // Process the job
      await processJob(job);

      // Mark complete
      await prisma.productScanJob.update({
        where: { id: job.id },
        data: { status: 'completed', completedAt: new Date() }
      });
    } catch (error: unknown) {
      // Anything can be thrown, not only Error instances.
      const message = error instanceof Error ? error.message : String(error);

      // Mark failed
      try {
        await prisma.productScanJob.update({
          where: { id: job.id },
          data: {
            status: 'failed',
            error: message,
            retryCount: { increment: 1 }
          }
        });
      } catch (updateError: unknown) {
        console.error(`[scan-worker] could not mark job ${job.id} as failed:`, updateError);
      }
    }
  }
}
External APIs
SerpAPI Integration
// app/background-search/serp-client.ts
const SERP_API_KEY = process.env.SERP_API_KEY;

/**
 * Runs a text search on Amazon through SerpAPI.
 *
 * @param query - Search text, sent as the `k` parameter.
 * @returns The `organic_results` array of the response, or an empty array
 *   when the response has none.
 * @throws Error when SerpAPI answers with a non-2xx status (for example an
 *   invalid or missing API key), instead of silently reporting no results.
 */
export async function searchAmazon(query: string) {
  // URLSearchParams encodes every value, including the API key.
  const params = new URLSearchParams({
    engine: 'amazon',
    amazon_domain: 'amazon.com',
    k: query,
    api_key: SERP_API_KEY ?? ''
  });

  const response = await fetch(`https://serpapi.com/search.json?${params.toString()}`);
  if (!response.ok) {
    throw new Error(`SerpAPI amazon search failed: ${response.status} ${response.statusText}`);
  }

  const data = await response.json();
  return data.organic_results || [];
}
/**
 * Runs a reverse image search through the SerpAPI Google Lens engine.
 *
 * @param imageUrl - Public URL of the image to look up, sent as `url`.
 * @returns The `visual_matches` array of the response, or an empty array
 *   when the response has none.
 * @throws Error when SerpAPI answers with a non-2xx status, instead of
 *   silently reporting no matches.
 */
export async function searchGoogleLens(imageUrl: string) {
  // URLSearchParams encodes every value, including the API key.
  const params = new URLSearchParams({
    engine: 'google_lens',
    url: imageUrl,
    api_key: SERP_API_KEY ?? ''
  });

  const response = await fetch(`https://serpapi.com/search.json?${params.toString()}`);
  if (!response.ok) {
    throw new Error(`SerpAPI google_lens search failed: ${response.status} ${response.statusText}`);
  }

  const data = await response.json();
  return data.visual_matches || [];
}