-
Notifications
You must be signed in to change notification settings - Fork 49
Expand file tree
/
Copy pathcrawl.ts
More file actions
71 lines (68 loc) · 1.74 KB
/
crawl.ts
File metadata and controls
71 lines (68 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
/**
* Types for crawl command
*/
/**
 * Options accepted by the crawl command.
 *
 * `urlOrJobId` is the only required field; everything else is optional.
 */
export interface CrawlOptions {
  // --- Connection -----------------------------------------------------------

  /** Firecrawl API key. */
  apiKey?: string;
  /** Firecrawl API URL. */
  apiUrl?: string;

  // --- Target and mode ------------------------------------------------------

  /** Either the URL to crawl or the ID of a job whose status is checked. */
  urlOrJobId: string;
  /** Check the status of an existing crawl job. */
  status?: boolean;

  // --- Waiting --------------------------------------------------------------

  /** Wait for the crawl to complete. */
  wait?: boolean;
  /** Polling interval while waiting, in seconds. */
  pollInterval?: number;
  /** Timeout while waiting, in seconds. */
  timeout?: number;
  /** Show progress dots while waiting. */
  progress?: boolean;

  // --- Output ---------------------------------------------------------------

  /** Path of the output file. */
  output?: string;
  /** Pretty-print the JSON output. */
  pretty?: boolean;

  // --- Crawl scope ----------------------------------------------------------

  /** Maximum number of pages to crawl. */
  limit?: number;
  /** Maximum crawl depth. */
  maxDepth?: number;
  /** Paths to exclude. */
  excludePaths?: string[];
  /** Paths to include. */
  includePaths?: string[];
  /** How the sitemap is handled: `'skip'` or `'include'`. */
  sitemap?: 'skip' | 'include';
  /** Ignore query parameters. */
  ignoreQueryParameters?: boolean;
  /** Crawl the entire domain. */
  crawlEntireDomain?: boolean;
  /** Allow external links. */
  allowExternalLinks?: boolean;
  /** Allow subdomains. */
  allowSubdomains?: boolean;

  // --- Request pacing and content -------------------------------------------

  /** Delay between requests. NOTE(review): unit is not stated here — confirm. */
  delay?: number;
  /** Maximum concurrency. */
  maxConcurrency?: number;
  /** Maximum age of cached content, in milliseconds (API-level caching). */
  maxAge?: number;
  /** Include only the main content. */
  onlyMainContent?: boolean;
}
/**
 * Result envelope: a success flag plus an optional payload and an optional
 * error message.
 */
export interface CrawlResult {
  /** Whether the operation succeeded. */
  success: boolean;
  /**
   * Result payload. Typed as `any`, so the compiler does not check its shape;
   * consumers need to validate it themselves.
   */
  data?: any;
  /** Error message. Presumably set when `success` is false — confirm with producers. */
  error?: string;
}
/**
 * Result envelope for a crawl status check: a success flag, an optional status
 * payload and an optional error message.
 */
export interface CrawlStatusResult {
  /** Whether the status check succeeded. */
  success: boolean;
  /** Status details of the crawl job. */
  data?: {
    /** Job identifier. */
    id: string;
    /** State of the job. */
    status:
      | 'scraping'
      | 'completed'
      | 'failed'
      | 'cancelled';
    /** Total count. Presumably pages in the job — confirm. */
    total: number;
    /** Completed count. Presumably pages finished so far — confirm. */
    completed: number;
    /** Credits used. */
    creditsUsed?: number;
    /** Expiry, as a string. NOTE(review): format not shown here — confirm. */
    expiresAt?: string;
  };
  /** Error message. Presumably set when `success` is false — confirm with producers. */
  error?: string;
}