Skip to content

Commit c80e1af

Browse files
authored
Merge pull request #46 from keepsimpleio/chore/llms-files
chore: change the generate file and separate the logic of generating llms
2 parents 842bb41 + db99b18 commit c80e1af

6 files changed

Lines changed: 246 additions & 145 deletions

File tree

.github/workflows/generate-llms.yml

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@ name: Generate LLMs files
22

33
on:
44
workflow_dispatch:
5+
inputs:
6+
environment:
7+
description: 'Environment'
8+
required: true
9+
default: 'dev'
10+
type: choice
11+
options:
12+
- dev
13+
- prod
514

615
permissions:
716
contents: write
@@ -23,27 +32,24 @@ jobs:
2332
- name: Install dependencies
2433
run: yarn install --frozen-lockfile
2534

26-
- name: Copy prod env file
35+
- name: Copy env file
2736
run: |
28-
echo "${{ secrets.ENV_PROD }}" | base64 -d > .env
37+
if [ "${{ inputs.environment }}" = "prod" ]; then
38+
echo "${{ secrets.ENV_PROD }}" | base64 -d > .env
39+
else
40+
echo "${{ secrets.ENV_DEV }}" | base64 -d > .env
41+
fi
2942
rm -f .env.local
3043
31-
- name: Generate llms.txt
32-
run: node scripts/generate-llms.mjs
33-
env:
34-
LLMS_MODE: curated
35-
36-
- name: Generate llms-full.txt
37-
run: node scripts/generate-llms.mjs
38-
env:
39-
LLMS_MODE: full
44+
- name: Generate llms-full-pages
45+
run: yarn generate:llms:pages
4046

4147
- name: Commit and push generated files
4248
run: |
4349
git config user.name "github-actions"
4450
git config user.email "github-actions@github.com"
45-
git add public/keepsimple_/
51+
git add public/uxcore_/
4652
if ! git diff --cached --quiet; then
47-
git commit -m "chore: regenerate llms files"
53+
git commit -m "chore: regenerate llms pages"
4854
git push
49-
fi
55+
fi

package.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@
1616
"test:edge": "cypress run --browser edge",
1717
"test:all": "npm run test:chrome && npm run test:firefox && npm run test:edge",
1818
"prepare": "husky install",
19-
"generate:llms": "tsx --tsconfig scripts/tsconfig.json scripts/generate-llms.ts",
20-
"generate:llms:full": "tsx --tsconfig scripts/tsconfig.json scripts/generate-llms-full.ts"
19+
"generate:llms": "cross-env LLMS_MODE=curated ts-node --compiler-options '{\"module\":\"commonjs\",\"target\":\"es2020\"}' scripts/generate-llms.ts",
20+
"generate:llms:full": "ts-node --compiler-options '{\"module\":\"commonjs\",\"target\":\"es2020\"}' scripts/generate-llms-full.ts",
21+
"generate:llms:pages": "ts-node --compiler-options '{\"module\":\"commonjs\",\"target\":\"es2020\"}' scripts/generate-llms-pages.ts"
2122
},
2223
"lint-staged": {
2324
"**/*.{ts,tsx}": [

scripts/generate-llms-full.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
process.env.LLMS_OUTPUT_FILE = 'uxcore_/llms-full.txt';
22
// Large enough to include all current UXCore (105) and UXCG entries.
33
process.env.LLMS_DYNAMIC_LIMIT = '1000';
4-
process.env.LLMS_WRITE_SLUG_MDS = 'true';
5-
process.env.LLMS_SLUG_MD_DIR = 'uxcore_/llms-full-pages';
64

75
void import('./generate-llms');

scripts/generate-llms-pages.ts

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import * as fs from 'fs';
2+
import * as path from 'path';
3+
4+
import {
5+
absoluteRoute,
6+
OutputPage,
7+
pickSeoDescription,
8+
STRAPI_BASE,
9+
strapiGet,
10+
} from './generate-llms-shared';
11+
12+
// Directory (relative to `public/`) that receives the generated per-page markdown files.
// `LLMS_PAGES_DIR` overrides it; because `||` is used, an empty value also falls back to the default.
const OUTPUT_DIR = process.env.LLMS_PAGES_DIR || 'uxcore_/llms-full-pages';
13+
14+
// Abort before doing any work when the shared module resolved no Strapi base URL.
if (!STRAPI_BASE) {
15+
console.error('[error] STRAPI_URL or NEXT_PUBLIC_STRAPI must be set in .env');
16+
process.exit(1);
17+
}
18+
19+
/**
 * Extracts the final path segment of a route.
 *
 * Trailing slashes are ignored, so `/uxcore/foo/` yields `foo`.
 *
 * @param route - Site-relative route such as `/uxcore/some-slug`.
 * @returns The last segment, or `null` when nothing remains (e.g. `/` or an empty string).
 */
function routeSlug(route: string): string | null {
  const trimmed = route.replace(/\/+$/, '');
  // Everything after the last '/' (the whole string when there is none).
  const lastSegment = trimmed.slice(trimmed.lastIndexOf('/') + 1);
  return lastSegment === '' ? null : lastSegment;
}
24+
25+
/**
 * Writes one small markdown file per page under `<baseDir>/<slugSection>/<slug>.md`.
 *
 * Pages without a `slugSection`, or whose route yields no slug, are skipped.
 * Each file contains a heading with the page name followed by its absolute URL
 * and description (an empty description when `seoDescription` is null).
 *
 * @param pages - Pages to materialise.
 * @param baseDir - Directory that holds the per-section sub-directories.
 */
function writeSlugMarkdownFiles(pages: OutputPage[], baseDir: string): void {
  pages.forEach(entry => {
    const section = entry.slugSection;
    if (!section) return;

    const fileStem = routeSlug(entry.route);
    if (!fileStem) return;

    // Section directories are created lazily, right before the first write into them.
    const targetDir = path.join(baseDir, section);
    fs.mkdirSync(targetDir, { recursive: true });

    const heading = `# ${entry.name}`;
    const urlLine = `- URL: ${absoluteRoute(entry.route)}`;
    const descriptionLine = `- Description: ${entry.seoDescription ?? ''}`;
    // The trailing '' makes the file end with a newline.
    const markdown = [heading, '', urlLine, descriptionLine, ''].join('\n');

    const targetFile = path.join(targetDir, `${fileStem}.md`);
    fs.writeFileSync(targetFile, markdown, 'utf-8');
  });
}
45+
46+
/** Upper bound on paginated requests per collection; guards against a bogus `pageCount`. */
const MAX_STRAPI_PAGES = 50;

/** Describes one Strapi collection whose entries are turned into slug pages. */
interface SlugCollection {
  /** Strapi REST collection name, e.g. `biases`. */
  endpoint: string;
  /** Route prefix and output sub-directory. */
  section: 'uxcore' | 'uxcg';
  /** Label used to build a fallback page name when an entry has no title. */
  fallbackLabel: string;
}

/**
 * Fetches every entry of a Strapi collection and maps it to an `OutputPage`.
 *
 * The request still asks for `pageSize=1000`, but the server may cap the page
 * size (Strapi's REST `maxLimit` setting), so the loop keeps requesting pages
 * until the `meta.pagination.pageCount` reported by the response is reached.
 * Entries without a slug are dropped.
 *
 * Best-effort: on any failure the section is skipped as a whole (a message is
 * logged and an empty list is returned) so a partial section is never written.
 */
async function fetchSlugPages(collection: SlugCollection): Promise<OutputPage[]> {
  const { endpoint, section, fallbackLabel } = collection;
  try {
    const pages: OutputPage[] = [];
    let pageCount = 1;

    for (let page = 1; page <= pageCount && page <= MAX_STRAPI_PAGES; page += 1) {
      const data = await strapiGet(
        `${endpoint}?locale=en&sort=number&pagination[pageSize]=1000&pagination[page]=${page}&populate[OGTags][populate]=ogImage`,
      );

      const items = Array.isArray(data?.data) ? data.data : [];
      for (const item of items) {
        const attrs = item?.attributes ?? {};
        const slug = attrs?.slug;
        if (!slug) continue;
        pages.push({
          route: `/${section}/${slug}`,
          name: String(
            attrs?.title ?? `${fallbackLabel} ${attrs?.number ?? slug}`,
          ),
          seoDescription: pickSeoDescription(attrs),
          slugSection: section,
        });
      }

      // Trust the server-reported page count only when it is a usable number.
      const reportedPageCount = Number(data?.meta?.pagination?.pageCount);
      if (Number.isFinite(reportedPageCount)) {
        pageCount = reportedPageCount;
      }
    }

    return pages;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.log(`[pages] skipping ${section} slugs — fetch failed: ${reason}`);
    return [];
  }
}

/**
 * Loads all UXCore bias pages (`/uxcore/<slug>`) from the `biases` collection.
 *
 * @returns The pages, or an empty list when the fetch fails.
 */
async function fetchUxcoreSlugPages(): Promise<OutputPage[]> {
  return fetchSlugPages({
    endpoint: 'biases',
    section: 'uxcore',
    fallbackLabel: 'UXCore',
  });
}

/**
 * Loads all UXCG question pages (`/uxcg/<slug>`) from the `questions` collection.
 *
 * @returns The pages, or an empty list when the fetch fails.
 */
async function fetchUxcgSlugPages(): Promise<OutputPage[]> {
  return fetchSlugPages({
    endpoint: 'questions',
    section: 'uxcg',
    fallbackLabel: 'UXCG',
  });
}
99+
100+
/**
 * Entry point: fetches the UXCore and UXCG slug pages from Strapi in parallel
 * and writes one markdown file per page under `public/<OUTPUT_DIR>`.
 *
 * @throws Error when no page at all could be fetched. Both fetchers swallow
 * their own failures and return an empty list, so without this check an
 * unreachable Strapi would end in a "Successfully wrote 0 page files" run
 * with exit code 0.
 */
async function main(): Promise<void> {
  console.log('=== generate-llms-pages.ts ===\n');

  console.log('[step 1] Fetching all slug pages from Strapi...');
  const [uxcorePages, uxcgPages] = await Promise.all([
    fetchUxcoreSlugPages(),
    fetchUxcgSlugPages(),
  ]);

  const allPages = [...uxcorePages, ...uxcgPages];
  console.log(
    ` found ${uxcorePages.length} uxcore + ${uxcgPages.length} uxcg = ${allPages.length} total\n`,
  );

  if (allPages.length === 0) {
    throw new Error('no pages were fetched from Strapi; nothing to write');
  }

  console.log(`[step 2] Writing markdown files to public/${OUTPUT_DIR}...`);
  const baseDir = path.join(process.cwd(), 'public', OUTPUT_DIR);
  writeSlugMarkdownFiles(allPages, baseDir);

  console.log(
    `\nSuccessfully wrote ${allPages.length} page files to public/${OUTPUT_DIR}`,
  );
}

// Any rejection (including the empty-result guard above) fails the process.
main().catch(err => {
  console.error('\n[error] generate-llms-pages failed:', err);
  process.exit(1);
});

scripts/generate-llms-shared.ts

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import * as dotenv from 'dotenv';
2+
import * as http from 'http';
3+
import * as https from 'https';
4+
import * as path from 'path';
5+
6+
// Load environment files from the current working directory: `.env` first, then `.env.local`.
// Both calls pass `override: true`; per dotenv's `override` option this lets values from the
// file replace variables that are already set, so `.env.local` is applied last.
dotenv.config({ path: path.join(process.cwd(), '.env'), override: true });
7+
dotenv.config({ path: path.join(process.cwd(), '.env.local'), override: true });
8+
9+
// Strapi base URL: `STRAPI_URL` when non-empty, otherwise `NEXT_PUBLIC_STRAPI`, otherwise ''.
export const STRAPI_BASE =
10+
process.env.STRAPI_URL || process.env.NEXT_PUBLIC_STRAPI || '';
11+
// Public site origin taken from `NEXT_PUBLIC_DOMAIN` with one trailing slash removed ('' when unset).
export const SITE_BASE_URL = (process.env.NEXT_PUBLIC_DOMAIN || '').replace(
12+
/\/$/,
13+
'',
14+
);
15+
16+
// Back-fill `NEXT_PUBLIC_STRAPI` from STRAPI_BASE when it is unset or empty.
// NOTE(review): presumably for modules that read NEXT_PUBLIC_STRAPI directly — confirm against importers.
process.env.NEXT_PUBLIC_STRAPI = process.env.NEXT_PUBLIC_STRAPI || STRAPI_BASE;
17+
18+
/** Replacement text for the character references that `stripHtml` decodes. */
const DECODED_ENTITIES: Readonly<Record<string, string>> = {
  nbsp: ' ',
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
  '#39': "'",
};

/**
 * Converts an HTML fragment to single-line plain text.
 *
 * Tags are removed, a small set of character references is decoded, runs of
 * whitespace collapse to one space and the result is trimmed.
 *
 * Entities are decoded in a single pass so that already-escaped text is
 * decoded exactly once: `&amp;lt;` becomes the literal text `&lt;`, not `<`.
 *
 * @param html - HTML fragment to flatten.
 * @returns The plain-text content.
 */
export function stripHtml(html: string): string {
  return html
    .replace(/<[^>]*>/g, '')
    .replace(
      /&(nbsp|amp|lt|gt|quot|apos|#39);/g,
      (entity: string, name: string) => DECODED_ENTITIES[name] ?? entity,
    )
    .replace(/\s+/g, ' ')
    .trim();
}
29+
30+
/**
 * Performs an HTTP or HTTPS GET request and resolves with the parsed JSON body.
 *
 * The transport is picked from the URL scheme (`https://` uses `https`,
 * anything else uses `http`). Redirects are not followed.
 *
 * @param url - Absolute URL to request.
 * @param headers - Extra request headers.
 * @param timeoutMs - Socket inactivity timeout in milliseconds. Without a
 *   timeout a stalled server would keep the script (and the CI job) hanging.
 * @returns The parsed JSON payload.
 * @throws Rejects with an `Error` on a non-2xx status, invalid JSON, a
 *   network/stream failure or a timeout.
 */
export function getJson(
  url: string,
  headers: Record<string, string> = {},
  timeoutMs = 30000,
): Promise<any> {
  return new Promise((resolve, reject) => {
    const client = url.startsWith('https://') ? https : http;
    const req = client.request(
      url,
      { method: 'GET', headers, timeout: timeoutMs },
      res => {
        const status = res.statusCode ?? 0;
        let raw = '';
        res.setEncoding('utf8');
        res.on('data', (chunk: string) => {
          raw += chunk;
        });
        // A failure while streaming the body must not leave the promise pending.
        res.on('error', reject);
        res.on('end', () => {
          if (status < 200 || status >= 300) {
            reject(new Error(`HTTP ${status} for ${url}`));
            return;
          }
          try {
            resolve(JSON.parse(raw));
          } catch (err) {
            reject(
              new Error(
                `Invalid JSON for ${url}: ${(err as Error).message || 'unknown error'}`,
              ),
            );
          }
        });
      },
    );
    // The 'timeout' event only signals inactivity; the request has to be
    // destroyed explicitly, which surfaces the error through 'error' below.
    req.on('timeout', () => {
      req.destroy(new Error(`Request timed out after ${timeoutMs}ms for ${url}`));
    });
    req.on('error', reject);
    req.end();
  });
}
63+
64+
/**
 * Fetches `<STRAPI_BASE>/api/<endpoint>` and returns the parsed JSON response.
 *
 * Trailing slashes on the base URL and leading slashes on the endpoint are
 * dropped so the two are always joined by exactly one `/api/` segment.
 *
 * @param endpoint - Path and query below `/api/`, e.g. `biases?locale=en`.
 * @returns The parsed JSON payload.
 * @throws Rejects when the underlying `getJson` call fails.
 */
export async function strapiGet(endpoint: string): Promise<any> {
  const base = STRAPI_BASE.replace(/\/+$/, '');
  const resource = endpoint.replace(/^\/+/, '');
  return getJson(`${base}/api/${resource}`);
}
68+
69+
/**
 * Picks the best available description for an entry and returns it as plain text.
 *
 * Candidates are tried in order: `seoDescription`, `OGTags.ogDescription`,
 * `ogDescription`. A candidate is used only when it still has text after HTML
 * stripping, so an empty (or markup-only) earlier field no longer hides a
 * populated later one.
 *
 * @param attrs - Entry attributes as returned by Strapi (may be null/undefined).
 * @returns The stripped description, or `null` when no candidate has text.
 */
export function pickSeoDescription(attrs: any): string | null {
  const candidates: unknown[] = [
    attrs?.seoDescription,
    attrs?.OGTags?.ogDescription,
    attrs?.ogDescription,
  ];

  for (const candidate of candidates) {
    if (!candidate) continue;
    const text = stripHtml(String(candidate));
    if (text) return text;
  }
  return null;
}
77+
78+
/**
 * Turns a site-relative route into an absolute URL using `SITE_BASE_URL`.
 *
 * When no base URL is configured the route is returned unchanged. The root
 * route `/` maps to the bare base URL, without a trailing slash.
 *
 * @param route - Site-relative route such as `/uxcore/some-slug`.
 * @returns The absolute URL, or `route` itself when no base URL is set.
 */
export function absoluteRoute(route: string): string {
  if (!SITE_BASE_URL) {
    return route;
  }
  const suffix = route === '/' ? '' : route;
  return SITE_BASE_URL + suffix;
}
83+
84+
/**
 * A single page entry handled by the llms generators.
 */
export interface OutputPage {
85+
// Site-relative route, e.g. `/uxcore/<slug>`.
route: string;
86+
// Human-readable page title.
name: string;
87+
// Plain-text description, or null when none is available.
seoDescription: string | null;
88+
// Section the page belongs to; optional.
slugSection?: 'uxcore' | 'uxcg';
89+
}

0 commit comments

Comments
 (0)