Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ Welcome to your new catalog repo! The primary way to personalize this catalog is
* `API_BASE_URL`: Hugging Face API base URL (default: `"https://huggingface.co/api/"`)
* `REFRESH_INTERVAL_DAYS`: Number of days to consider an item "new" (default: `30`)
* `ADDITIONAL_REPOS`: Array of forked or non-org GitHub repositories to include, formatted `<owner>/<repo-name>` (non-forks are included by default). Use `[]` if there are none you wish to include
* `ADDITIONAL_HF_REPOS`: Array of Hugging Face repos from outside the org to include. Each entry specifies `repo` (`<owner>/<repo-name>`) and `type` (`datasets`, `models`, or `spaces`). Use `[]` if there are none you wish to include

* **Typography:**
* `FONT_FAMILY`: Font family for the site (default: `"Inter"`)
Expand Down
48 changes: 45 additions & 3 deletions main.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ const configPromise = fetch('config.yaml')

// Module-scope lets — assigned after config loads, used by all functions below
let CONFIG;
let ORGANIZATION_NAME, CATALOG_REPO_NAME, API_BASE_URL, REFRESH_INTERVAL_DAYS, ADDITIONAL_REPOS;
let ORGANIZATION_NAME, CATALOG_REPO_NAME, API_BASE_URL, REFRESH_INTERVAL_DAYS, ADDITIONAL_REPOS, ADDITIONAL_HF_REPOS;

// Build a reverse lookup from TAG_GROUPS (defined in tag-groups.js): raw tag → [canonical tags]
// A raw tag may appear in multiple groups, so the value is an array.
Expand Down Expand Up @@ -296,6 +296,36 @@ const fetchHubItems = async (repoType) => {
}
let hfItems = await response.json();

// Pull in configured Hugging Face repos of this type that live outside the org.
const extraEntries = ADDITIONAL_HF_REPOS.filter(({ type }) => type === repoType);
if (extraEntries.length > 0) {
  // IDs already returned by the org listing; used to avoid duplicates.
  const knownIds = new Set(hfItems.map(({ id }) => id));

  // Keep the first occurrence of each configured repo and skip any that the
  // org listing already contains.
  const queued = new Set();
  const pending = [];
  for (const entry of extraEntries) {
    if (knownIds.has(entry.repo) || queued.has(entry.repo)) continue;
    queued.add(entry.repo);
    pending.push(entry);
  }

  // Resolve to the repo's JSON payload, or to null (after a console warning)
  // when the request fails or returns a non-OK status.
  const fetchExtra = async ({ repo }) => {
    try {
      const res = await fetch(`${API_BASE_URL}${repoType}/${repo}`);
      if (!res.ok) {
        console.warn(`Failed to fetch additional HF repo "${repo}": HTTP ${res.status}`);
        return null;
      }
      return await res.json();
    } catch (err) {
      console.warn(`Network error fetching additional HF repo "${repo}":`, err);
      return null;
    }
  };

  // All requests run in parallel; failed lookups (null) and anything whose
  // returned id is already in the org listing are dropped.
  const extras = (await Promise.all(pending.map(fetchExtra)))
    .filter(item => item && !knownIds.has(item.id));
  hfItems = [...hfItems, ...extras];
}
Comment thread
EmersonFras marked this conversation as resolved.

// Step 2: If we are fetching models, get the full details for each one.
if (repoType === 'models') {
const detailPromises = hfItems.map(item =>
Expand Down Expand Up @@ -731,7 +761,18 @@ document.addEventListener('DOMContentLoaded', async () => {
if (!CONFIG.ORGANIZATION_NAME) missing.push('ORGANIZATION_NAME');
if (!CONFIG.API_BASE_URL) missing.push('API_BASE_URL');
if (CONFIG.REFRESH_INTERVAL_DAYS == null) missing.push('REFRESH_INTERVAL_DAYS');
if (!Array.isArray(CONFIG.ADDITIONAL_REPOS)) missing.push('ADDITIONAL_REPOS (must be a list)');
if (!Array.isArray(CONFIG.ADDITIONAL_REPOS)) missing.push('ADDITIONAL_REPOS (must be a list)');
// ADDITIONAL_HF_REPOS must be a list whose entries each carry a non-empty
// "repo" string and a recognised "type"; problems are appended to `missing`.
if (Array.isArray(CONFIG.ADDITIONAL_HF_REPOS)) {
  const allowedTypes = new Set(['datasets', 'models', 'spaces']);
  const isWellFormed = (entry) =>
    Boolean(entry) &&
    typeof entry.repo === 'string' &&
    entry.repo.trim() !== '' &&
    allowedTypes.has(entry.type);

  const rejected = CONFIG.ADDITIONAL_HF_REPOS.filter((entry) => !isWellFormed(entry));
  if (rejected.length > 0) {
    // Show each offending entry as JSON so it can be located in the config.
    const listing = rejected.map((entry) => JSON.stringify(entry)).join(', ');
    missing.push(
      `ADDITIONAL_HF_REPOS entries must each have a non-empty "repo" string and "type" in {datasets, models, spaces}; bad entries: ${listing}`
    );
  }
} else {
  missing.push('ADDITIONAL_HF_REPOS (must be a list)');
}
if (!CONFIG.COLORS || typeof CONFIG.COLORS !== 'object') {
missing.push('COLORS (must be an object with primary, secondary, accent, accentDark, tag)');
} else {
Expand All @@ -754,7 +795,7 @@ document.addEventListener('DOMContentLoaded', async () => {
LOGO_URL: '', FAVICON_URL: '',
COLORS: { primary: '#92991c', secondary: '#5d8095', accent: '#0097b2', accentDark: '#4fd1eb', tag: '#9bcb5e' },
API_BASE_URL: 'https://huggingface.co/api/', REFRESH_INTERVAL_DAYS: 30,
ADDITIONAL_REPOS: [], FONT_FAMILY: 'Inter'
ADDITIONAL_REPOS: [], ADDITIONAL_HF_REPOS: [], FONT_FAMILY: 'Inter'
};
}

Expand All @@ -764,6 +805,7 @@ document.addEventListener('DOMContentLoaded', async () => {
API_BASE_URL = CONFIG.API_BASE_URL;
REFRESH_INTERVAL_DAYS = CONFIG.REFRESH_INTERVAL_DAYS;
ADDITIONAL_REPOS = CONFIG.ADDITIONAL_REPOS;
ADDITIONAL_HF_REPOS = CONFIG.ADDITIONAL_HF_REPOS;

// Apply CSS custom properties and document metadata
document.title = CONFIG.CATALOG_TITLE || 'Catalog';
Expand Down
11 changes: 11 additions & 0 deletions public/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,16 @@ ADDITIONAL_REPOS:
- "jennamk14/autonomous_drone_simulator"
- "ixlab/vidformer"

# Array of Hugging Face repos from outside the org to include.
# Each entry must specify "repo" (owner/name) and "type" (datasets, models, or spaces).
# ADDITIONAL_HF_REPOS:
# - repo: "user/dataset-name"
# type: "datasets"
# - repo: "user/model-name"
# type: "models"
# - repo: "user/space-name"
# type: "spaces"
ADDITIONAL_HF_REPOS: []
Comment thread
egrace479 marked this conversation as resolved.

# Typography
FONT_FAMILY: Inter # Font family for the site
35 changes: 35 additions & 0 deletions scripts/export-tags.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,21 @@ if (!Array.isArray(rawConfig.ADDITIONAL_REPOS)) {

const CONFIG = rawConfig;
const { ORGANIZATION_NAME, API_BASE_URL, ADDITIONAL_REPOS } = CONFIG;
// Validate ADDITIONAL_HF_REPOS up front: it must be an array, and every entry
// needs a non-empty "repo" string plus a "type" of datasets, models, or spaces.
// Any violation aborts the script with an error naming the config file.
const { ADDITIONAL_HF_REPOS } = CONFIG;
if (!Array.isArray(ADDITIONAL_HF_REPOS)) {
  throw new Error(`Invalid config at ${configPath}: ADDITIONAL_HF_REPOS must be an array.`);
}

const validHFTypes = new Set(['datasets', 'models', 'spaces']);
const badHFEntries = ADDITIONAL_HF_REPOS.filter((entry) => {
  const hasRepo =
    Boolean(entry) && typeof entry.repo === 'string' && entry.repo.trim() !== '';
  return !(hasRepo && validHFTypes.has(entry.type));
});

if (badHFEntries.length > 0) {
  // Render each offending entry as JSON so it can be found in the config.
  const rendered = badHFEntries.map((entry) => JSON.stringify(entry)).join(', ');
  throw new Error(
    `Invalid config at ${configPath}: ADDITIONAL_HF_REPOS entries must each have a non-empty "repo" string and "type" in {datasets, models, spaces}; bad entries: ${rendered}`
  );
}

// ---------------------------------------------------------------------------
// Fetch helpers
Expand Down Expand Up @@ -104,6 +119,26 @@ const collectHFTags = async (repoType) => {
console.log(`Fetching HF ${repoType}...`);
let items = (await get(`${API_BASE_URL}${repoType}?author=${ORGANIZATION_NAME}&full=true`)).json;

// Fetch additional HF repos of this type (configured via ADDITIONAL_HF_REPOS)
const additionalForType = ADDITIONAL_HF_REPOS.filter(entry => entry.type === repoType);
if (additionalForType.length) {
  // IDs already present in the org listing, so nothing is added twice.
  const existingIds = new Set(items.map(item => item.id));
  // Keep only the first occurrence of each configured repo.
  const seenRepos = new Set();
  const toFetch = additionalForType.filter(entry => {
    if (existingIds.has(entry.repo) || seenRepos.has(entry.repo)) return false;
    seenRepos.add(entry.repo);
    return true;
  });
  const fetched = await Promise.all(
    toFetch.map(entry =>
      get(`${API_BASE_URL}${repoType}/${entry.repo}`)
        .then(({ json }) => json)
        .catch(err => {
          // Best-effort: one failing repo must not abort the export, but the
          // failure is reported so a mistyped or unreachable repo is visible
          // instead of silently missing from the output.
          console.warn(`Failed to fetch additional HF repo "${entry.repo}":`, err);
          return null;
        })
    )
  );
  // Drop failed lookups (null) and anything whose returned id is already listed.
  items = [...items, ...fetched.filter(item => item && !existingIds.has(item.id))];
}
Comment thread
EmersonFras marked this conversation as resolved.

if (repoType === 'models') {
const details = await Promise.all(
items.map(item =>
Expand Down