feat(search): add pagefind provider support

Add Pagefind indexing and browser search adapters behind a provider switch.

This lets prebuild generate either Stork or Pagefind search artifacts and lets the existing search UI run against Pagefind while preserving scoped filters, excerpts, and result metadata.
This commit is contained in:
ThatGuySam 2026-03-15 13:42:07 -05:00
parent 727f84e4c2
commit e1da6eb880
12 changed files with 690 additions and 65 deletions

168
helpers/pagefind/browser.js Normal file
View file

@ -0,0 +1,168 @@
import {
isNonEmptyString
} from '~/helpers/check-types.js'
import {
pagefindBundleRelativeURL,
pagefindScriptURL
} from '~/helpers/pagefind/config.js'
/**
 * Escape the five HTML-significant characters so text can be placed inside markup.
 *
 * The ampersand is replaced first so the entities produced by the later
 * replacements are not escaped a second time.
 *
 * @param {string} [text=''] - Raw text; null/undefined is treated as empty.
 * @returns {string} The text with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function escapeHtml ( text = '' ) {
    return String( text ?? '' )
        .replaceAll('&', '&amp;')
        .replaceAll('<', '&lt;')
        .replaceAll('>', '&gt;')
        .replaceAll('"', '&quot;')
        .replaceAll("'", '&#39;')
}
/**
 * Backslash-escape every regular-expression metacharacter in a string so it
 * can be embedded in a RegExp source and matched literally.
 *
 * @param {string} value - Text to be matched literally.
 * @returns {string} The text with each metacharacter prefixed by a backslash.
 */
function escapeRegExp ( value ) {
    const metaCharacters = /[\\^$.*+?()[\]{}|]/g

    return value.replace( metaCharacters, character => `\\${ character }` )
}
/**
 * Build HTML markup for a result title with every occurrence of the search
 * terms wrapped in the highlight span.
 *
 * Matching is done against the raw title and each segment is HTML-escaped
 * separately. Matching against already-escaped markup would let a term such
 * as "amp" or "lt" hit the inside of an entity and corrupt it, and would
 * stop terms containing `&` or `<` from ever matching.
 *
 * @param {string} title - Raw (unescaped) title text; falsy values become ''.
 * @param {string[]} [terms=[]] - Search terms to highlight, matched case-insensitively.
 * @returns {string} Escaped title markup, with matches wrapped in highlight spans.
 */
export function makeHighlightedPagefindTitle ( title, terms = [] ) {
    const rawTitle = String( title || '' )

    const highlightedTerms = terms
        .filter( term => isNonEmptyString( term ) )
        .map( term => term.trim() )
        .filter( term => term.length > 0 )
        // Longest first so the alternation prefers "xcode" over "x"
        .sort( ( a, b ) => b.length - a.length )

    if ( highlightedTerms.length === 0 ) {
        return escapeHtml( rawTitle )
    }

    const pattern = highlightedTerms.map( escapeRegExp ).join('|')
    const matcher = new RegExp( pattern, 'gi' )

    let titleMarkup = ''
    let cursor = 0

    // Every term is non-empty, so each match consumes at least one character
    for ( const match of rawTitle.matchAll( matcher ) ) {
        titleMarkup += escapeHtml( rawTitle.slice( cursor, match.index ) )
        titleMarkup += `<span class="stork-highlighted-text font-bold text-white bg-green-800 rounded px-1">${ escapeHtml( match[ 0 ] ) }</span>`
        cursor = match.index + match[ 0 ].length
    }

    return titleMarkup + escapeHtml( rawTitle.slice( cursor ) )
}
/**
 * Rewrite the `<mark>` tags in excerpt markup into the highlight `<span>`
 * used for title highlights in this file.
 *
 * @param {string} [excerptMarkup=''] - Excerpt HTML containing `<mark>` tags.
 * @returns {string} The excerpt with `<mark …>`/`</mark>` swapped for spans, or '' when the input is not a non-empty string.
 */
export function normalizePagefindExcerptMarkup ( excerptMarkup = '' ) {
    const hasExcerpt = isNonEmptyString( excerptMarkup )

    if ( !hasExcerpt ) {
        return ''
    }

    const highlightOpenTag = '<span class="stork-highlighted-text font-bold text-white bg-green-800 rounded px-1">'
    const withOpenTags = excerptMarkup.replace( /<mark[^>]*>/g, highlightOpenTag )

    return withOpenTags.replace( /<\/mark>/g, '</span>' )
}
/**
 * Convert one loaded Pagefind result into the listing shape returned below.
 *
 * Missing metadata falls back to the result URL (or 'uncategorized' for the
 * category slug); a missing or non-positive timestamp yields `lastUpdated: null`.
 *
 * @param {object} resultData - Result data exposing `url`, `excerpt` and `meta`.
 * @param {object} [options]
 * @param {string[]} [options.highlightTerms=[]] - Terms to highlight in the title.
 * @returns {object} Listing with name, text, endpoint, slug, category, lastUpdated and resultExcerptsMarkup.
 */
export function mapPagefindDataToListing ( resultData, {
    highlightTerms = []
} = {} ) {
    const meta = resultData.meta ?? {}
    const endpoint = resultData.url

    const name = makeHighlightedPagefindTitle( meta.title || endpoint, highlightTerms )

    // Coerced with Number() before the positivity check
    const timestamp = Number( meta.lastUpdatedTimestamp || 0 )
    const lastUpdated = timestamp > 0 ? { timestamp } : null

    const resultExcerptsMarkup = []

    if ( isNonEmptyString( resultData.excerpt ) ) {
        resultExcerptsMarkup.push( normalizePagefindExcerptMarkup( resultData.excerpt ) )
    }

    return {
        name,
        text: meta.text || endpoint,
        endpoint,
        slug: meta.slug || endpoint,
        category: {
            slug: meta.categorySlug || 'uncategorized'
        },
        lastUpdated,
        resultExcerptsMarkup
    }
}
/**
 * Browser-side wrapper around the Pagefind search script.
 *
 * Loads the script on demand, initialises it once, and exposes `lazyQuery`,
 * which cancels the previous in-flight query whenever a new one starts.
 */
export class PagefindClient {
    /**
     * @param {object} [options]
     * @param {string} [options.bundlePath] - Bundle location handed to Pagefind; defaults to `pagefindBundleRelativeURL`.
     * @param {object} [options.pagefind] - Already-loaded Pagefind module; when given, the dynamic import is skipped.
     * @param {number} [options.debounceMs=100] - Delay passed to `debouncedSearch`.
     */
    constructor ( options = {} ) {
        this.bundlePath = options.bundlePath || pagefindBundleRelativeURL
        this.pagefind = options.pagefind || null
        // `??` rather than `||` so an explicit 0 is honoured
        this.debounceMs = options.debounceMs ?? 100
        this.cancelCurrentQuery = null
    }

    // 'not-setup' → 'pending' → 'complete'; falls back to 'not-setup' when setup fails
    setupState = 'not-setup'

    // In-flight setup shared by concurrent setup() callers; null when idle
    setupPromise = null

    get isSetup () {
        return this.setupState === 'complete'
    }

    /**
     * Resolve once setup has completed, polling every 50ms until it does.
     *
     * @returns {Promise<void>}
     */
    waitForSetup () {
        return new Promise( resolve => {
            if ( this.isSetup ) {
                // Already done: resolve without starting a poll timer
                resolve()
                return
            }

            const timer = setInterval( () => {
                if ( !this.isSetup ) return

                clearInterval( timer )
                resolve()
            }, 50 )
        })
    }

    /**
     * Import the Pagefind script and point it at the bundle, unless a module
     * was injected or has already been loaded.
     *
     * @returns {Promise<void>}
     */
    async loadPagefindScript () {
        if ( this.pagefind ) return

        const pagefindModule = await import(/* @vite-ignore */ pagefindScriptURL)

        this.pagefind = pagefindModule.default || pagefindModule

        // Awaited so the bundle path is applied before init() runs
        await this.pagefind.options({
            bundlePath: this.bundlePath
        })
    }

    /**
     * Load and initialise Pagefind exactly once.
     *
     * Concurrent callers share the same in-flight attempt. When the attempt
     * fails, every caller sees the rejection and the state returns to
     * 'not-setup' so a later call can retry instead of waiting forever.
     *
     * @returns {Promise<void>}
     */
    async setup () {
        if ( this.isSetup ) return

        if ( this.setupPromise === null ) {
            this.setupState = 'pending'

            this.setupPromise = ( async () => {
                try {
                    await this.loadPagefindScript()

                    if ( typeof this.pagefind.init === 'function' ) {
                        await this.pagefind.init()
                    }

                    this.setupState = 'complete'
                } catch ( error ) {
                    this.setupState = 'not-setup'
                    throw error
                } finally {
                    this.setupPromise = null
                }
            })()
        }

        await this.setupPromise
    }

    /**
     * Run a search, setting Pagefind up first when needed.
     *
     * Starting a new query cancels the previous in-flight one. A cancelled or
     * failed query is logged and resolves to `null` rather than rejecting.
     *
     * @param {string} query - Search text; a blank query runs `search( null, … )`.
     * @param {object} [options]
     * @param {object} [options.filters={}] - Passed through when it has keys.
     * @param {object} [options.sort={}] - Passed through when it has keys.
     * @returns {Promise<object|null>} Whatever Pagefind resolves with, or null when cancelled or failed.
     */
    async lazyQuery ( query, {
        filters = {},
        sort = {}
    } = {} ) {
        const searchOptions = {}

        if ( Object.keys( filters ).length > 0 ) {
            searchOptions.filters = filters
        }

        if ( Object.keys( sort ).length > 0 ) {
            searchOptions.sort = sort
        }

        const trimmedQuery = query.trim()

        if ( this.cancelCurrentQuery !== null ) {
            this.cancelCurrentQuery()
        }

        let cancelThisQuery = null

        const cancellation = new Promise( ( _, reject ) => {
            cancelThisQuery = () => { reject({ message: `Cancelled previous query for ${ trimmedQuery }`, canceled: true }) }
        })

        this.cancelCurrentQuery = cancelThisQuery

        const runSearch = async () => {
            if ( !this.isSetup ) await this.setup()

            if ( trimmedQuery.length === 0 ) {
                return this.pagefind.search( null, searchOptions )
            }

            return this.pagefind.debouncedSearch( trimmedQuery, searchOptions, this.debounceMs )
        }

        try {
            // Racing keeps setup/search failures on this promise instead of
            // leaving the query pending forever
            return await Promise.race([ runSearch(), cancellation ])
        } catch ( err ) {
            console.log('Query rejected', err)

            return null
        } finally {
            // A newer query may already own the hook; only clear our own
            if ( this.cancelCurrentQuery === cancelThisQuery ) {
                this.cancelCurrentQuery = null
            }
        }
    }
}

View file

@ -0,0 +1,5 @@
// Language code used when building the Pagefind index and records
export const pagefindLanguage = 'en'

// Default output path the generated Pagefind files are written to
export const pagefindOutputPath = './static/pagefind'

// Default bundle path handed to the browser-side Pagefind client
export const pagefindBundleRelativeURL = '/pagefind/'

// File name of the Pagefind script inside the bundle
const pagefindScriptFilename = 'pagefind.js'

// URL the browser client dynamically imports the Pagefind script from
export const pagefindScriptURL = pagefindBundleRelativeURL + pagefindScriptFilename

// NOTE(review): presumably the JSON file listing sitemap endpoints for prebuild — confirm against callers
export const sitemapEndpointsPath = './static/sitemap-endpoints.json'

181
helpers/pagefind/index.js Normal file
View file

@ -0,0 +1,181 @@
import fs from 'fs-extra'
import * as pagefind from 'pagefind'
import {
isNonEmptyArray,
isNonEmptyString
} from '~/helpers/check-types.js'
import {
getRouteType
} from '~/helpers/app-derived.js'
import {
getAppCategory
} from '~/helpers/categories.js'
import {
pagefindLanguage,
pagefindOutputPath
} from '~/helpers/pagefind/config.js'
/**
 * Pick the searchable listing out of a sitemap entry's payload.
 *
 * Checks `app`, then `listing`, then `video`, and returns the first truthy one.
 *
 * @param {object} sitemapEntry - Entry with a `payload` object.
 * @returns {object|null} The first truthy payload listing, or null when none is present.
 */
function getSearchListing ( sitemapEntry ) {
    const payloadKeys = [ 'app', 'listing', 'video' ]

    for ( const key of payloadKeys ) {
        const candidate = sitemapEntry.payload[ key ]

        if ( candidate ) return candidate
    }

    return null
}
/**
 * Append a trimmed string to `parts`, ignoring anything that is not a non-empty string.
 *
 * @param {string[]} parts - Array to append to (mutated in place).
 * @param {*} value - Candidate text.
 * @returns {void}
 */
function pushContentPart ( parts, value ) {
    if ( isNonEmptyString( value ) ) {
        const trimmedValue = value.trim()

        parts.push( trimmedValue )
    }
}
/**
 * Append a list to `parts` as one comma-separated string, ignoring anything
 * that is not a non-empty array.
 *
 * @param {string[]} parts - Array to append to (mutated in place).
 * @param {*} values - Candidate list.
 * @returns {void}
 */
function pushListContentPart ( parts, values ) {
    if ( isNonEmptyArray( values ) ) {
        const joinedValues = values.join(', ')

        pushContentPart( parts, joinedValues )
    }
}
/**
 * Turn a value into a filter key: hyphens become underscores and
 * surrounding whitespace is trimmed.
 *
 * @param {*} value - Candidate filter value.
 * @returns {string|null} The normalised value, or null when the input is not a non-empty string.
 */
function normalizeFilterValue ( value ) {
    return isNonEmptyString( value )
        ? value.split('-').join('_').trim()
        : null
}
/**
 * Decide whether a sitemap entry carries a listing that can be indexed.
 *
 * @param {object} sitemapEntry - Entry with a `payload` object.
 * @returns {boolean} True when the entry has a searchable listing.
 */
export function shouldIndexSitemapEntry ( sitemapEntry ) {
    const listing = getSearchListing( sitemapEntry )
    const hasListing = listing !== null

    return hasListing
}
/**
 * Build the title stored for a sitemap entry.
 *
 * Uses the listing name, falling back to the route, and appends
 * " Benchmarks" for entries whose route type is 'benchmarks'.
 *
 * @param {object} sitemapEntry - Entry with `route` and `payload`.
 * @returns {string} Title text.
 */
export function makePagefindTitle ( sitemapEntry ) {
    const baseTitle = getSearchListing( sitemapEntry )?.name || sitemapEntry.route
    const isBenchmarksRoute = getRouteType( sitemapEntry.route ) === 'benchmarks'

    return isBenchmarksRoute ? `${ baseTitle } Benchmarks` : baseTitle
}
/**
 * Assemble the text content stored for a sitemap entry.
 *
 * Collects the title, descriptive text fields, list fields, category label
 * and status, skipping empty values, then joins them with blank lines.
 * Benchmark routes get two extra fixed phrases appended.
 *
 * @param {object} sitemapEntry - Entry with `route` and `payload`.
 * @returns {string} Content parts separated by blank lines.
 */
export function makePagefindContent ( sitemapEntry ) {
    const listing = getSearchListing( sitemapEntry )
    const isBenchmarksRoute = getRouteType( sitemapEntry.route ) === 'benchmarks'
    const parts = []

    const leadingTexts = [
        makePagefindTitle( sitemapEntry ),
        listing?.text,
        listing?.content,
        listing?.description
    ]

    for ( const text of leadingTexts ) {
        pushContentPart( parts, text )
    }

    const lists = [
        listing?.aliases,
        listing?.tags,
        listing?.timestamps?.map( timestamp => timestamp.fullText ),
        listing?.appLinks?.map( appLink => appLink.name )
    ]

    for ( const list of lists ) {
        pushListContentPart( parts, list )
    }

    const trailingTexts = [
        listing?.category?.label,
        listing?.status
    ]

    if ( isBenchmarksRoute ) {
        trailingTexts.push( 'Benchmarks', 'Apple Silicon App Tested' )
    }

    for ( const text of trailingTexts ) {
        pushContentPart( parts, text )
    }

    return parts.join('\n\n')
}
/**
 * Build the filter map stored for a sitemap entry.
 *
 * `type` is always present; `status` and `category` are added only when the
 * listing provides them.
 *
 * @param {object} sitemapEntry - Entry with `route` and `payload`.
 * @returns {object} Filter names mapped to single-element value arrays.
 */
export function makePagefindFilters ( sitemapEntry ) {
    const listing = getSearchListing( sitemapEntry )
    const normalizedType = normalizeFilterValue( getRouteType( sitemapEntry.route ) )
    const normalizedStatus = normalizeFilterValue( listing?.status )
    const hasCategory = Boolean( listing?.category?.slug )

    return {
        type: [ normalizedType ],
        ...( normalizedStatus !== null ? { status: [ normalizedStatus ] } : {} ),
        ...( hasCategory ? { category: [ getAppCategory( listing ).snakeSlug ] } : {} )
    }
}
/**
 * Convert a sitemap entry into the record object handed to `addCustomRecord`.
 *
 * @param {object} sitemapEntry - Entry with `route` and `payload`.
 * @returns {object|null} Record with url, content, language, meta, filters and sort, or null when the entry has no searchable listing.
 */
export function mapSitemapEntryToPagefindRecord ( sitemapEntry ) {
    const isIndexable = shouldIndexSitemapEntry( sitemapEntry )

    if ( !isIndexable ) {
        return null
    }

    const { route } = sitemapEntry
    const listing = getSearchListing( sitemapEntry )
    const routeType = getRouteType( route )

    // Stored as a string in both meta and sort; a missing timestamp becomes '0'
    const lastUpdatedTimestamp = String( listing?.lastUpdated?.timestamp || 0 )

    const content = makePagefindContent( sitemapEntry )

    const meta = {
        title: makePagefindTitle( sitemapEntry ),
        text: listing?.text || '',
        slug: listing?.slug || route,
        categorySlug: listing?.category?.slug || 'uncategorized',
        routeType,
        lastUpdatedTimestamp
    }

    const filters = makePagefindFilters( sitemapEntry )

    return {
        url: route,
        content,
        language: pagefindLanguage,
        meta,
        filters,
        sort: {
            updated: lastUpdatedTimestamp
        }
    }
}
/**
 * Build a Pagefind index from sitemap entries and write it to disk.
 *
 * Any existing output is removed first. Entries without a searchable listing
 * are skipped. The index and the Pagefind backend are always released, even
 * when index creation itself fails.
 *
 * @param {object[]} sitemapEndpoints - Sitemap entries to index.
 * @param {object} [options]
 * @param {string} [options.outputPath] - Destination directory; defaults to `pagefindOutputPath`.
 * @returns {Promise<{ outputPath: string, recordCount: number }>} Where the index was written and how many records it holds.
 * @throws {Error} When Pagefind reports errors creating the index, adding a record or writing files.
 */
export async function writePagefindIndex ( sitemapEndpoints, {
    outputPath = pagefindOutputPath
} = {} ) {
    await fs.remove( outputPath )

    let index = null

    // createIndex sits inside the try so the cleanup in `finally` also runs
    // when it reports errors or returns no index
    try {
        const created = await pagefind.createIndex({
            forceLanguage: pagefindLanguage
        })

        index = created.index || null

        if ( created.errors.length > 0 ) {
            throw new Error(`Pagefind createIndex errors: ${ created.errors.join(', ') }`)
        }

        if ( !index ) {
            throw new Error('Pagefind index was not created')
        }

        let recordCount = 0

        for ( const sitemapEntry of sitemapEndpoints ) {
            const record = mapSitemapEntryToPagefindRecord( sitemapEntry )

            if ( record === null ) continue

            const response = await index.addCustomRecord( record )

            if ( response.errors.length > 0 ) {
                throw new Error(`Pagefind addCustomRecord errors for ${ sitemapEntry.route }: ${ response.errors.join(', ') }`)
            }

            recordCount += 1
        }

        const writeResponse = await index.writeFiles({
            outputPath
        })

        if ( writeResponse.errors.length > 0 ) {
            throw new Error(`Pagefind writeFiles errors: ${ writeResponse.errors.join(', ') }`)
        }

        return {
            outputPath,
            recordCount
        }
    } finally {
        // Cleanup is best-effort: a failure here must not mask the real error
        if ( index !== null ) {
            await index.deleteIndex().catch( () => null )
        }

        await pagefind.close().catch( () => null )
    }
}

18
helpers/search/config.js Normal file
View file

@ -0,0 +1,18 @@
// Provider used when none is requested or the requested one is unknown
export const defaultSearchProvider = 'pagefind'

// Every provider name accepted by the provider switch
const supportedProviderNames = [ 'pagefind', 'stork' ]

export const supportedSearchProviders = new Set( supportedProviderNames )
/**
 * Resolve a requested search provider name to a supported one.
 *
 * The name is coerced to a string, trimmed and lower-cased before lookup, so
 * values such as ' Stork\n' still resolve and a non-string value cannot
 * throw. Blank input silently selects the default; an unknown name logs a
 * warning and falls back to the default.
 *
 * @param {*} [rawProvider=defaultSearchProvider] - Requested provider name.
 * @returns {string} A member of `supportedSearchProviders`, or the default.
 */
export function getSearchProvider ( rawProvider = defaultSearchProvider ) {
    const provider = String( rawProvider || '' ).trim().toLowerCase()

    if ( provider.length === 0 ) {
        return defaultSearchProvider
    }

    if ( supportedSearchProviders.has( provider ) ) {
        return provider
    }

    console.warn(`Unknown search provider "${ provider }", falling back to "${ defaultSearchProvider }"`)

    return defaultSearchProvider
}