diff --git a/helpers/build-app-list.js b/helpers/build-app-list.js
index a725e4e..5a6c554 100644
--- a/helpers/build-app-list.js
+++ b/helpers/build-app-list.js
@@ -16,6 +16,32 @@ import { makeSlug } from './slug.js'
 
 const md = new MarkdownIt()
 
+/**
+ * Find the index of the token that marks the end of the app list.
+ *
+ * @param {Array<{ content: string }>} parsedMarkdown - Tokens to search (callers in this file pass the result of `md.parse`).
+ * @returns {number} Index of the first token whose `content` includes 'end-of-list', or -1 if none does.
+ */
+export function getEndOfListTokenIndex ( parsedMarkdown ) {
+    return parsedMarkdown.findIndex( token => token.content.includes('end-of-list') )
+}
+
+/**
+ * Get the tokens that come before the end-of-list marker.
+ *
+ * @param {Array<{ content: string }>} parsedMarkdown - Tokens to trim.
+ * @returns {Array} New array of the tokens before the marker, or of every token when there is no marker.
+ */
+export function getReadmeTokenList ( parsedMarkdown ) {
+    const endOfListIndex = getEndOfListTokenIndex( parsedMarkdown )
+
+    // findIndex returns -1 when the marker is missing and slice(0, -1)
+    // would silently drop the last token, so keep the whole list instead
+    if ( endOfListIndex === -1 ) return parsedMarkdown.slice()
+
+    return parsedMarkdown.slice(0, endOfListIndex)
+}
+
 const getTokenLinks = function ( childTokens ) {
     const tokenList = []
 
@@ -56,6 +82,153 @@ const getTokenLinks = function ( childTokens ) {
 }
 
 
+/**
+ * Build the list of apps described in the README.
+ *
+ * Heading text sets the current category. Each paragraph whose text contains
+ * ' - ' is read as `[name](url) - text` and becomes one app entry.
+ *
+ * @param {Object} options
+ * @param {string} options.readmeContent - README markdown to parse.
+ * @param {Map} options.scanListMap - Scanned apps. NOTE: any entry with an alias matching a README app is merged into that app and deleted from this map.
+ * @param {Array} options.commits - Items shaped like `{ node: commit }`, handed to `lookForLastUpdated`.
+ * @returns {Array<Object>} One entry per app found before the end-of-list marker.
+ */
+export function buildReadmeAppList ({ readmeContent, scanListMap, commits }) {
+
+    // Parse markdown
+    const result = md.parse(readmeContent)
+
+    // Only walk the tokens that come before the end of our list
+    const appListTokens = getReadmeTokenList( result )
+
+    const appList = []
+
+    let categorySlug = 'start'
+    let categoryTitle = 'Start'
+    let isHeading = false
+    let isParagraph = false
+
+    for (const token of appListTokens) {
+        // Heading open/close tokens toggle heading mode
+        if (token.type.includes('heading_')) isHeading = !isHeading
+
+        // Paragraph open/close tokens toggle paragraph mode
+        if (token.type.includes('paragraph_')) isParagraph = !isParagraph
+
+        if (isHeading && token.type === 'inline') {
+            categoryTitle = token.content
+            categorySlug = makeSlug( token.content )
+        }
+
+
+        if ( isParagraph && token.type === 'inline' && token.content.includes(' - ') ) {
+
+            const [ link, text ] = token.content.split(' - ').map(string => string.trim())
+
+            // Drop the first and last character, then keep what comes before ']('
+            const [ name ] = link.substring(1, link.length-1).split('](')
+
+            const bundleIds = []
+            let tags = []
+            let aliases = []
+            const relatedLinksMap = new Map( getTokenLinks(token.children).map( link => [ link.href, link ] ) )
+
+            // Search for this app in the scanList and remove duplicates
+            scanListMap.forEach( ( scannedApp, key ) => {
+
+                for ( const alias of scannedApp.aliases ) {
+                    if ( eitherMatches(alias, name) ) {
+                        // Add this app's bundleId to the list
+                        // if we don't have it yet
+                        if ( !bundleIds.includes( scannedApp.bundleIds[0] ) ) { bundleIds.push(scannedApp.bundleIds[0]) }
+
+                        // Merge this scanned app's tags into the matching app
+                        tags = Array.from(new Set([
+                            ...tags,
+                            ...scannedApp.tags
+                        ]))
+
+                        // Merge as set then convert to array to prevent duplicates
+                        aliases = Array.from(new Set([
+                            ...aliases,
+                            ...scannedApp.aliases
+                        ]))
+
+                        // Merge related links
+                        for ( const link of scannedApp.relatedLinks ) {
+
+                            relatedLinksMap.set( link.href, {
+                                ...link,
+                                label: (link.label === 'View') ? 'App Website' : link.label
+                            } )
+                        }
+
+                        console.log(`Merged ${alias} (${scannedApp.bundleIds[0]}) from scanned apps into ${name} from README`)
+                        scanListMap.delete( key )
+
+                        // This scanned app is fully merged and removed,
+                        // so don't merge and log it again for its other aliases
+                        break
+                    }
+                }
+            })
+
+
+            // Convert link map values into array for JSON
+            const relatedLinks = Array.from( relatedLinksMap.values() )
+
+            const appSlug = makeSlug( name )
+
+            const endpoint = getAppEndpoint({
+                category: {
+                    slug: null
+                },
+                slug: appSlug
+            })// `/app/${appSlug}`
+
+            let status = 'unknown'
+
+            for (const statusKey in statuses) {
+                if (text.includes(statusKey)) {
+                    status = statuses[statusKey]
+                    break
+                }
+            }
+
+            const category = {
+                label: categoryTitle,
+                slug: categorySlug
+            }
+
+            const lastUpdatedRaw = lookForLastUpdated({ name, slug: appSlug, endpoint, category }, commits)
+
+            const lastUpdated = (lastUpdatedRaw) ? {
+                raw: lastUpdatedRaw,
+                timestamp: parseDate(lastUpdatedRaw).timestamp,
+            } : null
+
+
+            appList.push({
+                name,
+                aliases,
+                status,
+                bundleIds,
+                lastUpdated,
+                text,
+                slug: appSlug,
+                endpoint,
+                category,
+                tags,
+                relatedLinks,
+            })
+        }
+    }
+
+    return appList
+}
+
+
 const lookForLastUpdated = function (app, commits) {
     for (const { node: commit } of commits) {
 
@@ -255,162 +428,7 @@ export default async function () {
     })
 
 
-    // Parse markdown
-    const result = md.parse(readmeContent)
-
-    // console.log('results', result.length)
-    // console.log('results', result)
-
-
-    // Finf the end of our list
-    const endOfListIndex = result.findIndex((Token) => {
-        // JSON.stringify(Token).includes('end-of-list')
-        const matches = Token.content.includes('end-of-list')
-
-        // if (matches) {
-        //     console.log('Token', Token)
-        // }
-
-        return matches
-    })
-
-    const appListTokens = result.slice(0, endOfListIndex)
-
-    const appList = []
-
-    let categorySlug = 'start'
-    let categoryTitle = 'Start'
-    let isHeading = false
-    let isParagraph = false
-
-    for (const token of appListTokens) {
-        // On heading close switch off heading mode
-        if (token.type.includes('heading_')) isHeading = !isHeading
-
-        // On heading close switch off heading mode
-        if (token.type.includes('paragraph_')) isParagraph = !isParagraph
-
-        if (isHeading && token.type === 'inline') {
-            categoryTitle = token.content
-            categorySlug = makeSlug( token.content )
-
-            // appList[categorySlug] = []
-        }
-
-
-        if ( isParagraph && token.type === 'inline' && token.content.includes(' - ') ) {
-
-            const [ link, text ] = token.content.split(' - ').map(string => string.trim())
-
-            const [ name, url ] = link.substring(1, link.length-1).split('](')
-
-            const bundleIds = []
-            let tags = []
-            let aliases = []
-            const relatedLinksMap = new Map( getTokenLinks(token.children).map( link => [ link.href, link ] ) )
-
-            // Search for this app in the scanList and remove duplicates
-            scanListMap.forEach( ( scannedApp, key ) => {
-
-                for ( const alias of scannedApp.aliases ) {
-                    // console.log( key, alias, name, eitherMatches(alias, name) )
-
-                    if ( eitherMatches(alias, name) ) {
-                        // If we don't have any bundleIds yet
-                        // Add this app's bundleId to the list
-                        if ( !bundleIds.includes( scannedApp.bundleIds[0] ) ) { bundleIds.push(scannedApp.bundleIds[0]) }
-
-                        // Merge this scanned app's tags into the matching app
-                        tags = Array.from(new Set([
-                            ...tags,
-                            ...scannedApp.tags
-                        ]))
-
-                        // Merge as set then convert to array to prevent duplicates
-                        aliases = Array.from(new Set([
-                            ...aliases,
-                            ...scannedApp.aliases
-                        ]))
-
-                        // Merge relatated links
-                        for ( const link of scannedApp.relatedLinks ) {
-
-                            relatedLinksMap.set( link.href, {
-                                ...link,
-                                label: (link.label === 'View') ? 'App Website' : link.label
-                            } )
-                        }
-
-                        console.log(`Merged ${alias} (${scannedApp.bundleIds[0]}) from scanned apps into ${name} from README`)
-                        scanListMap.delete( key )
-                    }
-                }
-            })
-
-
-            // Convert link map values into array for JSON
-            const relatedLinks = Array.from( relatedLinksMap.values() )
-
-            // console.log('relatedLinks', relatedLinks)
-
-            const appSlug = makeSlug( name )
-
-            const endpoint = getAppEndpoint({
-                category: {
-                    slug: null
-                },
-                slug: appSlug
-            })// `/app/${appSlug}`
-
-            let status = 'unknown'
-
-            for (const statusKey in statuses) {
-                if (text.includes(statusKey)) {
-                    status = statuses[statusKey]
-                    break
-                }
-            }
-
-            const category = {
-                label: categoryTitle,
-                slug: categorySlug
-            }
-
-            const lastUpdatedRaw = lookForLastUpdated({ name, slug: appSlug, endpoint, category }, commits)
-
-            const lastUpdated = (lastUpdatedRaw) ? {
-                raw: lastUpdatedRaw,
-                timestamp: parseDate(lastUpdatedRaw).timestamp,
-            } : null
-
-
-            appList.push({
-                name,
-                aliases,
-                status,
-                bundleIds,
-                lastUpdated,
-                // url,
-                text,
-                slug: appSlug,
-                endpoint,
-                category,
-                tags,
-                // content: token.content,
-                relatedLinks,
-            })
-
-
-            // if ( tags.length > 1 ) {
-            //     console.log('tags', name, bundleIds, tags)
-            // }
-        }
-
-        // appList[categorySlug]
-
-
-        // console.log('token', token)
-    }
+    const appList = buildReadmeAppList({ readmeContent, scanListMap, commits })
 
     // console.log('appList', appList)
 
diff --git a/package.json b/package.json
index bb71d1c..ca0d403 100644
--- a/package.json
+++ b/package.json
@@ -10,13 +10,14 @@
     ]
   },
   "scripts": {
+    "test-prebuild": "ava ./test/prebuild.js --verbose",
     "test": "ava --timeout=1m --verbose",
     "dev": "nuxt",
     "build": "nuxt build",
     "start": "nuxt start",
     "generate-dev": "npm run generate && npm test",
     "generate": "npm run clone-readme && npm run build-lists && npm run generate-nuxt && npm run generate-eleventy",
-    "build-lists": "node -r esm build-lists.js",
+    "build-lists": "npm run test-prebuild && node -r esm build-lists.js",
     "generate-nuxt": "NODE_OPTIONS=--max-old-space-size=60000 nuxt generate",
     "generate-eleventy": "node --max-old-space-size=60000 -r esm node_modules/.bin/eleventy --quiet",
     "generate-postcss": "ENV=production postcss assets/css/tailwind.css --o static/tailwind.css",
diff --git a/test/prebuild.js b/test/prebuild.js
new file mode 100644
index 0000000..b97ca88
--- /dev/null
+++ b/test/prebuild.js
@@ -0,0 +1,80 @@
+import fs from 'fs-extra'
+import test from 'ava'
+// import MarkdownIt from 'markdown-it'
+
+import { buildReadmeAppList } from '../helpers/build-app-list.js'
+
+
+require('dotenv').config()
+
+// const md = new MarkdownIt()
+
+// Every character an app title may contain: A-Z, a-z, 0-9, space, hyphen and period
+const allowedTitleCharacters = new Set( 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890 -.'.split('') )
+
+
+function isString( maybeString ) {
+    return (typeof maybeString === 'string' || maybeString instanceof String)
+}
+
+
+test.before(async t => {
+    const readmeFileContent = await fs.readFile('./README.md', 'utf-8')
+    // const readmeMarkdown = md.parse( readmeFileContent )
+
+    // t.log( 'readmeMarkdown', readmeMarkdown )
+
+
+    // Store README content to context
+    t.context.readmeFileContent = readmeFileContent
+})
+
+test('README App Titles are alphanumeric only', (t) => {
+    // console.log('t.context.readmeFileContent', t.context.readmeFileContent)
+
+    const readmeAppList = buildReadmeAppList({
+        readmeContent: t.context.readmeFileContent,
+        scanListMap: new Map(),
+        commits: []
+    })
+
+
+    // console.log('readmeAppList', readmeAppList)
+    t.log('readmeAppList', readmeAppList.length)
+
+
+    for (const readmeApp of readmeAppList) {
+        const cleanedAppName = readmeApp.name//.toLowerCase()
+
+        // const firstInvisbleCharacterIndex = cleanedAppName.search( invisibleCharacerRegex )
+
+        // const standardSpaceCharCode = 32
+
+        for ( const character of cleanedAppName ) {
+            if ( !allowedTitleCharacters.has( character ) ) {
+
+                // badCharacter = readmeApp.name[firstBadCharacterIndex]
+
+                // t.log( readmeApp )
+                t.fail(`README App Title ${readmeApp.name} has non-alphanumeric character ${character}(charCode ${character.charCodeAt(0)})`)
+
+                break
+            }
+        }
+
+        // if ( readmeApp.name.includes('Apple Trans') ) {
+        //     const normalNameLength = 'Apple Transporter'.length
+        //     t.log( 'normalNameLength', normalNameLength )
+        //     t.log( readmeApp.name, readmeApp.name.length )
+        // }
+    }
+
+    // const urlsWithDoubleSlashes = t.context.sitemapUrls.filter( url => url.pathname.includes('//') )
+
+    // if ( urlsWithDoubleSlashes.length > 0) {
+    //     t.fail( `${ urlsWithDoubleSlashes.length } urls with doubles slashes found including ${ urlsWithDoubleSlashes[0] }` )
+    // }
+
+    t.log( `${readmeAppList.length} valid alpanumeric app titles in readme` )
+    t.pass()
+})