mirror of
https://github.com/ThatGuySam/doesitarm.git
synced 2026-05-15 06:35:20 -07:00
Move parseSitemapXml into helper file
This commit is contained in:
parent
79640781bc
commit
45bfe1c2fa
2 changed files with 79 additions and 9 deletions
73
helpers/api/sitemap/parse.js
Normal file
73
helpers/api/sitemap/parse.js
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
import path from 'path'
|
||||
import fs from 'fs-extra'
|
||||
import axios from 'axios'
|
||||
import { parse } from 'fast-xml-parser'
|
||||
|
||||
import {
|
||||
sitemapLocation,
|
||||
sitemapIndexFileName,
|
||||
} from '~/helpers/constants.js'
|
||||
|
||||
// Sitemap file names iterated by fetchParsedSitemapXmlForDomain:
// the configured sitemap index file name first, then the plain 'sitemap.xml'.
const sitemapFilesToTry = [ sitemapIndexFileName, 'sitemap.xml' ]
|
||||
|
||||
/**
 * Parse sitemap XML and return its entries as an array.
 *
 * Two root shapes are recognised:
 * - `sitemapindex`: the `sitemap` children are returned
 * - `urlset`: the `url` children are returned
 *
 * @param {string} sitemapXml - Raw XML of a sitemap or a sitemap index.
 * @returns {Array} The parsed child entries. Always an array: empty when the
 *     root has no children, one element when it has a single child.
 * @throws {TypeError} When the parsed document has neither a `sitemapindex`
 *     nor a `urlset` root.
 */
export function parseSitemapXml ( sitemapXml ) {
    // A lone child element can be parsed as a single object rather than an
    // array, and a missing child as undefined, so normalise both cases.
    const toEntryList = children => {
        if ( children === undefined || children === null ) return []

        return Array.isArray( children ) ? children : [ children ]
    }

    const sitemapRoot = parse( sitemapXml )

    const {
        sitemapindex = null,
        urlset = null,
    } = sitemapRoot

    // Sitemap index: entries point at other sitemaps
    if ( sitemapindex !== null ) {
        return toEntryList( sitemapindex.sitemap )
    }

    // Fail with a readable message instead of an opaque property access on null
    if ( urlset === null ) {
        throw new TypeError( 'Sitemap XML has neither a <sitemapindex> nor a <urlset> root element' )
    }

    // Regular sitemap: entries are page URLs
    return toEntryList( urlset.url )
}
|
||||
|
||||
/**
 * Read a sitemap file from disk and collect the URL entries it leads to.
 *
 * When the first entry's `loc` contains '.xml' the file is treated as a
 * sitemap index: every entry is resolved to a local file by joining the
 * pathname of its `loc` URL onto the directory of the current file, each of
 * those files is read recursively, and the results are flattened.
 * Otherwise the entries of this file are returned as they are.
 *
 * @param {string} sitemapPath - Path of the sitemap (or sitemap index) file.
 * @returns {Promise<Array>} URL entries; empty when the sitemap has none.
 */
export async function getAllUrlsFromLocalSitemap ( sitemapPath ) {
    // Get initial sitemap
    const sitemapXml = await fs.readFile( sitemapPath, 'utf8' )
    const sitemapDirectory = path.dirname( sitemapPath )

    // Get entries from the sitemap, tolerating a missing result or a single
    // un-wrapped entry so the checks below never index into a non-array
    const parsedEntries = parseSitemapXml( sitemapXml )
    const urlEntries = Array.isArray( parsedEntries )
        ? parsedEntries
        : ( parsedEntries == null ? [] : [ parsedEntries ] )

    // Nothing to inspect or recurse into
    if ( urlEntries.length === 0 ) return urlEntries

    // Check if url entries are sitemaps (only the first entry is inspected)
    const firstEntry = urlEntries[0]
    const firstLoc = firstEntry ? firstEntry.loc : undefined
    const isSitemapIndex = typeof firstLoc === 'string' && firstLoc.includes('.xml')

    if ( !isSitemapIndex ) return urlEntries

    // Get urls from each child sitemap
    const sitemaps = await Promise.all( urlEntries.map( async entry => {
        // Map the child sitemap URL onto a file next to the current sitemap
        const sitemapUrl = new URL( entry.loc )
        const childSitemapPath = path.join( sitemapDirectory, sitemapUrl.pathname )

        return getAllUrlsFromLocalSitemap( childSitemapPath )
    }))

    // Flatten array
    return sitemaps.flat()
}
|
||||
|
||||
/**
 * Placeholder for fetching and parsing the sitemap of a domain.
 *
 * Not implemented yet: it walks the candidate sitemap file names without
 * acting on them, so the returned promise always resolves to `undefined`.
 *
 * @param {*} domain - Not read by the current body.
 *     NOTE(review): presumably the domain whose sitemap should be fetched - confirm.
 * @returns {Promise<undefined>}
 */
export async function fetchParsedSitemapXmlForDomain ( domain ) {
    for ( const candidateFileName of sitemapFilesToTry ) {
        // TODO: no work is done per candidate file yet
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue