mirror of
https://github.com/ThatGuySam/doesitarm.git
synced 2026-05-18 06:44:46 -07:00
Move parseSitemapXml into helper file
This commit is contained in:
parent
79640781bc
commit
45bfe1c2fa
2 changed files with 79 additions and 9 deletions
73
helpers/api/sitemap/parse.js
Normal file
73
helpers/api/sitemap/parse.js
Normal file
|
|
@ -0,0 +1,73 @@
|
||||||
|
import path from 'path'
|
||||||
|
import fs from 'fs-extra'
|
||||||
|
import axios from 'axios'
|
||||||
|
import { parse } from 'fast-xml-parser'
|
||||||
|
|
||||||
|
import {
|
||||||
|
sitemapLocation,
|
||||||
|
sitemapIndexFileName,
|
||||||
|
} from '~/helpers/constants.js'
|
||||||
|
|
||||||
|
// Sitemap file names iterated by fetchParsedSitemapXmlForDomain:
// the project's configured index file name first, then the conventional default.
const sitemapFilesToTry = [ sitemapIndexFileName, 'sitemap.xml' ]
|
||||||
|
|
||||||
|
/**
 * Normalise the children found under a parsed sitemap root into an array.
 *
 * @param {*} children - Parsed child value: missing, a lone entry, or an array of entries.
 * @returns {Array} An array of entries; empty when there are no children.
 */
function toSitemapEntryList ( children ) {
    if ( children === undefined || children === null ) return []

    return Array.isArray( children ) ? children : [ children ]
}

/**
 * Parse sitemap XML and return its entries as an array.
 *
 * Two root shapes are recognised in the parsed result:
 * - `sitemapindex`: the entries are its `sitemap` children
 * - `urlset`: the entries are its `url` children
 *
 * Both branches return an array, so callers can index into or map over
 * the result regardless of how many entries the document holds.
 *
 * @param {string} sitemapXml - Raw sitemap XML.
 * @returns {Array} The document's entries; empty when the root has no children.
 * @throws {TypeError} When the parsed XML has neither a `sitemapindex` nor a `urlset` root.
 */
export function parseSitemapXml ( sitemapXml ) {
    const sitemapRoot = parse( sitemapXml )

    const {
        sitemapindex = null,
        urlset = null,
    } = sitemapRoot

    // Sitemap index: entries point at other sitemap files
    if ( sitemapindex !== null ) {
        return toSitemapEntryList( sitemapindex.sitemap )
    }

    // Plain sitemap: entries are the pages themselves
    if ( urlset !== null ) {
        return toSitemapEntryList( urlset.url )
    }

    // Keep the TypeError the unguarded property access used to raise, but say why
    throw new TypeError( 'Sitemap XML has neither a <sitemapindex> nor a <urlset> root element' )
}
|
||||||
|
|
||||||
|
/**
 * Read a sitemap file from disk and collect the URL entries it leads to.
 *
 * When the first entry's `loc` contains '.xml' the file is treated as a
 * sitemap index: each entry's `loc` is parsed as a URL, its pathname is
 * joined onto the directory of the current file, and that child file is
 * read recursively. Otherwise the parsed entries are returned as they are.
 *
 * @param {string} sitemapPath - Path of the sitemap (or sitemap index) file to read.
 * @returns {Promise<Array>} All URL entries, flattened across child sitemaps;
 *   empty when the file has no entries.
 */
export async function getAllUrlsFromLocalSitemap ( sitemapPath ) {
    // Get initial sitemap
    const sitemapXml = await fs.readFile( sitemapPath, 'utf8' )
    const sitemapDirectory = path.dirname( sitemapPath )

    // Get entries from the file
    const parsedEntries = parseSitemapXml( sitemapXml )

    // Tolerate a lone entry object or missing entries so the
    // first-entry check below can never hit undefined
    const urlEntries = ( Array.isArray( parsedEntries ) ? parsedEntries : [ parsedEntries ] )
        .filter( entry => entry !== undefined && entry !== null )

    if ( urlEntries.length === 0 ) return []

    // Check if url entries are sitemaps
    const [ firstEntry ] = urlEntries
    const isSitemapIndex = typeof firstEntry.loc === 'string' && firstEntry.loc.includes('.xml')

    if ( !isSitemapIndex ) return urlEntries

    // Get urls from each child sitemap in parallel
    const sitemaps = await Promise.all( urlEntries.map( async entry => {
        // Map the child sitemap URL onto a file next to the current one
        const sitemapUrl = new URL( entry.loc )

        const childSitemapPath = path.join( sitemapDirectory, sitemapUrl.pathname )

        return getAllUrlsFromLocalSitemap( childSitemapPath )
    }))

    // Flatten array
    return sitemaps.flat()
}
|
||||||
|
|
||||||
|
/**
 * Placeholder — not implemented yet.
 *
 * Currently walks the `sitemapFilesToTry` list without doing anything and
 * resolves to `undefined`; `domain` is not used.
 *
 * @param {*} domain - Unused for now. NOTE(review): presumably the domain whose
 *   sitemap should be fetched — confirm once the body is written.
 * @returns {Promise<undefined>}
 */
export async function fetchParsedSitemapXmlForDomain ( domain ) {
    for ( const candidateFileName of sitemapFilesToTry ) {
        // TODO: body not written yet
    }
}
|
||||||
|
|
@ -1,13 +1,13 @@
|
||||||
import fs from 'fs-extra'
|
import fs from 'fs-extra'
|
||||||
import 'dotenv/config'
|
import 'dotenv/config'
|
||||||
import axios from 'axios'
|
import axios from 'axios'
|
||||||
import { parse } from 'fast-xml-parser'
|
|
||||||
|
|
||||||
import {
|
import {
|
||||||
sitemapLocation,
|
sitemapLocation,
|
||||||
sitemapIndexFileName,
|
sitemapIndexFileName,
|
||||||
} from '~/helpers/constants.js'
|
} from '~/helpers/constants.js'
|
||||||
|
|
||||||
|
import { parseSitemapXml } from '~/helpers/api/sitemap/parse.js'
|
||||||
|
|
||||||
|
|
||||||
;(async () => {
|
;(async () => {
|
||||||
|
|
@ -22,14 +22,7 @@ import {
|
||||||
const sitemapIndexFilePath = `${ sitemapLocation }${ sitemapIndexFileName }`
|
const sitemapIndexFilePath = `${ sitemapLocation }${ sitemapIndexFileName }`
|
||||||
await fs.writeFile( sitemapIndexFilePath, sitemapIndexXML )
|
await fs.writeFile( sitemapIndexFilePath, sitemapIndexXML )
|
||||||
|
|
||||||
// Get URLs from index
|
const urlEntries = parseSitemapXml( sitemapIndexXML )
|
||||||
const { sitemapindex } = parse( sitemapIndexXML )
|
|
||||||
|
|
||||||
const {
|
|
||||||
sitemap
|
|
||||||
} = sitemapindex
|
|
||||||
|
|
||||||
const urlEntries = Array.isArray( sitemap ) ? sitemap : [ sitemap ]
|
|
||||||
|
|
||||||
|
|
||||||
// Fetch each sitemap
|
// Fetch each sitemap
|
||||||
|
|
@ -44,6 +37,10 @@ import {
|
||||||
// Fetch Sitemap Index
|
// Fetch Sitemap Index
|
||||||
const sitemapXML = await axios.get( apiSitemapUrl.href ).then( response => response.data )
|
const sitemapXML = await axios.get( apiSitemapUrl.href ).then( response => response.data )
|
||||||
|
|
||||||
|
// const sitemap = parse( sitemapXML )
|
||||||
|
|
||||||
|
// console.log( 'sitemap', sitemap )
|
||||||
|
|
||||||
// console.log( 'apiSitemapUrl', apiSitemapUrl )
|
// console.log( 'apiSitemapUrl', apiSitemapUrl )
|
||||||
|
|
||||||
const sitemapFileName = apiSitemapUrl.pathname.split('/')[1]
|
const sitemapFileName = apiSitemapUrl.pathname.split('/')[1]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue