Merged in feat/SW-550-sitemap (pull request #981)

feat(SW-550): added sync functionality and sitemap generation

* feat(SW-550): added sync functionality and sitemap generation

* feat(SW-550): Added support for splitting and saving multiple sitemaps when there are 50000+ urls

* feat(SW-550): Updates after PR

* feat(SW-550): Added locale to sitemap data

* feat(SW-550): Added support for locale based sitemapData

* feat(SW-550): Saving alternates of sitemap entries

* feat(SW-550): Refactoring to use sitemap utils file

* feat(SW-550): Using Netlify.env to get environment variables

* feat(SW-550): clarify use of functions


Approved-by: Michael Zetterberg
This commit is contained in:
Erik Tiekstra
2025-02-17 10:35:11 +00:00
parent 577271c577
commit c93381ca80
13 changed files with 887 additions and 56 deletions

99
app/api/sitemap/route.ts Normal file
View File

@@ -0,0 +1,99 @@
import dayjs from "dayjs"
import { type NextRequest, NextResponse } from "next/server"
import { env } from "@/env/server"
import {
getEntries,
getSyncToken,
saveEntries,
saveLastUpdatedDate,
saveSitemapData,
saveSyncToken,
} from "@/utils/sitemap"
import { contentstackSync } from "./sync"
import {
generateSitemapCounter,
generateSitemapFailCounter,
generateSitemapSuccessCounter,
saveEntriesCounter,
saveSitemapDataCounter,
saveSyncTokenCounter,
} from "./telemetry"
import { mapEntriesToSitemapData, mergeEntries } from "./utils"
// Next.js route segment config: always run this handler at request time
// instead of letting the response be statically rendered or cached.
export const dynamic = "force-dynamic"
/**
 * Syncs entries from Contentstack and regenerates the stored sitemap data.
 *
 * Flow: validate the `x-sitemap-sync-secret` header, read the stored sync
 * token and entries, fetch the delta from Contentstack, merge it into the
 * stored entries, persist the merged entries, the derived sitemap data and
 * the last-updated timestamp, and finally persist the new sync token when it
 * changed.
 *
 * @param request Incoming request; must carry the `x-sitemap-sync-secret`
 *   header matching `env.SITEMAP_SYNC_SECRET`.
 * @returns A JSON response with a success message, or a 500 JSON response
 *   when any step (including the secret check) fails.
 */
export async function GET(request: NextRequest) {
  try {
    generateSitemapCounter.add(1)
    console.info("sitemap.generate start")

    const secret = request.headers.get("x-sitemap-sync-secret")
    if (secret !== env.SITEMAP_SYNC_SECRET) {
      // Never echo the received value: this message is written to the logs
      // and attached to a metric attribute in the catch block below, and a
      // near-miss value would leak most of the real secret.
      throw new Error("Can't sync and generate sitemap, invalid secret")
    }

    const syncToken = await getSyncToken()
    const currentEntries = await getEntries()
    const responseData = await contentstackSync(syncToken)
    const mergedEntries = mergeEntries(currentEntries, responseData.entries)

    saveEntriesCounter.add(1, { entriesCount: mergedEntries.length })
    console.info(
      "sitemap.entries.save",
      JSON.stringify({ entriesCount: mergedEntries.length })
    )
    await saveEntries(mergedEntries)

    const sitemapData = mapEntriesToSitemapData(mergedEntries)
    const lastUpdated = dayjs.utc().format()
    saveSitemapDataCounter.add(1, {
      sitemapEntriesCount: sitemapData.length,
    })
    console.info(
      "sitemap.data.save",
      JSON.stringify({
        sitemapEntriesCount: sitemapData.length,
        lastUpdated,
      })
    )
    await saveSitemapData(sitemapData)
    await saveLastUpdatedDate(lastUpdated)

    // The sync token is stored last, so a failure in any step above leaves
    // the old token in place and the same delta is fetched on the next run.
    if (syncToken !== responseData.syncToken) {
      saveSyncTokenCounter.add(1, {
        syncToken: responseData.syncToken,
      })
      console.info(
        "sitemap.synctoken.save",
        JSON.stringify({ syncToken: responseData.syncToken })
      )
      await saveSyncToken(responseData.syncToken)
    }

    generateSitemapSuccessCounter.add(1)
    return NextResponse.json({
      message: "Sitemap data generated and stored successfully!",
      now: dayjs.utc().format(),
    })
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : JSON.stringify(error)
    generateSitemapFailCounter.add(1, { error: errorMessage })
    console.error("sitemap.generate.fail", errorMessage)
    // The client only gets a generic error; details stay in logs/telemetry.
    return NextResponse.json(
      {
        error: "Failed to generate sitemap",
        now: dayjs.utc().format(),
      },
      { status: 500, statusText: "Internal Server Error" }
    )
  }
}

112
app/api/sitemap/sync.ts Normal file
View File

@@ -0,0 +1,112 @@
import { Region, Stack } from "contentstack"
import { env } from "@/env/server"
import {
  syncEntriesCounter,
  syncEntriesFailCounter,
  syncEntriesPaginationCounter,
  syncEntriesPaginationSuccessCounter,
  syncEntriesSuccessCounter,
} from "./telemetry"
import type { SyncResponse } from "@/types/sitemap"
// Contentstack environment to sync from; also attached to every metric and
// log line emitted by this module.
const environment = env.CMS_ENVIRONMENT
// Contentstack delivery client (EU region) created once at module load and
// reused for every sync call.
const stack = Stack({
api_key: env.CMS_API_KEY,
delivery_token: env.CMS_ACCESS_TOKEN,
branch: env.CMS_BRANCH,
environment,
region: Region.EU,
})
/**
 * Fetches entries from the Contentstack sync API, following pagination until
 * a sync token is returned.
 *
 * @param syncToken Token from the previous sync; `null` triggers an initial
 *   (full) sync.
 * @returns The new sync token together with every item received across all
 *   pages.
 * @throws Error("Failed to sync entries") when a request fails or when no
 *   sync token is received; the underlying message is logged and recorded on
 *   the fail counter.
 */
export async function contentstackSync(syncToken: string | null) {
  const entries = []
  const syncOptions = syncToken ? { sync_token: syncToken } : { init: true }

  syncEntriesCounter.add(1, {
    environment,
    ...syncOptions,
  })
  console.info(
    "sitemap.entries.sync start",
    JSON.stringify({
      environment,
      ...syncOptions,
    })
  )

  try {
    let syncResponse: SyncResponse = await stack.sync(syncOptions)
    entries.push(...syncResponse.items)

    // Keep requesting pages for as long as the response carries a pagination
    // token and no sync token.
    while (syncResponse.pagination_token && !syncResponse.sync_token) {
      syncEntriesPaginationCounter.add(1, {
        environment,
        paginationToken: syncResponse.pagination_token,
      })
      console.info(
        "sitemap.entries.sync.pagination start",
        JSON.stringify({
          environment,
          paginationToken: syncResponse.pagination_token,
        })
      )

      syncResponse = await stack.sync({
        pagination_token: syncResponse.pagination_token,
      })
      entries.push(...syncResponse.items)

      // Record the fetched page on the dedicated success counter so the
      // "pagination" counter reflects the number of page requests only.
      syncEntriesPaginationSuccessCounter.add(1, {
        environment,
        paginationToken: syncResponse.pagination_token,
        entriesCount: syncResponse.items.length,
      })
      console.info(
        "sitemap.entries.sync.pagination success",
        JSON.stringify({
          environment,
          paginationToken: syncResponse.pagination_token,
          entriesCount: syncResponse.items.length,
        })
      )
    }

    if (!syncResponse.sync_token) {
      throw new Error("No sync token received, something went wrong")
    }

    syncEntriesSuccessCounter.add(1, {
      environment,
      ...syncOptions,
      newSyncToken: syncResponse.sync_token,
      entriesCount: entries.length,
    })
    console.info(
      "sitemap.entries.sync success",
      JSON.stringify({
        environment,
        ...syncOptions,
        newSyncToken: syncResponse.sync_token,
        entriesCount: entries.length,
      })
    )

    return {
      syncToken: syncResponse.sync_token,
      entries,
    }
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : JSON.stringify(error)
    syncEntriesFailCounter.add(1, {
      environment,
      ...syncOptions,
      error: errorMessage,
    })
    console.error("sitemap.entries.sync error", errorMessage)
    // Callers get a stable, generic message; the detail is in the log above.
    throw new Error("Failed to sync entries")
  }
}

View File

@@ -0,0 +1,43 @@
import { metrics } from "@opentelemetry/api"
// All sitemap metrics are created on a single OpenTelemetry meter named "sitemap".
const meter = metrics.getMeter("sitemap")
// Overall sitemap generation run: started / succeeded / failed.
export const generateSitemapCounter = meter.createCounter("sitemap.generate")
export const generateSitemapSuccessCounter = meter.createCounter(
"sitemap.generate-success"
)
export const generateSitemapFailCounter = meter.createCounter(
"sitemap.generate-fail"
)
// Contentstack sync call: started / succeeded / failed.
export const syncEntriesCounter = meter.createCounter("sitemap.entries.sync")
export const syncEntriesSuccessCounter = meter.createCounter(
"sitemap.entries.sync-success"
)
export const syncEntriesFailCounter = meter.createCounter(
"sitemap.entries.sync-fail"
)
// Paginated sync requests, with a separate "-success" counter.
export const syncEntriesPaginationCounter = meter.createCounter(
"sitemap.entries.sync.pagination"
)
export const syncEntriesPaginationSuccessCounter = meter.createCounter(
"sitemap.entries.sync.pagination-success"
)
// Merging newly synced entries into the stored entries: started / succeeded.
export const mergeEntriesCounter = meter.createCounter("sitemap.entries.merge")
export const mergeEntriesSuccessCounter = meter.createCounter(
"sitemap.entries.merge-success"
)
// Persisting the merged entries.
export const saveEntriesCounter = meter.createCounter("sitemap.entries.save")
// Transforming entries into sitemap data: started / succeeded.
export const transformEntriesCounter = meter.createCounter(
"sitemap.entries.transform"
)
export const transformEntriesSuccessCounter = meter.createCounter(
"sitemap.entries.transform-success"
)
// Persisting the generated sitemap data and the sync token.
export const saveSitemapDataCounter = meter.createCounter("sitemap.data.save")
export const saveSyncTokenCounter = meter.createCounter(
"sitemap.synctoken.save"
)

176
app/api/sitemap/utils.ts Normal file
View File

@@ -0,0 +1,176 @@
import { Lang } from "@/constants/languages"
import { env } from "@/env/server"
import {
mergeEntriesCounter,
mergeEntriesSuccessCounter,
transformEntriesCounter,
transformEntriesSuccessCounter,
} from "./telemetry"
import type { SitemapEntry, SyncItem } from "@/types/sitemap"
/**
 * Merges freshly synced entries into the stored entries.
 *
 * An entry is identified by its `uid` + `locale` pair. A new entry replaces
 * the stored entry with the same identity in place (keeping its position);
 * otherwise it is appended. Neither input array is mutated.
 *
 * @param currentEntries Entries currently stored.
 * @param newEntries Entries received from the latest sync, in sync order.
 * @returns A new array containing the merged entries.
 */
export function mergeEntries(
  currentEntries: SyncItem[],
  newEntries: SyncItem[]
) {
  mergeEntriesCounter.add(1, {
    currentEntriesCount: currentEntries.length,
    newEntriesCount: newEntries.length,
  })
  console.info(
    "sitemap.entries.merge start",
    JSON.stringify({
      currentEntriesCount: currentEntries.length,
      newEntriesCount: newEntries.length,
    })
  )

  const entries = [...currentEntries]

  // uid -> locale -> position in `entries`. Gives constant-time lookups
  // instead of scanning the whole array for every new entry, which is
  // quadratic on an initial sync with tens of thousands of entries.
  const positions = new Map<
    SyncItem["data"]["uid"],
    Map<SyncItem["data"]["locale"], number>
  >()

  const findPosition = ({ data }: SyncItem) =>
    positions.get(data.uid)?.get(data.locale)

  const rememberPosition = ({ data }: SyncItem, position: number) => {
    let byLocale = positions.get(data.uid)
    if (!byLocale) {
      byLocale = new Map()
      positions.set(data.uid, byLocale)
    }
    // Keep the first position seen for an identity, so duplicates in the
    // stored entries are resolved the same way a linear first-match search
    // would resolve them.
    if (!byLocale.has(data.locale)) {
      byLocale.set(data.locale, position)
    }
  }

  entries.forEach((entry, position) => rememberPosition(entry, position))

  for (const entry of newEntries) {
    const position = findPosition(entry)
    if (position !== undefined) {
      entries[position] = entry
    } else {
      rememberPosition(entry, entries.length)
      entries.push(entry)
    }
  }

  mergeEntriesSuccessCounter.add(1, {
    entriesCount: entries.length,
  })
  console.info(
    "sitemap.entries.merge success",
    JSON.stringify({
      entriesCount: entries.length,
    })
  )

  return entries
}
/**
 * Turns synced entries into sitemap entries.
 *
 * Entries are first filtered down to the indexable ones, then grouped by
 * `uid`, and each group is mapped to a single sitemap entry. Groups that do
 * not yield a sitemap entry are dropped.
 *
 * @param entries All stored entries.
 * @returns The sitemap entries derived from `entries`.
 */
export function mapEntriesToSitemapData(entries: SyncItem[]) {
  const entriesCount = entries.length

  transformEntriesCounter.add(1, { entriesCount })
  console.info(
    "sitemap.entries.transform start",
    JSON.stringify({ entriesCount })
  )

  const indexableEntries = filterEntriesToSitemapEntries(entries)
  const groupedByUid = groupEntriesByUid(indexableEntries)

  const sitemapEntries: SitemapEntry[] = []
  for (const localizedEntries of Object.values(groupedByUid)) {
    const sitemapEntry = mapEntriesToSitemapEntry(localizedEntries)
    if (sitemapEntry) {
      sitemapEntries.push(sitemapEntry)
    }
  }

  const sitemapEntriesCount = sitemapEntries.length
  transformEntriesSuccessCounter.add(1, { entriesCount, sitemapEntriesCount })
  console.info(
    "sitemap.entries.transform success",
    JSON.stringify({ entriesCount, sitemapEntriesCount })
  )

  return sitemapEntries
}
/**
 * Keeps only the entries that belong in the sitemap: published entries that
 * have a URL and are not flagged as `noindex` in their SEO metadata.
 */
function filterEntriesToSitemapEntries(entries: SyncItem[]) {
  return entries.filter(({ type, data }) => {
    const isExcludedFromIndex = Boolean(data.web?.seo_metadata?.noindex)
    const isPublished = type === "entry_published"
    const hasUrl = Boolean(data.url)
    return isPublished && hasUrl && !isExcludedFromIndex
  })
}
/**
 * Groups entries by `uid`.
 *
 * Contentstack uses the same `uid` for an entry in every locale. Each locale
 * should show up as an alternate of one sitemap entry, so all localized
 * versions are collected under their shared `uid`.
 */
function groupEntriesByUid(entries: SyncItem[]) {
  const grouped: Record<string, SyncItem[]> = {}
  for (const entry of entries) {
    const { uid } = entry.data
    ;(grouped[uid] ||= []).push(entry)
  }
  return grouped
}
/**
 * Builds one sitemap entry from all localized versions of a single page.
 *
 * The English version is the main entry: without English there can't be
 * other pages in Contentstack. When the group has no English entry, no
 * sitemap entry is produced.
 *
 * @param entries All localized versions sharing one `uid`.
 * @returns The sitemap entry, or `undefined` when there is no English entry.
 */
function mapEntriesToSitemapEntry(entries: SyncItem[]) {
  const mainEntry = entries.find((entry) => entry.data.locale === Lang.en)
  if (!mainEntry) {
    // Nothing to derive when there is no main entry to hang the data on.
    return undefined
  }

  const { locale, url } = mainEntry.data
  const sitemapEntry: SitemapEntry = {
    url: `${env.PUBLIC_URL}/${locale}${url}`,
    lastModified: getLastModified(entries),
    changeFrequency: getChangeFrequency(entries),
    priority: getPriority(entries),
  }

  // `getAlternates` always returns an object, so check for actual content
  // rather than truthiness to avoid attaching an empty `alternates: {}`.
  const alternates = getAlternates(entries)
  if (Object.keys(alternates).length > 0) {
    sitemapEntry.alternates = alternates
  }

  return sitemapEntry
}
/**
 * Returns the most recent `updated_at` value among the localized versions,
 * since each locale can have been modified at a different time. Values are
 * compared as strings; an empty list yields an empty string.
 */
function getLastModified(entries: SyncItem[]) {
  let latest = ""
  for (const { data } of entries) {
    const updatedAt = data.updated_at
    if (updatedAt > latest) {
      latest = updatedAt
    }
  }
  return latest
}
/**
 * Returns the highest change frequency among the localized versions, since
 * each locale can be configured differently. An entry without a configured
 * value counts as "daily"; an empty list yields "never".
 */
function getChangeFrequency(
  entries: SyncItem[]
): SitemapEntry["changeFrequency"] {
  // Ordered from least to most frequent; a later position wins.
  const frequenciesByRank: SitemapEntry["changeFrequency"][] = [
    "never",
    "yearly",
    "monthly",
    "weekly",
    "daily",
    "hourly",
    "always",
  ]

  let highest: SitemapEntry["changeFrequency"] = "never"
  for (const { data } of entries) {
    const candidate =
      data.web?.seo_metadata?.sitemap?.change_frequency ?? "daily"
    const candidateRank = frequenciesByRank.indexOf(candidate)
    const highestRank = frequenciesByRank.indexOf(highest)
    if (candidateRank > highestRank) {
      highest = candidate
    }
  }
  return highest
}
/**
 * Returns the highest sitemap priority among the localized versions, since
 * each locale can be configured differently. An entry without a configured
 * priority counts as 0.5; an empty list yields 0.
 */
function getPriority(entries: SyncItem[]) {
  let highest = 0.0
  for (const { data } of entries) {
    const priority = data.web?.seo_metadata?.sitemap?.priority ?? 0.5
    if (priority > highest) {
      highest = priority
    }
  }
  return highest
}
/**
 * Builds a locale -> absolute URL map for every non-English version of a
 * page. English is skipped here; the result is an empty object when there
 * are no other locales.
 */
function getAlternates(entries: SyncItem[]) {
  const alternates: Partial<Record<Lang, string>> = {}
  for (const { data } of entries) {
    if (data.locale === Lang.en) {
      continue
    }
    alternates[data.locale] = `${env.PUBLIC_URL}/${data.locale}${data.url}`
  }
  return alternates
}