Merged in feature/BOOK-401-handle-new-redirects (pull request #2942)

feat(BOOK-401): redirect-service with tests and typings

* Switch to typescript

* add new dependencies for tests and typescript

* Switch to typescript

* add new dependencies for tests and typescript

* rewrite to typescript and include tests

* rewrite to typescript and include tests

* refactor: update README and scripts for CSV handling; rename update script to generate

* include csv-data folder


Approved-by: Linus Flood
This commit is contained in:
Joakim Jäderberg
2025-10-10 07:32:32 +00:00
parent 790a5edd95
commit eba6319ebe
23 changed files with 430 additions and 163 deletions

View File

@@ -0,0 +1,18 @@
import fs from "node:fs"
import { langs } from "./lang"
import { csvFilePath } from "./paths"
/**
 * Verifies that the CSV input file exists for every language in `langs`.
 *
 * All languages are checked before failing, so the error lists every
 * missing file at once rather than only the first one.
 *
 * @throws Error whose message lists each missing CSV path on its own line.
 */
export function checkPrerequisites() {
  const missingCsvPaths: string[] = []

  for (const lang of langs) {
    const candidate = csvFilePath(lang)
    if (fs.existsSync(candidate)) {
      continue
    }
    missingCsvPaths.push(candidate)
  }

  if (missingCsvPaths.length === 0) {
    return
  }

  throw new Error(`Missing CSV file:\n${missingCsvPaths.join("\n")}`)
}

View File

@@ -0,0 +1,59 @@
import { describe, it, expect } from "vitest"
import { createRedirectEntry } from "./createRedirectEntry"
describe("createRedirectEntry", () => {
  describe("top level domain from old site to new", () => {
    // One row per old top-level domain: the test title, the URL pair fed to
    // createRedirectEntry, and the entry it is expected to produce.
    const cases = [
      {
        title: ".com",
        oldUrl: "https://scandichotels.com/old-path",
        newUrl: "https://www.scandichotels.com/en/new-path",
        expected: { from: "/en/old-path", to: "/en/new-path" },
      },
      {
        title: ".se",
        oldUrl: "https://scandichotels.se/old-path",
        newUrl: "https://www.scandichotels.com/sv/new-path",
        expected: { from: "/sv/old-path", to: "/sv/new-path" },
      },
      {
        title: ".dk",
        oldUrl: "https://scandichotels.dk/old-path",
        newUrl: "https://www.scandichotels.com/da/new-path",
        expected: { from: "/da/old-path", to: "/da/new-path" },
      },
      {
        title: ".de",
        oldUrl: "https://scandichotels.de/old-path",
        newUrl: "https://www.scandichotels.com/de/new-path",
        expected: { from: "/de/old-path", to: "/de/new-path" },
      },
      {
        title: ".fi",
        oldUrl: "https://scandichotels.fi/old-path",
        newUrl: "https://www.scandichotels.com/fi/new-path",
        expected: { from: "/fi/old-path", to: "/fi/new-path" },
      },
      {
        title: ".no",
        oldUrl: "https://scandichotels.no/old-path",
        newUrl: "https://www.scandichotels.com/no/new-path",
        expected: { from: "/no/old-path", to: "/no/new-path" },
      },
    ]

    for (const { title, oldUrl, newUrl, expected } of cases) {
      it(title, () => {
        const entry = createRedirectEntry({ oldUrl, newUrl })
        expect(entry).toEqual(expected)
      })
    }
  })
})

View File

@@ -0,0 +1,17 @@
import { akamaiRedirect, removeDomain } from "./utils"
/**
 * Turns one old-URL/new-URL pair into a redirect entry.
 *
 * `oldUrl` is passed through `akamaiRedirect` and then `removeDomain`;
 * `newUrl` is passed through `removeDomain` only.
 *
 * @param oldUrl - URL to redirect from.
 * @param newUrl - URL to redirect to.
 * @returns `{ from, to }` holding the two transformed strings.
 */
export function createRedirectEntry({
  oldUrl,
  newUrl,
}: {
  oldUrl: string
  newUrl: string
}) {
  const localizedOldUrl = akamaiRedirect(oldUrl)

  return {
    from: removeDomain(localizedOldUrl),
    to: removeDomain(newUrl),
  }
}

View File

@@ -0,0 +1,68 @@
import fs from "node:fs"
import csvToJson from "convert-csv-to-json"
import { Lang, langs } from "./lang"
import { csvFilePath, outputFilepath } from "./paths"
import { checkPrerequisites } from "./checkPrerequisites"
import { createRedirectEntry } from "./createRedirectEntry"
/** Column titles that are read from every CSV row. */
const csvHeaders = {
  current: "Old URL",
  redirect: "Redirect URL",
} as const

/** Union of the column titles listed in `csvHeaders`. */
type CsvHeader = (typeof csvHeaders)[keyof typeof csvHeaders]

/** One parsed CSV row: a string value for each column in `csvHeaders`. */
type Entry = Record<CsvHeader, string>
/**
 * Reads the CSV file for `lang` and returns its rows as objects.
 *
 * The file is read as UTF-8 with `,` as the field delimiter.
 *
 * NOTE(review): `T` is supplied by the caller and nothing here checks the
 * parsed rows against it.
 *
 * @param lang - Language whose CSV file (see `csvFilePath`) is read.
 * @returns The rows produced by `convert-csv-to-json`.
 */
async function convertCsvToJson<T extends Record<string, unknown>>(
  lang: Lang
): Promise<T[]> {
  const reader = csvToJson.utf8Encoding().fieldDelimiter(",")
  const sourcePath = csvFilePath(lang)

  return reader.getJsonFromCsv(sourcePath)
}
/**
 * Writes the redirect lookup for one language to its output JSON file.
 *
 * Each CSV row is converted to a `from`/`to` pair by `createRedirectEntry`.
 * The pairs are collected into a single object keyed by `from` (when two rows
 * share a `from`, the later row wins) and serialized with `JSON.stringify`.
 *
 * A failed write is logged with `console.error` and not rethrown.
 * NOTE(review): because the failure is not propagated, the caller cannot tell
 * that the file was not written.
 *
 * @param data - Parsed CSV rows.
 * @param lang - Language the rows belong to; selects the output file.
 */
async function createRedirectFile(data: Entry[], lang: Lang) {
  // Build the lookup in a single pass. Spreading the accumulator for every row
  // re-copies all previously collected keys each time, which is quadratic in
  // the number of rows.
  const finalUrls: Record<string, string> = Object.fromEntries(
    data.map((row): [string, string] => {
      const { from, to } = createRedirectEntry({
        oldUrl: row[csvHeaders.current],
        newUrl: row[csvHeaders.redirect],
      })
      return [from, to]
    })
  )

  try {
    fs.writeFileSync(outputFilepath(lang), JSON.stringify(finalUrls), {
      encoding: "utf-8",
    })
  } catch (e) {
    console.error(`Unable to create redirect file for lang: ${lang}`, e)
  }
}
// Fail fast when a language is missing its CSV input.
checkPrerequisites()
console.log("All prerequisites met. Generating redirect files...")

// Running total of CSV rows handled across all languages.
let totalCount = 0

// Languages are handled one at a time, in sorted order, so the log output
// appears in a stable order.
for (const lang of langs.toSorted()) {
  const startedAt = performance.now()

  const rows = await convertCsvToJson<Entry>(lang)
  await createRedirectFile(rows, lang)

  const elapsedMs = (performance.now() - startedAt).toFixed(0)
  console.log(
    `Created redirect file for ${lang}: Processed ${rows.length} entries in ${elapsedMs}ms`
  )

  totalCount += rows.length
}

console.log(`\n🥳 All done!\nProcessed a total of ${totalCount} entries.`)

View File

@@ -0,0 +1,2 @@
/** Language codes handled by the tooling; `as const` keeps each code as a literal type. */
export const langs = ["da", "de", "en", "fi", "no", "sv"] as const
/** Union of the language codes listed in `langs`. */
export type Lang = (typeof langs)[number]

View File

@@ -0,0 +1,17 @@
import { Lang } from "./lang"
import path from "node:path"
/**
 * Absolute path of the CSV input file for a language.
 *
 * Built with `path.resolve`, the same way as `outputFilepath`, so the result
 * is normalized (no literal `..` segment) and uses the platform separator.
 *
 * @param lang - Language whose CSV file is wanted.
 * @returns Path of `../data/csv/<lang>.csv` relative to this module's directory.
 */
export function csvFilePath(lang: Lang) {
  return path.resolve(import.meta.dirname, `../data/csv/${lang}.csv`)
}
/**
 * Absolute path of the JSON file for a language.
 *
 * Built with `path.resolve`, the same way as `outputFilepath`, so the result
 * is normalized (no literal `..` segment) and uses the platform separator.
 *
 * @param lang - Language whose JSON file is wanted.
 * @returns Path of `../data/json/<lang>.json` relative to this module's directory.
 */
export function jsonFilePath(lang: Lang) {
  return path.resolve(import.meta.dirname, `../data/json/${lang}.json`)
}
/**
 * Absolute path of the generated redirect file for a language.
 *
 * @param lang - Language whose output file is wanted.
 * @returns The path resolved from this module's directory.
 */
export function outputFilepath(lang: Lang) {
  const relativeTarget = `../../netlify/functions/data/${lang}.json`

  return path.resolve(import.meta.dirname, relativeTarget)
}

View File

@@ -0,0 +1,114 @@
import { describe, it, expect } from "vitest"
import { akamaiRedirect, removeDomain } from "./utils"
describe("removeDomain", () => {
  // Each row: test title, input string, and the exact string removeDomain must return.
  const cases: Array<{ title: string; input: string; expected: string }> = [
    {
      title: "https scandichotels.com with path",
      input: "https://scandichotels.com/foo",
      expected: "/foo",
    },
    {
      title: "http www scandichotels.com no path",
      input: "http://www.scandichotels.com",
      expected: "",
    },
    {
      title: "stage subdomain with de TLD, query and hash",
      input: "https://stage.scandichotels.de/path?query=1#frag",
      expected: "/path?query=1#frag",
    },
    {
      title: "prod subdomain with fi TLD and trailing slash",
      input: "http://prod.scandichotels.fi/",
      expected: "/",
    },
    {
      title: "test subdomain with dk TLD and deeper path",
      input: "https://test.scandichotels.dk/some/page",
      expected: "/some/page",
    },
    {
      title: "no TLD with unicode path",
      input: "https://scandichotels.no/öä",
      expected: "/öä",
    },
    {
      title: "se TLD root",
      input: "https://scandichotels.se",
      expected: "",
    },
    {
      title: "input without protocol should be unchanged",
      input: "scandichotels.com/foo",
      expected: "scandichotels.com/foo",
    },
    {
      title: "different domain containing scandichotels should be unchanged",
      input: "https://example.com/scandichotels.com/foo",
      expected: "https://example.com/scandichotels.com/foo",
    },
    {
      title:
        "uppercase scheme/domain should be unchanged (regex is case-sensitive)",
      input: "HTTPS://WWW.SCANDICHOTELS.COM/Path",
      expected: "HTTPS://WWW.SCANDICHOTELS.COM/Path",
    },
    {
      title:
        "only removes the leading matching domain when it appears at the start",
      input: "https://scandichotels.com/https://scandichotels.com/foo",
      expected: "/https://scandichotels.com/foo",
    },
  ]

  for (const { title, input, expected } of cases) {
    it(title, () => {
      expect(removeDomain(input)).toBe(expected)
    })
  }
})
describe("akamaiRedirect", () => {
  // Each row: test title, input string, and the exact string akamaiRedirect must return.
  const cases: Array<{ title: string; input: string; expected: string }> = [
    {
      title: "https scandichotels.com with path",
      input: "https://scandichotels.com/foo",
      expected: "/en/foo",
    },
    {
      title: "http www scandichotels.com no path",
      input: "http://www.scandichotels.com",
      expected: "/en",
    },
    {
      title: "stage subdomain with de TLD, query and hash",
      input: "https://stage.scandichotels.de/path?query=1#frag",
      expected: "/de/path?query=1#frag",
    },
    {
      title: "prod subdomain with fi TLD and trailing slash",
      input: "http://prod.scandichotels.fi/",
      expected: "/fi/",
    },
    {
      title: "test subdomain with dk TLD and deeper path",
      input: "https://test.scandichotels.dk/some/page",
      expected: "/da/some/page",
    },
    {
      title: "no TLD with unicode path",
      input: "https://scandichotels.no/öä",
      expected: "/no/öä",
    },
    {
      title: "se TLD root",
      input: "https://scandichotels.se",
      expected: "/sv",
    },
    {
      title: "input without protocol should be unchanged",
      input: "scandichotels.com/foo",
      expected: "scandichotels.com/foo",
    },
    {
      title: "different domain containing scandichotels should be unchanged",
      input: "https://example.com/scandichotels.com/foo",
      expected: "https://example.com/scandichotels.com/foo",
    },
    {
      title:
        "uppercase scheme/domain should be unchanged (regex is case-sensitive)",
      input: "HTTPS://WWW.SCANDICHOTELS.COM/Path",
      expected: "HTTPS://WWW.SCANDICHOTELS.COM/Path",
    },
    {
      title:
        "only replaces the leading matching domain when it appears at the start",
      input: "https://scandichotels.com/https://scandichotels.com/foo",
      expected: "/en/https://scandichotels.com/foo",
    },
  ]

  for (const { title, input, expected } of cases) {
    it(title, () => {
      expect(akamaiRedirect(input)).toBe(expected)
    })
  }
})

View File

@@ -0,0 +1,68 @@
/**
 * Removes the scheme and Scandic domain prefix from a URL string.
 *
 * Strips a leading "http://" or "https://" plus an optional
 * "www|test|stage|prod" subdomain for hosts matching
 * scandichotels.(com|de|dk|fi|no|se). The match is case-sensitive and anchored
 * at the start of the string; anything after the top-level domain is kept.
 *
 * @param str - Input string or URL.
 * @returns The input with the Scandic domain portion removed, or the input
 *          unchanged when it does not start with a matching Scandic URL.
 */
export function removeDomain(str: string) {
  return str.replace(
    // The dot before the TLD is escaped so it only matches a literal ".",
    // not an arbitrary character (e.g. "scandichotelsXcom").
    /^https?:\/\/((www|test|stage|prod)\.)?scandichotels\.(com|de|dk|fi|no|se)/,
    ""
  )
}
/** Locale path prefix for each supported Scandic top-level domain. */
const localePrefixByTld: Record<string, string> = {
  com: "/en",
  de: "/de",
  dk: "/da",
  fi: "/fi",
  no: "/no",
  se: "/sv",
}

/**
 * Converts an absolute Scandic Hotels URL to a localized path prefix based on the top-level domain.
 *
 * Replaces the leading "http(s)://[www|test|stage|prod.]scandichotels.{tld}" portion with a locale prefix:
 * - .com -> /en
 * - .de -> /de
 * - .dk -> /da
 * - .fi -> /fi
 * - .no -> /no
 * - .se -> /sv
 *
 * The function recognizes http and https schemes and optional subdomains (www, test, stage, prod).
 * If the input does not match the expected host pattern, the original string is returned unchanged.
 *
 * Examples:
 * - "https://scandichotels.com/hotels" -> "/en/hotels"
 * - "http://www.scandichotels.de" -> "/de"
 * - "https://stage.scandichotels.se/?" -> "/sv/?"
 *
 * @param str - The URL string to convert.
 * @returns The input string with the Scandic Hotels host replaced by the locale path prefix,
 * or the original string if no matching Scandic host is found.
 *
 * @remarks
 * - This is a simple, case-sensitive regex replacement and does not perform full URL parsing or validation.
 * - The remainder of the path, query string, and fragment (if present) are preserved after the locale prefix.
 * - If the original URL ends exactly at the TLD (no trailing slash), the result will be the locale prefix without a trailing slash.
 */
export function akamaiRedirect(str: string) {
  return str.replace(
    // The dot before the TLD is escaped so it only matches a literal ".",
    // not an arbitrary character (e.g. "scandichotelsXse").
    /^https?:\/\/((www|test|stage|prod)\.)?scandichotels\.(com|de|dk|fi|no|se)/,
    // The third capture group is the TLD. The regex only admits TLDs present
    // in the table; the empty-string fallback mirrors the previous default.
    (_host, _subdomainWithDot, _subdomain, tld: string) =>
      localePrefixByTld[tld] ?? ""
  )
}

View File

@@ -1,138 +0,0 @@
import fs from 'node:fs';
import path from 'node:path';
import csvToJson from 'convert-csv-to-json';
// Language codes processed by this script; each one has its own CSV, JSON and output file.
const langs = ['da', 'de', 'en', 'fi', 'no', 'sv'];
// Column names looked up on every parsed CSV row.
const csvHeaders = {
current: 'Current URL',
redirect: 'Redirect URL',
};
/**
 * Absolute path of the CSV input file for a language.
 *
 * Built with `path.resolve`, the same way as `outputFilepath`, so the result
 * uses the platform separator instead of a hard-coded `/`.
 *
 * @param lang - Language code.
 * @returns Path of `csv/<lang>.csv` inside this module's directory.
 */
function csvFilePath(lang) {
  return path.resolve(import.meta.dirname, `csv/${lang}.csv`);
}
/**
 * Absolute path of the intermediate JSON file for a language.
 *
 * Built with `path.resolve`, the same way as `outputFilepath`, so the result
 * uses the platform separator instead of a hard-coded `/`.
 *
 * @param lang - Language code.
 * @returns Path of `json/<lang>.json` inside this module's directory.
 */
function jsonFilePath(lang) {
  return path.resolve(import.meta.dirname, `json/${lang}.json`);
}
/**
 * Absolute path of the generated redirect file for a language.
 *
 * @param lang - Language code.
 * @returns The path resolved from this module's directory.
 */
function outputFilepath(lang) {
  const relativeTarget = `../netlify/functions/data/${lang}.json`;

  return path.resolve(import.meta.dirname, relativeTarget);
}
/**
 * Removes a leading "http(s)://[www|test|stage|prod.]scandichotels.{tld}"
 * prefix from a string, where {tld} is one of com, de, dk, fi, no, se.
 *
 * The match is case-sensitive and anchored at the start of the string.
 *
 * @param str - Input string or URL.
 * @returns The input without the Scandic host prefix, or the input unchanged
 *          when it does not start with one.
 */
function removeDomain(str) {
  return str.replace(
    // The dot before the TLD is escaped so it only matches a literal ".",
    // not an arbitrary character (e.g. "scandichotelsXcom").
    /^https?:\/\/((www|test|stage|prod)\.)?scandichotels\.(com|de|dk|fi|no|se)/,
    ''
  );
}
// Locale path prefix for each supported Scandic top-level domain.
const localePrefixByTld = new Map([
  ['com', '/en'],
  ['de', '/de'],
  ['dk', '/da'],
  ['fi', '/fi'],
  ['no', '/no'],
  ['se', '/sv'],
]);

/**
 * Replaces a leading "http(s)://[www|test|stage|prod.]scandichotels.{tld}"
 * prefix with the locale path prefix for that top-level domain
 * (com -> /en, de -> /de, dk -> /da, fi -> /fi, no -> /no, se -> /sv).
 *
 * The match is case-sensitive and anchored at the start of the string; the
 * rest of the string is kept as is.
 *
 * @param str - The URL string to convert.
 * @returns The converted string, or the input unchanged when it does not
 *          start with a matching Scandic URL.
 */
function akamaiRedirect(str) {
  return str.replace(
    // The dot before the TLD is escaped so it only matches a literal ".",
    // not an arbitrary character (e.g. "scandichotelsXse").
    /^https?:\/\/((www|test|stage|prod)\.)?scandichotels\.(com|de|dk|fi|no|se)/,
    // match[3] is the third capture group: the top-level domain. The regex only
    // admits TLDs present in the table; the fallback mirrors the previous default.
    (...match) => localePrefixByTld.get(match[3]) ?? ''
  );
}
/**
 * Verifies that the CSV input file exists for every language in `langs`.
 *
 * When files are missing, all of their paths are printed with
 * `console.error` and the process exits with status 1.
 */
function checkPrerequisites() {
  const missingLangs = langs
    .map((lang) => csvFilePath(lang))
    .filter((filepath) => !fs.existsSync(filepath));

  if (missingLangs.length === 0) {
    return;
  }

  console.error(`Missing CSV file:\n${missingLangs.join('\n')}`);
  process.exit(1);
}
// convert-csv-to-json writes async without callback support,
// so we work around it by overriding console.log, which it uses when it is done.
/**
 * Converts the CSV file of every language in `langs` to a JSON file.
 *
 * While waiting, `console.log` is replaced by a counter that watches for
 * messages containing "File saved:"; once one has been seen per language the
 * original `console.log` is restored and the promise resolves.
 *
 * @returns A promise that resolves when all files have been reported saved.
 *          It rejects with an `Error('timeout')` if that has not happened
 *          within 5 seconds, or with the thrown error if starting a
 *          conversion fails.
 */
async function convertCsvToJson() {
  return new Promise((resolve, reject) => {
    const _consoleLog = console.log;
    let resolved = 0;

    // Started before the conversions so it can always be cleared on success.
    // A timer left pending would keep the process alive after the work is done.
    const timer = setTimeout(() => {
      // Give console.log back even when giving up.
      console.log = _consoleLog;
      reject(new Error('timeout'));
    }, 5000);

    console.log = function (str) {
      // Only string messages can carry the marker; anything else is ignored
      // instead of crashing on a missing indexOf.
      if (typeof str === 'string' && str.indexOf('File saved:') >= 0) {
        resolved++;
      }
      if (resolved === langs.length) {
        clearTimeout(timer);
        console.log = _consoleLog;
        resolve();
      }
    };

    try {
      for (const lang of langs) {
        csvToJson
          .utf8Encoding()
          .fieldDelimiter(',')
          .generateJsonFileFromCsv(csvFilePath(lang), jsonFilePath(lang));
      }
    } catch (e) {
      // A synchronous failure must not leave console.log overridden or the timer pending.
      clearTimeout(timer);
      console.log = _consoleLog;
      reject(e);
    }
  });
}
/**
 * Builds the redirect lookup file for every language in `langs`.
 *
 * For each language the intermediate JSON file is read and parsed. Every row's
 * "current" URL is passed through `akamaiRedirect` and `removeDomain` to form
 * the key, and its "redirect" URL through `removeDomain` to form the value.
 * The resulting object is written to the language's output file; when two
 * rows produce the same key, the later row wins.
 *
 * Any error for a language (unreadable file, invalid JSON, JSON that is not
 * an array, failed write) is logged with `console.error`, and processing
 * continues with the next language.
 */
async function makeOutput() {
  for (const lang of langs) {
    try {
      const json = JSON.parse(
        fs.readFileSync(jsonFilePath(lang), {
          encoding: 'utf-8',
        })
      );

      if (!Array.isArray(json)) {
        throw new Error(`JSON was not an array: ${jsonFilePath(lang)}`);
      }

      // Build the lookup in a single pass. Spreading the accumulator for every
      // row re-copies all previously collected keys, which is quadratic in the
      // number of rows.
      const finalUrls = Object.fromEntries(
        json.map((url) => [
          removeDomain(akamaiRedirect(url[csvHeaders.current])),
          removeDomain(url[csvHeaders.redirect]),
        ])
      );

      fs.writeFileSync(outputFilepath(lang), JSON.stringify(finalUrls), {
        encoding: 'utf-8',
      });
    } catch (e) {
      console.error(e);
    }
  }
}
// Script entry point: verify that the CSV inputs exist, convert them to JSON
// files, then build the redirect lookup files from those JSON files.
checkPrerequisites();
await convertCsvToJson();
await makeOutput();