From c953d238a660f7e738ec8d121c6df25c84a043ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Acid=20Chicken=20=28=E7=A1=AB=E9=85=B8=E9=B6=8F=29?= Date: Thu, 30 May 2024 18:00:19 +0900 Subject: [PATCH] chore: use requestInit instead --- src/config.ts | 66 +++++++++++++++++++++++------ src/encoding.ts | 20 ++++++++- src/index.ts | 8 ++-- src/summary/general/index.ts | 4 +- src/summary/general/player.ts | 4 +- src/summary/general/playerOEmbed.ts | 25 +++++++---- src/summary/index.ts | 4 +- 7 files changed, 98 insertions(+), 33 deletions(-) diff --git a/src/config.ts b/src/config.ts index 2f8ce80..fddd45b 100644 --- a/src/config.ts +++ b/src/config.ts @@ -1,13 +1,55 @@ -export const fetchOptions = { - cf: { - cacheEverything: true, - cacheTtlByStatus: { - "200-299": 86400, - "400-599": 60, - }, - } satisfies RequestInitCfProperties, - headers: { - Accept: "text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8", - "User-Agent": "Mozilla/5.0 (compatible; Summerflare; +https://github.com/misskey-dev/summerflare)", - }, +function parseRFC9110ListsLax(value: string | null): string[] { + return ( + value + ?.split(/(?<=^[^"]*|^(?:[^"]*"[^"]*"[^"]*)*),/) + .map((value) => value.trim()) + .filter((value) => value) ?? [] + ) +} + +export function requestInit(request: Request) { + const url = new URL(request.url) + const cdnLoop = parseRFC9110ListsLax(request.headers.get("CDN-Loop")) + if (cdnLoop.some((value) => value.toLowerCase() === url.hostname.toLowerCase() || value.toLowerCase().startsWith(`${url.hostname.toLowerCase()};`))) { + throw new Error("CDN Loop Detected") + } + return { + cf: { + cacheEverything: true, + cacheTtlByStatus: { + "200-299": 86400, + "400-599": 60, + }, + } satisfies RequestInitCfProperties, + headers: { + Accept: "text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8", + "CDN-Loop": cdnLoop.concat(url.hostname).join(", "), + "User-Agent": "Mozilla/5.0 (compatible; Summerflare; +https://github.com/misskey-dev/summerflare)", + }, + } +} + +if (import.meta.vitest) { + const { describe, expect, test } = import.meta.vitest + + describe(parseRFC9110ListsLax.name, () => { + test("null returns an empty array", () => { + expect(parseRFC9110ListsLax(null)).toStrictEqual([]) + }) + test("empty string returns an empty array", () => { + expect(parseRFC9110ListsLax("")).toStrictEqual([]) + }) + test("whitespace only string returns an empty array", () => { + expect(parseRFC9110ListsLax(" ")).toStrictEqual([]) + }) + test("Cache-Control: max-age=86400, stale-while-revalidate=604800, stale-if-error=86400 returns an array with 3 elements", () => { + expect(parseRFC9110ListsLax("max-age=86400, stale-while-revalidate=604800, stale-if-error=86400")).toStrictEqual(["max-age=86400", "stale-while-revalidate=604800", "stale-if-error=86400"]) + }) + test('Example-URIs: "http://example.com/a.html,foo", "http://without-a-comma.example.com/" returns an array with 2 elements', () => { + expect(parseRFC9110ListsLax('"http://example.com/a.html,foo", "http://without-a-comma.example.com/"')).toStrictEqual(['"http://example.com/a.html,foo"', '"http://without-a-comma.example.com/"']) + }) + test('Example-Dates: "Sat, 04 May 1996", "Wed, 14 Sep 2005" returns an array with 2 elements', () => { + expect(parseRFC9110ListsLax('"Sat, 04 May 1996", "Wed, 14 Sep 2005"')).toStrictEqual(['"Sat, 04 May 1996"', '"Wed, 14 Sep 2005"']) + }) + }) } diff --git a/src/encoding.ts b/src/encoding.ts index b54d4d8..650f323 100644 --- a/src/encoding.ts +++ b/src/encoding.ts @@ -1,5 +1,7 @@ -import { UniversalDetector } from "jschardet/src" +import { decode } from "html-entities" +import { UniversalDetector } from "jschardet/src" import MIMEType from "whatwg-mimetype" +import { assign, PrioritizedReference } from "./summary/common" function getCharset(value: string | null): string | null { const type = value === null ? null : MIMEType.parse(value) @@ -24,17 +26,31 @@ export async function normalize(response: Response): Promise { if (!getCharset(headers.get("content-type"))) { const [left, right] = response.body!.tee() response = new Response(left, response) + const result: PrioritizedReference = { + bits: 2, // 0-3 + priority: 0, + content: null, + } const rewriter = new HTMLRewriter() rewriter.on("meta", { element(element) { + const charset = element.getAttribute("charset") + if (charset) { + const mimeType = new MIMEType("text/html") + mimeType.parameters.set("charset", decode(charset)) + assign(result, 3, mimeType.toString()) + } const httpEquiv = element.getAttribute("http-equiv")?.toLowerCase() if (httpEquiv === "content-type") { - headers.set(httpEquiv, element.getAttribute("content")!) + assign(result, 2, element.getAttribute("content")!) } }, }) const reader = rewriter.transform(new Response(right, response)).body!.getReader() while (!(await reader.read()).done); + if (result.content) { + headers.set("content-type", result.content) + } } if (!headers.has("content-type")) { const [left, right] = response.body!.tee() diff --git a/src/index.ts b/src/index.ts index ddac83c..d2461da 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,5 @@ import { Hono } from "hono" -import { fetchOptions } from "./config" +import { requestInit } from "./config" import { normalize } from "./encoding" import summary from "./summary" export interface Env { @@ -30,10 +30,10 @@ app.get("/url", async (context) => { } catch (e) { return context.json({ error: "Invalid URL" }, 400) } - const response = (await fetch(url, fetchOptions)) as any as Response + const response = (await fetch(url, requestInit(context.req.raw))) as any as Response url = new URL(response.url) const rewriter = new HTMLRewriter() - const summarized = summary(url, rewriter) + const summarized = summary(context.req.raw, url, rewriter) const reader = (rewriter.transform(await normalize(response)).body as ReadableStream).getReader() while (!(await reader.read()).done); return context.json(await summarized) @@ -176,7 +176,7 @@ if (import.meta.vitest) { ])("should return summary of %s <%s>", async (_, url, contentType, expected) => { const request = new Request(`https://fakehost/url?${new URLSearchParams({ url })}`) const ctx = createExecutionContext() - const preconnect = await fetch(url, fetchOptions) + const preconnect = await fetch(url, requestInit(request)) expect(preconnect.status).toBe(200) expect(preconnect.headers.get("content-type")).toBe(contentType) const response = await app.fetch(request, env, ctx) diff --git a/src/summary/general/index.ts b/src/summary/general/index.ts index e825d5f..770230d 100644 --- a/src/summary/general/index.ts +++ b/src/summary/general/index.ts @@ -8,11 +8,11 @@ import getTitle from "./title" import getSensitive from "./sensitive" import getPlayer, { Player } from "./player" -export default function general(url: URL, html: HTMLRewriter) { +export default function general(request: Request, url: URL, html: HTMLRewriter) { const card = getCard(url, html) const title = getTitle(url, html) const image = getImage(url, html) - const player = Promise.all([card, getPlayer(url, html)]).then(([card, parsedPlayer]) => { + const player = Promise.all([card, getPlayer(request, url, html)]).then(([card, parsedPlayer]) => { return { url: (card !== "summary_large_image" && parsedPlayer.urlGeneral) || parsedPlayer.urlCommon, width: parsedPlayer.width, diff --git a/src/summary/general/player.ts b/src/summary/general/player.ts index cef30c7..a40996a 100644 --- a/src/summary/general/player.ts +++ b/src/summary/general/player.ts @@ -16,8 +16,8 @@ export interface ParsedPlayer extends Omit { urlGeneral: string | null } -export default function getPlayer(url: URL, html: HTMLRewriter): Promise { - const oEmbed = getPlayerOEmbed(url, html) +export default function getPlayer(request: Request, url: URL, html: HTMLRewriter): Promise { + const oEmbed = getPlayerOEmbed(request, url, html) const urlGeneral = getPlayerUrlGeneral(url, html) const urlCommon = getPlayerUrlCommon(url, html) const width = getPlayerUrlWidth(url, html) diff --git a/src/summary/general/playerOEmbed.ts b/src/summary/general/playerOEmbed.ts index be2d0ef..ec760bf 100644 --- a/src/summary/general/playerOEmbed.ts +++ b/src/summary/general/playerOEmbed.ts @@ -1,6 +1,6 @@ import { decode } from "html-entities" import { z } from "zod" -import { fetchOptions } from "../../config" +import { requestInit } from "../../config" import { assign, PrioritizedReference } from "../common" import type { ParsedPlayer } from "./player" @@ -41,7 +41,8 @@ const oEmbed = z.union([ }), ]) -export default function getPlayerOEmbed(url: URL, html: HTMLRewriter) { +export default function getPlayerOEmbed(request: Request, url: URL, html: HTMLRewriter) { + const { promise, resolve, reject } = Promise.withResolvers() const result: PrioritizedReference = { bits: 1, // 0-1 priority: 0, @@ -59,7 +60,14 @@ export default function getPlayerOEmbed(url: URL, html: HTMLRewriter) { if (!oEmbedHref) { return } - const oEmbedData: unknown = await fetch(oEmbedHref, fetchOptions) + let init: RequestInit + try { + init = requestInit(request) + } catch (e) { + reject(e) + return + } + const oEmbedData: unknown = await fetch(oEmbedHref, init) .then((response) => response.json()) .catch(() => undefined) const { success, data } = oEmbed.safeParse(oEmbedData) @@ -112,11 +120,10 @@ export default function getPlayerOEmbed(url: URL, html: HTMLRewriter) { } }, }) - return new Promise((resolve) => { - html.onDocument({ - end() { - resolve(result.content) - }, - }) + html.onDocument({ + end() { + resolve(result.content) + }, }) + return promise } diff --git a/src/summary/index.ts b/src/summary/index.ts index e7bd6ca..b9e838c 100644 --- a/src/summary/index.ts +++ b/src/summary/index.ts @@ -2,12 +2,12 @@ import amazon from "./amazon" import general from "./general" import wikipedia from "./wikipedia" -export default function summary(url: URL, html: HTMLRewriter) { +export default function summary(request: Request, url: URL, html: HTMLRewriter) { if (url.hostname === "www.amazon.com" || url.hostname === "www.amazon.co.jp" || url.hostname === "www.amazon.ca" || url.hostname === "www.amazon.com.br" || url.hostname === "www.amazon.com.mx" || url.hostname === "www.amazon.co.uk" || url.hostname === "www.amazon.de" || url.hostname === "www.amazon.fr" || url.hostname === "www.amazon.it" || url.hostname === "www.amazon.es" || url.hostname === "www.amazon.nl" || url.hostname === "www.amazon.cn" || url.hostname === "www.amazon.in" || url.hostname === "www.amazon.au") { return amazon(url, html) } if (`.${url.hostname}`.endsWith(".wikipedia.org")) { return wikipedia(url, html) } - return general(url, html) + return general(request, url, html) }