fix: correctly normalize encoding

This commit is contained in:
Acid Chicken (硫酸鶏)
2024-05-28 15:11:27 +09:00
parent 60507c3a88
commit 0cfacfeacc
29 changed files with 1371 additions and 1307 deletions

2
.prettierrc Normal file
View File

@ -0,0 +1,2 @@
printWidth: 4096
semi: false

View File

@ -6,7 +6,7 @@
```bash ```bash
pnpm i pnpm i
pnpm wrangler publish pnpm wrangler deploy --minify
``` ```
After executing the above command, access <https://workers.example/url?url=https%3A%2F%2Fexample.com> to verify that the worker is working properly. After executing the above command, access <https://workers.example/url?url=https%3A%2F%2Fexample.com> to verify that the worker is working properly.

View File

@ -1,16 +1,17 @@
{ {
"packageManager": "pnpm@8.3.1", "packageManager": "pnpm@8.3.1",
"devDependencies": { "devDependencies": {
"@cloudflare/workers-types": "^4.20230511.0", "@cloudflare/vitest-pool-workers": "0.1.17",
"@types/whatwg-mimetype": "^3.0.1", "@cloudflare/workers-types": "^4.20240405.0",
"vitest": "^0.31.0", "@types/whatwg-mimetype": "^3.0.2",
"wrangler": "^2.20.0" "vitest": "1.3.0",
"wrangler": "^3.48.0"
}, },
"dependencies": { "dependencies": {
"@zxing/text-encoding": "^0.9.0", "hono": "^4.2.3",
"hono": "^3.1.8", "html-entities": "^2.5.2",
"html-entities": "^2.3.3", "jschardet": "^3.1.2",
"summaly": "^2.7.0", "summaly": "^2.7.0",
"whatwg-mimetype": "^3.0.0" "whatwg-mimetype": "^4.0.0"
} }
} }

1472
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

7
src/config.ts Normal file
View File

@ -0,0 +1,7 @@
// Cache lifetimes, in seconds, applied per upstream response status range.
const SUCCESS_TTL_SECONDS = 86400 // 24 hours for 2xx responses
const ERROR_TTL_SECONDS = 60 // 1 minute for 4xx/5xx responses

/**
 * Cloudflare-specific request options intended to be passed as the `cf`
 * property of a `fetch` init object.
 *
 * - `cacheEverything` asks the edge cache to store the response.
 * - `cacheTtlByStatus` maps status-code ranges to a cache TTL.
 */
export const cf = {
  cacheEverything: true,
  cacheTtlByStatus: {
    "200-299": SUCCESS_TTL_SECONDS,
    "400-599": ERROR_TTL_SECONDS,
  },
} satisfies RequestInitCfProperties

View File

@ -1,69 +1,52 @@
// due to the bug in the Cloudflare Workers runtime, we have to use @zxing/text-encoding instead of the built-in TextEncoder/TextDecoder. import { UniversalDetector } from "jschardet/src"
import { encodingIndexes } from "@zxing/text-encoding/esm/encoding-indexes"; import MIMEType from "whatwg-mimetype"
(globalThis as any).TextEncodingIndexes = { encodingIndexes };
import { TextDecoder, TextEncoder } from "@zxing/text-encoding"; function getCharset(value: string | null): string | null {
import MIMEType from "whatwg-mimetype"; const type = value === null ? null : MIMEType.parse(value)
return type?.parameters.get("charset") ?? null
function getCharsetFromHeader(response: Response): string | null {
const contentType = response.headers.get("Content-Type");
if (contentType === null) {
return null;
}
try {
const mimeType = new MIMEType(contentType);
return mimeType.parameters.get("charset") ?? null;
} catch {
return null;
}
} }
async function getCharsetFromBody(response: Response): Promise<string | null> { async function guessCharsetFromBody(body: ReadableStream<any>): Promise<string | null> {
let charset: string | null = null; const detector = new UniversalDetector()
const rewriter = new HTMLRewriter(); const decoder = new TextDecoder()
for await (const chunk of body) {
detector.feed(decoder.decode(chunk, { stream: true }))
if (detector.done) {
break
}
}
detector.close()
return detector.result?.encoding ?? null
}
export async function normalize(response: Response): Promise<Response> {
const headers = new Headers(response.headers)
if (!getCharset(headers.get("content-type"))) {
const [left, right] = response.body!.tee()
response = new Response(left, response)
const rewriter = new HTMLRewriter()
rewriter.on("meta", { rewriter.on("meta", {
element(element) { element(element) {
const httpEquiv = element.getAttribute("http-equiv"); const httpEquiv = element.getAttribute("http-equiv")?.toLowerCase()
if ( if (httpEquiv === "content-type") {
charset === null && headers.set(httpEquiv, element.getAttribute("content")!)
httpEquiv !== null &&
httpEquiv.toLowerCase() === "content-type"
) {
const content = element.getAttribute("content");
if (content !== null) {
try {
const mimeType = new MIMEType(content);
charset = mimeType.parameters.get("charset") ?? null;
} catch {}
}
}
const charsetAttr = element.getAttribute("charset");
if (charsetAttr !== null) {
charset = charsetAttr;
} }
}, },
}); })
const reader = rewriter.transform(response).body!.getReader(); const reader = rewriter.transform(new Response(right, response)).body!.getReader()
while (!(await reader.read()).done); while (!(await reader.read()).done);
return charset;
}
export async function getNormalizer(
response: Response
): Promise<TransformStream<Uint8Array, Uint8Array>> {
const charset =
getCharsetFromHeader(response) ?? (await getCharsetFromBody(response));
if (charset === null || charset.toLowerCase() === "utf-8") {
return new TransformStream();
} }
const decoder = new TextDecoder(charset, { fatal: true, ignoreBOM: true }); if (!headers.has("content-type")) {
const encoder = new TextEncoder(); const [left, right] = response.body!.tee()
const transform = new TransformStream<Uint8Array, Uint8Array>({ response = new Response(left, response)
transform(chunk, controller) { const guessed = await guessCharsetFromBody(right)
controller.enqueue( if (guessed) {
encoder.encode(decoder.decode(chunk, { stream: true })) headers.set("content-type", `text/html; charset=${guessed}`)
); }
}, }
}); return new Response(response.body, {
return transform; headers,
status: response.status,
statusText: response.statusText,
})
} }

View File

@ -1,7 +1,7 @@
import { Hono } from "hono"; import { Hono } from "hono"
import summary from "./summary"; import { cf } from "./config"
import { getNormalizer } from "./encoding"; import { normalize } from "./encoding"
import summary from "./summary"
export interface Env { export interface Env {
// Example binding to KV. Learn more at https://developers.cloudflare.com/workers/runtime-apis/kv/ // Example binding to KV. Learn more at https://developers.cloudflare.com/workers/runtime-apis/kv/
// MY_KV_NAMESPACE: KVNamespace; // MY_KV_NAMESPACE: KVNamespace;
@ -16,31 +16,128 @@ export interface Env {
// MY_SERVICE: Fetcher; // MY_SERVICE: Fetcher;
} }
const app = new Hono<Env>(); const app = new Hono<Env>()
app.onError((error, context) => { app.onError((error, context) => {
console.error(error); console.error(error)
return context.json({ error: error.message }, 500); return context.json({ error: error.message }, 500)
}); })
app.get("/url", async (context) => { app.get("/url", async (context) => {
let url: URL; let url: URL
try { try {
url = new URL(context.req.query("url")!); url = new URL(context.req.query("url")!)
} catch (e) { } catch (e) {
return context.json({ error: "Invalid URL" }, 400); return context.json({ error: "Invalid URL" }, 400)
} }
const response = await fetch(url); const response = (await fetch(url, {
url = new URL(response.url); cf,
const [left, right] = response.body!.tee(); headers: {
const normalizer = await getNormalizer(new Response(left, response)); Accept: "text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8",
const rewriter = new HTMLRewriter(); "User-Agent": "Mozilla/5.0 (compatible; Summerflare; +https://github.com/misskey-dev/summerflare)",
const summarized = summary(url, rewriter); },
const reader = rewriter })) as any as Response
.transform(new Response(right.pipeThrough(normalizer), response)) url = new URL(response.url)
.body!.getReader(); const rewriter = new HTMLRewriter()
const summarized = summary(url, rewriter)
const reader = (rewriter.transform(await normalize(response)).body as ReadableStream<Uint8Array>).getReader()
while (!(await reader.read()).done); while (!(await reader.read()).done);
return context.json(await summarized); return context.json(await summarized)
}); })
export default app; export default app
// In-source tests: this branch only runs when the module is loaded by vitest,
// which defines `import.meta.vitest`.
if (import.meta.vitest) {
  const { createExecutionContext, env, waitOnExecutionContext } = await import("cloudflare:test")
  const { describe, expect, test } = import.meta.vitest
  describe("GET /url", () => {
    // NOTE(review): every case below requests a live third-party website, so these
    // tests need network access and will break if the remote pages change.
    // Each row is [human-readable label, target URL, expected JSON summary].
    test.each([
      [
        "the simple UTF-8 encoded website",
        "https://example.com/",
        {
          title: "Example Domain",
          thumbnail: null,
          description: null,
          player: {
            url: null,
            width: null,
            height: null,
          },
          allow: [],
          sitename: "example.com",
          icon: "https://example.com/favicon.ico",
          sensitive: false,
          large: false,
          url: "https://example.com/",
        },
      ],
      [
        "the simple Shift_JIS encoded website",
        "http://abehiroshi.la.coocan.jp/",
        {
          title: "阿部寛のホームページ",
          thumbnail: null,
          description: null,
          player: {
            url: null,
            width: null,
            height: null,
          },
          allow: [],
          sitename: "abehiroshi.la.coocan.jp",
          icon: "http://abehiroshi.la.coocan.jp/favicon.ico",
          sensitive: false,
          large: false,
          url: "http://abehiroshi.la.coocan.jp/",
        },
      ],
      [
        "the simple EUC-JP encoded website",
        "https://www.postgresql.jp/document/pg632doc/tutorial/f01.htm",
        {
          title: "概要",
          thumbnail: null,
          description: null,
          player: {
            url: null,
            width: null,
            height: null,
          },
          allow: [],
          sitename: "www.postgresql.jp",
          icon: "https://www.postgresql.jp/favicon.ico",
          sensitive: false,
          large: false,
          url: "https://www.postgresql.jp/document/pg632doc/tutorial/f01.htm",
        },
      ],
      [
        "the Shift_JIS encoded website with thumbnail",
        "https://store.shochiku.co.jp/shop/g/g23080501/",
        {
          title: "アイドルマスター ミリオンライブ! 第1幕 パンフレット",
          thumbnail: "https://store.shochiku.co.jp/img/goods/S/23080501s.jpg",
          description: "映画グッズ・アニメグッズを取り扱う通販サイト『Froovie/フルービー』です。ハリー・ポッター、ファンタスティック・ビースト、ガンダム、アニメなどのキャラクターグッズを多数揃えております。",
          player: { url: null, width: null, height: null },
          allow: [],
          sitename: "SHOCHIKU STORE | 松竹ストア",
          icon: "https://store.shochiku.co.jp/favicon.ico",
          sensitive: false,
          large: false,
          url: "https://store.shochiku.co.jp/shop/g/g23080501/",
        },
      ],
    ])("should return summary of %s <%s>", async (_, url, expected) => {
      // The host is irrelevant; only the path and the encoded `url` query parameter matter.
      const request = new Request(`https://fakehost/url?${new URLSearchParams({ url })}`)
      const ctx = createExecutionContext()
      const response = await app.fetch(request, env, ctx)
      // Let any work scheduled on the execution context settle before asserting.
      await waitOnExecutionContext(ctx)
      expect(response.status).toBe(200)
      const body = await response.json()
      // Debug logging of the body was dropped: on mismatch, toStrictEqual already prints a diff.
      expect(body).toStrictEqual(expected)
    })
  })
}

View File

@ -1,49 +1,49 @@
import { decode } from "html-entities"; import { decode } from "html-entities"
import clip from "summaly/built/utils/clip"; import clip from "summaly/built/utils/clip"
import { BufferedTextHandler, assign } from "../common"; import { BufferedTextHandler, assign } from "../common"
import type { PrioritizedReference } from "../common"; import type { PrioritizedReference } from "../common"
export default function getDescription(url: URL, html: HTMLRewriter) { export default function getDescription(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = { const result: PrioritizedReference<string | null> = {
bits: 3, // 0-7 bits: 3, // 0-7
priority: 0, priority: 0,
content: null, content: null,
}; }
html.on( html.on(
"#productDescription", "#productDescription",
new BufferedTextHandler((text) => { new BufferedTextHandler((text) => {
assign(result, 7, decode(text)); assign(result, 7, decode(text))
}) }),
); )
html.on('meta[property="og:description"]', { html.on('meta[property="og:description"]', {
element(element) { element(element) {
const content = element.getAttribute("content"); const content = element.getAttribute("content")
if (content) { if (content) {
assign(result, 3, content); assign(result, 3, content)
} }
}, },
}); })
html.on('meta[name="twitter:description"]', { html.on('meta[name="twitter:description"]', {
element(element) { element(element) {
const content = element.getAttribute("content"); const content = element.getAttribute("content")
if (content) { if (content) {
assign(result, 2, content); assign(result, 2, content)
} }
}, },
}); })
html.on('meta[name="description"]', { html.on('meta[name="description"]', {
element(element) { element(element) {
const content = element.getAttribute("content"); const content = element.getAttribute("content")
if (content) { if (content) {
assign(result, 1, content); assign(result, 1, content)
} }
}, },
}); })
return new Promise<string | null>((resolve) => { return new Promise<string | null>((resolve) => {
html.onDocument({ html.onDocument({
end() { end() {
resolve(result.content && clip(result.content, 300)); resolve(result.content && clip(result.content, 300))
}, },
}); })
}); })
} }

View File

@ -1,67 +1,65 @@
import { assign, toAbsoluteURL } from "../common"; import { assign, toAbsoluteURL } from "../common"
import type { PrioritizedReference } from "../common"; import type { PrioritizedReference } from "../common"
export default function getImage(url: URL, html: HTMLRewriter) { export default function getImage(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = { const result: PrioritizedReference<string | null> = {
bits: 4, // 0-15 bits: 4, // 0-15
priority: 0, priority: 0,
content: null, content: null,
}; }
html.on("#landingImage", { html.on("#landingImage", {
element(element) { element(element) {
const content = element.getAttribute("src"); const content = element.getAttribute("src")
if (content) { if (content) {
assign(result, 15, content); assign(result, 15, content)
} }
}, },
}); })
html.on('meta[property="og:image"]', { html.on('meta[property="og:image"]', {
element(element) { element(element) {
const content = element.getAttribute("content"); const content = element.getAttribute("content")
if (content) { if (content) {
assign(result, 7, content); assign(result, 7, content)
} }
}, },
}); })
html.on('meta[name="twitter:image"]', { html.on('meta[name="twitter:image"]', {
element(element) { element(element) {
const content = element.getAttribute("content"); const content = element.getAttribute("content")
if (content) { if (content) {
assign(result, 6, content); assign(result, 6, content)
} }
}, },
}); })
html.on('link[rel="image_src"]', { html.on('link[rel="image_src"]', {
element(element) { element(element) {
const content = element.getAttribute("href"); const content = element.getAttribute("href")
if (content) { if (content) {
assign(result, 5, content); assign(result, 5, content)
} }
}, },
}); })
html.on('link[rel="apple-touch-icon"]', { html.on('link[rel="apple-touch-icon"]', {
element(element) { element(element) {
const content = element.getAttribute("href"); const content = element.getAttribute("href")
if (content) { if (content) {
assign(result, 4, content); assign(result, 4, content)
} }
}, },
}); })
html.on('link[rel="apple-touch-icon image_src"]', { html.on('link[rel="apple-touch-icon image_src"]', {
element(element) { element(element) {
const content = element.getAttribute("href"); const content = element.getAttribute("href")
if (content) { if (content) {
assign(result, 3, content); assign(result, 3, content)
} }
}, },
}); })
return new Promise<string | null>((resolve) => { return new Promise<string | null>((resolve) => {
html.onDocument({ html.onDocument({
end() { end() {
resolve( resolve(result.content ? toAbsoluteURL(result.content, url.href) : null)
result.content ? toAbsoluteURL(result.content, url.href) : null
);
}, },
}); })
}); })
} }

View File

@ -1,62 +1,47 @@
import cleanupTitle from "summaly/built/utils/cleanup-title"; import cleanupTitle from "summaly/built/utils/cleanup-title"
import getCard from "../general/card"; import getCard from "../general/card"
import getDescription from "./description"; import getDescription from "./description"
import getFavicon from "../general/favicon"; import getFavicon from "../general/favicon"
import getImage from "./image"; import getImage from "./image"
import getPlayerUrlCommon from "../general/playerUrlCommon"; import getPlayerUrlCommon from "../general/playerUrlCommon"
import getPlayerUrlGeneral from "../general/playerUrlGeneral"; import getPlayerUrlGeneral from "../general/playerUrlGeneral"
import getPlayerUrlHeight from "../general/playerUrlHeight"; import getPlayerUrlHeight from "../general/playerUrlHeight"
import getPlayerUrlWidth from "../general/playerUrlWidth"; import getPlayerUrlWidth from "../general/playerUrlWidth"
import getSiteName from "../general/siteName"; import getSiteName from "../general/siteName"
import getTitle from "./title"; import getTitle from "./title"
import getSensitive from "../general/sensitive"; import getSensitive from "../general/sensitive"
export default function amazon(url: URL, html: HTMLRewriter) { export default function amazon(url: URL, html: HTMLRewriter) {
const card = getCard(url, html); const card = getCard(url, html)
const title = getTitle(url, html); const title = getTitle(url, html)
const thumbnail = getImage(url, html); const thumbnail = getImage(url, html)
const player = Promise.all([ const player = Promise.all([card, getPlayerUrlGeneral(url, html), getPlayerUrlCommon(url, html), getPlayerUrlWidth(url, html), getPlayerUrlHeight(url, html)]).then(([card, general, common, width, height]) => {
card, const url = (card !== "summary_large_image" && general) || common
getPlayerUrlGeneral(url, html),
getPlayerUrlCommon(url, html),
getPlayerUrlWidth(url, html),
getPlayerUrlHeight(url, html),
]).then(([card, general, common, width, height]) => {
const url = (card !== "summary_large_image" && general) || common;
if (url !== null && width !== null && height !== null) { if (url !== null && width !== null && height !== null) {
return { return {
url, url,
width, width,
height, height,
}; }
} else { } else {
return { return {
url: null, url: null,
width: null, width: null,
height: null, height: null,
};
} }
}); }
const description = getDescription(url, html); })
const siteName = getSiteName(url, html); const description = getDescription(url, html)
const favicon = getFavicon(url, html); const siteName = getSiteName(url, html)
const sensitive = getSensitive(url, html); const favicon = getFavicon(url, html)
const sensitive = getSensitive(url, html)
return Promise.all([ return Promise.all([title, thumbnail, player, description, siteName, favicon, sensitive]).then(([title, thumbnail, player, description, siteName, favicon, sensitive]) => {
title,
thumbnail,
player,
description,
siteName,
favicon,
sensitive,
]).then(
([title, thumbnail, player, description, siteName, favicon, sensitive]) => {
if (title === null) { if (title === null) {
return null; return null
} }
if (siteName !== null) { if (siteName !== null) {
title = cleanupTitle(title, siteName); title = cleanupTitle(title, siteName)
} }
return { return {
title, title,
@ -68,7 +53,6 @@ export default function amazon(url: URL, html: HTMLRewriter) {
icon: favicon, icon: favicon,
sensitive, sensitive,
url: url.href, url: url.href,
};
} }
); })
} }

View File

@ -1,47 +1,47 @@
import { decode } from "html-entities"; import { decode } from "html-entities"
import clip from "summaly/built/utils/clip"; import clip from "summaly/built/utils/clip"
import { BufferedTextHandler, assign } from "../common"; import { BufferedTextHandler, assign } from "../common"
import type { PrioritizedReference } from "../common"; import type { PrioritizedReference } from "../common"
export default function getTitle(url: URL, html: HTMLRewriter) { export default function getTitle(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = { const result: PrioritizedReference<string | null> = {
bits: 3, // 0-7 bits: 3, // 0-7
priority: 0, priority: 0,
content: null, content: null,
}; }
html.on( html.on(
"#title", "#title",
new BufferedTextHandler((text) => { new BufferedTextHandler((text) => {
assign(result, 7, decode(text)); assign(result, 7, decode(text))
}) }),
); )
html.on('meta[property="og:title"]', { html.on('meta[property="og:title"]', {
element(element) { element(element) {
const content = element.getAttribute("content"); const content = element.getAttribute("content")
if (content) { if (content) {
assign(result, 3, content); assign(result, 3, content)
} }
}, },
}); })
html.on('meta[name="twitter:title"]', { html.on('meta[name="twitter:title"]', {
element(element) { element(element) {
const content = element.getAttribute("content"); const content = element.getAttribute("content")
if (content) { if (content) {
assign(result, 2, content); assign(result, 2, content)
} }
}, },
}); })
html.on( html.on(
"title", "title",
new BufferedTextHandler((text) => { new BufferedTextHandler((text) => {
assign(result, 1, decode(text)); assign(result, 1, decode(text))
}) }),
); )
return new Promise<string | null>((resolve) => { return new Promise<string | null>((resolve) => {
html.onDocument({ html.onDocument({
end() { end() {
resolve(result.content && clip(result.content, 100)); resolve(result.content && clip(result.content, 100))
}, },
}); })
}); })
} }

View File

@ -1,34 +1,30 @@
export interface PrioritizedReference<T> { export interface PrioritizedReference<T> {
bits: number; bits: number
priority: number; priority: number
content: T; content: T
} }
export function assign<T>( export function assign<T>(target: PrioritizedReference<T>, priority: PrioritizedReference<T>["priority"], content: PrioritizedReference<T>["content"]): void {
target: PrioritizedReference<T>,
priority: PrioritizedReference<T>["priority"],
content: PrioritizedReference<T>["content"]
): void {
if (target.priority <= priority) { if (target.priority <= priority) {
target.priority = priority; target.priority = priority
target.content = content; target.content = content
} }
} }
export function toAbsoluteURL(url: string, base: string) { export function toAbsoluteURL(url: string, base: string) {
if (/^https?:\/\//.test(url)) { if (/^https?:\/\//.test(url)) {
return url; return url
} else { } else {
return new URL(url, base).href; return new URL(url, base).href
} }
} }
export class BufferedTextHandler { export class BufferedTextHandler {
private buffer = ""; private buffer = ""
constructor(private readonly callback: (text: string) => void) {} constructor(private readonly callback: (text: string) => void) {}
text(text: Text) { text(text: Text) {
this.callback((this.buffer += text.text)); this.callback((this.buffer += text.text))
} }
} }

View File

@ -1,33 +1,33 @@
import { assign } from "../common"; import { assign } from "../common"
import type { PrioritizedReference } from "../common"; import type { PrioritizedReference } from "../common"
export default function getCard(url: URL, html: HTMLRewriter) { export default function getCard(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = { const result: PrioritizedReference<string | null> = {
bits: 2, // 0-3 bits: 2, // 0-3
priority: 0, priority: 0,
content: null, content: null,
}; }
html.on('meta[name="twitter:card"]', { html.on('meta[name="twitter:card"]', {
element(element) { element(element) {
const content = element.getAttribute("content"); const content = element.getAttribute("content")
if (content) { if (content) {
assign(result, 2, content); assign(result, 2, content)
} }
}, },
}); })
html.on('meta[property="twitter:card"]', { html.on('meta[property="twitter:card"]', {
element(element) { element(element) {
const content = element.getAttribute("content"); const content = element.getAttribute("content")
if (content) { if (content) {
assign(result, 1, content); assign(result, 1, content)
} }
}, },
}); })
return new Promise<string | null>((resolve) => { return new Promise<string | null>((resolve) => {
html.onDocument({ html.onDocument({
end() { end() {
resolve(result.content); resolve(result.content)
}, },
}); })
}); })
} }

View File

@ -1,42 +1,42 @@
import clip from "summaly/built/utils/clip"; import clip from "summaly/built/utils/clip"
import { assign } from "../common"; import { assign } from "../common"
import type { PrioritizedReference } from "../common"; import type { PrioritizedReference } from "../common"
export default function getDescription(url: URL, html: HTMLRewriter) { export default function getDescription(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = { const result: PrioritizedReference<string | null> = {
bits: 2, // 0-3 bits: 2, // 0-3
priority: 0, priority: 0,
content: null, content: null,
}; }
html.on('meta[property="og:description"]', { html.on('meta[property="og:description"]', {
element(element) { element(element) {
const content = element.getAttribute("content"); const content = element.getAttribute("content")
if (content) { if (content) {
assign(result, 3, content); assign(result, 3, content)
} }
}, },
}); })
html.on('meta[name="twitter:description"]', { html.on('meta[name="twitter:description"]', {
element(element) { element(element) {
const content = element.getAttribute("content"); const content = element.getAttribute("content")
if (content) { if (content) {
assign(result, 2, content); assign(result, 2, content)
} }
}, },
}); })
html.on('meta[name="description"]', { html.on('meta[name="description"]', {
element(element) { element(element) {
const content = element.getAttribute("content"); const content = element.getAttribute("content")
if (content) { if (content) {
assign(result, 1, content); assign(result, 1, content)
} }
}, },
}); })
return new Promise<string | null>((resolve) => { return new Promise<string | null>((resolve) => {
html.onDocument({ html.onDocument({
end() { end() {
resolve(result.content && clip(result.content, 300)); resolve(result.content && clip(result.content, 300))
}, },
}); })
}); })
} }

View File

@ -1,33 +1,33 @@
import { assign, toAbsoluteURL } from "../common"; import { assign, toAbsoluteURL } from "../common"
import type { PrioritizedReference } from "../common"; import type { PrioritizedReference } from "../common"
export default function getFavicon(url: URL, html: HTMLRewriter) { export default function getFavicon(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string> = { const result: PrioritizedReference<string> = {
bits: 2, // 0-3 bits: 2, // 0-3
priority: 0, priority: 0,
content: "/favicon.ico", content: "/favicon.ico",
}; }
html.on('link[rel="shortcut icon"]', { html.on('link[rel="shortcut icon"]', {
element(element) { element(element) {
const content = element.getAttribute("href"); const content = element.getAttribute("href")
if (content) { if (content) {
assign(result, 3, content); assign(result, 3, content)
} }
}, },
}); })
html.on('link[rel="icon"]', { html.on('link[rel="icon"]', {
element(element) { element(element) {
const content = element.getAttribute("href"); const content = element.getAttribute("href")
if (content) { if (content) {
assign(result, 2, content); assign(result, 2, content)
} }
}, },
}); })
return new Promise<string>((resolve) => { return new Promise<string>((resolve) => {
html.onDocument({ html.onDocument({
end() { end() {
resolve(toAbsoluteURL(result.content, url.href)); resolve(toAbsoluteURL(result.content, url.href))
}, },
}); })
}); })
} }

View File

@ -1,59 +1,57 @@
import { assign, toAbsoluteURL } from "../common"; import { assign, toAbsoluteURL } from "../common"
import type { PrioritizedReference } from "../common"; import type { PrioritizedReference } from "../common"
export default function getImage(url: URL, html: HTMLRewriter) { export default function getImage(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = { const result: PrioritizedReference<string | null> = {
bits: 3, // 0-7 bits: 3, // 0-7
priority: 0, priority: 0,
content: null, content: null,
}; }
html.on('meta[property="og:image"]', { html.on('meta[property="og:image"]', {
element(element) { element(element) {
const content = element.getAttribute("content"); const content = element.getAttribute("content")
if (content) { if (content) {
assign(result, 7, content); assign(result, 7, content)
} }
}, },
}); })
html.on('meta[name="twitter:image"]', { html.on('meta[name="twitter:image"]', {
element(element) { element(element) {
const content = element.getAttribute("content"); const content = element.getAttribute("content")
if (content) { if (content) {
assign(result, 6, content); assign(result, 6, content)
} }
}, },
}); })
html.on('link[rel="image_src"]', { html.on('link[rel="image_src"]', {
element(element) { element(element) {
const content = element.getAttribute("href"); const content = element.getAttribute("href")
if (content) { if (content) {
assign(result, 5, content); assign(result, 5, content)
} }
}, },
}); })
html.on('link[rel="apple-touch-icon"]', { html.on('link[rel="apple-touch-icon"]', {
element(element) { element(element) {
const content = element.getAttribute("href"); const content = element.getAttribute("href")
if (content) { if (content) {
assign(result, 4, content); assign(result, 4, content)
} }
}, },
}); })
html.on('link[rel="apple-touch-icon image_src"]', { html.on('link[rel="apple-touch-icon image_src"]', {
element(element) { element(element) {
const content = element.getAttribute("href"); const content = element.getAttribute("href")
if (content) { if (content) {
assign(result, 3, content); assign(result, 3, content)
} }
}, },
}); })
return new Promise<string | null>((resolve) => { return new Promise<string | null>((resolve) => {
html.onDocument({ html.onDocument({
end() { end() {
resolve( resolve(result.content ? toAbsoluteURL(result.content, url.href) : null)
result.content ? toAbsoluteURL(result.content, url.href) : null
);
}, },
}); })
}); })
} }

View File

@ -1,63 +1,47 @@
import cleanupTitle from "summaly/built/utils/cleanup-title"; import cleanupTitle from "summaly/built/utils/cleanup-title"
import getCard from "./card"; import getCard from "./card"
import getDescription from "./description"; import getDescription from "./description"
import getFavicon from "./favicon"; import getFavicon from "./favicon"
import getImage from "./image"; import getImage from "./image"
import getPlayerUrlCommon from "./playerUrlCommon"; import getPlayerUrlCommon from "./playerUrlCommon"
import getPlayerUrlGeneral from "./playerUrlGeneral"; import getPlayerUrlGeneral from "./playerUrlGeneral"
import getPlayerUrlHeight from "./playerUrlHeight"; import getPlayerUrlHeight from "./playerUrlHeight"
import getPlayerUrlWidth from "./playerUrlWidth"; import getPlayerUrlWidth from "./playerUrlWidth"
import getSiteName from "./siteName"; import getSiteName from "./siteName"
import getTitle from "./title"; import getTitle from "./title"
import getSensitive from "./sensitive"; import getSensitive from "./sensitive"
export default function general(url: URL, html: HTMLRewriter) { export default function general(url: URL, html: HTMLRewriter) {
const card = getCard(url, html); const card = getCard(url, html)
const title = getTitle(url, html); const title = getTitle(url, html)
const image = getImage(url, html); const image = getImage(url, html)
const player = Promise.all([ const player = Promise.all([card, getPlayerUrlGeneral(url, html), getPlayerUrlCommon(url, html), getPlayerUrlWidth(url, html), getPlayerUrlHeight(url, html)]).then(([card, general, common, width, height]) => {
card, const url = (card !== "summary_large_image" && general) || common
getPlayerUrlGeneral(url, html),
getPlayerUrlCommon(url, html),
getPlayerUrlWidth(url, html),
getPlayerUrlHeight(url, html),
]).then(([card, general, common, width, height]) => {
const url = (card !== "summary_large_image" && general) || common;
if (url !== null && width !== null && height !== null) { if (url !== null && width !== null && height !== null) {
return { return {
url, url,
width, width,
height, height,
}; }
} else { } else {
return { return {
url: null, url: null,
width: null, width: null,
height: null, height: null,
};
} }
}); }
const description = getDescription(url, html); })
const siteName = getSiteName(url, html); const description = getDescription(url, html)
const favicon = getFavicon(url, html); const siteName = getSiteName(url, html)
const sensitive = getSensitive(url, html); const favicon = getFavicon(url, html)
const sensitive = getSensitive(url, html)
return Promise.all([ return Promise.all([card, title, image, player, description, siteName, favicon, sensitive]).then(([card, title, image, player, description, siteName, favicon, sensitive]) => {
card,
title,
image,
player,
description,
siteName,
favicon,
sensitive,
]).then(
([card, title, image, player, description, siteName, favicon, sensitive]) => {
if (title === null) { if (title === null) {
return null; return null
} }
if (siteName !== null) { if (siteName !== null) {
title = cleanupTitle(title, siteName); title = cleanupTitle(title, siteName)
} }
return { return {
title, title,
@ -70,7 +54,6 @@ export default function general(url: URL, html: HTMLRewriter) {
sensitive, sensitive,
large: card === "summary_large_image", large: card === "summary_large_image",
url: url.href, url: url.href,
};
} }
); })
} }

View File

@ -1,43 +1,41 @@
import { assign, toAbsoluteURL } from "../common"; import { assign, toAbsoluteURL } from "../common"
import type { PrioritizedReference } from "../common"; import type { PrioritizedReference } from "../common"
/**
 * Collects the Open Graph video URL of a page.
 *
 * Handlers are registered for the `og:video`, `og:video:secure_url` and
 * `og:video:url` meta tags; every non-empty `content` attribute is handed to
 * `assign` with priority 3, 2 and 1 respectively.
 * NOTE(review): `assign` lives in ../common and presumably keeps the
 * highest-priority candidate — confirm there.
 *
 * @param url - Page URL; its `href` is passed to `toAbsoluteURL` as the second argument.
 * @param html - Rewriter on which the element and document handlers are registered.
 * @returns A promise settled from the document `end` handler: the recorded
 *   value passed through `toAbsoluteURL`, or `null` when nothing was recorded.
 */
export default function getPlayerUrlCommon(url: URL, html: HTMLRewriter) {
  const best: PrioritizedReference<string | null> = {
    bits: 2, // 0-3
    priority: 0,
    content: null,
  }

  // Selector / priority pairs, registered in this exact order.
  const candidates: ReadonlyArray<readonly [string, number]> = [
    ['meta[property="og:video"]', 3],
    ['meta[property="og:video:secure_url"]', 2],
    ['meta[property="og:video:url"]', 1],
  ]

  for (const [selector, priority] of candidates) {
    html.on(selector, {
      element(element) {
        const value = element.getAttribute("content")
        // Missing and empty attributes are both ignored.
        if (value) {
          assign(best, priority, value)
        }
      },
    })
  }

  return new Promise<string | null>((resolve) => {
    html.onDocument({
      end() {
        if (best.content) {
          resolve(toAbsoluteURL(best.content, url.href))
        } else {
          resolve(null)
        }
      },
    })
  })
}

View File

@ -1,35 +1,33 @@
import { assign, toAbsoluteURL } from "../common"; import { assign, toAbsoluteURL } from "../common"
import type { PrioritizedReference } from "../common"; import type { PrioritizedReference } from "../common"
/**
 * Collects the Twitter-card player URL of a page.
 *
 * Looks at `meta[property="twitter:player"]` (priority 3) and
 * `meta[name="twitter:player"]` (priority 2); each non-empty `content`
 * attribute is handed to `assign`.
 * NOTE(review): `assign` is defined in ../common; it presumably keeps the
 * highest-priority value seen — confirm there.
 *
 * @param url - Page URL; its `href` is passed to `toAbsoluteURL` as the second argument.
 * @param html - Rewriter on which the element and document handlers are registered.
 * @returns A promise settled from the document `end` handler: the recorded
 *   value passed through `toAbsoluteURL`, or `null` when nothing was recorded.
 */
export default function getPlayerUrlGeneral(url: URL, html: HTMLRewriter) {
  const picked: PrioritizedReference<string | null> = {
    bits: 2, // 0-3
    priority: 0,
    content: null,
  }

  // Registers one meta-tag selector whose `content` competes at `priority`.
  const watch = (selector: string, priority: number) => {
    html.on(selector, {
      element(element) {
        const value = element.getAttribute("content")
        if (value) {
          assign(picked, priority, value)
        }
      },
    })
  }

  watch('meta[property="twitter:player"]', 3)
  watch('meta[name="twitter:player"]', 2)

  return new Promise<string | null>((resolve) => {
    html.onDocument({
      end() {
        const found = picked.content
        resolve(found ? toAbsoluteURL(found, url.href) : null)
      },
    })
  })
}

View File

@ -1,42 +1,42 @@
import { assign } from "../common"; import { assign } from "../common"
import type { PrioritizedReference } from "../common"; import type { PrioritizedReference } from "../common"
/**
 * Collects the declared player height of a page.
 *
 * Handlers are registered for `meta[property="twitter:player:height"]`
 * (priority 3), `meta[name="twitter:player:height"]` (priority 2) and
 * `meta[property="og:video:height"]` (priority 1); every non-empty `content`
 * attribute is handed to `assign`.
 * NOTE(review): `assign` is defined in ../common; it presumably keeps the
 * highest-priority value seen — confirm there.
 *
 * @param url - Page URL (not read by this function).
 * @param html - Rewriter on which the element and document handlers are registered.
 * @returns A promise settled from the document `end` handler with the recorded
 *   value parsed as a base-10 integer, or `null` when nothing was recorded or
 *   the value does not parse.
 */
export default function getPlayerUrlHeight(url: URL, html: HTMLRewriter) {
  const result: PrioritizedReference<string | null> = {
    bits: 2, // 0-3
    priority: 0,
    content: null,
  }
  html.on('meta[property="twitter:player:height"]', {
    element(element) {
      const content = element.getAttribute("content")
      if (content) {
        assign(result, 3, content)
      }
    },
  })
  html.on('meta[name="twitter:player:height"]', {
    element(element) {
      const content = element.getAttribute("content")
      if (content) {
        assign(result, 2, content)
      }
    },
  })
  html.on('meta[property="og:video:height"]', {
    element(element) {
      const content = element.getAttribute("content")
      if (content) {
        assign(result, 1, content)
      }
    },
  })
  return new Promise<number | null>((resolve) => {
    html.onDocument({
      end() {
        const raw = result.content
        // `content` stays null when no tag supplied a value; handle that
        // explicitly instead of asserting non-null and relying on
        // parseInt coercing null to NaN.
        if (raw === null) {
          resolve(null)
          return
        }
        const parsed = parseInt(raw, 10)
        resolve(Number.isNaN(parsed) ? null : parsed)
      },
    })
  })
}

View File

@ -1,42 +1,42 @@
import { assign } from "../common"; import { assign } from "../common"
import type { PrioritizedReference } from "../common"; import type { PrioritizedReference } from "../common"
/**
 * Collects the declared player width of a page.
 *
 * Handlers are registered for `meta[property="twitter:player:width"]`
 * (priority 3), `meta[name="twitter:player:width"]` (priority 2) and
 * `meta[property="og:video:width"]` (priority 1); every non-empty `content`
 * attribute is handed to `assign`.
 * NOTE(review): `assign` is defined in ../common; it presumably keeps the
 * highest-priority value seen — confirm there.
 *
 * @param url - Page URL (not read by this function).
 * @param html - Rewriter on which the element and document handlers are registered.
 * @returns A promise settled from the document `end` handler with the recorded
 *   value parsed as a base-10 integer, or `null` when nothing was recorded or
 *   the value does not parse.
 */
export default function getPlayerUrlWidth(url: URL, html: HTMLRewriter) {
  const result: PrioritizedReference<string | null> = {
    bits: 2, // 0-3
    priority: 0,
    content: null,
  }
  html.on('meta[property="twitter:player:width"]', {
    element(element) {
      const content = element.getAttribute("content")
      if (content) {
        assign(result, 3, content)
      }
    },
  })
  html.on('meta[name="twitter:player:width"]', {
    element(element) {
      const content = element.getAttribute("content")
      if (content) {
        assign(result, 2, content)
      }
    },
  })
  html.on('meta[property="og:video:width"]', {
    element(element) {
      const content = element.getAttribute("content")
      if (content) {
        assign(result, 1, content)
      }
    },
  })
  return new Promise<number | null>((resolve) => {
    html.onDocument({
      end() {
        const raw = result.content
        // `content` stays null when no tag supplied a value; handle that
        // explicitly instead of asserting non-null and relying on
        // parseInt coercing null to NaN.
        if (raw === null) {
          resolve(null)
          return
        }
        const parsed = parseInt(raw, 10)
        resolve(Number.isNaN(parsed) ? null : parsed)
      },
    })
  })
}

View File

@ -1,22 +1,22 @@
import { assign } from "../common"; import { assign } from "../common"
import type { PrioritizedReference } from "../common"; import type { PrioritizedReference } from "../common"
/**
 * Reports whether the page contains an element matching
 * `.tweet[data-possibly-sensitive="true"]`.
 *
 * The flag starts as `false`; each matching element calls
 * `assign(flag, 1, true)`.
 * NOTE(review): `assign` is defined in ../common — presumably it stores the
 * value when the priority is high enough; confirm there.
 *
 * @param url - Page URL (not read by this function).
 * @param html - Rewriter on which the element and document handlers are registered.
 * @returns A promise settled from the document `end` handler with the flag's content.
 */
export default function getSensitive(url: URL, html: HTMLRewriter) {
  const flag: PrioritizedReference<boolean> = {
    bits: 1, // 0-1
    priority: 0,
    content: false,
  }

  html.on('.tweet[data-possibly-sensitive="true"]', {
    element: () => {
      assign(flag, 1, true)
    },
  })

  return new Promise<boolean>((resolve) => {
    html.onDocument({
      end: () => {
        resolve(flag.content)
      },
    })
  })
}

View File

@ -1,33 +1,33 @@
import { assign } from "../common"; import { assign } from "../common"
import type { PrioritizedReference } from "../common"; import type { PrioritizedReference } from "../common"
/**
 * Collects the site name of a page.
 *
 * The reference is seeded with `url.hostname`, so that value is what gets
 * resolved when no handler replaces it. `meta[property="og:site_name"]` is
 * offered to `assign` with priority 3 and `meta[name="application-name"]`
 * with priority 2; empty or missing `content` attributes are ignored.
 * NOTE(review): `assign` is defined in ../common; it presumably keeps the
 * highest-priority value seen — confirm there.
 *
 * @param url - Page URL; its hostname is the initial value.
 * @param html - Rewriter on which the element and document handlers are registered.
 * @returns A promise settled from the document `end` handler with the reference's content.
 */
export default function getSiteName(url: URL, html: HTMLRewriter) {
  const siteName: PrioritizedReference<string | null> = {
    bits: 2, // 0-3
    priority: 0,
    content: url.hostname,
  }

  // Selector / priority pairs, registered in this exact order.
  const candidates: ReadonlyArray<readonly [string, number]> = [
    ['meta[property="og:site_name"]', 3],
    ['meta[name="application-name"]', 2],
  ]

  for (const [selector, priority] of candidates) {
    html.on(selector, {
      element(element) {
        const value = element.getAttribute("content")
        if (value) {
          assign(siteName, priority, value)
        }
      },
    })
  }

  return new Promise<string | null>((resolve) => {
    html.onDocument({
      end: () => resolve(siteName.content),
    })
  })
}

View File

@ -1,41 +1,41 @@
import { decode } from "html-entities"; import { decode } from "html-entities"
import clip from "summaly/built/utils/clip"; import clip from "summaly/built/utils/clip"
import { BufferedTextHandler, assign } from "../common"; import { BufferedTextHandler, assign } from "../common"
import type { PrioritizedReference } from "../common"; import type { PrioritizedReference } from "../common"
/**
 * Collects the title of a page.
 *
 * Candidates, in registration order:
 * - `meta[property="og:title"]` `content`, priority 3
 * - `meta[name="twitter:title"]` `content`, priority 2
 * - the `<title>` text, run through `decode` from html-entities, priority 1
 *
 * NOTE(review): `assign` and `BufferedTextHandler` come from ../common.
 * Presumably `assign` keeps the highest-priority value and
 * `BufferedTextHandler` invokes its callback with the element's text —
 * confirm there.
 *
 * @param url - Page URL (not read by this function).
 * @param html - Rewriter on which the element and document handlers are registered.
 * @returns A promise settled from the document `end` handler: a truthy
 *   recorded value is passed through `clip(value, 100)`; a falsy one
 *   (`null` or `""`) is resolved unchanged.
 */
export default function getTitle(url: URL, html: HTMLRewriter) {
  const heading: PrioritizedReference<string | null> = {
    bits: 2, // 0-3
    priority: 0,
    content: null,
  }

  // Registers a meta-tag selector whose `content` competes at `priority`.
  const watchMeta = (selector: string, priority: number) => {
    html.on(selector, {
      element(element) {
        const value = element.getAttribute("content")
        if (value) {
          assign(heading, priority, value)
        }
      },
    })
  }

  watchMeta('meta[property="og:title"]', 3)
  watchMeta('meta[name="twitter:title"]', 2)

  const titleText = new BufferedTextHandler((text) => {
    assign(heading, 1, decode(text))
  })
  html.on("title", titleText)

  return new Promise<string | null>((resolve) => {
    html.onDocument({
      end() {
        const { content } = heading
        resolve(content ? clip(content, 100) : content)
      },
    })
  })
}

View File

@ -1,28 +1,13 @@
import amazon from "./amazon"; import amazon from "./amazon"
import general from "./general"; import general from "./general"
import wikipedia from "./wikipedia"; import wikipedia from "./wikipedia"
/**
 * Hostnames routed to the Amazon-specific summarizer.
 *
 * `www.amazon.com.au` is Amazon Australia's storefront host. The historical
 * `www.amazon.au` entry is retained so existing behavior is unchanged.
 */
const AMAZON_HOSTNAMES: ReadonlySet<string> = new Set([
  "www.amazon.com",
  "www.amazon.co.jp",
  "www.amazon.ca",
  "www.amazon.com.br",
  "www.amazon.com.mx",
  "www.amazon.co.uk",
  "www.amazon.de",
  "www.amazon.fr",
  "www.amazon.it",
  "www.amazon.es",
  "www.amazon.nl",
  "www.amazon.cn",
  "www.amazon.in",
  "www.amazon.au",
  "www.amazon.com.au",
])

/**
 * Picks the summarizer for a page based on its hostname.
 *
 * - A hostname listed in `AMAZON_HOSTNAMES` goes to `amazon`.
 * - `wikipedia.org` and any of its subdomains go to `wikipedia`.
 * - Everything else goes to `general`.
 *
 * @param url - URL of the page being summarized.
 * @param html - Rewriter forwarded unchanged to the selected summarizer.
 * @returns Whatever the selected summarizer returns.
 */
export default function summary(url: URL, html: HTMLRewriter) {
  if (AMAZON_HOSTNAMES.has(url.hostname)) {
    return amazon(url, html)
  }
  // The leading dot lets the bare domain match while rejecting look-alikes
  // such as "notwikipedia.org".
  if (`.${url.hostname}`.endsWith(".wikipedia.org")) {
    return wikipedia(url, html)
  }
  return general(url, html)
}

View File

@ -1,13 +1,11 @@
import clip from "summaly/built/utils/clip"; import clip from "summaly/built/utils/clip"
export default async function wikipedia(url: URL, html: HTMLRewriter) { export default async function wikipedia(url: URL, html: HTMLRewriter) {
const lang = url.hostname.split(".")[0]; const lang = url.hostname.split(".")[0]
const title = url.pathname.split("/")[2]; const title = url.pathname.split("/")[2]
const response = await fetch( const response = await fetch(`https://${lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro=&explaintext=&titles=${title}`)
`https://${lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro=&explaintext=&titles=${title}` const json = await response.json<any>()
); const info = json.query.pages[Object.keys(json.query.pages)[0]]
const json = await response.json<any>();
const info = json.query.pages[Object.keys(json.query.pages)[0]];
return { return {
title: info.title, title: info.title,
icon: "https://wikipedia.org/static/favicon/wikipedia.ico", icon: "https://wikipedia.org/static/favicon/wikipedia.ico",
@ -21,5 +19,5 @@ export default async function wikipedia(url: URL, html: HTMLRewriter) {
allow: [], allow: [],
sitename: "Wikipedia", sitename: "Wikipedia",
url: url.href, url: url.href,
}; }
} }

View File

@ -12,9 +12,7 @@
/* Language and Environment */ /* Language and Environment */
"target": "es2021" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */, "target": "es2021" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */,
"lib": [ "lib": ["esnext"] /* Specify a set of bundled library declaration files that describe the target runtime environment. */,
"es2021"
] /* Specify a set of bundled library declaration files that describe the target runtime environment. */,
"jsx": "react" /* Specify what JSX code is generated. */, "jsx": "react" /* Specify what JSX code is generated. */,
// "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */ // "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
@ -33,9 +31,7 @@
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */ // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */ // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
// "typeRoots": [], /* Specify multiple folders that act like `./node_modules/@types`. */ // "typeRoots": [], /* Specify multiple folders that act like `./node_modules/@types`. */
"types": [ "types": ["@cloudflare/vitest-pool-workers", "@cloudflare/workers-types/experimental", "vitest/importMeta"] /* Specify type package names to be included without being referenced in a source file. */,
"@cloudflare/workers-types"
] /* Specify type package names to be included without being referenced in a source file. */,
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
"resolveJsonModule": true /* Enable importing .json files */, "resolveJsonModule": true /* Enable importing .json files */,
// "noResolve": true, /* Disallow `import`s, `require`s or `<reference>`s from expanding the number of files TypeScript should add to a project. */ // "noResolve": true, /* Disallow `import`s, `require`s or `<reference>`s from expanding the number of files TypeScript should add to a project. */

15
vitest.config.ts Normal file
View File

@ -0,0 +1,15 @@
/// <reference types="vitest" />
import { defineWorkersConfig } from "@cloudflare/vitest-pool-workers/config"
// Globs handed to Vitest's `includeSource` option.
const sourceGlobs = ["src/**/*.ts"]

// Settings for the `workers` pool: the Wrangler configuration is taken from
// the project's wrangler.toml.
const workersPoolOptions = {
  wrangler: {
    configPath: "./wrangler.toml",
  },
}

// Vitest configuration built with the Cloudflare Workers pool helper.
export default defineWorkersConfig({
  test: {
    includeSource: sourceGlobs,
    poolOptions: {
      workers: workersPoolOptions,
    },
  },
})

View File

@ -1,3 +1,4 @@
name = "summerflare" name = "summerflare"
main = "src/index.ts" main = "src/index.ts"
compatibility_date = "2023-05-13" compatibility_date = "2024-05-13"
compatibility_flags = ["nodejs_compat"]