fix: correctly normalize encoding

This commit is contained in:
Acid Chicken (硫酸鶏)
2024-05-28 15:11:27 +09:00
parent 60507c3a88
commit 0cfacfeacc
29 changed files with 1371 additions and 1307 deletions

2
.prettierrc Normal file
View File

@ -0,0 +1,2 @@
printWidth: 4096
semi: false

View File

@ -6,7 +6,7 @@
```bash
pnpm i
pnpm wrangler publish
pnpm wrangler deploy --minify
```
After executing the above command, access <https://workers.example/url?url=https%3A%2F%2Fexample.com> to verify that the worker is working properly.

View File

@ -1,16 +1,17 @@
{
"packageManager": "pnpm@8.3.1",
"devDependencies": {
"@cloudflare/workers-types": "^4.20230511.0",
"@types/whatwg-mimetype": "^3.0.1",
"vitest": "^0.31.0",
"wrangler": "^2.20.0"
"@cloudflare/vitest-pool-workers": "0.1.17",
"@cloudflare/workers-types": "^4.20240405.0",
"@types/whatwg-mimetype": "^3.0.2",
"vitest": "1.3.0",
"wrangler": "^3.48.0"
},
"dependencies": {
"@zxing/text-encoding": "^0.9.0",
"hono": "^3.1.8",
"html-entities": "^2.3.3",
"hono": "^4.2.3",
"html-entities": "^2.5.2",
"jschardet": "^3.1.2",
"summaly": "^2.7.0",
"whatwg-mimetype": "^3.0.0"
"whatwg-mimetype": "^4.0.0"
}
}

1472
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

7
src/config.ts Normal file
View File

@ -0,0 +1,7 @@
/**
 * Cloudflare-specific request options, checked against `RequestInitCfProperties`.
 * Intended to be passed as the `cf` member of a `fetch` init object.
 */
export const cf = {
// Opt every response into the edge cache, not only the content types cached by default.
cacheEverything: true,
// Cache lifetime per origin status range.
// NOTE(review): values are presumably seconds (86400 = one day, 60 = one minute) — confirm against the Cloudflare docs.
cacheTtlByStatus: {
"200-299": 86400,
"400-599": 60,
},
} satisfies RequestInitCfProperties

View File

@ -1,69 +1,52 @@
// due to the bug in the Cloudflare Workers runtime, we have to use @zxing/text-encoding instead of the built-in TextEncoder/TextDecoder.
import { encodingIndexes } from "@zxing/text-encoding/esm/encoding-indexes";
(globalThis as any).TextEncodingIndexes = { encodingIndexes };
import { UniversalDetector } from "jschardet/src"
import MIMEType from "whatwg-mimetype"
import { TextDecoder, TextEncoder } from "@zxing/text-encoding";
import MIMEType from "whatwg-mimetype";
function getCharsetFromHeader(response: Response): string | null {
const contentType = response.headers.get("Content-Type");
if (contentType === null) {
return null;
}
try {
const mimeType = new MIMEType(contentType);
return mimeType.parameters.get("charset") ?? null;
} catch {
return null;
}
/**
 * Extracts the `charset` parameter from a Content-Type style string.
 *
 * @param value Raw header (or `<meta content>`) value, or null when absent.
 * @returns The charset parameter, or null when the value is null, when
 *          parsing yields no MIME type, or when no charset parameter exists.
 */
function getCharset(value: string | null): string | null {
  if (value === null) {
    return null
  }
  const parsed = MIMEType.parse(value)
  const charset = parsed?.parameters.get("charset")
  return charset ?? null
}
async function getCharsetFromBody(response: Response): Promise<string | null> {
let charset: string | null = null;
const rewriter = new HTMLRewriter();
/**
 * Guesses the character encoding of a byte stream using jschardet.
 *
 * Chunks are fed to the detector incrementally and reading stops as soon as
 * the detector reports it is done, so the whole body is not necessarily read.
 *
 * @param body Stream of body chunks; assumed to yield `Uint8Array` (or
 *             something `Uint8Array` can wrap, such as an `ArrayBuffer`).
 * @returns The detected encoding name, or null when nothing was detected.
 */
async function guessCharsetFromBody(body: ReadableStream<any>): Promise<string | null> {
  // Upper bound on arguments per String.fromCharCode call, to stay clear of
  // engine argument-count limits on large chunks.
  const step = 0x2000
  const detector = new UniversalDetector()
  for await (const chunk of body) {
    const bytes: Uint8Array = chunk instanceof Uint8Array ? chunk : new Uint8Array(chunk)
    // jschardet's documented input is a "binary string": one character per
    // byte. Decoding the chunk as UTF-8 first would replace every non-UTF-8
    // sequence (e.g. Shift_JIS or EUC-JP bytes) with U+FFFD and hide the very
    // bytes the detector needs, so the raw byte values are passed through.
    let binary = ""
    for (let offset = 0; offset < bytes.length; offset += step) {
      binary += String.fromCharCode(...bytes.subarray(offset, offset + step))
    }
    detector.feed(binary)
    if (detector.done) {
      break
    }
  }
  // Finalise detection so a result is produced even if `done` was never reached.
  detector.close()
  return detector.result?.encoding ?? null
}
export async function normalize(response: Response): Promise<Response> {
const headers = new Headers(response.headers)
if (!getCharset(headers.get("content-type"))) {
const [left, right] = response.body!.tee()
response = new Response(left, response)
const rewriter = new HTMLRewriter()
rewriter.on("meta", {
element(element) {
const httpEquiv = element.getAttribute("http-equiv");
if (
charset === null &&
httpEquiv !== null &&
httpEquiv.toLowerCase() === "content-type"
) {
const content = element.getAttribute("content");
if (content !== null) {
try {
const mimeType = new MIMEType(content);
charset = mimeType.parameters.get("charset") ?? null;
} catch {}
}
}
const charsetAttr = element.getAttribute("charset");
if (charsetAttr !== null) {
charset = charsetAttr;
const httpEquiv = element.getAttribute("http-equiv")?.toLowerCase()
if (httpEquiv === "content-type") {
headers.set(httpEquiv, element.getAttribute("content")!)
}
},
});
const reader = rewriter.transform(response).body!.getReader();
})
const reader = rewriter.transform(new Response(right, response)).body!.getReader()
while (!(await reader.read()).done);
return charset;
}
export async function getNormalizer(
response: Response
): Promise<TransformStream<Uint8Array, Uint8Array>> {
const charset =
getCharsetFromHeader(response) ?? (await getCharsetFromBody(response));
if (charset === null || charset.toLowerCase() === "utf-8") {
return new TransformStream();
if (!headers.has("content-type")) {
const [left, right] = response.body!.tee()
response = new Response(left, response)
const guessed = await guessCharsetFromBody(right)
if (guessed) {
headers.set("content-type", `text/html; charset=${guessed}`)
}
const decoder = new TextDecoder(charset, { fatal: true, ignoreBOM: true });
const encoder = new TextEncoder();
const transform = new TransformStream<Uint8Array, Uint8Array>({
transform(chunk, controller) {
controller.enqueue(
encoder.encode(decoder.decode(chunk, { stream: true }))
);
},
});
return transform;
}
return new Response(response.body, {
headers,
status: response.status,
statusText: response.statusText,
})
}

View File

@ -1,7 +1,7 @@
import { Hono } from "hono";
import summary from "./summary";
import { getNormalizer } from "./encoding";
import { Hono } from "hono"
import { cf } from "./config"
import { normalize } from "./encoding"
import summary from "./summary"
export interface Env {
// Example binding to KV. Learn more at https://developers.cloudflare.com/workers/runtime-apis/kv/
// MY_KV_NAMESPACE: KVNamespace;
@ -16,31 +16,128 @@ export interface Env {
// MY_SERVICE: Fetcher;
}
const app = new Hono<Env>();
const app = new Hono<Env>()
app.onError((error, context) => {
console.error(error);
return context.json({ error: error.message }, 500);
});
console.error(error)
return context.json({ error: error.message }, 500)
})
app.get("/url", async (context) => {
let url: URL;
let url: URL
try {
url = new URL(context.req.query("url")!);
url = new URL(context.req.query("url")!)
} catch (e) {
return context.json({ error: "Invalid URL" }, 400);
return context.json({ error: "Invalid URL" }, 400)
}
const response = await fetch(url);
url = new URL(response.url);
const [left, right] = response.body!.tee();
const normalizer = await getNormalizer(new Response(left, response));
const rewriter = new HTMLRewriter();
const summarized = summary(url, rewriter);
const reader = rewriter
.transform(new Response(right.pipeThrough(normalizer), response))
.body!.getReader();
const response = (await fetch(url, {
cf,
headers: {
Accept: "text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8",
"User-Agent": "Mozilla/5.0 (compatible; Summerflare; +https://github.com/misskey-dev/summerflare)",
},
})) as any as Response
url = new URL(response.url)
const rewriter = new HTMLRewriter()
const summarized = summary(url, rewriter)
const reader = (rewriter.transform(await normalize(response)).body as ReadableStream<Uint8Array>).getReader()
while (!(await reader.read()).done);
return context.json(await summarized);
});
return context.json(await summarized)
})
export default app;
export default app
// In-source tests: this block only runs when `import.meta.vitest` is defined.
// NOTE(review): no fetch mocking is visible in this block, so the cases below presumably
// depend on the live sites and their current content — confirm before relying on them in CI.
if (import.meta.vitest) {
const { createExecutionContext, env, waitOnExecutionContext } = await import("cloudflare:test")
const { describe, expect, test } = import.meta.vitest
describe("GET /url", () => {
// Each case is a tuple of [label, target URL, expected JSON body].
test.each([
[
"the simple UTF-8 encoded website",
"https://example.com/",
{
title: "Example Domain",
thumbnail: null,
description: null,
player: {
url: null,
width: null,
height: null,
},
allow: [],
sitename: "example.com",
icon: "https://example.com/favicon.ico",
sensitive: false,
large: false,
url: "https://example.com/",
},
],
[
"the simple Shift_JIS encoded website",
"http://abehiroshi.la.coocan.jp/",
{
title: "阿部寛のホームページ",
thumbnail: null,
description: null,
player: {
url: null,
width: null,
height: null,
},
allow: [],
sitename: "abehiroshi.la.coocan.jp",
icon: "http://abehiroshi.la.coocan.jp/favicon.ico",
sensitive: false,
large: false,
url: "http://abehiroshi.la.coocan.jp/",
},
],
[
"the simple EUC-JP encoded website",
"https://www.postgresql.jp/document/pg632doc/tutorial/f01.htm",
{
title: "概要",
thumbnail: null,
description: null,
player: {
url: null,
width: null,
height: null,
},
allow: [],
sitename: "www.postgresql.jp",
icon: "https://www.postgresql.jp/favicon.ico",
sensitive: false,
large: false,
url: "https://www.postgresql.jp/document/pg632doc/tutorial/f01.htm",
},
],
[
"the Shift_JIS encoded website with thumbnail",
"https://store.shochiku.co.jp/shop/g/g23080501/",
{
title: "アイドルマスター ミリオンライブ! 第1幕 パンフレット",
thumbnail: "https://store.shochiku.co.jp/img/goods/S/23080501s.jpg",
description: "映画グッズ・アニメグッズを取り扱う通販サイト『Froovie/フルービー』です。ハリー・ポッター、ファンタスティック・ビースト、ガンダム、アニメなどのキャラクターグッズを多数揃えております。",
player: { url: null, width: null, height: null },
allow: [],
sitename: "SHOCHIKU STORE | 松竹ストア",
icon: "https://store.shochiku.co.jp/favicon.ico",
sensitive: false,
large: false,
url: "https://store.shochiku.co.jp/shop/g/g23080501/",
},
],
])("should return summary of %s <%s>", async (_, url, expected) => {
// The host name is a placeholder; only the path and the `url` query parameter are meaningful.
const request = new Request(`https://fakehost/url?${new URLSearchParams({ url })}`)
const ctx = createExecutionContext()
// Dispatch the request directly through the Hono app.
const response = await app.fetch(request, env, ctx)
// Wait on the execution context before asserting on the response.
await waitOnExecutionContext(ctx)
expect(response.status).toBe(200)
const body = await response.json()
// Prints the received body to the test output.
console.log(body)
expect(body).toStrictEqual(expected)
})
})
}

View File

@ -1,49 +1,49 @@
import { decode } from "html-entities";
import clip from "summaly/built/utils/clip";
import { BufferedTextHandler, assign } from "../common";
import type { PrioritizedReference } from "../common";
import { decode } from "html-entities"
import clip from "summaly/built/utils/clip"
import { BufferedTextHandler, assign } from "../common"
import type { PrioritizedReference } from "../common"
export default function getDescription(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = {
bits: 3, // 0-7
priority: 0,
content: null,
};
}
html.on(
"#productDescription",
new BufferedTextHandler((text) => {
assign(result, 7, decode(text));
})
);
assign(result, 7, decode(text))
}),
)
html.on('meta[property="og:description"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 3, content);
assign(result, 3, content)
}
},
});
})
html.on('meta[name="twitter:description"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 2, content);
assign(result, 2, content)
}
},
});
})
html.on('meta[name="description"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 1, content);
assign(result, 1, content)
}
},
});
})
return new Promise<string | null>((resolve) => {
html.onDocument({
end() {
resolve(result.content && clip(result.content, 300));
resolve(result.content && clip(result.content, 300))
},
});
});
})
})
}

View File

@ -1,67 +1,65 @@
import { assign, toAbsoluteURL } from "../common";
import type { PrioritizedReference } from "../common";
import { assign, toAbsoluteURL } from "../common"
import type { PrioritizedReference } from "../common"
export default function getImage(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = {
bits: 4, // 0-15
priority: 0,
content: null,
};
}
html.on("#landingImage", {
element(element) {
const content = element.getAttribute("src");
const content = element.getAttribute("src")
if (content) {
assign(result, 15, content);
assign(result, 15, content)
}
},
});
})
html.on('meta[property="og:image"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 7, content);
assign(result, 7, content)
}
},
});
})
html.on('meta[name="twitter:image"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 6, content);
assign(result, 6, content)
}
},
});
})
html.on('link[rel="image_src"]', {
element(element) {
const content = element.getAttribute("href");
const content = element.getAttribute("href")
if (content) {
assign(result, 5, content);
assign(result, 5, content)
}
},
});
})
html.on('link[rel="apple-touch-icon"]', {
element(element) {
const content = element.getAttribute("href");
const content = element.getAttribute("href")
if (content) {
assign(result, 4, content);
assign(result, 4, content)
}
},
});
})
html.on('link[rel="apple-touch-icon image_src"]', {
element(element) {
const content = element.getAttribute("href");
const content = element.getAttribute("href")
if (content) {
assign(result, 3, content);
assign(result, 3, content)
}
},
});
})
return new Promise<string | null>((resolve) => {
html.onDocument({
end() {
resolve(
result.content ? toAbsoluteURL(result.content, url.href) : null
);
resolve(result.content ? toAbsoluteURL(result.content, url.href) : null)
},
});
});
})
})
}

View File

@ -1,62 +1,47 @@
import cleanupTitle from "summaly/built/utils/cleanup-title";
import getCard from "../general/card";
import getDescription from "./description";
import getFavicon from "../general/favicon";
import getImage from "./image";
import getPlayerUrlCommon from "../general/playerUrlCommon";
import getPlayerUrlGeneral from "../general/playerUrlGeneral";
import getPlayerUrlHeight from "../general/playerUrlHeight";
import getPlayerUrlWidth from "../general/playerUrlWidth";
import getSiteName from "../general/siteName";
import getTitle from "./title";
import getSensitive from "../general/sensitive";
import cleanupTitle from "summaly/built/utils/cleanup-title"
import getCard from "../general/card"
import getDescription from "./description"
import getFavicon from "../general/favicon"
import getImage from "./image"
import getPlayerUrlCommon from "../general/playerUrlCommon"
import getPlayerUrlGeneral from "../general/playerUrlGeneral"
import getPlayerUrlHeight from "../general/playerUrlHeight"
import getPlayerUrlWidth from "../general/playerUrlWidth"
import getSiteName from "../general/siteName"
import getTitle from "./title"
import getSensitive from "../general/sensitive"
export default function amazon(url: URL, html: HTMLRewriter) {
const card = getCard(url, html);
const title = getTitle(url, html);
const thumbnail = getImage(url, html);
const player = Promise.all([
card,
getPlayerUrlGeneral(url, html),
getPlayerUrlCommon(url, html),
getPlayerUrlWidth(url, html),
getPlayerUrlHeight(url, html),
]).then(([card, general, common, width, height]) => {
const url = (card !== "summary_large_image" && general) || common;
const card = getCard(url, html)
const title = getTitle(url, html)
const thumbnail = getImage(url, html)
const player = Promise.all([card, getPlayerUrlGeneral(url, html), getPlayerUrlCommon(url, html), getPlayerUrlWidth(url, html), getPlayerUrlHeight(url, html)]).then(([card, general, common, width, height]) => {
const url = (card !== "summary_large_image" && general) || common
if (url !== null && width !== null && height !== null) {
return {
url,
width,
height,
};
}
} else {
return {
url: null,
width: null,
height: null,
};
}
});
const description = getDescription(url, html);
const siteName = getSiteName(url, html);
const favicon = getFavicon(url, html);
const sensitive = getSensitive(url, html);
}
})
const description = getDescription(url, html)
const siteName = getSiteName(url, html)
const favicon = getFavicon(url, html)
const sensitive = getSensitive(url, html)
return Promise.all([
title,
thumbnail,
player,
description,
siteName,
favicon,
sensitive,
]).then(
([title, thumbnail, player, description, siteName, favicon, sensitive]) => {
return Promise.all([title, thumbnail, player, description, siteName, favicon, sensitive]).then(([title, thumbnail, player, description, siteName, favicon, sensitive]) => {
if (title === null) {
return null;
return null
}
if (siteName !== null) {
title = cleanupTitle(title, siteName);
title = cleanupTitle(title, siteName)
}
return {
title,
@ -68,7 +53,6 @@ export default function amazon(url: URL, html: HTMLRewriter) {
icon: favicon,
sensitive,
url: url.href,
};
}
);
})
}

View File

@ -1,47 +1,47 @@
import { decode } from "html-entities";
import clip from "summaly/built/utils/clip";
import { BufferedTextHandler, assign } from "../common";
import type { PrioritizedReference } from "../common";
import { decode } from "html-entities"
import clip from "summaly/built/utils/clip"
import { BufferedTextHandler, assign } from "../common"
import type { PrioritizedReference } from "../common"
export default function getTitle(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = {
bits: 3, // 0-7
priority: 0,
content: null,
};
}
html.on(
"#title",
new BufferedTextHandler((text) => {
assign(result, 7, decode(text));
})
);
assign(result, 7, decode(text))
}),
)
html.on('meta[property="og:title"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 3, content);
assign(result, 3, content)
}
},
});
})
html.on('meta[name="twitter:title"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 2, content);
assign(result, 2, content)
}
},
});
})
html.on(
"title",
new BufferedTextHandler((text) => {
assign(result, 1, decode(text));
})
);
assign(result, 1, decode(text))
}),
)
return new Promise<string | null>((resolve) => {
html.onDocument({
end() {
resolve(result.content && clip(result.content, 100));
resolve(result.content && clip(result.content, 100))
},
});
});
})
})
}

View File

@ -1,34 +1,30 @@
export interface PrioritizedReference<T> {
bits: number;
priority: number;
content: T;
bits: number
priority: number
content: T
}
export function assign<T>(
target: PrioritizedReference<T>,
priority: PrioritizedReference<T>["priority"],
content: PrioritizedReference<T>["content"]
): void {
export function assign<T>(target: PrioritizedReference<T>, priority: PrioritizedReference<T>["priority"], content: PrioritizedReference<T>["content"]): void {
if (target.priority <= priority) {
target.priority = priority;
target.content = content;
target.priority = priority
target.content = content
}
}
export function toAbsoluteURL(url: string, base: string) {
if (/^https?:\/\//.test(url)) {
return url;
return url
} else {
return new URL(url, base).href;
return new URL(url, base).href
}
}
export class BufferedTextHandler {
private buffer = "";
private buffer = ""
constructor(private readonly callback: (text: string) => void) {}
text(text: Text) {
this.callback((this.buffer += text.text));
this.callback((this.buffer += text.text))
}
}

View File

@ -1,33 +1,33 @@
import { assign } from "../common";
import type { PrioritizedReference } from "../common";
import { assign } from "../common"
import type { PrioritizedReference } from "../common"
export default function getCard(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = {
bits: 2, // 0-3
priority: 0,
content: null,
};
}
html.on('meta[name="twitter:card"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 2, content);
assign(result, 2, content)
}
},
});
})
html.on('meta[property="twitter:card"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 1, content);
assign(result, 1, content)
}
},
});
})
return new Promise<string | null>((resolve) => {
html.onDocument({
end() {
resolve(result.content);
resolve(result.content)
},
});
});
})
})
}

View File

@ -1,42 +1,42 @@
import clip from "summaly/built/utils/clip";
import { assign } from "../common";
import type { PrioritizedReference } from "../common";
import clip from "summaly/built/utils/clip"
import { assign } from "../common"
import type { PrioritizedReference } from "../common"
export default function getDescription(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = {
bits: 2, // 0-3
priority: 0,
content: null,
};
}
html.on('meta[property="og:description"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 3, content);
assign(result, 3, content)
}
},
});
})
html.on('meta[name="twitter:description"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 2, content);
assign(result, 2, content)
}
},
});
})
html.on('meta[name="description"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 1, content);
assign(result, 1, content)
}
},
});
})
return new Promise<string | null>((resolve) => {
html.onDocument({
end() {
resolve(result.content && clip(result.content, 300));
resolve(result.content && clip(result.content, 300))
},
});
});
})
})
}

View File

@ -1,33 +1,33 @@
import { assign, toAbsoluteURL } from "../common";
import type { PrioritizedReference } from "../common";
import { assign, toAbsoluteURL } from "../common"
import type { PrioritizedReference } from "../common"
export default function getFavicon(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string> = {
bits: 2, // 0-3
priority: 0,
content: "/favicon.ico",
};
}
html.on('link[rel="shortcut icon"]', {
element(element) {
const content = element.getAttribute("href");
const content = element.getAttribute("href")
if (content) {
assign(result, 3, content);
assign(result, 3, content)
}
},
});
})
html.on('link[rel="icon"]', {
element(element) {
const content = element.getAttribute("href");
const content = element.getAttribute("href")
if (content) {
assign(result, 2, content);
assign(result, 2, content)
}
},
});
})
return new Promise<string>((resolve) => {
html.onDocument({
end() {
resolve(toAbsoluteURL(result.content, url.href));
resolve(toAbsoluteURL(result.content, url.href))
},
});
});
})
})
}

View File

@ -1,59 +1,57 @@
import { assign, toAbsoluteURL } from "../common";
import type { PrioritizedReference } from "../common";
import { assign, toAbsoluteURL } from "../common"
import type { PrioritizedReference } from "../common"
export default function getImage(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = {
bits: 3, // 0-7
priority: 0,
content: null,
};
}
html.on('meta[property="og:image"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 7, content);
assign(result, 7, content)
}
},
});
})
html.on('meta[name="twitter:image"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 6, content);
assign(result, 6, content)
}
},
});
})
html.on('link[rel="image_src"]', {
element(element) {
const content = element.getAttribute("href");
const content = element.getAttribute("href")
if (content) {
assign(result, 5, content);
assign(result, 5, content)
}
},
});
})
html.on('link[rel="apple-touch-icon"]', {
element(element) {
const content = element.getAttribute("href");
const content = element.getAttribute("href")
if (content) {
assign(result, 4, content);
assign(result, 4, content)
}
},
});
})
html.on('link[rel="apple-touch-icon image_src"]', {
element(element) {
const content = element.getAttribute("href");
const content = element.getAttribute("href")
if (content) {
assign(result, 3, content);
assign(result, 3, content)
}
},
});
})
return new Promise<string | null>((resolve) => {
html.onDocument({
end() {
resolve(
result.content ? toAbsoluteURL(result.content, url.href) : null
);
resolve(result.content ? toAbsoluteURL(result.content, url.href) : null)
},
});
});
})
})
}

View File

@ -1,63 +1,47 @@
import cleanupTitle from "summaly/built/utils/cleanup-title";
import getCard from "./card";
import getDescription from "./description";
import getFavicon from "./favicon";
import getImage from "./image";
import getPlayerUrlCommon from "./playerUrlCommon";
import getPlayerUrlGeneral from "./playerUrlGeneral";
import getPlayerUrlHeight from "./playerUrlHeight";
import getPlayerUrlWidth from "./playerUrlWidth";
import getSiteName from "./siteName";
import getTitle from "./title";
import getSensitive from "./sensitive";
import cleanupTitle from "summaly/built/utils/cleanup-title"
import getCard from "./card"
import getDescription from "./description"
import getFavicon from "./favicon"
import getImage from "./image"
import getPlayerUrlCommon from "./playerUrlCommon"
import getPlayerUrlGeneral from "./playerUrlGeneral"
import getPlayerUrlHeight from "./playerUrlHeight"
import getPlayerUrlWidth from "./playerUrlWidth"
import getSiteName from "./siteName"
import getTitle from "./title"
import getSensitive from "./sensitive"
export default function general(url: URL, html: HTMLRewriter) {
const card = getCard(url, html);
const title = getTitle(url, html);
const image = getImage(url, html);
const player = Promise.all([
card,
getPlayerUrlGeneral(url, html),
getPlayerUrlCommon(url, html),
getPlayerUrlWidth(url, html),
getPlayerUrlHeight(url, html),
]).then(([card, general, common, width, height]) => {
const url = (card !== "summary_large_image" && general) || common;
const card = getCard(url, html)
const title = getTitle(url, html)
const image = getImage(url, html)
const player = Promise.all([card, getPlayerUrlGeneral(url, html), getPlayerUrlCommon(url, html), getPlayerUrlWidth(url, html), getPlayerUrlHeight(url, html)]).then(([card, general, common, width, height]) => {
const url = (card !== "summary_large_image" && general) || common
if (url !== null && width !== null && height !== null) {
return {
url,
width,
height,
};
}
} else {
return {
url: null,
width: null,
height: null,
};
}
});
const description = getDescription(url, html);
const siteName = getSiteName(url, html);
const favicon = getFavicon(url, html);
const sensitive = getSensitive(url, html);
}
})
const description = getDescription(url, html)
const siteName = getSiteName(url, html)
const favicon = getFavicon(url, html)
const sensitive = getSensitive(url, html)
return Promise.all([
card,
title,
image,
player,
description,
siteName,
favicon,
sensitive,
]).then(
([card, title, image, player, description, siteName, favicon, sensitive]) => {
return Promise.all([card, title, image, player, description, siteName, favicon, sensitive]).then(([card, title, image, player, description, siteName, favicon, sensitive]) => {
if (title === null) {
return null;
return null
}
if (siteName !== null) {
title = cleanupTitle(title, siteName);
title = cleanupTitle(title, siteName)
}
return {
title,
@ -70,7 +54,6 @@ export default function general(url: URL, html: HTMLRewriter) {
sensitive,
large: card === "summary_large_image",
url: url.href,
};
}
);
})
}

View File

@ -1,43 +1,41 @@
import { assign, toAbsoluteURL } from "../common";
import type { PrioritizedReference } from "../common";
import { assign, toAbsoluteURL } from "../common"
import type { PrioritizedReference } from "../common"
export default function getPlayerUrlCommon(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = {
bits: 2, // 0-3
priority: 0,
content: null,
};
}
html.on('meta[property="og:video"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 3, content);
assign(result, 3, content)
}
},
});
})
html.on('meta[property="og:video:secure_url"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 2, content);
assign(result, 2, content)
}
},
});
})
html.on('meta[property="og:video:url"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 1, content);
assign(result, 1, content)
}
},
});
})
return new Promise<string | null>((resolve) => {
html.onDocument({
end() {
resolve(
result.content ? toAbsoluteURL(result.content, url.href) : null
);
resolve(result.content ? toAbsoluteURL(result.content, url.href) : null)
},
});
});
})
})
}

View File

@ -1,35 +1,33 @@
import { assign, toAbsoluteURL } from "../common";
import type { PrioritizedReference } from "../common";
import { assign, toAbsoluteURL } from "../common"
import type { PrioritizedReference } from "../common"
export default function getPlayerUrlGeneral(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = {
bits: 2, // 0-3
priority: 0,
content: null,
};
}
html.on('meta[property="twitter:player"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 3, content);
assign(result, 3, content)
}
},
});
})
html.on('meta[name="twitter:player"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 2, content);
assign(result, 2, content)
}
},
});
})
return new Promise<string | null>((resolve) => {
html.onDocument({
end() {
resolve(
result.content ? toAbsoluteURL(result.content, url.href) : null
);
resolve(result.content ? toAbsoluteURL(result.content, url.href) : null)
},
});
});
})
})
}

View File

@ -1,42 +1,42 @@
import { assign } from "../common";
import type { PrioritizedReference } from "../common";
import { assign } from "../common"
import type { PrioritizedReference } from "../common"
export default function getPlayerUrlHeight(url: URL, html: HTMLRewriter) {
const result: PrioritizedReference<string | null> = {
bits: 2, // 0-3
priority: 0,
content: null,
};
}
html.on('meta[property="twitter:player:height"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 3, content);
assign(result, 3, content)
}
},
});
})
html.on('meta[name="twitter:player:height"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 2, content);
assign(result, 2, content)
}
},
});
})
html.on('meta[property="og:video:height"]', {
element(element) {
const content = element.getAttribute("content");
const content = element.getAttribute("content")
if (content) {
assign(result, 1, content);
assign(result, 1, content)
}
},
});
})
return new Promise<number | null>((resolve) => {
html.onDocument({
end() {
const content = parseInt(result.content!, 10);
resolve(Number.isNaN(content) ? null : content);
const content = parseInt(result.content!, 10)
resolve(Number.isNaN(content) ? null : content)
},
});
});
})
})
}

View File

@ -1,42 +1,42 @@
import { assign } from "../common";
import type { PrioritizedReference } from "../common";
import { assign } from "../common"
import type { PrioritizedReference } from "../common"
/**
 * Collects the embedded player's width from the page's meta tags.
 *
 * Registers one element handler per candidate tag on `html`; every non-empty
 * `content` attribute is handed to `assign` together with the priority of its
 * source (presumably `assign` keeps the highest-priority value - confirm in
 * ../common):
 *   3 - meta[property="twitter:player:width"]
 *   2 - meta[name="twitter:player:width"]
 *   1 - meta[property="og:video:width"]
 *
 * @param url - URL of the page being inspected; not read by this function.
 * @param html - Rewriter the element and document handlers are attached to.
 * @returns A promise resolved from the document `end` handler with the
 * collected value parsed as a base-10 integer, or `null` when nothing was
 * collected or the value does not parse as a number.
 */
export default function getPlayerUrlWidth(url: URL, html: HTMLRewriter) {
  const result: PrioritizedReference<string | null> = {
    bits: 2, // 0-3
    priority: 0,
    content: null,
  }
  // Registration order matches the original handler order.
  const sources = [
    ['meta[property="twitter:player:width"]', 3],
    ['meta[name="twitter:player:width"]', 2],
    ['meta[property="og:video:width"]', 1],
  ] as const
  for (const [selector, priority] of sources) {
    html.on(selector, {
      element(element) {
        const content = element.getAttribute("content")
        if (content) {
          assign(result, priority, content)
        }
      },
    })
  }
  return new Promise<number | null>((resolve) => {
    html.onDocument({
      end() {
        // Nothing collected: answer null explicitly instead of relying on
        // parseInt(null) evaluating to NaN behind a non-null assertion.
        if (result.content === null) {
          resolve(null)
          return
        }
        const width = parseInt(result.content, 10)
        resolve(Number.isNaN(width) ? null : width)
      },
    })
  })
}

View File

@ -1,22 +1,22 @@
import { assign } from "../common";
import type { PrioritizedReference } from "../common";
import { assign } from "../common"
import type { PrioritizedReference } from "../common"
/**
 * Detects whether the page marks itself as possibly sensitive.
 *
 * Registers a handler on `html` for `.tweet[data-possibly-sensitive="true"]`;
 * when such an element is seen, `true` is handed to `assign` with priority 1.
 *
 * @param url - URL of the page being inspected; not read by this function.
 * @param html - Rewriter the element and document handlers are attached to.
 * @returns A promise resolved from the document `end` handler with the
 * collected flag (`false` unless `assign` replaced the initial value).
 */
export default function getSensitive(url: URL, html: HTMLRewriter) {
  const result: PrioritizedReference<boolean> = {
    bits: 1, // 0-1
    priority: 0,
    content: false,
  }
  html.on('.tweet[data-possibly-sensitive="true"]', {
    element() {
      assign(result, 1, true)
    },
  })
  return new Promise<boolean>((resolve) => {
    html.onDocument({
      end() {
        resolve(result.content)
      },
    })
  })
}

View File

@ -1,33 +1,33 @@
import { assign } from "../common";
import type { PrioritizedReference } from "../common";
import { assign } from "../common"
import type { PrioritizedReference } from "../common"
/**
 * Collects the site name of the page, starting from the URL's hostname.
 *
 * Registers one element handler per candidate tag on `html`; every non-empty
 * `content` attribute is handed to `assign` together with the priority of its
 * source (presumably `assign` keeps the highest-priority value - confirm in
 * ../common):
 *   3 - meta[property="og:site_name"]
 *   2 - meta[name="application-name"]
 *
 * @param url - URL of the page; its `hostname` is the initial value.
 * @param html - Rewriter the element and document handlers are attached to.
 * @returns A promise resolved from the document `end` handler with the
 * collected site name.
 */
export default function getSiteName(url: URL, html: HTMLRewriter) {
  const result: PrioritizedReference<string | null> = {
    bits: 2, // 0-3
    priority: 0,
    content: url.hostname,
  }
  // Registration order matches the original handler order.
  const sources = [
    ['meta[property="og:site_name"]', 3],
    ['meta[name="application-name"]', 2],
  ] as const
  for (const [selector, priority] of sources) {
    html.on(selector, {
      element(element) {
        const content = element.getAttribute("content")
        if (content) {
          assign(result, priority, content)
        }
      },
    })
  }
  return new Promise<string | null>((resolve) => {
    html.onDocument({
      end() {
        resolve(result.content)
      },
    })
  })
}

View File

@ -1,41 +1,41 @@
import { decode } from "html-entities";
import clip from "summaly/built/utils/clip";
import { BufferedTextHandler, assign } from "../common";
import type { PrioritizedReference } from "../common";
import { decode } from "html-entities"
import clip from "summaly/built/utils/clip"
import { BufferedTextHandler, assign } from "../common"
import type { PrioritizedReference } from "../common"
/**
 * Collects the page title from meta tags or the `<title>` element.
 *
 * Candidates are handed to `assign` together with the priority of their
 * source (presumably `assign` keeps the highest-priority value - confirm in
 * ../common):
 *   3 - `content` of meta[property="og:title"]
 *   2 - `content` of meta[name="twitter:title"]
 *   1 - text of `<title>`, passed through `decode` from html-entities
 *
 * @param url - URL of the page being inspected; not read by this function.
 * @param html - Rewriter the element and document handlers are attached to.
 * @returns A promise resolved from the document `end` handler with the
 * collected title passed through `clip(..., 100)`, or the falsy collected
 * value unchanged (`null` when nothing was collected).
 */
export default function getTitle(url: URL, html: HTMLRewriter) {
  const result: PrioritizedReference<string | null> = {
    bits: 2, // 0-3
    priority: 0,
    content: null,
  }
  // Registration order matches the original handler order.
  const metaSources = [
    ['meta[property="og:title"]', 3],
    ['meta[name="twitter:title"]', 2],
  ] as const
  for (const [selector, priority] of metaSources) {
    html.on(selector, {
      element(element) {
        const content = element.getAttribute("content")
        if (content) {
          assign(result, priority, content)
        }
      },
    })
  }
  // NOTE(review): BufferedTextHandler presumably delivers the element's whole
  // text to the callback in one call - confirm in ../common.
  html.on(
    "title",
    new BufferedTextHandler((text) => {
      assign(result, 1, decode(text))
    }),
  )
  return new Promise<string | null>((resolve) => {
    html.onDocument({
      end() {
        resolve(result.content && clip(result.content, 100))
      },
    })
  })
}

View File

@ -1,28 +1,13 @@
import amazon from "./amazon";
import general from "./general";
import wikipedia from "./wikipedia";
import amazon from "./amazon"
import general from "./general"
import wikipedia from "./wikipedia"
/**
 * Hostnames routed to the Amazon-specific summarizer.
 *
 * "www.amazon.com.au" is the Amazon Australia storefront; the previously
 * listed "www.amazon.au" is kept so existing behavior is not narrowed.
 */
const AMAZON_HOSTNAMES: ReadonlySet<string> = new Set([
  "www.amazon.com",
  "www.amazon.co.jp",
  "www.amazon.ca",
  "www.amazon.com.br",
  "www.amazon.com.mx",
  "www.amazon.co.uk",
  "www.amazon.de",
  "www.amazon.fr",
  "www.amazon.it",
  "www.amazon.es",
  "www.amazon.nl",
  "www.amazon.cn",
  "www.amazon.in",
  "www.amazon.com.au",
  "www.amazon.au",
])

/**
 * Picks the summarizer for a page based on its hostname and delegates to it.
 *
 * - a hostname in `AMAZON_HOSTNAMES` goes to `amazon`
 * - `wikipedia.org` and any of its subdomains go to `wikipedia`
 * - everything else goes to `general`
 *
 * @param url - URL of the page to summarize.
 * @param html - Rewriter passed through to the selected summarizer.
 * @returns Whatever the selected summarizer returns.
 */
export default function summary(url: URL, html: HTMLRewriter) {
  if (AMAZON_HOSTNAMES.has(url.hostname)) {
    return amazon(url, html)
  }
  // The leading dot makes the bare "wikipedia.org" match while rejecting
  // look-alikes such as "notwikipedia.org".
  if (`.${url.hostname}`.endsWith(".wikipedia.org")) {
    return wikipedia(url, html)
  }
  return general(url, html)
}

View File

@ -1,13 +1,11 @@
import clip from "summaly/built/utils/clip";
import clip from "summaly/built/utils/clip"
export default async function wikipedia(url: URL, html: HTMLRewriter) {
const lang = url.hostname.split(".")[0];
const title = url.pathname.split("/")[2];
const response = await fetch(
`https://${lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro=&explaintext=&titles=${title}`
);
const json = await response.json<any>();
const info = json.query.pages[Object.keys(json.query.pages)[0]];
const lang = url.hostname.split(".")[0]
const title = url.pathname.split("/")[2]
const response = await fetch(`https://${lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro=&explaintext=&titles=${title}`)
const json = await response.json<any>()
const info = json.query.pages[Object.keys(json.query.pages)[0]]
return {
title: info.title,
icon: "https://wikipedia.org/static/favicon/wikipedia.ico",
@ -21,5 +19,5 @@ export default async function wikipedia(url: URL, html: HTMLRewriter) {
allow: [],
sitename: "Wikipedia",
url: url.href,
};
}
}

View File

@ -12,9 +12,7 @@
/* Language and Environment */
"target": "es2021" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */,
"lib": [
"es2021"
] /* Specify a set of bundled library declaration files that describe the target runtime environment. */,
"lib": ["esnext"] /* Specify a set of bundled library declaration files that describe the target runtime environment. */,
"jsx": "react" /* Specify what JSX code is generated. */,
// "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
@ -33,9 +31,7 @@
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
// "typeRoots": [], /* Specify multiple folders that act like `./node_modules/@types`. */
"types": [
"@cloudflare/workers-types"
] /* Specify type package names to be included without being referenced in a source file. */,
"types": ["@cloudflare/vitest-pool-workers", "@cloudflare/workers-types/experimental", "vitest/importMeta"] /* Specify type package names to be included without being referenced in a source file. */,
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
"resolveJsonModule": true /* Enable importing .json files */,
// "noResolve": true, /* Disallow `import`s, `require`s or `<reference>`s from expanding the number of files TypeScript should add to a project. */

15
vitest.config.ts Normal file
View File

@ -0,0 +1,15 @@
/// <reference types="vitest" />
import { defineWorkersConfig } from "@cloudflare/vitest-pool-workers/config"
// Globs of source files that vitest scans for in-source tests.
const inSourceTestGlobs = ["src/**/*.ts"]

// Options for the Workers test pool: point it at the project's wrangler config.
const workersPool = {
  wrangler: {
    configPath: "./wrangler.toml",
  },
}

// Vitest configuration, built through the Workers pool's config helper.
export default defineWorkersConfig({
  test: {
    includeSource: inSourceTestGlobs,
    poolOptions: { workers: workersPool },
  },
})

View File

@ -1,3 +1,4 @@
# Wrangler configuration for the "summerflare" worker.
name = "summerflare"
# Entry module of the worker.
main = "src/index.ts"
# A key may be defined only once in TOML; the stale "2023-05-13" entry is
# dropped in favor of the updated date.
compatibility_date = "2024-05-13"
compatibility_flags = ["nodejs_compat"]