mirror of
https://github.com/misskey-dev/summerflare.git
synced 2025-06-26 22:28:00 +09:00
fix: support non-standard encodings
This commit is contained in:
parent
d07f170d09
commit
c6a859dff3
@ -2,12 +2,15 @@
|
|||||||
"packageManager": "pnpm@8.3.1",
|
"packageManager": "pnpm@8.3.1",
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@cloudflare/workers-types": "^4.20230511.0",
|
"@cloudflare/workers-types": "^4.20230511.0",
|
||||||
|
"@types/whatwg-mimetype": "^3.0.1",
|
||||||
"vitest": "^0.31.0",
|
"vitest": "^0.31.0",
|
||||||
"wrangler": "^2.20.0"
|
"wrangler": "^2.20.0"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@zxing/text-encoding": "^0.9.0",
|
||||||
"hono": "^3.1.8",
|
"hono": "^3.1.8",
|
||||||
"html-entities": "^2.3.3",
|
"html-entities": "^2.3.3",
|
||||||
"summaly": "^2.7.0"
|
"summaly": "^2.7.0",
|
||||||
|
"whatwg-mimetype": "^3.0.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
22
pnpm-lock.yaml
generated
22
pnpm-lock.yaml
generated
@ -1,6 +1,9 @@
|
|||||||
lockfileVersion: '6.0'
|
lockfileVersion: '6.0'
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
|
'@zxing/text-encoding':
|
||||||
|
specifier: ^0.9.0
|
||||||
|
version: 0.9.0
|
||||||
hono:
|
hono:
|
||||||
specifier: ^3.1.8
|
specifier: ^3.1.8
|
||||||
version: 3.1.8
|
version: 3.1.8
|
||||||
@ -10,11 +13,17 @@ dependencies:
|
|||||||
summaly:
|
summaly:
|
||||||
specifier: ^2.7.0
|
specifier: ^2.7.0
|
||||||
version: 2.7.0
|
version: 2.7.0
|
||||||
|
whatwg-mimetype:
|
||||||
|
specifier: ^3.0.0
|
||||||
|
version: 3.0.0
|
||||||
|
|
||||||
devDependencies:
|
devDependencies:
|
||||||
'@cloudflare/workers-types':
|
'@cloudflare/workers-types':
|
||||||
specifier: ^4.20230511.0
|
specifier: ^4.20230511.0
|
||||||
version: 4.20230511.0
|
version: 4.20230511.0
|
||||||
|
'@types/whatwg-mimetype':
|
||||||
|
specifier: ^3.0.1
|
||||||
|
version: 3.0.1
|
||||||
vitest:
|
vitest:
|
||||||
specifier: ^0.31.0
|
specifier: ^0.31.0
|
||||||
version: 0.31.0
|
version: 0.31.0
|
||||||
@ -687,6 +696,10 @@ packages:
|
|||||||
resolution: {integrity: sha512-TgfOX+mGY/NyNxJLIbDWrO9DjGoVSW9+aB8H2yy1fy32jsvxijhmyJI9fDFgvz3YP4lvJaq9DzdR/M1bOgVc9g==}
|
resolution: {integrity: sha512-TgfOX+mGY/NyNxJLIbDWrO9DjGoVSW9+aB8H2yy1fy32jsvxijhmyJI9fDFgvz3YP4lvJaq9DzdR/M1bOgVc9g==}
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/@types/whatwg-mimetype@3.0.1:
|
||||||
|
resolution: {integrity: sha512-dy1Os16KVaOyiwS237oURk7v0IGezg06FUlW//WcoAJMqrMSQlHNiYmZz06MgXdqE/uARd9h+sOm4AWRsJvUnQ==}
|
||||||
|
dev: true
|
||||||
|
|
||||||
/@vitest/expect@0.31.0:
|
/@vitest/expect@0.31.0:
|
||||||
resolution: {integrity: sha512-Jlm8ZTyp6vMY9iz9Ny9a0BHnCG4fqBa8neCF6Pk/c/6vkUk49Ls6UBlgGAU82QnzzoaUs9E/mUhq/eq9uMOv/g==}
|
resolution: {integrity: sha512-Jlm8ZTyp6vMY9iz9Ny9a0BHnCG4fqBa8neCF6Pk/c/6vkUk49Ls6UBlgGAU82QnzzoaUs9E/mUhq/eq9uMOv/g==}
|
||||||
dependencies:
|
dependencies:
|
||||||
@ -726,6 +739,10 @@ packages:
|
|||||||
pretty-format: 27.5.1
|
pretty-format: 27.5.1
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/@zxing/text-encoding@0.9.0:
|
||||||
|
resolution: {integrity: sha512-U/4aVJ2mxI0aDNI8Uq0wEhMgY+u4CNtEb0om3+y3+niDAsoTCOB33UF0sxpzqzdqXLqmvc+vZyAt4O8pPdfkwA==}
|
||||||
|
dev: false
|
||||||
|
|
||||||
/accepts@1.3.8:
|
/accepts@1.3.8:
|
||||||
resolution: {integrity: sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==}
|
resolution: {integrity: sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==}
|
||||||
engines: {node: '>= 0.6'}
|
engines: {node: '>= 0.6'}
|
||||||
@ -2263,6 +2280,11 @@ packages:
|
|||||||
engines: {node: '>=6'}
|
engines: {node: '>=6'}
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/whatwg-mimetype@3.0.0:
|
||||||
|
resolution: {integrity: sha512-nt+N2dzIutVRxARx1nghPKGv1xHikU7HKdfafKkLNLindmPU/ch3U31NOCGGA/dmPcmb1VlofO0vnKAcsm0o/Q==}
|
||||||
|
engines: {node: '>=12'}
|
||||||
|
dev: false
|
||||||
|
|
||||||
/which@2.0.2:
|
/which@2.0.2:
|
||||||
resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==}
|
resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==}
|
||||||
engines: {node: '>= 8'}
|
engines: {node: '>= 8'}
|
||||||
|
69
src/encoding.ts
Normal file
69
src/encoding.ts
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
// Due to a bug in the Cloudflare Workers runtime, we have to use @zxing/text-encoding instead of the built-in TextEncoder/TextDecoder.
|
||||||
|
import { encodingIndexes } from "@zxing/text-encoding/esm/encoding-indexes";
|
||||||
|
(globalThis as any).TextEncodingIndexes = { encodingIndexes };
|
||||||
|
|
||||||
|
import { TextDecoder, TextEncoder } from "@zxing/text-encoding";
|
||||||
|
import MIMEType from "whatwg-mimetype";
|
||||||
|
|
||||||
|
/**
 * Reads the `charset` parameter from the response's `Content-Type` header.
 *
 * Only the headers are inspected; the body is left untouched.
 *
 * @param response - Response whose `Content-Type` header is examined.
 * @returns The value of the `charset` parameter, or `null` when the header is
 *   missing, carries no `charset` parameter, or parsing it with `MIMEType`
 *   throws.
 */
function getCharsetFromHeader(response: Response): string | null {
  const header = response.headers.get("Content-Type");
  if (header !== null) {
    try {
      const declared = new MIMEType(header).parameters.get("charset");
      return declared ?? null;
    } catch {
      // The header value could not be parsed: treat it as "no charset".
    }
  }
  return null;
}
|
||||||
|
|
||||||
|
/**
 * Scans the response body as HTML for a `<meta>` encoding declaration.
 *
 * Two forms are recognised:
 *   - `<meta charset="...">`
 *   - `<meta http-equiv="Content-Type" content="...; charset=...">`
 *
 * The first `<meta>` element that yields a non-empty charset wins; later
 * `<meta>` elements are ignored. Within a single element the `charset`
 * attribute takes precedence over the `http-equiv` form.
 *
 * The body of `response` is read to the end by this call, so the caller must
 * pass a response whose body it does not need afterwards.
 *
 * @param response - Response whose body is streamed through an HTMLRewriter.
 * @returns The declared charset, or `null` when no declaration is found or
 *   the transformed response has no body.
 */
async function getCharsetFromBody(response: Response): Promise<string | null> {
  let charset: string | null = null;
  const rewriter = new HTMLRewriter();
  rewriter.on("meta", {
    element(element) {
      // First declaration wins: once a charset is known, ignore later <meta>s.
      if (charset !== null) {
        return;
      }

      // `<meta charset="...">`; blank values are not a usable declaration.
      const charsetAttr = element.getAttribute("charset")?.trim();
      if (charsetAttr) {
        charset = charsetAttr;
        return;
      }

      // `<meta http-equiv="Content-Type" content="text/html; charset=...">`
      const httpEquiv = element.getAttribute("http-equiv");
      if (httpEquiv === null || httpEquiv.toLowerCase() !== "content-type") {
        return;
      }
      const content = element.getAttribute("content");
      if (content === null) {
        return;
      }
      try {
        const declared = new MIMEType(content).parameters
          .get("charset")
          ?.trim();
        charset = declared ? declared : null;
      } catch {
        // Unparseable `content` value: leave charset unset so that a later
        // <meta> element can still provide one.
      }
    },
  });

  const body = rewriter.transform(response).body;
  if (body === null) {
    // Nothing to scan (body-less response).
    return null;
  }

  // Drain the transformed stream so the rewriter sees the whole document.
  const reader = body.getReader();
  while (!(await reader.read()).done);
  return charset;
}
|
||||||
|
|
||||||
|
/**
 * Builds a transform stream that re-encodes a response body to UTF-8.
 *
 * The source charset is taken from the `Content-Type` header when it declares
 * one; otherwise the body of `response` is scanned for a `<meta>` declaration,
 * which consumes that body. Callers should therefore pass a copy (for example
 * one branch of a `tee()`) and pipe the other copy through the returned
 * stream.
 *
 * A pass-through (identity) stream is returned when:
 *   - no charset is declared,
 *   - the declared charset is UTF-8 (`utf-8` / `utf8`, case-insensitive,
 *     surrounding whitespace ignored), or
 *   - the declared label is not recognised by `TextDecoder` (its constructor
 *     throws a `RangeError`), so that a bogus declaration does not fail the
 *     whole request.
 *
 * Otherwise the stream decodes with `fatal: true`, so malformed input errors
 * the stream. That includes a truncated multi-byte sequence at the very end
 * of the input, which is detected when the stream is flushed.
 *
 * @param response - Response used to detect the charset.
 * @returns A `TransformStream` that turns bytes in the detected charset into
 *   UTF-8 bytes.
 */
export async function getNormalizer(
  response: Response
): Promise<TransformStream<Uint8Array, Uint8Array>> {
  const declared =
    getCharsetFromHeader(response) ?? (await getCharsetFromBody(response));
  const label = declared?.trim().toLowerCase() ?? "";
  if (label === "" || label === "utf-8" || label === "utf8") {
    return new TransformStream();
  }

  let decoder: TextDecoder;
  try {
    decoder = new TextDecoder(label, { fatal: true, ignoreBOM: true });
  } catch (error: unknown) {
    if (error instanceof RangeError) {
      // Unknown encoding label: forward the bytes unchanged.
      return new TransformStream();
    }
    throw error;
  }

  const encoder = new TextEncoder();
  return new TransformStream<Uint8Array, Uint8Array>({
    transform(chunk, controller) {
      // `stream: true` keeps an incomplete trailing sequence buffered in the
      // decoder until the next chunk arrives.
      controller.enqueue(
        encoder.encode(decoder.decode(chunk, { stream: true }))
      );
    },
    flush(controller) {
      // Finalize the decoder at end of input. In fatal mode this throws if
      // the input ended in the middle of a character.
      const tail = decoder.decode();
      if (tail.length > 0) {
        controller.enqueue(encoder.encode(tail));
      }
    },
  });
}
|
12
src/index.ts
12
src/index.ts
@ -1,5 +1,6 @@
|
|||||||
import { Hono } from "hono";
|
import { Hono } from "hono";
|
||||||
import summary from "./summary";
|
import summary from "./summary";
|
||||||
|
import { getNormalizer } from "./encoding";
|
||||||
|
|
||||||
export interface Env {
|
export interface Env {
|
||||||
// Example binding to KV. Learn more at https://developers.cloudflare.com/workers/runtime-apis/kv/
|
// Example binding to KV. Learn more at https://developers.cloudflare.com/workers/runtime-apis/kv/
|
||||||
@ -17,6 +18,11 @@ export interface Env {
|
|||||||
|
|
||||||
const app = new Hono<Env>();
|
const app = new Hono<Env>();
|
||||||
|
|
||||||
|
// Application-wide error handler: logs the error and answers with a JSON body
// containing the error message and stack, using HTTP status 500.
// NOTE(review): the stack trace is sent to the client; confirm this is
// intended outside of debugging.
app.onError((error, context) => {
  console.error(error);
  const payload = { error: error.message, stack: error.stack };
  return context.json(payload, 500);
});
|
||||||
|
|
||||||
app.get("/url", async (context) => {
|
app.get("/url", async (context) => {
|
||||||
let url: URL;
|
let url: URL;
|
||||||
try {
|
try {
|
||||||
@ -26,9 +32,13 @@ app.get("/url", async (context) => {
|
|||||||
}
|
}
|
||||||
const response = await fetch(url);
|
const response = await fetch(url);
|
||||||
url = new URL(response.url);
|
url = new URL(response.url);
|
||||||
|
const [left, right] = response.body!.tee();
|
||||||
|
const normalizer = await getNormalizer(new Response(left, response));
|
||||||
const rewriter = new HTMLRewriter();
|
const rewriter = new HTMLRewriter();
|
||||||
const summarized = summary(url, rewriter);
|
const summarized = summary(url, rewriter);
|
||||||
const reader = rewriter.transform(response).body!.getReader();
|
const reader = rewriter
|
||||||
|
.transform(new Response(right.pipeThrough(normalizer), response))
|
||||||
|
.body!.getReader();
|
||||||
while (!(await reader.read()).done);
|
while (!(await reader.read()).done);
|
||||||
return context.json(await summarized);
|
return context.json(await summarized);
|
||||||
});
|
});
|
||||||
|
Loading…
x
Reference in New Issue
Block a user