mirror of
https://github.com/misskey-dev/summerflare.git
synced 2025-06-26 14:18:02 +09:00
fix: support non-standard encodings
This commit is contained in:
parent
d07f170d09
commit
c6a859dff3
@ -2,12 +2,15 @@
|
||||
"packageManager": "pnpm@8.3.1",
|
||||
"devDependencies": {
|
||||
"@cloudflare/workers-types": "^4.20230511.0",
|
||||
"@types/whatwg-mimetype": "^3.0.1",
|
||||
"vitest": "^0.31.0",
|
||||
"wrangler": "^2.20.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@zxing/text-encoding": "^0.9.0",
|
||||
"hono": "^3.1.8",
|
||||
"html-entities": "^2.3.3",
|
||||
"summaly": "^2.7.0"
|
||||
"summaly": "^2.7.0",
|
||||
"whatwg-mimetype": "^3.0.0"
|
||||
}
|
||||
}
|
||||
|
22
pnpm-lock.yaml
generated
22
pnpm-lock.yaml
generated
@ -1,6 +1,9 @@
|
||||
lockfileVersion: '6.0'
|
||||
|
||||
dependencies:
|
||||
'@zxing/text-encoding':
|
||||
specifier: ^0.9.0
|
||||
version: 0.9.0
|
||||
hono:
|
||||
specifier: ^3.1.8
|
||||
version: 3.1.8
|
||||
@ -10,11 +13,17 @@ dependencies:
|
||||
summaly:
|
||||
specifier: ^2.7.0
|
||||
version: 2.7.0
|
||||
whatwg-mimetype:
|
||||
specifier: ^3.0.0
|
||||
version: 3.0.0
|
||||
|
||||
devDependencies:
|
||||
'@cloudflare/workers-types':
|
||||
specifier: ^4.20230511.0
|
||||
version: 4.20230511.0
|
||||
'@types/whatwg-mimetype':
|
||||
specifier: ^3.0.1
|
||||
version: 3.0.1
|
||||
vitest:
|
||||
specifier: ^0.31.0
|
||||
version: 0.31.0
|
||||
@ -687,6 +696,10 @@ packages:
|
||||
resolution: {integrity: sha512-TgfOX+mGY/NyNxJLIbDWrO9DjGoVSW9+aB8H2yy1fy32jsvxijhmyJI9fDFgvz3YP4lvJaq9DzdR/M1bOgVc9g==}
|
||||
dev: true
|
||||
|
||||
/@types/whatwg-mimetype@3.0.1:
|
||||
resolution: {integrity: sha512-dy1Os16KVaOyiwS237oURk7v0IGezg06FUlW//WcoAJMqrMSQlHNiYmZz06MgXdqE/uARd9h+sOm4AWRsJvUnQ==}
|
||||
dev: true
|
||||
|
||||
/@vitest/expect@0.31.0:
|
||||
resolution: {integrity: sha512-Jlm8ZTyp6vMY9iz9Ny9a0BHnCG4fqBa8neCF6Pk/c/6vkUk49Ls6UBlgGAU82QnzzoaUs9E/mUhq/eq9uMOv/g==}
|
||||
dependencies:
|
||||
@ -726,6 +739,10 @@ packages:
|
||||
pretty-format: 27.5.1
|
||||
dev: true
|
||||
|
||||
/@zxing/text-encoding@0.9.0:
|
||||
resolution: {integrity: sha512-U/4aVJ2mxI0aDNI8Uq0wEhMgY+u4CNtEb0om3+y3+niDAsoTCOB33UF0sxpzqzdqXLqmvc+vZyAt4O8pPdfkwA==}
|
||||
dev: false
|
||||
|
||||
/accepts@1.3.8:
|
||||
resolution: {integrity: sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==}
|
||||
engines: {node: '>= 0.6'}
|
||||
@ -2263,6 +2280,11 @@ packages:
|
||||
engines: {node: '>=6'}
|
||||
dev: true
|
||||
|
||||
/whatwg-mimetype@3.0.0:
|
||||
resolution: {integrity: sha512-nt+N2dzIutVRxARx1nghPKGv1xHikU7HKdfafKkLNLindmPU/ch3U31NOCGGA/dmPcmb1VlofO0vnKAcsm0o/Q==}
|
||||
engines: {node: '>=12'}
|
||||
dev: false
|
||||
|
||||
/which@2.0.2:
|
||||
resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==}
|
||||
engines: {node: '>= 8'}
|
||||
|
69
src/encoding.ts
Normal file
69
src/encoding.ts
Normal file
@ -0,0 +1,69 @@
|
||||
// Due to a bug in the Cloudflare Workers runtime, we have to use @zxing/text-encoding instead of the built-in TextEncoder/TextDecoder.
|
||||
import { encodingIndexes } from "@zxing/text-encoding/esm/encoding-indexes";
|
||||
(globalThis as any).TextEncodingIndexes = { encodingIndexes };
|
||||
|
||||
import { TextDecoder, TextEncoder } from "@zxing/text-encoding";
|
||||
import MIMEType from "whatwg-mimetype";
|
||||
|
||||
/**
 * Reads the `charset` parameter from the response's `Content-Type` header.
 *
 * Only the headers are inspected; the response body is left untouched.
 *
 * @param response - Response whose `Content-Type` header is examined.
 * @returns The value of the `charset` parameter, or `null` when the header is
 *   absent, when building a `MIMEType` from it throws, or when it carries no
 *   `charset` parameter.
 */
function getCharsetFromHeader(response: Response): string | null {
  const headerValue = response.headers.get("Content-Type");
  if (headerValue !== null) {
    try {
      const declared = new MIMEType(headerValue).parameters.get("charset");
      return declared ?? null;
    } catch {
      // The header could not be turned into a MIMEType: fall through and
      // report "no charset declared".
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Scans an HTML response body for a `<meta>` charset declaration.
 *
 * Two declaration forms are recognised:
 *   - `<meta charset="...">`
 *   - `<meta http-equiv="Content-Type" content="...; charset=...">`
 *
 * The first `<meta>` element that yields a non-empty charset wins; later
 * elements are ignored. Within a single element the `charset` attribute takes
 * precedence over the `http-equiv` pragma. Empty or whitespace-only values are
 * treated as "not declared".
 *
 * The response body is fully consumed by this function, so callers must pass
 * a copy (e.g. one branch of a `tee()`) if they still need the body.
 *
 * @param response - HTML response to scan.
 * @returns The declared charset with surrounding whitespace removed, or `null`
 *   when no usable declaration exists or the response has no body.
 */
async function getCharsetFromBody(response: Response): Promise<string | null> {
  let charset: string | null = null;
  const rewriter = new HTMLRewriter();
  rewriter.on("meta", {
    element(element) {
      // First valid declaration wins; never let a later <meta> overwrite it.
      if (charset !== null) {
        return;
      }

      // `<meta charset="...">` has priority within a single element.
      const fromAttribute = element.getAttribute("charset")?.trim();
      if (fromAttribute) {
        charset = fromAttribute;
        return;
      }

      // Otherwise look for the `http-equiv="Content-Type"` pragma form.
      const httpEquiv = element.getAttribute("http-equiv");
      if (httpEquiv === null || httpEquiv.toLowerCase() !== "content-type") {
        return;
      }
      const content = element.getAttribute("content");
      if (content === null) {
        return;
      }
      try {
        const fromPragma = new MIMEType(content).parameters
          .get("charset")
          ?.trim();
        if (fromPragma) {
          charset = fromPragma;
        }
      } catch {
        // `content` could not be turned into a MIMEType; keep scanning the
        // remaining <meta> elements.
      }
    },
  });

  const body = rewriter.transform(response).body;
  if (body === null) {
    // Nothing to scan (body-less response).
    return null;
  }

  // The rewriter only runs its handlers while the transformed stream is being
  // read, so drain it to the end.
  const reader = body.getReader();
  while (!(await reader.read()).done);
  return charset;
}
|
||||
|
||||
/**
 * Creates a streaming decoder for the given encoding label.
 *
 * @returns The decoder, or `null` when the label is not a known encoding
 *   (the constructor signals that with a `RangeError`). Any other error is
 *   rethrown so genuine failures are not hidden.
 */
function tryCreateDecoder(label: string): TextDecoder | null {
  try {
    return new TextDecoder(label, { fatal: true, ignoreBOM: true });
  } catch (error) {
    if (error instanceof RangeError) {
      return null;
    }
    throw error;
  }
}

/**
 * Builds a transform stream that re-encodes a response body to UTF-8.
 *
 * The source charset is taken from the `Content-Type` header when present,
 * otherwise from a `<meta>` declaration in the body. Note that the body scan
 * consumes `response`'s body, so callers should pass a copy of the response.
 *
 * An identity (pass-through) stream is returned when:
 *   - no charset is declared,
 *   - the declared charset is UTF-8 (`utf-8` / `utf8`, case-insensitive), or
 *   - the declared charset is not a known encoding label.
 *
 * Otherwise each chunk is decoded with the declared charset in streaming mode
 * and re-encoded as UTF-8. Decoding is fatal: malformed input errors the
 * stream, including an incomplete byte sequence left over at end of input.
 *
 * @param response - Response used to detect the charset.
 * @returns A transform stream to pipe the original body through.
 */
export async function getNormalizer(
  response: Response
): Promise<TransformStream<Uint8Array, Uint8Array>> {
  const charset =
    getCharsetFromHeader(response) ?? (await getCharsetFromBody(response));
  const label = charset === null ? "" : charset.trim().toLowerCase();
  if (label === "" || label === "utf-8" || label === "utf8") {
    return new TransformStream();
  }

  const decoder = tryCreateDecoder(label);
  if (decoder === null) {
    // Unknown encoding label: pass the bytes through untouched instead of
    // failing the whole request.
    return new TransformStream();
  }

  const encoder = new TextEncoder();
  return new TransformStream<Uint8Array, Uint8Array>({
    transform(chunk, controller) {
      // `stream: true` keeps a multi-byte sequence split across chunk
      // boundaries buffered inside the decoder until its remainder arrives.
      const text = decoder.decode(chunk, { stream: true });
      if (text.length > 0) {
        controller.enqueue(encoder.encode(text));
      }
    },
    flush(controller) {
      // Finalise the decoder at end of input so buffered state is emitted, or
      // reported as an error in fatal mode, rather than silently dropped.
      const tail = decoder.decode();
      if (tail.length > 0) {
        controller.enqueue(encoder.encode(tail));
      }
    },
  });
}
|
12
src/index.ts
12
src/index.ts
@ -1,5 +1,6 @@
|
||||
import { Hono } from "hono";
|
||||
import summary from "./summary";
|
||||
import { getNormalizer } from "./encoding";
|
||||
|
||||
export interface Env {
|
||||
// Example binding to KV. Learn more at https://developers.cloudflare.com/workers/runtime-apis/kv/
|
||||
@ -17,6 +18,11 @@ export interface Env {
|
||||
|
||||
const app = new Hono<Env>();
|
||||
|
||||
// Application-wide error handler: log the error, then answer with a JSON body
// carrying its message and stack trace and HTTP status 500.
app.onError((err, c) => {
  console.error(err);
  const { message, stack } = err;
  return c.json({ error: message, stack }, 500);
});
|
||||
|
||||
app.get("/url", async (context) => {
|
||||
let url: URL;
|
||||
try {
|
||||
@ -26,9 +32,13 @@ app.get("/url", async (context) => {
|
||||
}
|
||||
const response = await fetch(url);
|
||||
url = new URL(response.url);
|
||||
const [left, right] = response.body!.tee();
|
||||
const normalizer = await getNormalizer(new Response(left, response));
|
||||
const rewriter = new HTMLRewriter();
|
||||
const summarized = summary(url, rewriter);
|
||||
const reader = rewriter.transform(response).body!.getReader();
|
||||
const reader = rewriter
|
||||
.transform(new Response(right.pipeThrough(normalizer), response))
|
||||
.body!.getReader();
|
||||
while (!(await reader.read()).done);
|
||||
return context.json(await summarized);
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user