feat: add oEmbed support

Co-Authored-By: Acid Chicken (硫酸鶏) <root@acid-chicken.com>
This commit is contained in:
ikasoba 2024-05-28 15:11:27 +09:00 committed by Acid Chicken (硫酸鶏)
parent 0cfacfeacc
commit 8fa8ab8bc7
No known key found for this signature in database
GPG Key ID: 3E87B98A3F6BAB99
9 changed files with 221 additions and 47 deletions

View File

@ -12,6 +12,7 @@
"html-entities": "^2.5.2",
"jschardet": "^3.1.2",
"summaly": "^2.7.0",
"whatwg-mimetype": "^4.0.0"
"whatwg-mimetype": "^4.0.0",
"zod": "^3.23.8"
}
}

12
pnpm-lock.yaml generated
View File

@ -16,6 +16,9 @@ dependencies:
whatwg-mimetype:
specifier: ^4.0.0
version: 4.0.0
zod:
specifier: ^3.23.8
version: 3.23.8
devDependencies:
'@cloudflare/vitest-pool-workers':
@ -58,7 +61,7 @@ packages:
miniflare: 3.20240404.0
vitest: 1.3.0
wrangler: 3.48.0(@cloudflare/workers-types@4.20240405.0)
zod: 3.22.4
zod: 3.23.8
transitivePeerDependencies:
- '@cloudflare/workers-types'
- bufferutil
@ -1692,7 +1695,7 @@ packages:
workerd: 1.20240404.0
ws: 8.16.0
youch: 3.3.3
zod: 3.22.4
zod: 3.23.8
transitivePeerDependencies:
- bufferutil
- supports-color
@ -2396,6 +2399,5 @@ packages:
stacktracey: 2.1.8
dev: true
/zod@3.22.4:
resolution: {integrity: sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==}
dev: true
/zod@3.23.8:
resolution: {integrity: sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==}

View File

@ -1,7 +1,13 @@
export const cf = {
cacheEverything: true,
cacheTtlByStatus: {
"200-299": 86400,
"400-599": 60,
export const fetchOptions = {
cf: {
cacheEverything: true,
cacheTtlByStatus: {
"200-299": 86400,
"400-599": 60,
},
} satisfies RequestInitCfProperties,
headers: {
Accept: "text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8",
"User-Agent": "Mozilla/5.0 (compatible; Summerflare; +https://github.com/misskey-dev/summerflare)",
},
} satisfies RequestInitCfProperties
}

View File

@ -1,5 +1,5 @@
import { Hono } from "hono"
import { cf } from "./config"
import { fetchOptions } from "./config"
import { normalize } from "./encoding"
import summary from "./summary"
export interface Env {
@ -30,13 +30,7 @@ app.get("/url", async (context) => {
} catch (e) {
return context.json({ error: "Invalid URL" }, 400)
}
const response = (await fetch(url, {
cf,
headers: {
Accept: "text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8",
"User-Agent": "Mozilla/5.0 (compatible; Summerflare; +https://github.com/misskey-dev/summerflare)",
},
})) as any as Response
const response = (await fetch(url, fetchOptions)) as any as Response
url = new URL(response.url)
const rewriter = new HTMLRewriter()
const summarized = summary(url, rewriter)
@ -61,11 +55,11 @@ if (import.meta.vitest) {
thumbnail: null,
description: null,
player: {
allow: [],
url: null,
width: null,
height: null,
},
allow: [],
sitename: "example.com",
icon: "https://example.com/favicon.ico",
sensitive: false,
@ -81,11 +75,11 @@ if (import.meta.vitest) {
thumbnail: null,
description: null,
player: {
allow: [],
url: null,
width: null,
height: null,
},
allow: [],
sitename: "abehiroshi.la.coocan.jp",
icon: "http://abehiroshi.la.coocan.jp/favicon.ico",
sensitive: false,
@ -101,11 +95,11 @@ if (import.meta.vitest) {
thumbnail: null,
description: null,
player: {
allow: [],
url: null,
width: null,
height: null,
},
allow: [],
sitename: "www.postgresql.jp",
icon: "https://www.postgresql.jp/favicon.ico",
sensitive: false,
@ -120,8 +114,12 @@ if (import.meta.vitest) {
title: "アイドルマスター ミリオンライブ! 第1幕 パンフレット",
thumbnail: "https://store.shochiku.co.jp/img/goods/S/23080501s.jpg",
description: "映画グッズ・アニメグッズを取り扱う通販サイト『Froovie/フルービー』です。ハリー・ポッター、ファンタスティック・ビースト、ガンダム、アニメなどのキャラクターグッズを多数揃えております。",
player: { url: null, width: null, height: null },
allow: [],
player: {
allow: [],
url: null,
width: null,
height: null,
},
sitename: "SHOCHIKU STORE | 松竹ストア",
icon: "https://store.shochiku.co.jp/favicon.ico",
sensitive: false,
@ -129,6 +127,26 @@ if (import.meta.vitest) {
url: "https://store.shochiku.co.jp/shop/g/g23080501/",
},
],
[
"the UTF-8 encoded website with oEmbed",
"https://open.spotify.com/intl-ja/track/5Odr16TvEN4my22K9nbH7l",
{
description: "May'n · Song · 2012",
icon: "https://open.spotifycdn.com/cdn/images/favicon.0f31d2ea.ico",
large: false,
player: {
allow: ["autoplay", "clipboard-write", "encrypted-media", "fullscreen", "picture-in-picture"],
height: 152,
url: "https://open.spotify.com/embed/track/5Odr16TvEN4my22K9nbH7l?utm_source=oembed",
width: 456,
},
sensitive: false,
sitename: "Spotify",
thumbnail: "https://i.scdn.co/image/ab67616d0000b273357d721f236b923d864f1c2e",
title: "Brain Diver",
url: "https://open.spotify.com/track/5Odr16TvEN4my22K9nbH7l",
},
],
])("should return summary of %s <%s>", async (_, url, expected) => {
const request = new Request(`https://fakehost/url?${new URLSearchParams({ url })}`)
const ctx = createExecutionContext()
@ -136,7 +154,6 @@ if (import.meta.vitest) {
await waitOnExecutionContext(ctx)
expect(response.status).toBe(200)
const body = await response.json()
console.log(body)
expect(body).toStrictEqual(expected)
})
})

View File

@ -48,7 +48,6 @@ export default function amazon(url: URL, html: HTMLRewriter) {
thumbnail,
description: title === description ? null : description,
player,
allow: [],
sitename: siteName,
icon: favicon,
sensitive,

View File

@ -3,32 +3,21 @@ import getCard from "./card"
import getDescription from "./description"
import getFavicon from "./favicon"
import getImage from "./image"
import getPlayerUrlCommon from "./playerUrlCommon"
import getPlayerUrlGeneral from "./playerUrlGeneral"
import getPlayerUrlHeight from "./playerUrlHeight"
import getPlayerUrlWidth from "./playerUrlWidth"
import getSiteName from "./siteName"
import getTitle from "./title"
import getSensitive from "./sensitive"
import getPlayer, { Player } from "./player"
export default function general(url: URL, html: HTMLRewriter) {
const card = getCard(url, html)
const title = getTitle(url, html)
const image = getImage(url, html)
const player = Promise.all([card, getPlayerUrlGeneral(url, html), getPlayerUrlCommon(url, html), getPlayerUrlWidth(url, html), getPlayerUrlHeight(url, html)]).then(([card, general, common, width, height]) => {
const url = (card !== "summary_large_image" && general) || common
if (url !== null && width !== null && height !== null) {
return {
url,
width,
height,
}
} else {
return {
url: null,
width: null,
height: null,
}
const player = Promise.all([card, getPlayer(url, html)]).then<Player>(([card, parsedPlayer]) => {
return {
url: card !== "summary_large_image" && parsedPlayer.urlGeneral || parsedPlayer.urlCommon,
width: parsedPlayer.width,
height: parsedPlayer.height,
allow: parsedPlayer.allow,
}
})
const description = getDescription(url, html)
@ -48,7 +37,6 @@ export default function general(url: URL, html: HTMLRewriter) {
thumbnail: image,
description: title === description ? null : description,
player,
allow: [],
sitename: siteName,
icon: favicon,
sensitive,

View File

@ -0,0 +1,38 @@
import getPlayerOEmbed from "./playerOEmbed"
import getPlayerUrlCommon from "./playerUrlCommon"
import getPlayerUrlGeneral from "./playerUrlGeneral"
import getPlayerUrlHeight from "./playerUrlHeight"
import getPlayerUrlWidth from "./playerUrlWidth"
/**
 * Embeddable player information exposed on a page summary.
 *
 * The numeric and URL fields are nullable; summaries for pages without a
 * player carry `null` in each of them together with an empty `allow` list.
 */
export interface Player {
  /** Address to load inside the embedding frame, or `null` when there is none. */
  url: string | null
  /** Player width, or `null` when unknown. */
  width: number | null
  /** Player height, or `null` when unknown. */
  height: number | null
  /** Feature names the embedding iframe should be granted via its `allow` attribute. */
  allow: string[]
}
/**
 * Intermediate player data gathered while parsing a document.
 *
 * It keeps every `Player` field except `url`, which is replaced by two
 * candidate URLs so the caller can decide which one to publish.
 */
export interface ParsedPlayer extends Omit<Player, "url"> {
  /** Candidate player URL from the "common" source, or `null` when absent. */
  urlCommon: string | null
  /** Candidate player URL from the "general" source, or `null` when absent. */
  urlGeneral: string | null
}
/**
 * Gathers player metadata for a page from every supported source and picks
 * the one to use.
 *
 * The oEmbed result takes precedence, but only when it actually produced a
 * player URL; otherwise the values extracted from the page's own markup are
 * returned with an empty `allow` list.
 *
 * @param url - Address of the page being summarised.
 * @param html - Rewriter that will stream the page; each extractor is given
 *   this instance when called.
 * @returns The parsed player data once all extractors have settled.
 */
export default function getPlayer(url: URL, html: HTMLRewriter): Promise<ParsedPlayer> {
  // Every extractor is invoked up front and awaited together afterwards.
  // getPlayerOEmbed registers its handlers on `html` at call time; the other
  // extractors presumably do the same, so none of them may be called lazily.
  const oEmbed = getPlayerOEmbed(url, html)
  const urlGeneral = getPlayerUrlGeneral(url, html)
  const urlCommon = getPlayerUrlCommon(url, html)
  const width = getPlayerUrlWidth(url, html)
  const height = getPlayerUrlHeight(url, html)
  return Promise.all([oEmbed, urlGeneral, urlCommon, width, height]).then(([oEmbed, urlGeneral, urlCommon, width, height]) => {
    // getPlayerOEmbed always resolves to an object (all-null when the page has
    // no usable oEmbed data), so a truthiness check would always win and make
    // the fallback below unreachable. Prefer oEmbed only when it found a URL.
    if (oEmbed.urlCommon !== null || oEmbed.urlGeneral !== null) {
      return oEmbed
    }
    return {
      urlCommon,
      urlGeneral,
      width,
      height,
      allow: [],
    }
  })
}

View File

@ -0,0 +1,124 @@
import { decode } from "html-entities"
import { z } from "zod"
import { fetchOptions } from "../../config"
import { assign, PrioritizedReference } from "../common"
import type { ParsedPlayer } from "./player"
// Reusable validators for the many optional members of an oEmbed response.
const optionalText = z.string().optional()
const optionalNumber = z.number().optional()

/**
 * Members shared by every oEmbed response type.
 *
 * Only `type` and `version` are mandatory; `type` is narrowed to a single
 * literal by each variant that extends this schema.
 */
const oEmbedBase = z.object({
  type: z.enum(["photo", "video", "link", "rich"]),
  version: z.literal("1.0"),
  title: optionalText,
  author_name: optionalText,
  author_url: optionalText,
  provider_name: optionalText,
  provider_url: optionalText,
  cache_age: optionalNumber,
  thumbnail_url: optionalText,
  thumbnail_width: optionalNumber,
  thumbnail_height: optionalNumber,
})
// Width and height are required by every variant that renders something.
const requiredDimensions = {
  width: z.number(),
  height: z.number(),
}

/**
 * Schema for a complete oEmbed response: the shared members plus the fields
 * each `type` requires. `photo` needs a `url`, `video` and `rich` need an
 * `html` snippet, and `link` adds nothing.
 */
const oEmbed = z.union([
  oEmbedBase.extend({ type: z.literal("photo"), url: z.string(), ...requiredDimensions }),
  oEmbedBase.extend({ type: z.literal("video"), html: z.string(), ...requiredDimensions }),
  oEmbedBase.extend({ type: z.literal("link") }),
  oEmbedBase.extend({ type: z.literal("rich"), html: z.string(), ...requiredDimensions }),
])
/**
 * Splits an iframe `allow` attribute into a sorted list of feature entries.
 * Empty entries (from a trailing or doubled `;`) are dropped.
 */
function parseAllowAttribute(value: string | null): string[] {
  if (!value) {
    return []
  }
  return decode(value)
    .split(";")
    .map((entry) => entry.trim())
    .filter((entry) => entry !== "")
    .sort()
}

/**
 * Discovers a player through the page's oEmbed link.
 *
 * Registers a handler on `html` for `link[type="application/json+oembed"]`.
 * For each such link the endpoint is fetched and validated against the
 * `oEmbed` schema; for `video` and `rich` responses the returned HTML snippet
 * is scanned for an `<iframe>` whose `src` and `allow` become the player.
 *
 * @param url - Address of the page being summarised; relative oEmbed links
 *   are resolved against it.
 * @param html - Rewriter that will stream the page.
 * @returns A promise resolved at the end of the document with the collected
 *   player data. All URL and size fields stay `null` (and `allow` empty) when
 *   no usable oEmbed player was found; the promise never resolves to `null`.
 */
export default function getPlayerOEmbed(url: URL, html: HTMLRewriter) {
  const result: PrioritizedReference<ParsedPlayer> = {
    bits: 1, // 0-1
    priority: 0,
    content: {
      urlCommon: null,
      urlGeneral: null,
      width: null,
      height: null,
      allow: [],
    },
  }
  html.on('link[type="application/json+oembed"]', {
    async element(element) {
      const href = decode(element.getAttribute("href") ?? "")
      if (!href) {
        return
      }
      // The link may be relative, so resolve it against the page address.
      // A malformed value must not abort the whole page transform.
      let endpoint: URL
      try {
        endpoint = new URL(href, url)
      } catch {
        return
      }
      // The endpoint comes from untrusted markup: only follow web URLs.
      if (endpoint.protocol !== "https:" && endpoint.protocol !== "http:") {
        return
      }
      // Network and JSON errors are deliberately folded into `undefined`,
      // which then fails schema validation below (best-effort lookup).
      const payload: unknown = await fetch(endpoint.href, fetchOptions)
        .then((response) => response.json())
        .catch(() => undefined)
      const parsed = oEmbed.safeParse(payload)
      if (!parsed.success) {
        return
      }
      const data = parsed.data
      // Only these two response types carry an HTML snippet to inspect.
      if (data.type !== "video" && data.type !== "rich") {
        return
      }
      const { width, height } = data
      const snippetRewriter = new HTMLRewriter()
      snippetRewriter.on("iframe", {
        element(iframe) {
          const srcValue = iframe.getAttribute("src")
          const src = srcValue ? decode(srcValue) : null
          if (src === null) {
            // An iframe without a source is not a usable player; recording
            // its dimensions would yield a size with no URL.
            return
          }
          assign(result, 1, {
            urlCommon: src,
            urlGeneral: null,
            width,
            height,
            allow: parseAllowAttribute(iframe.getAttribute("allow")),
          })
        },
      })
      // Handlers only run while the transformed body is consumed, so drain it.
      await snippetRewriter
        .transform(
          new Response(data.html, {
            headers: {
              "Content-Type": "text/html; charset=UTF-8",
            },
          })
        )
        .arrayBuffer()
    },
  })
  return new Promise<ParsedPlayer>((resolve) => {
    html.onDocument({
      end() {
        resolve(result.content)
      },
    })
  })
}

View File

@ -16,7 +16,6 @@ export default async function wikipedia(url: URL, html: HTMLRewriter) {
width: null,
height: null,
},
allow: [],
sitename: "Wikipedia",
url: url.href,
}