From cc7ae8d00a0c96af0e36b155d6a3e432587d8122 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=8B=E3=81=A3=E3=81=93=E3=81=8B=E3=82=8A?= <67428053+kakkokari-gtyih@users.noreply.github.com> Date: Sun, 2 Feb 2025 10:58:43 +0900 Subject: [PATCH] =?UTF-8?q?enhance(plugin):=20bluesky=E3=81=AEURL=E3=83=97?= =?UTF-8?q?=E3=83=AC=E3=83=93=E3=83=A5=E3=83=BC=E3=81=AB=E5=AF=BE=E5=BF=9C?= =?UTF-8?q?=20(#35)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * blueskyのURLプレビューに対応 (MisskeyIO#5) (cherry picked from commit 5407ae09230ab44693f2198a5cf639ae3a95c941) * Update Changelog * refactor * lint * refactor * lint --------- Co-authored-by: たーびん --- CHANGELOG.md | 1 + src/general.ts | 32 ++++++++++++++++++------------- src/index.ts | 4 ++-- src/plugins/bluesky.ts | 26 +++++++++++++++++++++++++ src/plugins/branchio-deeplinks.ts | 4 ++-- src/plugins/index.ts | 2 ++ src/utils/got.ts | 30 ++++++++++++++--------------- 7 files changed, 66 insertions(+), 33 deletions(-) create mode 100644 src/plugins/bluesky.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index ee9c593..a1e69d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ (unreleased) ------------------ * センシティブフラグの判定を `` および `rating` ヘッダでも行うように +* Bluesky(bsky.app)のプレビューに対応 * 依存関係の更新 * eslintの設定を更新 diff --git a/src/general.ts b/src/general.ts index b1facce..6351d31 100644 --- a/src/general.ts +++ b/src/general.ts @@ -138,19 +138,7 @@ export type GeneralScrapingOptions = { contentLengthRequired?: boolean; } -function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) { - if (!headerValue) { - return false; - } - - if (Array.isArray(headerValue)) { - return headerValue.some(value => value.toLowerCase() === search.toLowerCase()); - } - - return headerValue.toLowerCase() === search.toLowerCase(); -} - -export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOptions): Promise { +export async function general(_url: URL | string, opts?: GeneralScrapingOptions): Promise { let lang = opts?.lang; if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null; @@ -164,6 +152,24 @@ export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOpt contentLengthLimit: opts?.contentLengthLimit, contentLengthRequired: opts?.contentLengthRequired, }); + + return await parseGeneral(url, res); +} + +function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) { + if (!headerValue) { + return false; + } + + if (Array.isArray(headerValue)) { + return headerValue.some(value => value.toLowerCase() === search.toLowerCase()); + } + + return headerValue.toLowerCase() === search.toLowerCase(); +} + +export async function parseGeneral(_url: URL | string, res: Awaited>): Promise { + const url = typeof _url === 'string' ? new URL(_url) : _url; const $ = res.$; const twitterCard = $('meta[name="twitter:card"]').attr('content') || diff --git a/src/index.ts b/src/index.ts index 2fa4b96..a8f544f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -7,7 +7,7 @@ import { got, type Agents as GotAgents } from 'got'; import type { FastifyInstance } from 'fastify'; import { SummalyResult } from '@/summary.js'; import { SummalyPlugin as _SummalyPlugin } from '@/iplugin.js'; -import { parseGeneral, type GeneralScrapingOptions } from '@/general.js'; +import { general, type GeneralScrapingOptions } from '@/general.js'; import { DEFAULT_OPERATION_TIMEOUT, DEFAULT_RESPONSE_TIMEOUT, agent, setAgent } from '@/utils/got.js'; import { plugins as builtinPlugins } from '@/plugins/index.js'; @@ -125,7 +125,7 @@ export const summaly = async (url: string, options?: SummalyOptions): Promise { + const args = getGotOptions(url.href, opts); + + // HEADで取ると404が返るためGETのみで取得 + const res = await getResponse({ + ...args, + method: 'GET', + }); + const body = res.body; + const $ = cheerio.load(body); + + return await parseGeneral(url, { + body, + $, + response: res, + }); +} diff --git a/src/plugins/branchio-deeplinks.ts b/src/plugins/branchio-deeplinks.ts index 0623d94..67d08dc 100644 --- a/src/plugins/branchio-deeplinks.ts +++ b/src/plugins/branchio-deeplinks.ts @@ -1,4 +1,4 @@ -import { parseGeneral, type GeneralScrapingOptions } from '@/general.js'; +import { general, type GeneralScrapingOptions } from '@/general.js'; import Summary from '@/summary.js'; export function test(url: URL): boolean { @@ -12,5 +12,5 @@ export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promis // Web版に強制リダイレクトすることでbranch.ioの独自ページが開くのを防ぐ url.searchParams.append('$web_only', 'true'); - return await parseGeneral(url, opts); + return await general(url, opts); } diff --git a/src/plugins/index.ts b/src/plugins/index.ts index 41078eb..140380a 100644 --- a/src/plugins/index.ts +++ b/src/plugins/index.ts @@ -1,10 +1,12 @@ import * as amazon from './amazon.js'; +import * as bluesky from './bluesky.js'; import * as wikipedia from './wikipedia.js'; import * as branchIoDeeplinks from './branchio-deeplinks.js'; import { SummalyPlugin } from '@/iplugin.js'; export const plugins: SummalyPlugin[] = [ amazon, + bluesky, wikipedia, branchIoDeeplinks, ]; diff --git a/src/utils/got.ts b/src/utils/got.ts index 6a2b6d9..23dee91 100644 --- a/src/utils/got.ts +++ b/src/utils/got.ts @@ -4,8 +4,9 @@ import { readFileSync } from 'node:fs'; import got, * as Got from 'got'; import * as cheerio from 'cheerio'; import PrivateIp from 'private-ip'; -import { StatusError } from './status-error.js'; -import { detectEncoding, toUtf8 } from './encoding.js'; +import type { GeneralScrapingOptions } from '@/general.js'; +import { StatusError } from '@/utils/status-error.js'; +import { detectEncoding, toUtf8 } from '@/utils/encoding.js'; const _filename = fileURLToPath(import.meta.url); const _dirname = dirname(_filename); @@ -36,23 +37,13 @@ export const DEFAULT_OPERATION_TIMEOUT = 60 * 1000; export const DEFAULT_MAX_RESPONSE_SIZE = 10 * 1024 * 1024; export const DEFAULT_BOT_UA = `SummalyBot/${repo.version}`; -export async function scpaping( - url: string, - opts?: { - lang?: string; - userAgent?: string; - responseTimeout?: number; - operationTimeout?: number; - contentLengthLimit?: number; - contentLengthRequired?: boolean; - }, -) { - const args: Omit = { +export function getGotOptions(url: string, opts?: GeneralScrapingOptions): Omit { + return { url, headers: { 'accept': 'text/html,application/xhtml+xml', 'user-agent': opts?.userAgent ?? DEFAULT_BOT_UA, - 'accept-language': opts?.lang, + 'accept-language': opts?.lang ?? undefined, }, typeFilter: /^(text\/html|application\/xhtml\+xml)/, responseTimeout: opts?.responseTimeout, @@ -60,6 +51,13 @@ export async function scpaping( contentLengthLimit: opts?.contentLengthLimit, contentLengthRequired: opts?.contentLengthRequired, }; +} + +export async function scpaping( + url: string, + opts?: GeneralScrapingOptions, +) { + const args = getGotOptions(url, opts); const headResponse = await getResponse({ ...args, @@ -110,7 +108,7 @@ export async function head(url: string) { }); } -async function getResponse(args: GotOptions) { +export async function getResponse(args: GotOptions) { const timeout = args.responseTimeout ?? DEFAULT_RESPONSE_TIMEOUT; const operationTimeout = args.operationTimeout ?? DEFAULT_OPERATION_TIMEOUT;