enhance(plugin): blueskyのURLプレビューに対応 (#35)

* blueskyのURLプレビューに対応 (MisskeyIO#5)

(cherry picked from commit 5407ae09230ab44693f2198a5cf639ae3a95c941)

* Update Changelog

* refactor

* lint

* refactor

* lint

---------

Co-authored-by: たーびん <tar.bin.master@gmail.com>
This commit is contained in:
かっこかり 2025-02-02 10:58:43 +09:00 committed by GitHub
parent 7fbab86441
commit cc7ae8d00a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 66 additions and 33 deletions

View File

@ -1,6 +1,7 @@
(unreleased)
------------------
* センシティブフラグの判定を `<meta property="rating">` および `rating` ヘッダでも行うように
* Bluesky（bsky.app）のプレビューに対応
* 依存関係の更新
* eslintの設定を更新

View File

@ -138,19 +138,7 @@ export type GeneralScrapingOptions = {
contentLengthRequired?: boolean;
}
/**
 * Case-insensitively checks whether a header value equals `search`.
 *
 * @param search - The value to look for; compared in lower case.
 * @param headerValue - A single header value, a list of values, or `undefined`.
 * @returns `true` when the single value, or at least one entry of the list,
 *          equals `search` ignoring case; `false` for a missing or empty value.
 */
function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) {
	// Missing header (undefined) and empty string both count as "no match".
	if (!headerValue) return false;

	const needle = search.toLowerCase();
	// Treat the single-value case as a one-element list so both shapes share one comparison.
	const candidates = Array.isArray(headerValue) ? headerValue : [headerValue];
	return candidates.some(candidate => candidate.toLowerCase() === needle);
}
export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOptions): Promise<Summary | null> {
export async function general(_url: URL | string, opts?: GeneralScrapingOptions): Promise<Summary | null> {
let lang = opts?.lang;
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
@ -164,6 +152,24 @@ export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOpt
contentLengthLimit: opts?.contentLengthLimit,
contentLengthRequired: opts?.contentLengthRequired,
});
return await parseGeneral(url, res);
}
/**
 * Case-insensitively checks whether a header value equals `search`.
 *
 * @param search - The value to look for; compared in lower case.
 * @param headerValue - A single header value, a list of values, or `undefined`.
 * @returns `true` when the single value, or at least one entry of the list,
 *          equals `search` ignoring case; `false` for a missing or empty value.
 */
function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) {
	// Missing header (undefined) and empty string both count as "no match".
	if (!headerValue) return false;

	const needle = search.toLowerCase();
	// Treat the single-value case as a one-element list so both shapes share one comparison.
	const candidates = Array.isArray(headerValue) ? headerValue : [headerValue];
	return candidates.some(candidate => candidate.toLowerCase() === needle);
}
export async function parseGeneral(_url: URL | string, res: Awaited<ReturnType<typeof scpaping>>): Promise<Summary | null> {
const url = typeof _url === 'string' ? new URL(_url) : _url;
const $ = res.$;
const twitterCard =
$('meta[name="twitter:card"]').attr('content') ||

View File

@ -7,7 +7,7 @@ import { got, type Agents as GotAgents } from 'got';
import type { FastifyInstance } from 'fastify';
import { SummalyResult } from '@/summary.js';
import { SummalyPlugin as _SummalyPlugin } from '@/iplugin.js';
import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
import { general, type GeneralScrapingOptions } from '@/general.js';
import { DEFAULT_OPERATION_TIMEOUT, DEFAULT_RESPONSE_TIMEOUT, agent, setAgent } from '@/utils/got.js';
import { plugins as builtinPlugins } from '@/plugins/index.js';
@ -125,7 +125,7 @@ export const summaly = async (url: string, options?: SummalyOptions): Promise<Su
};
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
const summary = await (match ? match.summarize : parseGeneral)(_url, scrapingOptions);
const summary = await (match ? match.summarize : general)(_url, scrapingOptions);
if (summary == null) {
throw new Error('failed summarize');

26
src/plugins/bluesky.ts Normal file
View File

@ -0,0 +1,26 @@
import * as cheerio from 'cheerio';
import type Summary from '@/summary.js';
import { getResponse, getGotOptions } from '@/utils/got.js';
import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
/**
 * Decides whether this plugin handles the given URL.
 *
 * @param url - The parsed URL being summarized.
 * @returns `true` only when the hostname is exactly `bsky.app`
 *          (subdomains do not match).
 */
export function test(url: URL): boolean {
	const { hostname } = url;
	return hostname === 'bsky.app';
}
/**
 * Builds a summary for a bsky.app URL.
 *
 * Fetches the page with a single GET request, loads the response body with
 * cheerio, and delegates extraction to `parseGeneral`.
 *
 * @param url - The bsky.app URL to summarize.
 * @param opts - Optional scraping options forwarded to `getGotOptions`.
 * @returns Whatever `parseGeneral` resolves to for the fetched page.
 */
export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promise<Summary | null> {
	const gotOptions = getGotOptions(url.href, opts);

	// A HEAD request to this site returns 404, so fetch with GET only.
	const response = await getResponse({
		...gotOptions,
		method: 'GET',
	});

	const html = response.body;
	const $ = cheerio.load(html);

	return await parseGeneral(url, {
		body: html,
		$,
		response,
	});
}

View File

@ -1,4 +1,4 @@
import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
import { general, type GeneralScrapingOptions } from '@/general.js';
import Summary from '@/summary.js';
export function test(url: URL): boolean {
@ -12,5 +12,5 @@ export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promis
// Web版に強制リダイレクトすることでbranch.ioの独自ページが開くのを防ぐ
url.searchParams.append('$web_only', 'true');
return await parseGeneral(url, opts);
return await general(url, opts);
}

View File

@ -1,10 +1,12 @@
import * as amazon from './amazon.js';
import * as bluesky from './bluesky.js';
import * as wikipedia from './wikipedia.js';
import * as branchIoDeeplinks from './branchio-deeplinks.js';
import { SummalyPlugin } from '@/iplugin.js';
/**
 * Built-in plugins: amazon, bluesky, wikipedia and branch.io deep links.
 * NOTE(review): presumably the first plugin whose `test` matches a URL is used,
 * so the order of this list may matter — confirm against the caller.
 */
export const plugins: SummalyPlugin[] = [
amazon,
bluesky,
wikipedia,
branchIoDeeplinks,
];

View File

@ -4,8 +4,9 @@ import { readFileSync } from 'node:fs';
import got, * as Got from 'got';
import * as cheerio from 'cheerio';
import PrivateIp from 'private-ip';
import { StatusError } from './status-error.js';
import { detectEncoding, toUtf8 } from './encoding.js';
import type { GeneralScrapingOptions } from '@/general.js';
import { StatusError } from '@/utils/status-error.js';
import { detectEncoding, toUtf8 } from '@/utils/encoding.js';
const _filename = fileURLToPath(import.meta.url);
const _dirname = dirname(_filename);
@ -36,23 +37,13 @@ export const DEFAULT_OPERATION_TIMEOUT = 60 * 1000;
/** Default cap on response size: 10 * 1024 * 1024 (presumably bytes, i.e. 10 MiB — confirm where the limit is enforced). */
export const DEFAULT_MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
/** Default bot User-Agent string, embedding the version read from `repo`. */
export const DEFAULT_BOT_UA = `SummalyBot/${repo.version}`;
export async function scpaping(
url: string,
opts?: {
lang?: string;
userAgent?: string;
responseTimeout?: number;
operationTimeout?: number;
contentLengthLimit?: number;
contentLengthRequired?: boolean;
},
) {
const args: Omit<GotOptions, 'method'> = {
export function getGotOptions(url: string, opts?: GeneralScrapingOptions): Omit<GotOptions, 'method'> {
return {
url,
headers: {
'accept': 'text/html,application/xhtml+xml',
'user-agent': opts?.userAgent ?? DEFAULT_BOT_UA,
'accept-language': opts?.lang,
'accept-language': opts?.lang ?? undefined,
},
typeFilter: /^(text\/html|application\/xhtml\+xml)/,
responseTimeout: opts?.responseTimeout,
@ -60,6 +51,13 @@ export async function scpaping(
contentLengthLimit: opts?.contentLengthLimit,
contentLengthRequired: opts?.contentLengthRequired,
};
}
export async function scpaping(
url: string,
opts?: GeneralScrapingOptions,
) {
const args = getGotOptions(url, opts);
const headResponse = await getResponse({
...args,
@ -110,7 +108,7 @@ export async function head(url: string) {
});
}
async function getResponse(args: GotOptions) {
export async function getResponse(args: GotOptions) {
const timeout = args.responseTimeout ?? DEFAULT_RESPONSE_TIMEOUT;
const operationTimeout = args.operationTimeout ?? DEFAULT_OPERATION_TIMEOUT;