mirror of
https://github.com/misskey-dev/summaly.git
synced 2025-04-29 02:37:27 +09:00
enhance(plugin): blueskyのURLプレビューに対応 (#35)
* blueskyのURLプレビューに対応 (MisskeyIO#5)
  (cherry picked from commit 5407ae09230ab44693f2198a5cf639ae3a95c941)
* Update Changelog
* refactor
* lint
* refactor
* lint

---------

Co-authored-by: たーびん <tar.bin.master@gmail.com>
This commit is contained in:
parent
7fbab86441
commit
cc7ae8d00a
@ -1,6 +1,7 @@
|
||||
(unreleased)
|
||||
------------------
|
||||
* センシティブフラグの判定を `<meta property="rating">` および `rating` ヘッダでも行うように
|
||||
* Bluesky(bsky.app)のプレビューに対応
|
||||
* 依存関係の更新
|
||||
* eslintの設定を更新
|
||||
|
||||
|
@ -138,19 +138,7 @@ export type GeneralScrapingOptions = {
|
||||
contentLengthRequired?: boolean;
|
||||
}
|
||||
|
||||
/**
 * Reports whether an HTTP header carries a value equal to `search`,
 * compared case-insensitively.
 *
 * A header may arrive as a single string, as an array of strings, or be
 * absent. Besides comparing each string as a whole, every comma-separated
 * token inside it is compared too (surrounding whitespace ignored), so a
 * header that was sent several times and merged into `"a, b"` still matches.
 *
 * @param search - The value to look for.
 * @param headerValue - The raw header value(s); `undefined` or `''` never match.
 * @returns `true` when any header value, or any comma-separated token of one, equals `search`.
 */
function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) {
	if (!headerValue) {
		return false;
	}

	// Lowercase the needle once instead of on every comparison.
	const needle = search.toLowerCase();
	const trimmedNeedle = needle.trim();
	const candidates = Array.isArray(headerValue) ? headerValue : [headerValue];

	return candidates.some(value => {
		const lowered = value.toLowerCase();

		// Exact whole-value comparison, as before.
		if (lowered === needle) {
			return true;
		}

		// An empty needle must not match the empty tokens produced by stray commas.
		if (trimmedNeedle === '') {
			return false;
		}

		// Repeated headers can be merged into one comma-separated string.
		return lowered.split(',').some(token => token.trim() === trimmedNeedle);
	});
}
|
||||
|
||||
export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOptions): Promise<Summary | null> {
|
||||
export async function general(_url: URL | string, opts?: GeneralScrapingOptions): Promise<Summary | null> {
|
||||
let lang = opts?.lang;
|
||||
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
|
||||
|
||||
@ -164,6 +152,24 @@ export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOpt
|
||||
contentLengthLimit: opts?.contentLengthLimit,
|
||||
contentLengthRequired: opts?.contentLengthRequired,
|
||||
});
|
||||
|
||||
return await parseGeneral(url, res);
|
||||
}
|
||||
|
||||
/**
 * Reports whether an HTTP header carries a value equal to `search`,
 * compared case-insensitively.
 *
 * A header may arrive as a single string, as an array of strings, or be
 * absent. Besides comparing each string as a whole, every comma-separated
 * token inside it is compared too (surrounding whitespace ignored), so a
 * header that was sent several times and merged into `"a, b"` still matches.
 *
 * @param search - The value to look for.
 * @param headerValue - The raw header value(s); `undefined` or `''` never match.
 * @returns `true` when any header value, or any comma-separated token of one, equals `search`.
 */
function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) {
	if (!headerValue) {
		return false;
	}

	// Lowercase the needle once instead of on every comparison.
	const needle = search.toLowerCase();
	const trimmedNeedle = needle.trim();
	const candidates = Array.isArray(headerValue) ? headerValue : [headerValue];

	return candidates.some(value => {
		const lowered = value.toLowerCase();

		// Exact whole-value comparison, as before.
		if (lowered === needle) {
			return true;
		}

		// An empty needle must not match the empty tokens produced by stray commas.
		if (trimmedNeedle === '') {
			return false;
		}

		// Repeated headers can be merged into one comma-separated string.
		return lowered.split(',').some(token => token.trim() === trimmedNeedle);
	});
}
|
||||
|
||||
export async function parseGeneral(_url: URL | string, res: Awaited<ReturnType<typeof scpaping>>): Promise<Summary | null> {
|
||||
const url = typeof _url === 'string' ? new URL(_url) : _url;
|
||||
const $ = res.$;
|
||||
const twitterCard =
|
||||
$('meta[name="twitter:card"]').attr('content') ||
|
||||
|
@ -7,7 +7,7 @@ import { got, type Agents as GotAgents } from 'got';
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import { SummalyResult } from '@/summary.js';
|
||||
import { SummalyPlugin as _SummalyPlugin } from '@/iplugin.js';
|
||||
import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
|
||||
import { general, type GeneralScrapingOptions } from '@/general.js';
|
||||
import { DEFAULT_OPERATION_TIMEOUT, DEFAULT_RESPONSE_TIMEOUT, agent, setAgent } from '@/utils/got.js';
|
||||
import { plugins as builtinPlugins } from '@/plugins/index.js';
|
||||
|
||||
@ -125,7 +125,7 @@ export const summaly = async (url: string, options?: SummalyOptions): Promise<Su
|
||||
};
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
||||
const summary = await (match ? match.summarize : parseGeneral)(_url, scrapingOptions);
|
||||
const summary = await (match ? match.summarize : general)(_url, scrapingOptions);
|
||||
|
||||
if (summary == null) {
|
||||
throw new Error('failed summarize');
|
||||
|
26
src/plugins/bluesky.ts
Normal file
26
src/plugins/bluesky.ts
Normal file
@ -0,0 +1,26 @@
|
||||
import * as cheerio from 'cheerio';
|
||||
import type Summary from '@/summary.js';
|
||||
import { getResponse, getGotOptions } from '@/utils/got.js';
|
||||
import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
|
||||
|
||||
/**
 * Plugin matcher for Bluesky links.
 *
 * @param url - The URL being summarized.
 * @returns `true` only when the URL's hostname is exactly `bsky.app`.
 */
export function test(url: URL): boolean {
	const { hostname } = url;
	return hostname === 'bsky.app';
}
|
||||
|
||||
/**
 * Builds a summary for a Bluesky page.
 *
 * The page is requested once with GET, its body is loaded into cheerio, and
 * the result is handed to `parseGeneral` together with the raw response.
 *
 * @param url - The page URL to summarize.
 * @param opts - Optional scraping options, forwarded to `getGotOptions`.
 * @returns Whatever `parseGeneral` resolves to for the fetched page.
 */
export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promise<Summary | null> {
	// A HEAD request returns 404 for this site, so the page is fetched with GET only.
	const response = await getResponse({
		...getGotOptions(url.href, opts),
		method: 'GET',
	});

	const html = response.body;

	return await parseGeneral(url, {
		body: html,
		$: cheerio.load(html),
		response,
	});
}
|
@ -1,4 +1,4 @@
|
||||
import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
|
||||
import { general, type GeneralScrapingOptions } from '@/general.js';
|
||||
import Summary from '@/summary.js';
|
||||
|
||||
export function test(url: URL): boolean {
|
||||
@ -12,5 +12,5 @@ export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promis
|
||||
// Web版に強制リダイレクトすることでbranch.ioの独自ページが開くのを防ぐ
|
||||
url.searchParams.append('$web_only', 'true');
|
||||
|
||||
return await parseGeneral(url, opts);
|
||||
return await general(url, opts);
|
||||
}
|
||||
|
@ -1,10 +1,12 @@
|
||||
import * as amazon from './amazon.js';
|
||||
import * as bluesky from './bluesky.js';
|
||||
import * as wikipedia from './wikipedia.js';
|
||||
import * as branchIoDeeplinks from './branchio-deeplinks.js';
|
||||
import { SummalyPlugin } from '@/iplugin.js';
|
||||
|
||||
// Built-in plugin modules shipped with the library. Each entry is one of the
// plugin module namespaces imported at the top of this file.
// NOTE(review): element order may decide which plugin wins when several match
// a URL — confirm against the caller before reordering.
export const plugins: SummalyPlugin[] = [
|
||||
amazon,
|
||||
bluesky,
|
||||
wikipedia,
|
||||
branchIoDeeplinks,
|
||||
];
|
||||
|
@ -4,8 +4,9 @@ import { readFileSync } from 'node:fs';
|
||||
import got, * as Got from 'got';
|
||||
import * as cheerio from 'cheerio';
|
||||
import PrivateIp from 'private-ip';
|
||||
import { StatusError } from './status-error.js';
|
||||
import { detectEncoding, toUtf8 } from './encoding.js';
|
||||
import type { GeneralScrapingOptions } from '@/general.js';
|
||||
import { StatusError } from '@/utils/status-error.js';
|
||||
import { detectEncoding, toUtf8 } from '@/utils/encoding.js';
|
||||
|
||||
const _filename = fileURLToPath(import.meta.url);
|
||||
const _dirname = dirname(_filename);
|
||||
@ -36,23 +37,13 @@ export const DEFAULT_OPERATION_TIMEOUT = 60 * 1000;
|
||||
export const DEFAULT_MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
|
||||
export const DEFAULT_BOT_UA = `SummalyBot/${repo.version}`;
|
||||
|
||||
export async function scpaping(
|
||||
url: string,
|
||||
opts?: {
|
||||
lang?: string;
|
||||
userAgent?: string;
|
||||
responseTimeout?: number;
|
||||
operationTimeout?: number;
|
||||
contentLengthLimit?: number;
|
||||
contentLengthRequired?: boolean;
|
||||
},
|
||||
) {
|
||||
const args: Omit<GotOptions, 'method'> = {
|
||||
export function getGotOptions(url: string, opts?: GeneralScrapingOptions): Omit<GotOptions, 'method'> {
|
||||
return {
|
||||
url,
|
||||
headers: {
|
||||
'accept': 'text/html,application/xhtml+xml',
|
||||
'user-agent': opts?.userAgent ?? DEFAULT_BOT_UA,
|
||||
'accept-language': opts?.lang,
|
||||
'accept-language': opts?.lang ?? undefined,
|
||||
},
|
||||
typeFilter: /^(text\/html|application\/xhtml\+xml)/,
|
||||
responseTimeout: opts?.responseTimeout,
|
||||
@ -60,6 +51,13 @@ export async function scpaping(
|
||||
contentLengthLimit: opts?.contentLengthLimit,
|
||||
contentLengthRequired: opts?.contentLengthRequired,
|
||||
};
|
||||
}
|
||||
|
||||
export async function scpaping(
|
||||
url: string,
|
||||
opts?: GeneralScrapingOptions,
|
||||
) {
|
||||
const args = getGotOptions(url, opts);
|
||||
|
||||
const headResponse = await getResponse({
|
||||
...args,
|
||||
@ -110,7 +108,7 @@ export async function head(url: string) {
|
||||
});
|
||||
}
|
||||
|
||||
async function getResponse(args: GotOptions) {
|
||||
export async function getResponse(args: GotOptions) {
|
||||
const timeout = args.responseTimeout ?? DEFAULT_RESPONSE_TIMEOUT;
|
||||
const operationTimeout = args.operationTimeout ?? DEFAULT_OPERATION_TIMEOUT;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user