mirror of
https://github.com/misskey-dev/summaly.git
synced 2025-04-29 10:47:25 +09:00
enhance(plugin): blueskyのURLプレビューに対応 (#35)
* blueskyのURLプレビューに対応 (MisskeyIO#5)
  (cherry picked from commit 5407ae09230ab44693f2198a5cf639ae3a95c941)
* Update Changelog
* refactor
* lint
* refactor
* lint

---------

Co-authored-by: たーびん <tar.bin.master@gmail.com>
This commit is contained in:
parent
7fbab86441
commit
cc7ae8d00a
@ -1,6 +1,7 @@
|
|||||||
(unreleased)
|
(unreleased)
|
||||||
------------------
|
------------------
|
||||||
* センシティブフラグの判定を `<meta property="rating">` および `rating` ヘッダでも行うように
|
* センシティブフラグの判定を `<meta property="rating">` および `rating` ヘッダでも行うように
|
||||||
|
* Bluesky(bsky.app)のプレビューに対応
|
||||||
* 依存関係の更新
|
* 依存関係の更新
|
||||||
* eslintの設定を更新
|
* eslintの設定を更新
|
||||||
|
|
||||||
|
@ -138,19 +138,7 @@ export type GeneralScrapingOptions = {
|
|||||||
contentLengthRequired?: boolean;
|
contentLengthRequired?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) {
|
export async function general(_url: URL | string, opts?: GeneralScrapingOptions): Promise<Summary | null> {
|
||||||
if (!headerValue) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Array.isArray(headerValue)) {
|
|
||||||
return headerValue.some(value => value.toLowerCase() === search.toLowerCase());
|
|
||||||
}
|
|
||||||
|
|
||||||
return headerValue.toLowerCase() === search.toLowerCase();
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOptions): Promise<Summary | null> {
|
|
||||||
let lang = opts?.lang;
|
let lang = opts?.lang;
|
||||||
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
|
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
|
||||||
|
|
||||||
@ -164,6 +152,24 @@ export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOpt
|
|||||||
contentLengthLimit: opts?.contentLengthLimit,
|
contentLengthLimit: opts?.contentLengthLimit,
|
||||||
contentLengthRequired: opts?.contentLengthRequired,
|
contentLengthRequired: opts?.contentLengthRequired,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
return await parseGeneral(url, res);
|
||||||
|
}
|
||||||
|
|
||||||
|
function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) {
|
||||||
|
if (!headerValue) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Array.isArray(headerValue)) {
|
||||||
|
return headerValue.some(value => value.toLowerCase() === search.toLowerCase());
|
||||||
|
}
|
||||||
|
|
||||||
|
return headerValue.toLowerCase() === search.toLowerCase();
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function parseGeneral(_url: URL | string, res: Awaited<ReturnType<typeof scpaping>>): Promise<Summary | null> {
|
||||||
|
const url = typeof _url === 'string' ? new URL(_url) : _url;
|
||||||
const $ = res.$;
|
const $ = res.$;
|
||||||
const twitterCard =
|
const twitterCard =
|
||||||
$('meta[name="twitter:card"]').attr('content') ||
|
$('meta[name="twitter:card"]').attr('content') ||
|
||||||
|
@ -7,7 +7,7 @@ import { got, type Agents as GotAgents } from 'got';
|
|||||||
import type { FastifyInstance } from 'fastify';
|
import type { FastifyInstance } from 'fastify';
|
||||||
import { SummalyResult } from '@/summary.js';
|
import { SummalyResult } from '@/summary.js';
|
||||||
import { SummalyPlugin as _SummalyPlugin } from '@/iplugin.js';
|
import { SummalyPlugin as _SummalyPlugin } from '@/iplugin.js';
|
||||||
import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
|
import { general, type GeneralScrapingOptions } from '@/general.js';
|
||||||
import { DEFAULT_OPERATION_TIMEOUT, DEFAULT_RESPONSE_TIMEOUT, agent, setAgent } from '@/utils/got.js';
|
import { DEFAULT_OPERATION_TIMEOUT, DEFAULT_RESPONSE_TIMEOUT, agent, setAgent } from '@/utils/got.js';
|
||||||
import { plugins as builtinPlugins } from '@/plugins/index.js';
|
import { plugins as builtinPlugins } from '@/plugins/index.js';
|
||||||
|
|
||||||
@ -125,7 +125,7 @@ export const summaly = async (url: string, options?: SummalyOptions): Promise<Su
|
|||||||
};
|
};
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
||||||
const summary = await (match ? match.summarize : parseGeneral)(_url, scrapingOptions);
|
const summary = await (match ? match.summarize : general)(_url, scrapingOptions);
|
||||||
|
|
||||||
if (summary == null) {
|
if (summary == null) {
|
||||||
throw new Error('failed summarize');
|
throw new Error('failed summarize');
|
||||||
|
26
src/plugins/bluesky.ts
Normal file
26
src/plugins/bluesky.ts
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
import * as cheerio from 'cheerio';
|
||||||
|
import type Summary from '@/summary.js';
|
||||||
|
import { getResponse, getGotOptions } from '@/utils/got.js';
|
||||||
|
import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
|
||||||
|
|
||||||
|
export function test(url: URL): boolean {
|
||||||
|
return url.hostname === 'bsky.app';
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promise<Summary | null> {
|
||||||
|
const args = getGotOptions(url.href, opts);
|
||||||
|
|
||||||
|
// HEADで取ると404が返るためGETのみで取得
|
||||||
|
const res = await getResponse({
|
||||||
|
...args,
|
||||||
|
method: 'GET',
|
||||||
|
});
|
||||||
|
const body = res.body;
|
||||||
|
const $ = cheerio.load(body);
|
||||||
|
|
||||||
|
return await parseGeneral(url, {
|
||||||
|
body,
|
||||||
|
$,
|
||||||
|
response: res,
|
||||||
|
});
|
||||||
|
}
|
@ -1,4 +1,4 @@
|
|||||||
import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
|
import { general, type GeneralScrapingOptions } from '@/general.js';
|
||||||
import Summary from '@/summary.js';
|
import Summary from '@/summary.js';
|
||||||
|
|
||||||
export function test(url: URL): boolean {
|
export function test(url: URL): boolean {
|
||||||
@ -12,5 +12,5 @@ export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promis
|
|||||||
// Web版に強制リダイレクトすることでbranch.ioの独自ページが開くのを防ぐ
|
// Web版に強制リダイレクトすることでbranch.ioの独自ページが開くのを防ぐ
|
||||||
url.searchParams.append('$web_only', 'true');
|
url.searchParams.append('$web_only', 'true');
|
||||||
|
|
||||||
return await parseGeneral(url, opts);
|
return await general(url, opts);
|
||||||
}
|
}
|
||||||
|
@ -1,10 +1,12 @@
|
|||||||
import * as amazon from './amazon.js';
|
import * as amazon from './amazon.js';
|
||||||
|
import * as bluesky from './bluesky.js';
|
||||||
import * as wikipedia from './wikipedia.js';
|
import * as wikipedia from './wikipedia.js';
|
||||||
import * as branchIoDeeplinks from './branchio-deeplinks.js';
|
import * as branchIoDeeplinks from './branchio-deeplinks.js';
|
||||||
import { SummalyPlugin } from '@/iplugin.js';
|
import { SummalyPlugin } from '@/iplugin.js';
|
||||||
|
|
||||||
export const plugins: SummalyPlugin[] = [
|
export const plugins: SummalyPlugin[] = [
|
||||||
amazon,
|
amazon,
|
||||||
|
bluesky,
|
||||||
wikipedia,
|
wikipedia,
|
||||||
branchIoDeeplinks,
|
branchIoDeeplinks,
|
||||||
];
|
];
|
||||||
|
@ -4,8 +4,9 @@ import { readFileSync } from 'node:fs';
|
|||||||
import got, * as Got from 'got';
|
import got, * as Got from 'got';
|
||||||
import * as cheerio from 'cheerio';
|
import * as cheerio from 'cheerio';
|
||||||
import PrivateIp from 'private-ip';
|
import PrivateIp from 'private-ip';
|
||||||
import { StatusError } from './status-error.js';
|
import type { GeneralScrapingOptions } from '@/general.js';
|
||||||
import { detectEncoding, toUtf8 } from './encoding.js';
|
import { StatusError } from '@/utils/status-error.js';
|
||||||
|
import { detectEncoding, toUtf8 } from '@/utils/encoding.js';
|
||||||
|
|
||||||
const _filename = fileURLToPath(import.meta.url);
|
const _filename = fileURLToPath(import.meta.url);
|
||||||
const _dirname = dirname(_filename);
|
const _dirname = dirname(_filename);
|
||||||
@ -36,23 +37,13 @@ export const DEFAULT_OPERATION_TIMEOUT = 60 * 1000;
|
|||||||
export const DEFAULT_MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
|
export const DEFAULT_MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
|
||||||
export const DEFAULT_BOT_UA = `SummalyBot/${repo.version}`;
|
export const DEFAULT_BOT_UA = `SummalyBot/${repo.version}`;
|
||||||
|
|
||||||
export async function scpaping(
|
export function getGotOptions(url: string, opts?: GeneralScrapingOptions): Omit<GotOptions, 'method'> {
|
||||||
url: string,
|
return {
|
||||||
opts?: {
|
|
||||||
lang?: string;
|
|
||||||
userAgent?: string;
|
|
||||||
responseTimeout?: number;
|
|
||||||
operationTimeout?: number;
|
|
||||||
contentLengthLimit?: number;
|
|
||||||
contentLengthRequired?: boolean;
|
|
||||||
},
|
|
||||||
) {
|
|
||||||
const args: Omit<GotOptions, 'method'> = {
|
|
||||||
url,
|
url,
|
||||||
headers: {
|
headers: {
|
||||||
'accept': 'text/html,application/xhtml+xml',
|
'accept': 'text/html,application/xhtml+xml',
|
||||||
'user-agent': opts?.userAgent ?? DEFAULT_BOT_UA,
|
'user-agent': opts?.userAgent ?? DEFAULT_BOT_UA,
|
||||||
'accept-language': opts?.lang,
|
'accept-language': opts?.lang ?? undefined,
|
||||||
},
|
},
|
||||||
typeFilter: /^(text\/html|application\/xhtml\+xml)/,
|
typeFilter: /^(text\/html|application\/xhtml\+xml)/,
|
||||||
responseTimeout: opts?.responseTimeout,
|
responseTimeout: opts?.responseTimeout,
|
||||||
@ -60,6 +51,13 @@ export async function scpaping(
|
|||||||
contentLengthLimit: opts?.contentLengthLimit,
|
contentLengthLimit: opts?.contentLengthLimit,
|
||||||
contentLengthRequired: opts?.contentLengthRequired,
|
contentLengthRequired: opts?.contentLengthRequired,
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function scpaping(
|
||||||
|
url: string,
|
||||||
|
opts?: GeneralScrapingOptions,
|
||||||
|
) {
|
||||||
|
const args = getGotOptions(url, opts);
|
||||||
|
|
||||||
const headResponse = await getResponse({
|
const headResponse = await getResponse({
|
||||||
...args,
|
...args,
|
||||||
@ -110,7 +108,7 @@ export async function head(url: string) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getResponse(args: GotOptions) {
|
export async function getResponse(args: GotOptions) {
|
||||||
const timeout = args.responseTimeout ?? DEFAULT_RESPONSE_TIMEOUT;
|
const timeout = args.responseTimeout ?? DEFAULT_RESPONSE_TIMEOUT;
|
||||||
const operationTimeout = args.operationTimeout ?? DEFAULT_OPERATION_TIMEOUT;
|
const operationTimeout = args.operationTimeout ?? DEFAULT_OPERATION_TIMEOUT;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user