diff --git a/CHANGELOG.md b/CHANGELOG.md
index ee9c593..a1e69d8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,7 @@
(unreleased)
------------------
* センシティブフラグの判定を `<meta name="rating">` および `rating` ヘッダでも行うように
+* Bluesky(bsky.app)のプレビューに対応
* 依存関係の更新
* eslintの設定を更新
diff --git a/src/general.ts b/src/general.ts
index b1facce..6351d31 100644
--- a/src/general.ts
+++ b/src/general.ts
@@ -138,19 +138,7 @@ export type GeneralScrapingOptions = {
contentLengthRequired?: boolean;
}
-function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) {
- if (!headerValue) {
- return false;
- }
-
- if (Array.isArray(headerValue)) {
- return headerValue.some(value => value.toLowerCase() === search.toLowerCase());
- }
-
- return headerValue.toLowerCase() === search.toLowerCase();
-}
-
-export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOptions): Promise {
+export async function general(_url: URL | string, opts?: GeneralScrapingOptions): Promise {
let lang = opts?.lang;
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
@@ -164,6 +152,24 @@ export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOpt
contentLengthLimit: opts?.contentLengthLimit,
contentLengthRequired: opts?.contentLengthRequired,
});
+
+ return await parseGeneral(url, res);
+}
+
+/** Reports whether `search` equals the header value, or any entry of a repeated header, ignoring case. */
+function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) {
+	// An absent or empty-string header can never match.
+	if (!headerValue) return false;
+
+	const needle = search.toLowerCase();
+	const candidates = Array.isArray(headerValue) ? headerValue : [headerValue];
+
+	// Whole-value equality, not a substring search.
+	return candidates.some(candidate => candidate.toLowerCase() === needle);
+}
+
+export async function parseGeneral(_url: URL | string, res: Awaited>): Promise {
+ const url = typeof _url === 'string' ? new URL(_url) : _url;
const $ = res.$;
const twitterCard =
$('meta[name="twitter:card"]').attr('content') ||
diff --git a/src/index.ts b/src/index.ts
index 2fa4b96..a8f544f 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -7,7 +7,7 @@ import { got, type Agents as GotAgents } from 'got';
import type { FastifyInstance } from 'fastify';
import { SummalyResult } from '@/summary.js';
import { SummalyPlugin as _SummalyPlugin } from '@/iplugin.js';
-import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
+import { general, type GeneralScrapingOptions } from '@/general.js';
import { DEFAULT_OPERATION_TIMEOUT, DEFAULT_RESPONSE_TIMEOUT, agent, setAgent } from '@/utils/got.js';
import { plugins as builtinPlugins } from '@/plugins/index.js';
@@ -125,7 +125,7 @@ export const summaly = async (url: string, options?: SummalyOptions): Promise {
+ const args = getGotOptions(url.href, opts);
+
+ // HEADで取ると404が返るためGETのみで取得
+ const res = await getResponse({
+ ...args,
+ method: 'GET',
+ });
+ const body = res.body;
+ const $ = cheerio.load(body);
+
+ return await parseGeneral(url, {
+ body,
+ $,
+ response: res,
+ });
+}
diff --git a/src/plugins/branchio-deeplinks.ts b/src/plugins/branchio-deeplinks.ts
index 0623d94..67d08dc 100644
--- a/src/plugins/branchio-deeplinks.ts
+++ b/src/plugins/branchio-deeplinks.ts
@@ -1,4 +1,4 @@
-import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
+import { general, type GeneralScrapingOptions } from '@/general.js';
import Summary from '@/summary.js';
export function test(url: URL): boolean {
@@ -12,5 +12,5 @@ export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promis
// Web版に強制リダイレクトすることでbranch.ioの独自ページが開くのを防ぐ
url.searchParams.append('$web_only', 'true');
- return await parseGeneral(url, opts);
+ return await general(url, opts);
}
diff --git a/src/plugins/index.ts b/src/plugins/index.ts
index 41078eb..140380a 100644
--- a/src/plugins/index.ts
+++ b/src/plugins/index.ts
@@ -1,10 +1,12 @@
import * as amazon from './amazon.js';
+import * as bluesky from './bluesky.js';
import * as wikipedia from './wikipedia.js';
import * as branchIoDeeplinks from './branchio-deeplinks.js';
import { SummalyPlugin } from '@/iplugin.js';
export const plugins: SummalyPlugin[] = [
amazon,
+ bluesky,
wikipedia,
branchIoDeeplinks,
];
diff --git a/src/utils/got.ts b/src/utils/got.ts
index 6a2b6d9..23dee91 100644
--- a/src/utils/got.ts
+++ b/src/utils/got.ts
@@ -4,8 +4,9 @@ import { readFileSync } from 'node:fs';
import got, * as Got from 'got';
import * as cheerio from 'cheerio';
import PrivateIp from 'private-ip';
-import { StatusError } from './status-error.js';
-import { detectEncoding, toUtf8 } from './encoding.js';
+import type { GeneralScrapingOptions } from '@/general.js';
+import { StatusError } from '@/utils/status-error.js';
+import { detectEncoding, toUtf8 } from '@/utils/encoding.js';
const _filename = fileURLToPath(import.meta.url);
const _dirname = dirname(_filename);
@@ -36,23 +37,13 @@ export const DEFAULT_OPERATION_TIMEOUT = 60 * 1000;
export const DEFAULT_MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
export const DEFAULT_BOT_UA = `SummalyBot/${repo.version}`;
-export async function scpaping(
- url: string,
- opts?: {
- lang?: string;
- userAgent?: string;
- responseTimeout?: number;
- operationTimeout?: number;
- contentLengthLimit?: number;
- contentLengthRequired?: boolean;
- },
-) {
- const args: Omit = {
+export function getGotOptions(url: string, opts?: GeneralScrapingOptions): Omit {
+ return {
url,
headers: {
'accept': 'text/html,application/xhtml+xml',
'user-agent': opts?.userAgent ?? DEFAULT_BOT_UA,
- 'accept-language': opts?.lang,
+ 'accept-language': opts?.lang ?? undefined,
},
typeFilter: /^(text\/html|application\/xhtml\+xml)/,
responseTimeout: opts?.responseTimeout,
@@ -60,6 +51,13 @@ export async function scpaping(
contentLengthLimit: opts?.contentLengthLimit,
contentLengthRequired: opts?.contentLengthRequired,
};
+}
+
+export async function scpaping(
+ url: string,
+ opts?: GeneralScrapingOptions,
+) {
+ const args = getGotOptions(url, opts);
const headResponse = await getResponse({
...args,
@@ -110,7 +108,7 @@ export async function head(url: string) {
});
}
-async function getResponse(args: GotOptions) {
+export async function getResponse(args: GotOptions) {
const timeout = args.responseTimeout ?? DEFAULT_RESPONSE_TIMEOUT;
const operationTimeout = args.operationTimeout ?? DEFAULT_OPERATION_TIMEOUT;