mirror of
https://github.com/misskey-dev/summaly.git
synced 2025-08-07 16:54:01 +09:00
Compare commits
9 Commits
Author | SHA1 | Date | |
---|---|---|---|
1b93243ff9 | |||
ee06d841c2 | |||
e9547a556a | |||
71a6aefb8e | |||
fc9f7db477 | |||
cc7ae8d00a | |||
7fbab86441 | |||
1b541a1418 | |||
3e09d27613 |
@ -1,16 +0,0 @@
|
||||
module.exports = {
|
||||
root: true,
|
||||
parser: '@typescript-eslint/parser',
|
||||
parserOptions: {
|
||||
tsconfigRootDir: __dirname,
|
||||
project: ['./tsconfig.json', './test/tsconfig.json'],
|
||||
},
|
||||
ignorePatterns: ['**/.eslintrc.cjs'],
|
||||
extends: [
|
||||
'plugin:@misskey-dev/recommended',
|
||||
],
|
||||
rules: {
|
||||
'@typescript-eslint/prefer-nullish-coalescing': 'off',
|
||||
'import/no-default-export': 'off',
|
||||
},
|
||||
};
|
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@ -31,4 +31,4 @@ jobs:
|
||||
pnpm build
|
||||
- name: Test
|
||||
run: |
|
||||
pnpm test
|
||||
SKIP_NETWORK_TEST=true pnpm test
|
||||
|
12
CHANGELOG.md
12
CHANGELOG.md
@ -1,4 +1,16 @@
|
||||
(unreleased)
|
||||
------------------
|
||||
|
||||
5.2.0 / 2025-02-05
|
||||
------------------
|
||||
* センシティブフラグの判定を `<meta name="rating">` および `rating` ヘッダでも行うように
|
||||
* Bluesky(bsky.app)のプレビューに対応
|
||||
* `fediverse:creator` のパースに対応
|
||||
* 依存関係の更新
|
||||
* eslintの設定を更新
|
||||
|
||||
5.1.0 / 2024-03-18
|
||||
------------------
|
||||
* GETリクエストよりも前にHEADリクエストを送信し、その結果を使用して検証するように (#22)
|
||||
* 下記のパラメータを`summaly`メソッドのオプションに追加
|
||||
- userAgent
|
||||
|
21
README.md
21
README.md
@ -85,15 +85,16 @@ A Promise of an Object that contains properties below:
|
||||
|
||||
| Property | Type | Description |
|
||||
|:----------------|:-------------------|:-----------------------------------------------------------|
|
||||
| **title** | *string* \| *null* | The title of the web page |
|
||||
| **icon** | *string* \| *null* | The url of the icon of the web page |
|
||||
| **description** | *string* \| *null* | The description of the web page |
|
||||
| **thumbnail** | *string* \| *null* | The url of the thumbnail of the web page |
|
||||
| **sitename** | *string* \| *null* | The name of the web site |
|
||||
| **player** | *Player* | The player of the web page |
|
||||
| **sensitive** | *boolean* | Whether the url is sensitive |
|
||||
| **activityPub** | *string* \| *null* | The url of the ActivityPub representation of that web page |
|
||||
| **url** | *string* | The url of the web page |
|
||||
| **title** | *string* \| *null* | The title of the web page |
|
||||
| **icon** | *string* \| *null* | The url of the icon of the web page |
|
||||
| **description** | *string* \| *null* | The description of the web page |
|
||||
| **thumbnail** | *string* \| *null* | The url of the thumbnail of the web page |
|
||||
| **sitename** | *string* \| *null* | The name of the web site |
|
||||
| **player** | *Player* | The player of the web page |
|
||||
| **sensitive** | *boolean* | Whether the url is sensitive |
|
||||
| **activityPub** | *string* \| *null* | The url of the ActivityPub representation of that web page |
|
||||
| **fediverseCreator** | *string* \| *null* | The fediverse handle of the page's creator |
|
||||
| **url** | *string* | The url of the web page |
|
||||
|
||||
#### Summary
|
||||
|
||||
@ -134,7 +135,7 @@ will be ... ↓
|
||||
```json
|
||||
{
|
||||
"title": "【アイドルマスター】「Stage Bye Stage」(歌:島村卯月、渋谷凛、本田未央)",
|
||||
"icon": "https://www.youtube.com/s/desktop/28b0985e/img/favicon.ico",
|
||||
"icon": "https://www.youtube.com/s/desktop/711fd789/img/logos/favicon.ico",
|
||||
"description": "Website▶https://columbia.jp/idolmaster/Playlist▶https://www.youtube.com/playlist?list=PL83A2998CF3BBC86D2018年7月18日発売予定THE IDOLM@STER CINDERELLA GIRLS CG STAR...",
|
||||
"thumbnail": "https://i.ytimg.com/vi/NMIEAhH_fTU/maxresdefault.jpg",
|
||||
"player": {
|
||||
|
38
eslint.config.js
Normal file
38
eslint.config.js
Normal file
@ -0,0 +1,38 @@
|
||||
import pluginMisskey from '@misskey-dev/eslint-plugin';
|
||||
import tsParser from '@typescript-eslint/parser';
|
||||
|
||||
//@ts-check
|
||||
/** @type {import('eslint').Linter.Config[]} */
|
||||
export default [ // eslint-disable-line import/no-default-export
	// Start from the shared Misskey rule set; the entries below only adjust it for this repository.
	...pluginMisskey.configs['recommended'],
	{
		// Paths that ESLint should not lint at all.
		ignores: [
			'**/node_modules',
			'src/@types/package.json.d.ts',
			'built',
			'jest.config.js',
			'test',
		],
	},
	{
		// Type-aware linting for TypeScript sources.
		files: ['**/*.ts', '**/*.tsx'],
		languageOptions: {
			parserOptions: {
				parser: tsParser,
				// NOTE(review): `test` is ignored above, so the test tsconfig entry may be unnecessary — confirm.
				project: ['./tsconfig.json', './test/tsconfig.json'],
				sourceType: 'module',
				// The typescript-eslint option is spelled `tsconfigRootDir` (lowercase "c").
				// The previous `tsConfigRootDir` key was not recognised, so the `project`
				// paths were not anchored to this config file's directory.
				tsconfigRootDir: import.meta.dirname,
			},
		},
		rules: {
			// Disabled because `||` fallbacks should also apply to empty strings.
			'@typescript-eslint/prefer-nullish-coalescing': 'off',
		},
	},
	{
		// Plain JavaScript / CommonJS files are allowed to use `require`.
		files: ['**/*.js', '**/*.cjs'],
		rules: {
			'@typescript-eslint/no-var-requires': 'off',
		},
	},
];
|
@ -92,6 +92,9 @@ export default {
|
||||
// TODO: Use `--allowImportingTsExtensions` on TypeScript 5.0 so that we can
|
||||
// directly import `.ts` files without this hack.
|
||||
'^(\\.{1,2}/.*)\\.js$': '$1',
|
||||
|
||||
// Resolve @/foo/bar to `../../src/foo/bar`
|
||||
'^@/(.*)\\.js$': '<rootDir>/src/$1',
|
||||
},
|
||||
|
||||
// An array of regexp pattern strings, matched against all module paths before considered 'visible' to the module loader
|
||||
|
50
package.json
50
package.json
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@misskey-dev/summaly",
|
||||
"version": "5.1.0",
|
||||
"version": "5.2.0",
|
||||
"description": "Get web page's summary",
|
||||
"author": "syuilo <syuilotan@yahoo.co.jp>",
|
||||
"license": "MIT",
|
||||
@ -9,44 +9,44 @@
|
||||
"main": "./built/index.js",
|
||||
"type": "module",
|
||||
"types": "./built/index.d.ts",
|
||||
"packageManager": "pnpm@8.13.1",
|
||||
"packageManager": "pnpm@9.12.3",
|
||||
"files": [
|
||||
"built",
|
||||
"LICENSE"
|
||||
],
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"eslint": "eslint src --ext .js,.jsx,.ts,.tsx",
|
||||
"build": "tsc && tsc-alias",
|
||||
"eslint": "eslint",
|
||||
"test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --silent=false --verbose false",
|
||||
"serve": "fastify start ./built/index.js"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@misskey-dev/eslint-plugin": "^1.0.0",
|
||||
"@swc/core": "^1.3.101",
|
||||
"@swc/jest": "^0.2.29",
|
||||
"@types/cheerio": "0.22.18",
|
||||
"@types/debug": "4.1.7",
|
||||
"@types/escape-regexp": "^0.0.1",
|
||||
"@types/node": "20.10.6",
|
||||
"@typescript-eslint/eslint-plugin": "^6.16.0",
|
||||
"@typescript-eslint/parser": "^6.16.0",
|
||||
"debug": "^4.3.4",
|
||||
"eslint": "^8.56.0",
|
||||
"eslint-plugin-import": "^2.29.1",
|
||||
"fastify": "^4.25.2",
|
||||
"fastify-cli": "^5.9.0",
|
||||
"@misskey-dev/eslint-plugin": "^2.1.0",
|
||||
"@swc/core": "^1.10.12",
|
||||
"@swc/jest": "^0.2.37",
|
||||
"@types/cheerio": "0.22.35",
|
||||
"@types/debug": "4.1.12",
|
||||
"@types/escape-regexp": "^0.0.3",
|
||||
"@types/node": "22.13.0",
|
||||
"@typescript-eslint/eslint-plugin": "^8.22.0",
|
||||
"@typescript-eslint/parser": "^8.22.0",
|
||||
"debug": "^4.4.0",
|
||||
"eslint": "^9.19.0",
|
||||
"eslint-plugin-import": "^2.31.0",
|
||||
"fastify": "^5.2.1",
|
||||
"fastify-cli": "^7.3.0",
|
||||
"jest": "^29.7.0",
|
||||
"typescript": "5.3.3"
|
||||
"tsc-alias": "^1.8.10",
|
||||
"typescript": "5.7.3"
|
||||
},
|
||||
"dependencies": {
|
||||
"cheerio": "1.0.0-rc.12",
|
||||
"cheerio": "1.0.0",
|
||||
"escape-regexp": "0.0.1",
|
||||
"got": "^12.6.1",
|
||||
"html-entities": "2.3.2",
|
||||
"got": "^14.4.5",
|
||||
"html-entities": "2.5.2",
|
||||
"iconv-lite": "0.6.3",
|
||||
"jschardet": "3.0.0",
|
||||
"private-ip": "2.3.3",
|
||||
"trace-redirect": "1.0.6"
|
||||
"jschardet": "3.1.4",
|
||||
"private-ip": "3.0.2"
|
||||
}
|
||||
}
|
||||
|
7390
pnpm-lock.yaml
generated
7390
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@ -1,11 +1,10 @@
|
||||
import { URL } from 'node:url';
|
||||
import { decode as decodeHtml } from 'html-entities';
|
||||
import * as cheerio from 'cheerio';
|
||||
import clip from './utils/clip.js';
|
||||
import cleanupTitle from './utils/cleanup-title.js';
|
||||
import type { default as Summary, Player } from '@/summary.js';
|
||||
import { clip } from '@/utils/clip.js';
|
||||
import { cleanupTitle } from '@/utils/cleanup-title.js';
|
||||
|
||||
import { get, head, scpaping } from './utils/got.js';
|
||||
import type { default as Summary, Player } from './summary.js';
|
||||
import { get, head, scpaping } from '@/utils/got.js';
|
||||
|
||||
/**
|
||||
* Contains only the html snippet for a sanitized iframe as the thumbnail is
|
||||
@ -137,11 +136,10 @@ export type GeneralScrapingOptions = {
|
||||
operationTimeout?: number;
|
||||
contentLengthLimit?: number;
|
||||
contentLengthRequired?: boolean;
|
||||
}
|
||||
};
|
||||
|
||||
export default async (_url: URL | string, opts?: GeneralScrapingOptions): Promise<Summary | null> => {
|
||||
export async function general(_url: URL | string, opts?: GeneralScrapingOptions): Promise<Summary | null> {
|
||||
let lang = opts?.lang;
|
||||
// eslint-disable-next-line no-param-reassign
|
||||
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
|
||||
|
||||
const url = typeof _url === 'string' ? new URL(_url) : _url;
|
||||
@ -154,6 +152,24 @@ export default async (_url: URL | string, opts?: GeneralScrapingOptions): Promis
|
||||
contentLengthLimit: opts?.contentLengthLimit,
|
||||
contentLengthRequired: opts?.contentLengthRequired,
|
||||
});
|
||||
|
||||
return await parseGeneral(url, res);
|
||||
}
|
||||
|
||||
/**
 * Checks whether a response header carries a value equal to `search`,
 * ignoring letter case.
 *
 * @param search - The value to look for.
 * @param headerValue - The raw header: a single string, a list of strings, or undefined when absent.
 * @returns `true` when the header (or any entry of a list-valued header) equals `search` case-insensitively.
 */
function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) {
	// A missing or empty header can never match.
	if (!headerValue) return false;

	const needle = search.toLowerCase();
	// Treat the single-value form as a one-element list so both shapes share one comparison.
	const candidates = Array.isArray(headerValue) ? headerValue : [headerValue];

	return candidates.some(candidate => candidate.toLowerCase() === needle);
}
|
||||
|
||||
export async function parseGeneral(_url: URL | string, res: Awaited<ReturnType<typeof scpaping>>): Promise<Summary | null> {
|
||||
const url = typeof _url === 'string' ? new URL(_url) : _url;
|
||||
const $ = res.$;
|
||||
const twitterCard =
|
||||
$('meta[name="twitter:card"]').attr('content') ||
|
||||
@ -235,9 +251,16 @@ export default async (_url: URL | string, opts?: GeneralScrapingOptions): Promis
|
||||
const activityPub =
|
||||
$('link[rel="alternate"][type="application/activity+json"]').attr('href') || null;
|
||||
|
||||
const fediverseCreator: string | null =
|
||||
$('meta[name=\'fediverse:creator\']').attr('content') || null;
|
||||
|
||||
// https://developer.mixi.co.jp/connect/mixi_plugin/mixi_check/spec_mixi_check/#toc-18-
|
||||
const sensitive =
|
||||
$('meta[property=\'mixi:content-rating\']').attr('content') === '1';
|
||||
$('meta[property=\'mixi:content-rating\']').attr('content') === '1' ||
|
||||
headerEqualValueContains('adult', res.response.headers.rating) ||
|
||||
headerEqualValueContains('RTA-5042-1996-1400-1577-RTA', res.response.headers.rating) ||
|
||||
$('meta[name=\'rating\']').attr('content') === 'adult' ||
|
||||
$('meta[name=\'rating\']').attr('content')?.toUpperCase() === 'RTA-5042-1996-1400-1577-RTA';
|
||||
|
||||
const find = async (path: string) => {
|
||||
const target = new URL(path, url.href);
|
||||
@ -279,5 +302,6 @@ export default async (_url: URL | string, opts?: GeneralScrapingOptions): Promis
|
||||
sitename: siteName || null,
|
||||
sensitive,
|
||||
activityPub,
|
||||
fediverseCreator,
|
||||
};
|
||||
};
|
||||
}
|
||||
|
49
src/index.ts
49
src/index.ts
@ -3,16 +3,15 @@
|
||||
* https://github.com/misskey-dev/summaly
|
||||
*/
|
||||
|
||||
import { URL } from 'node:url';
|
||||
import tracer from 'trace-redirect';
|
||||
import * as Got from 'got';
|
||||
import { SummalyResult } from './summary.js';
|
||||
import { SummalyPlugin } from './iplugin.js';
|
||||
export * from './iplugin.js';
|
||||
import general, { GeneralScrapingOptions } from './general.js';
|
||||
import { setAgent } from './utils/got.js';
|
||||
import { plugins as builtinPlugins } from './plugins/index.js';
|
||||
import { got, type Agents as GotAgents } from 'got';
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import { SummalyResult } from '@/summary.js';
|
||||
import { SummalyPlugin as _SummalyPlugin } from '@/iplugin.js';
|
||||
import { general, type GeneralScrapingOptions } from '@/general.js';
|
||||
import { DEFAULT_OPERATION_TIMEOUT, DEFAULT_RESPONSE_TIMEOUT, agent, setAgent } from '@/utils/got.js';
|
||||
import { plugins as builtinPlugins } from '@/plugins/index.js';
|
||||
|
||||
export type SummalyPlugin = _SummalyPlugin;
|
||||
|
||||
export type SummalyOptions = {
|
||||
/**
|
||||
@ -33,7 +32,7 @@ export type SummalyOptions = {
|
||||
/**
|
||||
* Custom HTTP agent
|
||||
*/
|
||||
agent?: Got.Agents;
|
||||
agent?: GotAgents;
|
||||
|
||||
/**
|
||||
* User-Agent for the request
|
||||
@ -85,7 +84,26 @@ export const summaly = async (url: string, options?: SummalyOptions): Promise<Su
|
||||
if (opts.followRedirects) {
|
||||
// .catch(() => url)にすればいいけど、jestにtrace-redirectを食わせるのが面倒なのでtry-catch
|
||||
try {
|
||||
actualUrl = await tracer(url);
|
||||
const timeout = opts.responseTimeout ?? DEFAULT_RESPONSE_TIMEOUT;
|
||||
const operationTimeout = opts.operationTimeout ?? DEFAULT_OPERATION_TIMEOUT;
|
||||
actualUrl = await got
|
||||
.head(url, {
|
||||
timeout: {
|
||||
lookup: timeout,
|
||||
connect: timeout,
|
||||
secureConnect: timeout,
|
||||
socket: timeout, // read timeout
|
||||
response: timeout,
|
||||
send: timeout,
|
||||
request: operationTimeout, // whole operation timeout
|
||||
},
|
||||
agent,
|
||||
http2: false,
|
||||
retry: {
|
||||
limit: 0,
|
||||
},
|
||||
})
|
||||
.then(res => res.url);
|
||||
} catch (e) {
|
||||
actualUrl = url;
|
||||
}
|
||||
@ -118,12 +136,13 @@ export const summaly = async (url: string, options?: SummalyOptions): Promise<Su
|
||||
});
|
||||
};
|
||||
|
||||
// eslint-disable-next-line import/no-default-export
|
||||
export default function (fastify: FastifyInstance, options: SummalyOptions, done: (err?: Error) => void) {
|
||||
fastify.get<{
|
||||
Querystring: {
|
||||
url?: string;
|
||||
lang?: string;
|
||||
};
|
||||
Querystring: {
|
||||
url?: string;
|
||||
lang?: string;
|
||||
};
|
||||
}>('/', async (req, reply) => {
|
||||
const url = req.query.url as string;
|
||||
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
||||
|
@ -1,6 +1,5 @@
|
||||
import Summary from './summary.js';
|
||||
import type { URL } from 'node:url';
|
||||
import { GeneralScrapingOptions } from '@/general';
|
||||
import type Summary from '@/summary.js';
|
||||
import type { GeneralScrapingOptions } from '@/general.js';
|
||||
|
||||
export interface SummalyPlugin {
|
||||
test: (url: URL) => boolean;
|
||||
|
@ -1,6 +1,5 @@
|
||||
import { URL } from 'node:url';
|
||||
import { scpaping } from '../utils/got.js';
|
||||
import summary from '../summary.js';
|
||||
import { scpaping } from '@/utils/got.js';
|
||||
import summary from '@/summary.js';
|
||||
|
||||
export function test(url: URL): boolean {
|
||||
return url.hostname === 'www.amazon.com' ||
|
||||
@ -56,5 +55,6 @@ export async function summarize(url: URL): Promise<summary> {
|
||||
},
|
||||
sitename: 'Amazon',
|
||||
activityPub: null,
|
||||
fediverseCreator: null,
|
||||
};
|
||||
}
|
||||
|
26
src/plugins/bluesky.ts
Normal file
26
src/plugins/bluesky.ts
Normal file
@ -0,0 +1,26 @@
|
||||
import * as cheerio from 'cheerio';
|
||||
import type Summary from '@/summary.js';
|
||||
import { getResponse, getGotOptions } from '@/utils/got.js';
|
||||
import { parseGeneral, type GeneralScrapingOptions } from '@/general.js';
|
||||
|
||||
/**
 * Plugin matcher: reports whether the given URL should be handled by the Bluesky plugin.
 *
 * @param url - The URL being summarized.
 * @returns `true` only when the hostname is exactly `bsky.app`.
 */
export function test(url: URL): boolean {
	const { hostname } = url;
	return hostname === 'bsky.app';
}
|
||||
|
||||
/**
 * Builds a summary for a bsky.app page.
 *
 * @param url - The page URL.
 * @param opts - Scraping options forwarded to `getGotOptions`.
 * @returns The result of `parseGeneral` for the fetched document.
 */
export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promise<Summary | null> {
	// A HEAD request returns 404 here, so the page is fetched with a single GET only.
	const response = await getResponse({
		...getGotOptions(url.href, opts),
		method: 'GET',
	});

	const { body } = response;

	return await parseGeneral(url, {
		body,
		$: cheerio.load(body),
		response,
	});
}
|
@ -1,6 +1,5 @@
|
||||
import { URL } from 'node:url';
|
||||
import general, { GeneralScrapingOptions } from '../general.js';
|
||||
import Summary from '../summary.js';
|
||||
import { general, type GeneralScrapingOptions } from '@/general.js';
|
||||
import Summary from '@/summary.js';
|
||||
|
||||
export function test(url: URL): boolean {
|
||||
// Branch.io を使用したディープリンクにマッチ
|
||||
|
@ -1,10 +1,12 @@
|
||||
import * as amazon from './amazon.js';
|
||||
import * as bluesky from './bluesky.js';
|
||||
import * as wikipedia from './wikipedia.js';
|
||||
import * as branchIoDeeplinks from './branchio-deeplinks.js';
|
||||
import { SummalyPlugin } from '@/iplugin.js';
|
||||
|
||||
export const plugins: SummalyPlugin[] = [
|
||||
amazon,
|
||||
bluesky,
|
||||
wikipedia,
|
||||
branchIoDeeplinks,
|
||||
];
|
||||
|
@ -1,8 +1,7 @@
|
||||
import { URL } from 'node:url';
|
||||
import debug from 'debug';
|
||||
import { get } from '../utils/got.js';
|
||||
import summary from '../summary.js';
|
||||
import clip from './../utils/clip.js';
|
||||
import { get } from '@/utils/got.js';
|
||||
import summary from '@/summary.js';
|
||||
import { clip } from '@/utils/clip.js';
|
||||
|
||||
const log = debug('summaly:plugins:wikipedia');
|
||||
|
||||
@ -44,5 +43,6 @@ export async function summarize(url: URL): Promise<summary> {
|
||||
},
|
||||
sitename: 'Wikipedia',
|
||||
activityPub: null,
|
||||
fediverseCreator: null,
|
||||
};
|
||||
}
|
||||
|
@ -38,6 +38,11 @@ type Summary = {
|
||||
* The url of the ActivityPub representation of that web page
|
||||
*/
|
||||
activityPub: string | null;
|
||||
|
||||
/**
|
||||
* The @ handle of a fediverse user (https://blog.joinmastodon.org/2024/07/highlighting-journalism-on-mastodon/)
|
||||
*/
|
||||
fediverseCreator: string | null;
|
||||
};
|
||||
|
||||
export type SummalyResult = Summary & {
|
||||
@ -47,6 +52,7 @@ export type SummalyResult = Summary & {
|
||||
url: string;
|
||||
};
|
||||
|
||||
// eslint-disable-next-line import/no-default-export
|
||||
export default Summary;
|
||||
|
||||
export type Player = {
|
||||
|
@ -1,7 +1,7 @@
|
||||
/* eslint-disable no-param-reassign */
|
||||
import escapeRegExp from 'escape-regexp';
|
||||
|
||||
export default function(title: string, siteName?: string | null): string {
|
||||
export function cleanupTitle(title: string, siteName?: string | null): string {
|
||||
title = title.trim();
|
||||
|
||||
if (siteName) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
import nullOrEmpty from './null-or-empty.js';
|
||||
import { nullOrEmpty } from './null-or-empty.js';
|
||||
|
||||
export default function(s: string, max: number): string {
|
||||
export function clip(s: string, max: number): string {
|
||||
if (nullOrEmpty(s)) {
|
||||
return s;
|
||||
}
|
||||
|
@ -4,8 +4,9 @@ import { readFileSync } from 'node:fs';
|
||||
import got, * as Got from 'got';
|
||||
import * as cheerio from 'cheerio';
|
||||
import PrivateIp from 'private-ip';
|
||||
import { StatusError } from './status-error.js';
|
||||
import { detectEncoding, toUtf8 } from './encoding.js';
|
||||
import type { GeneralScrapingOptions } from '@/general.js';
|
||||
import { StatusError } from '@/utils/status-error.js';
|
||||
import { detectEncoding, toUtf8 } from '@/utils/encoding.js';
|
||||
|
||||
const _filename = fileURLToPath(import.meta.url);
|
||||
const _dirname = dirname(_filename);
|
||||
@ -27,32 +28,22 @@ export type GotOptions = {
|
||||
operationTimeout?: number;
|
||||
contentLengthLimit?: number;
|
||||
contentLengthRequired?: boolean;
|
||||
}
|
||||
};
|
||||
|
||||
const repo = JSON.parse(readFileSync(`${_dirname}/../../package.json`, 'utf8'));
|
||||
|
||||
const DEFAULT_RESPONSE_TIMEOUT = 20 * 1000;
|
||||
const DEFAULT_OPERATION_TIMEOUT = 60 * 1000;
|
||||
const DEFAULT_MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
|
||||
const DEFAULT_BOT_UA = `SummalyBot/${repo.version}`;
|
||||
export const DEFAULT_RESPONSE_TIMEOUT = 20 * 1000;
|
||||
export const DEFAULT_OPERATION_TIMEOUT = 60 * 1000;
|
||||
export const DEFAULT_MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
|
||||
export const DEFAULT_BOT_UA = `SummalyBot/${repo.version}`;
|
||||
|
||||
export async function scpaping(
|
||||
url: string,
|
||||
opts?: {
|
||||
lang?: string;
|
||||
userAgent?: string;
|
||||
responseTimeout?: number;
|
||||
operationTimeout?: number;
|
||||
contentLengthLimit?: number;
|
||||
contentLengthRequired?: boolean;
|
||||
},
|
||||
) {
|
||||
const args: Omit<GotOptions, 'method'> = {
|
||||
export function getGotOptions(url: string, opts?: GeneralScrapingOptions): Omit<GotOptions, 'method'> {
|
||||
return {
|
||||
url,
|
||||
headers: {
|
||||
'accept': 'text/html,application/xhtml+xml',
|
||||
'user-agent': opts?.userAgent ?? DEFAULT_BOT_UA,
|
||||
'accept-language': opts?.lang,
|
||||
'accept-language': opts?.lang ?? undefined,
|
||||
},
|
||||
typeFilter: /^(text\/html|application\/xhtml\+xml)/,
|
||||
responseTimeout: opts?.responseTimeout,
|
||||
@ -60,6 +51,13 @@ export async function scpaping(
|
||||
contentLengthLimit: opts?.contentLengthLimit,
|
||||
contentLengthRequired: opts?.contentLengthRequired,
|
||||
};
|
||||
}
|
||||
|
||||
export async function scpaping(
|
||||
url: string,
|
||||
opts?: GeneralScrapingOptions,
|
||||
) {
|
||||
const args = getGotOptions(url, opts);
|
||||
|
||||
const headResponse = await getResponse({
|
||||
...args,
|
||||
@ -110,7 +108,7 @@ export async function head(url: string) {
|
||||
});
|
||||
}
|
||||
|
||||
async function getResponse(args: GotOptions) {
|
||||
export async function getResponse(args: GotOptions) {
|
||||
const timeout = args.responseTimeout ?? DEFAULT_RESPONSE_TIMEOUT;
|
||||
const operationTimeout = args.operationTimeout ?? DEFAULT_OPERATION_TIMEOUT;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* eslint-disable @typescript-eslint/no-unnecessary-condition */
|
||||
export default function(val: string): boolean {
|
||||
export function nullOrEmpty(val: string): boolean {
|
||||
if (val === undefined) {
|
||||
return true;
|
||||
} else if (val === null) {
|
||||
|
13
test/htmls/fediverse-creator.html
Normal file
13
test/htmls/fediverse-creator.html
Normal file
@ -0,0 +1,13 @@
|
||||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="fediverse:creator" content="@test@example.com">
|
||||
<title>Meow</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Hellooo!</h1>
|
||||
<p>:3</p>
|
||||
</body>
|
||||
</html>
|
13
test/htmls/meta-adult-sensitive.html
Normal file
13
test/htmls/meta-adult-sensitive.html
Normal file
@ -0,0 +1,13 @@
|
||||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="rating" content="adult">
|
||||
<title>SENSITIVE CONTENT!!</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Yo</h1>
|
||||
<p>Hey hey hey syuilo.</p>
|
||||
</body>
|
||||
</html>
|
13
test/htmls/meta-rta-sensitive.html
Normal file
13
test/htmls/meta-rta-sensitive.html
Normal file
@ -0,0 +1,13 @@
|
||||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="rating" content="RTA-5042-1996-1400-1577-RTA">
|
||||
<title>SENSITIVE CONTENT!!</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Yo</h1>
|
||||
<p>Hey hey hey syuilo.</p>
|
||||
</body>
|
||||
</html>
|
103
test/index.ts
103
test/index.ts
@ -12,8 +12,8 @@ import { dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { Agent as httpAgent } from 'node:http';
|
||||
import { Agent as httpsAgent } from 'node:https';
|
||||
import { expect, test, describe, beforeEach, afterEach } from '@jest/globals';
|
||||
import fastify from 'fastify';
|
||||
import { expect, test, describe, beforeEach, afterEach, xtest } from '@jest/globals';
|
||||
import fastify, { type FastifyInstance } from 'fastify';
|
||||
import { summaly } from '../src/index.js';
|
||||
import { StatusError } from '../src/utils/status-error.js';
|
||||
|
||||
@ -34,7 +34,16 @@ const host = `http://localhost:${port}`;
|
||||
// Display detail of unhandled promise rejection
|
||||
process.on('unhandledRejection', console.dir);
|
||||
|
||||
let app: ReturnType<typeof fastify> | null = null;
|
||||
let app: FastifyInstance | null = null;
|
||||
|
||||
/**
 * Registers a test case that is skipped when the `SKIP_NETWORK_TEST`
 * environment variable is set to the string `'true'`.
 *
 * @param name - The test title.
 * @param fn - The test body.
 */
function skippableTest(name: string, fn: () => void) {
	const skipRequested = process.env.SKIP_NETWORK_TEST === 'true';

	if (!skipRequested) {
		test(name, fn);
		return;
	}

	// Log the skip, then register the case with `xtest` instead of `test`.
	console.log(`[SKIP] ${name}`);
	xtest(name, fn);
}
|
||||
|
||||
afterEach(async () => {
|
||||
if (app) {
|
||||
@ -71,19 +80,20 @@ test('basic', async () => {
|
||||
},
|
||||
sitename: 'localhost:3060',
|
||||
sensitive: false,
|
||||
url: host,
|
||||
url: host + '/',
|
||||
activityPub: null,
|
||||
fediverseCreator: null,
|
||||
});
|
||||
});
|
||||
|
||||
test('Stage Bye Stage', async () => {
|
||||
skippableTest('Stage Bye Stage', async () => {
|
||||
// If this test fails, you must rewrite the result data and the example in README.md.
|
||||
|
||||
const summary = await summaly('https://www.youtube.com/watch?v=NMIEAhH_fTU');
|
||||
expect(summary).toEqual(
|
||||
{
|
||||
'title': '【アイドルマスター】「Stage Bye Stage」(歌:島村卯月、渋谷凛、本田未央)',
|
||||
'icon': 'https://www.youtube.com/s/desktop/4feff1e2/img/favicon.ico',
|
||||
'icon': 'https://www.youtube.com/s/desktop/711fd789/img/logos/favicon.ico',
|
||||
'description': 'Website▶https://columbia.jp/idolmaster/Playlist▶https://www.youtube.com/playlist?list=PL83A2998CF3BBC86D2018年7月18日発売予定THE IDOLM@STER CINDERELLA GIRLS CG STAR...',
|
||||
'thumbnail': 'https://i.ytimg.com/vi/NMIEAhH_fTU/maxresdefault.jpg',
|
||||
'player': {
|
||||
@ -102,6 +112,7 @@ test('Stage Bye Stage', async () => {
|
||||
'sitename': 'YouTube',
|
||||
'sensitive': false,
|
||||
'activityPub': null,
|
||||
'fediverseCreator': null,
|
||||
'url': 'https://www.youtube.com/watch?v=NMIEAhH_fTU',
|
||||
},
|
||||
);
|
||||
@ -507,6 +518,36 @@ describe('ActivityPub', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('Fediverse Creator', () => {
|
||||
test('Basic', async () => {
|
||||
app = fastify();
|
||||
app.get('*', (request, reply) => {
|
||||
const content = fs.readFileSync(_dirname + '/htmls/fediverse-creator.html');
|
||||
reply.header('content-length', content.length);
|
||||
reply.header('content-type', 'text/html');
|
||||
return reply.send(content);
|
||||
});
|
||||
await app.listen({ port });
|
||||
|
||||
const summary = await summaly(host);
|
||||
expect(summary.fediverseCreator).toBe('@test@example.com');
|
||||
});
|
||||
|
||||
test('Null', async () => {
|
||||
app = fastify();
|
||||
app.get('*', (request, reply) => {
|
||||
const content = fs.readFileSync(_dirname + '/htmls/basic.html');
|
||||
reply.header('content-length', content.length);
|
||||
reply.header('content-type', 'text/html');
|
||||
return reply.send(content);
|
||||
});
|
||||
await app.listen({ port });
|
||||
|
||||
const summary = await summaly(host);
|
||||
expect(summary.fediverseCreator).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe('sensitive', () => {
|
||||
test('default', async () => {
|
||||
app = fastify();
|
||||
@ -531,6 +572,56 @@ describe('sensitive', () => {
|
||||
await app.listen({ port });
|
||||
expect((await summaly(host)).sensitive).toBe(true);
|
||||
});
|
||||
|
||||
test('meta rating adult', async () => {
|
||||
app = fastify();
|
||||
app.get('/', (request, reply) => {
|
||||
const content = fs.readFileSync(_dirname + '/htmls/meta-adult-sensitive.html');
|
||||
reply.header('content-length', content.length);
|
||||
reply.header('content-type', 'text/html');
|
||||
return reply.send(content);
|
||||
});
|
||||
await app.listen({ port });
|
||||
expect((await summaly(host)).sensitive).toBe(true);
|
||||
});
|
||||
|
||||
test('meta rating rta', async () => {
|
||||
app = fastify();
|
||||
app.get('/', (request, reply) => {
|
||||
const content = fs.readFileSync(_dirname + '/htmls/meta-rta-sensitive.html');
|
||||
reply.header('content-length', content.length);
|
||||
reply.header('content-type', 'text/html');
|
||||
return reply.send(content);
|
||||
});
|
||||
await app.listen({ port });
|
||||
expect((await summaly(host)).sensitive).toBe(true);
|
||||
});
|
||||
|
||||
test('HTTP Header rating adult', async () => {
|
||||
app = fastify();
|
||||
app.get('/', (request, reply) => {
|
||||
const content = fs.readFileSync(_dirname + '/htmls/basic.html');
|
||||
reply.header('content-length', content.length);
|
||||
reply.header('content-type', 'text/html');
|
||||
reply.header('rating', 'adult');
|
||||
return reply.send(content);
|
||||
});
|
||||
await app.listen({ port });
|
||||
expect((await summaly(host)).sensitive).toBe(true);
|
||||
});
|
||||
|
||||
test('HTTP Header rating rta', async () => {
|
||||
app = fastify();
|
||||
app.get('/', (request, reply) => {
|
||||
const content = fs.readFileSync(_dirname + '/htmls/basic.html');
|
||||
reply.header('content-length', content.length);
|
||||
reply.header('content-type', 'text/html');
|
||||
reply.header('rating', 'RTA-5042-1996-1400-1577-RTA');
|
||||
return reply.send(content);
|
||||
});
|
||||
await app.listen({ port });
|
||||
expect((await summaly(host)).sensitive).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('UserAgent', () => {
|
||||
|
@ -1,6 +0,0 @@
|
||||
{
|
||||
"extends": "../tsconfig.json",
|
||||
"include": [
|
||||
"./**/*.ts"
|
||||
],
|
||||
}
|
@ -10,8 +10,8 @@
|
||||
"declaration": true,
|
||||
"sourceMap": false,
|
||||
"target": "es2021",
|
||||
"module": "esnext",
|
||||
"moduleResolution": "node",
|
||||
"module": "nodenext",
|
||||
"moduleResolution": "nodenext",
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"removeComments": false,
|
||||
"noLib": false,
|
||||
@ -23,7 +23,6 @@
|
||||
"emitDecoratorMetadata": true,
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"rootDir": "./src",
|
||||
"baseUrl": "./",
|
||||
"paths": {
|
||||
"@/*": [
|
||||
@ -44,6 +43,10 @@
|
||||
},
|
||||
"compileOnSave": false,
|
||||
"include": [
|
||||
"./src/**/*.ts"
|
||||
"./src/**/*"
|
||||
],
|
||||
"exclude": [
|
||||
"node_modules",
|
||||
"test/**/*"
|
||||
]
|
||||
}
|
||||
|
Reference in New Issue
Block a user