27 Commits

Author SHA1 Message Date
77c53be159 v5.0.2 2023-12-30 09:22:39 +00:00
541a0ddd0a fix workflow (v5.0.1) 2023-12-30 09:09:13 +00:00
7e94c26e9f fix workflow name 2023-12-30 08:37:41 +00:00
2f5ab74f52 v5.0.0 2023-12-30 08:36:45 +00:00
7902ded327 なんかめっちゃ変えた 2023-12-30 08:36:01 +00:00
67909d360d npm install @misskey-dev/summaly 2023-12-30 07:14:54 +00:00
0606cd3fae Update workflow to change name to Test 2023-12-30 07:04:46 +00:00
52f5692199 update typescript 2023-12-30 07:03:53 +00:00
3ecb30aefc ignore built, remove travis.yml 2023-12-30 06:59:59 +00:00
c45b3a1c63 add workflows, mod chglog, remove built 2023-12-30 06:53:31 +00:00
81de408b54 remove package-lock.json
https://github.com/misskey-dev/summaly/issues/18
2023-12-30 06:42:33 +00:00
38000dd462 feat: support 'mixi:content-rating' for sensitive (#16)
* feat: support 'mixi:content-rating' for sensitive

* fix: d7f6a88aee でビルドしてない

* ビルドする
2023-12-30 15:33:43 +09:00
d7f6a88aee fix: twitter card with name attr not working (#15) 2023-12-02 21:05:33 +09:00
d2a3e07205 fix: branch.ioを用いたディープリンクをパースできるように修正 (#13)
* (fix) branch.ioのディープリンクでうまく作動しないのを修正

* Update changelog

* fix regex
2023-11-15 19:23:03 +09:00
d2d8db4994 fix: use activity+json instead of activitypub+json (#11) 2023-08-06 19:30:20 +02:00
089a0ad8e8 feat: support <link rel=alternate type=application/activitypub+json> (#10)
* feat: support `<link rel=alternate type=application/activitypub+json>`

* Update index.ts

* build
2023-07-20 15:36:26 +09:00
77dd5654bb README.md 2023-05-07 12:19:50 +00:00
2d63e2a006 fix README.md 2023-05-04 17:18:10 +00:00
c7d71a9ec2 Merge branch 'master' of https://github.com/misskey-dev/summaly 2023-04-20 04:02:59 +00:00
994f420b46 4.0.2 2023-04-20 04:02:55 +00:00
5a3321a04f fix: allow legacy allowfullscreen (#9) 2023-04-20 12:41:11 +09:00
1bab7afee6 Merge branch 'master' of https://github.com/misskey-dev/summaly 2023-03-16 03:26:10 +00:00
441e8c22f9 v4.0.1 2023-03-16 03:26:00 +00:00
376bba9c61 fix: give null when oEmbed access fails (#8) 2023-03-16 12:22:23 +09:00
028b2fed2f fix README.md 2023-03-13 18:03:16 +00:00
90d5d0f33b Fix README.md 2023-03-13 18:02:07 +00:00
9e955d8d04 fix readme 2023-03-13 17:57:37 +00:00
61 changed files with 3120 additions and 11354 deletions

View File

@ -5,6 +5,11 @@ indent_style = tab
indent_size = 2
charset = utf-8
insert_final_newline = true
end_of_line = lf
trim_trailing_whitespace = true
[*.json]
[*.md]
trim_trailing_whitespace = false
[*.{yml,yaml}]
indent_style = space

122
.eslintrc.cjs Normal file
View File

@ -0,0 +1,122 @@
// ESLint configuration for the TypeScript sources.
// Linting is type-aware: the TypeScript parser is pointed at ./tsconfig.json
// relative to this file's directory.
module.exports = {
root: true,
parser: '@typescript-eslint/parser',
parserOptions: {
tsconfigRootDir: __dirname,
project: ['./tsconfig.json'],
},
plugins: [
'@typescript-eslint',
'import'
],
extends: [
'eslint:recommended',
'plugin:@typescript-eslint/recommended',
'plugin:import/recommended',
'plugin:import/typescript'
],
rules: {
// --- Formatting: tabs, mandatory semicolons, single quotes ---
'indent': ['warn', 'tab', {
'SwitchCase': 1,
'MemberExpression': 1,
'flatTernaryExpressions': true,
'ArrayExpression': 'first',
'ObjectExpression': 'first',
}],
'eol-last': ['error', 'always'],
'semi': ['error', 'always'],
'semi-spacing': ['error', { 'before': false, 'after': true }],
'quotes': ['warn', 'single'],
'comma-dangle': ['warn', 'always-multiline'],
'comma-spacing': ['error', { 'before': false, 'after': true }],
'array-bracket-spacing': ['error', 'never'],
'keyword-spacing': ['error', {
'before': true,
'after': true,
}],
'key-spacing': ['error', {
'beforeColon': false,
'afterColon': true,
}],
'arrow-spacing': ['error', {
'before': true,
'after': true,
}],
'brace-style': ['error', '1tbs', {
'allowSingleLine': true,
}],
'padded-blocks': ['error', 'never'],
/* TODO: warn when path aliases are not used
'no-restricted-imports': ['warn', {
'patterns': [
]
}],
*/
// --- Language-level rules ---
// `== null` stays allowed so null and undefined can be checked together.
'eqeqeq': ['error', 'always', { 'null': 'ignore' }],
'no-multi-spaces': ['error'],
'no-var': ['error'],
'prefer-arrow-callback': ['error'],
'no-throw-literal': ['error'],
'no-param-reassign': ['warn'],
'no-constant-condition': ['warn'],
'no-empty-pattern': ['warn'],
'no-async-promise-executor': ['off'],
'no-useless-escape': ['off'],
'no-multiple-empty-lines': ['error', { 'max': 1 }],
'no-control-regex': ['warn'],
'no-empty': ['warn'],
'no-inner-declarations': ['off'],
'no-sparse-arrays': ['off'],
'nonblock-statement-body-position': ['error', 'beside'],
'object-curly-spacing': ['error', 'always'],
'space-infix-ops': ['error'],
'space-before-blocks': ['error', 'always'],
// Require a blank line before and after every function declaration.
'padding-line-between-statements': [
'error',
{ 'blankLine': 'always', 'prev': 'function', 'next': '*' },
{ 'blankLine': 'always', 'prev': '*', 'next': 'function' },
],
"lines-between-class-members": "off",
/* typescript-eslint does not seem to support the `enforce` option
'@typescript-eslint/lines-between-class-members': ['error', {
enforce: [{
blankLine: 'always',
prev: 'method',
next: '*',
}]
}],
*/
// --- typescript-eslint rules ---
'@typescript-eslint/func-call-spacing': ['error', 'never'],
'@typescript-eslint/no-explicit-any': ['warn'],
'@typescript-eslint/no-unused-vars': ['warn'],
'@typescript-eslint/no-unnecessary-condition': ['warn'],
'@typescript-eslint/no-var-requires': ['warn'],
'@typescript-eslint/no-inferrable-types': ['warn'],
'@typescript-eslint/no-empty-function': ['off'],
'@typescript-eslint/no-non-null-assertion': ['warn'],
'@typescript-eslint/explicit-function-return-type': ['off'],
'@typescript-eslint/no-misused-promises': ['error', {
'checksVoidReturn': false,
}],
'@typescript-eslint/consistent-type-imports': 'off',
'@typescript-eslint/prefer-nullish-coalescing': [
'warn',
],
// Type-like names must be PascalCase; type parameters are exempt from any format.
'@typescript-eslint/naming-convention': [
'error',
{
"selector": "typeLike",
"format": ["PascalCase"]
},
{
"selector": "typeParameter",
"format": []
}
],
// --- eslint-plugin-import rules ---
'import/no-unresolved': ['off'],
'import/no-default-export': ['warn'],
'import/order': ['warn', {
'groups': ['builtin', 'external', 'internal', 'parent', 'sibling', 'index', 'object', 'type'],
}]
},
};

31
.github/workflows/lint.yml vendored Normal file
View File

@ -0,0 +1,31 @@
# Runs ESLint on every push and pull request.
name: Lint
on: [push, pull_request]
jobs:
lint:
runs-on: ubuntu-latest
strategy:
matrix:
# Single pinned Node.js version
node-version: [20.10.0]
steps:
- uses: actions/checkout@v4
# pnpm is installed before setup-node so that `cache: 'pnpm'` below can be used
- name: Install pnpm
uses: pnpm/action-setup@v2
with:
version: 8
run_install: false
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
cache: 'pnpm'
# NOTE(review): pnpm is already provided by pnpm/action-setup above; confirm `corepack enable` is still needed
- name: Install
run: |
corepack enable
pnpm i --frozen-lockfile
- name: eslint
run: |
pnpm eslint

41
.github/workflows/npm-publish.yml vendored Normal file
View File

@ -0,0 +1,41 @@
# Builds the package and publishes it to the npm registry.
# Triggered when a release is created, or manually via workflow_dispatch.
name: Publish Node.js Package
on:
release:
types: [created]
workflow_dispatch:
jobs:
publish:
runs-on: ubuntu-latest
permissions:
contents: read
# NOTE(review): presumably needed for the `--provenance` flag passed to `pnpm publish` below; confirm
id-token: write
strategy:
matrix:
node-version: [20.10.0]
steps:
- uses: actions/checkout@v4
- name: Install pnpm
uses: pnpm/action-setup@v2
with:
version: 8
run_install: false
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
cache: 'pnpm'
registry-url: 'https://registry.npmjs.org'
# Install, build and publish in a single step; the npm token comes from the repository secrets
- name: Publish package
run: |
corepack enable
pnpm i --frozen-lockfile
pnpm build
pnpm publish --access public --no-git-checks --provenance
env:
NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}

34
.github/workflows/test.yml vendored Normal file
View File

@ -0,0 +1,34 @@
# Builds the package and runs the test suite on every push and pull request.
name: Test
on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
# Single pinned Node.js version
node-version: [20.10.0]
steps:
- uses: actions/checkout@v4
# pnpm is installed before setup-node so that `cache: 'pnpm'` below can be used
- name: Install pnpm
uses: pnpm/action-setup@v2
with:
version: 8
run_install: false
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
cache: 'pnpm'
- name: Install
run: |
corepack enable
pnpm i --frozen-lockfile
# The build step runs before the tests
- name: Build
run: |
pnpm build
- name: Test
run: |
pnpm test

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
/node_modules
/built
npm-debug.log

View File

@ -5,5 +5,5 @@
npm-debug.log
gulpfile.js
tsconfig.json
tslint.json
.eslintrc.cjs
.editorconfig

View File

@ -1,14 +0,0 @@
# travis file
# https://docs.travis-ci.com/user/customizing-the-build
language: node_js
node_js:
- 7.5.0
before_script:
- npm run build
cache:
directories:
- node_modules

View File

@ -1,3 +1,30 @@
5.0.2 / 2023-12-30
------------------
* Fix .github/workflows/npm-publish.yml
5.0.1 / 2023-12-30
------------------
* Fix .github/workflows/npm-publish.yml
5.0.0 / 2023-12-30
------------------
* support `<link rel="alternate" type="application/activitypub+json" href="{href}">` https://github.com/misskey-dev/summaly/pull/10, https://github.com/misskey-dev/summaly/pull/11
* 結果の`activityPub`プロパティでhrefの内容を取得できます
* branch.ioを用いたディープリンク(spotify.linkなど)でパースに失敗する問題を修正 https://github.com/misskey-dev/summaly/pull/13
* Twitter Cardが読めていない問題を修正 https://github.com/misskey-dev/summaly/pull/15
* 'mixi:content-rating'をsensitive判定で見ることで、dlsiteなどでセンシティブ情報を得られるように https://github.com/misskey-dev/summaly/pull/16
* sitenameをURLから生成する場合、ポートを含むように (URL.hostname → URL.host)
* `Summary`型に`url`プロパティを追加した`SummalyResult`型をexportするように
* `IPlugin`インターフェースを`SummalyPlugin`に改称
4.0.2 / 2023-04-20
------------------
* YouTubeをフルスクリーンにできない問題を修正
4.0.1 / 2023-03-16
------------------
* oEmbedの読み込みでエラーが発生した際は、エラーにせずplayerの中身をnullにするように
4.0.0 / 2023-03-14
------------------
* oEmbed type=richの制限的なサポート

View File

@ -1,6 +1,6 @@
The MIT License (MIT)
Copyright (c) 2016-2019 syuilo
Copyright (c) 2016-2024 syuilo
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,6 +1,7 @@
summaly
================================================================
[![][npm-badge]][npm-link]
[![][mit-badge]][mit]
[![][himawari-badge]][himasaku]
[![][sakurako-badge]][himasaku]
@ -8,7 +9,7 @@ summaly
Installation
----------------------------------------------------------------
```
npm install git+https://github.com/misskey-dev/summaly.git
npm install @misskey-dev/summaly
```
Usage
@ -40,17 +41,19 @@ npm run build
npm run serve
```
### Options
#### opts (SummalyOptions)
| Property | Type | Description | Default |
| :------------------ | :--------------------- | :----------------------- | :------ |
| **followRedirects** | *boolean* | Whether follow redirects | `true` |
| **plugins** | *plugin[]* (see below) | Custom plugins | `null` |
| Property | Type | Description | Default |
| :------------------ | :--------------------- | :------------------------------ | :------ |
| **lang** | *string* | Accept-Language for the request | `null` |
| **followRedirects** | *boolean* | Whether to follow redirects | `true` |
| **plugins** | *plugin[]* (see below) | Custom plugins | `null` |
| **agent** | *Got.Agents* | Custom HTTP agent (see below) | `null` |
#### Plugin
``` typescript
interface IPlugin {
interface SummalyPlugin {
test: (url: URL) => boolean;
summarize: (url: URL) => Promise<Summary>;
}
@ -58,32 +61,46 @@ interface IPlugin {
URLs are passed as WHATWG `URL` objects since v4.
#### Custom HTTP agent for proxy
You can specify agents to be passed to Got for proxy use, etc.
https://github.com/sindresorhus/got/blob/v12.6.0/documentation/tips.md#proxying
**⚠️ If you set a custom agent, the rejection of local (private) IP addresses is disabled. ⚠️**
(Summaly usually rejects local IPs.)
(Summaly currently does not support http2.)
### Returns
A Promise of an Object that contains properties below:
※ Almost all values are nullable. `player` itself should never be null.
#### Root
#### SummalyResult
| Property | Type | Description |
| :-------------- | :------- | :------------------------------------------ |
| **description** | *string* | The description of the web page |
| **icon** | *string* | The url of the icon of the web page |
| **sitename** | *string* | The name of the web site |
| **thumbnail** | *string* | The url of the thumbnail of the web page |
| **oEmbed** | *OEmbedRichIframe* | The oEmbed rich iframe info of the web page |
| **title** | *string* \| *null* | The title of the web page |
| **icon** | *string* \| *null* | The url of the icon of the web page |
| **description** | *string* \| *null* | The description of the web page |
| **thumbnail** | *string* \| *null* | The url of the thumbnail of the web page |
| **sitename** | *string* \| *null* | The name of the web site |
| **player** | *Player* | The player of the web page |
| **title** | *string* | The title of the web page |
| **sensitive** | *boolean* | Whether the url is sensitive |
| **activityPub** | *string* \| *null* | The url of the ActivityPub representation of that web page |
| **url** | *string* | The url of the web page |
#### Summary
`Omit<SummalyResult, "url">`
#### Player
| Property | Type | Description |
| :-------------- | :--------- | :---------------------------------------------- |
| **url** | *string* | The url of the player |
| **width** | *number* | The width of the player |
| **height** | *number* | The height of the player |
| **url** | *string* \| *null* | The url of the player |
| **width** | *number* \| *null* | The width of the player |
| **height** | *number* \| *null* | The height of the player |
| **allow** | *string[]* | The names of the allowed permissions for iframe |
Currently the possible items in `allow` are:
@ -93,32 +110,46 @@ Currently the possible items in `allow` are:
* `fullscreen`
* `encrypted-media`
* `picture-in-picture`
* `web-share`
See [Permissions Policy](https://developer.mozilla.org/en-US/docs/Web/HTTP/Permissions_Policy) in MDN for details of them.
### Example
``` javascript
```javascript
import { summaly } from '@misskey-dev/summaly';
const summary = await summaly('https://www.youtube.com/watch?v=NMIEAhH_fTU');
console.log(summary); // will be ... ↓
/*
console.log(summary);
```
will be ... ↓
```json
{
title: '【楽曲試聴】「Stage Bye Stage」(歌:島村卯月、渋谷凛、本田未央)',
icon: 'https://s.ytimg.com/yts/img/favicon-vfl8qSV2F.ico',
description: 'http://columbia.jp/idolmaster/ 2018年7月18日発売予定 THE IDOLM@STER CINDERELLA GIRLS CG STAR LIVE Stage Bye Stage 歌:島村卯月、渋谷凛、本田未央 COCC-17495CD1枚組 ¥1,200税 収録内容 Tr...',
thumbnail: 'https://i.ytimg.com/vi/NMIEAhH_fTU/maxresdefault.jpg',
player: {
url: 'https://www.youtube.com/embed/NMIEAhH_fTU',
width: 1280,
height: 720
"title": "【アイドルマスター】「Stage Bye Stage」(歌:島村卯月、渋谷凛、本田未央)",
"icon": "https://www.youtube.com/s/desktop/28b0985e/img/favicon.ico",
"description": "Website▶https://columbia.jp/idolmaster/Playlist▶https://www.youtube.com/playlist?list=PL83A2998CF3BBC86D2018年7月18日発売予定THE IDOLM@STER CINDERELLA GIRLS CG STAR...",
"thumbnail": "https://i.ytimg.com/vi/NMIEAhH_fTU/maxresdefault.jpg",
"player": {
"url": "https://www.youtube.com/embed/NMIEAhH_fTU?feature=oembed",
"width": 200,
"height": 113,
"allow": [
"autoplay",
"clipboard-write",
"encrypted-media",
"picture-in-picture",
"web-share",
"fullscreen"
]
},
sitename: 'YouTube',
url: 'https://www.youtube.com/watch?v=NMIEAhH_fTU'
"sitename": "YouTube",
"sensitive": false,
"activityPub": null,
"url": "https://www.youtube.com/watch?v=NMIEAhH_fTU"
}
*/
```
Testing
@ -129,5 +160,10 @@ License
----------------------------------------------------------------
[MIT](LICENSE)
[mit]: http://opensource.org/licenses/MIT
[mit-badge]: https://img.shields.io/badge/license-MIT-444444.svg?style=flat-square
[himasaku]: https://himasaku.net
[himawari-badge]: https://img.shields.io/badge/%E5%8F%A4%E8%B0%B7-%E5%90%91%E6%97%A5%E8%91%B5-1684c5.svg?style=flat-square
[sakurako-badge]: https://img.shields.io/badge/%E5%A4%A7%E5%AE%A4-%E6%AB%BB%E5%AD%90-efb02a.svg?style=flat-square
[npm-link]: https://www.npmjs.com/package/@misskey-dev/summaly
[npm-badge]: https://img.shields.io/npm/v/@misskey-dev/summaly.svg?style=flat-square

4
built/general.d.ts vendored
View File

@ -1,4 +0,0 @@
import { URL } from 'node:url';
import type { default as Summary } from './summary.js';
/**
 * General-purpose summarizer.
 *
 * @param _url The page to summarize, as a WHATWG URL or a URL string
 * @param lang Optional Accept-Language value for the request
 * @returns A promise of the summary, or `null`
 */
declare const _default: (_url: URL | string, lang?: string | null) => Promise<Summary | null>;
export default _default;

View File

@ -1,188 +0,0 @@
import { URL } from 'node:url';
import clip from './utils/clip.js';
import cleanupTitle from './utils/cleanup-title.js';
import { decode as decodeHtml } from 'html-entities';
import { get, head, scpaping } from './utils/got.js';
import * as cheerio from 'cheerio';
/**
 * Builds a player description from the page's oEmbed endpoint.
 *
 * Contains only the html snippet for a sanitized iframe as the thumbnail is
 * mostly covered in OpenGraph instead.
 *
 * Width should always be 100%.
 *
 * @param $ selector function for the page (called as `$(selector).attr(name)`)
 * @param pageUrl URL of the page, used as the base when resolving the oEmbed href
 * @returns `{ url, width, height, allow }`, or `null` when the page has no
 *   oEmbed link, the endpoint cannot be read, or the embed is not a single
 *   HTTPS iframe that only asks for permissions on the safe list
 */
async function getOEmbedPlayer($, pageUrl) {
	const href = $('link[type="application/json+oembed"]').attr('href');
	if (!href) {
		return null;
	}

	// A broken oEmbed endpoint (malformed href, network/HTTP error, non-JSON
	// body) must not fail the whole summary: the player is simply dropped.
	let body;
	try {
		const oEmbed = await get((new URL(href, pageUrl)).href);
		body = JSON.parse(oEmbed);
	} catch {
		return null;
	}

	if (!body || body.version !== '1.0' || !['rich', 'video'].includes(body.type)) {
		// Not a well formed rich oEmbed
		return null;
	}

	if (typeof body.html !== 'string' || !body.html.startsWith('<iframe ') || !body.html.endsWith('</iframe>')) {
		// It is missing the html snippet or includes something else than an iframe
		return null;
	}

	const oEmbedHtml = cheerio.load(body.html);
	const iframe = oEmbedHtml('iframe');

	if (iframe.length !== 1) {
		// Somehow we either have multiple iframes or none
		return null;
	}

	if (iframe.parents().length !== 2) {
		// Should only have the body and html elements as the parents
		return null;
	}

	const url = iframe.attr('src');
	if (!url) {
		// No src?
		return null;
	}

	try {
		if ((new URL(url)).protocol !== 'https:') {
			// Allow only HTTPS for best security
			return null;
		}
	} catch (e) {
		// src is not an absolute URL
		return null;
	}

	// Height is the most important, width is okay to be null. The implementer
	// should choose fixed height instead of fixed aspect ratio if width is null.
	//
	// For example, Spotify's embed page does not strictly follow aspect ratio
	// and thus keeping the height is better than keeping the aspect ratio.
	//
	// Spotify gives `width: 100%, height: 152px` for iframe while `width: 456,
	// height: 152` for oEmbed data, and we treat any percentages as null here.
	const parsedWidth = Number(iframe.attr('width') ?? body.width);
	const width = Number.isNaN(parsedWidth) ? null : parsedWidth;

	// The height is capped at 1024
	const height = Math.min(Number(iframe.attr('height') ?? body.height), 1024);
	if (Number.isNaN(height)) {
		// No proper height info
		return null;
	}

	// TODO: This implementation only allows basic syntax of `allow`.
	// Might need to implement better later.
	const safeList = [
		'autoplay',
		'clipboard-write',
		'fullscreen',
		'encrypted-media',
		'picture-in-picture',
		'web-share',
	];

	// YouTube has these but they are almost never used.
	const ignoredList = [
		'gyroscope',
		'accelerometer',
	];

	const allowedPermissions = (iframe.attr('allow') ?? '').split(/\s*;\s*/g)
		.filter(s => s)
		.filter(s => !ignoredList.includes(s));

	if (allowedPermissions.some(allow => !safeList.includes(allow))) {
		// This iframe is probably too powerful to be embedded
		return null;
	}

	return {
		url,
		width,
		height,
		allow: allowedPermissions,
	};
}
/**
 * Summarizes a web page from the metadata found in its HTML (OpenGraph,
 * Twitter Card, standard meta/link tags and oEmbed).
 *
 * @param _url The page to summarize, as a WHATWG URL or a URL string
 * @param lang Accept-Language value for the request; ignored unless it is a
 *   comma-separated list of `[\w-]+` tokens
 * @returns The summary, or `null` if no title could be determined
 */
export default async (_url, lang = null) => {
	// Only forward values that look like a plain list of language tags
	const acceptLanguage = (lang && lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) ? lang : null;

	const url = typeof _url === 'string' ? new URL(_url) : _url;

	const res = await scpaping(url.href, { lang: acceptLanguage || undefined });
	const $ = res.$;

	const metaProperty = (key) => $(`meta[property="${key}"]`).attr('content');
	const metaName = (key) => $(`meta[name="${key}"]`).attr('content');
	// Twitter Card tags are commonly written with `name=` rather than
	// `property=`, so both forms are accepted.
	const twitterMeta = (key) => metaProperty(key) || metaName(key);

	const twitterCard = twitterMeta('twitter:card');

	let title = metaProperty('og:title') ||
		twitterMeta('twitter:title') ||
		$('title').text();

	if (title === undefined || title === null) {
		return null;
	}

	title = clip(decodeHtml(title), 100);

	const imageHref = metaProperty('og:image') ||
		twitterMeta('twitter:image') ||
		$('link[rel="image_src"]').attr('href') ||
		$('link[rel="apple-touch-icon"]').attr('href') ||
		$('link[rel="apple-touch-icon image_src"]').attr('href');

	// Resolve relative thumbnail URLs against the page URL
	const image = imageHref ? (new URL(imageHref, url.href)).href : null;

	// twitter:player is ignored for `summary_large_image` cards
	const playerUrl = (twitterCard !== 'summary_large_image' && twitterMeta('twitter:player')) ||
		metaProperty('og:video') ||
		metaProperty('og:video:secure_url') ||
		metaProperty('og:video:url');

	// Missing values parse to NaN and are reported as null below
	const playerWidth = Number.parseInt(
		twitterMeta('twitter:player:width') ||
		metaProperty('og:video:width') ||
		'', 10);

	const playerHeight = Number.parseInt(
		twitterMeta('twitter:player:height') ||
		metaProperty('og:video:height') ||
		'', 10);

	let description = metaProperty('og:description') ||
		twitterMeta('twitter:description') ||
		metaName('description');

	description = description
		? clip(decodeHtml(description), 300)
		: null;

	// A description that merely repeats the title carries no information
	if (title === description) {
		description = null;
	}

	const siteName = decodeHtml(
		metaProperty('og:site_name') ||
		metaName('application-name') ||
		url.hostname);

	const favicon = $('link[rel="shortcut icon"]').attr('href') ||
		$('link[rel="icon"]').attr('href') ||
		'/favicon.ico';

	const sensitive = $('.tweet').attr('data-possibly-sensitive') === 'true';

	// Resolves `path` against the page URL and returns it only if a HEAD
	// request succeeds. An unparsable href is treated like an unreachable one.
	const find = async (path) => {
		try {
			const target = new URL(path, url.href);
			await head(target.href);
			return target;
		} catch (e) {
			return null;
		}
	};

	// The icon probe and the oEmbed lookup are independent; run them in parallel
	const [icon, oEmbed] = await Promise.all([
		find(favicon),
		getOEmbedPlayer($, url.href),
	]);

	// Clean up the title
	title = cleanupTitle(title, siteName);

	if (title === '') {
		title = siteName;
	}

	return {
		title: title || null,
		icon: icon?.href || null,
		description: description || null,
		thumbnail: image || null,
		// Prefer the oEmbed player; otherwise fall back to the meta-tag player
		player: oEmbed ?? {
			url: playerUrl || null,
			width: Number.isNaN(playerWidth) ? null : playerWidth,
			height: Number.isNaN(playerHeight) ? null : playerHeight,
			allow: ['autoplay', 'encrypted-media', 'fullscreen'],
		},
		sitename: siteName || null,
		sensitive,
	};
};

39
built/index.d.ts vendored
View File

@ -1,39 +0,0 @@
/**
* summaly
* https://github.com/syuilo/summaly
*/
import Summary from './summary.js';
import type { IPlugin as _IPlugin } from './iplugin.js';
export declare type IPlugin = _IPlugin;
import * as Got from 'got';
import type { FastifyInstance } from 'fastify';
declare type Options = {
/**
* Accept-Language for the request
*/
lang?: string | null;
/**
* Whether follow redirects
*/
followRedirects?: boolean;
/**
* Custom Plugins
*/
plugins?: IPlugin[];
/**
* Custom HTTP agent
*/
agent?: Got.Agents;
};
declare type Result = Summary & {
/**
* The actual url of that web page
*/
url: string;
};
/**
 * Summarize a web page
 *
 * @param url The URL of the web page
 * @param options Optional settings (language, redirect handling, plugins, HTTP agent)
 * @returns A promise of the summary together with the actual URL of the page
 */
export declare const summaly: (url: string, options?: Options | undefined) => Promise<Result>;
/**
 * Fastify plugin entry point.
 *
 * @param fastify The Fastify instance to register on
 * @param options Options for summaly
 * @param done Callback signalling that registration has finished
 */
export default function (fastify: FastifyInstance, options: Options, done: (err?: Error) => void): void;
export {};

View File

@ -1,68 +0,0 @@
/**
* summaly
* https://github.com/syuilo/summaly
*/
import { URL } from 'node:url';
import tracer from 'trace-redirect';
import general from './general.js';
import { setAgent } from './utils/got.js';
import { plugins as builtinPlugins } from './plugins/index.js';
// Defaults applied when the caller omits an option. Never mutated.
const defaultOptions = {
	lang: null,
	followRedirects: true,
	plugins: [],
};

/**
 * Summarize a web page
 *
 * @param url The URL of the web page
 * @param options Optional settings: `lang`, `followRedirects`, `plugins`, `agent`
 * @returns The summary produced by the first matching plugin (or by the
 *   general summarizer), extended with `url`, the URL that was summarized
 * @throws The string `'failed summarize'` when no summary could be produced
 */
export const summaly = async (url, options) => {
	// NOTE: the agent is stored module-wide by setAgent, so it also applies to
	// later calls.
	if (options?.agent) {
		setAgent(options.agent);
	}

	// Merge into a fresh object. Assigning onto `defaultOptions` itself would
	// make one call's options leak into every later call.
	const opts = { ...defaultOptions, ...options };

	// Built-in plugins take precedence over custom ones
	const plugins = builtinPlugins.concat(opts.plugins || []);

	let actualUrl = url;
	if (opts.followRedirects) {
		// `.catch(() => url)` would do, but feeding trace-redirect to jest is a
		// hassle, hence the try-catch
		try {
			actualUrl = await tracer(url);
		} catch (e) {
			actualUrl = url;
		}
	}

	const _url = new URL(actualUrl);

	// Find matching plugin
	const match = plugins.find(plugin => plugin.test(_url));

	// Get summary
	const summary = await (match ? match.summarize : general)(_url, opts.lang || undefined);

	if (summary == null) {
		// Kept as a string for backward compatibility with existing callers
		throw 'failed summarize';
	}

	return Object.assign(summary, {
		url: actualUrl,
	});
};
/**
 * Fastify plugin exposing summaly as `GET /?url=...&lang=...`.
 *
 * Responds with the summary, with 400 when `url` is missing, or with 500 and
 * the thrown value when summarizing fails.
 *
 * @param fastify The Fastify instance to register the route on
 * @param options Options for summaly; they override the per-request values
 * @param done Called once the route has been registered
 */
export default function (fastify, options, done) {
	const handleRequest = async (req, reply) => {
		const targetUrl = req.query.url;

		if (targetUrl == null) {
			return reply.status(400).send({
				error: 'url is required',
			});
		}

		try {
			// Redirects are not followed unless `options` says otherwise
			return await summaly(targetUrl, {
				lang: req.query.lang,
				followRedirects: false,
				...options,
			});
		} catch (e) {
			return reply.status(500).send({
				error: e,
			});
		}
	};

	fastify.get('/', handleRequest);
	done();
}

7
built/iplugin.d.ts vendored
View File

@ -1,7 +0,0 @@
/// <reference types="node" />
import type { URL } from 'node:url';
import Summary from './summary.js';
/**
 * A summaly plugin: a site-specific summarizer.
 */
export interface IPlugin {
/** Whether this plugin handles the given URL */
test: (url: URL) => boolean;
/** Produces the summary for a URL (optionally given a language) */
summarize: (url: URL, lang?: string) => Promise<Summary>;
}

View File

@ -1 +0,0 @@
export {};

View File

@ -1,5 +0,0 @@
/// <reference types="node" />
import { URL } from 'node:url';
import summary from '../summary.js';
/** Whether the URL's hostname is one of the Amazon hostnames this plugin handles */
export declare function test(url: URL): boolean;
/** Summarizes an Amazon page */
export declare function summarize(url: URL): Promise<summary>;

View File

@ -1,44 +0,0 @@
import { scpaping } from '../utils/got.js';
// Hostnames of the Amazon storefronts handled by this plugin.
// Amazon Australia is served from www.amazon.com.au; the legacy
// 'www.amazon.au' entry is kept for backward compatibility.
const AMAZON_HOSTNAMES = new Set([
	'www.amazon.com',
	'www.amazon.co.jp',
	'www.amazon.ca',
	'www.amazon.com.br',
	'www.amazon.com.mx',
	'www.amazon.co.uk',
	'www.amazon.de',
	'www.amazon.fr',
	'www.amazon.it',
	'www.amazon.es',
	'www.amazon.nl',
	'www.amazon.cn',
	'www.amazon.in',
	'www.amazon.com.au',
	'www.amazon.au',
]);

/**
 * Whether this plugin handles the given URL.
 *
 * @param url WHATWG URL of the page
 * @returns `true` if the hostname is a known Amazon storefront
 */
export function test(url) {
	return AMAZON_HOSTNAMES.has(url.hostname);
}
/**
 * Summarizes an Amazon product page from its well-known element ids and
 * Twitter player meta tags.
 *
 * @param url WHATWG URL of the product page
 * @returns The summary; text fields are trimmed, missing values are `null`
 */
export async function summarize(url) {
	const { $ } = await scpaping(url.href);

	// Twitter tags appear with either `property=` or `name=`; accept both
	const twitterMeta = (key) =>
		$(`meta[property="${key}"]`).attr('content') ||
		$(`meta[name="${key}"]`).attr('content');

	const title = $('#title').text();

	const description =
		$('#productDescription').text() ||
		$('meta[name="description"]').attr('content');

	const thumbnail = $('#landingImage').attr('src');

	const playerUrl = twitterMeta('twitter:player');
	const playerWidth = twitterMeta('twitter:player:width');
	const playerHeight = twitterMeta('twitter:player:height');

	return {
		title: title ? title.trim() : null,
		icon: 'https://www.amazon.com/favicon.ico',
		description: description ? description.trim() : null,
		thumbnail: thumbnail ? thumbnail.trim() : null,
		player: {
			url: playerUrl || null,
			// Explicit radix so that dimensions are always read as decimal
			width: playerWidth ? Number.parseInt(playerWidth, 10) : null,
			height: playerHeight ? Number.parseInt(playerHeight, 10) : null,
			// Permissions are only granted when there actually is a player
			allow: playerUrl ? ['fullscreen', 'encrypted-media'] : [],
		},
		sitename: 'Amazon',
	};
}

View File

@ -1,2 +0,0 @@
import { IPlugin } from '@/iplugin.js';
/** The built-in plugins */
export declare const plugins: IPlugin[];

View File

@ -1,6 +0,0 @@
import * as amazon from './amazon.js';
import * as wikipedia from './wikipedia.js';
// Built-in plugins. Each module exposes `test(url)` and `summarize(url)`.
export const plugins = [
amazon,
wikipedia,
];

View File

@ -1,5 +0,0 @@
/// <reference types="node" />
import { URL } from 'node:url';
import summary from '../summary.js';
/** Whether the URL's hostname ends with `.wikipedia.org` */
export declare function test(url: URL): boolean;
/** Summarizes a Wikipedia article */
export declare function summarize(url: URL): Promise<summary>;

View File

@ -1,37 +0,0 @@
import { get } from '../utils/got.js';
import debug from 'debug';
import clip from './../utils/clip.js';
const log = debug('summaly:plugins:wikipedia');
/**
 * Whether this plugin handles the given URL.
 *
 * @param url WHATWG URL of the page
 * @returns `true` if the hostname is a subdomain of wikipedia.org
 */
export function test(url) {
	const { hostname } = url;
	return Boolean(hostname) && hostname.endsWith('.wikipedia.org');
}
/**
 * Summarizes a Wikipedia article through the MediaWiki `extracts` query API.
 *
 * The language is the first label of the host and the article title is the
 * third segment of the path.
 *
 * @param url WHATWG URL of the article
 * @returns The summary; the description is the article extract clipped to 300
 * @throws The string `'fetch failed'` when the response has no `query.pages`
 */
export async function summarize(url) {
	const lang = url.host ? url.host.split('.')[0] : null;
	const title = url.pathname ? url.pathname.split('/')[2] : null;
	const endpoint = `https://${lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro=&explaintext=&titles=${title}`;

	for (const line of [`lang is ${lang}`, `title is ${title}`, `endpoint is ${endpoint}`]) {
		log(line);
	}

	const response = JSON.parse(await get(endpoint));
	log(response);

	const hasPages = ('query' in response) && ('pages' in response.query);
	if (!hasPages) {
		throw 'fetch failed';
	}

	// Pages are keyed by page id; only the first one is of interest
	const [info] = Object.values(response.query.pages);

	const emptyPlayer = {
		url: null,
		width: null,
		height: null,
		allow: [],
	};

	return {
		title: info.title,
		icon: 'https://wikipedia.org/static/favicon/wikipedia.ico',
		description: clip(info.extract, 300),
		thumbnail: `https://wikipedia.org/static/images/project-logos/${lang}wiki.png`,
		player: emptyPlayer,
		sitename: 'Wikipedia',
	};
}

View File

@ -1 +0,0 @@
export {};

View File

@ -1,22 +0,0 @@
import * as http from 'http';
import * as Koa from 'koa';
import summaly from '../';
// Minimal standalone HTTP front-end: `GET /?url=...&lang=...` answers with the
// summary, 400 when `url` is missing and 500 when summarizing fails.
const koaApp = new Koa();

koaApp.use(async (ctx) => {
	const { url, lang } = ctx.query;

	if (!url) {
		ctx.status = 400;
		return;
	}

	try {
		ctx.body = await summaly(url, {
			lang,
			followRedirects: false,
		});
	} catch (e) {
		ctx.status = 500;
	}
});

// Listens on $PORT, or 80 when it is not set
const port = process.env.PORT || 80;
http.createServer(koaApp.callback()).listen(port);

49
built/summary.d.ts vendored
View File

@ -1,49 +0,0 @@
declare type Summary = {
/**
* The description of that web page
*/
description: string | null;
/**
* The url of the icon of that web page
*/
icon: string | null;
/**
* The name of site of that web page
*/
sitename: string | null;
/**
* The url of the thumbnail of that web page
*/
thumbnail: string | null;
/**
* The player of that web page
*/
player: Player;
/**
* The title of that web page
*/
title: string | null;
/**
* Possibly sensitive
*/
sensitive?: boolean;
};
export default Summary;
export declare type Player = {
/**
* The url of the player
*/
url: string | null;
/**
* The width of the player
*/
width: number | null;
/**
* The height of the player
*/
height: number | null;
/**
* The allowed permissions of the iframe
*/
allow: string[];
};

View File

@ -1 +0,0 @@
export {};

View File

@ -1 +0,0 @@
/**
 * Trims the title and strips a trailing site name from it.
 *
 * @param title The raw title
 * @param siteName The site name to strip, if any
 * @returns The cleaned-up title
 */
export default function (title: string, siteName?: string | null): string;

View File

@ -1,19 +0,0 @@
import escapeRegExp from 'escape-regexp';
/**
 * Trims the title and, when it ends with a separator (`-`, `|`, `:` or `・`)
 * followed by the site name, returns only the part before the separator.
 *
 * @param title The raw title
 * @param siteName The site name to strip, if any
 * @returns The cleaned-up title
 */
export default function (title, siteName) {
	const trimmedTitle = title.trim();

	if (!siteName) {
		return trimmedTitle;
	}

	const escapedSiteName = escapeRegExp(siteName.trim());
	const suffixPattern = new RegExp(`^(.+?)\\s?[\\-\\|:・]\\s?${escapedSiteName}$`);

	// Group 1 is the title without the separator and site name
	const withoutSiteName = suffixPattern.exec(trimmedTitle)?.[1];

	return withoutSiteName || trimmedTitle;
}

View File

@ -1 +0,0 @@
/**
 * Trims a string and shortens it to `max` characters followed by `...` when
 * it is longer than that.
 *
 * @param s The string to clip
 * @param max The maximum length before the ellipsis
 * @returns The clipped string
 */
export default function (s: string, max: number): string;

View File

@ -1,13 +0,0 @@
import nullOrEmpty from './null-or-empty.js';
/**
 * Trims a string and shortens it to `max` characters followed by `...` when
 * it is longer than that. Null or empty input is returned unchanged.
 *
 * @param s The string to clip
 * @param max The maximum length before the ellipsis
 * @returns The clipped string
 */
export default function (s, max) {
	if (nullOrEmpty(s)) {
		return s;
	}

	const trimmed = s.trim();

	return trimmed.length > max
		? trimmed.substring(0, max) + '...'
		: trimmed;
}

View File

@ -1,8 +0,0 @@
/// <reference types="node" />
/**
* Detect HTML encoding
* @param body Body in Buffer
* @returns encoding
*/
export declare function detectEncoding(body: Buffer): string;
/**
 * Decode a body into a string
 * @param body Body in Buffer
 * @param encoding Encoding of the body
 * @returns decoded string
 */
export declare function toUtf8(body: Buffer, encoding: string): string;

View File

@ -1,40 +0,0 @@
import iconv from 'iconv-lite';
import jschardet from 'jschardet';
const regCharset = new RegExp(/charset\s*=\s*["']?([\w-]+)/, 'i');
/**
 * Detect HTML encoding
 *
 * Tries statistical detection of the bytes first, then a `charset=`
 * declaration found in the body, and falls back to UTF-8.
 *
 * @param body Body in Buffer
 * @returns encoding
 */
export function detectEncoding(body) {
	// By detection
	const sniffed = jschardet.detect(body, { minimumThreshold: 0.99 });
	const fromDetection = sniffed ? toEncoding(sniffed.encoding) : null;
	if (fromDetection != null) {
		return fromDetection;
	}

	// From meta
	const declared = body.toString('ascii').match(regCharset);
	const fromMeta = declared ? toEncoding(declared[1]) : null;

	return fromMeta ?? 'utf-8';
}
/**
 * Decode a body into a string
 * @param body Body in Buffer
 * @param encoding Encoding of the body
 * @returns decoded string
 */
export function toUtf8(body, encoding) {
return iconv.decode(body, encoding);
}
/**
 * Normalize an encoding name
 * @param candidate Encoding name to check
 * @returns `cp932` for the Shift_JIS aliases, the name itself if iconv
 *   supports it, `null` otherwise
 */
function toEncoding(candidate) {
	if (!iconv.encodingExists(candidate)) {
		return null;
	}

	const shiftJisAliases = ['shift_jis', 'shift-jis', 'windows-31j', 'x-sjis'];

	return shiftJisAliases.includes(candidate.toLowerCase()) ? 'cp932' : candidate;
}

20
built/utils/got.d.ts vendored
View File

@ -1,20 +0,0 @@
import * as Got from 'got';
import * as cheerio from 'cheerio';
/** The agents currently in use */
export declare let agent: Got.Agents;
/** Replaces the agents in use */
export declare function setAgent(_agent: Got.Agents): void;
/** Parameters of a single HTTP request */
export declare type GotOptions = {
url: string;
method: 'GET' | 'POST' | 'HEAD';
body?: string;
headers: Record<string, string | undefined>;
/** NOTE(review): presumably matched against the response content type; confirm */
typeFilter?: RegExp;
};
/**
 * Fetches a page and parses it.
 * @returns the body as a string, the loaded cheerio API and the raw response
 */
export declare function scpaping(url: string, opts?: {
lang?: string;
}): Promise<{
body: string;
$: cheerio.CheerioAPI;
response: Got.Response<string>;
}>;
/** Performs a GET request and resolves with the response body */
export declare function get(url: string): Promise<string>;
/** Performs a HEAD request and resolves with the response */
export declare function head(url: string): Promise<Got.Response<string>>;

View File

@ -1,124 +0,0 @@
import got, * as Got from 'got';
import { StatusError } from './status-error.js';
import { detectEncoding, toUtf8 } from './encoding.js';
import * as cheerio from 'cheerio';
import PrivateIp from 'private-ip';
import { dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { readFileSync } from 'node:fs';
// ES modules have no __filename/__dirname, so derive them from import.meta.url
const _filename = fileURLToPath(import.meta.url);
const _dirname = dirname(_filename);
/** Agents handed to got for every request; an empty object until setAgent() is called. */
export let agent = {};
/**
 * Replace the agents used for outgoing requests.
 * @param _agent agents object for got; any falsy value resets to `{}`
 */
export function setAgent(_agent) {
    if (_agent) {
        agent = _agent;
    }
    else {
        agent = {};
    }
}
// package.json found two directories above this module; its version goes into the User-Agent
const repo = JSON.parse(readFileSync(`${_dirname}/../../package.json`, 'utf8'));
// Timeout (ms) applied to each phase: lookup, connect, secureConnect, socket, response, send
const RESPONSE_TIMEOUT = 20 * 1000;
// Timeout (ms) for the whole request
const OPERATION_TIMEOUT = 60 * 1000;
// Largest response accepted; compared against content-length and the transferred byte count
const MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
// User-Agent sent when scraping pages
const BOT_UA = `SummalyBot/${repo.version}`;
/**
 * Download an HTML page, decode it and load it into cheerio.
 *
 * @param url URL of the page
 * @param opts optional settings; `opts.lang` is sent as the accept-language header
 * @returns `{ body, $, response }`: decoded markup, cheerio handle and the response
 * @throws StatusError (400) when the response came from a private IP and
 *         private IPs are not allowed
 */
export async function scpaping(url, opts) {
    const headers = {
        'accept': 'text/html,application/xhtml+xml',
        'user-agent': BOT_UA,
        'accept-language': opts?.lang
    };
    const response = await getResponse({
        url,
        method: 'GET',
        headers,
        typeFilter: /^(text\/html|application\/xhtml\+xml)/,
    });
    // SUMMALY_ALLOW_PRIVATE_IP is for testing; a custom agent also lifts the restriction
    const privateIpAllowed = process.env.SUMMALY_ALLOW_PRIVATE_IP === 'true' || Object.keys(agent).length > 0;
    if (!privateIpAllowed) {
        if (response.ip && PrivateIp(response.ip)) {
            throw new StatusError(`Private IP rejected ${response.ip}`, 400, 'Private IP Rejected');
        }
    }
    const { rawBody } = response;
    const body = toUtf8(rawBody, detectEncoding(rawBody));
    return {
        body,
        $: cheerio.load(body),
        response,
    };
}
/**
 * GET `url` accepting any content type.
 * @param url URL to fetch
 * @returns the `body` of the response
 */
export async function get(url) {
    const { body } = await getResponse({
        url,
        method: 'GET',
        headers: { 'accept': '*/*' },
    });
    return body;
}
/**
 * Send a HEAD request to `url` accepting any content type.
 * @param url URL to probe
 * @returns the response object
 */
export async function head(url) {
    const request = {
        url,
        method: 'HEAD',
        headers: { 'accept': '*/*' },
    };
    const response = await getResponse(request);
    return response;
}
/**
 * Start a got request described by `args` and wait for its response.
 *
 * Retries and HTTP/2 are turned off; every phase is limited to
 * RESPONSE_TIMEOUT and the request as a whole to OPERATION_TIMEOUT.
 *
 * @param args `{ url, method, headers, body?, typeFilter? }`
 * @returns whatever receiveResponse() resolves to
 */
async function getResponse(args) {
    const { url, method, headers, body, typeFilter } = args;
    const phases = ['lookup', 'connect', 'secureConnect', 'socket', 'response', 'send'];
    const phaseTimeouts = Object.fromEntries(phases.map((phase) => [phase, RESPONSE_TIMEOUT]));
    const req = got(url, {
        method,
        headers,
        body,
        timeout: {
            ...phaseTimeouts,
            request: OPERATION_TIMEOUT, // whole operation timeout
        },
        agent,
        http2: false,
        retry: {
            limit: 0,
        },
    });
    return await receiveResponse({ req, typeFilter });
}
/**
 * Wait for a started request, cancelling it early when the response is of the
 * wrong type or larger than MAX_RESPONSE_SIZE.
 *
 * @param args `{ req, typeFilter? }`: the request returned by got and an
 *             optional pattern the content-type must match
 * @returns the response the request resolves to
 * @throws StatusError in place of Got.HTTPError; any other error is rethrown as is
 */
async function receiveResponse(args) {
    const { req, typeFilter } = args;
    const limit = MAX_RESPONSE_SIZE;
    req.on('response', (res) => {
        const contentType = res.headers['content-type'];
        // Check html
        if (typeFilter && !contentType?.match(typeFilter)) {
            req.cancel(`Rejected by type filter ${contentType}`);
            return;
        }
        // Size check using the response header
        const declared = res.headers['content-length'];
        if (declared == null) {
            return;
        }
        const size = Number(declared);
        if (size > limit) {
            req.cancel(`maxSize exceeded (${size} > ${limit}) on response`);
        }
    });
    // Size check using the data received so far
    req.on('downloadProgress', (progress) => {
        const overLimit = progress.transferred > limit;
        if (overLimit && progress.percent !== 1) {
            req.cancel(`maxSize exceeded (${progress.transferred} > ${limit}) on response`);
        }
    });
    // Obtain the response, reshaping status-code errors
    try {
        return await req;
    }
    catch (e) {
        if (!(e instanceof Got.HTTPError)) {
            throw e;
        }
        const { statusCode, statusMessage } = e.response;
        throw new StatusError(`${statusCode} ${statusMessage}`, statusCode, statusMessage);
    }
}

View File

@ -1 +0,0 @@
/**
 * Check whether a value carries no text
 * @param val Value to check
 * @returns true when val is undefined, null or contains only whitespace
 */
export default function (val: string): boolean;

View File

@ -1,14 +0,0 @@
/**
 * Tell whether a value carries no text.
 * @param val value to check
 * @returns true for undefined, null and whitespace-only strings, false otherwise
 */
export default function (val) {
    const missing = val === undefined || val === null;
    return missing || val.trim() === '';
}

View File

@ -1,7 +0,0 @@
/** Error that carries the HTTP status it was created for */
export declare class StatusError extends Error {
/** Always 'StatusError' */
name: string;
statusCode: number;
statusMessage?: string;
/** true when statusCode is a number from 400 up to, but not including, 500 */
isPermanentError: boolean;
constructor(message: string, statusCode: number, statusMessage?: string);
}

View File

@ -1,9 +0,0 @@
/**
 * Error that carries the HTTP status it was created for.
 *
 * `isPermanentError` is true only when `statusCode` is a number in the
 * 400-499 range.
 */
export class StatusError extends Error {
    /**
     * @param message error message
     * @param statusCode status code stored on the error
     * @param statusMessage status text stored on the error
     */
    constructor(message, statusCode, statusMessage) {
        super(message);
        const isClientError = typeof statusCode === 'number' && 400 <= statusCode && statusCode < 500;
        this.name = 'StatusError';
        this.statusCode = statusCode;
        this.statusMessage = statusMessage;
        this.isPermanentError = isClientError;
    }
}

9241
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
{
"name": "summaly",
"version": "4.0.0",
"name": "@misskey-dev/summaly",
"version": "5.0.2",
"description": "Get web page's summary",
"author": "syuilo <syuilotan@yahoo.co.jp>",
"license": "MIT",
@ -9,33 +9,39 @@
"main": "./built/index.js",
"type": "module",
"types": "./built/index.d.ts",
"packageManager": "pnpm@8.13.1",
"files": [
"built",
"LICENSE"
],
"scripts": {
"build": "tsc",
"eslint": "eslint --quiet \"src/**/*.ts\"",
"test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --silent=false --verbose false",
"serve": "fastify start ./built/index.js"
},
"devDependencies": {
"@jest/globals": "^29.4.2",
"@swc/core": "^1.3.35",
"@swc/jest": "^0.2.24",
"@jest/globals": "^29.7.0",
"@swc/core": "^1.3.101",
"@swc/jest": "^0.2.29",
"@types/cheerio": "0.22.18",
"@types/debug": "4.1.7",
"@types/escape-regexp": "^0.0.1",
"@types/node": "16.11.12",
"@types/node": "20.10.6",
"@typescript-eslint/eslint-plugin": "^6.16.0",
"@typescript-eslint/parser": "^6.16.0",
"debug": "^4.3.4",
"fastify": "^4.13.0",
"fastify-cli": "^5.7.1",
"jest": "^29.4.2",
"typescript": "4.5.3"
"eslint": "^8.56.0",
"eslint-plugin-import": "^2.29.1",
"fastify": "^4.25.2",
"fastify-cli": "^5.9.0",
"jest": "^29.7.0",
"typescript": "5.3.3"
},
"dependencies": {
"cheerio": "1.0.0-rc.12",
"escape-regexp": "0.0.1",
"got": "^12.6.0",
"got": "^12.6.1",
"html-entities": "2.3.2",
"iconv-lite": "0.6.3",
"jschardet": "3.0.0",

3679
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +1,11 @@
import { URL } from 'node:url';
import { decode as decodeHtml } from 'html-entities';
import * as cheerio from 'cheerio';
import clip from './utils/clip.js';
import cleanupTitle from './utils/cleanup-title.js';
import { decode as decodeHtml } from 'html-entities';
import { get, head, scpaping } from './utils/got.js';
import type { default as Summary, Player } from './summary.js';
import * as cheerio from 'cheerio';
/**
* Contains only the html snippet for a sanitized iframe as the thumbnail is
@ -20,7 +19,20 @@ async function getOEmbedPlayer($: cheerio.CheerioAPI, pageUrl: string): Promise<
return null;
}
const oEmbed = await get((new URL(href, pageUrl)).href);
const oEmbedUrl = (() => {
try {
return new URL(href, pageUrl);
} catch { return null; }
})();
if (!oEmbedUrl) {
return null;
}
const oEmbed = await get(oEmbedUrl.href).catch(() => null);
if (!oEmbed) {
return null;
}
const body = (() => {
try {
return JSON.parse(oEmbed);
@ -38,7 +50,7 @@ async function getOEmbedPlayer($: cheerio.CheerioAPI, pageUrl: string): Promise<
}
const oEmbedHtml = cheerio.load(body.html);
const iframe = oEmbedHtml("iframe");
const iframe = oEmbedHtml('iframe');
if (iframe.length !== 1) {
// Somehow we either have multiple iframes or none
@ -102,6 +114,9 @@ async function getOEmbedPlayer($: cheerio.CheerioAPI, pageUrl: string): Promise<
(iframe.attr('allow') ?? '').split(/\s*;\s*/g)
.filter(s => s)
.filter(s => !ignoredList.includes(s));
if (iframe.attr('allowfullscreen') === '') {
allowedPermissions.push('fullscreen');
}
if (allowedPermissions.some(allow => !safeList.includes(allow))) {
// This iframe is probably too powerful to be embedded
return null;
@ -111,8 +126,8 @@ async function getOEmbedPlayer($: cheerio.CheerioAPI, pageUrl: string): Promise<
url,
width,
height,
allow: allowedPermissions
}
allow: allowedPermissions,
};
}
export default async (_url: URL | string, lang: string | null = null): Promise<Summary | null> => {
@ -122,10 +137,19 @@ export default async (_url: URL | string, lang: string | null = null): Promise<S
const res = await scpaping(url.href, { lang: lang || undefined });
const $ = res.$;
const twitterCard = $('meta[property="twitter:card"]').attr('content');
const twitterCard =
$('meta[name="twitter:card"]').attr('content') ||
$('meta[property="twitter:card"]').attr('content');
// According to docs, name attribute of meta tag is used for twitter card but for compatibility,
// this library will also look for property attribute.
// See https://developer.twitter.com/en/docs/twitter-for-websites/cards/overview/summary
// Property attribute is used for open graph.
// See https://ogp.me/
let title: string | null | undefined =
$('meta[property="og:title"]').attr('content') ||
$('meta[name="twitter:title"]').attr('content') ||
$('meta[property="twitter:title"]').attr('content') ||
$('title').text();
@ -137,6 +161,7 @@ export default async (_url: URL | string, lang: string | null = null): Promise<S
let image: string | null | undefined =
$('meta[property="og:image"]').attr('content') ||
$('meta[name="twitter:image"]').attr('content') ||
$('meta[property="twitter:image"]').attr('content') ||
$('link[rel="image_src"]').attr('href') ||
$('link[rel="apple-touch-icon"]').attr('href') ||
@ -145,26 +170,27 @@ export default async (_url: URL | string, lang: string | null = null): Promise<S
image = image ? (new URL(image, url.href)).href : null;
const playerUrl =
(twitterCard !== 'summary_large_image' && $('meta[property="twitter:player"]').attr('content')) ||
(twitterCard !== 'summary_large_image' && $('meta[name="twitter:player"]').attr('content')) ||
(twitterCard !== 'summary_large_image' && $('meta[property="twitter:player"]').attr('content')) ||
$('meta[property="og:video"]').attr('content') ||
$('meta[property="og:video:secure_url"]').attr('content') ||
$('meta[property="og:video:url"]').attr('content');
const playerWidth = parseInt(
$('meta[property="twitter:player:width"]').attr('content') ||
$('meta[name="twitter:player:width"]').attr('content') ||
$('meta[property="twitter:player:width"]').attr('content') ||
$('meta[property="og:video:width"]').attr('content') ||
'');
const playerHeight = parseInt(
$('meta[property="twitter:player:height"]').attr('content') ||
$('meta[name="twitter:player:height"]').attr('content') ||
$('meta[property="twitter:player:height"]').attr('content') ||
$('meta[property="og:video:height"]').attr('content') ||
'');
let description: string | null | undefined =
$('meta[property="og:description"]').attr('content') ||
$('meta[name="twitter:description"]').attr('content') ||
$('meta[property="twitter:description"]').attr('content') ||
$('meta[name="description"]').attr('content');
@ -176,10 +202,10 @@ export default async (_url: URL | string, lang: string | null = null): Promise<S
description = null;
}
let siteName = decodeHtml(
const siteName = decodeHtml(
$('meta[property="og:site_name"]').attr('content') ||
$('meta[name="application-name"]').attr('content') ||
url.hostname
url.host,
);
const favicon =
@ -187,7 +213,12 @@ export default async (_url: URL | string, lang: string | null = null): Promise<S
$('link[rel="icon"]').attr('href') ||
'/favicon.ico';
const sensitive = $('.tweet').attr('data-possibly-sensitive') === 'true'
const activityPub =
$('link[rel="alternate"][type="application/activity+json"]').attr('href') || null;
// https://developer.mixi.co.jp/connect/mixi_plugin/mixi_check/spec_mixi_check/#toc-18-
const sensitive =
$('meta[property=\'mixi:content-rating\']').attr('content') === '1';
const find = async (path: string) => {
const target = new URL(path, url.href);
@ -201,12 +232,12 @@ export default async (_url: URL | string, lang: string | null = null): Promise<S
const getIcon = async () => {
return (await find(favicon)) || null;
}
};
const [icon, oEmbed] = await Promise.all([
getIcon(),
getOEmbedPlayer($, url.href),
])
]);
// Clean up the title
title = cleanupTitle(title, siteName);
@ -228,5 +259,6 @@ export default async (_url: URL | string, lang: string | null = null): Promise<S
},
sitename: siteName || null,
sensitive,
activityPub,
};
};

View File

@ -1,20 +1,20 @@
/**
* summaly
* https://github.com/syuilo/summaly
* https://github.com/misskey-dev/summaly
*/
import { URL } from 'node:url';
import tracer from 'trace-redirect';
import Summary from './summary.js';
import type { IPlugin as _IPlugin } from './iplugin.js';
export type IPlugin = _IPlugin;
import { SummalyResult } from './summary.js';
import { SummalyPlugin } from './iplugin.js';
export * from './iplugin.js';
import general from './general.js';
import * as Got from 'got';
import { setAgent } from './utils/got.js';
import type { FastifyInstance } from 'fastify';
import { plugins as builtinPlugins } from './plugins/index.js';
type Options = {
export type SummalyOptions = {
/**
* Accept-Language for the request
*/
@ -28,7 +28,7 @@ type Options = {
/**
* Custom Plugins
*/
plugins?: IPlugin[];
plugins?: SummalyPlugin[];
/**
* Custom HTTP agent
@ -36,26 +36,19 @@ type Options = {
agent?: Got.Agents;
};
type Result = Summary & {
/**
* The actual url of that web page
*/
url: string;
};
const defaultOptions = {
export const summalyDefaultOptions = {
lang: null,
followRedirects: true,
plugins: [],
} as Options;
} as SummalyOptions;
/**
* Summarize a web page
*/
export const summaly = async (url: string, options?: Options): Promise<Result> => {
export const summaly = async (url: string, options?: SummalyOptions): Promise<SummalyResult> => {
if (options?.agent) setAgent(options.agent);
const opts = Object.assign(defaultOptions, options);
const opts = Object.assign(summalyDefaultOptions, options);
const plugins = builtinPlugins.concat(opts.plugins || []);
@ -68,7 +61,7 @@ export const summaly = async (url: string, options?: Options): Promise<Result> =
actualUrl = url;
}
}
const _url = new URL(actualUrl);
// Find matching plugin
@ -78,7 +71,7 @@ export const summaly = async (url: string, options?: Options): Promise<Result> =
const summary = await (match ? match.summarize : general)(_url, opts.lang || undefined);
if (summary == null) {
throw 'failed summarize';
throw new Error('failed summarize');
}
return Object.assign(summary, {
@ -86,7 +79,7 @@ export const summaly = async (url: string, options?: Options): Promise<Result> =
});
};
export default function (fastify: FastifyInstance, options: Options, done: (err?: Error) => void) {
export default function (fastify: FastifyInstance, options: SummalyOptions, done: (err?: Error) => void) {
fastify.get<{
Querystring: {
url?: string;
@ -116,4 +109,4 @@ export default function (fastify: FastifyInstance, options: Options, done: (err?
});
done();
}
}

View File

@ -1,7 +1,7 @@
import type { URL } from 'node:url';
import Summary from './summary.js';
export interface IPlugin {
export interface SummalyPlugin {
test: (url: URL) => boolean;
summarize: (url: URL, lang?: string) => Promise<Summary>;
summarize: (url: URL, lang?: string) => Promise<Summary | null>;
}

View File

@ -55,5 +55,6 @@ export async function summarize(url: URL): Promise<summary> {
allow: playerUrl ? ['fullscreen', 'encrypted-media'] : [],
},
sitename: 'Amazon',
activityPub: null,
};
}

View File

@ -0,0 +1,18 @@
import { URL } from 'node:url';
import { scpaping } from '../utils/got.js';
import general from '../general.js';
import Summary from '../summary.js';
/**
 * Decide whether this plugin should handle `url`.
 * @param url URL of the page to summarize
 * @returns true for `spotify.link` and for `<alphanumeric>.app.link` hosts
 */
export function test(url: URL): boolean {
	const { hostname } = url;
	if (hostname === 'spotify.link') {
		return true;
	}
	// Matches deep links that use Branch.io
	return /^[a-zA-Z0-9]+\.app\.link$/.test(hostname);
}
/**
 * Summarize a Branch.io deep link through the general summarizer.
 *
 * The request is made against a copy of `url` so the caller's URL object is
 * left untouched, and `$web_only` is set rather than appended so a link that
 * already carries the parameter does not end up with it twice.
 *
 * @param url URL of the deep link
 * @param lang language passed on to the general summarizer
 * @returns whatever the general summarizer returns for the web-only URL
 */
export async function summarize(url: URL, lang: string | null = null): Promise<Summary | null> {
	// https://help.branch.io/using-branch/docs/creating-a-deep-link#redirections
	// Force a redirect to the web version so that Branch.io's own page is not opened
	const webOnlyUrl = new URL(url.href);
	webOnlyUrl.searchParams.set('$web_only', 'true');

	return await general(webOnlyUrl, lang);
}

View File

@ -1,8 +1,10 @@
import { IPlugin } from '@/iplugin.js';
import { SummalyPlugin } from '@/iplugin.js';
import * as amazon from './amazon.js';
import * as wikipedia from './wikipedia.js';
import * as branchIoDeeplinks from './branchio-deeplinks.js';
export const plugins: IPlugin[] = [
amazon,
wikipedia,
export const plugins: SummalyPlugin[] = [
amazon,
wikipedia,
branchIoDeeplinks,
];

View File

@ -25,7 +25,7 @@ export async function summarize(url: URL): Promise<summary> {
log(body);
if (!('query' in body) || !('pages' in body.query)) {
throw 'fetch failed';
throw new Error('fetch failed');
}
const info = body.query.pages[Object.keys(body.query.pages)[0]];
@ -42,5 +42,6 @@ export async function summarize(url: URL): Promise<summary> {
allow: [],
},
sitename: 'Wikipedia',
activityPub: null,
};
}

View File

@ -1,8 +1,8 @@
type Summary = {
/**
* The description of that web page
* The title of that web page
*/
description: string | null;
title: string | null;
/**
* The url of the icon of that web page
@ -10,29 +10,41 @@ type Summary = {
icon: string | null;
/**
* The name of site of that web page
* The description of that web page
*/
sitename: string | null;
description: string | null;
/**
* The url of the thumbnail of that web page
*/
thumbnail: string | null;
/**
* The name of site of that web page
*/
sitename: string | null;
/**
* The player of that web page
*/
player: Player;
/**
* The title of that web page
*/
title: string | null;
/**
* Possibly sensitive
*/
sensitive?: boolean;
/**
* The url of the ActivityPub representation of that web page
*/
activityPub: string | null;
};
export type SummalyResult = Summary & {
/**
* The actual url of that web page
*/
url: string;
};
export default Summary;

View File

@ -0,0 +1,3 @@
<!DOCTYPE html>
<meta charset="utf-8">
<link rel="alternate" type="application/activity+json" href="https://misskey.test/notes/abcdefg">

View File

@ -7,6 +7,6 @@
</head>
<body>
<h1>KISS principle</h1>
<p>KISS is an acronym for "Keep it simple, stupid" as a design principle noted by the U.S. Navy in 1960.</p>
<p>KISS is an acronym for Keep it simple, stupid as a design principle noted by the U.S. Navy in 1960.</p>
</body>
</html>

View File

@ -1,9 +1,13 @@
<!doctype html>
<html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta property="og:site_name" content="Alice's Site">
<title>Strawberry Pasta | Alice's Site</title>
</head>
<body>
<h1>Strawberry Pasta</h1>
<p>Strawberry pasta is a kind of pasta with strawberry sauce.</p>
</body>
</html>

View File

@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta property="mixi:content-rating" content="1">
<title>SENSITIVE CONTENT!!</title>
</head>
<body>
<h1>Yo</h1>
<p>Hey hey hey syuilo.</p>
</body>
</html>

View File

@ -3,9 +3,10 @@
<html lang="en">
<head>
<meta charset="utf-8">
<title>KISS principle</title>
</head>
<body>
<h1>KISS principle</h1>
<p>KISS is an acronym for "Keep it simple, stupid" as a design principle noted by the U.S. Navy in 1960.</p>
<p>KISS is an acronym for Keep it simple, stupid as a design principle noted by the U.S. Navy in 1960.</p>
</body>
</html>

View File

@ -1,2 +1,3 @@
<!DOCTYPE html>
<link type="application/json+oembed" href="http://localhost:3060/oembe.json" />
<meta property="og:description" content="nonexistent">

View File

@ -0,0 +1,3 @@
<!DOCTYPE html>
<link type="application/json+oembed" href="http://localhost:+3060/oembed.json" />
<meta property="og:description" content="wrong url">

View File

@ -1,2 +0,0 @@
<!DOCTYPE html>
<link type="application/json+oembed" href="http://localhost+:3060/oembed.json" />

View File

@ -46,6 +46,65 @@ afterEach(async () => {
/* tests below */
test('basic', async () => {
app = fastify();
app.get('/', (request, reply) => {
return reply.send(fs.createReadStream(_dirname + '/htmls/basic.html'));
});
await app.listen({ port });
expect(await summaly(host)).toEqual({
title: 'KISS principle',
icon: null,
description: null,
thumbnail: null,
player: {
url: null,
width: null,
height: null,
"allow": [
"autoplay",
"encrypted-media",
"fullscreen",
],
},
sitename: 'localhost:3060',
sensitive: false,
url: host,
activityPub: null,
});
});
test('Stage Bye Stage', async () => {
// If this test fails, you must rewrite the result data and the example in README.md.
const summary = await summaly('https://www.youtube.com/watch?v=NMIEAhH_fTU');
expect(summary).toEqual(
{
"title": "【アイドルマスター】「Stage Bye Stage」(歌:島村卯月、渋谷凛、本田未央)",
"icon": "https://www.youtube.com/s/desktop/28b0985e/img/favicon.ico",
"description": "Website▶https://columbia.jp/idolmaster/Playlist▶https://www.youtube.com/playlist?list=PL83A2998CF3BBC86D2018年7月18日発売予定THE IDOLM@STER CINDERELLA GIRLS CG STAR...",
"thumbnail": "https://i.ytimg.com/vi/NMIEAhH_fTU/maxresdefault.jpg",
"player": {
"url": "https://www.youtube.com/embed/NMIEAhH_fTU?feature=oembed",
"width": 200,
"height": 113,
"allow": [
"autoplay",
"clipboard-write",
"encrypted-media",
"picture-in-picture",
"web-share",
"fullscreen",
]
},
"sitename": "YouTube",
"sensitive": false,
"activityPub": null,
"url": "https://www.youtube.com/watch?v=NMIEAhH_fTU"
}
);
});
test('faviconがHTML上で指定されていないが、ルートに存在する場合、正しく設定される', async () => {
app = fastify();
app.get('/', (request, reply) => {
@ -294,7 +353,14 @@ describe("oEmbed", () => {
await setUpFastify('oembed-allow-fullscreen.json');
const summary = await summaly(host);
expect(summary.player.url).toBe('https://example.com/');
expect(summary.player.allow).toStrictEqual(['fullscreen'])
expect(summary.player.allow).toStrictEqual(['fullscreen']);
});
test('allows legacy allowfullscreen', async () => {
await setUpFastify('oembed-allow-fullscreen-legacy.json');
const summary = await summaly(host);
expect(summary.player.url).toBe('https://example.com/');
expect(summary.player.allow).toStrictEqual(['fullscreen']);
});
test('allows safelisted permissions', async () => {
@ -322,12 +388,16 @@ describe("oEmbed", () => {
test('oEmbed with nonexistent path', async () => {
await setUpFastify('oembed.json', 'htmls/oembed-nonexistent-path.html');
await expect(summaly(host)).rejects.toThrow('404 Not Found');
const summary = await summaly(host);
expect(summary.player.url).toBe(null);
expect(summary.description).toBe('nonexistent');
});
test('oEmbed with wrong path', async () => {
await setUpFastify('oembed.json', 'htmls/oembed-wrong-path.html');
await expect(summaly(host)).rejects.toThrow();
const summary = await summaly(host);
expect(summary.player.url).toBe(null);
expect(summary.description).toBe('wrong url');
});
test('oEmbed with OpenGraph', async () => {
@ -358,3 +428,47 @@ describe("oEmbed", () => {
expect(summary.player.height).toBe(300);
});
});
describe('ActivityPub', () => {
test('Basic', async () => {
app = fastify();
app.get('*', (request, reply) => {
return reply.send(fs.createReadStream(_dirname + '/htmls/activitypub.html'));
});
await app.listen({ port });
const summary = await summaly(host);
expect(summary.activityPub).toBe('https://misskey.test/notes/abcdefg');
});
test('Null', async () => {
app = fastify();
app.get('*', (request, reply) => {
return reply.send(fs.createReadStream(_dirname + '/htmls/basic.html'));
});
await app.listen({ port });
const summary = await summaly(host);
expect(summary.activityPub).toBe(null);
});
});
describe('sensitive', () => {
test('default', async () => {
app = fastify();
app.get('/', (request, reply) => {
return reply.send(fs.createReadStream(_dirname + '/htmls/basic.html'));
});
await app.listen({ port });
expect((await summaly(host)).sensitive).toBe(false);
});
test('mixi:content-rating 1', async () => {
app = fastify();
app.get('/', (request, reply) => {
return reply.send(fs.createReadStream(_dirname + '/htmls/mixi-sensitive.html'));
});
await app.listen({ port });
expect((await summaly(host)).sensitive).toBe(true);
});
});

View File

@ -0,0 +1,7 @@
{
"version": "1.0",
"type": "rich",
"html": "<iframe src='https://example.com/' allowfullscreen></iframe>",
"width": 500,
"height": 300
}

View File

@ -1,86 +0,0 @@
{
"rules": {
"align": [true,
"parameters",
"statements"
],
"ban": false,
"class-name": true,
"comment-format": [true,
"check-upper-case"
],
"curly": true,
"eofline": true,
"forin": false,
"indent": [true, "tabs"],
"interface-name": false,
"jsdoc-format": true,
"label-position": true,
"label-undefined": true,
"max-line-length": false,
"member-access": false,
"member-ordering": [true,
"static-before-instance",
"variables-before-functions"
],
"no-any": false,
"no-arg": true,
"no-bitwise": true,
"no-console": [true,
"debug",
"info",
"time",
"timeEnd",
"trace"
],
"no-consecutive-blank-lines": true,
"no-construct": true,
"no-constructor-vars": true,
"no-debugger": true,
"no-duplicate-key": true,
"no-shadowed-variable": false,
"no-duplicate-variable": true,
"no-empty": true,
"no-eval": true,
"no-internal-module": true,
"no-require-imports": false,
"no-string-literal": false,
"no-switch-case-fall-through": true,
"no-trailing-whitespace": true,
"no-unreachable": true,
"no-unused-expression": true,
"no-unused-variable": true,
"no-use-before-declare": true,
"no-var-keyword": true,
"no-var-requires": false,
"one-line": [true,
"check-catch",
"check-whitespace"
],
"quotemark": false,
"radix": true,
"semicolon": true,
"switch-default": false,
"triple-equals": false,
"typedef": [true,
"call-signature",
"property-declaration"
],
"typedef-whitespace": [true, {
"call-signature": "nospace",
"index-signature": "nospace",
"parameter": "nospace",
"property-declaration": "nospace",
"variable-declaration": "nospace"
}],
"use-strict": false,
"variable-name": false,
"whitespace": [true,
"check-branch",
"check-decl",
"check-operator",
"check-separator",
"check-type"
]
}
}