2 Commits

Author SHA1 Message Date
a36652c859 v4.0.0 2023-03-13 17:53:29 +00:00
eab3766db9 feat: add oEmbed support (#6)
* feat: add oEmbed support

* more safelisted features

* fix the syntax

* Update README.md

* permissions

* names

* playerを使うように

* fix type error

* support width (for size ratio)

* test for type: video

* nullable width

* restore max height test

* ignored permissions

* restore autoplay

* Use WHATWG URL

---------

Co-authored-by: tamaina <tamaina@hotmail.co.jp>
2023-03-14 02:46:41 +09:00
16 changed files with 77 additions and 117 deletions

View File

@ -1,6 +1,7 @@
Unreleased
4.0.0 / 2023-03-14
------------------
* oEmbed type=richの制限的なサポート
* プラグインの引数がWHATWG URLになりました
3.0.4 / 2023-02-12
------------------

View File

@ -51,11 +51,13 @@ npm run serve
``` typescript
interface IPlugin {
test: (url: URL.Url) => boolean;
summarize: (url: URL.Url) => Promise<Summary>;
test: (url: URL) => boolean;
summarize: (url: URL) => Promise<Summary>;
}
```
URLs passed to plugins are WHATWG `URL` objects since v4.
### Returns
A Promise that resolves to an object containing the properties below:
@ -127,12 +129,5 @@ License
----------------------------------------------------------------
[MIT](LICENSE)
[npm-link]: https://www.npmjs.com/package/summaly
[npm-badge]: https://img.shields.io/npm/v/summaly.svg?style=flat-square
[mit]: http://opensource.org/licenses/MIT
[mit-badge]: https://img.shields.io/badge/license-MIT-444444.svg?style=flat-square
[travis-link]: https://travis-ci.org/syuilo/summaly
[travis-badge]: http://img.shields.io/travis/syuilo/summaly.svg?style=flat-square
[himasaku]: https://himasaku.net
[himawari-badge]: https://img.shields.io/badge/%E5%8F%A4%E8%B0%B7-%E5%90%91%E6%97%A5%E8%91%B5-1684c5.svg?style=flat-square
[sakurako-badge]: https://img.shields.io/badge/%E5%A4%A7%E5%AE%A4-%E6%AB%BB%E5%AD%90-efb02a.svg?style=flat-square

4
built/general.d.ts vendored
View File

@ -1,4 +1,4 @@
import * as URL from 'node:url';
import { URL } from 'node:url';
import type { default as Summary } from './summary.js';
declare const _default: (url: URL.Url, lang?: string | null) => Promise<Summary | null>;
declare const _default: (_url: URL | string, lang?: string | null) => Promise<Summary | null>;
export default _default;

View File

@ -1,4 +1,4 @@
import * as URL from 'node:url';
import { URL } from 'node:url';
import clip from './utils/clip.js';
import cleanupTitle from './utils/cleanup-title.js';
import { decode as decodeHtml } from 'html-entities';
@ -15,9 +15,7 @@ async function getOEmbedPlayer($, pageUrl) {
if (!href) {
return null;
}
// XXX: Use global URL object instead of the deprecated `node:url`
// Disallow relative URL as no one seems to use it
const oEmbed = await get(URL.resolve(pageUrl, href));
const oEmbed = await get((new URL(href, pageUrl)).href);
const body = (() => {
try {
return JSON.parse(oEmbed);
@ -47,9 +45,13 @@ async function getOEmbedPlayer($, pageUrl) {
// No src?
return null;
}
// XXX: Use global URL object instead of the deprecated `node:url`
if (URL.parse(url).protocol !== 'https:') {
// Allow only HTTPS for best security
try {
if ((new URL(url)).protocol !== 'https:') {
// Allow only HTTPS for best security
return null;
}
}
catch (e) {
return null;
}
// Height is the most important, width is okay to be null. The implementer
@ -98,9 +100,10 @@ async function getOEmbedPlayer($, pageUrl) {
allow: allowedPermissions
};
}
export default async (url, lang = null) => {
export default async (_url, lang = null) => {
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/))
lang = null;
const url = typeof _url === 'string' ? new URL(_url) : _url;
const res = await scpaping(url.href, { lang: lang || undefined });
const $ = res.$;
const twitterCard = $('meta[property="twitter:card"]').attr('content');
@ -116,7 +119,7 @@ export default async (url, lang = null) => {
$('link[rel="image_src"]').attr('href') ||
$('link[rel="apple-touch-icon"]').attr('href') ||
$('link[rel="apple-touch-icon image_src"]').attr('href');
image = image ? URL.resolve(url.href, image) : null;
image = image ? (new URL(image, url.href)).href : null;
const playerUrl = (twitterCard !== 'summary_large_image' && $('meta[property="twitter:player"]').attr('content')) ||
(twitterCard !== 'summary_large_image' && $('meta[name="twitter:player"]').attr('content')) ||
$('meta[property="og:video"]').attr('content') ||
@ -139,40 +142,25 @@ export default async (url, lang = null) => {
if (title === description) {
description = null;
}
let siteName = $('meta[property="og:site_name"]').attr('content') ||
let siteName = decodeHtml($('meta[property="og:site_name"]').attr('content') ||
$('meta[name="application-name"]').attr('content') ||
url.hostname;
siteName = siteName ? decodeHtml(siteName) : null;
url.hostname);
const favicon = $('link[rel="shortcut icon"]').attr('href') ||
$('link[rel="icon"]').attr('href') ||
'/favicon.ico';
const sensitive = $('.tweet').attr('data-possibly-sensitive') === 'true';
const find = async (path) => {
const target = URL.resolve(url.href, path);
const target = new URL(path, url.href);
try {
await head(target);
await head(target.href);
return target;
}
catch (e) {
return null;
}
};
// 相対的なURL (ex. test) を絶対的 (ex. /test) に変換
const toAbsolute = (relativeURLString) => {
const relativeURL = URL.parse(relativeURLString);
const isAbsolute = relativeURL.slashes || relativeURL.path !== null && relativeURL.path[0] === '/';
// 既に絶対的なら、即座に値を返却
if (isAbsolute) {
return relativeURLString;
}
// スラッシュを付けて返却
return '/' + relativeURLString;
};
const getIcon = async () => {
return await find(favicon) ||
// 相対指定を絶対指定に変換し再試行
await find(toAbsolute(favicon)) ||
null;
return (await find(favicon)) || null;
};
const [icon, oEmbed] = await Promise.all([
getIcon(),
@ -185,14 +173,14 @@ export default async (url, lang = null) => {
}
return {
title: title || null,
icon: icon || null,
icon: icon?.href || null,
description: description || null,
thumbnail: image || null,
player: oEmbed ?? {
url: playerUrl || null,
width: Number.isNaN(playerWidth) ? null : playerWidth,
height: Number.isNaN(playerHeight) ? null : playerHeight,
allow: ['fullscreen', 'encrypted-media'],
allow: ['autoplay', 'encrypted-media', 'fullscreen'],
},
sitename: siteName || null,
sensitive,

View File

@ -2,7 +2,7 @@
* summaly
* https://github.com/syuilo/summaly
*/
import * as URL from 'node:url';
import { URL } from 'node:url';
import tracer from 'trace-redirect';
import general from './general.js';
import { setAgent } from './utils/got.js';
@ -30,7 +30,7 @@ export const summaly = async (url, options) => {
actualUrl = url;
}
}
const _url = URL.parse(actualUrl, true);
const _url = new URL(actualUrl);
// Find matching plugin
const match = plugins.filter(plugin => plugin.test(_url))[0];
// Get summary

6
built/iplugin.d.ts vendored
View File

@ -1,7 +1,7 @@
/// <reference types="node" />
import * as URL from 'node:url';
import type { URL } from 'node:url';
import Summary from './summary.js';
export interface IPlugin {
test: (url: URL.Url) => boolean;
summarize: (url: URL.Url, lang?: string) => Promise<Summary>;
test: (url: URL) => boolean;
summarize: (url: URL, lang?: string) => Promise<Summary>;
}

View File

@ -1,5 +1,5 @@
/// <reference types="node" />
import * as URL from 'node:url';
import { URL } from 'node:url';
import summary from '../summary.js';
export declare function test(url: URL.Url): boolean;
export declare function summarize(url: URL.Url): Promise<summary>;
export declare function test(url: URL): boolean;
export declare function summarize(url: URL): Promise<summary>;

View File

@ -1,5 +1,5 @@
/// <reference types="node" />
import * as URL from 'node:url';
import { URL } from 'node:url';
import summary from '../summary.js';
export declare function test(url: URL.Url): boolean;
export declare function summarize(url: URL.Url): Promise<summary>;
export declare function test(url: URL): boolean;
export declare function summarize(url: URL): Promise<summary>;

View File

@ -1,6 +1,6 @@
{
"name": "summaly",
"version": "3.0.4",
"version": "4.0.0",
"description": "Get web page's summary",
"author": "syuilo <syuilotan@yahoo.co.jp>",
"license": "MIT",
@ -25,7 +25,6 @@
"@types/cheerio": "0.22.18",
"@types/debug": "4.1.7",
"@types/escape-regexp": "^0.0.1",
"@types/html-entities": "1.3.4",
"@types/node": "16.11.12",
"debug": "^4.3.4",
"fastify": "^4.13.0",
@ -34,9 +33,9 @@
"typescript": "4.5.3"
},
"dependencies": {
"cheerio": "^1.0.0-rc.12",
"cheerio": "1.0.0-rc.12",
"escape-regexp": "0.0.1",
"got": "^12.5.3",
"got": "^12.6.0",
"html-entities": "2.3.2",
"iconv-lite": "0.6.3",
"jschardet": "3.0.0",

14
pnpm-lock.yaml generated
View File

@ -7,14 +7,13 @@ specifiers:
'@types/cheerio': 0.22.18
'@types/debug': 4.1.7
'@types/escape-regexp': ^0.0.1
'@types/html-entities': 1.3.4
'@types/node': 16.11.12
cheerio: ^1.0.0-rc.12
cheerio: 1.0.0-rc.12
debug: ^4.3.4
escape-regexp: 0.0.1
fastify: ^4.13.0
fastify-cli: ^5.7.1
got: ^12.5.3
got: ^12.6.0
html-entities: 2.3.2
iconv-lite: 0.6.3
jest: ^29.4.2
@ -40,7 +39,6 @@ devDependencies:
'@types/cheerio': 0.22.18
'@types/debug': 4.1.7
'@types/escape-regexp': 0.0.1
'@types/html-entities': 1.3.4
'@types/node': 16.11.12
debug: 4.3.4
fastify: 4.14.1
@ -895,13 +893,6 @@ packages:
'@types/node': 16.11.12
dev: true
/@types/html-entities/1.3.4:
resolution: {integrity: sha512-Ut62LV90H9tgXwyhmfR8U6yCw/6xeo26IlsbAJJfqPomaqDN2zoLb2Z+cbmy5AycJFhwNJDdH0zqjQp7Ox/eXg==}
deprecated: This is a stub types definition. html-entities provides its own type definitions, so you do not need this installed.
dependencies:
html-entities: 2.3.2
dev: true
/@types/http-cache-semantics/4.0.1:
resolution: {integrity: sha512-SZs7ekbP8CN0txVG2xVRH6EgKmEm31BOxA07vkFaETzZz1xh+cbt8BcI0slpymvwhx5dlFnQG2rTlPVQn+iRPQ==}
dev: false
@ -1838,6 +1829,7 @@ packages:
/html-entities/2.3.2:
resolution: {integrity: sha512-c3Ab/url5ksaT0WyleslpBEthOzWhrjQbg75y7XUsfSzi3Dgzt0l8w5e7DylRn15MTlMMD58dTfzddNS2kcAjQ==}
dev: false
/html-escaper/2.0.2:
resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==}

View File

@ -1,4 +1,4 @@
import * as URL from 'node:url';
import { URL } from 'node:url';
import clip from './utils/clip.js';
import cleanupTitle from './utils/cleanup-title.js';
@ -20,9 +20,7 @@ async function getOEmbedPlayer($: cheerio.CheerioAPI, pageUrl: string): Promise<
return null;
}
// XXX: Use global URL object instead of the deprecated `node:url`
// Disallow relative URL as no one seems to use it
const oEmbed = await get(URL.resolve(pageUrl, href));
const oEmbed = await get((new URL(href, pageUrl)).href);
const body = (() => {
try {
return JSON.parse(oEmbed);
@ -58,9 +56,12 @@ async function getOEmbedPlayer($: cheerio.CheerioAPI, pageUrl: string): Promise<
return null;
}
// XXX: Use global URL object instead of the deprecated `node:url`
if (URL.parse(url).protocol !== 'https:') {
// Allow only HTTPS for best security
try {
if ((new URL(url)).protocol !== 'https:') {
// Allow only HTTPS for best security
return null;
}
} catch (e) {
return null;
}
@ -114,9 +115,11 @@ async function getOEmbedPlayer($: cheerio.CheerioAPI, pageUrl: string): Promise<
}
}
export default async (url: URL.Url, lang: string | null = null): Promise<Summary | null> => {
export default async (_url: URL | string, lang: string | null = null): Promise<Summary | null> => {
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
const url = typeof _url === 'string' ? new URL(_url) : _url;
const res = await scpaping(url.href, { lang: lang || undefined });
const $ = res.$;
const twitterCard = $('meta[property="twitter:card"]').attr('content');
@ -139,7 +142,7 @@ export default async (url: URL.Url, lang: string | null = null): Promise<Summary
$('link[rel="apple-touch-icon"]').attr('href') ||
$('link[rel="apple-touch-icon image_src"]').attr('href');
image = image ? URL.resolve(url.href, image) : null;
image = image ? (new URL(image, url.href)).href : null;
const playerUrl =
(twitterCard !== 'summary_large_image' && $('meta[property="twitter:player"]').attr('content')) ||
@ -173,12 +176,11 @@ export default async (url: URL.Url, lang: string | null = null): Promise<Summary
description = null;
}
let siteName =
let siteName = decodeHtml(
$('meta[property="og:site_name"]').attr('content') ||
$('meta[name="application-name"]').attr('content') ||
url.hostname;
siteName = siteName ? decodeHtml(siteName) : null;
url.hostname
);
const favicon =
$('link[rel="shortcut icon"]').attr('href') ||
@ -188,34 +190,17 @@ export default async (url: URL.Url, lang: string | null = null): Promise<Summary
const sensitive = $('.tweet').attr('data-possibly-sensitive') === 'true'
const find = async (path: string) => {
const target = URL.resolve(url.href, path);
const target = new URL(path, url.href);
try {
await head(target);
await head(target.href);
return target;
} catch (e) {
return null;
}
};
// 相対的なURL (ex. test) を絶対的 (ex. /test) に変換
const toAbsolute = (relativeURLString: string): string => {
const relativeURL = URL.parse(relativeURLString);
const isAbsolute = relativeURL.slashes || relativeURL.path !== null && relativeURL.path[0] === '/';
// 既に絶対的なら、即座に値を返却
if (isAbsolute) {
return relativeURLString;
}
// スラッシュを付けて返却
return '/' + relativeURLString;
};
const getIcon = async () => {
return await find(favicon) ||
// 相対指定を絶対指定に変換し再試行
await find(toAbsolute(favicon)) ||
null;
return (await find(favicon)) || null;
}
const [icon, oEmbed] = await Promise.all([
@ -232,14 +217,14 @@ export default async (url: URL.Url, lang: string | null = null): Promise<Summary
return {
title: title || null,
icon: icon || null,
icon: icon?.href || null,
description: description || null,
thumbnail: image || null,
player: oEmbed ?? {
url: playerUrl || null,
width: Number.isNaN(playerWidth) ? null : playerWidth,
height: Number.isNaN(playerHeight) ? null : playerHeight,
allow: ['fullscreen', 'encrypted-media'],
allow: ['autoplay', 'encrypted-media', 'fullscreen'],
},
sitename: siteName || null,
sensitive,

View File

@ -3,7 +3,7 @@
* https://github.com/syuilo/summaly
*/
import * as URL from 'node:url';
import { URL } from 'node:url';
import tracer from 'trace-redirect';
import Summary from './summary.js';
import type { IPlugin as _IPlugin } from './iplugin.js';
@ -69,7 +69,7 @@ export const summaly = async (url: string, options?: Options): Promise<Result> =
}
}
const _url = URL.parse(actualUrl, true);
const _url = new URL(actualUrl);
// Find matching plugin
const match = plugins.filter(plugin => plugin.test(_url))[0];

View File

@ -1,7 +1,7 @@
import * as URL from 'node:url';
import type { URL } from 'node:url';
import Summary from './summary.js';
export interface IPlugin {
test: (url: URL.Url) => boolean;
summarize: (url: URL.Url, lang?: string) => Promise<Summary>;
test: (url: URL) => boolean;
summarize: (url: URL, lang?: string) => Promise<Summary>;
}

View File

@ -1,8 +1,8 @@
import * as URL from 'node:url';
import { URL } from 'node:url';
import { scpaping } from '../utils/got.js';
import summary from '../summary.js';
export function test(url: URL.Url): boolean {
export function test(url: URL): boolean {
return url.hostname === 'www.amazon.com' ||
url.hostname === 'www.amazon.co.jp' ||
url.hostname === 'www.amazon.ca' ||
@ -19,7 +19,7 @@ export function test(url: URL.Url): boolean {
url.hostname === 'www.amazon.au';
}
export async function summarize(url: URL.Url): Promise<summary> {
export async function summarize(url: URL): Promise<summary> {
const res = await scpaping(url.href);
const $ = res.$;

View File

@ -1,4 +1,4 @@
import * as URL from 'node:url';
import { URL } from 'node:url';
import { get } from '../utils/got.js';
import debug from 'debug';
import summary from '../summary.js';
@ -6,12 +6,12 @@ import clip from './../utils/clip.js';
const log = debug('summaly:plugins:wikipedia');
export function test(url: URL.Url): boolean {
export function test(url: URL): boolean {
if (!url.hostname) return false;
return /\.wikipedia\.org$/.test(url.hostname);
}
export async function summarize(url: URL.Url): Promise<summary> {
export async function summarize(url: URL): Promise<summary> {
const lang = url.host ? url.host.split('.')[0] : null;
const title = url.pathname ? url.pathname.split('/')[2] : null;
const endpoint = `https://${lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro=&explaintext=&titles=${title}`;

View File

@ -213,7 +213,7 @@ describe('TwitterCard', () => {
const summary = await summaly(host);
expect(summary.player.url).toBe('https://example.com/embedurl');
expect(summary.player.allow).toStrictEqual(['fullscreen', 'encrypted-media']);
expect(summary.player.allow).toStrictEqual(['autoplay', 'encrypted-media', 'fullscreen']);
});
test('Player detection - Pleroma:video => video', async () => {
@ -225,7 +225,7 @@ describe('TwitterCard', () => {
const summary = await summaly(host);
expect(summary.player.url).toBe('https://example.com/embedurl');
expect(summary.player.allow).toStrictEqual(['fullscreen', 'encrypted-media']);
expect(summary.player.allow).toStrictEqual(['autoplay', 'encrypted-media', 'fullscreen']);
});
test('Player detection - Pleroma:image => image', async () => {