mirror of
https://github.com/misskey-dev/summaly.git
synced 2025-05-04 21:27:18 +09:00
feat: add oEmbed support (#6)
* feat: add oEmbed support * more safelisted features * fix the syntax * Update README.md * permissions * names * playerを使うように * fix type error * support width (for size ratio) * test for type: video * nullable width * restore max height test * ignored permissions * restore autoplay * Use WHATWG URL --------- Co-authored-by: tamaina <tamaina@hotmail.co.jp>
This commit is contained in:
parent
51f3870e1f
commit
eab3766db9
@ -1,3 +1,7 @@
|
||||
Unreleased
|
||||
------------------
|
||||
* oEmbed type=richの制限的なサポート
|
||||
|
||||
3.0.4 / 2023-02-12
|
||||
------------------
|
||||
* 不要な依存関係を除去
|
||||
|
18
README.md
18
README.md
@ -60,16 +60,17 @@ interface IPlugin {
|
||||
|
||||
A Promise of an Object that contains properties below:
|
||||
|
||||
※ Almost all values are nullable. player shoud not be null.
|
||||
※ Almost all values are nullable. player should not be null.
|
||||
|
||||
#### Root
|
||||
|
||||
| Property | Type | Description |
|
||||
| :-------------- | :------- | :--------------------------------------- |
|
||||
| :-------------- | :------- | :------------------------------------------ |
|
||||
| **description** | *string* | The description of the web page |
|
||||
| **icon** | *string* | The url of the icon of the web page |
|
||||
| **sitename** | *string* | The name of the web site |
|
||||
| **thumbnail** | *string* | The url of the thumbnail of the web page |
|
||||
| **oEmbed** | *OEmbedRichIframe* | The oEmbed rich iframe info of the web page |
|
||||
| **player** | *Player* | The player of the web page |
|
||||
| **title** | *string* | The title of the web page |
|
||||
| **url** | *string* | The url of the web page |
|
||||
@ -77,10 +78,21 @@ A Promise of an Object that contains properties below:
|
||||
#### Player
|
||||
|
||||
| Property | Type | Description |
|
||||
| :-------------- | :------- | :--------------------------------------- |
|
||||
| :-------------- | :--------- | :---------------------------------------------- |
|
||||
| **url** | *string* | The url of the player |
|
||||
| **width** | *number* | The width of the player |
|
||||
| **height** | *number* | The height of the player |
|
||||
| **allow** | *string[]* | The names of the allowed permissions for iframe |
|
||||
|
||||
Currently the possible items in `allow` are:
|
||||
|
||||
* `autoplay`
|
||||
* `clipboard-write`
|
||||
* `fullscreen`
|
||||
* `encrypted-media`
|
||||
* `picture-in-picture`
* `web-share`
|
||||
|
||||
See [Permissions Policy](https://developer.mozilla.org/en-US/docs/Web/HTTP/Permissions_Policy) in MDN for details of them.
|
||||
|
||||
### Example
|
||||
|
||||
|
6
built/general.d.ts
vendored
6
built/general.d.ts
vendored
@ -1,4 +1,4 @@
|
||||
import * as URL from 'node:url';
|
||||
import Summary from './summary.js';
|
||||
declare const _default: (url: URL.Url, lang?: string | null) => Promise<Summary | null>;
|
||||
import { URL } from 'node:url';
|
||||
import type { default as Summary } from './summary.js';
|
||||
declare const _default: (_url: URL | string, lang?: string | null) => Promise<Summary | null>;
|
||||
export default _default;
|
||||
|
142
built/general.js
142
built/general.js
@ -1,11 +1,109 @@
|
||||
import * as URL from 'node:url';
|
||||
import { URL } from 'node:url';
|
||||
import clip from './utils/clip.js';
|
||||
import cleanupTitle from './utils/cleanup-title.js';
|
||||
import { decode as decodeHtml } from 'html-entities';
|
||||
import { head, scpaping } from './utils/got.js';
|
||||
export default async (url, lang = null) => {
|
||||
import { get, head, scpaping } from './utils/got.js';
|
||||
import * as cheerio from 'cheerio';
|
||||
/**
 * Converts a raw oEmbed/iframe dimension into a usable pixel count.
 *
 * Returns `null` for anything that is not a finite, positive number
 * (missing values, `null`, empty strings, percentages such as `100%`).
 */
function parseOEmbedDimension(value) {
	if (typeof value !== 'string' && typeof value !== 'number') {
		return null;
	}
	if (typeof value === 'string' && value.trim() === '') {
		// Number('') is 0, which is not a real size
		return null;
	}
	const parsed = Number(value);
	return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
}
/**
 * Builds a sanitized player description from the page's oEmbed (JSON) data.
 *
 * Looks up `<link type="application/json+oembed">` in the page, resolves its
 * href against `pageUrl`, fetches it and inspects the returned `html`. Only
 * the iframe's `src`, size and `allow` list are extracted; the provider's
 * markup itself is never passed through.
 *
 * Errors from resolving or fetching the oEmbed URL are not caught here and
 * propagate to the caller.
 *
 * @param $ Parsed document of the page being summarized.
 * @param pageUrl Absolute URL of that page, used as base for a relative href.
 * @returns `{ url, width, height, allow }`, or `null` when the page has no
 * oEmbed link or the oEmbed data is not a single, safe HTTPS iframe.
 */
async function getOEmbedPlayer($, pageUrl) {
	const href = $('link[type="application/json+oembed"]').attr('href');
	if (!href) {
		return null;
	}
	const oEmbed = await get((new URL(href, pageUrl)).href);
	const body = (() => {
		try {
			return JSON.parse(oEmbed);
		}
		catch {
			return undefined;
		}
	})();
	if (!body || body.version !== '1.0' || !['rich', 'video'].includes(body.type)) {
		// Not a well formed rich oEmbed
		return null;
	}
	const html = body.html;
	if (typeof html !== 'string' || !html.startsWith('<iframe ') || !html.endsWith('</iframe>')) {
		// Missing html, or it includes something else than an iframe
		return null;
	}
	const oEmbedHtml = cheerio.load(html);
	const iframe = oEmbedHtml('iframe');
	if (iframe.length !== 1) {
		// Somehow we either have multiple iframes or none
		return null;
	}
	if (iframe.parents().length !== 2) {
		// Should only have the body and html elements as the parents
		return null;
	}
	const url = iframe.attr('src');
	if (!url) {
		// No src?
		return null;
	}
	try {
		if ((new URL(url)).protocol !== 'https:') {
			// Allow only HTTPS for best security
			return null;
		}
	}
	catch {
		return null;
	}
	// Height is the most important, width is okay to be null. The implementer
	// should choose fixed height instead of fixed aspect ratio if width is null.
	//
	// For example, Spotify's embed page does not strictly follow aspect ratio
	// and thus keeping the height is better than keeping the aspect ratio.
	//
	// Spotify gives `width: 100%, height: 152px` for iframe while `width: 456,
	// height: 152` for oEmbed data, and we treat any percentages as null here.
	const width = parseOEmbedDimension(iframe.attr('width') ?? body.width);
	const rawHeight = parseOEmbedDimension(iframe.attr('height') ?? body.height);
	if (rawHeight === null) {
		// No proper height info
		return null;
	}
	// Cap the height so a provider cannot claim an arbitrarily tall embed
	const height = Math.min(rawHeight, 1024);
	// TODO: This implementation only allows basic syntax of `allow`.
	// Might need to implement better later.
	const safeList = [
		'autoplay',
		'clipboard-write',
		'fullscreen',
		'encrypted-media',
		'picture-in-picture',
		'web-share',
	];
	// YouTube has these but they are almost never used.
	const ignoredList = [
		'gyroscope',
		'accelerometer',
	];
	const allowedPermissions = (iframe.attr('allow') ?? '')
		.split(';')
		.map(s => s.trim())
		.filter(s => s !== '' && !ignoredList.includes(s));
	if (allowedPermissions.some(allow => !safeList.includes(allow))) {
		// This iframe is probably too powerful to be embedded
		return null;
	}
	return {
		url,
		width,
		height,
		allow: allowedPermissions,
	};
}
|
||||
export default async (_url, lang = null) => {
|
||||
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/))
|
||||
lang = null;
|
||||
const url = typeof _url === 'string' ? new URL(_url) : _url;
|
||||
const res = await scpaping(url.href, { lang: lang || undefined });
|
||||
const $ = res.$;
|
||||
const twitterCard = $('meta[property="twitter:card"]').attr('content');
|
||||
@ -21,7 +119,7 @@ export default async (url, lang = null) => {
|
||||
$('link[rel="image_src"]').attr('href') ||
|
||||
$('link[rel="apple-touch-icon"]').attr('href') ||
|
||||
$('link[rel="apple-touch-icon image_src"]').attr('href');
|
||||
image = image ? URL.resolve(url.href, image) : null;
|
||||
image = image ? (new URL(image, url.href)).href : null;
|
||||
const playerUrl = (twitterCard !== 'summary_large_image' && $('meta[property="twitter:player"]').attr('content')) ||
|
||||
(twitterCard !== 'summary_large_image' && $('meta[name="twitter:player"]').attr('content')) ||
|
||||
$('meta[property="og:video"]').attr('content') ||
|
||||
@ -44,39 +142,30 @@ export default async (url, lang = null) => {
|
||||
if (title === description) {
|
||||
description = null;
|
||||
}
|
||||
let siteName = $('meta[property="og:site_name"]').attr('content') ||
|
||||
let siteName = decodeHtml($('meta[property="og:site_name"]').attr('content') ||
|
||||
$('meta[name="application-name"]').attr('content') ||
|
||||
url.hostname;
|
||||
siteName = siteName ? decodeHtml(siteName) : null;
|
||||
url.hostname);
|
||||
const favicon = $('link[rel="shortcut icon"]').attr('href') ||
|
||||
$('link[rel="icon"]').attr('href') ||
|
||||
'/favicon.ico';
|
||||
const sensitive = $('.tweet').attr('data-possibly-sensitive') === 'true';
|
||||
const find = async (path) => {
|
||||
const target = URL.resolve(url.href, path);
|
||||
const target = new URL(path, url.href);
|
||||
try {
|
||||
await head(target);
|
||||
await head(target.href);
|
||||
return target;
|
||||
}
|
||||
catch (e) {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
// 相対的なURL (ex. test) を絶対的 (ex. /test) に変換
|
||||
const toAbsolute = (relativeURLString) => {
|
||||
const relativeURL = URL.parse(relativeURLString);
|
||||
const isAbsolute = relativeURL.slashes || relativeURL.path !== null && relativeURL.path[0] === '/';
|
||||
// 既に絶対的なら、即座に値を返却
|
||||
if (isAbsolute) {
|
||||
return relativeURLString;
|
||||
}
|
||||
// スラッシュを付けて返却
|
||||
return '/' + relativeURLString;
|
||||
const getIcon = async () => {
|
||||
return (await find(favicon)) || null;
|
||||
};
|
||||
const icon = await find(favicon) ||
|
||||
// 相対指定を絶対指定に変換し再試行
|
||||
await find(toAbsolute(favicon)) ||
|
||||
null;
|
||||
const [icon, oEmbed] = await Promise.all([
|
||||
getIcon(),
|
||||
getOEmbedPlayer($, url.href),
|
||||
]);
|
||||
// Clean up the title
|
||||
title = cleanupTitle(title, siteName);
|
||||
if (title === '') {
|
||||
@ -84,13 +173,14 @@ export default async (url, lang = null) => {
|
||||
}
|
||||
return {
|
||||
title: title || null,
|
||||
icon: icon || null,
|
||||
icon: icon?.href || null,
|
||||
description: description || null,
|
||||
thumbnail: image || null,
|
||||
player: {
|
||||
player: oEmbed ?? {
|
||||
url: playerUrl || null,
|
||||
width: Number.isNaN(playerWidth) ? null : playerWidth,
|
||||
height: Number.isNaN(playerHeight) ? null : playerHeight
|
||||
height: Number.isNaN(playerHeight) ? null : playerHeight,
|
||||
allow: ['autoplay', 'encrypted-media', 'fullscreen'],
|
||||
},
|
||||
sitename: siteName || null,
|
||||
sensitive,
|
||||
|
@ -2,7 +2,7 @@
|
||||
* summaly
|
||||
* https://github.com/syuilo/summaly
|
||||
*/
|
||||
import * as URL from 'node:url';
|
||||
import { URL } from 'node:url';
|
||||
import tracer from 'trace-redirect';
|
||||
import general from './general.js';
|
||||
import { setAgent } from './utils/got.js';
|
||||
@ -30,7 +30,7 @@ export const summaly = async (url, options) => {
|
||||
actualUrl = url;
|
||||
}
|
||||
}
|
||||
const _url = URL.parse(actualUrl, true);
|
||||
const _url = new URL(actualUrl);
|
||||
// Find matching plugin
|
||||
const match = plugins.filter(plugin => plugin.test(_url))[0];
|
||||
// Get summary
|
||||
|
6
built/iplugin.d.ts
vendored
6
built/iplugin.d.ts
vendored
@ -1,7 +1,7 @@
|
||||
/// <reference types="node" />
|
||||
import * as URL from 'node:url';
|
||||
import type { URL } from 'node:url';
|
||||
import Summary from './summary.js';
|
||||
export interface IPlugin {
|
||||
test: (url: URL.Url) => boolean;
|
||||
summarize: (url: URL.Url, lang?: string) => Promise<Summary>;
|
||||
test: (url: URL) => boolean;
|
||||
summarize: (url: URL, lang?: string) => Promise<Summary>;
|
||||
}
|
||||
|
6
built/plugins/amazon.d.ts
vendored
6
built/plugins/amazon.d.ts
vendored
@ -1,5 +1,5 @@
|
||||
/// <reference types="node" />
|
||||
import * as URL from 'node:url';
|
||||
import { URL } from 'node:url';
|
||||
import summary from '../summary.js';
|
||||
export declare function test(url: URL.Url): boolean;
|
||||
export declare function summarize(url: URL.Url): Promise<summary>;
|
||||
export declare function test(url: URL): boolean;
|
||||
export declare function summarize(url: URL): Promise<summary>;
|
||||
|
@ -36,8 +36,9 @@ export async function summarize(url) {
|
||||
player: {
|
||||
url: playerUrl || null,
|
||||
width: playerWidth ? parseInt(playerWidth) : null,
|
||||
height: playerHeight ? parseInt(playerHeight) : null
|
||||
height: playerHeight ? parseInt(playerHeight) : null,
|
||||
allow: playerUrl ? ['fullscreen', 'encrypted-media'] : [],
|
||||
},
|
||||
sitename: 'Amazon'
|
||||
sitename: 'Amazon',
|
||||
};
|
||||
}
|
||||
|
6
built/plugins/wikipedia.d.ts
vendored
6
built/plugins/wikipedia.d.ts
vendored
@ -1,5 +1,5 @@
|
||||
/// <reference types="node" />
|
||||
import * as URL from 'node:url';
|
||||
import { URL } from 'node:url';
|
||||
import summary from '../summary.js';
|
||||
export declare function test(url: URL.Url): boolean;
|
||||
export declare function summarize(url: URL.Url): Promise<summary>;
|
||||
export declare function test(url: URL): boolean;
|
||||
export declare function summarize(url: URL): Promise<summary>;
|
||||
|
@ -29,8 +29,9 @@ export async function summarize(url) {
|
||||
player: {
|
||||
url: null,
|
||||
width: null,
|
||||
height: null
|
||||
height: null,
|
||||
allow: [],
|
||||
},
|
||||
sitename: 'Wikipedia'
|
||||
sitename: 'Wikipedia',
|
||||
};
|
||||
}
|
||||
|
4
built/summary.d.ts
vendored
4
built/summary.d.ts
vendored
@ -42,4 +42,8 @@ export declare type Player = {
|
||||
* The height of the player
|
||||
*/
|
||||
height: number | null;
|
||||
/**
|
||||
* The allowed permissions of the iframe
|
||||
*/
|
||||
allow: string[];
|
||||
};
|
||||
|
@ -84,14 +84,15 @@ async function getResponse(args) {
|
||||
limit: 0,
|
||||
},
|
||||
});
|
||||
return await receiveResponce({ req, typeFilter: args.typeFilter });
|
||||
return await receiveResponse({ req, typeFilter: args.typeFilter });
|
||||
}
|
||||
async function receiveResponce(args) {
|
||||
async function receiveResponse(args) {
|
||||
const req = args.req;
|
||||
const maxSize = MAX_RESPONSE_SIZE;
|
||||
req.on('response', (res) => {
|
||||
// Check html
|
||||
if (args.typeFilter && !res.headers['content-type']?.match(args.typeFilter)) {
|
||||
// console.warn(res.headers['content-type']);
|
||||
req.cancel(`Rejected by type filter ${res.headers['content-type']}`);
|
||||
return;
|
||||
}
|
||||
|
3338
pnpm-lock.yaml
generated
Normal file
3338
pnpm-lock.yaml
generated
Normal file
File diff suppressed because it is too large
Load Diff
160
src/general.ts
160
src/general.ts
@ -1,15 +1,125 @@
|
||||
import * as URL from 'node:url';
|
||||
import { URL } from 'node:url';
|
||||
import clip from './utils/clip.js';
|
||||
import cleanupTitle from './utils/cleanup-title.js';
|
||||
|
||||
import { decode as decodeHtml } from 'html-entities';
|
||||
|
||||
import { head, scpaping } from './utils/got.js';
|
||||
import Summary from './summary.js';
|
||||
import { get, head, scpaping } from './utils/got.js';
|
||||
import type { default as Summary, Player } from './summary.js';
|
||||
import * as cheerio from 'cheerio';
|
||||
|
||||
export default async (url: URL.Url, lang: string | null = null): Promise<Summary | null> => {
|
||||
/**
 * Converts a raw oEmbed/iframe dimension into a usable pixel count.
 *
 * Returns `null` for anything that is not a finite, positive number
 * (missing values, `null`, empty strings, percentages such as `100%`).
 */
function parseOEmbedDimension(value: unknown): number | null {
	if (typeof value !== 'string' && typeof value !== 'number') {
		return null;
	}
	if (typeof value === 'string' && value.trim() === '') {
		// Number('') is 0, which is not a real size
		return null;
	}
	const parsed = Number(value);
	return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
}

/**
 * Builds a sanitized player description from the page's oEmbed (JSON) data.
 *
 * Looks up `<link type="application/json+oembed">` in the page, resolves its
 * href against `pageUrl`, fetches it and inspects the returned `html`. Only
 * the iframe's `src`, size and `allow` list are extracted; the provider's
 * markup itself is never passed through.
 *
 * Errors from resolving or fetching the oEmbed URL are not caught here and
 * propagate to the caller.
 *
 * @param $ Parsed document of the page being summarized.
 * @param pageUrl Absolute URL of that page, used as base for a relative href.
 * @returns The player, or `null` when the page has no oEmbed link or the
 * oEmbed data is not a single, safe HTTPS iframe.
 */
async function getOEmbedPlayer($: cheerio.CheerioAPI, pageUrl: string): Promise<Player | null> {
	const href = $('link[type="application/json+oembed"]').attr('href');
	if (!href) {
		return null;
	}

	const oEmbed = await get((new URL(href, pageUrl)).href);
	const body = (() => {
		try {
			return JSON.parse(oEmbed);
		} catch {
			return undefined;
		}
	})();

	if (!body || body.version !== '1.0' || !['rich', 'video'].includes(body.type)) {
		// Not a well formed rich oEmbed
		return null;
	}

	const html: unknown = body.html;
	if (typeof html !== 'string' || !html.startsWith('<iframe ') || !html.endsWith('</iframe>')) {
		// Missing html, or it includes something else than an iframe
		return null;
	}

	const oEmbedHtml = cheerio.load(html);
	const iframe = oEmbedHtml('iframe');

	if (iframe.length !== 1) {
		// Somehow we either have multiple iframes or none
		return null;
	}

	if (iframe.parents().length !== 2) {
		// Should only have the body and html elements as the parents
		return null;
	}

	const url = iframe.attr('src');
	if (!url) {
		// No src?
		return null;
	}

	try {
		if ((new URL(url)).protocol !== 'https:') {
			// Allow only HTTPS for best security
			return null;
		}
	} catch {
		return null;
	}

	// Height is the most important, width is okay to be null. The implementer
	// should choose fixed height instead of fixed aspect ratio if width is null.
	//
	// For example, Spotify's embed page does not strictly follow aspect ratio
	// and thus keeping the height is better than keeping the aspect ratio.
	//
	// Spotify gives `width: 100%, height: 152px` for iframe while `width: 456,
	// height: 152` for oEmbed data, and we treat any percentages as null here.
	const width = parseOEmbedDimension(iframe.attr('width') ?? body.width);
	const rawHeight = parseOEmbedDimension(iframe.attr('height') ?? body.height);
	if (rawHeight === null) {
		// No proper height info
		return null;
	}
	// Cap the height so a provider cannot claim an arbitrarily tall embed
	const height = Math.min(rawHeight, 1024);

	// TODO: This implementation only allows basic syntax of `allow`.
	// Might need to implement better later.
	const safeList = [
		'autoplay',
		'clipboard-write',
		'fullscreen',
		'encrypted-media',
		'picture-in-picture',
		'web-share',
	];
	// YouTube has these but they are almost never used.
	const ignoredList = [
		'gyroscope',
		'accelerometer',
	];
	const allowedPermissions = (iframe.attr('allow') ?? '')
		.split(';')
		.map(s => s.trim())
		.filter(s => s !== '' && !ignoredList.includes(s));
	if (allowedPermissions.some(allow => !safeList.includes(allow))) {
		// This iframe is probably too powerful to be embedded
		return null;
	}

	return {
		url,
		width,
		height,
		allow: allowedPermissions,
	};
}
|
||||
|
||||
export default async (_url: URL | string, lang: string | null = null): Promise<Summary | null> => {
|
||||
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
|
||||
|
||||
const url = typeof _url === 'string' ? new URL(_url) : _url;
|
||||
|
||||
const res = await scpaping(url.href, { lang: lang || undefined });
|
||||
const $ = res.$;
|
||||
const twitterCard = $('meta[property="twitter:card"]').attr('content');
|
||||
@ -32,7 +142,7 @@ export default async (url: URL.Url, lang: string | null = null): Promise<Summary
|
||||
$('link[rel="apple-touch-icon"]').attr('href') ||
|
||||
$('link[rel="apple-touch-icon image_src"]').attr('href');
|
||||
|
||||
image = image ? URL.resolve(url.href, image) : null;
|
||||
image = image ? (new URL(image, url.href)).href : null;
|
||||
|
||||
const playerUrl =
|
||||
(twitterCard !== 'summary_large_image' && $('meta[property="twitter:player"]').attr('content')) ||
|
||||
@ -66,12 +176,11 @@ export default async (url: URL.Url, lang: string | null = null): Promise<Summary
|
||||
description = null;
|
||||
}
|
||||
|
||||
let siteName =
|
||||
let siteName = decodeHtml(
|
||||
$('meta[property="og:site_name"]').attr('content') ||
|
||||
$('meta[name="application-name"]').attr('content') ||
|
||||
url.hostname;
|
||||
|
||||
siteName = siteName ? decodeHtml(siteName) : null;
|
||||
url.hostname
|
||||
);
|
||||
|
||||
const favicon =
|
||||
$('link[rel="shortcut icon"]').attr('href') ||
|
||||
@ -81,33 +190,23 @@ export default async (url: URL.Url, lang: string | null = null): Promise<Summary
|
||||
const sensitive = $('.tweet').attr('data-possibly-sensitive') === 'true'
|
||||
|
||||
const find = async (path: string) => {
|
||||
const target = URL.resolve(url.href, path);
|
||||
const target = new URL(path, url.href);
|
||||
try {
|
||||
await head(target);
|
||||
await head(target.href);
|
||||
return target;
|
||||
} catch (e) {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
// 相対的なURL (ex. test) を絶対的 (ex. /test) に変換
|
||||
const toAbsolute = (relativeURLString: string): string => {
|
||||
const relativeURL = URL.parse(relativeURLString);
|
||||
const isAbsolute = relativeURL.slashes || relativeURL.path !== null && relativeURL.path[0] === '/';
|
||||
|
||||
// 既に絶対的なら、即座に値を返却
|
||||
if (isAbsolute) {
|
||||
return relativeURLString;
|
||||
const getIcon = async () => {
|
||||
return (await find(favicon)) || null;
|
||||
}
|
||||
|
||||
// スラッシュを付けて返却
|
||||
return '/' + relativeURLString;
|
||||
};
|
||||
|
||||
const icon = await find(favicon) ||
|
||||
// 相対指定を絶対指定に変換し再試行
|
||||
await find(toAbsolute(favicon)) ||
|
||||
null;
|
||||
const [icon, oEmbed] = await Promise.all([
|
||||
getIcon(),
|
||||
getOEmbedPlayer($, url.href),
|
||||
])
|
||||
|
||||
// Clean up the title
|
||||
title = cleanupTitle(title, siteName);
|
||||
@ -118,13 +217,14 @@ export default async (url: URL.Url, lang: string | null = null): Promise<Summary
|
||||
|
||||
return {
|
||||
title: title || null,
|
||||
icon: icon || null,
|
||||
icon: icon?.href || null,
|
||||
description: description || null,
|
||||
thumbnail: image || null,
|
||||
player: {
|
||||
player: oEmbed ?? {
|
||||
url: playerUrl || null,
|
||||
width: Number.isNaN(playerWidth) ? null : playerWidth,
|
||||
height: Number.isNaN(playerHeight) ? null : playerHeight
|
||||
height: Number.isNaN(playerHeight) ? null : playerHeight,
|
||||
allow: ['autoplay', 'encrypted-media', 'fullscreen'],
|
||||
},
|
||||
sitename: siteName || null,
|
||||
sensitive,
|
||||
|
@ -3,7 +3,7 @@
|
||||
* https://github.com/syuilo/summaly
|
||||
*/
|
||||
|
||||
import * as URL from 'node:url';
|
||||
import { URL } from 'node:url';
|
||||
import tracer from 'trace-redirect';
|
||||
import Summary from './summary.js';
|
||||
import type { IPlugin as _IPlugin } from './iplugin.js';
|
||||
@ -69,7 +69,7 @@ export const summaly = async (url: string, options?: Options): Promise<Result> =
|
||||
}
|
||||
}
|
||||
|
||||
const _url = URL.parse(actualUrl, true);
|
||||
const _url = new URL(actualUrl);
|
||||
|
||||
// Find matching plugin
|
||||
const match = plugins.filter(plugin => plugin.test(_url))[0];
|
||||
|
@ -1,7 +1,7 @@
|
||||
import * as URL from 'node:url';
|
||||
import type { URL } from 'node:url';
|
||||
import Summary from './summary.js';
|
||||
|
||||
export interface IPlugin {
|
||||
test: (url: URL.Url) => boolean;
|
||||
summarize: (url: URL.Url, lang?: string) => Promise<Summary>;
|
||||
test: (url: URL) => boolean;
|
||||
summarize: (url: URL, lang?: string) => Promise<Summary>;
|
||||
}
|
||||
|
@ -1,8 +1,8 @@
|
||||
import * as URL from 'node:url';
|
||||
import { URL } from 'node:url';
|
||||
import { scpaping } from '../utils/got.js';
|
||||
import summary from '../summary.js';
|
||||
|
||||
export function test(url: URL.Url): boolean {
|
||||
export function test(url: URL): boolean {
|
||||
return url.hostname === 'www.amazon.com' ||
|
||||
url.hostname === 'www.amazon.co.jp' ||
|
||||
url.hostname === 'www.amazon.ca' ||
|
||||
@ -19,7 +19,7 @@ export function test(url: URL.Url): boolean {
|
||||
url.hostname === 'www.amazon.au';
|
||||
}
|
||||
|
||||
export async function summarize(url: URL.Url): Promise<summary> {
|
||||
export async function summarize(url: URL): Promise<summary> {
|
||||
const res = await scpaping(url.href);
|
||||
const $ = res.$;
|
||||
|
||||
@ -51,8 +51,9 @@ export async function summarize(url: URL.Url): Promise<summary> {
|
||||
player: {
|
||||
url: playerUrl || null,
|
||||
width: playerWidth ? parseInt(playerWidth) : null,
|
||||
height: playerHeight ? parseInt(playerHeight) : null
|
||||
height: playerHeight ? parseInt(playerHeight) : null,
|
||||
allow: playerUrl ? ['fullscreen', 'encrypted-media'] : [],
|
||||
},
|
||||
sitename: 'Amazon'
|
||||
sitename: 'Amazon',
|
||||
};
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
import * as URL from 'node:url';
|
||||
import { URL } from 'node:url';
|
||||
import { get } from '../utils/got.js';
|
||||
import debug from 'debug';
|
||||
import summary from '../summary.js';
|
||||
@ -6,12 +6,12 @@ import clip from './../utils/clip.js';
|
||||
|
||||
const log = debug('summaly:plugins:wikipedia');
|
||||
|
||||
export function test(url: URL.Url): boolean {
|
||||
export function test(url: URL): boolean {
|
||||
if (!url.hostname) return false;
|
||||
return /\.wikipedia\.org$/.test(url.hostname);
|
||||
}
|
||||
|
||||
export async function summarize(url: URL.Url): Promise<summary> {
|
||||
export async function summarize(url: URL): Promise<summary> {
|
||||
const lang = url.host ? url.host.split('.')[0] : null;
|
||||
const title = url.pathname ? url.pathname.split('/')[2] : null;
|
||||
const endpoint = `https://${lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro=&explaintext=&titles=${title}`;
|
||||
@ -38,8 +38,9 @@ export async function summarize(url: URL.Url): Promise<summary> {
|
||||
player: {
|
||||
url: null,
|
||||
width: null,
|
||||
height: null
|
||||
height: null,
|
||||
allow: [],
|
||||
},
|
||||
sitename: 'Wikipedia'
|
||||
sitename: 'Wikipedia',
|
||||
};
|
||||
}
|
||||
|
@ -52,4 +52,9 @@ export type Player = {
|
||||
* The height of the player
|
||||
*/
|
||||
height: number | null;
|
||||
|
||||
/**
|
||||
* The allowed permissions of the iframe
|
||||
*/
|
||||
allow: string[];
|
||||
};
|
||||
|
@ -108,16 +108,17 @@ async function getResponse(args: GotOptions) {
|
||||
},
|
||||
});
|
||||
|
||||
return await receiveResponce({ req, typeFilter: args.typeFilter });
|
||||
return await receiveResponse({ req, typeFilter: args.typeFilter });
|
||||
}
|
||||
|
||||
async function receiveResponce<T>(args: { req: Got.CancelableRequest<Got.Response<T>>, typeFilter?: RegExp }) {
|
||||
async function receiveResponse<T>(args: { req: Got.CancelableRequest<Got.Response<T>>, typeFilter?: RegExp }) {
|
||||
const req = args.req;
|
||||
const maxSize = MAX_RESPONSE_SIZE;
|
||||
|
||||
req.on('response', (res: Got.Response) => {
|
||||
// Check html
|
||||
if (args.typeFilter && !res.headers['content-type']?.match(args.typeFilter)) {
|
||||
// console.warn(res.headers['content-type']);
|
||||
req.cancel(`Rejected by type filter ${res.headers['content-type']}`);
|
||||
return;
|
||||
}
|
||||
|
3
test/htmls/oembed-and-og-video.html
Normal file
3
test/htmls/oembed-and-og-video.html
Normal file
@ -0,0 +1,3 @@
|
||||
<!DOCTYPE html>
|
||||
<meta property="og:video:url" content="https://example.com/embedurl" />
|
||||
<link type="application/json+oembed" href="http://localhost:3060/oembed.json" />
|
3
test/htmls/oembed-and-og.html
Normal file
3
test/htmls/oembed-and-og.html
Normal file
@ -0,0 +1,3 @@
|
||||
<!DOCTYPE html>
|
||||
<meta property="og:description" content="blobcats rule the world">
|
||||
<link type="application/json+oembed" href="http://localhost:3060/oembed.json" />
|
2
test/htmls/oembed-nonexistent-path.html
Normal file
2
test/htmls/oembed-nonexistent-path.html
Normal file
@ -0,0 +1,2 @@
|
||||
<!DOCTYPE html>
|
||||
<link type="application/json+oembed" href="http://localhost:3060/oembe.json" />
|
2
test/htmls/oembed-relative.html
Normal file
2
test/htmls/oembed-relative.html
Normal file
@ -0,0 +1,2 @@
|
||||
<!DOCTYPE html>
|
||||
<link type="application/json+oembed" href="oembed.json" />
|
2
test/htmls/oembed-wrong.html
Normal file
2
test/htmls/oembed-wrong.html
Normal file
@ -0,0 +1,2 @@
|
||||
<!DOCTYPE html>
|
||||
<link type="application/json+oembed" href="http://localhost+:3060/oembed.json" />
|
2
test/htmls/oembed.html
Normal file
2
test/htmls/oembed.html
Normal file
@ -0,0 +1,2 @@
|
||||
<!DOCTYPE html>
|
||||
<link type="application/json+oembed" href="http://localhost:3060/oembed.json" />
|
123
test/index.ts
123
test/index.ts
@ -6,7 +6,7 @@
|
||||
|
||||
/* dependencies below */
|
||||
|
||||
import fs from 'node:fs';
|
||||
import fs, { readdirSync } from 'node:fs';
|
||||
import process from 'node:process';
|
||||
import fastify from 'fastify';
|
||||
import { summaly } from '../src/index.js';
|
||||
@ -213,6 +213,7 @@ describe('TwitterCard', () => {
|
||||
|
||||
const summary = await summaly(host);
|
||||
expect(summary.player.url).toBe('https://example.com/embedurl');
|
||||
expect(summary.player.allow).toStrictEqual(['autoplay', 'encrypted-media', 'fullscreen']);
|
||||
});
|
||||
|
||||
test('Player detection - Pleroma:video => video', async () => {
|
||||
@ -224,6 +225,7 @@ describe('TwitterCard', () => {
|
||||
|
||||
const summary = await summaly(host);
|
||||
expect(summary.player.url).toBe('https://example.com/embedurl');
|
||||
expect(summary.player.allow).toStrictEqual(['autoplay', 'encrypted-media', 'fullscreen']);
|
||||
});
|
||||
|
||||
test('Player detection - Pleroma:image => image', async () => {
|
||||
@ -237,3 +239,122 @@ describe('TwitterCard', () => {
|
||||
expect(summary.thumbnail).toBe('https://example.com/imageurl');
|
||||
});
|
||||
});
|
||||
|
||||
describe("oEmbed", () => {
|
||||
/**
 * Starts a fresh fastify server for a single oEmbed test case.
 *
 * `/` streams the HTML fixture at `htmlPath`, and `/oembed.json` streams the
 * JSON fixture at `oEmbedPath` (looked up inside `oembed/`). Both paths are
 * resolved relative to this test file.
 */
const setUpFastify = async (oEmbedPath: string, htmlPath = 'htmls/oembed.html') => {
	app = fastify();

	app.get('/', (_request, reply) => {
		const htmlFixture = new URL(htmlPath, import.meta.url);
		return reply.send(fs.createReadStream(htmlFixture));
	});

	app.get('/oembed.json', (_request, reply) => {
		const oEmbedDir = new URL('oembed/', import.meta.url);
		const oEmbedFixture = new URL(oEmbedPath, oEmbedDir);
		return reply.send(fs.createReadStream(oEmbedFixture));
	});

	await app.listen({ port });
};
|
||||
|
||||
for (const filename of readdirSync(new URL('oembed/invalid', import.meta.url))) {
|
||||
test(`Invalidity test: ${filename}`, async () => {
|
||||
await setUpFastify(`invalid/${filename}`);
|
||||
const summary = await summaly(host);
|
||||
expect(summary.player.url).toBe(null);
|
||||
});
|
||||
}
|
||||
|
||||
test('basic properties', async () => {
|
||||
await setUpFastify('oembed.json');
|
||||
const summary = await summaly(host);
|
||||
expect(summary.player.url).toBe('https://example.com/');
|
||||
expect(summary.player.width).toBe(500);
|
||||
expect(summary.player.height).toBe(300);
|
||||
});
|
||||
|
||||
test('type: video', async () => {
|
||||
await setUpFastify('oembed-video.json');
|
||||
const summary = await summaly(host);
|
||||
expect(summary.player.url).toBe('https://example.com/');
|
||||
expect(summary.player.width).toBe(500);
|
||||
expect(summary.player.height).toBe(300);
|
||||
});
|
||||
|
||||
test('max height', async () => {
|
||||
await setUpFastify('oembed-too-tall.json');
|
||||
const summary = await summaly(host);
|
||||
expect(summary.player.height).toBe(1024);
|
||||
});
|
||||
|
||||
test('children are ignored', async () => {
|
||||
await setUpFastify('oembed-iframe-child.json');
|
||||
const summary = await summaly(host);
|
||||
expect(summary.player.url).toBe('https://example.com/');
|
||||
});
|
||||
|
||||
test('allows fullscreen', async () => {
|
||||
await setUpFastify('oembed-allow-fullscreen.json');
|
||||
const summary = await summaly(host);
|
||||
expect(summary.player.url).toBe('https://example.com/');
|
||||
expect(summary.player.allow).toStrictEqual(['fullscreen'])
|
||||
});
|
||||
|
||||
// Every permission listed in the fixture's allow attribute is expected to be
// passed through, in the same order as it appears there.
test('allows safelisted permissions', async () => {
	await setUpFastify('oembed-allow-safelisted-permissions.json');
	const { player } = await summaly(host);
	expect(player.url).toBe('https://example.com/');

	const expectedPermissions = [
		'autoplay',
		'clipboard-write',
		'fullscreen',
		'encrypted-media',
		'picture-in-picture',
		'web-share',
	];
	expect(player.allow).toStrictEqual(expectedPermissions);
});
|
||||
|
||||
// The fixture requests autoplay, gyroscope and accelerometer; only autoplay is
// expected to remain, and the player URL must still be reported.
test('ignores rare permissions', async () => {
	await setUpFastify('oembed-ignore-rare-permissions.json');
	const result = await summaly(host);
	const { url, allow } = result.player;
	expect(url).toBe('https://example.com/');
	expect(allow).toStrictEqual(['autoplay']);
});
|
||||
|
||||
// Uses htmls/oembed-relative.html as the second setUpFastify argument.
// NOTE(review): presumably that page links to the oEmbed endpoint with a
// relative URL - the HTML fixture is not visible here, confirm.
test('oEmbed with relative path', async () => {
	await setUpFastify('oembed.json', 'htmls/oembed-relative.html');
	const { player } = await summaly(host);
	expect(player.url).toBe('https://example.com/');
});
|
||||
|
||||
// With htmls/oembed-nonexistent-path.html the whole summaly call is expected
// to reject with a '404 Not Found' error.
// NOTE(review): presumably the page points at an oEmbed URL the test server
// does not serve - the HTML fixture is not visible here, confirm.
test('oEmbed with nonexistent path', async () => {
	await setUpFastify('oembed.json', 'htmls/oembed-nonexistent-path.html');
	const pending = summaly(host);
	await expect(pending).rejects.toThrow('404 Not Found');
});
|
||||
|
||||
// With htmls/oembed-wrong-path.html the summaly call is expected to reject;
// no particular error message is asserted.
test('oEmbed with wrong path', async () => {
	await setUpFastify('oembed.json', 'htmls/oembed-wrong-path.html');
	const pending = summaly(host);
	await expect(pending).rejects.toThrow();
});
|
||||
|
||||
// A valid oEmbed response combined with htmls/oembed-and-og.html: the player
// URL and the description 'blobcats rule the world' must both be present.
// NOTE(review): the description presumably comes from the page's OpenGraph
// tags - the HTML fixture is not visible here, confirm.
test('oEmbed with OpenGraph', async () => {
	await setUpFastify('oembed.json', 'htmls/oembed-and-og.html');
	const result = await summaly(host);
	expect(result.player.url).toBe('https://example.com/');
	expect(result.description).toBe('blobcats rule the world');
});
|
||||
|
||||
// The oEmbed fixture embeds an http:// (non-HTTPS) iframe. The player URL is
// expected to be null while the description is still reported.
test('Invalid oEmbed with valid OpenGraph', async () => {
	await setUpFastify('invalid/oembed-insecure.json', 'htmls/oembed-and-og.html');
	const result = await summaly(host);
	expect(result.player.url).toBe(null);
	expect(result.description).toBe('blobcats rule the world');
});
|
||||
|
||||
// Combines the plain oEmbed fixture with htmls/oembed-and-og-video.html and
// expects the player URL plus an empty permission list.
// NOTE(review): presumably this checks that the oEmbed player takes priority
// over og:video metadata - the HTML fixture is not visible here, confirm.
test('oEmbed with og:video', async () => {
	await setUpFastify('oembed.json', 'htmls/oembed-and-og-video.html');
	const { player } = await summaly(host);
	expect(player.url).toBe('https://example.com/');
	expect(player.allow).toStrictEqual([]);
});
|
||||
|
||||
// The fixture gives width as the string "100%"; the player width is expected
// to be null while the numeric height of 300 is kept.
test('width: 100%', async () => {
	await setUpFastify('oembed-percentage-width.json');
	const result = await summaly(host);
	const { width, height } = result.player;
	expect(width).toBe(null);
	expect(height).toBe(300);
});
|
||||
});
|
||||
|
7
test/oembed/invalid/oembed-child-iframe.json
Normal file
7
test/oembed/invalid/oembed-child-iframe.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<div><iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-double-iframes.json
Normal file
7
test/oembed/invalid/oembed-double-iframes.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe><iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-future.json
Normal file
7
test/oembed/invalid/oembed-future.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "11.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-insecure.json
Normal file
7
test/oembed/invalid/oembed-insecure.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='http://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-invalid-height.json
Normal file
7
test/oembed/invalid/oembed-invalid-height.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": "blobcat"
|
||||
}
|
6
test/oembed/invalid/oembed-no-height.json
Normal file
6
test/oembed/invalid/oembed-no-height.json
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": 500
|
||||
}
|
6
test/oembed/invalid/oembed-no-version.json
Normal file
6
test/oembed/invalid/oembed-no-version.json
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-old.json
Normal file
7
test/oembed/invalid/oembed-old.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "0.1",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-photo.json
Normal file
7
test/oembed/invalid/oembed-photo.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "photo",
|
||||
"url": "https://example.com/example.avif",
|
||||
"width": 300,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-too-powerful.json
Normal file
7
test/oembed/invalid/oembed-too-powerful.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/' allow='camera'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-too-powerful2.json
Normal file
7
test/oembed/invalid/oembed-too-powerful2.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/' allow='fullscreen;camera'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/oembed-allow-fullscreen.json
Normal file
7
test/oembed/oembed-allow-fullscreen.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/' allow='fullscreen'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/oembed-allow-safelisted-permissions.json
Normal file
7
test/oembed/oembed-allow-safelisted-permissions.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/' allow='autoplay;clipboard-write;fullscreen;encrypted-media;picture-in-picture;web-share'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/oembed-iframe-child.json
Normal file
7
test/oembed/oembed-iframe-child.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'><script>alert('Hahaha I take this world')</script></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/oembed-ignore-rare-permissions.json
Normal file
7
test/oembed/oembed-ignore-rare-permissions.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/' allow='autoplay;gyroscope;accelerometer'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/oembed-percentage-width.json
Normal file
7
test/oembed/oembed-percentage-width.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": "100%",
|
||||
"height": 300
|
||||
}
|
6
test/oembed/oembed-too-tall.json
Normal file
6
test/oembed/oembed-too-tall.json
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"height": 3000
|
||||
}
|
7
test/oembed/oembed-video.json
Normal file
7
test/oembed/oembed-video.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "video",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/oembed.json
Normal file
7
test/oembed/oembed.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user