feat: add oEmbed support

This commit is contained in:
Kagami Sascha Rosylight 2023-03-11 14:04:55 +01:00
parent 51f3870e1f
commit a5a8c4437d
30 changed files with 3669 additions and 12 deletions

View File

@ -1,3 +1,7 @@
Unreleased
------------------
* oEmbed type=richの制限的なサポート
3.0.4 / 2023-02-12 3.0.4 / 2023-02-12
------------------ ------------------
* 不要な依存関係を除去 * 不要な依存関係を除去

3338
pnpm-lock.yaml generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -4,8 +4,86 @@ import cleanupTitle from './utils/cleanup-title.js';
import { decode as decodeHtml } from 'html-entities'; import { decode as decodeHtml } from 'html-entities';
import { head, scpaping } from './utils/got.js'; import { get, head, scpaping } from './utils/got.js';
import Summary from './summary.js'; import type { default as Summary, OEmbedRichIframe } from './summary.js';
import * as cheerio from 'cheerio';
/**
 * Looks up the oEmbed document advertised by the page and extracts a
 * sanitized description of the iframe it carries.
 *
 * Contains only the data of the iframe as the thumbnail is mostly covered
 * in OpenGraph instead. Width should always be 100%.
 *
 * A response is accepted only when all of the following hold:
 * - it is JSON with `version` "1.0" and `type` "rich"
 * - its `html` is a string consisting of exactly one top-level `<iframe>`
 * - the iframe `src` uses HTTPS
 * - a numeric height is given either on the iframe or in the response
 * - every entry of the iframe `allow` attribute is in the safe list
 *
 * @param $ - The parsed page in which the oEmbed discovery `<link>` is searched
 * @param pageUrl - URL of the page; a relative discovery href is resolved against it
 * @returns The `src`, the `height` (capped at 1024) and the `allow` entries of
 * the iframe, or `null` when there is no discovery link or the response is rejected.
 * A failure of the request itself is not caught here and propagates to the caller.
 */
async function getOEmbedRich($: cheerio.CheerioAPI, pageUrl: string): Promise<OEmbedRichIframe | null> {
	const href = $('link[type="application/json+oembed"]').attr('href');
	if (!href) {
		return null;
	}

	// XXX: Use global URL object instead of the deprecated `node:url`
	// A relative href is resolved against the page URL
	const oEmbed = await get(URL.resolve(pageUrl, href));
	const body = (() => {
		try {
			return JSON.parse(oEmbed);
		} catch {
			// Not JSON; rejected by the well-formedness check below
			return undefined;
		}
	})();

	if (!body || body.version !== '1.0' || body.type !== 'rich') {
		// Not a well formed rich oEmbed
		return null;
	}

	if (typeof body.html !== 'string') {
		// The response comes from a remote server, so `html` may be missing or
		// of any type; calling string methods on it would throw
		return null;
	}

	if (!body.html.startsWith('<iframe ') || !body.html.endsWith('</iframe>')) {
		// It includes something else than an iframe
		return null;
	}

	const oEmbedHtml = cheerio.load(body.html);
	const iframe = oEmbedHtml("iframe");

	if (iframe.length !== 1) {
		// Somehow we either have multiple iframes or none
		return null;
	}

	if (iframe.parents().length !== 2) {
		// Should only have the body and html elements as the parents
		return null;
	}

	const src = iframe.attr('src');
	if (!src) {
		// No src?
		return null;
	}

	// XXX: Use global URL object instead of the deprecated `node:url`
	const url = URL.parse(src);
	if (url.protocol !== 'https:') {
		// Allow only HTTPS for best security
		return null;
	}

	// The attribute on the iframe wins over the `height` field of the response
	const height = Math.min(Number(iframe.attr('height') ?? body.height), 1024);
	if (Number.isNaN(height)) {
		// No proper size info
		return null;
	}

	const allowedFeatures = (iframe.attr('allow') ?? '').split(/\s+/g);
	// The empty string is what splitting a missing or empty `allow` yields
	const safeList = ['', 'fullscreen', 'encrypted-media', 'picture-in-picture'];
	if (allowedFeatures.some(allow => !safeList.includes(allow))) {
		// This iframe is probably too powerful to be embedded
		return null;
	}

	return {
		src,
		height,
		allow: allowedFeatures
	}
}
export default async (url: URL.Url, lang: string | null = null): Promise<Summary | null> => { export default async (url: URL.Url, lang: string | null = null): Promise<Summary | null> => {
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null; if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
@ -104,10 +182,18 @@ export default async (url: URL.Url, lang: string | null = null): Promise<Summary
return '/' + relativeURLString; return '/' + relativeURLString;
}; };
const icon = await find(favicon) || const getIcon = async () => {
// 相対指定を絶対指定に変換し再試行 return await find(favicon) ||
await find(toAbsolute(favicon)) || // 相対指定を絶対指定に変換し再試行
null; await find(toAbsolute(favicon)) ||
null;
}
const [icon, oEmbed] = await Promise.all([
getIcon(),
// playerあるならoEmbedは必要ない
!playerUrl ? getOEmbedRich($, url.href) : null,
])
// Clean up the title // Clean up the title
title = cleanupTitle(title, siteName); title = cleanupTitle(title, siteName);
@ -128,5 +214,6 @@ export default async (url: URL.Url, lang: string | null = null): Promise<Summary
}, },
sitename: siteName || null, sitename: siteName || null,
sensitive, sensitive,
oEmbed,
}; };
}; };

View File

@ -53,6 +53,7 @@ export async function summarize(url: URL.Url): Promise<summary> {
width: playerWidth ? parseInt(playerWidth) : null, width: playerWidth ? parseInt(playerWidth) : null,
height: playerHeight ? parseInt(playerHeight) : null height: playerHeight ? parseInt(playerHeight) : null
}, },
sitename: 'Amazon' sitename: 'Amazon',
oEmbed: null,
}; };
} }

View File

@ -40,6 +40,7 @@ export async function summarize(url: URL.Url): Promise<summary> {
width: null, width: null,
height: null height: null
}, },
sitename: 'Wikipedia' sitename: 'Wikipedia',
oEmbed: null,
}; };
} }

View File

@ -33,6 +33,11 @@ type Summary = {
* Possibly sensitive * Possibly sensitive
*/ */
sensitive?: boolean; sensitive?: boolean;
/**
* The iframe information of oEmbed data from that web page
*/
oEmbed: OEmbedRichIframe | null;
}; };
export default Summary; export default Summary;
@ -53,3 +58,24 @@ export type Player = {
*/ */
height: number | null; height: number | null;
}; };
/**
 * The iframe data taken out of the `html` field of an oEmbed response.
 * There is no `width` member since the iframe should always be 100% wide.
 */
export type OEmbedRichIframe = {
	/** The URL given as the src of the iframe */
	src: string;

	/** The height of the iframe */
	height: number;

	/** The features listed as allowed for the iframe */
	allow: string[];
};

View File

@ -108,16 +108,17 @@ async function getResponse(args: GotOptions) {
}, },
}); });
return await receiveResponce({ req, typeFilter: args.typeFilter }); return await receiveResponse({ req, typeFilter: args.typeFilter });
} }
async function receiveResponce<T>(args: { req: Got.CancelableRequest<Got.Response<T>>, typeFilter?: RegExp }) { async function receiveResponse<T>(args: { req: Got.CancelableRequest<Got.Response<T>>, typeFilter?: RegExp }) {
const req = args.req; const req = args.req;
const maxSize = MAX_RESPONSE_SIZE; const maxSize = MAX_RESPONSE_SIZE;
req.on('response', (res: Got.Response) => { req.on('response', (res: Got.Response) => {
// Check html // Check html
if (args.typeFilter && !res.headers['content-type']?.match(args.typeFilter)) { if (args.typeFilter && !res.headers['content-type']?.match(args.typeFilter)) {
// console.warn(res.headers['content-type']);
req.cancel(`Rejected by type filter ${res.headers['content-type']}`); req.cancel(`Rejected by type filter ${res.headers['content-type']}`);
return; return;
} }

View File

@ -0,0 +1,3 @@
<!DOCTYPE html>
<meta property="og:video:url" content="https://example.com/embedurl" />
<link type="application/json+oembed" href="http://localhost:3060/oembed.json" />

View File

@ -0,0 +1,3 @@
<!DOCTYPE html>
<meta property="og:description" content="blobcats rule the world">
<link type="application/json+oembed" href="http://localhost:3060/oembed.json" />

View File

@ -0,0 +1,2 @@
<!DOCTYPE html>
<link type="application/json+oembed" href="http://localhost:3060/oembe.json" />

View File

@ -0,0 +1,2 @@
<!DOCTYPE html>
<link type="application/json+oembed" href="oembed.json" />

View File

@ -0,0 +1,2 @@
<!DOCTYPE html>
<link type="application/json+oembed" href="http://localhost+:3060/oembed.json" />

2
test/htmls/oembed.html Normal file
View File

@ -0,0 +1,2 @@
<!DOCTYPE html>
<link type="application/json+oembed" href="http://localhost:3060/oembed.json" />

View File

@ -6,13 +6,13 @@
/* dependencies below */ /* dependencies below */
import fs from 'node:fs'; import fs, { readdirSync } from 'node:fs';
import process from 'node:process'; import process from 'node:process';
import fastify from 'fastify'; import fastify from 'fastify';
import { summaly } from '../src/index.js'; import { summaly } from '../src/index.js';
import { dirname } from 'node:path'; import { dirname } from 'node:path';
import { fileURLToPath } from 'node:url'; import { fileURLToPath } from 'node:url';
import {expect, jest, test, describe, beforeEach, afterEach} from '@jest/globals'; import { expect, jest, test, describe, beforeEach, afterEach } from '@jest/globals';
import { Agent as httpAgent } from 'node:http'; import { Agent as httpAgent } from 'node:http';
import { Agent as httpsAgent } from 'node:https'; import { Agent as httpsAgent } from 'node:https';
import { StatusError } from '../src/utils/status-error.js'; import { StatusError } from '../src/utils/status-error.js';
@ -237,3 +237,93 @@ describe('TwitterCard', () => {
expect(summary.thumbnail).toBe('https://example.com/imageurl'); expect(summary.thumbnail).toBe('https://example.com/imageurl');
}); });
}); });
// Tests for the oEmbed extraction: each test serves an HTML page at `/` and an
// oEmbed JSON fixture at `/oembed.json`, then summarizes the page.
describe("oEmbed", () => {
	/**
	 * Starts a fastify server that serves the given fixtures.
	 *
	 * @param oEmbedPath - Fixture served as `/oembed.json`, relative to the `oembed/` directory
	 * @param htmlPath - Page served as `/`, relative to this test file
	 */
	const setUpFastify = async (oEmbedPath: string, htmlPath = 'htmls/oembed.html') => {
		app = fastify();
		app.get('/', (request, reply) => {
			return reply.send(fs.createReadStream(new URL(htmlPath, import.meta.url)));
		});
		app.get('/oembed.json', (request, reply) => {
			return reply.send(fs.createReadStream(
				new URL(oEmbedPath, new URL('oembed/', import.meta.url))
			));
		});
		await app.listen({ port });
	}

	// Every fixture in `oembed/invalid` must be rejected
	for (const filename of readdirSync(new URL('oembed/invalid', import.meta.url))) {
		test(`Invalidity test: ${filename}`, async () => {
			await setUpFastify(`invalid/${filename}`);
			const summary = await summaly(host);
			expect(summary.oEmbed).toBe(null);
		});
	}

	test('src', async () => {
		await setUpFastify('oembed.json');
		const summary = await summaly(host);
		expect(summary.oEmbed?.src).toBe('https://example.com/');
	});

	test('max height', async () => {
		await setUpFastify('oembed-too-tall.json');
		const summary = await summaly(host);
		expect(summary.oEmbed?.height).toBe(1024);
	});

	test('children are ignored', async () => {
		await setUpFastify('oembed-iframe-child.json');
		const summary = await summaly(host);
		expect(summary.oEmbed?.src).toBe('https://example.com/');
	});

	test('allows fullscreen', async () => {
		await setUpFastify('oembed-allow-fullscreen.json');
		const summary = await summaly(host);
		expect(summary.oEmbed?.src).toBe('https://example.com/');
		// Checking only `src` would still pass if the allow list got lost
		expect(summary.oEmbed?.allow).toStrictEqual(['fullscreen']);
	});

	test('allows safelisted features', async () => {
		await setUpFastify('oembed-allow-safelisted-features.json');
		const summary = await summaly(host);
		expect(summary.oEmbed?.src).toBe('https://example.com/');
		// Checking only `src` would still pass if the allow list got lost
		expect(summary.oEmbed?.allow).toStrictEqual(['fullscreen', 'encrypted-media', 'picture-in-picture']);
	});

	test('oEmbed with relative path', async () => {
		await setUpFastify('oembed.json', 'htmls/oembed-relative.html');
		const summary = await summaly(host);
		expect(summary.oEmbed?.src).toBe('https://example.com/');
	});

	test('oEmbed with nonexistent path', async () => {
		await setUpFastify('oembed.json', 'htmls/oembed-nonexistent-path.html');
		await expect(summaly(host)).rejects.toThrow('404 Not Found');
	});

	test('oEmbed with wrong path', async () => {
		await setUpFastify('oembed.json', 'htmls/oembed-wrong-path.html');
		await expect(summaly(host)).rejects.toThrow();
	});

	test('oEmbed with OpenGraph', async () => {
		await setUpFastify('oembed.json', 'htmls/oembed-and-og.html');
		const summary = await summaly(host);
		expect(summary.oEmbed?.src).toBe('https://example.com/');
		expect(summary.description).toBe('blobcats rule the world');
	});

	test('Invalid oEmbed with valid OpenGraph', async () => {
		await setUpFastify('invalid/oembed-insecure.json', 'htmls/oembed-and-og.html');
		const summary = await summaly(host);
		expect(summary.oEmbed).toBe(null);
		expect(summary.description).toBe('blobcats rule the world');
	});

	// The oEmbed data is not used when the page already gives a player
	test('oEmbed with og:video', async () => {
		await setUpFastify('oembed.json', 'htmls/oembed-and-og-video.html');
		const summary = await summaly(host);
		expect(summary.oEmbed).toBe(null);
		expect(summary.player.url).toBe('https://example.com/embedurl');
	});
});

View File

@ -0,0 +1,6 @@
{
"version": "1.0",
"type": "rich",
"html": "<div><iframe src='https://example.com/'></iframe>",
"height": 300
}

View File

@ -0,0 +1,6 @@
{
"version": "1.0",
"type": "rich",
"html": "<iframe src='https://example.com/'></iframe><iframe src='https://example.com/'></iframe>",
"height": 300
}

View File

@ -0,0 +1,6 @@
{
"version": "11.0",
"type": "rich",
"html": "<iframe src='https://example.com/'></iframe>",
"height": 300
}

View File

@ -0,0 +1,6 @@
{
"version": "1.0",
"type": "rich",
"html": "<iframe src='http://example.com/'></iframe>",
"height": 300
}

View File

@ -0,0 +1,6 @@
{
"version": "1.0",
"type": "rich",
"html": "<iframe src='https://example.com/'></iframe>",
"height": "blobcat"
}

View File

@ -0,0 +1,5 @@
{
"version": "1.0",
"type": "rich",
"html": "<iframe src='https://example.com/'></iframe>"
}

View File

@ -0,0 +1,5 @@
{
"type": "rich",
"html": "<iframe src='https://example.com/'></iframe>",
"height": 300
}

View File

@ -0,0 +1,6 @@
{
"version": "0.1",
"type": "rich",
"html": "<iframe src='https://example.com/'></iframe>",
"height": 300
}

View File

@ -0,0 +1,7 @@
{
"version": "1.0",
"type": "photo",
"url": "https://example.com/example.avif",
"width": 300,
"height": 300
}

View File

@ -0,0 +1,6 @@
{
"version": "1.0",
"type": "rich",
"html": "<iframe src='https://example.com/' allow='camera'></iframe>",
"height": 300
}

View File

@ -0,0 +1,6 @@
{
"version": "1.0",
"type": "rich",
"html": "<iframe src='https://example.com/' allow='fullscreen camera'></iframe>",
"height": 300
}

View File

@ -0,0 +1,6 @@
{
"version": "1.0",
"type": "rich",
"html": "<iframe src='https://example.com/' allow='fullscreen'></iframe>",
"height": 300
}

View File

@ -0,0 +1,6 @@
{
"version": "1.0",
"type": "rich",
"html": "<iframe src='https://example.com/' allow='fullscreen encrypted-media picture-in-picture'></iframe>",
"height": 300
}

View File

@ -0,0 +1,6 @@
{
"version": "1.0",
"type": "rich",
"html": "<iframe src='https://example.com/'><script>alert('Hahaha I take this world')</script></iframe>",
"height": 300
}

View File

@ -0,0 +1,6 @@
{
"version": "1.0",
"type": "rich",
"html": "<iframe src='https://example.com/'></iframe>",
"height": 3000
}

6
test/oembed/oembed.json Normal file
View File

@ -0,0 +1,6 @@
{
"version": "1.0",
"type": "rich",
"html": "<iframe src='https://example.com/'></iframe>",
"height": 300
}