This commit is contained in:
Kagami Sascha Rosylight 2023-03-11 14:31:38 +01:00
parent a5a8c4437d
commit 7b8a2b0913
6 changed files with 110 additions and 10 deletions

2
built/general.d.ts vendored
View File

@ -1,4 +1,4 @@
import * as URL from 'node:url';
import Summary from './summary.js';
import type { default as Summary } from './summary.js';
declare const _default: (url: URL.Url, lang?: string | null) => Promise<Summary | null>;
export default _default;

View File

@ -2,7 +2,74 @@ import * as URL from 'node:url';
import clip from './utils/clip.js';
import cleanupTitle from './utils/cleanup-title.js';
import { decode as decodeHtml } from 'html-entities';
import { head, scpaping } from './utils/got.js';
import { get, head, scpaping } from './utils/got.js';
import * as cheerio from 'cheerio';
/**
 * Discovers the page's JSON oEmbed endpoint and extracts a vetted description
 * of the iframe carried by a "rich" oEmbed response. Thumbnails are not
 * handled here as they are mostly covered by OpenGraph instead.
 *
 * Only the iframe's `src`, `height` and `allow` list are returned; width
 * should always be 100%.
 *
 * oEmbed is an optional enrichment, so every failure (no endpoint, network
 * error, malformed JSON, unexpected markup, overly powerful iframe) yields
 * `null` instead of throwing.
 *
 * @param $ cheerio root of the already-loaded page
 * @param pageUrl URL of the page, used as the base to resolve the oEmbed link
 * @returns `{ src, height, allow }` for an acceptable iframe, otherwise `null`
 */
async function getOEmbedRich($, pageUrl) {
    const href = $('link[type="application/json+oembed"]').attr('href');
    if (!href) {
        return null;
    }
    // XXX: Use global URL object instead of the deprecated `node:url`
    // Disallow relative URL as no one seems to use it
    let oEmbed;
    try {
        oEmbed = await get(URL.resolve(pageUrl, href));
    }
    catch {
        // A broken oEmbed endpoint must not fail the whole page summary
        return null;
    }
    let body;
    try {
        body = JSON.parse(oEmbed);
    }
    catch {
        // Not JSON at all
        return null;
    }
    if (!body || body.version !== '1.0' || body.type !== 'rich') {
        // Not a well formed rich oEmbed
        return null;
    }
    if (typeof body.html !== 'string') {
        // `html` is missing or not a string, nothing to inspect
        return null;
    }
    if (!body.html.startsWith('<iframe ') || !body.html.endsWith('</iframe>')) {
        // It includes something else than an iframe
        return null;
    }
    const oEmbedHtml = cheerio.load(body.html);
    const iframe = oEmbedHtml('iframe');
    if (iframe.length !== 1) {
        // Somehow we either have multiple iframes or none
        return null;
    }
    if (iframe.parents().length !== 2) {
        // Should only have the body and html elements as the parents
        return null;
    }
    const src = iframe.attr('src');
    if (!src) {
        // No src?
        return null;
    }
    // XXX: Use global URL object instead of the deprecated `node:url`
    const url = URL.parse(src);
    if (url.protocol !== 'https:') {
        // Allow only HTTPS for best security
        return null;
    }
    // Prefer the iframe's own height attribute, fall back to the oEmbed
    // `height` field, and cap the result at 1024
    const height = Math.min(Number(iframe.attr('height') ?? body.height), 1024);
    if (Number.isNaN(height)) {
        // No proper size info
        return null;
    }
    // The `allow` attribute is a semicolon-separated list of features.
    // Splitting on both semicolons and whitespace accepts that syntax (and a
    // plain space-separated list) while making any allowlist value such as
    // `*` or `'self'` show up as its own token, which then fails the safe
    // list check below.
    const allowedFeatures = (iframe.attr('allow') ?? '')
        .split(/[\s;]+/)
        .filter(feature => feature !== '');
    const safeList = ['fullscreen', 'encrypted-media', 'picture-in-picture'];
    if (allowedFeatures.some(feature => !safeList.includes(feature))) {
        // This iframe is probably too powerful to be embedded
        return null;
    }
    return {
        src,
        height,
        allow: allowedFeatures
    };
}
export default async (url, lang = null) => {
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/))
lang = null;
@ -73,10 +140,17 @@ export default async (url, lang = null) => {
// スラッシュを付けて返却
return '/' + relativeURLString;
};
const icon = await find(favicon) ||
// 相対指定を絶対指定に変換し再試行
await find(toAbsolute(favicon)) ||
null;
const getIcon = async () => {
return await find(favicon) ||
// 相対指定を絶対指定に変換し再試行
await find(toAbsolute(favicon)) ||
null;
};
const [icon, oEmbed] = await Promise.all([
getIcon(),
// playerあるならoEmbedは必要ない
!playerUrl ? getOEmbedRich($, url.href) : null,
]);
// Clean up the title
title = cleanupTitle(title, siteName);
if (title === '') {
@ -94,5 +168,6 @@ export default async (url, lang = null) => {
},
sitename: siteName || null,
sensitive,
oEmbed,
};
};

View File

@ -38,6 +38,7 @@ export async function summarize(url) {
width: playerWidth ? parseInt(playerWidth) : null,
height: playerHeight ? parseInt(playerHeight) : null
},
sitename: 'Amazon'
sitename: 'Amazon',
oEmbed: null,
};
}

View File

@ -31,6 +31,7 @@ export async function summarize(url) {
width: null,
height: null
},
sitename: 'Wikipedia'
sitename: 'Wikipedia',
oEmbed: null,
};
}

22
built/summary.d.ts vendored
View File

@ -27,6 +27,10 @@ declare type Summary = {
* Possibly sensitive
*/
sensitive?: boolean;
/**
* The iframe information of oEmbed data from that web page
*/
oEmbed: OEmbedRichIframe | null;
};
export default Summary;
export declare type Player = {
@ -43,3 +47,21 @@ export declare type Player = {
*/
height: number | null;
};
/**
* Iframe information extracted from the `html` field of a rich oEmbed response.
* `width` is omitted here as it should always be 100%.
*/
export declare type OEmbedRichIframe = {
/**
* The `src` attribute of the iframe. The extractor (`getOEmbedRich`) only
* accepts iframes whose src uses the `https:` protocol.
*/
src: string;
/**
* The height of the iframe, taken from the iframe's `height` attribute or,
* failing that, the oEmbed `height` field. The extractor caps it at 1024.
*/
height: number;
/**
* The feature list read from the iframe's `allow` attribute. The extractor
* rejects iframes that request anything other than `fullscreen`,
* `encrypted-media` or `picture-in-picture`.
*/
allow: string[];
};

View File

@ -84,14 +84,15 @@ async function getResponse(args) {
limit: 0,
},
});
return await receiveResponce({ req, typeFilter: args.typeFilter });
return await receiveResponse({ req, typeFilter: args.typeFilter });
}
async function receiveResponce(args) {
async function receiveResponse(args) {
const req = args.req;
const maxSize = MAX_RESPONSE_SIZE;
req.on('response', (res) => {
// Check html
if (args.typeFilter && !res.headers['content-type']?.match(args.typeFilter)) {
// console.warn(res.headers['content-type']);
req.cancel(`Rejected by type filter ${res.headers['content-type']}`);
return;
}