mirror of
https://github.com/misskey-dev/summaly.git
synced 2025-04-29 02:37:27 +09:00
* feat: add oEmbed support * more safelisted features * fix the syntax * Update README.md * permissions * names * playerを使うように * fix type error * support width (for size ratio) * test for type: video * nullable width * restore max height test * ignored permissions * restore autoplay * Use WHATWG URL --------- Co-authored-by: tamaina <tamaina@hotmail.co.jp>
125 lines
4.0 KiB
JavaScript
125 lines
4.0 KiB
JavaScript
import got, * as Got from 'got';
|
|
import { StatusError } from './status-error.js';
|
|
import { detectEncoding, toUtf8 } from './encoding.js';
|
|
import * as cheerio from 'cheerio';
|
|
import PrivateIp from 'private-ip';
|
|
import { dirname } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { readFileSync } from 'node:fs';
|
|
// ES modules do not provide __filename/__dirname, so equivalents are derived
// from this module's own URL.
const _filename = fileURLToPath(import.meta.url);
// Directory that contains this module; used to locate package.json.
const _dirname = dirname(_filename);
|
|
/**
 * Agent option passed to got for every request issued by this module.
 * While it has no keys, `scpaping` refuses responses coming from private
 * addresses (unless SUMMALY_ALLOW_PRIVATE_IP is 'true').
 */
export let agent = {};

/**
 * Replaces the agent used for subsequent requests.
 *
 * @param {object} [_agent] - New agent option; any falsy value resets the
 *   agent to an empty object.
 */
export function setAgent(_agent) {
	agent = _agent ? _agent : {};
}
|
|
// Package manifest, read once when the module is loaded; its version is
// embedded in the bot user agent below.
const repo = JSON.parse(
	readFileSync(`${_dirname}/../../package.json`, { encoding: 'utf8' }),
);

// Limit applied to each individual phase of a request (milliseconds, as got expects).
const RESPONSE_TIMEOUT = 20_000;

// Limit applied to a request as a whole (milliseconds).
const OPERATION_TIMEOUT = 60_000;

// Largest response accepted, in bytes (10 MiB).
const MAX_RESPONSE_SIZE = 10 * 1024 ** 2;

// User agent sent by `scpaping`.
const BOT_UA = `SummalyBot/${repo.version}`;
|
|
/**
 * Downloads a page with GET, decodes it and parses it with cheerio.
 *
 * Only responses whose content type starts with `text/html` or
 * `application/xhtml+xml` are accepted. The function name is misspelled but
 * is part of the public interface.
 *
 * @param {string} url - Address of the page (handed to got unchanged).
 * @param {{ lang?: string }} [opts] - `lang` is sent as the `accept-language` header.
 * @returns {Promise<{ body: *, $: *, response: * }>} The decoded body, the
 *   cheerio handle loaded from it, and the response it came from.
 * @throws {StatusError} Status 400 when the response came from a private
 *   address and private addresses are not permitted.
 */
export async function scpaping(url, opts) {
	const headers = {
		'accept': 'text/html,application/xhtml+xml',
		'user-agent': BOT_UA,
		'accept-language': opts?.lang,
	};

	const response = await getResponse({
		url,
		method: 'GET',
		headers,
		typeFilter: /^(text\/html|application\/xhtml\+xml)/,
	});

	// SUMMALY_ALLOW_PRIVATE_IP is meant for tests; a configured agent also
	// lifts the restriction.
	const privateIpPermitted = process.env.SUMMALY_ALLOW_PRIVATE_IP === 'true' || Object.keys(agent).length > 0;
	if (!privateIpPermitted) {
		const { ip } = response;
		if (ip && PrivateIp(ip)) {
			throw new StatusError(`Private IP rejected ${ip}`, 400, 'Private IP Rejected');
		}
	}

	// Decode the raw bytes using the encoding detected from those same bytes.
	const body = toUtf8(response.rawBody, detectEncoding(response.rawBody));

	return { body, $: cheerio.load(body), response };
}
|
|
/**
 * Issues a GET request that accepts any content type.
 *
 * @param {string} url - Address to request (handed to got unchanged).
 * @returns {Promise<*>} The `body` of the response.
 */
export async function get(url) {
	const { body } = await getResponse({
		url,
		method: 'GET',
		headers: { 'accept': '*/*' },
	});

	return body;
}
|
|
/**
 * Issues a HEAD request that accepts any content type.
 *
 * @param {string} url - Address to request (handed to got unchanged).
 * @returns {Promise<*>} The response itself.
 */
export async function head(url) {
	const request = {
		url,
		method: 'HEAD',
		headers: { 'accept': '*/*' },
	};

	return await getResponse(request);
}
|
|
/**
 * Starts a got request with this module's fixed timeout, agent and retry
 * settings, then hands it to `receiveResponse`.
 *
 * @param {{ url: string, method: string, headers?: object, body?: *, typeFilter?: RegExp }} args
 *   Request description; `typeFilter` is forwarded to `receiveResponse`.
 * @returns {Promise<*>} Whatever `receiveResponse` resolves to.
 */
async function getResponse(args) {
	const { url, method, headers, body, typeFilter } = args;

	// Every individual phase shares one limit...
	const phases = ['lookup', 'connect', 'secureConnect', 'socket', 'response', 'send'];
	const timeout = Object.fromEntries(phases.map((phase) => [phase, RESPONSE_TIMEOUT]));
	// ...while `request` is the whole operation timeout.
	timeout.request = OPERATION_TIMEOUT;

	const req = got(url, {
		method,
		headers,
		body,
		timeout,
		agent,
		http2: false,
		// Never retry.
		retry: { limit: 0 },
	});

	return await receiveResponse({ req, typeFilter });
}
|
|
/**
 * Awaits a got request while enforcing the address, content-type and size
 * policies of this module.
 *
 * The request is cancelled when
 *  - the response reports a private address (`res.ip`) and private addresses
 *    are not permitted,
 *  - `typeFilter` is given and the `content-type` header does not match it,
 *  - the `content-length` header, or the amount received so far, exceeds
 *    MAX_RESPONSE_SIZE.
 *
 * The private-address check lives here so that every request made through
 * this module is covered, not only the ones whose caller repeats the check
 * after the body has already been downloaded.
 *
 * @param {{ req: object, typeFilter?: RegExp }} args - `req` is the request
 *   returned by got; `typeFilter` is tested against the `content-type` header.
 * @returns {Promise<*>} The response the request resolves to.
 * @throws {StatusError} Status 400 ('Private IP Rejected') for a refused
 *   private address; the response's own status for got HTTP errors.
 *   Any other rejection (including cancellations) is rethrown unchanged.
 */
async function receiveResponse(args) {
	const { req, typeFilter } = args;
	const maxSize = MAX_RESPONSE_SIZE;

	// SUMMALY_ALLOW_PRIVATE_IP is meant for tests; a configured agent also
	// lifts the restriction (same rule `scpaping` applies).
	const allowPrivateIp = process.env.SUMMALY_ALLOW_PRIVATE_IP === 'true' || Object.keys(agent).length > 0;

	// Remembers the refused address so the cancellation can be reported as a
	// StatusError instead of a bare cancel error.
	let rejectedIp = null;

	req.on('response', (res) => {
		// Refuse private addresses before any of the body is consumed.
		// NOTE: the request itself has already been sent at this point.
		if (!allowPrivateIp && res.ip && PrivateIp(res.ip)) {
			rejectedIp = res.ip;
			req.cancel(`Private IP rejected ${res.ip}`);
			return;
		}

		// Content-type check.
		const contentType = res.headers['content-type'];
		if (typeFilter && !contentType?.match(typeFilter)) {
			req.cancel(`Rejected by type filter ${contentType}`);
			return;
		}

		// Size check based on the response header.
		const contentLength = res.headers['content-length'];
		if (contentLength != null) {
			const size = Number(contentLength);
			if (size > maxSize) {
				req.cancel(`maxSize exceeded (${size} > ${maxSize}) on response`);
			}
		}
	});

	// Size check on the data as it is being received.
	req.on('downloadProgress', (progress) => {
		if (progress.transferred > maxSize && progress.percent !== 1) {
			req.cancel(`maxSize exceeded (${progress.transferred} > ${maxSize}) on response`);
		}
	});

	// Obtain the response, reshaping status-code errors into StatusError.
	const res = await req.catch((e) => {
		if (rejectedIp !== null) {
			// Same error `scpaping` raises for a private address.
			throw new StatusError(`Private IP rejected ${rejectedIp}`, 400, 'Private IP Rejected');
		}
		if (e instanceof Got.HTTPError) {
			throw new StatusError(`${e.response.statusCode} ${e.response.statusMessage}`, e.response.statusCode, e.response.statusMessage);
		}
		throw e;
	});

	return res;
}
|