mirror of
https://github.com/misskey-dev/summaly.git
synced 2025-07-03 00:29:58 +09:00
3.0.1
This commit is contained in:
1
built/utils/cleanup-title.d.ts
vendored
Normal file
1
built/utils/cleanup-title.d.ts
vendored
Normal file
@ -0,0 +1 @@
|
||||
/**
 * Removes a trailing "separator + site name" suffix from a page title.
 *
 * @param title Page title; surrounding whitespace is trimmed
 * @param siteName Site name to strip from the end of the title (optional)
 * @returns The part of the title before the separator, or the trimmed title when no suffix matches
 */
export default function (title: string, siteName?: string | null): string;
|
19
built/utils/cleanup-title.js
Normal file
19
built/utils/cleanup-title.js
Normal file
@ -0,0 +1,19 @@
|
||||
import escapeRegExp from 'escape-regexp';
|
||||
export default function (title, siteName) {
|
||||
title = title.trim();
|
||||
if (siteName) {
|
||||
siteName = siteName.trim();
|
||||
const x = escapeRegExp(siteName);
|
||||
const patterns = [
|
||||
`^(.+?)\\s?[\\-\\|:・]\\s?${x}$`
|
||||
];
|
||||
for (let i = 0; i < patterns.length; i++) {
|
||||
const pattern = new RegExp(patterns[i]);
|
||||
const [, match] = pattern.exec(title) || [null, null];
|
||||
if (match) {
|
||||
return match;
|
||||
}
|
||||
}
|
||||
}
|
||||
return title;
|
||||
}
|
1
built/utils/clip.d.ts
vendored
Normal file
1
built/utils/clip.d.ts
vendored
Normal file
@ -0,0 +1 @@
|
||||
/**
 * Trims a string and truncates it to `max` characters, appending `...` when it was longer.
 *
 * @param s Text to clip
 * @param max Maximum length kept before the `...` suffix
 * @returns The trimmed (and possibly truncated) text
 */
export default function (s: string, max: number): string;
|
13
built/utils/clip.js
Normal file
13
built/utils/clip.js
Normal file
@ -0,0 +1,13 @@
|
||||
import nullOrEmpty from './null-or-empty.js';
|
||||
export default function (s, max) {
|
||||
if (nullOrEmpty(s)) {
|
||||
return s;
|
||||
}
|
||||
s = s.trim();
|
||||
if (s.length > max) {
|
||||
return s.substr(0, max) + '...';
|
||||
}
|
||||
else {
|
||||
return s;
|
||||
}
|
||||
}
|
8
built/utils/encoding.d.ts
vendored
Normal file
8
built/utils/encoding.d.ts
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
/// <reference types="node" />
|
||||
/**
 * Detect the character encoding of an HTML document.
 *
 * @param body Raw document bytes
 * @returns Name of the detected encoding; `utf-8` when none could be determined
 */
|
||||
export declare function detectEncoding(body: Buffer): string;
|
||||
/**
 * Decode a buffer into a string using the given encoding.
 *
 * @param body Raw bytes to decode
 * @param encoding Name of the encoding the bytes are stored in
 * @returns The decoded text
 */
export declare function toUtf8(body: Buffer, encoding: string): string;
|
40
built/utils/encoding.js
Normal file
40
built/utils/encoding.js
Normal file
@ -0,0 +1,40 @@
|
||||
import iconv from 'iconv-lite';
|
||||
import jschardet from 'jschardet';
|
||||
// Case-insensitive; captures the name that follows `charset=` (an opening quote is optional).
const regCharset = /charset\s*=\s*["']?([\w-]+)/i;
|
||||
/**
 * Work out which character encoding an HTML document uses.
 *
 * jschardet is asked first (minimum threshold 0.99); when that gives no usable
 * name, the first `charset=` declaration found in the body is tried.
 *
 * @param body Body in Buffer
 * @returns encoding name, `utf-8` when neither step yields a supported encoding
 */
export function detectEncoding(body) {
    // Step 1: detection from the bytes themselves
    const guess = jschardet.detect(body, { minimumThreshold: 0.99 });
    const fromDetection = guess ? toEncoding(guess.encoding) : null;
    if (fromDetection != null) {
        return fromDetection;
    }
    // Step 2: charset declared inside the document
    const declared = body.toString('ascii').match(regCharset);
    const fromDeclaration = declared ? toEncoding(declared[1]) : null;
    if (fromDeclaration != null) {
        return fromDeclaration;
    }
    return 'utf-8';
}
|
||||
/**
 * Decode raw bytes into a string with iconv-lite.
 *
 * @param body Bytes to decode
 * @param encoding Name of the encoding handed to `iconv.decode`
 * @returns The decoded string
 */
export function toUtf8(body, encoding) {
    const decoded = iconv.decode(body, encoding);
    return decoded;
}
|
||||
/**
 * Turn a candidate encoding name into one that is used for decoding.
 *
 * @param candicate Encoding name reported by detection or by the document
 * @returns `cp932` for the listed Shift_JIS aliases, the candidate itself when
 *          iconv-lite knows it, otherwise null
 */
function toEncoding(candicate) {
    if (!iconv.encodingExists(candicate)) {
        return null;
    }
    const shiftJisAliases = ['shift_jis', 'shift-jis', 'windows-31j', 'x-sjis'];
    return shiftJisAliases.includes(candicate.toLowerCase()) ? 'cp932' : candicate;
}
|
20
built/utils/got.d.ts
vendored
Normal file
20
built/utils/got.d.ts
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
import * as Got from 'got';
|
||||
import * as cheerio from 'cheerio';
|
||||
/** Agents passed to got with every request; replaced through `setAgent`. */
export declare let agent: Got.Agents;
|
||||
/**
 * Replace the agents used for subsequent requests.
 *
 * @param _agent Agents to use; a falsy value resets to an empty agent set
 */
export declare function setAgent(_agent: Got.Agents): void;
|
||||
/**
 * Request description: target `url`, HTTP `method`, optional request `body`,
 * request `headers`, and an optional `typeFilter`.
 *
 * NOTE(review): presumably the argument shape of the request helper in got.js,
 * where a response whose content-type does not match `typeFilter` is cancelled — confirm.
 */
export declare type GotOptions = {
|
||||
url: string;
|
||||
method: 'GET' | 'POST' | 'HEAD';
|
||||
body?: string;
|
||||
headers: Record<string, string | undefined>;
|
||||
typeFilter?: RegExp;
|
||||
};
|
||||
/**
 * Fetch an HTML (or XHTML) page and parse it with cheerio.
 *
 * @param url Page to fetch
 * @param opts Optional settings; `lang` is sent as the `accept-language` header
 * @returns The decoded body, the cheerio handle loaded from it, and the got response
 */
export declare function scpaping(url: string, opts?: {
|
||||
lang?: string;
|
||||
}): Promise<{
|
||||
body: string;
|
||||
$: cheerio.CheerioAPI;
|
||||
response: Got.Response<string>;
|
||||
}>;
|
||||
/**
 * Perform a GET request accepting any content type.
 *
 * @param url Resource to fetch
 * @returns The response body
 */
export declare function get(url: string): Promise<string>;
|
||||
/**
 * Perform a HEAD request accepting any content type.
 *
 * @param url Resource to probe
 * @returns The got response
 */
export declare function head(url: string): Promise<Got.Response<string>>;
|
123
built/utils/got.js
Normal file
123
built/utils/got.js
Normal file
@ -0,0 +1,123 @@
|
||||
import got, * as Got from 'got';
|
||||
import { StatusError } from './status-error.js';
|
||||
import { detectEncoding, toUtf8 } from './encoding.js';
|
||||
import * as cheerio from 'cheerio';
|
||||
import PrivateIp from 'private-ip';
|
||||
import { dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { readFileSync } from 'node:fs';
|
||||
// Path of this module file and of the directory that contains it,
// both derived from the module URL.
const _filename = fileURLToPath(import.meta.url);
const _dirname = dirname(_filename);

/** Agents handed to got on every request; reassigned by `setAgent`. */
export let agent = {};
|
||||
/**
 * Replace the agents used for subsequent requests.
 *
 * @param _agent Agents to use; any falsy value resets to an empty object
 */
export function setAgent(_agent) {
    if (_agent) {
        agent = _agent;
    }
    else {
        agent = {};
    }
}
|
||||
// package.json is read from two directories above this module; its version
// ends up in the bot's user-agent string.
const packageJsonPath = `${_dirname}/../../package.json`;
const repo = JSON.parse(readFileSync(packageJsonPath, 'utf8'));

// Per-phase timeout and whole-operation timeout, in milliseconds.
const RESPONSE_TIMEOUT = 20 * 1000;
const OPERATION_TIMEOUT = 60 * 1000;

// Largest accepted response size, in bytes.
const MAX_RESPONSE_SIZE = 10 * 1024 * 1024;

const BOT_UA = `SummalyBot/${repo.version}`;
|
||||
/**
 * Fetch an HTML (or XHTML) page, decode it and load it into cheerio.
 *
 * @param url Page to fetch
 * @param opts Optional settings; `lang` is sent as the `accept-language` header
 * @returns The decoded body, the cheerio handle and the got response
 * @throws StatusError (400) when the response came from a private IP address and
 *         the environment variable SUMMALY_ALLOW_PRIVATE_IP is not 'true'
 */
export async function scpaping(url, opts) {
    const headers = {
        'accept': 'text/html,application/xhtml+xml',
        'user-agent': BOT_UA,
        'accept-language': opts?.lang
    };
    const response = await getResponse({
        url,
        method: 'GET',
        headers,
        typeFilter: /^(text\/html|application\/xhtml\+xml)/,
    });
    // Escape hatch for tests
    const privateIpAllowed = process.env.SUMMALY_ALLOW_PRIVATE_IP === 'true';
    if (!privateIpAllowed) {
        if (response.ip && PrivateIp(response.ip)) {
            throw new StatusError(`Private IP rejected ${response.ip}`, 400, 'Private IP Rejected');
        }
    }
    // Detect the encoding from the raw bytes, then decode with it.
    const body = toUtf8(response.rawBody, detectEncoding(response.rawBody));
    return {
        body,
        $: cheerio.load(body),
        response,
    };
}
|
||||
/**
 * Perform a GET request that accepts any content type.
 *
 * @param url Resource to fetch
 * @returns The response body
 */
export async function get(url) {
    const request = {
        url,
        method: 'GET',
        headers: {
            'accept': '*/*',
        },
    };
    const { body } = await getResponse(request);
    return body;
}
|
||||
/**
 * Perform a HEAD request that accepts any content type.
 *
 * @param url Resource to probe
 * @returns The got response
 */
export async function head(url) {
    const request = {
        url,
        method: 'HEAD',
        headers: {
            'accept': '*/*',
        },
    };
    const response = await getResponse(request);
    return response;
}
|
||||
/**
 * Send one HTTP request through got and return the completed response.
 *
 * Retries and HTTP/2 are disabled. Every phase of the request is limited by
 * RESPONSE_TIMEOUT and the whole operation by OPERATION_TIMEOUT.
 *
 * @param args Request description: `url`, `method`, `headers`, optional `body`
 *             and optional `typeFilter` (forwarded to the response checks)
 * @returns The got response
 * @throws StatusError for HTTP error statuses, and (400) when the response came
 *         from a private IP address while SUMMALY_ALLOW_PRIVATE_IP is not 'true'
 */
async function getResponse(args) {
    const req = got(args.url, {
        method: args.method,
        headers: args.headers,
        body: args.body,
        timeout: {
            lookup: RESPONSE_TIMEOUT,
            connect: RESPONSE_TIMEOUT,
            secureConnect: RESPONSE_TIMEOUT,
            socket: RESPONSE_TIMEOUT,
            response: RESPONSE_TIMEOUT,
            send: RESPONSE_TIMEOUT,
            request: OPERATION_TIMEOUT, // whole operation timeout
        },
        agent,
        http2: false,
        retry: {
            limit: 0,
        },
    });
    const res = await receiveResponce({ req, typeFilter: args.typeFilter });
    // Reject private addresses here so that every caller (page scraping as well
    // as plain GET/HEAD) gets the same protection instead of only one of them.
    // The environment variable is an escape hatch for tests.
    const allowPrivateIp = process.env.SUMMALY_ALLOW_PRIVATE_IP === 'true';
    if (!allowPrivateIp && res.ip && PrivateIp(res.ip)) {
        throw new StatusError(`Private IP rejected ${res.ip}`, 400, 'Private IP Rejected');
    }
    return res;
}
|
||||
async function receiveResponce(args) {
|
||||
const req = args.req;
|
||||
const maxSize = MAX_RESPONSE_SIZE;
|
||||
req.on('response', (res) => {
|
||||
// Check html
|
||||
if (args.typeFilter && !res.headers['content-type']?.match(args.typeFilter)) {
|
||||
req.cancel(`Rejected by type filter ${res.headers['content-type']}`);
|
||||
return;
|
||||
}
|
||||
// 応答ヘッダでサイズチェック
|
||||
const contentLength = res.headers['content-length'];
|
||||
if (contentLength != null) {
|
||||
const size = Number(contentLength);
|
||||
if (size > maxSize) {
|
||||
req.cancel(`maxSize exceeded (${size} > ${maxSize}) on response`);
|
||||
}
|
||||
}
|
||||
});
|
||||
// 受信中のデータでサイズチェック
|
||||
req.on('downloadProgress', (progress) => {
|
||||
if (progress.transferred > maxSize && progress.percent !== 1) {
|
||||
req.cancel(`maxSize exceeded (${progress.transferred} > ${maxSize}) on response`);
|
||||
}
|
||||
});
|
||||
// 応答取得 with ステータスコードエラーの整形
|
||||
const res = await req.catch(e => {
|
||||
if (e instanceof Got.HTTPError) {
|
||||
throw new StatusError(`${e.response.statusCode} ${e.response.statusMessage}`, e.response.statusCode, e.response.statusMessage);
|
||||
}
|
||||
else {
|
||||
throw e;
|
||||
}
|
||||
});
|
||||
return res;
|
||||
}
|
1
built/utils/null-or-empty.d.ts
vendored
Normal file
1
built/utils/null-or-empty.d.ts
vendored
Normal file
@ -0,0 +1 @@
|
||||
/**
 * Tells whether a value is missing or blank.
 *
 * @param val Value to inspect
 * @returns true when `val` is undefined, null, or empty after trimming
 */
export default function (val: string): boolean;
|
14
built/utils/null-or-empty.js
Normal file
14
built/utils/null-or-empty.js
Normal file
@ -0,0 +1,14 @@
|
||||
/**
 * Tell whether a value is missing or consists only of whitespace.
 *
 * @param val Value to inspect
 * @returns true for undefined, null, or a string that is empty after trimming
 */
export default function nullOrEmpty(val) {
    // `== null` matches exactly undefined and null.
    return val == null || val.trim() === '';
}
|
6
built/utils/status-error.d.ts
vendored
Normal file
6
built/utils/status-error.d.ts
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
/**
 * Error that carries an HTTP status code and status message.
 * `isPermanentError` is true when the status code is a number from 400 to 499.
 */
export declare class StatusError extends Error {
|
||||
statusCode: number;
|
||||
statusMessage?: string;
|
||||
isPermanentError: boolean;
|
||||
constructor(message: string, statusCode: number, statusMessage?: string);
|
||||
}
|
9
built/utils/status-error.js
Normal file
9
built/utils/status-error.js
Normal file
@ -0,0 +1,9 @@
|
||||
export class StatusError extends Error {
|
||||
constructor(message, statusCode, statusMessage) {
|
||||
super(message);
|
||||
this.name = 'StatusError';
|
||||
this.statusCode = statusCode;
|
||||
this.statusMessage = statusMessage;
|
||||
this.isPermanentError = typeof this.statusCode === 'number' && this.statusCode >= 400 && this.statusCode < 500;
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user