This commit is contained in:
tamaina
2023-02-12 12:20:19 +00:00
parent 7eb9cbb4a6
commit 199b247e85
30 changed files with 639 additions and 2 deletions

1
built/utils/cleanup-title.d.ts vendored Normal file
View File

@ -0,0 +1 @@
/**
 * Trim a page title and strip a trailing site-name suffix from it.
 *
 * When `siteName` is truthy and the trimmed title ends with a separator
 * (`-`, `|`, `:` or `・`, optionally padded by one whitespace character on
 * each side) followed by the trimmed site name, only the text in front of the
 * separator is returned. Otherwise the trimmed title is returned as is.
 *
 * @param title Raw page title.
 * @param siteName Site name to strip from the end of the title; falsy values skip the stripping step.
 * @returns The cleaned-up title.
 */
export default function (title: string, siteName?: string | null): string;

View File

@ -0,0 +1,19 @@
import escapeRegExp from 'escape-regexp';
/**
 * Trim a page title and, when a site name is supplied, remove a trailing
 * "<separator> <site name>" suffix from it.
 *
 * Recognised separators are `-`, `|`, `:` and `・`, each optionally padded by
 * a single whitespace character on either side.
 *
 * @param title Raw page title.
 * @param siteName Site name to strip; falsy values leave the title untouched (apart from trimming).
 * @returns The title without the site-name suffix, or the trimmed title when nothing matched.
 */
export default function (title, siteName) {
    const trimmedTitle = title.trim();
    if (!siteName) {
        return trimmedTitle;
    }
    // The site name is matched literally, so regex metacharacters in it are escaped.
    const escapedSiteName = escapeRegExp(siteName.trim());
    const suffixPatterns = [
        `^(.+?)\\s?[\\-\\|:・]\\s?${escapedSiteName}$`
    ];
    for (const source of suffixPatterns) {
        const found = new RegExp(source).exec(trimmedTitle);
        // Capture group 1 is the part of the title in front of the separator.
        const stripped = found ? found[1] : null;
        if (stripped) {
            return stripped;
        }
    }
    return trimmedTitle;
}

1
built/utils/clip.d.ts vendored Normal file
View File

@ -0,0 +1 @@
/**
 * Trim a string and shorten it when it is longer than `max`.
 *
 * Values that are null, undefined or whitespace-only are returned unchanged.
 * Otherwise the string is trimmed; if the trimmed length exceeds `max`, at
 * most the first `max` UTF-16 code units are kept and `...` is appended.
 *
 * @param s Text to shorten.
 * @param max Maximum number of UTF-16 code units to keep before the `...` marker.
 * @returns The (possibly shortened) text.
 */
export default function (s: string, max: number): string;

13
built/utils/clip.js Normal file
View File

@ -0,0 +1,13 @@
import nullOrEmpty from './null-or-empty.js';
/**
 * Trim a string and shorten it to at most `max` UTF-16 code units, appending
 * `...` when something was cut off.
 *
 * Null, undefined and whitespace-only values are returned unchanged.
 * The cut never lands in the middle of a surrogate pair: if the last kept
 * code unit is the first half of a pair, it is dropped as well, so the result
 * never ends in a lone surrogate (e.g. half of an emoji).
 *
 * @param s Text to shorten.
 * @param max Maximum number of UTF-16 code units to keep before the `...` marker.
 * @returns The (possibly shortened) text.
 */
export default function (s, max) {
    if (nullOrEmpty(s)) {
        return s;
    }
    const text = s.trim();
    // Keep the `>` comparison as is so that a NaN `max` still means "do not truncate".
    if (!(text.length > max)) {
        return text;
    }
    // substring(0, max) clamps negative/NaN lengths to 0 exactly like the deprecated substr(0, max).
    let head = text.substring(0, max);
    const lastKept = head.charCodeAt(head.length - 1);
    const firstDropped = text.charCodeAt(head.length);
    const endsWithHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
    const continuesWithLowSurrogate = firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    if (endsWithHighSurrogate && continuesWithLowSurrogate) {
        // The cut would split one astral character in two; drop its first half too.
        head = head.slice(0, -1);
    }
    return head + '...';
}

8
built/utils/encoding.d.ts vendored Normal file
View File

@ -0,0 +1,8 @@
/// <reference types="node" />
/**
 * Detect HTML encoding
 *
 * Tries statistical detection first (jschardet with a minimum threshold of
 * 0.99), then a `charset=` declaration found in the body, and finally falls
 * back to `'utf-8'`. Shift_JIS aliases are reported as `'cp932'`.
 * @param body Body in Buffer
 * @returns encoding
 */
export declare function detectEncoding(body: Buffer): string;
/**
 * Decode a raw body into a string with iconv-lite.
 * @param body Raw bytes to decode.
 * @param encoding Name of the encoding the bytes are in (e.g. the value returned by `detectEncoding`).
 * @returns The decoded text.
 */
export declare function toUtf8(body: Buffer, encoding: string): string;

40
built/utils/encoding.js Normal file
View File

@ -0,0 +1,40 @@
import iconv from 'iconv-lite';
import jschardet from 'jschardet';
// Captures the charset name from a `charset=...` declaration (quotes optional), ignoring case.
const regCharset = /charset\s*=\s*["']?([\w-]+)/i;
/**
 * Detect HTML encoding
 *
 * Resolution order: statistical detection, then a `charset=` declaration in
 * the body, then the `'utf-8'` fallback.
 * @param body Body in Buffer
 * @returns encoding
 */
export function detectEncoding(body) {
    // 1) Statistical detection; only guesses reaching the 0.99 threshold are reported.
    const guess = jschardet.detect(body, { minimumThreshold: 0.99 });
    const guessed = guess ? toEncoding(guess.encoding) : null;
    if (guessed != null) {
        return guessed;
    }
    // 2) charset declaration inside the document, read from an ASCII view of the bytes.
    const declaration = body.toString('ascii').match(regCharset);
    const declared = declaration ? toEncoding(declaration[1]) : null;
    if (declared != null) {
        return declared;
    }
    // 3) Nothing usable was found.
    return 'utf-8';
}
/**
 * Decode a raw body into a string with iconv-lite.
 * @param body Raw bytes to decode.
 * @param encoding Name of the encoding the bytes are in.
 * @returns The decoded text.
 */
export function toUtf8(body, encoding) {
    const decoded = iconv.decode(body, encoding);
    return decoded;
}
/**
 * Map a candidate encoding name to the name that should be used for decoding.
 * @param candicate Encoding name reported by detection or by a charset declaration.
 * @returns `'cp932'` for Shift_JIS aliases, the candidate itself for any other
 *          encoding iconv-lite knows, or `null` when iconv-lite does not know it.
 */
function toEncoding(candicate) {
    if (!iconv.encodingExists(candicate)) {
        return null;
    }
    const shiftJisAliases = ['shift_jis', 'shift-jis', 'windows-31j', 'x-sjis'];
    const isShiftJis = shiftJisAliases.indexOf(candicate.toLowerCase()) !== -1;
    return isShiftJis ? 'cp932' : candicate;
}

20
built/utils/got.d.ts vendored Normal file
View File

@ -0,0 +1,20 @@
import * as Got from 'got';
import * as cheerio from 'cheerio';
/** Agents handed to every request issued by this module; starts out as an empty object. */
export declare let agent: Got.Agents;
/**
 * Replace the agents used for subsequent requests.
 * @param _agent New agents; a falsy value resets `agent` to an empty object.
 */
export declare function setAgent(_agent: Got.Agents): void;
/** Arguments describing a single HTTP request made through this module. */
export declare type GotOptions = {
/** Target URL. */
url: string;
/** HTTP method. */
method: 'GET' | 'POST' | 'HEAD';
/** Optional request body. */
body?: string;
/** Request headers. */
headers: Record<string, string | undefined>;
/** When set, a response whose `content-type` header does not match is cancelled. */
typeFilter?: RegExp;
};
/**
 * Fetch an HTML page and parse it.
 *
 * Sends a GET request that accepts only HTML/XHTML content types, rejects
 * responses that came from a private IP address (unless the environment
 * variable `SUMMALY_ALLOW_PRIVATE_IP` is `'true'`), decodes the raw body with
 * the detected encoding and loads the result into cheerio.
 *
 * NOTE(review): the name looks like a misspelling of "scraping", but it is
 * part of the exported API.
 *
 * @param url URL of the page to fetch.
 * @param opts Optional settings; `lang` is sent as the `accept-language` request header.
 * @returns The decoded body, the cheerio handle loaded from it, and the got response.
 */
export declare function scpaping(url: string, opts?: {
lang?: string;
}): Promise<{
body: string;
$: cheerio.CheerioAPI;
response: Got.Response<string>;
}>;
/**
 * Send a GET request that accepts any content type.
 * @param url URL to fetch.
 * @returns The response body.
 */
export declare function get(url: string): Promise<string>;
/**
 * Send a HEAD request that accepts any content type.
 * @param url URL to query.
 * @returns The got response.
 */
export declare function head(url: string): Promise<Got.Response<string>>;

123
built/utils/got.js Normal file
View File

@ -0,0 +1,123 @@
import got, * as Got from 'got';
import { StatusError } from './status-error.js';
import { detectEncoding, toUtf8 } from './encoding.js';
import * as cheerio from 'cheerio';
import PrivateIp from 'private-ip';
import { dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { readFileSync } from 'node:fs';
// File-system path of this module, derived from its module URL.
const _filename = fileURLToPath(import.meta.url);
// Directory containing this module; used below to locate package.json.
const _dirname = dirname(_filename);
export let agent = {};
export function setAgent(_agent) {
agent = _agent || {};
}
// Parsed package.json found two directories above this file, read synchronously at module load.
const repo = JSON.parse(readFileSync(`${_dirname}/../../package.json`, 'utf8'));
// Timeout in milliseconds applied to each individual request phase (lookup, connect, response, ...).
const RESPONSE_TIMEOUT = 20 * 1000;
// Timeout in milliseconds for a whole request.
const OPERATION_TIMEOUT = 60 * 1000;
// Largest response accepted, in bytes (10 MiB).
const MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
// User-Agent header value sent when scraping pages; carries the version from package.json.
const BOT_UA = `SummalyBot/${repo.version}`;
/**
 * Fetch an HTML page and parse it.
 *
 * Only HTML/XHTML responses are accepted. Responses that came from a private
 * IP address are rejected with a 400 `StatusError` unless the environment
 * variable `SUMMALY_ALLOW_PRIVATE_IP` is `'true'`.
 *
 * @param url URL of the page to fetch.
 * @param opts Optional settings; `lang` is sent as the `accept-language` header.
 * @returns The decoded body, the cheerio handle loaded from it, and the got response.
 */
export async function scpaping(url, opts) {
    const requestHeaders = {
        'accept': 'text/html,application/xhtml+xml',
        'user-agent': BOT_UA,
        'accept-language': opts?.lang
    };
    const response = await getResponse({
        url,
        method: 'GET',
        headers: requestHeaders,
        typeFilter: /^(text\/html|application\/xhtml\+xml)/,
    });
    // For testing: the private-IP rejection can be switched off through the environment.
    const privateIpAllowed = process.env.SUMMALY_ALLOW_PRIVATE_IP === 'true';
    const servedFromPrivateIp = !privateIpAllowed && response.ip && PrivateIp(response.ip);
    if (servedFromPrivateIp) {
        throw new StatusError(`Private IP rejected ${response.ip}`, 400, 'Private IP Rejected');
    }
    // Decode the raw bytes with whatever encoding the document turns out to use.
    const raw = response.rawBody;
    const body = toUtf8(raw, detectEncoding(raw));
    return {
        body,
        $: cheerio.load(body),
        response,
    };
}
/**
 * Send a GET request that accepts any content type.
 * @param url URL to fetch.
 * @returns The response body.
 */
export async function get(url) {
    const request = {
        url,
        method: 'GET',
        headers: { 'accept': '*/*' },
    };
    const { body } = await getResponse(request);
    return body;
}
/**
 * Send a HEAD request that accepts any content type.
 * @param url URL to query.
 * @returns The got response.
 */
export async function head(url) {
    const request = {
        url,
        method: 'HEAD',
        headers: { 'accept': '*/*' },
    };
    const response = await getResponse(request);
    return response;
}
/**
 * Issue a request through got with this module's timeouts and agents, then
 * wait for the response while the type filter and size cap are enforced.
 * @param args Request description: `url`, `method`, `headers`, optional `body` and `typeFilter`.
 * @returns The got response.
 */
async function getResponse(args) {
    const { url, method, headers, body, typeFilter } = args;
    const phaseTimeout = RESPONSE_TIMEOUT;
    const timeout = {
        lookup: phaseTimeout,
        connect: phaseTimeout,
        secureConnect: phaseTimeout,
        socket: phaseTimeout,
        response: phaseTimeout,
        send: phaseTimeout,
        // Upper bound for the whole operation.
        request: OPERATION_TIMEOUT,
    };
    const req = got(url, {
        method,
        headers,
        body,
        timeout,
        agent,
        http2: false,
        // Retries are disabled: a failed request fails immediately.
        retry: { limit: 0 },
    });
    return await receiveResponce({ req, typeFilter });
}
/**
 * Wait for a got request to finish while enforcing the content-type filter
 * and the response size cap.
 *
 * The request is cancelled when
 * - `args.typeFilter` is set and the `content-type` header does not match it,
 * - the `content-length` header announces more than MAX_RESPONSE_SIZE bytes, or
 * - more than MAX_RESPONSE_SIZE bytes have been transferred while downloading.
 *
 * @param args `req` is the pending got request; `typeFilter` is an optional RegExp for the content type.
 * @returns The got response.
 * @throws StatusError when got reports an HTTP error status; any other error is rethrown unchanged.
 */
async function receiveResponce(args) {
    const req = args.req;
    const maxSize = MAX_RESPONSE_SIZE;
    req.on('response', (res) => {
        // Check html
        const contentType = res.headers['content-type'];
        if (args.typeFilter && !contentType?.match(args.typeFilter)) {
            req.cancel(`Rejected by type filter ${contentType}`);
            return;
        }
        // Size check based on the response header
        const contentLength = res.headers['content-length'];
        if (contentLength != null) {
            const size = Number(contentLength);
            if (size > maxSize) {
                req.cancel(`maxSize exceeded (${size} > ${maxSize}) on response`);
            }
        }
    });
    // Size check against the data received so far
    req.on('downloadProgress', (progress) => {
        // NOTE(review): a progress event reporting percent === 1 is never cancelled — confirm this is intended.
        if (progress.transferred > maxSize && progress.percent !== 1) {
            // Name the stage correctly so this cancellation can be told apart from the header check above.
            req.cancel(`maxSize exceeded (${progress.transferred} > ${maxSize}) on downloadProgress`);
        }
    });
    // Await the response, converting HTTP status errors into StatusError
    try {
        return await req;
    }
    catch (e) {
        if (e instanceof Got.HTTPError) {
            throw new StatusError(`${e.response.statusCode} ${e.response.statusMessage}`, e.response.statusCode, e.response.statusMessage);
        }
        throw e;
    }
}

1
built/utils/null-or-empty.d.ts vendored Normal file
View File

@ -0,0 +1 @@
/**
 * Tell whether a value is missing or blank.
 *
 * NOTE(review): the implementation explicitly checks for `undefined` and
 * `null` before trimming, so the parameter type could be widened to
 * `string | null | undefined` — confirm against the TypeScript source.
 *
 * @param val Value to test.
 * @returns `true` for undefined, null, or a string that is empty after trimming; `false` otherwise.
 */
export default function (val: string): boolean;

View File

@ -0,0 +1,14 @@
/**
 * Tell whether a value is missing or blank.
 * @param val Value to test.
 * @returns `true` for undefined, null, or a string that is empty after trimming; `false` otherwise.
 */
export default function (val) {
    const missing = val === undefined || val === null;
    // Only trim once the value is known to be present.
    return missing || val.trim() === '';
}

6
built/utils/status-error.d.ts vendored Normal file
View File

@ -0,0 +1,6 @@
/** Error that carries a status code and status message alongside the error message. */
export declare class StatusError extends Error {
/** Status code given to the constructor. */
statusCode: number;
/** Status message given to the constructor, if any. */
statusMessage?: string;
/** `true` when `statusCode` is a number in the range 400–499. */
isPermanentError: boolean;
/**
 * @param message Error message.
 * @param statusCode Status code to attach.
 * @param statusMessage Optional status message to attach.
 */
constructor(message: string, statusCode: number, statusMessage?: string);
}

View File

@ -0,0 +1,9 @@
export class StatusError extends Error {
constructor(message, statusCode, statusMessage) {
super(message);
this.name = 'StatusError';
this.statusCode = statusCode;
this.statusMessage = statusMessage;
this.isPermanentError = typeof this.statusCode === 'number' && this.statusCode >= 400 && this.statusCode < 500;
}
}