mirror of
https://github.com/misskey-dev/summaly.git
synced 2025-04-29 02:37:27 +09:00
add workflows, mod chglog, remove built
This commit is contained in:
parent
81de408b54
commit
c45b3a1c63
@ -5,6 +5,11 @@ indent_style = tab
|
||||
indent_size = 2
|
||||
charset = utf-8
|
||||
insert_final_newline = true
|
||||
end_of_line = lf
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
[*.json]
|
||||
[*.md]
|
||||
trim_trailing_whitespace = false
|
||||
|
||||
[*.{yml,yaml}]
|
||||
indent_style = space
|
||||
|
36
.github/workflows/npm-publish.yml
vendored
Normal file
36
.github/workflows/npm-publish.yml
vendored
Normal file
@ -0,0 +1,36 @@
|
||||
name: Publish Node.js Package

on:
  release:
    types: [created]
  workflow_dispatch:

jobs:
  publish:
    runs-on: ubuntu-latest

    # `pnpm publish --provenance` has the registry sign the package with an
    # OIDC token issued by GitHub, so the job must be allowed to request one.
    permissions:
      contents: read
      id-token: write

    strategy:
      matrix:
        node-version: [20.10.0]

    steps:
      - uses: actions/checkout@v4
      - name: Install pnpm
        uses: pnpm/action-setup@v2
        with:
          version: 8
          run_install: false
      - name: Use Node.js ${{ matrix.node-version }}
        uses: actions/setup-node@v4
        with:
          node-version: ${{ matrix.node-version }}
          cache: 'pnpm'
          # Makes setup-node write an .npmrc that reads the token from
          # NODE_AUTH_TOKEN; without it the env var below is never consulted.
          registry-url: 'https://registry.npmjs.org'
      - name: Publish package
        run: |
          corepack enable
          pnpm i --frozen-lockfile
          pnpm build
          pnpm publish --access public --no-git-checks --provenance
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
|
32
.github/workflows/test.yml
vendored
Normal file
32
.github/workflows/test.yml
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
# Runs the build and the test suite on every push and pull request.
name: Test

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        node-version: [20.10.0]

    steps:
      - uses: actions/checkout@v4
      - name: Install pnpm
        uses: pnpm/action-setup@v2
        with:
          version: 8
          run_install: false
      - name: Use Node.js ${{ matrix.node-version }}
        uses: actions/setup-node@v4
        with:
          node-version: ${{ matrix.node-version }}
          cache: 'pnpm'
      - name: Install
        run: |
          corepack enable
          pnpm i --frozen-lockfile
          pnpm build
      - name: Test
        run: |
          pnpm test
|
@ -1,6 +1,7 @@
|
||||
4.x.x (Unreleased) / 2023-09-xx
|
||||
4.1.0 / 2023-12-30
|
||||
------------------
|
||||
* branch.ioを用いたディープリンク(spotify.link)などでパースに失敗する問題を修正
|
||||
* 'mixi:content-rating'をsensitive判定で見ることで、dlsiteなどでセンシティブ情報を得られるように
|
||||
|
||||
4.0.2 / 2023-04-20
|
||||
------------------
|
||||
|
4
built/general.d.ts
vendored
4
built/general.d.ts
vendored
@ -1,4 +0,0 @@
|
||||
import { URL } from 'node:url';
|
||||
import type { default as Summary } from './summary.js';
|
||||
declare const _default: (_url: URL | string, lang?: string | null) => Promise<Summary | null>;
|
||||
export default _default;
|
218
built/general.js
218
built/general.js
@ -1,218 +0,0 @@
|
||||
import { URL } from 'node:url';
|
||||
import clip from './utils/clip.js';
|
||||
import cleanupTitle from './utils/cleanup-title.js';
|
||||
import { decode as decodeHtml } from 'html-entities';
|
||||
import { get, head, scpaping } from './utils/got.js';
|
||||
import * as cheerio from 'cheerio';
|
||||
/**
 * Converts a raw width/height value (an iframe attribute or an oEmbed field)
 * to a number.
 *
 * Missing and blank values yield NaN rather than the 0 that `Number(null)` /
 * `Number('')` would produce, so callers can treat them as "no information".
 * Percentages such as `100%` also yield NaN.
 */
function parseOEmbedDimension(value) {
	if (value == null) {
		return NaN;
	}
	if (typeof value === 'string' && value.trim() === '') {
		return NaN;
	}
	return Number(value);
}

/**
 * Builds the player information from the page's JSON oEmbed endpoint.
 *
 * Contains only the html snippet for a sanitized iframe as the thumbnail is
 * mostly covered in OpenGraph instead.
 *
 * Width should always be 100%.
 *
 * @param $ Cheerio handle of the already fetched page.
 * @param pageUrl URL of the page; used to resolve a relative oEmbed link.
 * @returns `{ url, width, height, allow }`, or null when the page has no
 *          usable oEmbed iframe.
 */
async function getOEmbedPlayer($, pageUrl) {
	const href = $('link[type="application/json+oembed"]').attr('href');
	if (!href) {
		return null;
	}

	let oEmbedUrl;
	try {
		oEmbedUrl = new URL(href, pageUrl);
	} catch {
		return null;
	}

	// A failing oEmbed request must not fail the whole summary.
	const oEmbed = await get(oEmbedUrl.href).catch(() => null);
	if (!oEmbed) {
		return null;
	}

	let body;
	try {
		body = JSON.parse(oEmbed);
	} catch {
		return null;
	}

	if (!body || body.version !== '1.0' || !['rich', 'video'].includes(body.type)) {
		// Not a well formed rich oEmbed
		return null;
	}

	if (typeof body.html !== 'string') {
		// `html` is missing or not text; calling startsWith on it would throw
		return null;
	}

	if (!body.html.startsWith('<iframe ') || !body.html.endsWith('</iframe>')) {
		// It includes something else than an iframe
		return null;
	}

	const oEmbedHtml = cheerio.load(body.html);
	const iframe = oEmbedHtml("iframe");

	if (iframe.length !== 1) {
		// Somehow we either have multiple iframes or none
		return null;
	}

	if (iframe.parents().length !== 2) {
		// Should only have the body and html elements as the parents
		return null;
	}

	const url = iframe.attr('src');
	if (!url) {
		// No src?
		return null;
	}

	try {
		if ((new URL(url)).protocol !== 'https:') {
			// Allow only HTTPS for best security
			return null;
		}
	} catch (e) {
		return null;
	}

	// Height is the most important, width is okay to be null. The implementer
	// should choose fixed height instead of fixed aspect ratio if width is null.
	//
	// For example, Spotify's embed page does not strictly follow aspect ratio
	// and thus keeping the height is better than keeping the aspect ratio.
	//
	// Spotify gives `width: 100%, height: 152px` for iframe while `width: 456,
	// height: 152` for oEmbed data, and we treat any percentages as null here.
	let width = parseOEmbedDimension(iframe.attr('width') ?? body.width);
	if (Number.isNaN(width)) {
		width = null;
	}

	const height = Math.min(parseOEmbedDimension(iframe.attr('height') ?? body.height), 1024);
	if (Number.isNaN(height)) {
		// No proper height info
		return null;
	}

	// TODO: This implementation only allows basic syntax of `allow`.
	// Might need to implement better later.
	const safeList = [
		'autoplay',
		'clipboard-write',
		'fullscreen',
		'encrypted-media',
		'picture-in-picture',
		'web-share',
	];
	// YouTube has these but they are almost never used.
	const ignoredList = [
		'gyroscope',
		'accelerometer',
	];

	const allowedPermissions = (iframe.attr('allow') ?? '').split(/\s*;\s*/g)
		.filter(s => s)
		.filter(s => !ignoredList.includes(s));

	// The legacy boolean attribute maps to the `fullscreen` permission; do not
	// list it twice when `allow` already names it.
	if (iframe.attr('allowfullscreen') === '' && !allowedPermissions.includes('fullscreen')) {
		allowedPermissions.push('fullscreen');
	}

	if (allowedPermissions.some(allow => !safeList.includes(allow))) {
		// This iframe is probably too powerful to be embedded
		return null;
	}

	return {
		url,
		width,
		height,
		allow: allowedPermissions
	};
}
|
||||
/**
 * Summarizes a web page from its OpenGraph / Twitter Card / plain HTML
 * metadata, plus an oEmbed player when one is available.
 *
 * @param _url Page to summarize, as a URL object or an absolute URL string.
 * @param lang Value for the Accept-Language header; discarded when it is not
 *             a comma-separated list of language tags.
 * @returns The summary, or null when the page yields no title value.
 */
export default async (_url, lang = null) => {
	if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) {
		lang = null;
	}

	const url = typeof _url === 'string' ? new URL(_url) : _url;

	// Resolves a (possibly relative) reference found in the page against the
	// page URL. Page-supplied values can be malformed, so a parse failure
	// yields null instead of aborting the whole summary.
	const resolveFromPage = (value) => {
		try {
			return new URL(value, url.href);
		} catch {
			return null;
		}
	};

	const res = await scpaping(url.href, { lang: lang || undefined });
	const $ = res.$;

	const twitterCard = $('meta[name="twitter:card"]').attr('content') ||
		$('meta[property="twitter:card"]').attr('content');

	// According to docs, name attribute of meta tag is used for twitter card but for compatibility,
	// this library will also look for property attribute.
	// See https://developer.twitter.com/en/docs/twitter-for-websites/cards/overview/summary
	// Property attribute is used for open graph.
	// See https://ogp.me/
	let title = $('meta[property="og:title"]').attr('content') ||
		$('meta[name="twitter:title"]').attr('content') ||
		$('meta[property="twitter:title"]').attr('content') ||
		$('title').text();

	if (title == null) {
		return null;
	}

	title = clip(decodeHtml(title), 100);

	const imageRef = $('meta[property="og:image"]').attr('content') ||
		$('meta[name="twitter:image"]').attr('content') ||
		$('meta[property="twitter:image"]').attr('content') ||
		$('link[rel="image_src"]').attr('href') ||
		$('link[rel="apple-touch-icon"]').attr('href') ||
		$('link[rel="apple-touch-icon image_src"]').attr('href');

	const image = imageRef ? (resolveFromPage(imageRef)?.href ?? null) : null;

	// A `summary_large_image` card is an image card, so its twitter:player
	// value is ignored in favour of og:video.
	const playerUrl = (twitterCard !== 'summary_large_image' && $('meta[name="twitter:player"]').attr('content')) ||
		(twitterCard !== 'summary_large_image' && $('meta[property="twitter:player"]').attr('content')) ||
		$('meta[property="og:video"]').attr('content') ||
		$('meta[property="og:video:secure_url"]').attr('content') ||
		$('meta[property="og:video:url"]').attr('content');

	const playerWidth = parseInt(
		$('meta[name="twitter:player:width"]').attr('content') ||
		$('meta[property="twitter:player:width"]').attr('content') ||
		$('meta[property="og:video:width"]').attr('content') ||
		'', 10);

	const playerHeight = parseInt(
		$('meta[name="twitter:player:height"]').attr('content') ||
		$('meta[property="twitter:player:height"]').attr('content') ||
		$('meta[property="og:video:height"]').attr('content') ||
		'', 10);

	let description = $('meta[property="og:description"]').attr('content') ||
		$('meta[name="twitter:description"]').attr('content') ||
		$('meta[property="twitter:description"]').attr('content') ||
		$('meta[name="description"]').attr('content');

	description = description
		? clip(decodeHtml(description), 300)
		: null;

	// A description that merely repeats the title adds nothing.
	if (title === description) {
		description = null;
	}

	const siteName = decodeHtml(
		$('meta[property="og:site_name"]').attr('content') ||
		$('meta[name="application-name"]').attr('content') ||
		url.hostname);

	const favicon = $('link[rel="shortcut icon"]').attr('href') ||
		$('link[rel="icon"]').attr('href') ||
		'/favicon.ico';

	const activityPub = $('link[rel="alternate"][type="application/activity+json"]').attr('href') || null;

	// https://developer.mixi.co.jp/connect/mixi_plugin/mixi_check/spec_mixi_check/#toc-18-
	const sensitive = $("meta[property='mixi:content-rating']").attr('content') === '1' ||
		$('.tweet').attr('data-possibly-sensitive') === 'true';

	// Returns the resolved URL when a HEAD request for it succeeds, else null.
	const find = async (path) => {
		const target = resolveFromPage(path);
		if (!target) {
			return null;
		}
		try {
			await head(target.href);
			return target;
		} catch (e) {
			return null;
		}
	};

	const getIcon = async () => {
		return (await find(favicon)) || null;
	};

	// The icon probe and the oEmbed lookup are independent; run them together.
	const [icon, oEmbed] = await Promise.all([
		getIcon(),
		getOEmbedPlayer($, url.href),
	]);

	// Clean up the title
	title = cleanupTitle(title, siteName);

	if (title === '') {
		title = siteName;
	}

	return {
		title: title || null,
		icon: icon?.href || null,
		description: description || null,
		thumbnail: image || null,
		player: oEmbed ?? {
			url: playerUrl || null,
			width: Number.isNaN(playerWidth) ? null : playerWidth,
			height: Number.isNaN(playerHeight) ? null : playerHeight,
			allow: ['autoplay', 'encrypted-media', 'fullscreen'],
		},
		sitename: siteName || null,
		sensitive,
		activityPub,
	};
};
|
39
built/index.d.ts
vendored
39
built/index.d.ts
vendored
@ -1,39 +0,0 @@
|
||||
/**
|
||||
* summaly
|
||||
* https://github.com/syuilo/summaly
|
||||
*/
|
||||
import Summary from './summary.js';
|
||||
import type { IPlugin as _IPlugin } from './iplugin.js';
|
||||
export declare type IPlugin = _IPlugin;
|
||||
import * as Got from 'got';
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
declare type Options = {
|
||||
/**
|
||||
* Accept-Language for the request
|
||||
*/
|
||||
lang?: string | null;
|
||||
/**
|
||||
* Whether to follow redirects
|
||||
*/
|
||||
followRedirects?: boolean;
|
||||
/**
|
||||
* Custom Plugins
|
||||
*/
|
||||
plugins?: IPlugin[];
|
||||
/**
|
||||
* Custom HTTP agent
|
||||
*/
|
||||
agent?: Got.Agents;
|
||||
};
|
||||
declare type Result = Summary & {
|
||||
/**
|
||||
* The actual url of that web page
|
||||
*/
|
||||
url: string;
|
||||
};
|
||||
/**
|
||||
* Summarize a web page
|
||||
*/
|
||||
export declare const summaly: (url: string, options?: Options | undefined) => Promise<Result>;
|
||||
export default function (fastify: FastifyInstance, options: Options, done: (err?: Error) => void): void;
|
||||
export {};
|
@ -1,68 +0,0 @@
|
||||
/**
|
||||
* summaly
|
||||
* https://github.com/syuilo/summaly
|
||||
*/
|
||||
import { URL } from 'node:url';
|
||||
import tracer from 'trace-redirect';
|
||||
import general from './general.js';
|
||||
import { setAgent } from './utils/got.js';
|
||||
import { plugins as builtinPlugins } from './plugins/index.js';
|
||||
/** Option values used for every field the caller does not supply. */
const defaultOptions = {
	lang: null,
	followRedirects: true,
	plugins: [],
};

/**
 * Summarize a web page
 *
 * @param url Absolute URL of the page.
 * @param options Optional overrides for `defaultOptions`; `options.agent`, when
 *                given, is installed through `setAgent` before anything else.
 * @returns The summary with the finally used URL added as `url`.
 * @throws The string 'failed summarize' when the summarizer returns nothing.
 */
export const summaly = async (url, options) => {
	if (options?.agent) {
		setAgent(options.agent);
	}

	// Merge into a fresh object. Merging into `defaultOptions` itself would
	// overwrite the shared defaults, so one call's options (e.g.
	// `followRedirects: false`) would silently apply to every later call.
	const opts = Object.assign({}, defaultOptions, options);

	const plugins = builtinPlugins.concat(opts.plugins || []);

	let actualUrl = url;
	if (opts.followRedirects) {
		// `.catch(() => url)` would do, but feeding trace-redirect to jest is a
		// hassle, hence the try-catch.
		try {
			actualUrl = await tracer(url);
		} catch (e) {
			actualUrl = url;
		}
	}

	const _url = new URL(actualUrl);

	// Find matching plugin (the first one whose test accepts the URL)
	const match = plugins.find(plugin => plugin.test(_url));

	// Get summary
	const summary = await (match ? match.summarize : general)(_url, opts.lang || undefined);

	if (summary == null) {
		// Thrown as a plain string; the value is kept as-is because the HTTP
		// handler in this file forwards it verbatim as the `error` field.
		throw 'failed summarize';
	}

	return Object.assign(summary, {
		url: actualUrl
	});
};
|
||||
/**
 * Fastify plugin that exposes the summarizer on `GET /`.
 *
 * The `url` query parameter is required (400 otherwise) and `lang` is
 * optional. Redirect following is off unless `options` turns it on, because
 * `options` is spread last and so overrides the per-request values. Any
 * error raised while summarizing is returned as a 500 with the error in the
 * `error` field.
 */
export default function (fastify, options, done) {
	const handleSummaryRequest = async (req, reply) => {
		const { url, lang } = req.query;

		if (url == null) {
			return reply.status(400).send({
				error: 'url is required'
			});
		}

		const requestOptions = {
			lang,
			followRedirects: false,
			...options,
		};

		try {
			return await summaly(url, requestOptions);
		} catch (e) {
			return reply.status(500).send({
				error: e
			});
		}
	};

	fastify.get('/', handleSummaryRequest);

	done();
}
|
7
built/iplugin.d.ts
vendored
7
built/iplugin.d.ts
vendored
@ -1,7 +0,0 @@
|
||||
/// <reference types="node" />
|
||||
import type { URL } from 'node:url';
|
||||
import Summary from './summary.js';
|
||||
export interface IPlugin {
|
||||
test: (url: URL) => boolean;
|
||||
summarize: (url: URL, lang?: string) => Promise<Summary | null>;
|
||||
}
|
@ -1 +0,0 @@
|
||||
export {};
|
5
built/plugins/amazon.d.ts
vendored
5
built/plugins/amazon.d.ts
vendored
@ -1,5 +0,0 @@
|
||||
/// <reference types="node" />
|
||||
import { URL } from 'node:url';
|
||||
import summary from '../summary.js';
|
||||
export declare function test(url: URL): boolean;
|
||||
export declare function summarize(url: URL): Promise<summary>;
|
@ -1,45 +0,0 @@
|
||||
import { scpaping } from '../utils/got.js';
|
||||
/**
 * Hostnames of the Amazon storefronts this plugin handles.
 *
 * Amazon Australia is served from `www.amazon.com.au`; the previously listed
 * `www.amazon.au` is kept so nothing that matched before stops matching.
 */
const AMAZON_HOSTNAMES = new Set([
	'www.amazon.com',
	'www.amazon.co.jp',
	'www.amazon.ca',
	'www.amazon.com.br',
	'www.amazon.com.mx',
	'www.amazon.co.uk',
	'www.amazon.de',
	'www.amazon.fr',
	'www.amazon.it',
	'www.amazon.es',
	'www.amazon.nl',
	'www.amazon.cn',
	'www.amazon.in',
	'www.amazon.com.au',
	'www.amazon.au',
]);

/**
 * Tells whether this plugin should summarize the given URL.
 *
 * @param url URL to check; only its `hostname` is inspected.
 * @returns true when the hostname is a known Amazon storefront.
 */
export function test(url) {
	return AMAZON_HOSTNAMES.has(url.hostname);
}
|
||||
/**
 * Summarizes an Amazon product page from its `#title`,
 * `#productDescription` and `#landingImage` elements plus the Twitter
 * player meta tags.
 *
 * @param url Product page URL.
 * @returns The summary; fields that are absent from the page are null.
 */
export async function summarize(url) {
	const res = await scpaping(url.href);
	const $ = res.$;

	// Twitter card values are looked up under `property` first, then `name`.
	const twitterMeta = (key) =>
		$(`meta[property="${key}"]`).attr('content') ||
		$(`meta[name="${key}"]`).attr('content');

	// Parses a pixel size as base 10. Missing or non-numeric values become
	// null so that NaN never reaches the result.
	const toDimension = (raw) => {
		if (!raw) {
			return null;
		}
		const parsed = parseInt(raw, 10);
		return Number.isNaN(parsed) ? null : parsed;
	};

	const title = $('#title').text();

	const description =
		$('#productDescription').text() ||
		$('meta[name="description"]').attr('content');

	const thumbnail = $('#landingImage').attr('src');

	const playerUrl = twitterMeta('twitter:player');
	const playerWidth = twitterMeta('twitter:player:width');
	const playerHeight = twitterMeta('twitter:player:height');

	return {
		title: title ? title.trim() : null,
		icon: 'https://www.amazon.com/favicon.ico',
		description: description ? description.trim() : null,
		thumbnail: thumbnail ? thumbnail.trim() : null,
		player: {
			url: playerUrl || null,
			width: toDimension(playerWidth),
			height: toDimension(playerHeight),
			allow: playerUrl ? ['fullscreen', 'encrypted-media'] : [],
		},
		sitename: 'Amazon',
		activityPub: null,
	};
}
|
5
built/plugins/branchio-deeplinks.d.ts
vendored
5
built/plugins/branchio-deeplinks.d.ts
vendored
@ -1,5 +0,0 @@
|
||||
/// <reference types="node" />
|
||||
import { URL } from 'node:url';
|
||||
import Summary from '../summary.js';
|
||||
export declare function test(url: URL): boolean;
|
||||
export declare function summarize(url: URL, lang?: string | null): Promise<Summary | null>;
|
@ -1,12 +0,0 @@
|
||||
import general from '../general.js';
|
||||
/**
 * Tells whether the URL is a Branch.io deep link.
 *
 * Matches `<name>.app.link` hosts and `spotify.link`. The subdomain may
 * contain hyphens so that hosts such as `<name>-alternate.app.link` match
 * too.
 *
 * @param url URL to check; only its `hostname` is inspected.
 */
export function test(url) {
	return /^[a-zA-Z0-9-]+\.app\.link$/.test(url.hostname) ||
		url.hostname === 'spotify.link';
}
|
||||
/**
 * Summarizes the web page behind a Branch.io deep link.
 *
 * @param url Deep link URL; it is not modified.
 * @param lang Forwarded unchanged to the general summarizer.
 * @returns Whatever the general summarizer returns for the web version.
 */
export async function summarize(url, lang = null) {
	// https://help.branch.io/using-branch/docs/creating-a-deep-link#redirections
	// Force the redirect to the web version so that Branch.io's own
	// interstitial page is not opened.
	//
	// Work on a copy so the caller's URL object is left untouched, and use
	// `set` so an existing `$web_only` value is replaced rather than duplicated.
	const webOnlyUrl = new URL(url.href);
	webOnlyUrl.searchParams.set('$web_only', 'true');

	return await general(webOnlyUrl, lang);
}
|
2
built/plugins/index.d.ts
vendored
2
built/plugins/index.d.ts
vendored
@ -1,2 +0,0 @@
|
||||
import { IPlugin } from '@/iplugin.js';
|
||||
export declare const plugins: IPlugin[];
|
@ -1,8 +0,0 @@
|
||||
import * as amazon from './amazon.js';
|
||||
import * as wikipedia from './wikipedia.js';
|
||||
import * as branchIoDeeplinks from './branchio-deeplinks.js';
|
||||
export const plugins = [
|
||||
amazon,
|
||||
wikipedia,
|
||||
branchIoDeeplinks,
|
||||
];
|
5
built/plugins/wikipedia.d.ts
vendored
5
built/plugins/wikipedia.d.ts
vendored
@ -1,5 +0,0 @@
|
||||
/// <reference types="node" />
|
||||
import { URL } from 'node:url';
|
||||
import summary from '../summary.js';
|
||||
export declare function test(url: URL): boolean;
|
||||
export declare function summarize(url: URL): Promise<summary>;
|
@ -1,38 +0,0 @@
|
||||
import { get } from '../utils/got.js';
|
||||
import debug from 'debug';
|
||||
import clip from './../utils/clip.js';
|
||||
const log = debug('summaly:plugins:wikipedia');
|
||||
/**
 * Tells whether the URL points at a Wikipedia language subdomain, i.e. its
 * hostname ends with `.wikipedia.org`. A URL without a hostname never matches.
 */
export function test(url) {
	const host = url.hostname;
	return host ? host.endsWith('.wikipedia.org') : false;
}
|
||||
/**
 * Summarizes a Wikipedia article through the MediaWiki `extracts` API.
 *
 * @param url Article URL of the form `https://<lang>.wikipedia.org/wiki/<title>`.
 * @returns Summary with the article title and the first 300 characters of
 *          its intro extract.
 * @throws The string 'fetch failed' when the API response contains no page.
 */
export async function summarize(url) {
	const lang = url.host ? url.host.split('.')[0] : null;

	// Everything after the second path separator is the title. Titles may
	// themselves contain "/" (e.g. "AC/DC"), so taking only one path segment
	// would truncate them.
	const rawTitle = url.pathname ? url.pathname.split('/').slice(2).join('/') : '';

	// The pathname is percent-encoded for use in a path, which leaves
	// characters such as "&" untouched. Decode it and re-encode it for use
	// as a query value.
	let title = rawTitle;
	try {
		title = decodeURIComponent(rawTitle);
	} catch {
		// Malformed escape sequence: fall back to the raw path text.
	}

	const endpoint = `https://${lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro=&explaintext=&titles=${encodeURIComponent(title)}`;

	log(`lang is ${lang}`);
	log(`title is ${title}`);
	log(`endpoint is ${endpoint}`);

	const body = JSON.parse(await get(endpoint));
	log(body);

	// Report every unexpected response shape the same way instead of letting
	// a TypeError escape from a property access.
	const pages = body?.query?.pages;
	if (pages == null || typeof pages !== 'object') {
		throw 'fetch failed';
	}

	const info = Object.values(pages)[0];
	if (info == null) {
		throw 'fetch failed';
	}

	return {
		title: info.title,
		icon: 'https://wikipedia.org/static/favicon/wikipedia.ico',
		// `extract` may be absent from the page entry; report that as null.
		description: clip(info.extract, 300) ?? null,
		thumbnail: `https://wikipedia.org/static/images/project-logos/${lang}wiki.png`,
		player: {
			url: null,
			width: null,
			height: null,
			allow: [],
		},
		sitename: 'Wikipedia',
		activityPub: null,
	};
}
|
1
built/server/index.d.ts
vendored
1
built/server/index.d.ts
vendored
@ -1 +0,0 @@
|
||||
export {};
|
@ -1,22 +0,0 @@
|
||||
import * as http from 'http';
|
||||
import * as Koa from 'koa';
|
||||
import summaly from '../';
|
||||
const app = new Koa();
|
||||
app.use(async (ctx) => {
|
||||
if (!ctx.query.url) {
|
||||
ctx.status = 400;
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const summary = await summaly(ctx.query.url, {
|
||||
lang: ctx.query.lang,
|
||||
followRedirects: false
|
||||
});
|
||||
ctx.body = summary;
|
||||
}
|
||||
catch (e) {
|
||||
ctx.status = 500;
|
||||
}
|
||||
});
|
||||
const server = http.createServer(app.callback());
|
||||
server.listen(process.env.PORT || 80);
|
53
built/summary.d.ts
vendored
53
built/summary.d.ts
vendored
@ -1,53 +0,0 @@
|
||||
declare type Summary = {
|
||||
/**
|
||||
* The description of that web page
|
||||
*/
|
||||
description: string | null;
|
||||
/**
|
||||
* The url of the icon of that web page
|
||||
*/
|
||||
icon: string | null;
|
||||
/**
|
||||
* The name of site of that web page
|
||||
*/
|
||||
sitename: string | null;
|
||||
/**
|
||||
* The url of the thumbnail of that web page
|
||||
*/
|
||||
thumbnail: string | null;
|
||||
/**
|
||||
* The player of that web page
|
||||
*/
|
||||
player: Player;
|
||||
/**
|
||||
* The title of that web page
|
||||
*/
|
||||
title: string | null;
|
||||
/**
|
||||
* Possibly sensitive
|
||||
*/
|
||||
sensitive?: boolean;
|
||||
/**
|
||||
* The url of the ActivityPub representation of that web page
|
||||
*/
|
||||
activityPub: string | null;
|
||||
};
|
||||
export default Summary;
|
||||
export declare type Player = {
|
||||
/**
|
||||
* The url of the player
|
||||
*/
|
||||
url: string | null;
|
||||
/**
|
||||
* The width of the player
|
||||
*/
|
||||
width: number | null;
|
||||
/**
|
||||
* The height of the player
|
||||
*/
|
||||
height: number | null;
|
||||
/**
|
||||
* The allowed permissions of the iframe
|
||||
*/
|
||||
allow: string[];
|
||||
};
|
@ -1 +0,0 @@
|
||||
export {};
|
1
built/utils/cleanup-title.d.ts
vendored
1
built/utils/cleanup-title.d.ts
vendored
@ -1 +0,0 @@
|
||||
export default function (title: string, siteName?: string | null): string;
|
@ -1,19 +0,0 @@
|
||||
import escapeRegExp from 'escape-regexp';
|
||||
/**
 * Strips a trailing "<separator> <site name>" suffix from a page title.
 *
 * The separator is one of `-`, `|`, `:` or `・`, optionally surrounded by a
 * single whitespace character on each side. When no site name is given, or
 * the title does not end with it, the trimmed title is returned unchanged.
 */
export default function (title, siteName) {
	const trimmedTitle = title.trim();

	if (!siteName) {
		return trimmedTitle;
	}

	const escapedSiteName = escapeRegExp(siteName.trim());
	const suffixPattern = new RegExp(`^(.+?)\\s?[\\-\\|:・]\\s?${escapedSiteName}$`);

	const found = suffixPattern.exec(trimmedTitle);
	const titleWithoutSuffix = found ? found[1] : null;

	return titleWithoutSuffix ? titleWithoutSuffix : trimmedTitle;
}
|
1
built/utils/clip.d.ts
vendored
1
built/utils/clip.d.ts
vendored
@ -1 +0,0 @@
|
||||
export default function (s: string, max: number): string;
|
@ -1,13 +0,0 @@
|
||||
import nullOrEmpty from './null-or-empty.js';
|
||||
/**
 * Trims a string and shortens it to at most `max` UTF-16 code units,
 * appending '...' when something was cut off.
 *
 * @param s Text to clip. Null, undefined and blank values are returned as-is.
 * @param max Maximum length of the kept text, not counting the '...'.
 * @returns The trimmed text, shortened when it was longer than `max`.
 */
export default function (s, max) {
	if (nullOrEmpty(s)) {
		return s;
	}

	s = s.trim();

	if (!(s.length > max)) {
		return s;
	}

	// A negative limit keeps nothing, as with the `substr` call used before.
	let end = Math.max(0, max);

	// Never cut between the two halves of a surrogate pair: a dangling high
	// surrogate is not valid text. Drop the whole character instead.
	const lastKept = s.charCodeAt(end - 1);
	if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
		end -= 1;
	}

	return s.slice(0, end) + '...';
}
|
8
built/utils/encoding.d.ts
vendored
8
built/utils/encoding.d.ts
vendored
@ -1,8 +0,0 @@
|
||||
/// <reference types="node" />
|
||||
/**
|
||||
* Detect HTML encoding
|
||||
* @param body Body in Buffer
|
||||
* @returns encoding
|
||||
*/
|
||||
export declare function detectEncoding(body: Buffer): string;
|
||||
export declare function toUtf8(body: Buffer, encoding: string): string;
|
@ -1,40 +0,0 @@
|
||||
import iconv from 'iconv-lite';
|
||||
import jschardet from 'jschardet';
|
||||
const regCharset = new RegExp(/charset\s*=\s*["']?([\w-]+)/, 'i');
|
||||
/**
 * Determine the character encoding of an HTML document.
 *
 * Content-based detection (at a 0.99 threshold) is tried first, then a
 * `charset=` declaration found in the body. Either result is used only if
 * `toEncoding` accepts it; otherwise the answer is 'utf-8'.
 *
 * @param body Body in Buffer
 * @returns encoding
 */
export function detectEncoding(body) {
	// 1. By detection
	const guess = jschardet.detect(body, { minimumThreshold: 0.99 });
	const fromDetection = guess ? toEncoding(guess.encoding) : null;
	if (fromDetection != null) {
		return fromDetection;
	}

	// 2. From meta
	const declaration = body.toString('ascii').match(regCharset);
	const fromMeta = declaration ? toEncoding(declaration[1]) : null;
	if (fromMeta != null) {
		return fromMeta;
	}

	// 3. Default
	return 'utf-8';
}
|
||||
export function toUtf8(body, encoding) {
|
||||
return iconv.decode(body, encoding);
|
||||
}
|
||||
/**
 * Maps an encoding name to the name used for decoding.
 *
 * Names that iconv does not know yield null. The Shift_JIS aliases are
 * mapped to 'cp932'; every other known name is returned unchanged.
 */
function toEncoding(candidate) {
	if (!iconv.encodingExists(candidate)) {
		return null;
	}

	const shiftJisAliases = ['shift_jis', 'shift-jis', 'windows-31j', 'x-sjis'];
	const isShiftJis = shiftJisAliases.includes(candidate.toLowerCase());

	return isShiftJis ? 'cp932' : candidate;
}
|
20
built/utils/got.d.ts
vendored
20
built/utils/got.d.ts
vendored
@ -1,20 +0,0 @@
|
||||
import * as Got from 'got';
|
||||
import * as cheerio from 'cheerio';
|
||||
export declare let agent: Got.Agents;
|
||||
export declare function setAgent(_agent: Got.Agents): void;
|
||||
export declare type GotOptions = {
|
||||
url: string;
|
||||
method: 'GET' | 'POST' | 'HEAD';
|
||||
body?: string;
|
||||
headers: Record<string, string | undefined>;
|
||||
typeFilter?: RegExp;
|
||||
};
|
||||
export declare function scpaping(url: string, opts?: {
|
||||
lang?: string;
|
||||
}): Promise<{
|
||||
body: string;
|
||||
$: cheerio.CheerioAPI;
|
||||
response: Got.Response<string>;
|
||||
}>;
|
||||
export declare function get(url: string): Promise<string>;
|
||||
export declare function head(url: string): Promise<Got.Response<string>>;
|
@ -1,124 +0,0 @@
|
||||
import got, * as Got from 'got';
|
||||
import { StatusError } from './status-error.js';
|
||||
import { detectEncoding, toUtf8 } from './encoding.js';
|
||||
import * as cheerio from 'cheerio';
|
||||
import PrivateIp from 'private-ip';
|
||||
import { dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { readFileSync } from 'node:fs';
|
||||
const _filename = fileURLToPath(import.meta.url);
|
||||
const _dirname = dirname(_filename);
|
||||
export let agent = {};
|
||||
export function setAgent(_agent) {
|
||||
agent = _agent || {};
|
||||
}
|
||||
const repo = JSON.parse(readFileSync(`${_dirname}/../../package.json`, 'utf8'));
|
||||
const RESPONSE_TIMEOUT = 20 * 1000;
|
||||
const OPERATION_TIMEOUT = 60 * 1000;
|
||||
const MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
|
||||
const BOT_UA = `SummalyBot/${repo.version}`;
|
||||
/**
 * Fetches an HTML page and parses it.
 *
 * Only `text/html` and `application/xhtml+xml` responses are accepted. The
 * body is decoded to a string using the detected encoding before it is
 * loaded into cheerio.
 *
 * @param url Page URL.
 * @param opts Optional; `opts.lang` is sent as the Accept-Language header.
 * @returns `{ body, $, response }`: the decoded body, its cheerio handle and
 *          the response it came from.
 * @throws StatusError (400) when the response came from a private IP address
 *         and private addresses are not allowed.
 */
export async function scpaping(url, opts) {
	const headers = {
		'accept': 'text/html,application/xhtml+xml',
		'user-agent': BOT_UA,
		'accept-language': opts?.lang
	};

	const response = await getResponse({
		url,
		method: 'GET',
		headers,
		typeFilter: /^(text\/html|application\/xhtml\+xml)/,
	});

	// SUMMALY_ALLOW_PRIVATE_IP is for tests. A non-empty custom agent also
	// lifts the restriction.
	const privateIpAllowed =
		process.env.SUMMALY_ALLOW_PRIVATE_IP === 'true' ||
		Object.keys(agent).length > 0;

	if (!privateIpAllowed) {
		if (response.ip && PrivateIp(response.ip)) {
			throw new StatusError(`Private IP rejected ${response.ip}`, 400, 'Private IP Rejected');
		}
	}

	const raw = response.rawBody;
	const body = toUtf8(raw, detectEncoding(raw));
	const $ = cheerio.load(body);

	return { body, $, response };
}
|
||||
/**
 * Performs a GET request that accepts any content type.
 *
 * @param url URL to fetch.
 * @returns The response body.
 */
export async function get(url) {
	const request = {
		url,
		method: 'GET',
		headers: { 'accept': '*/*' },
	};

	const { body } = await getResponse(request);
	return body;
}
|
||||
/**
 * Performs a HEAD request that accepts any content type.
 *
 * @param url URL to probe.
 * @returns The response.
 */
export async function head(url) {
	const request = {
		url,
		method: 'HEAD',
		headers: { 'accept': '*/*' },
	};

	return await getResponse(request);
}
|
||||
/**
 * Starts a request with the module's fixed settings (timeouts, the shared
 * agent, HTTP/2 off, no retries) and waits for its response.
 *
 * @param args `url`, `method`, `headers`, and optionally `body` and a
 *             `typeFilter` that is passed on to `receiveResponse`.
 * @returns The response produced by `receiveResponse`.
 */
async function getResponse(args) {
	// Each phase gets the same limit; `request` bounds the whole operation.
	const phaseLimit = RESPONSE_TIMEOUT;
	const timeoutSettings = {
		lookup: phaseLimit,
		connect: phaseLimit,
		secureConnect: phaseLimit,
		socket: phaseLimit,
		response: phaseLimit,
		send: phaseLimit,
		request: OPERATION_TIMEOUT, // whole operation timeout
	};

	const { url, method, headers, body, typeFilter } = args;

	const req = got(url, {
		method,
		headers,
		body,
		timeout: timeoutSettings,
		agent,
		http2: false,
		retry: {
			limit: 0,
		},
	});

	return await receiveResponse({ req, typeFilter });
}
|
||||
/**
 * Waits for a started request while enforcing the content-type filter and
 * the maximum response size.
 *
 * @param args `req` is the in-flight request; `typeFilter`, when given, is a
 *             RegExp the Content-Type header must match.
 * @returns The response.
 * @throws StatusError in place of an HTTP status error; any other failure is
 *         rethrown unchanged.
 */
async function receiveResponse(args) {
	const req = args.req;
	const maxSize = MAX_RESPONSE_SIZE;

	req.on('response', (res) => {
		// Check html
		if (args.typeFilter && !res.headers['content-type']?.match(args.typeFilter)) {
			// console.warn(res.headers['content-type']);
			req.cancel(`Rejected by type filter ${res.headers['content-type']}`);
			return;
		}

		// Size check against the response header
		const contentLength = res.headers['content-length'];
		if (contentLength != null) {
			const size = Number(contentLength);
			if (size > maxSize) {
				req.cancel(`maxSize exceeded (${size} > ${maxSize}) on response`);
			}
		}
	});

	// Size check against the data received so far
	req.on('downloadProgress', (progress) => {
		if (progress.transferred > maxSize && progress.percent !== 1) {
			// Name the check that fired; the header check above says "on response".
			req.cancel(`maxSize exceeded (${progress.transferred} > ${maxSize}) on downloadProgress`);
		}
	});

	// Await the response, converting status-code errors to StatusError
	const res = await req.catch(e => {
		if (e instanceof Got.HTTPError) {
			throw new StatusError(`${e.response.statusCode} ${e.response.statusMessage}`, e.response.statusCode, e.response.statusMessage);
		}
		else {
			throw e;
		}
	});

	return res;
}
|
1
built/utils/null-or-empty.d.ts
vendored
1
built/utils/null-or-empty.d.ts
vendored
@ -1 +0,0 @@
|
||||
export default function (val: string): boolean;
|
@ -1,14 +0,0 @@
|
||||
/**
 * Tells whether a string carries no content.
 *
 * @param val Value to check.
 * @returns true for undefined, null, and strings that are empty or consist
 *          only of whitespace; false otherwise.
 */
export default function nullOrEmpty(val) {
	// `== null` covers both null and undefined.
	return val == null || val.trim() === '';
}
|
7
built/utils/status-error.d.ts
vendored
7
built/utils/status-error.d.ts
vendored
@ -1,7 +0,0 @@
|
||||
export declare class StatusError extends Error {
|
||||
name: string;
|
||||
statusCode: number;
|
||||
statusMessage?: string;
|
||||
isPermanentError: boolean;
|
||||
constructor(message: string, statusCode: number, statusMessage?: string);
|
||||
}
|
@ -1,9 +0,0 @@
|
||||
/**
 * Error carrying an HTTP status.
 *
 * `isPermanentError` is true exactly when `statusCode` is a number in the
 * 400-499 range.
 */
export class StatusError extends Error {
	constructor(message, statusCode, statusMessage) {
		super(message);

		const isClientError =
			typeof statusCode === 'number' &&
			statusCode >= 400 &&
			statusCode < 500;

		Object.assign(this, {
			name: 'StatusError',
			statusCode,
			statusMessage,
			isPermanentError: isClientError,
		});
	}
}
|
Loading…
x
Reference in New Issue
Block a user