mirror of
https://github.com/misskey-dev/summaly.git
synced 2025-05-02 12:17:19 +09:00
wip
This commit is contained in:
parent
8329aec479
commit
16d2b7fd1f
@ -1,6 +1,6 @@
|
|||||||
*.*.* / 2023-2-**
|
*.*.* / 2023-2-**
|
||||||
------------------
|
------------------
|
||||||
* export a Fastify Module
|
*
|
||||||
* https/http agents options
|
* https/http agents options
|
||||||
|
|
||||||
2.7.0 / 2022-07-09
|
2.7.0 / 2022-07-09
|
||||||
|
1843
package-lock.json
generated
1843
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
17
package.json
17
package.json
@ -9,24 +9,34 @@
|
|||||||
"main": "./built/index.js",
|
"main": "./built/index.js",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"types": "./built/index.d.ts",
|
"types": "./built/index.d.ts",
|
||||||
|
"files": [
|
||||||
|
"built",
|
||||||
|
"LICENSE"
|
||||||
|
],
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"build": "tsc",
|
"build": "tsc",
|
||||||
"test": "mocha --harmony"
|
"test": "mocha --harmony",
|
||||||
|
"start": "fastify start ./built/index.js"
|
||||||
|
},
|
||||||
|
"optionalDependencies": {
|
||||||
|
"fastify": "3.24.1"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/cheerio": "0.22.18",
|
"@types/cheerio": "0.22.18",
|
||||||
"@types/debug": "4.1.7",
|
"@types/debug": "4.1.7",
|
||||||
|
"@types/escape-regexp": "^0.0.1",
|
||||||
"@types/html-entities": "1.3.4",
|
"@types/html-entities": "1.3.4",
|
||||||
"@types/mocha": "9.0.0",
|
"@types/mocha": "9.0.0",
|
||||||
"@types/node": "16.11.12",
|
"@types/node": "16.11.12",
|
||||||
"@types/require-all": "^3.0.3",
|
"debug": "^4.3.4",
|
||||||
"express": "^4.18.2",
|
"express": "^4.18.2",
|
||||||
|
"fastify": "^4.13.0",
|
||||||
|
"fastify-cli": "^5.7.1",
|
||||||
"mocha": "^9.2.2",
|
"mocha": "^9.2.2",
|
||||||
"typescript": "4.5.3"
|
"typescript": "4.5.3"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"cheerio": "^1.0.0-rc.12",
|
"cheerio": "^1.0.0-rc.12",
|
||||||
"debug": "4.3.3",
|
|
||||||
"escape-regexp": "0.0.1",
|
"escape-regexp": "0.0.1",
|
||||||
"got": "^12.5.3",
|
"got": "^12.5.3",
|
||||||
"html-entities": "2.3.2",
|
"html-entities": "2.3.2",
|
||||||
@ -34,7 +44,6 @@
|
|||||||
"jschardet": "3.0.0",
|
"jschardet": "3.0.0",
|
||||||
"koa": "2.13.4",
|
"koa": "2.13.4",
|
||||||
"private-ip": "2.3.3",
|
"private-ip": "2.3.3",
|
||||||
"require-all": "3.0.0",
|
|
||||||
"trace-redirect": "1.0.6"
|
"trace-redirect": "1.0.6"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
import * as URL from 'url';
|
import * as URL from 'url';
|
||||||
import clip from './utils/clip';
|
import clip from './utils/clip.js';
|
||||||
import cleanupTitle from './utils/cleanup-title';
|
import cleanupTitle from './utils/cleanup-title.js';
|
||||||
|
|
||||||
import { decode as decodeHtml } from 'html-entities';
|
import { decode as decodeHtml } from 'html-entities';
|
||||||
|
|
||||||
import { head, scpaping } from './utils/got';
|
import { head, scpaping } from './utils/got.js';
|
||||||
import Summary from './summary';
|
import Summary from './summary.js';
|
||||||
|
|
||||||
export default async (url: URL.Url, lang: string | null = null): Promise<Summary | null> => {
|
export default async (url: URL.Url, lang: string | null = null): Promise<Summary | null> => {
|
||||||
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
|
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
|
||||||
|
62
src/index.ts
62
src/index.ts
@ -4,19 +4,15 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import * as URL from 'url';
|
import * as URL from 'url';
|
||||||
import requireAll from 'require-all';
|
|
||||||
import tracer from 'trace-redirect';
|
import tracer from 'trace-redirect';
|
||||||
import Summary from './summary';
|
import Summary from './summary.js';
|
||||||
import IPlugin from './iplugin';
|
import type { IPlugin as _IPlugin } from './iplugin.js';
|
||||||
import general from './general';
|
export type IPlugin = _IPlugin;
|
||||||
|
import general from './general.js';
|
||||||
// Load builtin plugins
|
import * as Got from 'got';
|
||||||
const _builtinPlugins = requireAll({
|
import { setAgent } from './utils/got.js';
|
||||||
dirname: __dirname + '/plugins'
|
import type { FastifyInstance } from 'fastify';
|
||||||
}) as { [key: string]: IPlugin };
|
import { plugins as builtinPlugins } from './plugins/index.js';
|
||||||
|
|
||||||
const builtinPlugins = Object.keys(_builtinPlugins)
|
|
||||||
.map(key => _builtinPlugins[key]);
|
|
||||||
|
|
||||||
type Options = {
|
type Options = {
|
||||||
/**
|
/**
|
||||||
@ -33,6 +29,11 @@ type Options = {
|
|||||||
* Custom Plugins
|
* Custom Plugins
|
||||||
*/
|
*/
|
||||||
plugins?: IPlugin[];
|
plugins?: IPlugin[];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Custom HTTP agent
|
||||||
|
*/
|
||||||
|
agent?: Got.Agents;
|
||||||
};
|
};
|
||||||
|
|
||||||
type Result = Summary & {
|
type Result = Summary & {
|
||||||
@ -51,7 +52,9 @@ const defaultOptions = {
|
|||||||
/**
|
/**
|
||||||
* Summarize an web page
|
* Summarize an web page
|
||||||
*/
|
*/
|
||||||
export default async (url: string, options?: Options): Promise<Result> => {
|
export const summaly = async (url: string, options?: Options): Promise<Result> => {
|
||||||
|
if (options?.agent) setAgent(options.agent);
|
||||||
|
|
||||||
const opts = Object.assign(defaultOptions, options);
|
const opts = Object.assign(defaultOptions, options);
|
||||||
|
|
||||||
const plugins = builtinPlugins.concat(opts.plugins || []);
|
const plugins = builtinPlugins.concat(opts.plugins || []);
|
||||||
@ -64,7 +67,7 @@ export default async (url: string, options?: Options): Promise<Result> => {
|
|||||||
const match = plugins.filter(plugin => plugin.test(_url))[0];
|
const match = plugins.filter(plugin => plugin.test(_url))[0];
|
||||||
|
|
||||||
// Get summary
|
// Get summary
|
||||||
const summary = await (match ? match.summarize : general)(_url, opts.lang);
|
const summary = await (match ? match.summarize : general)(_url, opts.lang || undefined);
|
||||||
|
|
||||||
if (summary == null) {
|
if (summary == null) {
|
||||||
throw 'failed summarize';
|
throw 'failed summarize';
|
||||||
@ -74,3 +77,34 @@ export default async (url: string, options?: Options): Promise<Result> => {
|
|||||||
url: actualUrl
|
url: actualUrl
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export default function (fastify: FastifyInstance, options: {}, done: (err?: Error) => void) {
|
||||||
|
fastify.get<{
|
||||||
|
Querystring: {
|
||||||
|
url?: string;
|
||||||
|
lang?: string;
|
||||||
|
};
|
||||||
|
}>('/', async (req, reply) => {
|
||||||
|
const url = req.query.url as string;
|
||||||
|
if (url == null) {
|
||||||
|
return reply.status(400).send({
|
||||||
|
error: 'url is required'
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const summary = await summaly(url, {
|
||||||
|
lang: req.query.lang as string,
|
||||||
|
followRedirects: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
return summary;
|
||||||
|
} catch (e) {
|
||||||
|
return reply.status(500).send({
|
||||||
|
error: e
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
done();
|
||||||
|
}
|
@ -1,9 +1,7 @@
|
|||||||
import * as URL from 'url';
|
import * as URL from 'url';
|
||||||
import Summary from './summary';
|
import Summary from './summary.js';
|
||||||
|
|
||||||
interface IPlugin {
|
export interface IPlugin {
|
||||||
test: (url: URL.Url) => boolean;
|
test: (url: URL.Url) => boolean;
|
||||||
summarize: (url: URL.Url, lang?: string) => Promise<Summary>;
|
summarize: (url: URL.Url, lang?: string) => Promise<Summary>;
|
||||||
}
|
}
|
||||||
|
|
||||||
export default IPlugin;
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import * as URL from 'url';
|
import * as URL from 'url';
|
||||||
import { scpaping } from '../utils/got';
|
import { scpaping } from '../utils/got.js';
|
||||||
import summary from '../summary';
|
import summary from '../summary.js';
|
||||||
|
|
||||||
export function test(url: URL.Url): boolean {
|
export function test(url: URL.Url): boolean {
|
||||||
return url.hostname === 'www.amazon.com' ||
|
return url.hostname === 'www.amazon.com' ||
|
||||||
@ -29,19 +29,19 @@ export async function summarize(url: URL.Url): Promise<summary> {
|
|||||||
$('#productDescription').text() ||
|
$('#productDescription').text() ||
|
||||||
$('meta[name="description"]').attr('content');
|
$('meta[name="description"]').attr('content');
|
||||||
|
|
||||||
const thumbnail: string = $('#landingImage').attr('src');
|
const thumbnail: string | undefined = $('#landingImage').attr('src');
|
||||||
|
|
||||||
const playerUrl =
|
const playerUrl =
|
||||||
$('meta[property="twitter:player"]').attr('content') ||
|
$('meta[property="twitter:player"]').attr('content') ||
|
||||||
$('meta[name="twitter:player"]').attr('content');
|
$('meta[name="twitter:player"]').attr('content');
|
||||||
|
|
||||||
const playerWidth = parseInt(
|
const playerWidth =
|
||||||
$('meta[property="twitter:player:width"]').attr('content') ||
|
$('meta[property="twitter:player:width"]').attr('content') ||
|
||||||
$('meta[name="twitter:player:width"]').attr('content'));
|
$('meta[name="twitter:player:width"]').attr('content');
|
||||||
|
|
||||||
const playerHeight = parseInt(
|
const playerHeight =
|
||||||
$('meta[property="twitter:player:height"]').attr('content') ||
|
$('meta[property="twitter:player:height"]').attr('content') ||
|
||||||
$('meta[name="twitter:player:height"]').attr('content'));
|
$('meta[name="twitter:player:height"]').attr('content');
|
||||||
|
|
||||||
return {
|
return {
|
||||||
title: title ? title.trim() : null,
|
title: title ? title.trim() : null,
|
||||||
@ -50,8 +50,8 @@ export async function summarize(url: URL.Url): Promise<summary> {
|
|||||||
thumbnail: thumbnail ? thumbnail.trim() : null,
|
thumbnail: thumbnail ? thumbnail.trim() : null,
|
||||||
player: {
|
player: {
|
||||||
url: playerUrl || null,
|
url: playerUrl || null,
|
||||||
width: playerWidth || null,
|
width: playerWidth ? parseInt(playerWidth) : null,
|
||||||
height: playerHeight || null
|
height: playerHeight ? parseInt(playerHeight) : null
|
||||||
},
|
},
|
||||||
sitename: 'Amazon'
|
sitename: 'Amazon'
|
||||||
};
|
};
|
||||||
|
8
src/plugins/index.ts
Normal file
8
src/plugins/index.ts
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
import { IPlugin } from '@/iplugin.js';
|
||||||
|
import * as amazon from './amazon.js';
|
||||||
|
import * as wikipedia from './wikipedia.js';
|
||||||
|
|
||||||
|
export const plugins: IPlugin[] = [
|
||||||
|
amazon,
|
||||||
|
wikipedia,
|
||||||
|
];
|
@ -1,18 +1,19 @@
|
|||||||
import * as URL from 'url';
|
import * as URL from 'url';
|
||||||
import { get } from '../utils/got';
|
import { get } from '../utils/got.js';
|
||||||
import * as debug from 'debug';
|
import debug from 'debug';
|
||||||
import summary from '../summary';
|
import summary from '../summary.js';
|
||||||
import clip from './../utils/clip';
|
import clip from './../utils/clip.js';
|
||||||
|
|
||||||
const log = debug('summaly:plugins:wikipedia');
|
const log = debug('summaly:plugins:wikipedia');
|
||||||
|
|
||||||
export function test(url: URL.Url): boolean {
|
export function test(url: URL.Url): boolean {
|
||||||
|
if (!url.hostname) return false;
|
||||||
return /\.wikipedia\.org$/.test(url.hostname);
|
return /\.wikipedia\.org$/.test(url.hostname);
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function summarize(url: URL.Url): Promise<summary> {
|
export async function summarize(url: URL.Url): Promise<summary> {
|
||||||
const lang = url.host.split('.')[0];
|
const lang = url.host ? url.host.split('.')[0] : null;
|
||||||
const title = url.pathname.split('/')[2];
|
const title = url.pathname ? url.pathname.split('/')[2] : null;
|
||||||
const endpoint = `https://${lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro=&explaintext=&titles=${title}`;
|
const endpoint = `https://${lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro=&explaintext=&titles=${title}`;
|
||||||
|
|
||||||
log(`lang is ${lang}`);
|
log(`lang is ${lang}`);
|
||||||
|
@ -1,27 +0,0 @@
|
|||||||
import * as http from 'http';
|
|
||||||
import * as Koa from 'koa';
|
|
||||||
import summaly from '../';
|
|
||||||
|
|
||||||
const app = new Koa();
|
|
||||||
|
|
||||||
app.use(async ctx => {
|
|
||||||
if (!ctx.query.url) {
|
|
||||||
ctx.status = 400;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const summary = await summaly(ctx.query.url, {
|
|
||||||
lang: ctx.query.lang,
|
|
||||||
followRedirects: false
|
|
||||||
});
|
|
||||||
|
|
||||||
ctx.body = summary;
|
|
||||||
} catch (e) {
|
|
||||||
ctx.status = 500;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
const server = http.createServer(app.callback());
|
|
||||||
|
|
||||||
server.listen(process.env.PORT || 80);
|
|
@ -1,4 +1,4 @@
|
|||||||
const escapeRegExp = require('escape-regexp');
|
import escapeRegExp from 'escape-regexp';
|
||||||
|
|
||||||
export default function(title: string, siteName?: string | null): string {
|
export default function(title: string, siteName?: string | null): string {
|
||||||
title = title.trim();
|
title = title.trim();
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import nullOrEmpty from './null-or-empty';
|
import nullOrEmpty from './null-or-empty.js';
|
||||||
|
|
||||||
export default function(s: string, max: number): string {
|
export default function(s: string, max: number): string {
|
||||||
if (nullOrEmpty(s)) {
|
if (nullOrEmpty(s)) {
|
||||||
|
@ -1,14 +1,34 @@
|
|||||||
import { version } from '../../package.json';
|
|
||||||
import got, * as Got from 'got';
|
import got, * as Got from 'got';
|
||||||
import { StatusError } from './status-error';
|
import { StatusError } from './status-error.js';
|
||||||
import { detectEncoding, toUtf8 } from './encoding';
|
import { detectEncoding, toUtf8 } from './encoding.js';
|
||||||
import * as cheerio from 'cheerio';
|
import * as cheerio from 'cheerio';
|
||||||
const PrivateIp = require('private-ip');
|
import PrivateIp from 'private-ip';
|
||||||
|
import { dirname } from 'node:path';
|
||||||
|
import { fileURLToPath } from 'node:url';
|
||||||
|
import { readFileSync } from 'node:fs';
|
||||||
|
|
||||||
|
const _filename = fileURLToPath(import.meta.url);
|
||||||
|
const _dirname = dirname(_filename);
|
||||||
|
|
||||||
|
export let agent: Got.Agents | undefined = undefined;
|
||||||
|
export function setAgent(_agent: Got.Agents) {
|
||||||
|
agent = _agent;
|
||||||
|
}
|
||||||
|
|
||||||
|
export type GotOptions = {
|
||||||
|
url: string;
|
||||||
|
method: 'GET' | 'POST' | 'HEAD';
|
||||||
|
body?: string;
|
||||||
|
headers: Record<string, string | undefined>;
|
||||||
|
typeFilter?: RegExp;
|
||||||
|
}
|
||||||
|
|
||||||
|
const repo = JSON.parse(readFileSync(`${_dirname}/../../package.json`, 'utf8'));
|
||||||
|
|
||||||
const RESPONSE_TIMEOUT = 20 * 1000;
|
const RESPONSE_TIMEOUT = 20 * 1000;
|
||||||
const OPERATION_TIMEOUT = 60 * 1000;
|
const OPERATION_TIMEOUT = 60 * 1000;
|
||||||
const MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
|
const MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
|
||||||
const BOT_UA = `SummalyBot/${version}`;
|
const BOT_UA = `SummalyBot/${repo.version}`;
|
||||||
|
|
||||||
export async function scpaping(url: string, opts?: { lang?: string; }) {
|
export async function scpaping(url: string, opts?: { lang?: string; }) {
|
||||||
const response = await getResponse({
|
const response = await getResponse({
|
||||||
@ -46,13 +66,13 @@ export async function get(url: string) {
|
|||||||
method: 'GET',
|
method: 'GET',
|
||||||
headers: {
|
headers: {
|
||||||
'accept': '*/*',
|
'accept': '*/*',
|
||||||
}
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
return await res.body;
|
return await res.body;
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function head(url: string, options?: ) {
|
export async function head(url: string) {
|
||||||
const res = await getResponse({
|
const res = await getResponse({
|
||||||
url,
|
url,
|
||||||
method: 'HEAD',
|
method: 'HEAD',
|
||||||
@ -64,7 +84,7 @@ export async function head(url: string, options?: ) {
|
|||||||
return await res;
|
return await res;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getResponse(args: { url: string, method: 'GET' | 'POST' | 'HEAD', body?: string, headers: Record<string, string>, typeFilter?: RegExp }) {
|
async function getResponse(args: GotOptions) {
|
||||||
const timeout = RESPONSE_TIMEOUT;
|
const timeout = RESPONSE_TIMEOUT;
|
||||||
const operationTimeout = OPERATION_TIMEOUT;
|
const operationTimeout = OPERATION_TIMEOUT;
|
||||||
|
|
||||||
@ -81,8 +101,11 @@ async function getResponse(args: { url: string, method: 'GET' | 'POST' | 'HEAD',
|
|||||||
send: timeout,
|
send: timeout,
|
||||||
request: operationTimeout, // whole operation timeout
|
request: operationTimeout, // whole operation timeout
|
||||||
},
|
},
|
||||||
|
agent,
|
||||||
http2: false,
|
http2: false,
|
||||||
retry: 0,
|
retry: {
|
||||||
|
limit: 0,
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
return await receiveResponce({ req, typeFilter: args.typeFilter });
|
return await receiveResponce({ req, typeFilter: args.typeFilter });
|
||||||
|
@ -6,9 +6,9 @@
|
|||||||
|
|
||||||
/* dependencies below */
|
/* dependencies below */
|
||||||
|
|
||||||
const assert = require('assert');
|
import * as assert from 'assert';
|
||||||
const express = require('express');
|
import express from 'express';
|
||||||
const summaly = require('../').default;
|
import { summaly } from '../built/index.js';
|
||||||
|
|
||||||
/* settings below */
|
/* settings below */
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user