mirror of
https://github.com/misskey-dev/summaly.git
synced 2025-05-01 19:57:22 +09:00
wip
This commit is contained in:
parent
8329aec479
commit
16d2b7fd1f
@ -1,6 +1,6 @@
|
||||
*.*.* / 2023-2-**
|
||||
------------------
|
||||
* export a Fastify Module
|
||||
*
|
||||
* https/http agents options
|
||||
|
||||
2.7.0 / 2022-07-09
|
||||
|
1843
package-lock.json
generated
1843
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
17
package.json
17
package.json
@ -9,24 +9,34 @@
|
||||
"main": "./built/index.js",
|
||||
"type": "module",
|
||||
"types": "./built/index.d.ts",
|
||||
"files": [
|
||||
"built",
|
||||
"LICENSE"
|
||||
],
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"test": "mocha --harmony"
|
||||
"test": "mocha --harmony",
|
||||
"start": "fastify start ./built/index.js"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"fastify": "3.24.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/cheerio": "0.22.18",
|
||||
"@types/debug": "4.1.7",
|
||||
"@types/escape-regexp": "^0.0.1",
|
||||
"@types/html-entities": "1.3.4",
|
||||
"@types/mocha": "9.0.0",
|
||||
"@types/node": "16.11.12",
|
||||
"@types/require-all": "^3.0.3",
|
||||
"debug": "^4.3.4",
|
||||
"express": "^4.18.2",
|
||||
"fastify": "^4.13.0",
|
||||
"fastify-cli": "^5.7.1",
|
||||
"mocha": "^9.2.2",
|
||||
"typescript": "4.5.3"
|
||||
},
|
||||
"dependencies": {
|
||||
"cheerio": "^1.0.0-rc.12",
|
||||
"debug": "4.3.3",
|
||||
"escape-regexp": "0.0.1",
|
||||
"got": "^12.5.3",
|
||||
"html-entities": "2.3.2",
|
||||
@ -34,7 +44,6 @@
|
||||
"jschardet": "3.0.0",
|
||||
"koa": "2.13.4",
|
||||
"private-ip": "2.3.3",
|
||||
"require-all": "3.0.0",
|
||||
"trace-redirect": "1.0.6"
|
||||
}
|
||||
}
|
||||
|
@ -1,11 +1,11 @@
|
||||
import * as URL from 'url';
|
||||
import clip from './utils/clip';
|
||||
import cleanupTitle from './utils/cleanup-title';
|
||||
import clip from './utils/clip.js';
|
||||
import cleanupTitle from './utils/cleanup-title.js';
|
||||
|
||||
import { decode as decodeHtml } from 'html-entities';
|
||||
|
||||
import { head, scpaping } from './utils/got';
|
||||
import Summary from './summary';
|
||||
import { head, scpaping } from './utils/got.js';
|
||||
import Summary from './summary.js';
|
||||
|
||||
export default async (url: URL.Url, lang: string | null = null): Promise<Summary | null> => {
|
||||
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
|
||||
|
62
src/index.ts
62
src/index.ts
@ -4,19 +4,15 @@
|
||||
*/
|
||||
|
||||
import * as URL from 'url';
|
||||
import requireAll from 'require-all';
|
||||
import tracer from 'trace-redirect';
|
||||
import Summary from './summary';
|
||||
import IPlugin from './iplugin';
|
||||
import general from './general';
|
||||
|
||||
// Load builtin plugins
|
||||
const _builtinPlugins = requireAll({
|
||||
dirname: __dirname + '/plugins'
|
||||
}) as { [key: string]: IPlugin };
|
||||
|
||||
const builtinPlugins = Object.keys(_builtinPlugins)
|
||||
.map(key => _builtinPlugins[key]);
|
||||
import Summary from './summary.js';
|
||||
import type { IPlugin as _IPlugin } from './iplugin.js';
|
||||
export type IPlugin = _IPlugin;
|
||||
import general from './general.js';
|
||||
import * as Got from 'got';
|
||||
import { setAgent } from './utils/got.js';
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import { plugins as builtinPlugins } from './plugins/index.js';
|
||||
|
||||
type Options = {
|
||||
/**
|
||||
@ -33,6 +29,11 @@ type Options = {
|
||||
* Custom Plugins
|
||||
*/
|
||||
plugins?: IPlugin[];
|
||||
|
||||
/**
|
||||
* Custom HTTP agent
|
||||
*/
|
||||
agent?: Got.Agents;
|
||||
};
|
||||
|
||||
type Result = Summary & {
|
||||
@ -51,7 +52,9 @@ const defaultOptions = {
|
||||
/**
|
||||
* Summarize a web page
|
||||
*/
|
||||
export default async (url: string, options?: Options): Promise<Result> => {
|
||||
export const summaly = async (url: string, options?: Options): Promise<Result> => {
|
||||
if (options?.agent) setAgent(options.agent);
|
||||
|
||||
const opts = Object.assign(defaultOptions, options);
|
||||
|
||||
const plugins = builtinPlugins.concat(opts.plugins || []);
|
||||
@ -64,7 +67,7 @@ export default async (url: string, options?: Options): Promise<Result> => {
|
||||
const match = plugins.filter(plugin => plugin.test(_url))[0];
|
||||
|
||||
// Get summary
|
||||
const summary = await (match ? match.summarize : general)(_url, opts.lang);
|
||||
const summary = await (match ? match.summarize : general)(_url, opts.lang || undefined);
|
||||
|
||||
if (summary == null) {
|
||||
throw 'failed summarize';
|
||||
@ -74,3 +77,34 @@ export default async (url: string, options?: Options): Promise<Result> => {
|
||||
url: actualUrl
|
||||
});
|
||||
};
|
||||
|
||||
/**
 * Fastify plugin that exposes summaly over HTTP.
 *
 * Registers `GET /`, which reads `url` (required) and `lang` (optional) from
 * the query string and replies with the result of `summaly(url, ...)`.
 * Redirects are not followed (`followRedirects: false`).
 *
 * Responses:
 * - 400 `{ error: 'url is required' }` when `url` is missing or empty
 * - 500 `{ error: <message> }` when summarizing fails
 *
 * @param fastify Fastify instance the route is attached to
 * @param options Plugin options (currently unused)
 * @param done Callback invoked once the route has been registered
 */
export default function (fastify: FastifyInstance, options: {}, done: (err?: Error) => void) {
	fastify.get<{
		Querystring: {
			url?: string;
			lang?: string;
		};
	}>('/', async (req, reply) => {
		// No cast: `url` really can be undefined, and the guard below narrows it to `string`.
		const url = req.query.url;
		// An empty `?url=` can never be summarized, so treat it as a client error too.
		if (url == null || url === '') {
			return reply.status(400).send({
				error: 'url is required'
			});
		}

		try {
			const summary = await summaly(url, {
				lang: req.query.lang as string,
				followRedirects: false,
			});

			return summary;
		} catch (e) {
			// summaly throws a plain string ('failed summarize'); other failures are
			// presumably Error objects. An Error has no enumerable own properties by
			// default and would serialize as `{}`, so send its message instead.
			return reply.status(500).send({
				error: e instanceof Error ? e.message : String(e)
			});
		}
	});

	done();
}
|
@ -1,9 +1,7 @@
|
||||
import * as URL from 'url';
|
||||
import Summary from './summary';
|
||||
import Summary from './summary.js';
|
||||
|
||||
interface IPlugin {
|
||||
export interface IPlugin {
|
||||
test: (url: URL.Url) => boolean;
|
||||
summarize: (url: URL.Url, lang?: string) => Promise<Summary>;
|
||||
}
|
||||
|
||||
export default IPlugin;
|
||||
|
@ -1,6 +1,6 @@
|
||||
import * as URL from 'url';
|
||||
import { scpaping } from '../utils/got';
|
||||
import summary from '../summary';
|
||||
import { scpaping } from '../utils/got.js';
|
||||
import summary from '../summary.js';
|
||||
|
||||
export function test(url: URL.Url): boolean {
|
||||
return url.hostname === 'www.amazon.com' ||
|
||||
@ -29,19 +29,19 @@ export async function summarize(url: URL.Url): Promise<summary> {
|
||||
$('#productDescription').text() ||
|
||||
$('meta[name="description"]').attr('content');
|
||||
|
||||
const thumbnail: string = $('#landingImage').attr('src');
|
||||
const thumbnail: string | undefined = $('#landingImage').attr('src');
|
||||
|
||||
const playerUrl =
|
||||
$('meta[property="twitter:player"]').attr('content') ||
|
||||
$('meta[name="twitter:player"]').attr('content');
|
||||
|
||||
const playerWidth = parseInt(
|
||||
const playerWidth =
|
||||
$('meta[property="twitter:player:width"]').attr('content') ||
|
||||
$('meta[name="twitter:player:width"]').attr('content'));
|
||||
$('meta[name="twitter:player:width"]').attr('content');
|
||||
|
||||
const playerHeight = parseInt(
|
||||
const playerHeight =
|
||||
$('meta[property="twitter:player:height"]').attr('content') ||
|
||||
$('meta[name="twitter:player:height"]').attr('content'));
|
||||
$('meta[name="twitter:player:height"]').attr('content');
|
||||
|
||||
return {
|
||||
title: title ? title.trim() : null,
|
||||
@ -50,8 +50,8 @@ export async function summarize(url: URL.Url): Promise<summary> {
|
||||
thumbnail: thumbnail ? thumbnail.trim() : null,
|
||||
player: {
|
||||
url: playerUrl || null,
|
||||
width: playerWidth || null,
|
||||
height: playerHeight || null
|
||||
width: playerWidth ? parseInt(playerWidth) : null,
|
||||
height: playerHeight ? parseInt(playerHeight) : null
|
||||
},
|
||||
sitename: 'Amazon'
|
||||
};
|
||||
|
8
src/plugins/index.ts
Normal file
8
src/plugins/index.ts
Normal file
@ -0,0 +1,8 @@
|
||||
import { IPlugin } from '@/iplugin.js';
|
||||
import * as amazon from './amazon.js';
|
||||
import * as wikipedia from './wikipedia.js';
|
||||
|
||||
/**
 * Built-in plugins.
 *
 * Order is significant: the consumer concatenates custom plugins after these
 * and uses the first plugin whose `test()` matches the URL.
 */
export const plugins: IPlugin[] = [amazon, wikipedia];
|
@ -1,18 +1,19 @@
|
||||
import * as URL from 'url';
|
||||
import { get } from '../utils/got';
|
||||
import * as debug from 'debug';
|
||||
import summary from '../summary';
|
||||
import clip from './../utils/clip';
|
||||
import { get } from '../utils/got.js';
|
||||
import debug from 'debug';
|
||||
import summary from '../summary.js';
|
||||
import clip from './../utils/clip.js';
|
||||
|
||||
const log = debug('summaly:plugins:wikipedia');
|
||||
|
||||
/**
 * Tells whether this plugin handles the given URL.
 *
 * @param url Parsed URL to inspect
 * @returns `true` when the hostname ends with `.wikipedia.org`; `false`
 *          otherwise, including when the URL has no hostname
 */
export function test(url: URL.Url): boolean {
	const { hostname } = url;
	return hostname ? /\.wikipedia\.org$/.test(hostname) : false;
}
|
||||
|
||||
export async function summarize(url: URL.Url): Promise<summary> {
|
||||
const lang = url.host.split('.')[0];
|
||||
const title = url.pathname.split('/')[2];
|
||||
const lang = url.host ? url.host.split('.')[0] : null;
|
||||
const title = url.pathname ? url.pathname.split('/')[2] : null;
|
||||
const endpoint = `https://${lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro=&explaintext=&titles=${title}`;
|
||||
|
||||
log(`lang is ${lang}`);
|
||||
|
@ -1,27 +0,0 @@
|
||||
import * as http from 'http';
|
||||
import * as Koa from 'koa';
|
||||
import summaly from '../';
|
||||
|
||||
const app = new Koa();
|
||||
|
||||
app.use(async ctx => {
|
||||
if (!ctx.query.url) {
|
||||
ctx.status = 400;
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const summary = await summaly(ctx.query.url, {
|
||||
lang: ctx.query.lang,
|
||||
followRedirects: false
|
||||
});
|
||||
|
||||
ctx.body = summary;
|
||||
} catch (e) {
|
||||
ctx.status = 500;
|
||||
}
|
||||
});
|
||||
|
||||
const server = http.createServer(app.callback());
|
||||
|
||||
server.listen(process.env.PORT || 80);
|
@ -1,4 +1,4 @@
|
||||
const escapeRegExp = require('escape-regexp');
|
||||
import escapeRegExp from 'escape-regexp';
|
||||
|
||||
export default function(title: string, siteName?: string | null): string {
|
||||
title = title.trim();
|
||||
|
@ -1,4 +1,4 @@
|
||||
import nullOrEmpty from './null-or-empty';
|
||||
import nullOrEmpty from './null-or-empty.js';
|
||||
|
||||
export default function(s: string, max: number): string {
|
||||
if (nullOrEmpty(s)) {
|
||||
|
@ -1,14 +1,34 @@
|
||||
import { version } from '../../package.json';
|
||||
import got, * as Got from 'got';
|
||||
import { StatusError } from './status-error';
|
||||
import { detectEncoding, toUtf8 } from './encoding';
|
||||
import { StatusError } from './status-error.js';
|
||||
import { detectEncoding, toUtf8 } from './encoding.js';
|
||||
import * as cheerio from 'cheerio';
|
||||
const PrivateIp = require('private-ip');
|
||||
import PrivateIp from 'private-ip';
|
||||
import { dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { readFileSync } from 'node:fs';
|
||||
|
||||
const _filename = fileURLToPath(import.meta.url);
|
||||
const _dirname = dirname(_filename);
|
||||
|
||||
/**
 * Module-wide HTTP/HTTPS agents passed along with outgoing requests.
 * Stays `undefined` until `setAgent` is called.
 */
export let agent: Got.Agents | undefined;

/**
 * Replaces the module-wide agents.
 *
 * @param _agent Agents to use from now on
 */
export function setAgent(_agent: Got.Agents): void {
	agent = _agent;
}
|
||||
|
||||
/**
 * Arguments accepted by the internal request helper (`getResponse`).
 */
export type GotOptions = {
	/** URL to request */
	url: string;
	/** HTTP method */
	method: 'GET' | 'POST' | 'HEAD';
	/** Request headers; a value may be `undefined` */
	headers: Record<string, string | undefined>;
	/** Request body */
	body?: string;
	/** NOTE(review): presumably matched against the response content type — confirm in receiveResponce */
	typeFilter?: RegExp;
};
|
||||
|
||||
const repo = JSON.parse(readFileSync(`${_dirname}/../../package.json`, 'utf8'));
|
||||
|
||||
const RESPONSE_TIMEOUT = 20 * 1000;
|
||||
const OPERATION_TIMEOUT = 60 * 1000;
|
||||
const MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
|
||||
const BOT_UA = `SummalyBot/${version}`;
|
||||
const BOT_UA = `SummalyBot/${repo.version}`;
|
||||
|
||||
export async function scpaping(url: string, opts?: { lang?: string; }) {
|
||||
const response = await getResponse({
|
||||
@ -46,13 +66,13 @@ export async function get(url: string) {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
'accept': '*/*',
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
return await res.body;
|
||||
}
|
||||
|
||||
export async function head(url: string, options?: ) {
|
||||
export async function head(url: string) {
|
||||
const res = await getResponse({
|
||||
url,
|
||||
method: 'HEAD',
|
||||
@ -64,7 +84,7 @@ export async function head(url: string, options?: ) {
|
||||
return await res;
|
||||
}
|
||||
|
||||
async function getResponse(args: { url: string, method: 'GET' | 'POST' | 'HEAD', body?: string, headers: Record<string, string>, typeFilter?: RegExp }) {
|
||||
async function getResponse(args: GotOptions) {
|
||||
const timeout = RESPONSE_TIMEOUT;
|
||||
const operationTimeout = OPERATION_TIMEOUT;
|
||||
|
||||
@ -81,8 +101,11 @@ async function getResponse(args: { url: string, method: 'GET' | 'POST' | 'HEAD',
|
||||
send: timeout,
|
||||
request: operationTimeout, // whole operation timeout
|
||||
},
|
||||
agent,
|
||||
http2: false,
|
||||
retry: 0,
|
||||
retry: {
|
||||
limit: 0,
|
||||
},
|
||||
});
|
||||
|
||||
return await receiveResponce({ req, typeFilter: args.typeFilter });
|
||||
|
@ -6,9 +6,9 @@
|
||||
|
||||
/* dependencies below */
|
||||
|
||||
const assert = require('assert');
|
||||
const express = require('express');
|
||||
const summaly = require('../').default;
|
||||
import * as assert from 'assert';
|
||||
import express from 'express';
|
||||
import { summaly } from '../built/index.js';
|
||||
|
||||
/* settings below */
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user