This commit is contained in:
tamaina 2023-02-12 09:53:57 +00:00
parent 8329aec479
commit 16d2b7fd1f
15 changed files with 1940 additions and 115 deletions

View File

@ -1,6 +1,6 @@
*.*.* / 2023-2-**
------------------
* export a Fastify Module
*
* add an option to pass custom HTTP/HTTPS agents
2.7.0 / 2022-07-09

1843
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -9,24 +9,34 @@
"main": "./built/index.js",
"type": "module",
"types": "./built/index.d.ts",
"files": [
"built",
"LICENSE"
],
"scripts": {
"build": "tsc",
"test": "mocha --harmony"
"test": "mocha --harmony",
"start": "fastify start ./built/index.js"
},
"optionalDependencies": {
"fastify": "3.24.1"
},
"devDependencies": {
"@types/cheerio": "0.22.18",
"@types/debug": "4.1.7",
"@types/escape-regexp": "^0.0.1",
"@types/html-entities": "1.3.4",
"@types/mocha": "9.0.0",
"@types/node": "16.11.12",
"@types/require-all": "^3.0.3",
"debug": "^4.3.4",
"express": "^4.18.2",
"fastify": "^4.13.0",
"fastify-cli": "^5.7.1",
"mocha": "^9.2.2",
"typescript": "4.5.3"
},
"dependencies": {
"cheerio": "^1.0.0-rc.12",
"debug": "4.3.3",
"escape-regexp": "0.0.1",
"got": "^12.5.3",
"html-entities": "2.3.2",
@ -34,7 +44,6 @@
"jschardet": "3.0.0",
"koa": "2.13.4",
"private-ip": "2.3.3",
"require-all": "3.0.0",
"trace-redirect": "1.0.6"
}
}

View File

@ -1,11 +1,11 @@
import * as URL from 'url';
import clip from './utils/clip';
import cleanupTitle from './utils/cleanup-title';
import clip from './utils/clip.js';
import cleanupTitle from './utils/cleanup-title.js';
import { decode as decodeHtml } from 'html-entities';
import { head, scpaping } from './utils/got';
import Summary from './summary';
import { head, scpaping } from './utils/got.js';
import Summary from './summary.js';
export default async (url: URL.Url, lang: string | null = null): Promise<Summary | null> => {
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;

View File

@ -4,19 +4,15 @@
*/
import * as URL from 'url';
import requireAll from 'require-all';
import tracer from 'trace-redirect';
import Summary from './summary';
import IPlugin from './iplugin';
import general from './general';
// Load builtin plugins
const _builtinPlugins = requireAll({
dirname: __dirname + '/plugins'
}) as { [key: string]: IPlugin };
const builtinPlugins = Object.keys(_builtinPlugins)
.map(key => _builtinPlugins[key]);
import Summary from './summary.js';
import type { IPlugin as _IPlugin } from './iplugin.js';
export type IPlugin = _IPlugin;
import general from './general.js';
import * as Got from 'got';
import { setAgent } from './utils/got.js';
import type { FastifyInstance } from 'fastify';
import { plugins as builtinPlugins } from './plugins/index.js';
type Options = {
/**
@ -33,6 +29,11 @@ type Options = {
* Custom Plugins
*/
plugins?: IPlugin[];
/**
* Custom HTTP agent
*/
agent?: Got.Agents;
};
type Result = Summary & {
@ -51,7 +52,9 @@ const defaultOptions = {
/**
* Summarize a web page
*/
export default async (url: string, options?: Options): Promise<Result> => {
export const summaly = async (url: string, options?: Options): Promise<Result> => {
if (options?.agent) setAgent(options.agent);
const opts = Object.assign(defaultOptions, options);
const plugins = builtinPlugins.concat(opts.plugins || []);
@ -64,7 +67,7 @@ export default async (url: string, options?: Options): Promise<Result> => {
const match = plugins.filter(plugin => plugin.test(_url))[0];
// Get summary
const summary = await (match ? match.summarize : general)(_url, opts.lang);
const summary = await (match ? match.summarize : general)(_url, opts.lang || undefined);
if (summary == null) {
throw 'failed summarize';
@ -74,3 +77,34 @@ export default async (url: string, options?: Options): Promise<Result> => {
url: actualUrl
});
};
/**
 * Fastify plugin that exposes the summarizer over HTTP.
 *
 * Registers a single `GET /` route:
 * - `url` (query, required): the page to summarize. A missing value yields `400`.
 * - `lang` (query, optional): forwarded to `summaly` as the `lang` option.
 *
 * On success the summary object is returned as the response body. Any failure
 * inside `summaly` is answered with `500` and an `error` field describing it.
 *
 * @param fastify Fastify instance the route is registered on.
 * @param options Plugin options; currently unused.
 * @param done Callback signalling that registration has finished.
 */
export default function (fastify: FastifyInstance, options: {}, done: (err?: Error) => void) {
	fastify.get<{
		Querystring: {
			url?: string;
			lang?: string;
		};
	}>('/', async (req, reply) => {
		// No cast here: the null check below narrows `url` to `string` for the checker.
		const url = req.query.url;

		if (url == null) {
			return reply.status(400).send({
				error: 'url is required'
			});
		}

		try {
			const summary = await summaly(url, {
				lang: req.query.lang as string,
				followRedirects: false,
			});

			return summary;
		} catch (e) {
			// `summaly` throws both plain strings and Error objects. An Error has no
			// enumerable own properties, so sending it as-is would serialize to `{}`
			// and hide the cause from the client; send its message instead.
			return reply.status(500).send({
				error: e instanceof Error ? e.message : e
			});
		}
	});

	done();
}

View File

@ -1,9 +1,7 @@
import * as URL from 'url';
import Summary from './summary';
import Summary from './summary.js';
interface IPlugin {
export interface IPlugin {
test: (url: URL.Url) => boolean;
summarize: (url: URL.Url, lang?: string) => Promise<Summary>;
}
export default IPlugin;

View File

@ -1,6 +1,6 @@
import * as URL from 'url';
import { scpaping } from '../utils/got';
import summary from '../summary';
import { scpaping } from '../utils/got.js';
import summary from '../summary.js';
export function test(url: URL.Url): boolean {
return url.hostname === 'www.amazon.com' ||
@ -29,19 +29,19 @@ export async function summarize(url: URL.Url): Promise<summary> {
$('#productDescription').text() ||
$('meta[name="description"]').attr('content');
const thumbnail: string = $('#landingImage').attr('src');
const thumbnail: string | undefined = $('#landingImage').attr('src');
const playerUrl =
$('meta[property="twitter:player"]').attr('content') ||
$('meta[name="twitter:player"]').attr('content');
const playerWidth = parseInt(
const playerWidth =
$('meta[property="twitter:player:width"]').attr('content') ||
$('meta[name="twitter:player:width"]').attr('content'));
$('meta[name="twitter:player:width"]').attr('content');
const playerHeight = parseInt(
const playerHeight =
$('meta[property="twitter:player:height"]').attr('content') ||
$('meta[name="twitter:player:height"]').attr('content'));
$('meta[name="twitter:player:height"]').attr('content');
return {
title: title ? title.trim() : null,
@ -50,8 +50,8 @@ export async function summarize(url: URL.Url): Promise<summary> {
thumbnail: thumbnail ? thumbnail.trim() : null,
player: {
url: playerUrl || null,
width: playerWidth || null,
height: playerHeight || null
width: playerWidth ? parseInt(playerWidth) : null,
height: playerHeight ? parseInt(playerHeight) : null
},
sitename: 'Amazon'
};

8
src/plugins/index.ts Normal file
View File

@ -0,0 +1,8 @@
import { IPlugin } from '@/iplugin.js';
import * as amazon from './amazon.js';
import * as wikipedia from './wikipedia.js';
export const plugins: IPlugin[] = [
amazon,
wikipedia,
];

View File

@ -1,18 +1,19 @@
import * as URL from 'url';
import { get } from '../utils/got';
import * as debug from 'debug';
import summary from '../summary';
import clip from './../utils/clip';
import { get } from '../utils/got.js';
import debug from 'debug';
import summary from '../summary.js';
import clip from './../utils/clip.js';
const log = debug('summaly:plugins:wikipedia');
/**
 * Tell whether the Wikipedia plugin should handle the given URL.
 *
 * @param url Parsed URL to inspect.
 * @returns `true` when the hostname ends with `.wikipedia.org`; `false`
 *          otherwise, including when the URL has no hostname.
 */
export function test(url: URL.Url): boolean {
	const host = url.hostname;

	// A missing or empty hostname can never be a Wikipedia subdomain.
	return host ? /\.wikipedia\.org$/.test(host) : false;
}
export async function summarize(url: URL.Url): Promise<summary> {
const lang = url.host.split('.')[0];
const title = url.pathname.split('/')[2];
const lang = url.host ? url.host.split('.')[0] : null;
const title = url.pathname ? url.pathname.split('/')[2] : null;
const endpoint = `https://${lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro=&explaintext=&titles=${title}`;
log(`lang is ${lang}`);

View File

@ -1,27 +0,0 @@
import * as http from 'http';
import * as Koa from 'koa';
import summaly from '../';
// Standalone Koa server: every request is treated as a summarize request
// driven by the `url` and `lang` query parameters.
const app = new Koa();
app.use(async ctx => {
// Without a `url` query parameter there is nothing to summarize: answer 400 with no body.
if (!ctx.query.url) {
ctx.status = 400;
return;
}
try {
// Summarize the requested page; redirects are not followed.
const summary = await summaly(ctx.query.url, {
lang: ctx.query.lang,
followRedirects: false
});
ctx.body = summary;
} catch (e) {
// Any failure is reported as a bare 500; the error itself is discarded.
ctx.status = 500;
}
});
const server = http.createServer(app.callback());
// Listen on the port given by the PORT environment variable, falling back to 80.
server.listen(process.env.PORT || 80);

View File

@ -1,4 +1,4 @@
const escapeRegExp = require('escape-regexp');
import escapeRegExp from 'escape-regexp';
export default function(title: string, siteName?: string | null): string {
title = title.trim();

View File

@ -1,4 +1,4 @@
import nullOrEmpty from './null-or-empty';
import nullOrEmpty from './null-or-empty.js';
export default function(s: string, max: number): string {
if (nullOrEmpty(s)) {

View File

@ -1,14 +1,34 @@
import { version } from '../../package.json';
import got, * as Got from 'got';
import { StatusError } from './status-error';
import { detectEncoding, toUtf8 } from './encoding';
import { StatusError } from './status-error.js';
import { detectEncoding, toUtf8 } from './encoding.js';
import * as cheerio from 'cheerio';
const PrivateIp = require('private-ip');
import PrivateIp from 'private-ip';
import { dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { readFileSync } from 'node:fs';
const _filename = fileURLToPath(import.meta.url);
const _dirname = dirname(_filename);
/**
 * Agents passed as the `agent` option of the got request built in this module.
 * Stays `undefined` until `setAgent` is called.
 */
export let agent: Got.Agents | undefined = undefined;
/**
 * Replace the module-level `agent`.
 *
 * The value is stored in a module variable, so it applies to every request
 * issued after the call, not only to the caller's own request.
 *
 * @param _agent Agents to use from now on.
 */
export function setAgent(_agent: Got.Agents) {
agent = _agent;
}
/**
 * Arguments accepted by `getResponse`.
 */
export type GotOptions = {
/** URL to request. */
url: string;
/** HTTP method of the request. */
method: 'GET' | 'POST' | 'HEAD';
/** Optional request body. */
body?: string;
/** Request headers; a value may be `undefined`. */
headers: Record<string, string | undefined>;
/**
 * Handed on to `receiveResponce` by `getResponse`.
 * NOTE(review): presumably a filter on the response content type — confirm against `receiveResponce`.
 */
typeFilter?: RegExp;
}
const repo = JSON.parse(readFileSync(`${_dirname}/../../package.json`, 'utf8'));
const RESPONSE_TIMEOUT = 20 * 1000;
const OPERATION_TIMEOUT = 60 * 1000;
const MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
const BOT_UA = `SummalyBot/${version}`;
const BOT_UA = `SummalyBot/${repo.version}`;
export async function scpaping(url: string, opts?: { lang?: string; }) {
const response = await getResponse({
@ -46,13 +66,13 @@ export async function get(url: string) {
method: 'GET',
headers: {
'accept': '*/*',
}
},
});
return await res.body;
}
export async function head(url: string, options?: ) {
export async function head(url: string) {
const res = await getResponse({
url,
method: 'HEAD',
@ -64,7 +84,7 @@ export async function head(url: string, options?: ) {
return await res;
}
async function getResponse(args: { url: string, method: 'GET' | 'POST' | 'HEAD', body?: string, headers: Record<string, string>, typeFilter?: RegExp }) {
async function getResponse(args: GotOptions) {
const timeout = RESPONSE_TIMEOUT;
const operationTimeout = OPERATION_TIMEOUT;
@ -81,8 +101,11 @@ async function getResponse(args: { url: string, method: 'GET' | 'POST' | 'HEAD',
send: timeout,
request: operationTimeout, // whole operation timeout
},
agent,
http2: false,
retry: 0,
retry: {
limit: 0,
},
});
return await receiveResponce({ req, typeFilter: args.typeFilter });

View File

@ -6,9 +6,9 @@
/* dependencies below */
const assert = require('assert');
const express = require('express');
const summaly = require('../').default;
import * as assert from 'assert';
import express from 'express';
import { summaly } from '../built/index.js';
/* settings below */