enhance(parse): sensitive判定を強化 (#36)

* enhance(parse): sensitive判定を強化

* Update Changelog
This commit is contained in:
かっこかり 2024-11-15 17:44:17 +09:00 committed by GitHub
parent 1b541a1418
commit 7fbab86441
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 94 additions and 1 deletions

View File

@ -1,5 +1,6 @@
(unreleased)
------------------
* センシティブフラグの判定を `<meta name="rating">` および `rating` ヘッダでも行うように
* 依存関係の更新
* eslintの設定を更新

View File

@ -138,6 +138,18 @@ export type GeneralScrapingOptions = {
contentLengthRequired?: boolean;
}
/**
 * Reports whether an HTTP header carries a value equal to `search`, ignoring case.
 *
 * A header value matches when the whole string equals `search`, or when any
 * comma-separated element of it (surrounding whitespace removed) equals
 * `search`. The element-wise comparison is needed because Node.js merges
 * repeated occurrences of most headers into one string joined with ", ",
 * so a whole-string comparison alone misses e.g. `general, adult`.
 *
 * @param search - Value to look for; compared case-insensitively.
 * @param headerValue - Raw header value: a single string, an array of
 *   strings, or `undefined` when the header is absent.
 * @returns `true` when at least one value matches; `false` otherwise,
 *   including for a missing or empty header.
 */
function headerEqualValueContains(search: string, headerValue: string | string[] | undefined): boolean {
	if (!headerValue) {
		return false;
	}

	// Lower-case the needle once instead of once per compared value.
	const needle = search.toLowerCase();
	const rawValues = Array.isArray(headerValue) ? headerValue : [headerValue];

	return rawValues.some(raw => {
		const lowered = raw.toLowerCase();
		// Whole-value comparison first: keeps every match the exact-equality check produced.
		if (lowered === needle) {
			return true;
		}
		// Then look at each element of a comma-separated list.
		return lowered.split(',').some(element => element.trim() === needle);
	});
}
export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOptions): Promise<Summary | null> {
let lang = opts?.lang;
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
@ -235,7 +247,11 @@ export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOpt
// https://developer.mixi.co.jp/connect/mixi_plugin/mixi_check/spec_mixi_check/#toc-18-
const sensitive =
$('meta[property=\'mixi:content-rating\']').attr('content') === '1';
$('meta[property=\'mixi:content-rating\']').attr('content') === '1' ||
headerEqualValueContains('adult', res.response.headers.rating) ||
headerEqualValueContains('RTA-5042-1996-1400-1577-RTA', res.response.headers.rating) ||
$('meta[name=\'rating\']').attr('content') === 'adult' ||
$('meta[name=\'rating\']').attr('content')?.toUpperCase() === 'RTA-5042-1996-1400-1577-RTA';
const find = async (path: string) => {
const target = new URL(path, url.href);

View File

@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<!-- Fixture: marks the page as adult content through the "rating" meta tag, which the scraper's sensitive check reads. -->
<meta name="rating" content="adult">
<title>SENSITIVE CONTENT!!</title>
</head>
<body>
<h1>Yo</h1>
<p>Hey hey hey syuilo.</p>
</body>
</html>

View File

@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<!-- Fixture: carries the RTA label string in the "rating" meta tag, which the scraper's sensitive check reads. -->
<meta name="rating" content="RTA-5042-1996-1400-1577-RTA">
<title>SENSITIVE CONTENT!!</title>
</head>
<body>
<h1>Yo</h1>
<p>Hey hey hey syuilo.</p>
</body>
</html>

View File

@ -531,6 +531,56 @@ describe('sensitive', () => {
await app.listen({ port });
expect((await summaly(host)).sensitive).toBe(true);
});
// A page whose <meta name="rating"> is "adult" must be summarized as sensitive.
test('meta rating adult', async () => {
	app = fastify();
	app.get('/', (request, reply) => {
		// Serve the fixture as-is; content-length is the byte length of the file buffer.
		const html = fs.readFileSync(_dirname + '/htmls/meta-adult-sensitive.html');
		return reply
			.header('content-length', html.length)
			.header('content-type', 'text/html')
			.send(html);
	});
	await app.listen({ port });

	const summary = await summaly(host);
	expect(summary.sensitive).toBe(true);
});
// A page whose <meta name="rating"> holds the RTA label must be summarized as sensitive.
test('meta rating rta', async () => {
	app = fastify();
	app.get('/', (request, reply) => {
		// Serve the fixture as-is; content-length is the byte length of the file buffer.
		const html = fs.readFileSync(_dirname + '/htmls/meta-rta-sensitive.html');
		return reply
			.header('content-length', html.length)
			.header('content-type', 'text/html')
			.send(html);
	});
	await app.listen({ port });

	const summary = await summaly(host);
	expect(summary.sensitive).toBe(true);
});
// An otherwise plain page sent with the response header "rating: adult" must be summarized as sensitive.
test('HTTP Header rating adult', async () => {
	app = fastify();
	app.get('/', (request, reply) => {
		// The HTML itself carries no rating; only the response header does.
		const html = fs.readFileSync(_dirname + '/htmls/basic.html');
		return reply
			.header('content-length', html.length)
			.header('content-type', 'text/html')
			.header('rating', 'adult')
			.send(html);
	});
	await app.listen({ port });

	const summary = await summaly(host);
	expect(summary.sensitive).toBe(true);
});
// An otherwise plain page sent with the RTA label in the "rating" response header must be summarized as sensitive.
test('HTTP Header rating rta', async () => {
	app = fastify();
	app.get('/', (request, reply) => {
		// The HTML itself carries no rating; only the response header does.
		const html = fs.readFileSync(_dirname + '/htmls/basic.html');
		return reply
			.header('content-length', html.length)
			.header('content-type', 'text/html')
			.header('rating', 'RTA-5042-1996-1400-1577-RTA')
			.send(html);
	});
	await app.listen({ port });

	const summary = await summaly(host);
	expect(summary.sensitive).toBe(true);
});
});
describe('UserAgent', () => {