From 7fbab86441cc3557343a3b9069f839ab7aaab613 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E3=81=8B=E3=81=A3=E3=81=93=E3=81=8B=E3=82=8A?=
<67428053+kakkokari-gtyih@users.noreply.github.com>
Date: Fri, 15 Nov 2024 17:44:17 +0900
Subject: [PATCH] =?UTF-8?q?enhance(parse):=20sensitive=E5=88=A4=E5=AE=9A?=
=?UTF-8?q?=E3=82=92=E5=BC=B7=E5=8C=96=20(#36)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* enhance(parse): sensitive判定を強化
* Update Changelog
---
CHANGELOG.md | 1 +
src/general.ts | 18 +++++++++-
test/htmls/meta-adult-sensitive.html | 13 ++++++++
test/htmls/meta-rta-sensitive.html | 13 ++++++++
test/index.ts | 50 ++++++++++++++++++++++++++++
5 files changed, 94 insertions(+), 1 deletion(-)
create mode 100644 test/htmls/meta-adult-sensitive.html
create mode 100644 test/htmls/meta-rta-sensitive.html
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e0fe1a6..ee9c593 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,6 @@
(unreleased)
------------------
+* センシティブフラグの判定を `<meta name="rating">` および `rating` ヘッダでも行うように
* 依存関係の更新
* eslintの設定を更新
diff --git a/src/general.ts b/src/general.ts
index 9ed0a4d..b1facce 100644
--- a/src/general.ts
+++ b/src/general.ts
@@ -138,6 +138,18 @@ export type GeneralScrapingOptions = {
contentLengthRequired?: boolean;
}
+function headerEqualValueContains(search: string, headerValue: string | string[] | undefined) { // Returns true when the header value, or any entry of a multi-valued header, equals `search` ignoring case.
+ if (!headerValue) { // Header missing (undefined) or an empty string: nothing to match.
+ return false;
+ }
+ // Multi-valued header (string[]): "contains" means at least one entry is equal to `search`, case-insensitively.
+ if (Array.isArray(headerValue)) {
+ return headerValue.some(value => value.toLowerCase() === search.toLowerCase());
+ }
+ // Single-valued header: the whole value must equal `search` (case-insensitive); this is not a substring test.
+ return headerValue.toLowerCase() === search.toLowerCase();
+}
+
export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOptions): Promise {
let lang = opts?.lang;
if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;
@@ -235,7 +247,11 @@ export async function parseGeneral(_url: URL | string, opts?: GeneralScrapingOpt
// https://developer.mixi.co.jp/connect/mixi_plugin/mixi_check/spec_mixi_check/#toc-18-
const sensitive =
- $('meta[property=\'mixi:content-rating\']').attr('content') === '1';
+ $('meta[property=\'mixi:content-rating\']').attr('content') === '1' ||
+ headerEqualValueContains('adult', res.response.headers.rating) ||
+ headerEqualValueContains('RTA-5042-1996-1400-1577-RTA', res.response.headers.rating) ||
+ $('meta[name=\'rating\']').attr('content') === 'adult' ||
+ $('meta[name=\'rating\']').attr('content')?.toUpperCase() === 'RTA-5042-1996-1400-1577-RTA';
const find = async (path: string) => {
const target = new URL(path, url.href);
diff --git a/test/htmls/meta-adult-sensitive.html b/test/htmls/meta-adult-sensitive.html
new file mode 100644
index 0000000..b563fd1
--- /dev/null
+++ b/test/htmls/meta-adult-sensitive.html
@@ -0,0 +1,13 @@
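+<!DOCTYPE html>
+
+<html>
+	<head>
+		<meta charset="utf-8">
+		<meta name="rating" content="adult">
+		<title>SENSITIVE CONTENT!!</title>
+	</head>
+	<body>
+		<h1>Yo</h1>
+		<p>Hey hey hey syuilo.</p>
+	</body>
+</html>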
diff --git a/test/htmls/meta-rta-sensitive.html b/test/htmls/meta-rta-sensitive.html
new file mode 100644
index 0000000..a244530
--- /dev/null
+++ b/test/htmls/meta-rta-sensitive.html
@@ -0,0 +1,13 @@
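+<!DOCTYPE html>
+
+<html>
+	<head>
+		<meta charset="utf-8">
+		<meta name="rating" content="RTA-5042-1996-1400-1577-RTA">
+		<title>SENSITIVE CONTENT!!</title>
+	</head>
+	<body>
+		<h1>Yo</h1>
+		<p>Hey hey hey syuilo.</p>
+	</body>
+</html>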
diff --git a/test/index.ts b/test/index.ts
index cab8fd9..e983e83 100644
--- a/test/index.ts
+++ b/test/index.ts
@@ -531,6 +531,56 @@ describe('sensitive', () => {
await app.listen({ port });
expect((await summaly(host)).sensitive).toBe(true);
});
+
+ test('meta rating adult', async () => {
+ app = fastify();
+ app.get('/', (request, reply) => {
+ const content = fs.readFileSync(_dirname + '/htmls/meta-adult-sensitive.html');
+ reply.header('content-length', content.length);
+ reply.header('content-type', 'text/html');
+ return reply.send(content);
+ });
+ await app.listen({ port });
+ expect((await summaly(host)).sensitive).toBe(true);
+ });
+
+ test('meta rating rta', async () => {
+ app = fastify();
+ app.get('/', (request, reply) => {
+ const content = fs.readFileSync(_dirname + '/htmls/meta-rta-sensitive.html');
+ reply.header('content-length', content.length);
+ reply.header('content-type', 'text/html');
+ return reply.send(content);
+ });
+ await app.listen({ port });
+ expect((await summaly(host)).sensitive).toBe(true);
+ });
+
+ test('HTTP Header rating adult', async () => {
+ app = fastify();
+ app.get('/', (request, reply) => {
+ const content = fs.readFileSync(_dirname + '/htmls/basic.html');
+ reply.header('content-length', content.length);
+ reply.header('content-type', 'text/html');
+ reply.header('rating', 'adult');
+ return reply.send(content);
+ });
+ await app.listen({ port });
+ expect((await summaly(host)).sensitive).toBe(true);
+ });
+
+ test('HTTP Header rating rta', async () => {
+ app = fastify();
+ app.get('/', (request, reply) => {
+ const content = fs.readFileSync(_dirname + '/htmls/basic.html');
+ reply.header('content-length', content.length);
+ reply.header('content-type', 'text/html');
+ reply.header('rating', 'RTA-5042-1996-1400-1577-RTA');
+ return reply.send(content);
+ });
+ await app.listen({ port });
+ expect((await summaly(host)).sensitive).toBe(true);
+ });
});
describe('UserAgent', () => {