feat: auto nsfw detection (#8840)

* feat: auto nsfw detection

* ✌️

* Update ja-JP.yml

* Update ja-JP.yml

* ポルノ判定のしきい値を高めに

* エラーハンドリングちゃんとした

* Update ja-JP.yml

* 感度設定を強化

* refactor

* feat: add video support for auto nsfw detection

* rename: image -> media

* .js

* fix: add missing error handling

* fix: use valid pathname instead of using filename due to invalid usage

* perf(nsfw-detection): decode frames

* disable detection of video for some reasons

* perf(nsfw-detection): streamify detection process for video

* disable disallowUploadWhenPredictedAsPorn option

* fix(nsfw-detection): improve reliability

* fix(nsfw-detection): use Math.ceil instead of Math.round

* perf(nsfw-detection): delete tmp frames after used

* fix(nsfw-detection): FSWatcher does not emit ready event

* perf(nsfw-detection): skip black frames

* refactor: strip exists check

* Update package.json

* めっちゃ変えた

* lint

* Update COPYING

* オプションで動画解析できるように

* Update yarn.lock

* Update CHANGELOG.md

Co-authored-by: Acid Chicken (硫酸鶏) <root@acid-chicken.com>
This commit is contained in:
syuilo
2022-07-07 21:06:37 +09:00
committed by GitHub
parent 010db2515c
commit e560601815
39 changed files with 1275 additions and 78 deletions

View File

@ -1,12 +1,18 @@
import * as fs from 'node:fs';
import * as crypto from 'node:crypto';
import { join } from 'node:path';
import * as stream from 'node:stream';
import * as util from 'node:util';
import { FSWatcher } from 'chokidar';
import { fileTypeFromFile } from 'file-type';
import FFmpeg from 'fluent-ffmpeg';
import isSvg from 'is-svg';
import probeImageSize from 'probe-image-size';
import { type predictionType } from 'nsfwjs';
import sharp from 'sharp';
import { encode } from 'blurhash';
import { detectSensitive } from '@/services/detect-sensitive.js';
import { createTempDir } from './create-temp.js';
const pipeline = util.promisify(stream.pipeline);
@ -21,6 +27,8 @@ export type FileInfo = {
height?: number;
orientation?: number;
blurhash?: string;
sensitive: boolean;
porn: boolean;
warnings: string[];
};
@ -37,7 +45,12 @@ const TYPE_SVG = {
/**
* Get file information
*/
export async function getFileInfo(path: string): Promise<FileInfo> {
export async function getFileInfo(path: string, opts: {
skipSensitiveDetection: boolean;
sensitiveThreshold?: number;
sensitiveThresholdForPorn?: number;
enableSensitiveMediaDetectionForVideos?: boolean;
}): Promise<FileInfo> {
const warnings = [] as string[];
const size = await getFileSize(path);
@ -58,7 +71,7 @@ export async function getFileInfo(path: string): Promise<FileInfo> {
// うまく判定できない画像は octet-stream にする
if (!imageSize) {
warnings.push(`cannot detect image dimensions`);
warnings.push('cannot detect image dimensions');
type = TYPE_OCTET_STREAM;
} else if (imageSize.wUnits === 'px') {
width = imageSize.width;
@ -67,7 +80,7 @@ export async function getFileInfo(path: string): Promise<FileInfo> {
// 制限を超えている画像は octet-stream にする
if (imageSize.width > 16383 || imageSize.height > 16383) {
warnings.push(`image dimensions exceeds limits`);
warnings.push('image dimensions exceeds limits');
type = TYPE_OCTET_STREAM;
}
} else {
@ -84,6 +97,19 @@ export async function getFileInfo(path: string): Promise<FileInfo> {
});
}
let sensitive = false;
let porn = false;
if (!opts.skipSensitiveDetection) {
[sensitive, porn] = await detectSensitivity(
path,
type.mime,
opts.sensitiveThreshold ?? 0.5,
opts.sensitiveThresholdForPorn ?? 0.75,
opts.enableSensitiveMediaDetectionForVideos ?? false,
);
}
return {
size,
md5,
@ -92,10 +118,150 @@ export async function getFileInfo(path: string): Promise<FileInfo> {
height,
orientation,
blurhash,
sensitive,
porn,
warnings,
};
}
async function detectSensitivity(source: string, mime: string, sensitiveThreshold: number, sensitiveThresholdForPorn: number, analyzeVideo: boolean): Promise<[sensitive: boolean, porn: boolean]> {
let sensitive = false;
let porn = false;
function judgePrediction(result: readonly predictionType[]): [sensitive: boolean, porn: boolean] {
let sensitive = false;
let porn = false;
if ((result.find(x => x.className === 'Sexy')?.probability ?? 0) > sensitiveThreshold) sensitive = true;
if ((result.find(x => x.className === 'Hentai')?.probability ?? 0) > sensitiveThreshold) sensitive = true;
if ((result.find(x => x.className === 'Porn')?.probability ?? 0) > sensitiveThreshold) sensitive = true;
if ((result.find(x => x.className === 'Porn')?.probability ?? 0) > sensitiveThresholdForPorn) porn = true;
return [sensitive, porn];
}
if (['image/jpeg', 'image/png', 'image/webp'].includes(mime)) {
const result = await detectSensitive(source);
if (result) {
[sensitive, porn] = judgePrediction(result);
}
} else if (analyzeVideo && (mime === 'image/apng' || mime.startsWith('video/'))) {
const [outDir, disposeOutDir] = await createTempDir();
try {
const command = FFmpeg()
.input(source)
.inputOptions([
'-skip_frame', 'nokey', // 可能ならキーフレームのみを取得してほしいとする(そうなるとは限らない)
'-lowres', '3', // 元の画質でデコードする必要はないので 1/8 画質でデコードしてもよいとする(そうなるとは限らない)
])
.noAudio()
.videoFilters([
{
filter: 'select', // フレームのフィルタリング
options: {
e: 'eq(pict_type,PICT_TYPE_I)', // I-Frame のみをフィルタするVP9 とかはデコードしてみないとわからないっぽい)
},
},
{
filter: 'blackframe', // 暗いフレームの検出
options: {
amount: '0', // 暗さに関わらず全てのフレームで測定値を取る
},
},
{
filter: 'metadata',
options: {
mode: 'select', // フレーム選択モード
key: 'lavfi.blackframe.pblack', // フレームにおける暗部の百分率(前のフィルタからのメタデータを参照する)
value: '50',
function: 'less', // 50% 未満のフレームを選択する50% 以上暗部があるフレームだと誤検知を招くかもしれないので)
},
},
{
filter: 'scale',
options: {
w: 299,
h: 299,
},
},
])
.format('image2')
.output(join(outDir, '%d.png'))
.outputOptions(['-vsync', '0']); // 可変フレームレートにすることで穴埋めをさせない
const results: ReturnType<typeof judgePrediction>[] = [];
let frameIndex = 0;
let targetIndex = 0;
let nextIndex = 1;
for await (const path of asyncIterateFrames(outDir, command)) {
try {
const index = frameIndex++;
if (index !== targetIndex) {
continue;
}
targetIndex = nextIndex;
nextIndex += index; // fibonacci sequence によってフレーム数制限を掛ける
const result = await detectSensitive(path);
if (result) {
results.push(judgePrediction(result));
}
} finally {
fs.promises.unlink(path);
}
}
sensitive = results.filter(x => x[0]).length >= Math.ceil(results.length * sensitiveThreshold);
porn = results.filter(x => x[1]).length >= Math.ceil(results.length * sensitiveThresholdForPorn);
} finally {
disposeOutDir();
}
}
return [sensitive, porn];
}
async function* asyncIterateFrames(cwd: string, command: FFmpeg.FfmpegCommand): AsyncGenerator<string, void> {
const watcher = new FSWatcher({
cwd,
disableGlobbing: true,
});
let finished = false;
command.once('end', () => {
finished = true;
watcher.close();
});
command.run();
for (let i = 1; true; i++) { // eslint-disable-line @typescript-eslint/no-unnecessary-condition
const current = `${i}.png`;
const next = `${i + 1}.png`;
const framePath = join(cwd, current);
if (await exists(join(cwd, next))) {
yield framePath;
} else if (!finished) { // eslint-disable-line @typescript-eslint/no-unnecessary-condition
watcher.add(next);
await new Promise<void>((resolve, reject) => {
watcher.on('add', function onAdd(path) {
if (path === next) { // 次フレームの書き出しが始まっているなら、現在フレームの書き出しは終わっている
watcher.unwatch(current);
watcher.off('add', onAdd);
resolve();
}
});
command.once('end', resolve); // 全てのフレームを処理し終わったなら、最終フレームである現在フレームの書き出しは終わっている
command.once('error', reject);
});
yield framePath;
} else if (await exists(framePath)) {
yield framePath;
} else {
return;
}
}
}
function exists(path: string): Promise<boolean> {
return fs.promises.access(path).then(() => true, () => false);
}
/**
* Detect MIME Type and extension
*/

View File

@ -156,6 +156,19 @@ export class DriveFile {
})
public isSensitive: boolean;
@Index()
@Column('boolean', {
default: false,
comment: 'Whether the DriveFile is NSFW. (predict)',
})
public maybeSensitive: boolean;
@Index()
@Column('boolean', {
default: false,
})
public maybePorn: boolean;
/**
* 外部の(信頼されていない)URLへの直リンクか否か
*/

View File

@ -188,6 +188,28 @@ export class Meta {
})
public recaptchaSecretKey: string | null;
@Column('enum', {
enum: ['none', 'all', 'local', 'remote'],
default: 'none',
})
public sensitiveMediaDetection: 'none' | 'all' | 'local' | 'remote';
@Column('enum', {
enum: ['medium', 'low', 'high', 'veryLow', 'veryHigh'],
default: 'medium',
})
public sensitiveMediaDetectionSensitivity: 'medium' | 'low' | 'high' | 'veryLow' | 'veryHigh';
@Column('boolean', {
default: false,
})
public setSensitiveFlagAutomatically: boolean;
@Column('boolean', {
default: false,
})
public enableSensitiveMediaDetectionForVideos: boolean;
@Column('integer', {
default: 1024,
comment: 'Drive capacity of a local user (MB)',

View File

@ -152,6 +152,11 @@ export class UserProfile {
})
public alwaysMarkNsfw: boolean;
@Column('boolean', {
default: false,
})
public autoSensitive: boolean;
@Column('boolean', {
default: false,
})

View File

@ -360,6 +360,7 @@ export const UserRepository = db.getRepository(User).extend({
injectFeaturedNote: profile!.injectFeaturedNote,
receiveAnnouncementEmail: profile!.receiveAnnouncementEmail,
alwaysMarkNsfw: profile!.alwaysMarkNsfw,
autoSensitive: profile!.autoSensitive,
carefulBot: profile!.carefulBot,
autoAcceptFollowed: profile!.autoAcceptFollowed,
noCrawle: profile!.noCrawle,

View File

@ -161,19 +161,19 @@ export const packedUserDetailedNotMeOnlySchema = {
type: 'array',
nullable: false, optional: false,
items: {
type: 'object',
nullable: false, optional: false,
properties: {
name: {
type: 'string',
nullable: false, optional: false,
},
value: {
type: 'string',
nullable: false, optional: false,
},
type: 'object',
nullable: false, optional: false,
properties: {
name: {
type: 'string',
nullable: false, optional: false,
},
maxLength: 4,
value: {
type: 'string',
nullable: false, optional: false,
},
},
maxLength: 4,
},
},
followersCount: {
@ -292,6 +292,10 @@ export const packedMeDetailedOnlySchema = {
type: 'boolean',
nullable: true, optional: false,
},
autoSensitive: {
type: 'boolean',
nullable: true, optional: false,
},
carefulBot: {
type: 'boolean',
nullable: true, optional: false,

View File

@ -195,6 +195,22 @@ export const meta = {
type: 'string',
optional: true, nullable: true,
},
sensitiveMediaDetection: {
type: 'string',
optional: true, nullable: false,
},
sensitiveMediaDetectionSensitivity: {
type: 'string',
optional: true, nullable: false,
},
setSensitiveFlagAutomatically: {
type: 'boolean',
optional: true, nullable: false,
},
enableSensitiveMediaDetectionForVideos: {
type: 'boolean',
optional: true, nullable: false,
},
proxyAccountId: {
type: 'string',
optional: true, nullable: true,
@ -370,6 +386,10 @@ export default define(meta, paramDef, async (ps, me) => {
blockedHosts: instance.blockedHosts,
hcaptchaSecretKey: instance.hcaptchaSecretKey,
recaptchaSecretKey: instance.recaptchaSecretKey,
sensitiveMediaDetection: instance.sensitiveMediaDetection,
sensitiveMediaDetectionSensitivity: instance.sensitiveMediaDetectionSensitivity,
setSensitiveFlagAutomatically: instance.setSensitiveFlagAutomatically,
enableSensitiveMediaDetectionForVideos: instance.enableSensitiveMediaDetectionForVideos,
proxyAccountId: instance.proxyAccountId,
twitterConsumerKey: instance.twitterConsumerKey,
twitterConsumerSecret: instance.twitterConsumerSecret,

View File

@ -58,6 +58,7 @@ export default define(meta, paramDef, async (ps, me) => {
autoAcceptFollowed: profile.autoAcceptFollowed,
noCrawle: profile.noCrawle,
alwaysMarkNsfw: profile.alwaysMarkNsfw,
autoSensitive: profile.autoSensitive,
carefulBot: profile.carefulBot,
injectFeaturedNote: profile.injectFeaturedNote,
receiveAnnouncementEmail: profile.receiveAnnouncementEmail,

View File

@ -48,6 +48,10 @@ export const paramDef = {
enableRecaptcha: { type: 'boolean' },
recaptchaSiteKey: { type: 'string', nullable: true },
recaptchaSecretKey: { type: 'string', nullable: true },
sensitiveMediaDetection: { type: 'string', enum: ['none', 'all', 'local', 'remote'] },
sensitiveMediaDetectionSensitivity: { type: 'string', enum: ['medium', 'low', 'high', 'veryLow', 'veryHigh'] },
setSensitiveFlagAutomatically: { type: 'boolean' },
enableSensitiveMediaDetectionForVideos: { type: 'boolean' },
proxyAccountId: { type: 'string', format: 'misskey:id', nullable: true },
maintainerName: { type: 'string', nullable: true },
maintainerEmail: { type: 'string', nullable: true },
@ -213,6 +217,22 @@ export default define(meta, paramDef, async (ps, me) => {
set.recaptchaSecretKey = ps.recaptchaSecretKey;
}
if (ps.sensitiveMediaDetection !== undefined) {
set.sensitiveMediaDetection = ps.sensitiveMediaDetection;
}
if (ps.sensitiveMediaDetectionSensitivity !== undefined) {
set.sensitiveMediaDetectionSensitivity = ps.sensitiveMediaDetectionSensitivity;
}
if (ps.setSensitiveFlagAutomatically !== undefined) {
set.setSensitiveFlagAutomatically = ps.setSensitiveFlagAutomatically;
}
if (ps.enableSensitiveMediaDetectionForVideos !== undefined) {
set.enableSensitiveMediaDetectionForVideos = ps.enableSensitiveMediaDetectionForVideos;
}
if (ps.proxyAccountId !== undefined) {
set.proxyAccountId = ps.proxyAccountId;
}

View File

@ -2,6 +2,7 @@ import ms from 'ms';
import { addFile } from '@/services/drive/add-file.js';
import { DriveFiles } from '@/models/index.js';
import { DB_MAX_IMAGE_COMMENT_LENGTH } from '@/misc/hard-limits.js';
import { IdentifiableError } from '@/misc/identifiable-error.js';
import { fetchMeta } from '@/misc/fetch-meta.js';
import define from '../../../define.js';
import { apiLogger } from '../../../logger.js';
@ -35,6 +36,18 @@ export const meta = {
code: 'INVALID_FILE_NAME',
id: 'f449b209-0c60-4e51-84d5-29486263bfd4',
},
inappropriate: {
message: 'Cannot upload the file because it has been determined that it possibly contains inappropriate content.',
code: 'INAPPROPRIATE',
id: 'bec5bd69-fba3-43c9-b4fb-2894b66ad5d2',
},
noFreeSpace: {
message: 'Cannot upload the file because you have no free space of drive.',
code: 'NO_FREE_SPACE',
id: 'd08dbc37-a6a9-463a-8c47-96c32ab5f064',
},
},
} as const;
@ -87,6 +100,10 @@ export default define(meta, paramDef, async (ps, user, _, file, cleanup, ip, hea
if (e instanceof Error || typeof e === 'string') {
apiLogger.error(e);
}
if (e instanceof IdentifiableError) {
if (e.id === '282f77bf-5816-4f72-9264-aa14d8261a21') throw new ApiError(meta.errors.inappropriate);
if (e.id === 'c6244ed2-a39a-4e1c-bf93-f0fbd7764fa6') throw new ApiError(meta.errors.noFreeSpace);
}
throw new ApiError();
} finally {
cleanup!();

View File

@ -1,8 +1,8 @@
import { publishDriveStream } from '@/services/stream.js';
import define from '../../../define.js';
import { ApiError } from '../../../error.js';
import { DriveFiles, DriveFolders, Users } from '@/models/index.js';
import { DB_MAX_IMAGE_COMMENT_LENGTH } from '@/misc/hard-limits.js';
import define from '../../../define.js';
import { ApiError } from '../../../error.js';
export const meta = {
tags: ['drive'],

View File

@ -3,17 +3,17 @@ import * as mfm from 'mfm-js';
import { publishMainStream, publishUserEvent } from '@/services/stream.js';
import acceptAllFollowRequests from '@/services/following/requests/accept-all.js';
import { publishToFollowers } from '@/services/i/update.js';
import define from '../../define.js';
import { extractCustomEmojisFromMfm } from '@/misc/extract-custom-emojis-from-mfm.js';
import { extractHashtags } from '@/misc/extract-hashtags.js';
import { updateUsertags } from '@/services/update-hashtag.js';
import { ApiError } from '../../error.js';
import { Users, DriveFiles, UserProfiles, Pages } from '@/models/index.js';
import { User } from '@/models/entities/user.js';
import { UserProfile } from '@/models/entities/user-profile.js';
import { notificationTypes } from '@/types.js';
import { normalizeForSearch } from '@/misc/normalize-for-search.js';
import { langmap } from '@/misc/langmap.js';
import { ApiError } from '../../error.js';
import define from '../../define.js';
export const meta = {
tags: ['account'],
@ -57,7 +57,7 @@ export const meta = {
message: 'Invalid Regular Expression.',
code: 'INVALID_REGEXP',
id: '0d786918-10df-41cd-8f33-8dec7d9a89a5',
}
},
},
res: {
@ -77,7 +77,8 @@ export const paramDef = {
lang: { type: 'string', enum: [null, ...Object.keys(langmap)], nullable: true },
avatarId: { type: 'string', format: 'misskey:id', nullable: true },
bannerId: { type: 'string', format: 'misskey:id', nullable: true },
fields: { type: 'array',
fields: {
type: 'array',
minItems: 0,
maxItems: 16,
items: {
@ -102,6 +103,7 @@ export const paramDef = {
injectFeaturedNote: { type: 'boolean' },
receiveAnnouncementEmail: { type: 'boolean' },
alwaysMarkNsfw: { type: 'boolean' },
autoSensitive: { type: 'boolean' },
ffVisibility: { type: 'string', enum: ['public', 'followers', 'private'] },
pinnedPageId: { type: 'array', items: {
type: 'string', format: 'misskey:id',
@ -168,6 +170,7 @@ export default define(meta, paramDef, async (ps, _user, token) => {
if (typeof ps.injectFeaturedNote === 'boolean') profileUpdates.injectFeaturedNote = ps.injectFeaturedNote;
if (typeof ps.receiveAnnouncementEmail === 'boolean') profileUpdates.receiveAnnouncementEmail = ps.receiveAnnouncementEmail;
if (typeof ps.alwaysMarkNsfw === 'boolean') profileUpdates.alwaysMarkNsfw = ps.alwaysMarkNsfw;
if (typeof ps.autoSensitive === 'boolean') profileUpdates.autoSensitive = ps.autoSensitive;
if (ps.emailNotificationTypes !== undefined) profileUpdates.emailNotificationTypes = ps.emailNotificationTypes;
if (ps.avatarId) {

View File

@ -0,0 +1,28 @@
import * as fs from 'node:fs';
import { fileURLToPath } from 'node:url';
import { dirname } from 'node:path';
import * as nsfw from 'nsfwjs';
import * as tf from '@tensorflow/tfjs-node';
const _filename = fileURLToPath(import.meta.url);
const _dirname = dirname(_filename);
let model: nsfw.NSFWJS;
export async function detectSensitive(path: string): Promise<nsfw.predictionType[] | null> {
try {
if (model == null) model = await nsfw.load(`file://${_dirname}/../../nsfw-model/`, { size: 299 });
const buffer = await fs.promises.readFile(path);
const image = await tf.node.decodeImage(buffer, 3) as tf.Tensor3D;
try {
const predictions = await model.classify(image);
return predictions;
} finally {
image.dispose();
}
} catch (err) {
console.error(err);
return null;
}
}

View File

@ -16,6 +16,7 @@ import { driveChart, perUserDriveChart, instanceChart } from '@/services/chart/i
import { genId } from '@/misc/gen-id.js';
import { isDuplicateKeyValueError } from '@/misc/is-duplicate-key-value-error.js';
import { FILE_TYPE_BROWSERSAFE } from '@/const.js';
import { IdentifiableError } from '@/misc/identifiable-error.js';
import { getS3 } from './s3.js';
import { InternalStorage } from './internal-storage.js';
import { IImage, convertSharpToJpeg, convertSharpToWebp, convertSharpToPng } from './image-processor.js';
@ -349,9 +350,31 @@ export async function addFile({
requestIp = null,
requestHeaders = null,
}: AddFileArgs): Promise<DriveFile> {
const info = await getFileInfo(path);
let skipNsfwCheck = false;
const instance = await fetchMeta();
if (user == null) skipNsfwCheck = true;
if (instance.sensitiveMediaDetection === 'none') skipNsfwCheck = true;
if (user && instance.sensitiveMediaDetection === 'local' && Users.isRemoteUser(user)) skipNsfwCheck = true;
if (user && instance.sensitiveMediaDetection === 'remote' && Users.isLocalUser(user)) skipNsfwCheck = true;
const info = await getFileInfo(path, {
skipSensitiveDetection: skipNsfwCheck,
sensitiveThreshold: // 感度が高いほどしきい値は低くすることになる
instance.sensitiveMediaDetectionSensitivity === 'veryHigh' ? 0.1 :
instance.sensitiveMediaDetectionSensitivity === 'high' ? 0.3 :
instance.sensitiveMediaDetectionSensitivity === 'low' ? 0.7 :
instance.sensitiveMediaDetectionSensitivity === 'veryLow' ? 0.9 :
0.5,
sensitiveThresholdForPorn: 0.75,
enableSensitiveMediaDetectionForVideos: instance.enableSensitiveMediaDetectionForVideos,
});
logger.info(`${JSON.stringify(info)}`);
// 現状 false positive が多すぎて実用に耐えない
//if (info.porn && instance.disallowUploadWhenPredictedAsPorn) {
// throw new IdentifiableError('282f77bf-5816-4f72-9264-aa14d8261a21', 'Detected as porn.');
//}
// detect name
const detectedName = name || (info.type.ext ? `untitled.${info.type.ext}` : 'untitled');
@ -387,7 +410,7 @@ export async function addFile({
// If usage limit exceeded
if (usage + info.size > driveCapacity) {
if (Users.isLocalUser(user)) {
throw new Error('no-free-space');
throw new IdentifiableError('c6244ed2-a39a-4e1c-bf93-f0fbd7764fa6', 'No free space.');
} else {
// (アバターまたはバナーを含まず)最も古いファイルを削除する
deleteOldFile(await Users.findOneByOrFail({ id: user.id }) as IRemoteUser);
@ -441,6 +464,8 @@ export async function addFile({
file.isLink = isLink;
file.requestIp = requestIp;
file.requestHeaders = requestHeaders;
file.maybeSensitive = info.sensitive;
file.maybePorn = info.porn;
file.isSensitive = user
? Users.isLocalUser(user) && profile!.alwaysMarkNsfw ? true :
(sensitive !== null && sensitive !== undefined)
@ -448,6 +473,9 @@ export async function addFile({
: false
: false;
if (info.sensitive && profile!.autoSensitive) file.isSensitive = true;
if (info.sensitive && instance.setSensitiveFlagAutomatically) file.isSensitive = true;
if (url !== null) {
file.src = url;