// Mirror of https://github.com/misskey-dev/summaly.git (synced 2025-08-07 16:54:01 +09:00)
// 44 lines, 1.1 KiB, TypeScript
import iconv from 'iconv-lite';
|
|
import jschardet from 'jschardet';
|
|
|
|
// Case-insensitive matcher for a `charset=<name>` declaration; capture group 1 is the
// charset name (word characters and hyphens), with an optional opening quote skipped.
const regCharset = /charset\s*=\s*["']?([\w-]+)/i;
|
|
|
|
/**
 * Work out which character encoding an HTML body is stored in.
 *
 * Two sources are consulted in order, and the first one that yields an
 * encoding accepted by `toEncoding` wins:
 *  1. the result of `jschardet.detect` (called with `minimumThreshold: 0.99`)
 *  2. the first `charset=...` declaration matched by `regCharset` in the body
 *
 * @param body Body in Buffer
 * @returns the resolved encoding name, or 'utf-8' when neither source gives a usable one
 */
export function detectEncoding(body: Buffer): string {
	// 1. By detection
	const guess = jschardet.detect(body, { minimumThreshold: 0.99 });
	// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
	const fromDetection = guess ? toEncoding(guess.encoding) : null;
	if (fromDetection != null) {
		return fromDetection;
	}

	// 2. From the charset declaration; the body is read as ASCII only to search it
	const declared = regCharset.exec(body.toString('ascii'));
	const fromMeta = declared ? toEncoding(declared[1]) : null;
	if (fromMeta != null) {
		return fromMeta;
	}

	// 3. Nothing usable was found
	return 'utf-8';
}
|
|
|
|
/**
 * Decode a raw body into a string using the given encoding.
 *
 * @param body Body in Buffer
 * @param encoding Encoding name handed to `iconv.decode`
 * @returns the decoded text
 */
export function toUtf8(body: Buffer, encoding: string): string {
	const decoded = iconv.decode(body, encoding);
	return decoded;
}
|
|
|
|
/**
 * Turn a candidate charset name into the encoding name to decode with.
 *
 * @param candicate Charset name to check
 * @returns null when `iconv.encodingExists` rejects the name; 'cp932' when the
 *          name is one of the listed Shift_JIS spellings (compared
 *          case-insensitively); otherwise the candidate unchanged
 */
function toEncoding(candicate: string): string | null {
	// Names iconv does not know are rejected up front
	if (!iconv.encodingExists(candicate)) {
		return null;
	}

	// Shift_JIS spellings are all funnelled to cp932
	const shiftJisNames = ['shift_jis', 'shift-jis', 'windows-31j', 'x-sjis'];
	const isShiftJis = shiftJisNames.includes(candicate.toLowerCase());
	return isShiftJis ? 'cp932' : candicate;
}
|