mirror of
https://github.com/misskey-dev/summaly.git
synced 2025-07-01 07:40:00 +09:00
3.0.1
This commit is contained in:
40
built/utils/encoding.js
Normal file
40
built/utils/encoding.js
Normal file
@ -0,0 +1,40 @@
|
||||
import iconv from 'iconv-lite';
|
||||
import jschardet from 'jschardet';
|
||||
// Captures the label that follows `charset=` (optionally quoted), ignoring case.
const regCharset = /charset\s*=\s*["']?([\w-]+)/i;
|
||||
/**
 * Detect HTML encoding
 *
 * Candidates are tried in this order, and the first one that `toEncoding`
 * accepts is returned:
 *   1. the encoding reported by `jschardet.detect` (called with
 *      `minimumThreshold: 0.99`),
 *   2. the first label matched by `regCharset` in the body,
 *   3. the literal fallback `'utf-8'`.
 *
 * @param {Buffer} body Body in Buffer
 * @returns {string} encoding
 */
export function detectEncoding(body) {
    // By detection
    const detected = jschardet.detect(body, { minimumThreshold: 0.99 });
    if (detected) {
        const encoding = toEncoding(detected.encoding);
        if (encoding != null) {
            return encoding;
        }
    }

    // From meta: the body is decoded as ASCII only so the regex can scan it
    // for a `charset=` label; the decoded text is not used for anything else.
    const matchMeta = body.toString('ascii').match(regCharset);
    if (matchMeta) {
        const encoding = toEncoding(matchMeta[1]);
        if (encoding != null) {
            return encoding;
        }
    }

    // Neither source produced a usable encoding.
    return 'utf-8';
}
|
||||
/**
 * Decode a body with the given encoding
 *
 * Thin wrapper that forwards both arguments to `iconv.decode` and returns
 * its result unchanged.
 *
 * @param {Buffer} body Body in Buffer
 * @param {string} encoding Encoding name, e.g. a value returned by detectEncoding
 * @returns {string} decoded text (presumably a JavaScript string, per iconv-lite's `decode` - confirm against the library docs)
 */
export function toUtf8(body, encoding) {
    return iconv.decode(body, encoding);
}
|
||||
/**
 * Normalize a candidate encoding label
 *
 * @param {string | null | undefined} candicate Encoding label to check
 * @returns {string | null} `'cp932'` when the label is one of the listed
 *   Shift_JIS spellings, the label itself when `iconv.encodingExists`
 *   accepts it, otherwise `null`
 */
function toEncoding(candicate) {
    // Guard against a missing or non-string label so `toLowerCase` below
    // cannot throw, whatever `iconv.encodingExists` answers for such input.
    if (typeof candicate !== 'string' || !iconv.encodingExists(candicate)) {
        return null;
    }

    // These spellings are all mapped to the single name 'cp932'.
    const shiftJisLabels = ['shift_jis', 'shift-jis', 'windows-31j', 'x-sjis'];
    if (shiftJisLabels.includes(candicate.toLowerCase())) {
        return 'cp932';
    }

    return candicate;
}
|
Reference in New Issue
Block a user