リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように

This commit is contained in:
syuilo
2018-06-21 01:21:57 +09:00
parent 3633d7ada1
commit 79d1bf30a4
29 changed files with 90 additions and 92 deletions

71
src/mfm/html-to-mfm.ts Normal file
View File

@ -0,0 +1,71 @@
const parse5 = require('parse5');
/**
 * Converts an HTML fragment into MFM-style plain text.
 *
 * - Text nodes are copied verbatim.
 * - `<br>` becomes a single newline; each `<p>` is preceded by a blank line.
 * - `<a>` elements whose text looks like a mention (`@user`) get the host
 *   name of their `href` appended (`@user@host`); text that already has the
 *   form `@user@host` is kept as is.
 * - Any other element contributes only the text of its descendants.
 *
 * @param html HTML fragment to convert.
 * @returns The converted text with leading/trailing whitespace trimmed.
 */
export default function(html: string): string {
	const dom = parse5.parseFragment(html);

	let text = '';

	dom.childNodes.forEach((n: any) => analyze(n));

	return text.trim();

	/** Concatenates the values of all text nodes below `node`. */
	function getText(node: any): string {
		if (node.nodeName == '#text') return node.value;

		if (node.childNodes) {
			return node.childNodes.map((n: any) => getText(n)).join('');
		}

		return '';
	}

	/** Runs `analyze` on every child of `node`, if it has any. */
	function analyzeChildren(node: any) {
		if (node.childNodes) {
			node.childNodes.forEach((n: any) => analyze(n));
		}
	}

	/**
	 * Returns the host name of the anchor's `href`, or null when the
	 * attribute is missing or is not an absolute, parseable URL.
	 */
	function getHrefHostname(node: any): string | null {
		const href = (node.attrs || []).find((x: any) => x.name == 'href');
		if (!href) return null;

		try {
			// `new URL` throws on relative or malformed input.
			return new URL(href.value).hostname || null;
		} catch (e) {
			return null;
		}
	}

	/** Appends the textual representation of `node` to `text`. */
	function analyze(node: any) {
		switch (node.nodeName) {
			case '#text':
				text += node.value;
				break;

			case 'br':
				text += '\n';
				break;

			case 'a': {
				const txt = getText(node);

				// Mention
				if (txt.startsWith('@')) {
					const part = txt.split('@');

					if (part.length == 2) {
						// The host part is omitted in the link text, so restore it
						// from the link target. Without a usable target the anchor
						// is treated like any other element below.
						const hostname = getHrefHostname(node);
						if (hostname) {
							text += txt + '@' + hostname;
							break;
						}
					} else if (part.length == 3) {
						text += txt;
						break;
					}
				}

				analyzeChildren(node);
				break;
			}

			case 'p':
				text += '\n\n';
				analyzeChildren(node);
				break;

			default:
				analyzeChildren(node);
				break;
		}
	}
}

102
src/mfm/html.ts Normal file
View File

@ -0,0 +1,102 @@
const { lib: emojilib } = require('emojilib');
import { JSDOM } from 'jsdom';
import config from '../config';
import { INote } from '../models/note';
import { TextElement } from './parse';
/**
 * One renderer per token type. Each handler appends the DOM representation
 * of its token to `document.body` of the given window.
 *
 * All user-supplied text is written through `textContent` / text nodes so it
 * can never be interpreted as markup.
 */
const handlers: { [key: string]: (window: any, token: any, mentionedRemoteUsers: INote['mentionedRemoteUsers']) => void } = {
	bold({ document }, { bold }) {
		const b = document.createElement('b');
		b.textContent = bold;
		document.body.appendChild(b);
	},

	code({ document }, { code }) {
		const pre = document.createElement('pre');
		const inner = document.createElement('code');
		// `code` is raw user text: assigning it to innerHTML would let it inject markup.
		inner.textContent = code;
		pre.appendChild(inner);
		document.body.appendChild(pre);
	},

	emoji({ document }, { content, emoji }) {
		const found = emojilib[emoji];
		// Fall back to the literal `:name:` text for unknown names and for
		// inherited keys (e.g. `constructor`) that carry no `char`.
		const char = found && typeof found.char == 'string' ? found.char : content;
		document.body.appendChild(document.createTextNode(char));
	},

	hashtag({ document }, { hashtag }) {
		const a = document.createElement('a');
		a.href = config.url + '/tags/' + hashtag;
		a.textContent = '#' + hashtag;
		a.setAttribute('rel', 'tag');
		document.body.appendChild(a);
	},

	'inline-code'({ document }, { code }) {
		const element = document.createElement('code');
		element.textContent = code;
		document.body.appendChild(element);
	},

	link({ document }, { url, title }) {
		const a = document.createElement('a');
		a.href = url;
		a.textContent = title;
		document.body.appendChild(a);
	},

	mention({ document }, { content, username, host }, mentionedRemoteUsers) {
		const a = document.createElement('a');
		// Link to the remote user's URI when known, otherwise to the local profile path.
		const remoteUserInfo = mentionedRemoteUsers.find(remoteUser => remoteUser.username === username && remoteUser.host === host);
		a.href = remoteUserInfo ? remoteUserInfo.uri : `${config.url}/${content}`;
		a.textContent = content;
		document.body.appendChild(a);
	},

	quote({ document }, { quote }) {
		const blockquote = document.createElement('blockquote');
		blockquote.textContent = quote;
		document.body.appendChild(blockquote);
	},

	title({ document }, { content }) {
		const h1 = document.createElement('h1');
		h1.textContent = content;
		document.body.appendChild(h1);
	},

	text({ document }, { content }) {
		// Emit a <br> only BETWEEN lines: one line break per '\n', and none
		// after a text token that merely precedes another inline token.
		content.split('\n').forEach((line: string, index: number) => {
			if (index > 0) {
				document.body.appendChild(document.createElement('br'));
			}
			document.body.appendChild(document.createTextNode(line));
		});
	},

	url({ document }, { url }) {
		const a = document.createElement('a');
		a.href = url;
		a.textContent = url;
		document.body.appendChild(a);
	},

	search({ document }, { content, query }) {
		const a = document.createElement('a');
		// Encode the query so characters such as '&', '#' or '%' stay part of it.
		a.href = `https://www.google.com/?#q=${encodeURIComponent(query)}`;
		a.textContent = content;
		document.body.appendChild(a);
	}
};
/**
 * Renders parsed MFM tokens to an HTML string wrapped in a single `<p>`.
 *
 * @param tokens Tokens to render; `null`/`undefined` (what the parser yields
 *   for empty text) produces `null` instead of throwing.
 * @param mentionedRemoteUsers Remote users used to resolve mention links.
 * @returns The HTML string, or `null` when there are no tokens to render.
 */
export default (tokens: TextElement[], mentionedRemoteUsers: INote['mentionedRemoteUsers'] = []) => {
	if (tokens == null) {
		return null;
	}

	const { window } = new JSDOM('');

	for (const token of tokens) {
		handlers[token.type](window, token, mentionedRemoteUsers);
	}

	return `<p>${window.document.body.innerHTML}</p>`;
};

View File

@ -0,0 +1,341 @@
/**
 * Escapes text for use as HTML element content.
 *
 * `&` is replaced first so that the entities produced for `<` and `>` are
 * not escaped a second time, and so that source text such as `&lt;` is shown
 * literally instead of being decoded by the browser.
 *
 * @param text Raw text.
 * @returns Text with `&`, `<` and `>` replaced by character references.
 */
function escape(text: string) {
	return text
		.replace(/&/g, '&amp;')
		.replace(/>/g, '&gt;')
		.replace(/</g, '&lt;');
}
// Base (lower-case) keyword list. The derived `keywords` list is sorted
// longest-first; otherwise "func" could match before "function" when the
// input is "function".
const _keywords = [
	// literals
	'true', 'false', 'null', 'nil', 'undefined', 'void',
	// declarations
	'var', 'const', 'let', 'mut', 'dim',
	// branching
	'if', 'then', 'else', 'switch', 'match', 'case', 'default',
	// loops and jumps
	'for', 'each', 'in', 'while', 'loop', 'continue', 'break', 'do', 'goto', 'next', 'end', 'sub',
	// exceptions
	'throw', 'try', 'catch', 'finally',
	// types and callables
	'enum', 'delegate',
	'function', 'func', 'fun', 'fn', 'return', 'yield', 'async', 'await',
	// modules
	'require', 'include', 'import', 'imports', 'export', 'exports', 'from', 'as', 'using', 'use',
	'internal', 'module', 'namespace',
	// queries and data
	'where', 'select', 'struct', 'union',
	// objects
	'new', 'delete', 'this', 'super', 'base',
	'class', 'interface', 'abstract', 'static',
	'public', 'private', 'protected', 'virtual', 'partial', 'override',
	'extends', 'implements', 'constructor'
];
// Every base keyword in three spellings (as written, Capitalized, UPPERCASE),
// ordered longest-first so that longer keywords win over their prefixes.
const keywords = [
	..._keywords,
	..._keywords.map(word => word.charAt(0).toUpperCase() + word.slice(1)),
	..._keywords.map(word => word.toUpperCase())
].sort((left, right) => right.length - left.length);
// Single-character operators highlighted as symbols.
const symbols = '=+-*/%~^&|><!?'.split('');
/**
 * Result of a successful element match.
 */
type Token = {
// HTML emitted for the matched text.
html: string
// Number of characters of the input consumed by the match.
next: number
};
/**
 * A matcher for one syntactic element.
 *
 * It is called with the text still to be highlighted (`code`), the offset of
 * that text within the whole input (`i`) and the whole input (`source`), and
 * returns a Token when `code` starts with the element, otherwise null.
 */
type Element = (code: string, i: number, source: string) => (Token | null);
/**
 * Matchers tried in order against the start of the remaining code; the first
 * one that returns a token wins. Every piece of matched text that can contain
 * HTML-significant characters is passed through `escape` before being
 * embedded in the output.
 */
const elements: Element[] = [
	// line comment: `//` followed by at least one character, up to and
	// including the end of the line
	code => {
		if (code.substr(0, 2) != '//') return null;
		const match = code.match(/^\/\/(.+?)(\n|$)/);
		if (!match) return null;
		const comment = match[0];
		return {
			html: `<span class="comment">${escape(comment)}</span>`,
			next: comment.length
		};
	},

	// block comment: `/* ... */`, possibly spanning several lines
	code => {
		const match = code.match(/^\/\*([\s\S]+?)\*\//);
		if (!match) return null;
		return {
			html: `<span class="comment">${escape(match[0])}</span>`,
			next: match[0].length
		};
	},

	// string: text between matching ', " or ` on a single line
	code => {
		if (!/^['"`]/.test(code)) return null;
		const begin = code[0];
		let str = begin;
		let thisIsNotAString = false;
		for (let i = 1; i < code.length; i++) {
			const char = code[i];
			if (char == '\\') {
				// Keep the backslash together with the character it escapes.
				str += char;
				str += code[i + 1] || '';
				i++;
				continue;
			} else if (char == begin) {
				str += char;
				break;
			} else if (char == '\n' || i == (code.length - 1)) {
				// Line or input ended before the closing quote.
				thisIsNotAString = true;
				break;
			} else {
				str += char;
			}
		}
		if (thisIsNotAString) {
			return null;
		} else {
			return {
				html: `<span class="string">${escape(str)}</span>`,
				next: str.length
			};
		}
	},

	// regexp: `/.../` on a single line
	code => {
		if (code[0] != '/') return null;
		let regexp = '';
		let thisIsNotARegexp = false;
		for (let i = 1; i < code.length; i++) {
			const char = code[i];
			if (char == '\\') {
				regexp += char;
				regexp += code[i + 1] || '';
				i++;
				continue;
			} else if (char == '/') {
				break;
			} else if (char == '\n' || i == (code.length - 1)) {
				thisIsNotARegexp = true;
				break;
			} else {
				regexp += char;
			}
		}
		if (thisIsNotARegexp) return null;
		if (regexp == '') return null;
		// `/ x /` surrounded by spaces is rejected — presumably a pair of
		// division operators rather than a literal.
		if (regexp[0] == ' ' && regexp[regexp.length - 1] == ' ') return null;
		return {
			html: `<span class="regexp">/${escape(regexp)}/</span>`,
			next: regexp.length + 2
		};
	},

	// label: `@name` immediately followed by a newline
	code => {
		if (code[0] != '@') return null;
		const match = code.match(/^@([a-zA-Z_-]+?)\n/);
		if (!match) return null;
		const label = match[0];
		return {
			html: `<span class="label">${label}</span>`,
			next: label.length
		};
	},

	// number: optional sign followed by digits and dots, not directly
	// preceded by a letter
	(code, i, source) => {
		const prev = source[i - 1];
		if (prev && /[a-zA-Z]/.test(prev)) return null;
		const match = code.match(/^[\-\+]?[0-9\.]+/);
		if (!match) return null;
		return {
			html: `<span class="number">${match[0]}</span>`,
			next: match[0].length
		};
	},

	// NaN, not directly preceded by a letter
	(code, i, source) => {
		const prev = source[i - 1];
		if (prev && /[a-zA-Z]/.test(prev)) return null;
		if (code.substr(0, 3) == 'NaN') {
			return {
				html: `<span class="nan">NaN</span>`,
				next: 3
			};
		} else {
			return null;
		}
	},

	// method: identifier directly followed by `(` (the parenthesis itself is
	// not consumed)
	code => {
		const match = code.match(/^([a-zA-Z_-]+?)\(/);
		if (!match) return null;
		if (match[1] == '-') return null;
		return {
			html: `<span class="method">${match[1]}</span>`,
			next: match[1].length
		};
	},

	// property: identifier directly preceded by `.`
	(code, i, source) => {
		const prev = source[i - 1];
		if (prev != '.') return null;
		const match = code.match(/^[a-zA-Z0-9_-]+/);
		if (!match) return null;
		return {
			html: `<span class="property">${match[0]}</span>`,
			next: match[0].length
		};
	},

	// keyword: a known keyword with no letter directly before or after it
	(code, i, source) => {
		const prev = source[i - 1];
		if (prev && /[a-zA-Z]/.test(prev)) return null;
		const match = keywords.filter(k => code.substr(0, k.length) == k)[0];
		if (match) {
			if (/^[a-zA-Z]/.test(code.substr(match.length))) return null;
			return {
				html: `<span class="keyword ${match}">${match}</span>`,
				next: match.length
			};
		} else {
			return null;
		}
	},

	// symbol: a single operator character
	code => {
		const match = symbols.filter(s => code[0] == s)[0];
		if (match) {
			return {
				// `<`, `>` and `&` are symbols too, so the character must be
				// escaped like any other text before it is put into the markup.
				html: `<span class="symbol">${escape(match)}</span>`,
				next: 1
			};
		} else {
			return null;
		}
	}
];
/**
 * Highlights `source` and returns the resulting HTML.
 *
 * The input is consumed from left to right: at each position the element
 * matchers are tried in order and the first token produced is emitted; when
 * none matches, a single escaped character is emitted instead.
 *
 * @param source Code to highlight.
 * @param lang Currently unused (language selection is still a TODO).
 */
export default (source: string, lang?: string) => {
	const pieces: string[] = [];
	let rest = source;
	let offset = 0;

	while (rest != '') {
		let token: Token | null = null;

		for (const matcher of elements) {
			token = matcher(rest, offset, source);
			if (token) break;
		}

		if (!token) {
			token = {
				html: escape(rest[0]),
				next: 1
			};
		}

		pieces.push(token.html);
		rest = rest.substr(token.next);
		offset += token.next;
	}

	return pieces.join('');
};

View File

@ -0,0 +1,20 @@
/**
* Bold
*/
// Token produced for bold text (`**text**`).
export type TextElementBold = {
type: 'bold'
// The whole matched source text, including the surrounding `**` markers.
content: string
// The text between the markers.
bold: string
};
/**
 * Matches `**text**` at the very start of `text`.
 *
 * @returns A bold token, or null when `text` does not start with bold markup.
 */
export default function(text: string) {
	const found = /^\*\*(.+?)\*\*/.exec(text);
	if (found === null) return null;

	const [whole, inner] = found;

	return {
		type: 'bold',
		content: whole,
		bold: inner
	} as TextElementBold;
}

View File

@ -0,0 +1,24 @@
/**
* Code (block)
*/
import genHtml from '../core/syntax-highlighter';
// Token produced for a fenced code block (``` ... ```).
export type TextElementCode = {
type: 'code'
// The whole matched source text, including both fences.
content: string
// The text between the fences, trimmed.
code: string
// `code` rendered by the syntax highlighter.
html: string
};
/**
 * Matches a fenced code block (``` ... ```) at the very start of `text`.
 *
 * @returns A code token carrying the trimmed code and its highlighted HTML,
 *   or null when `text` does not start with a complete fenced block.
 */
export default function(text: string) {
	const found = /^```([\s\S]+?)```/.exec(text);
	if (found === null) return null;

	const [block, body] = found;
	const source = body.trim();

	return {
		type: 'code',
		content: block,
		code: source,
		html: genHtml(source)
	} as TextElementCode;
}

View File

@ -0,0 +1,20 @@
/**
* Emoji
*/
// Token produced for an emoji shortcode (`:name:`).
export type TextElementEmoji = {
type: 'emoji'
// The whole matched source text, including both colons.
content: string
// The name between the colons.
emoji: string
};
/**
 * Matches an emoji shortcode (`:name:`) at the very start of `text`.
 *
 * A name consists of ASCII letters, digits, `+`, `_` and `-`. The hyphen is
 * written last in the character class on purpose: `+-_` would be read as the
 * range U+002B–U+005F, which also admits `:`, `<`, `>`, `/`, `=`, `@` and
 * other punctuation.
 *
 * @param text Remaining source text.
 * @returns An emoji token, or null when `text` does not start with a shortcode.
 */
export default function parseEmoji(text: string) {
	const match = text.match(/^:([a-zA-Z0-9+_-]+):/);
	if (!match) return null;
	return {
		type: 'emoji',
		content: match[0],
		emoji: match[1]
	} as TextElementEmoji;
}

View File

@ -0,0 +1,25 @@
/**
* Hashtag
*/
// Token produced for a hashtag (`#tag`).
export type TextElementHashtag = {
type: 'hashtag'
// The matched `#tag` text, without any whitespace that preceded it.
content: string
// The tag without the leading `#`.
hashtag: string
};
/**
 * Matches a hashtag at the start of `text`.
 *
 * A hashtag is recognised either as `#tag` when `i` is 0, or as a whitespace
 * character followed by `#tag` at any position. In the second case the
 * whitespace character is returned as a separate text token in front of the
 * hashtag token.
 *
 * @param text Remaining source text.
 * @param i Counter passed by the parser; only compared against 0.
 * @returns The produced tokens, or null when there is no hashtag here.
 */
export default function(text: string, i: number) {
	const res: any[] = [];

	if (text[0] == '#') {
		// A bare `#tag` only counts when `i` is 0.
		if (i != 0) return null;

		const head = /^#[^\s]+/.exec(text);
		if (head === null) return null;

		res.push({
			type: 'hashtag',
			content: head[0],
			hashtag: head[0].substr(1)
		});
	} else {
		const spaced = /^\s#[^\s]+/.exec(text);
		if (spaced === null) return null;

		res.push({
			type: 'text',
			content: text[0]
		});
		res.push({
			type: 'hashtag',
			content: spaced[0].substr(1),
			hashtag: spaced[0].substr(2)
		});
	}

	return res as TextElementHashtag[];
}

View File

@ -0,0 +1,24 @@
/**
* Code (inline)
*/
import genHtml from '../core/syntax-highlighter';
// Token produced for inline code (`code` between single backticks).
export type TextElementInlineCode = {
type: 'inline-code'
// The whole matched source text, including both backticks.
content: string
// The text between the backticks, trimmed.
code: string
// `code` rendered by the syntax highlighter.
html: string
};
/**
 * Matches inline code (text between single backticks on one line) at the
 * very start of `text`.
 *
 * @returns An inline-code token carrying the trimmed code and its
 *   highlighted HTML, or null when `text` does not start with inline code.
 */
export default function(text: string) {
	const found = /^`(.+?)`/.exec(text);
	if (found === null) return null;

	const [span, body] = found;
	const source = body.trim();

	return {
		type: 'inline-code',
		content: span,
		code: source,
		html: genHtml(source)
	} as TextElementInlineCode;
}

View File

@ -0,0 +1,27 @@
/**
* Link
*/
// Token produced for a titled link (`[title](url)` or `?[title](url)`).
export type TextElementLink = {
type: 'link'
// The whole matched source text.
content: string
// The text between the square brackets.
title: string
// The http(s) URL between the parentheses.
url: string
// True when the source text starts with `?`.
silent: boolean
};
/**
 * Matches a titled link — `[title](url)`, optionally prefixed with `?` — at
 * the very start of `text`. Only http and https URLs are accepted.
 *
 * @returns A link token, or null when `text` does not start with a link.
 */
export default function(text: string) {
	const pattern = /^\??\[([^\[\]]+?)\]\((https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.=\+\-]+?)\)/;

	const found = pattern.exec(text);
	if (found === null) return null;

	const [content, title, url] = found;

	return {
		type: 'link',
		content,
		title,
		url,
		// A leading `?` marks the link as silent.
		silent: text.charAt(0) == '?'
	} as TextElementLink;
}

View File

@ -0,0 +1,24 @@
/**
* Mention
*/
import parseAcct from '../../../acct/parse';
// Token produced for a mention (`@user` or `@user@host`).
export type TextElementMention = {
type: 'mention'
// The whole matched source text, including the leading `@`.
content: string
// User name as returned by parseAcct for the mention without its leading `@`.
username: string
// Host as returned by parseAcct — presumably empty/null for `@user` without a host; confirm against parseAcct.
host: string
};
/**
 * Matches a mention (`@user` or `@user@host`, case-insensitive) at the very
 * start of `text` and splits it into user name and host with `parseAcct`.
 *
 * @returns A mention token, or null when `text` does not start with a mention.
 */
export default function(text: string) {
	const found = /^@[a-z0-9_]+(?:@[a-z0-9\.\-]+[a-z0-9])?/i.exec(text);
	if (found === null) return null;

	const content = found[0];
	// Drop the leading `@` before handing the account string to parseAcct.
	const acct = parseAcct(content.slice(1));

	return {
		type: 'mention',
		content,
		username: acct.username,
		host: acct.host
	} as TextElementMention;
}

View File

@ -0,0 +1,20 @@
/**
* Quoted text
*/
// Token produced for quoted text (`"` ... newline `"`).
export type TextElementQuote = {
type: 'quote'
// The whole matched source text, including both double quotes.
content: string
// The text between the quotes, trimmed.
quote: string
};
/**
 * Matches quoted text at the very start of `text`: an opening `"`, any
 * content (possibly spanning lines), and a closing `"` that directly follows
 * a newline.
 *
 * @returns A quote token with the trimmed inner text, or null when `text`
 *   does not start with a quote.
 */
export default function(text: string) {
	const found = /^"([\s\S]+?)\n"/.exec(text);
	if (found === null) return null;

	const [whole, inner] = found;

	return {
		type: 'quote',
		content: whole,
		quote: inner.trim()
	} as TextElementQuote;
}

View File

@ -0,0 +1,19 @@
/**
* Search
*/
// Token produced for a search line (`<query> 検索`).
export type TextElementSearch = {
	type: 'search'
	// The whole matched source text, including a trailing newline if present.
	content: string
	// The text in front of ` 検索`.
	query: string
};

/**
 * Matches a search line at the very start of `text`: a query, a space, the
 * word `検索`, and then either a newline or the end of the text.
 *
 * Like the other element parsers, the result is typed as its element type
 * (so `type` is the literal `'search'`, not a plain string).
 *
 * @param text Remaining source text.
 * @returns A search token, or null when `text` does not start with a search line.
 */
export default function parseSearch(text: string) {
	const match = text.match(/^(.+?) 検索(\n|$)/);
	if (!match) return null;
	return {
		type: 'search',
		content: match[0],
		query: match[1]
	} as TextElementSearch;
}

View File

@ -0,0 +1,20 @@
/**
* Title
*/
// Token produced for a title line (`【title】` followed by a newline).
export type TextElementTitle = {
type: 'title'
// The whole matched source text, including the brackets and the trailing newline.
content: string
// The text between the brackets.
title: string
};
/**
 * Matches a title — `【title】` immediately followed by a newline — at the
 * very start of `text`.
 *
 * @returns A title token, or null when `text` does not start with a title.
 */
export default function(text: string) {
	const found = /^【(.+?)】\n/.exec(text);
	if (found === null) return null;

	const [line, inner] = found;

	return {
		type: 'title',
		content: line,
		title: inner
	} as TextElementTitle;
}

View File

@ -0,0 +1,20 @@
/**
* URL
*/
// Token produced for a bare http(s) URL.
export type TextElementUrl = {
type: 'url'
// The matched source text.
content: string
// The URL; identical to `content`.
url: string
};
/**
 * Matches a bare http or https URL at the very start of `text`.
 *
 * @returns A url token, or null when `text` does not start with a URL.
 */
export default function(text: string) {
	const found = /^https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.=\+\-]+/.exec(text);
	if (found === null) return null;

	const [address] = found;

	return {
		type: 'url',
		content: address,
		url: address
	} as TextElementUrl;
}

99
src/mfm/parse/index.ts Normal file
View File

@ -0,0 +1,99 @@
/**
* Misskey Text Analyzer
*/
import { TextElementBold } from './elements/bold';
import { TextElementCode } from './elements/code';
import { TextElementEmoji } from './elements/emoji';
import { TextElementHashtag } from './elements/hashtag';
import { TextElementInlineCode } from './elements/inline-code';
import { TextElementLink } from './elements/link';
import { TextElementMention } from './elements/mention';
import { TextElementQuote } from './elements/quote';
import { TextElementSearch } from './elements/search';
import { TextElementTitle } from './elements/title';
import { TextElementUrl } from './elements/url';
// Element parsers, listed in matching priority: the parse loop tries them in
// this order against the remaining text and uses the first one that returns
// a result. Each module's default export is the parser function.
const elements = [
require('./elements/bold'),
require('./elements/title'),
require('./elements/url'),
require('./elements/link'),
require('./elements/mention'),
require('./elements/hashtag'),
require('./elements/code'),
require('./elements/inline-code'),
require('./elements/quote'),
require('./elements/emoji'),
require('./elements/search')
].map(element => element.default as TextElementProcessor);
// Any token the parser can produce: plain text, or one of the element tokens
// defined by the element parsers. The `type` field tells the variants apart.
export type TextElement = { type: 'text', content: string }
| TextElementBold
| TextElementCode
| TextElementEmoji
| TextElementHashtag
| TextElementInlineCode
| TextElementLink
| TextElementMention
| TextElementQuote
| TextElementSearch
| TextElementTitle
| TextElementUrl;
// An element parser: receives the remaining source text and the parse loop's
// iteration counter, and returns one token or a list of tokens. The parse
// loop treats a falsy result as "no match".
export type TextElementProcessor = (text: string, i: number) => TextElement | TextElement[];
/**
 * Parses MFM source text into a list of tokens.
 *
 * The text is consumed from the left: at each step the element parsers are
 * tried in order and the first result is taken; when none matches, a single
 * character is emitted as a text token. Adjacent text tokens are merged
 * before the list is returned.
 *
 * @param source Text to parse.
 * @returns The tokens, or null when `source` is empty, null or undefined.
 */
export default (source: string): TextElement[] => {
	// Missing text is treated like empty text instead of crashing inside the
	// first element parser.
	if (source == null || source == '') {
		return null;
	}

	const tokens: TextElement[] = [];

	// Records a token and consumes its source text.
	function push(token: TextElement) {
		if (token != null) {
			tokens.push(token);
			source = source.substr(token.content.length);
		}
	}

	// Iteration counter handed to the element parsers; it is 0 only while the
	// very beginning of the text is being examined.
	let i = 0;

	// Parse
	while (source != '') {
		const parsed = elements.some(el => {
			let _tokens = el(source, i);
			if (_tokens) {
				if (!Array.isArray(_tokens)) {
					_tokens = [_tokens];
				}
				_tokens.forEach(push);
				return true;
			} else {
				return false;
			}
		});

		if (!parsed) {
			push({
				type: 'text',
				content: source[0]
			});
		}

		i++;
	}

	// Merge runs of text tokens. Unmatched text arrives one character per
	// token, so the result is built in place rather than by copying the whole
	// array for every token.
	const merged: TextElement[] = [];
	for (const token of tokens) {
		const tail = merged[merged.length - 1];
		if (tail && tail.type == 'text' && token.type == 'text') {
			merged[merged.length - 1] = {
				type: 'text',
				content: tail.content + token.content
			};
		} else {
			merged.push(token);
		}
	}

	return merged;
};