171 lines
6.0 KiB
Python
171 lines
6.0 KiB
Python
# matrix-appservice-kakaotalk - A Matrix-KakaoTalk puppeting bridge.
|
|
# Copyright (C) 2022 Tulir Asokan, Andrew Ferrazzutti
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
from __future__ import annotations
|
|
|
|
from typing import Match
|
|
from html import escape
|
|
import re
|
|
|
|
from mautrix.types import Format, MessageType, TextMessageEventContent
|
|
|
|
from .. import puppet as pu, user as u
|
|
|
|
# Regex fragments shared by the inline-formatting patterns below. A formatting run has to be
# delimited by the start/end of the string or by whitespace on both sides.
_START = r"^|\s"
_END = r"$|\s"
# Either a single non-space character, or a span that neither starts nor ends with whitespace.
_TEXT_NO_SURROUNDING_SPACE = r"(?:[^\s].*?[^\s])|[^\s]"

# Groups: (prefix)(sigil)(text)(suffix); the closing sigil must equal the opening one (\2).
COMMON_REGEX = re.compile(
    rf"({_START})([_~*])({_TEXT_NO_SURROUNDING_SPACE})\2({_END})"
)
# Same four-group shape as COMMON_REGEX, but for backtick-delimited inline code.
INLINE_CODE_REGEX = re.compile(
    rf"({_START})(`)(.+?)`({_END})"
)
# "@<numeric id>" followed by a display name wrapped in U+2063 characters.
MENTION_REGEX = re.compile(r"@([0-9]{1,15})\u2063(.+?)\u2063")

# Maps each formatting sigil to the HTML element it is rendered as.
tags = {
    "_": "em",
    "*": "strong",
    "~": "del",
    "`": "code",
}
|
|
|
|
|
|
def _handle_match(html: str, match: Match, nested: bool) -> tuple[str, int]:
    """Replace one formatting match in ``html`` with its HTML element.

    Args:
        html: The string the match was found in.
        match: A match with four groups: prefix, sigil, text and suffix.
        nested: When true, the matched text is itself run through
            ``_convert_formatting`` before being wrapped.

    Returns:
        The rewritten string and the index at which scanning should resume.
    """
    lead, marker, inner, trail = match.groups()
    if nested:
        inner = _convert_formatting(inner)
    element = tags[marker]
    wrapped = f"{lead}<{element}>{inner}</{element}>"
    # Resume right after the closing tag and *before* the whitespace suffix: that same
    # whitespace may serve as the prefix of the next formatting run.
    resume_at = match.start() + len(wrapped)
    rewritten = "".join((html[: match.start()], wrapped, trail, html[match.end() :]))
    return rewritten, resume_at
|
|
|
|
|
|
def _convert_formatting(html: str) -> str:
    """Convert every inline formatting run in ``html`` to HTML tags.

    The string is scanned left to right. At each step the earliest match of
    ``INLINE_CODE_REGEX`` or ``COMMON_REGEX`` is replaced via ``_handle_match``;
    scanning stops once neither pattern matches any more.
    """
    cursor = 0
    while cursor < len(html):
        code_hit = INLINE_CODE_REGEX.search(html, cursor)
        common_hit = COMMON_REGEX.search(html, cursor)
        if code_hit is None and common_hit is None:
            break
        if code_hit is None:
            chosen = common_hit
        elif common_hit is None or code_hit.start() <= common_hit.start():
            # Inline code wins ties, mirroring min() returning its first argument.
            chosen = code_hit
        else:
            chosen = common_hit
        # The contents of inline code are kept verbatim; everything else may nest.
        html, cursor = _handle_match(html, chosen, nested=chosen is not code_hit)
    return html
|
|
|
|
|
|
def _handle_blockquote(output: list[str], blockquote: bool, line: str) -> tuple[bool, str]:
|
|
if not blockquote and line.startswith("> "):
|
|
line = line[len("> ") :]
|
|
output.append("<blockquote>")
|
|
blockquote = True
|
|
elif blockquote:
|
|
if line.startswith(">"):
|
|
line = line[len(">") :]
|
|
if line.startswith(" "):
|
|
line = line[1:]
|
|
else:
|
|
output.append("</blockquote>")
|
|
blockquote = False
|
|
return blockquote, line
|
|
|
|
|
|
def _handle_codeblock_pre(
|
|
output: list[str], codeblock: bool, line: str
|
|
) -> tuple[bool, str, tuple[str | None, str | None, str | None]]:
|
|
cb = line.find("```")
|
|
cb_lang = None
|
|
cb_content = None
|
|
post_cb_content = None
|
|
if cb != -1:
|
|
if not codeblock:
|
|
cb_lang = line[cb + 3 :]
|
|
if "```" in cb_lang:
|
|
end = cb_lang.index("```")
|
|
cb_content = cb_lang[:end]
|
|
post_cb_content = cb_lang[end + 3 :]
|
|
cb_lang = ""
|
|
else:
|
|
codeblock = True
|
|
line = line[:cb]
|
|
else:
|
|
output.append("</code></pre>")
|
|
codeblock = False
|
|
line = line[cb + 3 :]
|
|
return codeblock, line, (cb_lang, cb_content, post_cb_content)
|
|
|
|
|
|
def _handle_codeblock_post(
|
|
output: list[str], cb_lang: str | None, cb_content: str | None, post_cb_content: str | None
|
|
) -> None:
|
|
if cb_lang is not None:
|
|
if cb_lang:
|
|
output.append(f'<pre><code class="language-{cb_lang}">')
|
|
else:
|
|
output.append("<pre><code>")
|
|
if cb_content:
|
|
output.append(cb_content)
|
|
output.append("</code></pre>")
|
|
output.append(_convert_formatting(post_cb_content))
|
|
|
|
|
|
async def kakaotalk_to_matrix(msg: str) -> TextMessageEventContent:
    """Convert a KakaoTalk message string into Matrix text message content.

    The plain ``body`` is the message text as received. The text is additionally
    HTML-escaped and scanned line by line for blockquotes, ``` code blocks and inline
    formatting; if that produces anything beyond the escaped text with line breaks,
    the result is attached as an HTML ``formatted_body``.

    Args:
        msg: The message text; a falsy value is treated as an empty string.

    Returns:
        A ``TextMessageEventContent`` of type ``MessageType.TEXT``.
    """
    text = msg or ""
    # No mention metadata is passed to this function, so this list is always empty and
    # the rewriting loop below is currently a no-op.
    mentions = []
    content = TextMessageEventContent(msgtype=MessageType.TEXT, body=text)
    mention_user_ids = []
    # Iterating in reverse presumably keeps earlier offsets valid while the text is
    # rewritten — assumes mentions are ordered by offset; TODO confirm once populated.
    for m in reversed(mentions):
        original = text[m.offset : m.offset + m.length]
        if original.startswith("@"):
            original = original[1:]
        mention_user_ids.append(int(m.user_id))
        # Tag the mention in the form MENTION_REGEX recognises after escaping.
        text = f"{text[:m.offset]}@{m.user_id}\u2063{original}\u2063{text[m.offset + m.length:]}"

    html = escape(text)
    output = []
    if html:
        codeblock = False
        blockquote = False
        lines = html.split("\n")
        last_index = len(lines) - 1
        for i, line in enumerate(lines):
            blockquote, line = _handle_blockquote(output, blockquote, line)
            codeblock, line, post_args = _handle_codeblock_pre(output, codeblock, line)
            output.append(_convert_formatting(line))
            if i != last_index:
                # Inside <pre> a literal newline is kept; elsewhere use a line break.
                output.append("\n" if codeblock else "<br/>")
            _handle_codeblock_post(output, *post_args)
        # Close anything the message left open so the HTML stays well-formed.
        if codeblock:
            output.append("</code></pre>")
        if blockquote:
            output.append("</blockquote>")
    html = "".join(output)

    # Resolve each mentioned ID to an MXID: a User if one is found, else an existing
    # Puppet (never created here), else None.
    mention_user_map = {}
    for ktid in mention_user_ids:
        user = await u.User.get_by_ktid(ktid)
        if user:
            mention_user_map[ktid] = user.mxid
        else:
            puppet = await pu.Puppet.get_by_ktid(ktid, create=False)
            mention_user_map[ktid] = puppet.mxid if puppet else None

    def _mention_replacer(match: Match) -> str:
        ktid = int(match.group(1))
        if ktid not in mention_user_map:
            # Text that merely looks like a tagged mention was not produced by the
            # loop above; leave it untouched instead of raising KeyError.
            return match.group(0)
        mxid = mention_user_map[ktid]
        if not mxid:
            return match.group(2)
        return f'<a href="https://matrix.to/#/{mxid}">{match.group(2)}</a>'

    html = MENTION_REGEX.sub(_mention_replacer, html)
    # Lines are joined with "<br/>" above, so compare against the same separator;
    # otherwise every multi-line plain message would be flagged as formatted.
    if html != escape(content.body).replace("\n", "<br/>"):
        content.format = Format.HTML
        content.formatted_body = html
    return content
|