matrix-appservice-kakaotalk/matrix_appservice_kakaotalk/formatter/from_kakaotalk.py

118 lines
4.3 KiB
Python
Raw Normal View History

2022-02-25 02:22:50 -05:00
# matrix-appservice-kakaotalk - A Matrix-KakaoTalk puppeting bridge.
# Copyright (C) 2022 Tulir Asokan, Andrew Ferrazzutti
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from typing import Match
import re
from mautrix.types import Format, MessageType, TextMessageEventContent
2022-04-05 18:21:35 -04:00
from ..kt.types.chat.attachment.mention import MentionStruct
2022-02-25 02:22:50 -05:00
from .. import puppet as pu, user as u
# A formatting block must begin at the start of the text or right after a whitespace character...
_START = r"^|\s"
# ...and must end at the end of the text or right before a whitespace character.
_END = r"$|\s"
# Text whose first and last characters are both non-whitespace: either two or more characters
# (matched lazily in between) or a single non-whitespace character.
_TEXT_NO_SURROUNDING_SPACE = r"(?:[^\s].*?[^\s])|[^\s]"
# Groups: (1) prefix, (2) sigil, (3) formatted text, (4) suffix.
# The `\2` backreference requires the closing sigil to be the same character as the opening one.
COMMON_REGEX = re.compile(rf"({_START})([_~*])({_TEXT_NO_SURROUNDING_SPACE})\2({_END})")
# Same four-group layout as COMMON_REGEX, but delimited by backticks and accepting any
# non-empty text (matched lazily) between them.
INLINE_CODE_REGEX = re.compile(rf"({_START})(`)(.+?)`({_END})")
# Matches "@", a 1-15 digit number (group 1), then text (group 2) enclosed between two
# U+2063 (INVISIBLE SEPARATOR) characters.
MENTION_REGEX = re.compile(r"@([0-9]{1,15})\u2063(.+?)\u2063")
# Maps each formatting sigil to the name of the HTML tag that replaces it.
tags = {"_": "em", "*": "strong", "~": "del", "`": "code"}
def _handle_match(html: str, match: Match, nested: bool) -> tuple[str, int]:
    """Rewrite a single formatting match inside ``html`` as an HTML element.

    :param html: The text that ``match`` was found in.
    :param match: A match with four groups: prefix, sigil, text and suffix.
    :param nested: Whether the matched text should itself be run through
        ``_convert_formatting`` before being wrapped.
    :return: The rewritten text and the index at which scanning should resume.
    """
    lead, marker, inner, trail = match.groups()
    if nested:
        inner = _convert_formatting(inner)

    element = tags[marker]
    opening = f"<{element}>"
    closing = f"</{element}>"
    before = html[: match.start()]
    after = html[match.end() :]
    rewritten = "".join((before, lead, opening, inner, closing, trail, after))

    # Resume right after the closing tag rather than after the suffix: the whitespace suffix
    # of this block may also serve as the whitespace prefix of the next formatting block.
    resume_at = len(before) + len(lead) + len(opening) + len(inner) + len(closing)
    return rewritten, resume_at
def _convert_formatting(html: str) -> str:
    """Replace formatting sigils in ``html`` with HTML tags, scanning left to right.

    At every step the earliest match of ``INLINE_CODE_REGEX`` or ``COMMON_REGEX`` at or after
    the current position is rewritten by ``_handle_match``. The inline code match wins when
    both start at the same index. The contents of inline code are not converted any further.

    :param html: The text to convert.
    :return: The text with every recognised formatting block rewritten.
    """
    cursor = 0
    while cursor < len(html):
        code_hit = INLINE_CODE_REGEX.search(html, cursor)
        common_hit = COMMON_REGEX.search(html, cursor)

        if code_hit is None and common_hit is None:
            # Nothing left to convert.
            break

        if code_hit is None:
            chosen = common_hit
        elif common_hit is None or code_hit.start() <= common_hit.start():
            chosen = code_hit
        else:
            chosen = common_hit

        # Only non-code blocks may contain further formatting.
        html, cursor = _handle_match(html, chosen, nested=chosen is not code_hit)
    return html
def _handle_blockquote(output: list[str], blockquote: bool, line: str) -> tuple[bool, str]:
if not blockquote and line.startswith("&gt; "):
line = line[len("&gt; ") :]
output.append("<blockquote>")
blockquote = True
elif blockquote:
if line.startswith("&gt;"):
line = line[len("&gt;") :]
if line.startswith(" "):
line = line[1:]
else:
output.append("</blockquote>")
blockquote = False
return blockquote, line
2022-04-05 18:21:35 -04:00
async def kakaotalk_to_matrix(
    msg: str | None, mentions: list[MentionStruct] | None
) -> TextMessageEventContent:
    """Convert a KakaoTalk text message into Matrix text message content.

    The plain ``body`` is always the message text (an empty string when ``msg`` is ``None``).
    When ``mentions`` yields at least one mention position, an HTML ``formatted_body`` is
    produced as well: the message text is HTML-escaped, newlines become ``<br/>``, and every
    mention whose KakaoTalk ID resolves to a user or puppet becomes a ``matrix.to`` link.
    Mentions that cannot be resolved are left as plain (escaped) text.

    :param msg: The message text, or ``None``.
    :param mentions: The mentions contained in the message, or ``None``.
    :return: The content of the Matrix message.
    :raises IndexError: If a mention refers to an "@" position that is not in the text.
    """
    # Function-scope import: only the HTML branch below needs it.
    from html import escape

    # TODO Shouts
    text = msg or ""
    content = TextMessageEventContent(msgtype=MessageType.TEXT, body=text)
    if not mentions:
        return content

    # Mentions address "@" characters by index: chunk N of the split is the text that follows
    # the Nth "@" (chunk 0 is whatever precedes the first one).
    at_chunks = text.split("@")
    # formatted_body is HTML, so every piece of message text has to be escaped. The pieces are
    # escaped separately because mention lengths are measured on the unescaped text.
    html_chunks = [escape(chunk) for chunk in at_chunks]
    mention_user_ids = []
    for m in mentions:
        for idx in m.at:
            chunk = at_chunks[idx]
            name, rest = chunk[: m.len], chunk[m.len :]
            mention_user_ids.append(int(m.user_id))
            # Tag the mention as "@<id>\u2063<name>\u2063" so MENTION_REGEX can find it below.
            html_chunks[idx] = f"{m.user_id}\u2063{escape(name)}\u2063{escape(rest)}"
    if not mention_user_ids:
        return content

    mention_user_map = {}
    for ktid in mention_user_ids:
        if ktid in mention_user_map:
            # Already resolved; one lookup per distinct ID is enough.
            continue
        user = await u.User.get_by_ktid(ktid)
        if user:
            mention_user_map[ktid] = user.mxid
        else:
            puppet = await pu.Puppet.get_by_ktid(ktid, create=False)
            mention_user_map[ktid] = puppet.mxid if puppet else None

    def _mention_replacer(match: Match) -> str:
        ktid = int(match.group(1))
        if ktid not in mention_user_map:
            # Not a marker created above (the text itself looked like one): leave it alone.
            return match.group(0)
        mxid = mention_user_map[ktid]
        if not mxid:
            return match.group(2)
        return f'<a href="https://matrix.to/#/{mxid}">{match.group(2)}</a>'

    marked_html = "@".join(html_chunks)
    content.format = Format.HTML
    content.formatted_body = MENTION_REGEX.sub(_mention_replacer, marked_html).replace(
        "\n", "<br/>\n"
    )
    return content