matrix-appservice-kakaotalk/matrix_appservice_kakaotalk/formatter/from_matrix.py

198 lines
7.9 KiB
Python
Raw Permalink Normal View History

2022-02-25 02:22:50 -05:00
# matrix-appservice-kakaotalk - A Matrix-KakaoTalk puppeting bridge.
# Copyright (C) 2022 Tulir Asokan, Andrew Ferrazzutti
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from typing import NamedTuple
2022-04-06 12:49:23 -04:00
from mautrix.types import Format, MessageEventContent, RelationType, RoomID, UserID
2022-04-12 00:56:06 -04:00
from mautrix.util import utf16_surrogate
2022-04-06 12:49:23 -04:00
from mautrix.util.formatter import (
EntityString,
EntityType,
MarkdownString,
MatrixParser,
SimpleEntity,
)
2022-02-25 02:22:50 -05:00
from mautrix.util.logging import TraceLogger
2022-04-05 15:44:02 -04:00
from ..kt.types.bson import Long
2022-04-06 14:33:01 -04:00
from ..kt.types.chat import KnownChatType
2022-04-06 12:49:23 -04:00
from ..kt.types.chat.attachment import ReplyAttachment, MentionStruct
2022-04-05 15:44:02 -04:00
from ..kt.client.types import TO_MSGTYPE_MAP
2022-04-14 04:24:23 -04:00
from .. import portal as po, puppet as pu, user as u
2022-02-25 02:22:50 -05:00
from ..db import Message as DBMessage
class SendParams(NamedTuple):
    """Result of converting a Matrix message into what KakaoTalk needs to send it."""

    # Plain-text body of the outgoing message.
    text: str
    # Mentions found in the message, or None when there are none.
    mentions: list[MentionStruct] | None
    # Reply metadata, or None when the message is not (treated as) a reply.
    reply_to: ReplyAttachment | None
2022-02-25 02:22:50 -05:00
2022-04-06 12:49:23 -04:00
class KakaoTalkFormatString(EntityString[SimpleEntity, EntityType], MarkdownString):
    """Format string that renders Matrix formatting as plain-text markup.

    User mentions are recorded as entities instead of being rendered, so that
    the caller can later turn them into KakaoTalk mention structures.
    """

    def format(self, entity_type: EntityType, **kwargs) -> KakaoTalkFormatString:
        """Apply ``entity_type`` formatting to this string in place and return it.

        Unknown entity types leave the string untouched.
        """
        # Mentions don't alter the text; they only tag the whole string.
        if entity_type == EntityType.USER_MENTION:
            mention = SimpleEntity(
                type=entity_type,
                offset=0,
                length=len(self.text),
                extra_info={"user_id": kwargs["user_id"]},
            )
            self.entities.append(mention)
            return self

        # Block quotes are rebuilt line by line rather than wrapped.
        if entity_type == EntityType.BLOCKQUOTE:
            quoted_lines = [line.prepend("> ") for line in self.trim().split("\n")]
            return self.join(quoted_lines, "\n")

        # Everything else is a (possibly empty) wrapper around the text.
        if entity_type == EntityType.BOLD:
            before, after = "*", "*"
        elif entity_type == EntityType.ITALIC:
            before, after = "_", "_"
        elif entity_type == EntityType.STRIKETHROUGH:
            before, after = "~", "~"
        elif entity_type == EntityType.URL:
            link = kwargs["url"]
            before = ""
            # Only spell out the target when it differs from the link text.
            after = f" ({link})" if link != self.text else ""
        elif entity_type == EntityType.PREFORMATTED:
            before = f"```{kwargs['language']}\n"
            after = "\n```"
        elif entity_type == EntityType.INLINE_CODE:
            before, after = "`", "`"
        elif entity_type == EntityType.HEADER:
            before = "#" * kwargs["size"] + " "
            after = ""
        else:
            return self

        # Existing entities must move right by the length of the new prefix.
        self._offset_entities(len(before))
        self.text = f"{before}{self.text}{after}"
        return self
class ToKakaoTalkParser(MatrixParser[KakaoTalkFormatString]):
    """Matrix HTML parser that produces ``KakaoTalkFormatString`` results."""

    fs = KakaoTalkFormatString
2022-04-14 04:24:23 -04:00
async def _get_id_from_mxid(mxid: UserID, portal: po.Portal) -> Long | None:
    """Look up the KakaoTalk ID associated with a Matrix user ID.

    Tries, in order: the bridge user itself, that user's relay sender in
    ``portal``, and finally a puppet with the given MXID. Returns ``None``
    when none of them yields an ID.
    """
    user = await u.User.get_by_mxid(mxid, create=False)
    if user:
        if user.ktid:
            return user.ktid
        # Known user without a KakaoTalk ID of their own: try the relay sender.
        relay, _ = await portal.get_relay_sender(user, "relation")
        if relay and relay.ktid:
            return relay.ktid

    puppet = await pu.Puppet.get_by_mxid(mxid, create=False)
    if not puppet:
        return None
    return puppet.ktid
2022-04-06 12:49:23 -04:00
2022-02-25 02:22:50 -05:00
async def matrix_to_kakaotalk(
    content: MessageEventContent,
    room_id: RoomID,
    log: TraceLogger,
    portal: po.Portal,
    skip_reply: bool = False
) -> SendParams:
    """Convert the content of a Matrix message into KakaoTalk send parameters.

    :param content: The Matrix message content to convert. Its reply fallback
        is trimmed in place when reply metadata is built.
    :param room_id: The Matrix room the message was sent in.
    :param log: Logger used to report a failed lookup of the reply target.
    :param portal: The portal of the room; used to fetch events and room
        members, and to resolve Matrix users to KakaoTalk IDs.
    :param skip_reply: When true, no reply metadata is built even if the
        message is a reply (used when converting the reply target itself).
    :return: The text, mentions (or ``None``) and reply metadata (or ``None``).
    :raises ValueError: If the message is a reply but the reply target, or the
        KakaoTalk ID of its sender, can't be found.

    Any error raised while fetching the reply target's Matrix event is logged
    and re-raised.
    """
    # NOTE By design, this *throws* if user intent can't be matched (i.e. if a reply can't be created)
    if content.relates_to.rel_type == RelationType.REPLY and not skip_reply:
        message = await DBMessage.get_by_mxid(content.relates_to.event_id, room_id)
        if not message or not message.ktid:
            raise ValueError(
                f"Couldn't find reply target {content.relates_to.event_id}"
                " to bridge text message reply metadata to KakaoTalk"
            )
        try:
            src_event = await portal.main_intent.get_event(room_id, message.mxid)
        except Exception:
            # Not a bare except: task cancellation and interpreter exit must
            # propagate without being logged as a failed lookup.
            log.exception(f"Failed to find Matrix event for reply target {message.mxid}")
            raise
        src_kt_sender = await _get_id_from_mxid(src_event.sender, portal)
        if src_kt_sender is None:
            raise ValueError(
                f"Found no KakaoTalk user ID for reply target sender {src_event.sender}"
            )
        content.trim_reply_fallback()
        # Convert the reply target too, but without following its own reply chain.
        src_converted = await matrix_to_kakaotalk(src_event.content, room_id, log, portal, skip_reply=True)
        if src_event.content.relates_to.rel_type == RelationType.REPLY:
            src_type = KnownChatType.REPLY
            src_message = src_converted.text
        else:
            src_type = TO_MSGTYPE_MAP[src_event.content.msgtype]
            if src_type == KnownChatType.FILE:
                src_message = _media_type_reply_body_map[KnownChatType.FILE] + src_converted.text
            else:
                # Media targets get a fixed label; anything else keeps its text.
                src_message = _media_type_reply_body_map.get(src_type, src_converted.text)
        reply_to = ReplyAttachment(
            # NOTE mentions will be merged into this later
            # TODO Set this for emoticon reply, but must first support them
            attach_only=False,
            # TODO If replying with media works, must set type AND all attachment properties
            #      But then, the reply object must be an intersection of a ReplyAttachment and something else
            #attach_type=TO_MSGTYPE_MAP.get(content.msgtype),
            # TODO Confirm why official client sets this to 0, and whether this should be left as None instead
            attach_type=0,
            src_logId=message.ktid,
            src_mentions=src_converted.mentions or [],
            src_message=src_message,
            src_type=src_type,
            src_userId=src_kt_sender,
        )
    else:
        reply_to = None
    if (
        content.get("format", None) == Format.HTML and content["formatted_body"] and
        content.msgtype.is_text and
        not portal.is_direct
    ):
        parsed = await ToKakaoTalkParser().parse(utf16_surrogate.add(content["formatted_body"]))
        # Entity offsets/lengths refer to this surrogate-pair form of the text,
        # so any slicing by those offsets must use it rather than ``text``.
        surrogated_text = parsed.text
        text = utf16_surrogate.remove(surrogated_text)
        mentions_by_user: dict[Long, MentionStruct] = {}
        # Make sure to not create remote mentions for any remote user not in the room
        if parsed.entities:
            joined_members = set(await portal.main_intent.get_room_members(room_id))
            last_offset = 0
            # Running count of "@" characters seen up to and including the
            # first code unit of the current mention.
            at = 0
            for mention in sorted(parsed.entities, key=lambda entity: entity.offset):
                mxid = mention.extra_info["user_id"]
                if mxid not in joined_members:
                    continue
                ktid = await _get_id_from_mxid(mxid, portal)
                if ktid is None:
                    continue
                at += surrogated_text[last_offset:mention.offset+1].count("@")
                last_offset = mention.offset+1
                # All mentions of one user share a single struct; the length
                # recorded is that of the user's first mention.
                mention_by_user = mentions_by_user.setdefault(ktid, MentionStruct(
                    at=[],
                    len=mention.length,
                    user_id=ktid,
                ))
                mention_by_user.at.append(at)
        mentions = list(mentions_by_user.values()) if mentions_by_user else None
    else:
        text = content.body
        mentions = None
    return SendParams(text=text, mentions=mentions, reply_to=reply_to)
# Text used as the quoted message (src_message) when replying to a media
# message, keyed by the chat type of the reply target. The FILE entry is a
# prefix that is followed by the target's own text; the others are used as-is.
_media_type_reply_body_map: dict[KnownChatType, str] = {
    KnownChatType.PHOTO: "Photo",
    KnownChatType.VIDEO: "Video",
    KnownChatType.AUDIO: "Voice Note",
    KnownChatType.FILE: "File: ",
}