# matrix-appservice-kakaotalk - A Matrix-KakaoTalk puppeting bridge.
# Copyright (C) 2022 Tulir Asokan, Andrew Ferrazzutti
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations

from typing import Match
from html import escape
import re

from mautrix.types import Format, MessageType, TextMessageEventContent

from .. import puppet as pu, user as u

# Inline formatting must be delimited by whitespace or the string boundaries.
_START = r"^|\s"
_END = r"$|\s"
# The formatted text itself may not start or end with whitespace.
_TEXT_NO_SURROUNDING_SPACE = r"(?:[^\s].*?[^\s])|[^\s]"
# Groups for both inline regexes: (prefix, sigil, text, suffix).
COMMON_REGEX = re.compile(rf"({_START})([_~*])({_TEXT_NO_SURROUNDING_SPACE})\2({_END})")
INLINE_CODE_REGEX = re.compile(rf"({_START})(`)(.+?)`({_END})")
# Placeholder form of a mention: "@<numeric id>\u2063<display text>\u2063".
MENTION_REGEX = re.compile(r"@([0-9]{1,15})\u2063(.+?)\u2063")

# Formatting sigil -> HTML tag name.
tags = {"_": "em", "*": "strong", "~": "del", "`": "code"}

# All line-level parsing runs on text that has already been through html.escape(),
# so a quote marker typed as ">" shows up as the entity "&gt;".
_QUOTE_MARKER = "&gt;"


def _handle_match(html: str, match: Match, nested: bool) -> tuple[str, int]:
    """Replace one inline-formatting match in ``html`` with its HTML tag.

    Args:
        html: The string the match was found in.
        match: A match of ``COMMON_REGEX`` or ``INLINE_CODE_REGEX``.
        nested: Whether the matched text should itself be scanned for formatting.

    Returns:
        The updated string and the position from which scanning should resume.
    """
    start, end = match.start(), match.end()
    prefix, sigil, text, suffix = match.groups()
    if nested:
        text = _convert_formatting(text)
    tag = tags[sigil]
    # "<tag>" plus "</tag>" add 2 * len(tag) + 5 characters.
    # We don't want to include the whitespace suffix length, as that could be used as the
    # whitespace prefix right after this formatting block.
    pos = start + len(prefix) + (2 * len(tag) + 5) + len(text)
    html = f"{html[:start]}{prefix}<{tag}>{text}</{tag}>{suffix}{html[end:]}"
    return html, pos


def _convert_formatting(html: str) -> str:
    """Convert every inline formatting span (``_``, ``*``, ``~``, backtick) to HTML.

    The string is scanned left to right; at each step the earliest match wins, and
    inline code wins a tie. Inline code contents are not scanned for nested formatting.
    """
    pos = 0
    while pos < len(html):
        i_match = INLINE_CODE_REGEX.search(html, pos)
        c_match = COMMON_REGEX.search(html, pos)
        if i_match and c_match:
            # min() returns the first argument on ties, so inline code takes priority.
            match = min(i_match, c_match, key=lambda m: m.start())
        else:
            match = i_match or c_match

        if not match:
            break
        html, pos = _handle_match(html, match, nested=match is not i_match)
    return html


def _handle_blockquote(output: list[str], blockquote: bool, line: str) -> tuple[bool, str]:
    """Open, continue or close a blockquote based on the start of ``line``.

    ``line`` must already be HTML-escaped, which is why the marker looked for is
    ``&gt;`` rather than a literal ``>``.

    Args:
        output: HTML fragments collected so far; opening/closing tags are appended here.
        blockquote: Whether the previous line was inside a blockquote.
        line: The current (escaped) line.

    Returns:
        The new blockquote state and the line with its quote marker removed.
    """
    if not blockquote and line.startswith(f"{_QUOTE_MARKER} "):
        line = line[len(_QUOTE_MARKER) + 1 :]
        output.append("<blockquote>")
        blockquote = True
    elif blockquote:
        if line.startswith(_QUOTE_MARKER):
            line = line[len(_QUOTE_MARKER) :]
            # The space after the marker is optional on continuation lines.
            if line.startswith(" "):
                line = line[1:]
        else:
            output.append("</blockquote>")
            blockquote = False
    return blockquote, line


def _handle_codeblock_pre(
    output: list[str], codeblock: bool, line: str
) -> tuple[bool, str, tuple[str | None, str | None, str | None]]:
    """Detect a ``` fence on ``line`` and split the line around it.

    Args:
        output: HTML fragments collected so far; a closing ``</code></pre>`` is appended
            here when the fence ends an open code block.
        codeblock: Whether a multi-line code block is currently open.
        line: The current (escaped) line.

    Returns:
        A tuple of the new code block state, the part of the line that should be
        rendered before any newly opened block, and the arguments for
        ``_handle_codeblock_post``: ``(cb_lang, cb_content, post_cb_content)``.
        ``cb_lang`` is ``None`` unless a block was opened on this line; the other two
        are only set when the block was also closed on this line.
    """
    cb = line.find("```")
    cb_lang = None
    cb_content = None
    post_cb_content = None
    if cb != -1:
        if not codeblock:
            cb_lang = line[cb + 3 :]
            if "```" in cb_lang:
                # The block opens and closes on the same line, so there is no language tag.
                end = cb_lang.index("```")
                cb_content = cb_lang[:end]
                post_cb_content = cb_lang[end + 3 :]
                cb_lang = ""
            else:
                codeblock = True
            line = line[:cb]
        else:
            output.append("</code></pre>")
            codeblock = False
            line = line[cb + 3 :]
    return codeblock, line, (cb_lang, cb_content, post_cb_content)


def _handle_codeblock_post(
    output: list[str], cb_lang: str | None, cb_content: str | None, post_cb_content: str | None
) -> None:
    """Emit the opening tag (and, for one-line blocks, the body) of a code block.

    Takes the third element returned by ``_handle_codeblock_pre``. Does nothing when
    ``cb_lang`` is ``None``, i.e. when no block was opened on the line.
    """
    if cb_lang is None:
        return
    if cb_lang:
        output.append(f'<pre><code class="language-{cb_lang}">')
    else:
        output.append("<pre><code>")
    # Compare against None rather than truthiness: an empty one-line block still has to
    # be closed, otherwise the <pre><code> opened above would be left dangling.
    if cb_content is not None:
        output.append(cb_content)
        output.append("</code></pre>")
        output.append(_convert_formatting(post_cb_content or ""))


async def kakaotalk_to_matrix(msg: str) -> TextMessageEventContent:
    """Convert the text of a KakaoTalk message into Matrix text message content.

    The plain text is always used as the event ``body``. Inline formatting, ``` code
    blocks, ``>`` quotes and mention placeholders are rendered to HTML, and the result
    is attached as ``formatted_body`` only when it differs from the plain escaped text.

    Args:
        msg: The message text; a falsy value is treated as an empty string.

    Returns:
        The Matrix message content.
    """
    text = msg or ""
    # No mention data is supplied to this function, so the list is always empty here
    # and the loop below is currently a no-op.
    mentions = []
    content = TextMessageEventContent(msgtype=MessageType.TEXT, body=text)
    mention_user_ids = []
    # Walk backwards so that earlier offsets stay valid while the text is rewritten.
    for m in reversed(mentions):
        original = text[m.offset : m.offset + m.length]
        if len(original) > 0 and original[0] == "@":
            original = original[1:]
        mention_user_ids.append(int(m.user_id))
        text = f"{text[:m.offset]}@{m.user_id}\u2063{original}\u2063{text[m.offset + m.length:]}"
    html = escape(text)
    output = []
    if html:
        codeblock = False
        blockquote = False
        line: str
        lines = html.split("\n")
        for i, line in enumerate(lines):
            blockquote, line = _handle_blockquote(output, blockquote, line)
            codeblock, line, post_args = _handle_codeblock_pre(output, codeblock, line)
            output.append(_convert_formatting(line))
            if i != len(lines) - 1:
                if codeblock:
                    output.append("\n")
                else:
                    output.append("<br/>")
            _handle_codeblock_post(output, *post_args)
        # Close anything still open at the end of the message so the HTML is balanced.
        # A code block can sit inside a quote, so it is closed first.
        if codeblock:
            output.append("</code></pre>")
        if blockquote:
            output.append("</blockquote>")
    html = "".join(output)

    mention_user_map = {}
    for ktid in mention_user_ids:
        user = await u.User.get_by_ktid(ktid)
        if user:
            mention_user_map[ktid] = user.mxid
        else:
            puppet = await pu.Puppet.get_by_ktid(ktid, create=False)
            mention_user_map[ktid] = puppet.mxid if puppet else None

    def _mention_replacer(match: Match) -> str:
        ktid = int(match.group(1))
        if ktid not in mention_user_map:
            # Text that merely looks like a placeholder but was not inserted above:
            # leave it untouched instead of failing with a KeyError.
            return match.group(0)
        mxid = mention_user_map[ktid]
        if not mxid:
            return match.group(2)
        return f'<a href="https://matrix.to/#/{mxid}">{match.group(2)}</a>'

    html = MENTION_REGEX.sub(_mention_replacer, html)
    # Lines are joined with "<br/>" above, so the unformatted baseline has to be built
    # the same way for this comparison to detect "nothing was formatted".
    if html != escape(content.body).replace("\n", "<br/>"):
        content.format = Format.HTML
        content.formatted_body = html
    return content