From fc8bc79ffd1bc48dfea701f8d8c4991c52519866 Mon Sep 17 00:00:00 2001 From: Andrew Ferrazzutti Date: Mon, 5 Apr 2021 03:54:35 -0400 Subject: [PATCH] Inbound sticons (emoticons) MAJOR TODO: Non-duplication of uploaded image data --- ROADMAP.md | 4 +-- matrix_puppeteer_line/portal.py | 55 ++++++++++++++++++++++++++++-- matrix_puppeteer_line/rpc/types.py | 2 +- puppet/src/contentscript.js | 34 ++++-------------- 4 files changed, 62 insertions(+), 33 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index f3bc838..ca92ada 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -19,9 +19,9 @@ * [ ] Location * [ ] Videos * [x] Stickers - * [ ] Sticons + * [x] Sticons * [x] Single - * [ ] Multiple or mixed with text + * [x] Multiple or mixed with text * [x] EmojiOne * [x] Notification for message send failure * [ ] Read receipts diff --git a/matrix_puppeteer_line/portal.py b/matrix_puppeteer_line/portal.py index 567ce64..e0c8df1 100644 --- a/matrix_puppeteer_line/portal.py +++ b/matrix_puppeteer_line/portal.py @@ -15,6 +15,7 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Dict, Optional, List, Set, Any, AsyncGenerator, NamedTuple, TYPE_CHECKING, cast from asyncpg.exceptions import UniqueViolationError +from html.parser import HTMLParser import mimetypes import asyncio @@ -25,7 +26,7 @@ from os import remove from mautrix.appservice import AppService, IntentAPI from mautrix.bridge import BasePortal, NotificationDisabler from mautrix.types import (EventID, MessageEventContent, RoomID, EventType, MessageType, - TextMessageEventContent, MediaMessageEventContent, Membership, + TextMessageEventContent, MediaMessageEventContent, Membership, Format, ContentURI, EncryptedFile, ImageInfo, RelatesTo, RelationType) from mautrix.errors import MatrixError @@ -213,8 +214,56 @@ class Portal(DBPortal, BasePortal): if evt.image_url: content = await self._handle_remote_photo(source, intent, evt) event_id = await self._send_message(intent, content, timestamp=evt.timestamp) - elif evt.text and not evt.text.isspace(): - content = TextMessageEventContent(msgtype=MessageType.TEXT, body=evt.text) + elif evt.html and not evt.html.isspace(): + chunks = [] + + def handle_data(data): + nonlocal chunks + chunks.append({"type": "data", "data": data}) + + def handle_starttag(tag, attrs): + if tag == "img": + obj = {"type": tag} + for attr in attrs: + obj[attr[0]] = attr[1] + nonlocal chunks + chunks.append(obj) + + parser = HTMLParser() + parser.handle_data = handle_data + parser.handle_starttag = handle_starttag + parser.feed(evt.html) + + msg_text = "" + msg_html = None + + for chunk in chunks: + ctype = chunk["type"] + if ctype == "data": + msg_text += chunk["data"] + if msg_html: + msg_html += chunk["data"] + elif ctype == "img": + if not msg_html: + msg_html = msg_text + + cclass = chunk["class"] + if cclass == "emojione": + alt = chunk["alt"] + else: + alt = f':{"?" 
if "alt" not in chunk else "".join(filter(lambda char: char.isprintable(), chunk["alt"]))}:' + + msg_text += alt + # TODO Make a standalone function for this, and cache mxc in DB + # ID is some combination of data-stickon-pkg-cd, data-stickon-stk-cd, src + resp = await source.client.read_image(chunk["src"]) + media_info = await self._reupload_remote_media(resp.data, intent, resp.mime) + msg_html += f'<img data-mx-emoticon src="{media_info.mxc}" alt="{alt}" title="{alt}" height="32">' + + content = TextMessageEventContent( + msgtype=MessageType.TEXT, + format=Format.HTML if msg_html else None, + body=msg_text, formatted_body=msg_html) event_id = await self._send_message(intent, content, timestamp=evt.timestamp) if event_id: msg = DBMessage(mxid=event_id, mx_room=self.mxid, mid=evt.id, chat_id=self.chat_id) diff --git a/matrix_puppeteer_line/rpc/types.py b/matrix_puppeteer_line/rpc/types.py index 109513f..7cc3502 100644 --- a/matrix_puppeteer_line/rpc/types.py +++ b/matrix_puppeteer_line/rpc/types.py @@ -58,7 +58,7 @@ class Message(SerializableAttrs['Message']): is_outgoing: bool sender: Optional[Participant] timestamp: int = None - text: Optional[str] = None + html: Optional[str] = None image_url: Optional[str] = None diff --git a/puppet/src/contentscript.js b/puppet/src/contentscript.js index caff1ac..30ceed8 100644 --- a/puppet/src/contentscript.js +++ b/puppet/src/contentscript.js @@ -145,13 +145,13 @@ class MautrixController { * @property {number} id - The ID of the message. Seems to be sequential. * @property {number} timestamp - The unix timestamp of the message. Not very accurate. * @property {boolean} is_outgoing - Whether or not this user sent the message. - * @property {null|Participant} sender - Full data of the participant who sent the message, if needed and available. - * @property {string} [text] - The text in the message. - * @property {string} [image] - The URL to the image in the message. + * @property {?Participant} sender - Full data of the participant who sent the message, if needed and available.
+ * @property {?string} html - The HTML format of the message, if necessary. + * @property {?string} image_url - The URL to the image in the message, if it's an image-only message. */ _isLoadedImageURL(src) { - return src?.startsWith("blob:") + return src && (src.startsWith("blob:") || src.startsWith("res/")) } /** @@ -212,30 +212,10 @@ class MautrixController { } const messageElement = element.querySelector(".mdRGT07Body > .mdRGT07Msg") if (messageElement.classList.contains("mdRGT07Text")) { - const msgTextInner = messageElement.querySelector(".mdRGT07MsgTextInner") - if (msgTextInner) { - const imgs = msgTextInner.querySelectorAll("img") - if (imgs.length == 0) { - messageData.text = msgTextInner.innerText - } else { - // TODO Consider using a custom sticker pack (MSC1951) - //messageData.image_urls = Array.from(imgs).map(img => img.src) - //messageData.html = msgTextInner.innerHTML - - let msgTextInnerCopy = msgTextInner.cloneNode(true) - // TODO Consider skipping img.CMSticon elements, - // whose alt-text is ugly - // TODO Confirm that img is the only possible kind - // of child element for a text message - for (let child of Array.from(msgTextInnerCopy.children)) { - child.replaceWith(child.getAttribute("alt")) - } - messageData.text = msgTextInnerCopy.innerText - } - } + messageData.html = messageElement.querySelector(".mdRGT07MsgTextInner")?.innerHTML } else if ( - messageElement.classList.contains("mdRGT07Image") || - messageElement.classList.contains("mdRGT07Sticker") + messageElement.classList.contains("mdRGT07Image") || + messageElement.classList.contains("mdRGT07Sticker") ) { const img = messageElement.querySelector(".mdRGT07MsgImg > img") if (img) {