Deduplicate emotes

This commit is contained in:
Andrew Ferrazzutti 2021-04-06 01:54:21 -04:00
parent a743d149df
commit 3126543321
4 changed files with 94 additions and 16 deletions

View File

@ -5,11 +5,12 @@ from .user import User
from .puppet import Puppet from .puppet import Puppet
from .portal import Portal from .portal import Portal
from .message import Message from .message import Message
from .media import Media
def init(db: Database) -> None: def init(db: Database) -> None:
for table in (User, Puppet, Portal, Message): for table in (User, Puppet, Portal, Message, Media):
table.db = db table.db = db
__all__ = ["upgrade_table", "User", "Puppet", "Portal", "Message"] __all__ = ["upgrade_table", "User", "Puppet", "Portal", "Message", "Media"]

View File

@ -0,0 +1,51 @@
# matrix-puppeteer-line - A very hacky Matrix-LINE bridge based on running LINE's Chrome extension in Puppeteer
# Copyright (C) 2020-2021 Tulir Asokan, Andrew Ferrazzutti
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Optional, ClassVar, TYPE_CHECKING
from attr import dataclass
from mautrix.types import ContentURI
from mautrix.util.async_db import Database
fake_db = Database("") if TYPE_CHECKING else None
@dataclass
class Media:
    """A row of the ``media`` table, pairing a media ID with its Matrix content URI.

    Lets already-uploaded media be looked up by ID so the stored ``mxc`` URI
    can be reused.
    """

    # Database handle shared by all instances. It is ``None`` at runtime until
    # something assigns a real Database to the class (fake_db only exists for
    # type checkers).
    db: ClassVar[Database] = fake_db

    # Primary key of the row.
    media_id: str
    # Matrix content URI stored for this media ID.
    mxc: ContentURI

    # TODO Consider whether mime_type, file_name, and size are needed.

    async def insert(self) -> None:
        """Insert this media_id/mxc pair as a new row."""
        q = ("INSERT INTO media (media_id, mxc) "
             "VALUES ($1, $2)")
        await self.db.execute(q, self.media_id, self.mxc)

    async def update(self) -> None:
        """Overwrite the stored mxc of the row whose media_id matches this instance."""
        q = ("UPDATE media SET mxc=$2 "
             "WHERE media_id=$1")
        await self.db.execute(q, self.media_id, self.mxc)

    @classmethod
    async def get_by_id(cls, media_id: str) -> Optional["Media"]:
        """Fetch the row with the given media_id.

        Returns a ``Media`` instance built from the row, or ``None`` when no
        row matches. (The return value is the whole record, not just the
        content URI; read ``.mxc`` on the result.)
        """
        q = ("SELECT media_id, mxc "
             "FROM media WHERE media_id=$1")
        row = await cls.db.fetchrow(q, media_id)
        if not row:
            return None
        return cls(**row)

View File

@ -59,4 +59,12 @@ async def upgrade_avatars(conn: Connection) -> None:
await conn.execute("""ALTER TABLE portal await conn.execute("""ALTER TABLE portal
ADD COLUMN IF NOT EXISTS icon_path TEXT, ADD COLUMN IF NOT EXISTS icon_path TEXT,
ADD COLUMN IF NOT EXISTS icon_mxc TEXT ADD COLUMN IF NOT EXISTS icon_mxc TEXT
""") """)
@upgrade_table.register(description="Deduplicated media")
async def upgrade_media(conn: Connection) -> None:
    """Create the ``media`` table (media_id primary key, non-null mxc) if it is missing."""
    create_media_table = (
        "CREATE TABLE IF NOT EXISTS media (\n"
        "media_id TEXT PRIMARY KEY,\n"
        "mxc TEXT NOT NULL\n"
        ")"
    )
    await conn.execute(create_media_table)

View File

@ -33,7 +33,7 @@ from mautrix.errors import MatrixError
from mautrix.util.simple_lock import SimpleLock from mautrix.util.simple_lock import SimpleLock
from mautrix.util.network_retry import call_with_net_retry from mautrix.util.network_retry import call_with_net_retry
from .db import Portal as DBPortal, Message as DBMessage from .db import Portal as DBPortal, Message as DBMessage, Media as DBMedia
from .config import Config from .config import Config
from .rpc import ChatInfo, Participant, Message, Client, PathImage from .rpc import ChatInfo, Participant, Message, Client, PathImage
from . import user as u, puppet as p, matrix as m from . import user as u, puppet as p, matrix as m
@ -212,6 +212,7 @@ class Portal(DBPortal, BasePortal):
event_id = None event_id = None
if evt.image_url: if evt.image_url:
# TODO Deduplicate stickers, but only if encryption is disabled
content = await self._handle_remote_photo(source, intent, evt) content = await self._handle_remote_photo(source, intent, evt)
event_id = await self._send_message(intent, content, timestamp=evt.timestamp) event_id = await self._send_message(intent, content, timestamp=evt.timestamp)
elif evt.html and not evt.html.isspace(): elif evt.html and not evt.html.isspace():
@ -244,21 +245,22 @@ class Portal(DBPortal, BasePortal):
if msg_html: if msg_html:
msg_html += chunk["data"] msg_html += chunk["data"]
elif ctype == "img": elif ctype == "img":
if not msg_html:
msg_html = msg_text
cclass = chunk["class"] cclass = chunk["class"]
if cclass == "emojione": if cclass == "emojione":
alt = chunk["alt"] alt = chunk["alt"]
media_id = None
else: else:
alt = f':{"?" if "alt" not in chunk else "".join(filter(lambda char: char.isprintable(), chunk["alt"]))}:' alt = "".join(filter(lambda char: char.isprintable(), chunk["alt"])).strip()
alt = f':{alt if alt else "n/a"}:'
media_id = f'{chunk.get("data-stickon-pkg-cd", 0)}/{chunk.get("data-stickon-stk-cd", 0)}'
# NOTE Not encrypting content linked to by HTML tags
if not self.encrypted:
media_mxc = await self._get_mxc_for_remote_media(source, intent, chunk["src"], media_id)
if not msg_html:
msg_html = msg_text
msg_html += f'<img data-mx-emoticon src="{media_mxc}" alt="{alt}" title="{alt}" height="32">'
msg_text += alt msg_text += alt
# TODO Make a standalone function for this, and cache mxc in DB
# ID is some combination of data-stickon-pkg-cd, data-stickon-stk-cd, src
resp = await source.client.read_image(chunk["src"])
media_info = await self._reupload_remote_media(resp.data, intent, resp.mime)
msg_html += f'<img data-mx-emoticon src="{media_info.mxc}" alt="{alt}" title="{alt}" height="32">'
content = TextMessageEventContent( content = TextMessageEventContent(
msgtype=MessageType.TEXT, msgtype=MessageType.TEXT,
@ -279,9 +281,25 @@ class Portal(DBPortal, BasePortal):
msgtype=MessageType.IMAGE, body=media_info.file_name, msgtype=MessageType.IMAGE, body=media_info.file_name,
info=ImageInfo(mimetype=media_info.mime_type, size=media_info.size)) info=ImageInfo(mimetype=media_info.mime_type, size=media_info.size))
async def _get_mxc_for_remote_media(self, source: 'u.User', intent: IntentAPI,
                                    media_url: str, media_id: Optional[str] = None
                                    ) -> ContentURI:
    """Return the mxc URI for a remote media item, uploading it only when it is not stored yet.

    :param source: user whose client is used to download the media.
    :param intent: intent passed through to the reupload helper.
    :param media_url: location the media is read from on a cache miss.
    :param media_id: key for the stored mapping; when empty or None the URL
        itself is used as the key.
    :return: the stored mxc on a hit, otherwise the mxc of the fresh upload.
    """
    lookup_key = media_id if media_id else media_url

    known = await DBMedia.get_by_id(lookup_key)
    if known:
        self.log.debug(f"Found existing mxc URL for {lookup_key}: {known.mxc}")
        return known.mxc

    self.log.debug(f"Did not find existing mxc URL for {lookup_key}, uploading media now")
    downloaded = await source.client.read_image(media_url)
    # Encryption is explicitly disabled for this upload.
    uploaded = await self._reupload_remote_media(downloaded.data, intent, downloaded.mime,
                                                 disable_encryption=True)
    # Remember the mapping so later lookups for the same key skip the upload.
    await DBMedia(media_id=lookup_key, mxc=uploaded.mxc).insert()
    self.log.debug(f"Uploaded media as {uploaded.mxc}")
    return uploaded.mxc
async def _reupload_remote_media(self, data: bytes, intent: IntentAPI, async def _reupload_remote_media(self, data: bytes, intent: IntentAPI,
mime_type: str = None, file_name: str = None mime_type: str = None, file_name: str = None,
) -> ReuploadedMediaInfo: disable_encryption: bool = True) -> ReuploadedMediaInfo:
if not mime_type: if not mime_type:
mime_type = magic.from_buffer(data, mime=True) mime_type = magic.from_buffer(data, mime=True)
upload_mime_type = mime_type upload_mime_type = mime_type
@ -290,7 +308,7 @@ class Portal(DBPortal, BasePortal):
upload_file_name = file_name upload_file_name = file_name
decryption_info = None decryption_info = None
if self.encrypted and encrypt_attachment: if self.encrypted and encrypt_attachment and not disable_encryption:
data, decryption_info = encrypt_attachment(data) data, decryption_info = encrypt_attachment(data)
upload_mime_type = "application/octet-stream" upload_mime_type = "application/octet-stream"
upload_file_name = None upload_file_name = None