diff --git a/matrix_puppeteer_line/db/__init__.py b/matrix_puppeteer_line/db/__init__.py
index c52a762..9368399 100644
--- a/matrix_puppeteer_line/db/__init__.py
+++ b/matrix_puppeteer_line/db/__init__.py
@@ -5,11 +5,12 @@ from .user import User
from .puppet import Puppet
from .portal import Portal
from .message import Message
+from .media import Media
def init(db: Database) -> None:
- for table in (User, Puppet, Portal, Message):
+ for table in (User, Puppet, Portal, Message, Media):
table.db = db
-__all__ = ["upgrade_table", "User", "Puppet", "Portal", "Message"]
+__all__ = ["upgrade_table", "User", "Puppet", "Portal", "Message", "Media"]
diff --git a/matrix_puppeteer_line/db/media.py b/matrix_puppeteer_line/db/media.py
new file mode 100644
index 0000000..24c71b3
--- /dev/null
+++ b/matrix_puppeteer_line/db/media.py
@@ -0,0 +1,51 @@
+# matrix-puppeteer-line - A very hacky Matrix-LINE bridge based on running LINE's Chrome extension in Puppeteer
+# Copyright (C) 2020-2021 Tulir Asokan, Andrew Ferrazzutti
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+from typing import Optional, ClassVar, TYPE_CHECKING
+
+from attr import dataclass
+
+from mautrix.types import ContentURI
+from mautrix.util.async_db import Database
+
+fake_db = Database("") if TYPE_CHECKING else None
+
+
+@dataclass
+class Media:
+    """Row of the ``media`` table: maps a media identifier to an mxc content URI.
+
+    ``db`` is a class-level database handle. It is ``None`` at import time
+    (outside of type checking) and is assigned by the package-level ``init()``.
+    """
+    db: ClassVar[Database] = fake_db
+
+    media_id: str
+    mxc: ContentURI
+    # TODO Consider whether mime_type, file_name, and size are needed.
+
+    async def insert(self) -> None:
+        """Insert this ``media_id`` -> ``mxc`` mapping as a new row."""
+        q = ("INSERT INTO media (media_id, mxc) "
+             "VALUES ($1, $2)")
+        await self.db.execute(q, self.media_id, self.mxc)
+
+    async def update(self) -> None:
+        """Overwrite the stored ``mxc`` of the row whose ``media_id`` matches."""
+        q = ("UPDATE media SET mxc=$2 "
+             "WHERE media_id=$1")
+        await self.db.execute(q, self.media_id, self.mxc)
+
+    @classmethod
+    async def get_by_id(cls, media_id: str) -> Optional['Media']:
+        """Fetch the row for ``media_id``.
+
+        Returns a ``Media`` instance built from the row (not a bare
+        ``ContentURI``), or ``None`` when no row matches.
+        """
+        q = ("SELECT media_id, mxc "
+             "FROM media WHERE media_id=$1")
+        row = await cls.db.fetchrow(q, media_id)
+        if not row:
+            return None
+        return cls(**row)
diff --git a/matrix_puppeteer_line/db/upgrade.py b/matrix_puppeteer_line/db/upgrade.py
index 5f796a0..09236e3 100644
--- a/matrix_puppeteer_line/db/upgrade.py
+++ b/matrix_puppeteer_line/db/upgrade.py
@@ -59,4 +59,12 @@ async def upgrade_avatars(conn: Connection) -> None:
await conn.execute("""ALTER TABLE portal
ADD COLUMN IF NOT EXISTS icon_path TEXT,
ADD COLUMN IF NOT EXISTS icon_mxc TEXT
- """)
\ No newline at end of file
+ """)
+
+
+# Schema upgrade step: creates the `media` table (if it does not exist yet),
+# keyed by `media_id` and holding the non-null `mxc` URI stored for that ID.
+@upgrade_table.register(description="Deduplicated media")
+async def upgrade_media(conn: Connection) -> None:
+ await conn.execute("""CREATE TABLE IF NOT EXISTS media (
+ media_id TEXT PRIMARY KEY,
+ mxc TEXT NOT NULL
+ )""")
\ No newline at end of file
diff --git a/matrix_puppeteer_line/portal.py b/matrix_puppeteer_line/portal.py
index 4a1dbcb..d112378 100644
--- a/matrix_puppeteer_line/portal.py
+++ b/matrix_puppeteer_line/portal.py
@@ -33,7 +33,7 @@ from mautrix.errors import MatrixError
from mautrix.util.simple_lock import SimpleLock
from mautrix.util.network_retry import call_with_net_retry
-from .db import Portal as DBPortal, Message as DBMessage
+from .db import Portal as DBPortal, Message as DBMessage, Media as DBMedia
from .config import Config
from .rpc import ChatInfo, Participant, Message, Client, PathImage
from . import user as u, puppet as p, matrix as m
@@ -212,6 +212,7 @@ class Portal(DBPortal, BasePortal):
event_id = None
if evt.image_url:
+ # TODO Deduplicate stickers, but only if encryption is disabled
content = await self._handle_remote_photo(source, intent, evt)
event_id = await self._send_message(intent, content, timestamp=evt.timestamp)
elif evt.html and not evt.html.isspace():
@@ -244,21 +245,22 @@ class Portal(DBPortal, BasePortal):
if msg_html:
msg_html += chunk["data"]
elif ctype == "img":
- if not msg_html:
- msg_html = msg_text
-
cclass = chunk["class"]
if cclass == "emojione":
alt = chunk["alt"]
+ media_id = None
else:
- alt = f':{"?" if "alt" not in chunk else "".join(filter(lambda char: char.isprintable(), chunk["alt"]))}:'
+ alt = "".join(filter(lambda char: char.isprintable(), chunk["alt"])).strip()
+ alt = f':{alt if alt else "n/a"}:'
+ media_id = f'{chunk.get("data-stickon-pkg-cd", 0)}/{chunk.get("data-stickon-stk-cd", 0)}'
+ # NOTE Not encrypting content linked to by HTML tags
+ if not self.encrypted:
+ media_mxc = await self._get_mxc_for_remote_media(source, intent, chunk["src"], media_id)
+ if not msg_html:
+ msg_html = msg_text
+ msg_html += f''
msg_text += alt
- # TODO Make a standalone function for this, and cache mxc in DB
- # ID is some combination of data-stickon-pkg-cd, data-stickon-stk-cd, src
- resp = await source.client.read_image(chunk["src"])
- media_info = await self._reupload_remote_media(resp.data, intent, resp.mime)
- msg_html += f''
content = TextMessageEventContent(
msgtype=MessageType.TEXT,
@@ -279,9 +281,25 @@ class Portal(DBPortal, BasePortal):
msgtype=MessageType.IMAGE, body=media_info.file_name,
info=ImageInfo(mimetype=media_info.mime_type, size=media_info.size))
+    async def _get_mxc_for_remote_media(self, source: 'u.User', intent: IntentAPI,
+                                        media_url: str, media_id: Optional[str] = None
+                                        ) -> ContentURI:
+        """Return the mxc URI for a remote media item, uploading it only once.
+
+        The ``media`` table is consulted first, keyed by ``media_id`` (falling
+        back to ``media_url`` when no ID is given). On a miss, the media is
+        read through ``source.client``, reuploaded without encryption, and the
+        resulting mapping is stored before its mxc URI is returned.
+        """
+        media_id = media_id or media_url
+
+        cached = await DBMedia.get_by_id(media_id)
+        if cached:
+            self.log.debug(f"Found existing mxc URL for {media_id}: {cached.mxc}")
+            return cached.mxc
+
+        self.log.debug(f"Did not find existing mxc URL for {media_id}, uploading media now")
+        image = await source.client.read_image(media_url)
+        uploaded = await self._reupload_remote_media(image.data, intent, image.mime,
+                                                     disable_encryption=True)
+        await DBMedia(media_id=media_id, mxc=uploaded.mxc).insert()
+        self.log.debug(f"Uploaded media as {uploaded.mxc}")
+        return uploaded.mxc
+
async def _reupload_remote_media(self, data: bytes, intent: IntentAPI,
- mime_type: str = None, file_name: str = None
- ) -> ReuploadedMediaInfo:
+ mime_type: str = None, file_name: str = None,
+ disable_encryption: bool = False) -> ReuploadedMediaInfo:
+ # disable_encryption is opt-in (passed explicitly by _get_mxc_for_remote_media);
+ # callers that omit it keep encrypting attachments when the portal is encrypted.
if not mime_type:
mime_type = magic.from_buffer(data, mime=True)
upload_mime_type = mime_type
@@ -290,7 +308,7 @@ class Portal(DBPortal, BasePortal):
upload_file_name = file_name
decryption_info = None
- if self.encrypted and encrypt_attachment:
+ if self.encrypted and encrypt_attachment and not disable_encryption:
data, decryption_info = encrypt_attachment(data)
upload_mime_type = "application/octet-stream"
upload_file_name = None