From 4cda93dd9b4e84e2c5b7aae76b71fb4bb3a2c7c1 Mon Sep 17 00:00:00 2001 From: Andrew Ferrazzutti Date: Sat, 20 Feb 2021 20:00:32 -0500 Subject: [PATCH] First crack at backfilling --- mautrix_line/portal.py | 4 +-- mautrix_line/puppet.py | 5 +-- mautrix_line/user.py | 3 +- puppet/src/contentscript.js | 64 ++++++++++++++++++------------------- puppet/src/puppet.js | 11 +++---- 5 files changed, 44 insertions(+), 43 deletions(-) diff --git a/mautrix_line/portal.py b/mautrix_line/portal.py index af408e5..5e747d3 100644 --- a/mautrix_line/portal.py +++ b/mautrix_line/portal.py @@ -200,6 +200,7 @@ class Portal(DBPortal, BasePortal): return ReuploadedMediaInfo(mxc, decryption_info, mime_type, file_name, len(data)) async def update_info(self, conv: ChatInfo) -> None: + # TODO Not true: a single-participant chat could be a group! if len(conv.participants) == 1: self.other_user = conv.participants[0].id if self._main_intent is self.az.intent: @@ -251,8 +252,7 @@ class Portal(DBPortal, BasePortal): async def backfill(self, source: 'u.User') -> None: with self.backfill_lock: - self.log.debug("Backfill: TODO!") - #await self._backfill(source) + await self._backfill(source) async def _backfill(self, source: 'u.User') -> None: self.log.debug("Backfilling history through %s", source.mxid) diff --git a/mautrix_line/puppet.py b/mautrix_line/puppet.py index d2c0390..31b13f3 100644 --- a/mautrix_line/puppet.py +++ b/mautrix_line/puppet.py @@ -118,6 +118,7 @@ class Puppet(DBPuppet, BasePuppet): @classmethod async def get_by_custom_mxid(cls, mxid: UserID) -> Optional['u.User']: - if mxid == cls.config["bridge.user"]: - return await cls.bridge.get_user(mxid) + # TODO double-puppeting + #if mxid == cls.config["bridge.user"]: + # return await cls.bridge.get_user(mxid) return None diff --git a/mautrix_line/user.py b/mautrix_line/user.py index 477d0dc..934dc90 100644 --- a/mautrix_line/user.py +++ b/mautrix_line/user.py @@ -55,6 +55,7 @@ class User(DBUser, BaseUser): self._metric_value = defaultdict(lambda: False) self._connection_check_task = None self.client = None + self.intent = None @classmethod def init_cls(cls, bridge: 'MessagesBridge') -> None: @@ -83,7 +84,7 @@ class User(DBUser, BaseUser): self.log.warning("Failed to log in with shared secret") return self.log.debug("Logged in with shared secret") - #self.intent = self.az.intent.user(self.mxid, access_token) + self.intent = self.az.intent.user(self.mxid, access_token) except Exception: self.log.exception("Error logging in with shared secret") diff --git a/puppet/src/contentscript.js b/puppet/src/contentscript.js index f0db8e5..e2e7dc9 100644 --- a/puppet/src/contentscript.js +++ b/puppet/src/contentscript.js @@ -73,10 +73,7 @@ class MautrixController { */ async _tryParseDate(text, ref, option) { const parsed = await window.__chronoParseDate(text, ref, option) - if (parsed) { - return new Date(parsed) - } - return null + return parsed ? new Date(parsed) : null } /** @@ -86,14 +83,11 @@ class MautrixController { * @return {?Date} - The value in the date separator. * @private */ - async _parseDate(text) { + async _tryParseDayDate(text) { if (!text) { return null } - text = text - .replace(/[^\w\d\s,:.-]/g, "") - .replace(/\s{2,}/g, " ") - .trim() + text = text.replace(/\. /, "/") const now = new Date() let newDate = await this._tryParseDate(text) if (!newDate || newDate > now) { @@ -101,7 +95,7 @@ class MautrixController { lastWeek.setDate(lastWeek.getDate() - 7) newDate = await this._tryParseDate(text, lastWeek, { forwardDate: true }) } - return newDate <= now ? newDate : null + return newDate && newDate <= now ? newDate : null } /** @@ -122,14 +116,18 @@ class MautrixController { * @return {MessageData} * @private */ - _parseMessage(date, element) { + _tryParseMessage(date, element) { const messageData = { - id: +element.getAttribute("msg-id"), + id: +element.getAttribute("data-local-id"), timestamp: date ? date.getTime() : null, - is_outgoing: element.getAttribute("is-outgoing") === "true", + is_outgoing: element.classList.contains("mdRGT07Own"), } - messageData.text = element.querySelector("mws-text-message-part .text-msg")?.innerText - if (element.querySelector("mws-image-message-part .image-msg")) { + const messageElement = element.querySelector(".mdRGT07Body > .mdRGT07Msg") + if (messageElement.classList.contains("mdRGT07Text")) { + // TODO Use "Inner" or not? + messageData.text = messageElement.querySelector(".mdRGT07MsgTextInner")?.innerText + } else if (messageElement.classList.contains("mdRGT07Image")) { + // TODO Doesn't this need to be a URL? messageData.image = true } return messageData @@ -148,7 +146,7 @@ class MautrixController { if (addedNode.classList.contains("mdRGT07Own")) { const timeElement = addedNode.querySelector("time.MdNonDisp") if (timeElement) { - msgID = addedNode.getAttribute("data-local-id") + msgID = +addedNode.getAttribute("data-local-id") observer.disconnect() observer = new MutationObserver(visibleTimeCallback) observer.observe(timeElement, { attributes: true, attributeFilter: ["class"] }) @@ -184,27 +182,29 @@ class MautrixController { } /** - * Parse a message list in the given element. The element should probably be the .content div - * inside a mws-message-list element. + * Parse the message list of whatever the currently-viewed chat is. * - * @param {Element} element - The message list element. * @return {[MessageData]} - A list of messages. */ - async parseMessageList(element) { + async parseMessageList() { + const msgList = document.querySelector("#_chat_room_msg_list") const messages = [] - let messageDate = null - for (const child of element.children) { - switch (child.tagName.toLowerCase()) { - case "mws-message-wrapper": - if (!child.getAttribute("msg-id").startsWith("tmp_")) { - messages.push(this._parseMessage(messageDate, child)) + let refDate = null + for (const child of msgList.children) { + if (child.tagName == "DIV") { + if (child.classList.contains("mdRGT10Date")) { + refDate = await this._tryParseDayDate(child.firstElementChild.innerText) + } + else if (child.classList.contains("MdRGT07Cont")) { + // TODO :not(.MdNonDisp) to exclude not-yet-posted messages, + // but that is unlikely to be a problem here. + // Also, offscreen times may have .MdNonDisp on them + const timeElement = child.querySelector("time") + if (timeElement) { + const messageDate = await this._tryParseDate(timeElement.innerText, refDate) + messages.push(this._tryParseMessage(messageDate, child)) + } } - break - case "mws-tombstone-message-wrapper": - messageDate = await this._parseDate( - child.querySelector("mws-relative-timestamp")?.innerText, - ) || messageDate - break } } return messages diff --git a/puppet/src/puppet.js b/puppet/src/puppet.js index d801b0e..600f9b5 100644 --- a/puppet/src/puppet.js +++ b/puppet/src/puppet.js @@ -98,8 +98,8 @@ export default class MessagesPuppeteer { /* TODO await this.page.exposeFunction("__mautrixReceiveChanges", this._receiveChatListChanges.bind(this)) - await this.page.exposeFunction("__chronoParseDate", chrono.parseDate) */ + await this.page.exposeFunction("__chronoParseDate", chrono.parseDate) // NOTE Must *always* re-login on a browser session, so no need to check if already logged in this.loginRunning = false @@ -504,14 +504,13 @@ export default class MessagesPuppeteer { // Probably use a MutationObserver mapped to msgID async _getMessagesUnsafe(id, minID = 0) { - /* TODO Also handle "decrypting" state + // TODO Also handle "decrypting" state + // TODO Handle unloaded messages. Maybe scroll up await this._switchChatUnsafe(id) this.log("Waiting for messages to load") - await this.page.waitFor("mws-message-wrapper") - const messages = await this.page.$eval("mws-messages-list .content", - element => window.__mautrixController.parseMessageList(element)) + const messages = await this.page.evaluate( + () => window.__mautrixController.parseMessageList()) return messages.filter(msg => msg.id > minID && !this.sentMessageIDs.has(msg.id)) - */ } async _processChatListChangeUnsafe(id) {