First crack at backfilling

This commit is contained in:
Andrew Ferrazzutti 2021-02-20 20:00:32 -05:00
parent ec081e94df
commit 0260ad89f4
5 changed files with 44 additions and 43 deletions

View File

@ -200,6 +200,7 @@ class Portal(DBPortal, BasePortal):
return ReuploadedMediaInfo(mxc, decryption_info, mime_type, file_name, len(data))
async def update_info(self, conv: ChatInfo) -> None:
# TODO Not true: a single-participant chat could be a group!
if len(conv.participants) == 1:
self.other_user = conv.participants[0].id
if self._main_intent is self.az.intent:
@ -251,8 +252,7 @@ class Portal(DBPortal, BasePortal):
async def backfill(self, source: 'u.User') -> None:
with self.backfill_lock:
self.log.debug("Backfill: TODO!")
#await self._backfill(source)
await self._backfill(source)
async def _backfill(self, source: 'u.User') -> None:
self.log.debug("Backfilling history through %s", source.mxid)

View File

@ -118,6 +118,7 @@ class Puppet(DBPuppet, BasePuppet):
@classmethod
async def get_by_custom_mxid(cls, mxid: UserID) -> Optional['u.User']:
if mxid == cls.config["bridge.user"]:
return await cls.bridge.get_user(mxid)
# TODO double-puppeting
#if mxid == cls.config["bridge.user"]:
# return await cls.bridge.get_user(mxid)
return None

View File

@ -55,6 +55,7 @@ class User(DBUser, BaseUser):
self._metric_value = defaultdict(lambda: False)
self._connection_check_task = None
self.client = None
self.intent = None
@classmethod
def init_cls(cls, bridge: 'MessagesBridge') -> None:
@ -83,7 +84,7 @@ class User(DBUser, BaseUser):
self.log.warning("Failed to log in with shared secret")
return
self.log.debug("Logged in with shared secret")
#self.intent = self.az.intent.user(self.mxid, access_token)
self.intent = self.az.intent.user(self.mxid, access_token)
except Exception:
self.log.exception("Error logging in with shared secret")

View File

@ -73,10 +73,7 @@ class MautrixController {
*/
async _tryParseDate(text, ref, option) {
const parsed = await window.__chronoParseDate(text, ref, option)
if (parsed) {
return new Date(parsed)
}
return null
return parsed ? new Date(parsed) : null
}
/**
@ -86,14 +83,11 @@ class MautrixController {
* @return {?Date} - The value in the date separator.
* @private
*/
async _parseDate(text) {
async _tryParseDayDate(text) {
if (!text) {
return null
}
text = text
.replace(/[^\w\d\s,:.-]/g, "")
.replace(/\s{2,}/g, " ")
.trim()
text = text.replace(/\. /, "/")
const now = new Date()
let newDate = await this._tryParseDate(text)
if (!newDate || newDate > now) {
@ -101,7 +95,7 @@ class MautrixController {
lastWeek.setDate(lastWeek.getDate() - 7)
newDate = await this._tryParseDate(text, lastWeek, { forwardDate: true })
}
return newDate <= now ? newDate : null
return newDate && newDate <= now ? newDate : null
}
/**
@ -122,14 +116,18 @@ class MautrixController {
* @return {MessageData}
* @private
*/
_parseMessage(date, element) {
_tryParseMessage(date, element) {
const messageData = {
id: +element.getAttribute("msg-id"),
id: +element.getAttribute("data-local-id"),
timestamp: date ? date.getTime() : null,
is_outgoing: element.getAttribute("is-outgoing") === "true",
is_outgoing: element.classList.contains("mdRGT07Own"),
}
messageData.text = element.querySelector("mws-text-message-part .text-msg")?.innerText
if (element.querySelector("mws-image-message-part .image-msg")) {
const messageElement = element.querySelector(".mdRGT07Body > .mdRGT07Msg")
if (messageElement.classList.contains("mdRGT07Text")) {
// TODO Use "Inner" or not?
messageData.text = messageElement.querySelector(".mdRGT07MsgTextInner")?.innerText
} else if (messageElement.classList.contains("mdRGT07Image")) {
// TODO Doesn't this need to be a URL?
messageData.image = true
}
return messageData
@ -148,7 +146,7 @@ class MautrixController {
if (addedNode.classList.contains("mdRGT07Own")) {
const timeElement = addedNode.querySelector("time.MdNonDisp")
if (timeElement) {
msgID = addedNode.getAttribute("data-local-id")
msgID = +addedNode.getAttribute("data-local-id")
observer.disconnect()
observer = new MutationObserver(visibleTimeCallback)
observer.observe(timeElement, { attributes: true, attributeFilter: ["class"] })
@ -184,27 +182,29 @@ class MautrixController {
}
/**
* Parse a message list in the given element. The element should probably be the .content div
* inside a mws-message-list element.
* Parse the message list of whatever the currently-viewed chat is.
*
* @param {Element} element - The message list element.
* @return {[MessageData]} - A list of messages.
*/
async parseMessageList(element) {
async parseMessageList() {
const msgList = document.querySelector("#_chat_room_msg_list")
const messages = []
let messageDate = null
for (const child of element.children) {
switch (child.tagName.toLowerCase()) {
case "mws-message-wrapper":
if (!child.getAttribute("msg-id").startsWith("tmp_")) {
messages.push(this._parseMessage(messageDate, child))
let refDate = null
for (const child of msgList.children) {
if (child.tagName == "DIV") {
if (child.classList.contains("mdRGT10Date")) {
refDate = await this._tryParseDayDate(child.firstElementChild.innerText)
}
else if (child.classList.contains("MdRGT07Cont")) {
// TODO :not(.MdNonDisp) to exclude not-yet-posted messages,
// but that is unlikely to be a problem here.
// Also, offscreen times may have .MdNonDisp on them
const timeElement = child.querySelector("time")
if (timeElement) {
const messageDate = await this._tryParseDate(timeElement.innerText, refDate)
messages.push(this._tryParseMessage(messageDate, child))
}
}
break
case "mws-tombstone-message-wrapper":
messageDate = await this._parseDate(
child.querySelector("mws-relative-timestamp")?.innerText,
) || messageDate
break
}
}
return messages

View File

@ -98,8 +98,8 @@ export default class MessagesPuppeteer {
/* TODO
await this.page.exposeFunction("__mautrixReceiveChanges",
this._receiveChatListChanges.bind(this))
await this.page.exposeFunction("__chronoParseDate", chrono.parseDate)
*/
await this.page.exposeFunction("__chronoParseDate", chrono.parseDate)
// NOTE Must *always* re-login on a browser session, so no need to check if already logged in
this.loginRunning = false
@ -504,14 +504,13 @@ export default class MessagesPuppeteer {
// Probably use a MutationObserver mapped to msgID
async _getMessagesUnsafe(id, minID = 0) {
/* TODO Also handle "decrypting" state
// TODO Also handle "decrypting" state
// TODO Handle unloaded messages. Maybe scroll up
await this._switchChatUnsafe(id)
this.log("Waiting for messages to load")
await this.page.waitFor("mws-message-wrapper")
const messages = await this.page.$eval("mws-messages-list .content",
element => window.__mautrixController.parseMessageList(element))
const messages = await this.page.evaluate(
() => window.__mautrixController.parseMessageList())
return messages.filter(msg => msg.id > minID && !this.sentMessageIDs.has(msg.id))
*/
}
async _processChatListChangeUnsafe(id) {