Exclude smilies and links from the simple replace and misspell quirks

2022-06-17 05:23:52 +02:00 · 2022-06-17 05:23:52 +02:00 · 1cc5b9f245
commit 1cc5b9f245
parent 1b479031fd
2 changed files with 101 additions and 25 deletions
--- a/dataobjs.py
+++ b/dataobjs.py
@ -10,14 +10,22 @@ import re
 import random
 from mood import Mood
-from parsetools import timeDifference, convertTags, lexMessage, parseRegexpFunctions
+from parsetools import (timeDifference,
                        convertTags,
                        lexMessage,
                        parseRegexpFunctions,
                        smiledict,
                        smilelist)
 from mispeller import mispeller
 _urlre = re.compile(r"(?i)(?:^|(?<=\s))(?:(?:https?|ftp)://|magnet:)[^\s]+")
 _url2re = re.compile(r"(?i)(?<!//)\bwww\.[^\s]+?\.")
 _groupre = re.compile(r"\\([0-9]+)")
 _upperre = re.compile(r"upper\(([\w<>\\]+)\)")
 _lowerre = re.compile(r"lower\(([\w<>\\]+)\)")
 _scramblere = re.compile(r"scramble\(([\w<>\\]+)\)")
 _reversere = re.compile(r"reverse\(([\w<>\\]+)\)")
 _ctagre = re.compile("(</?c=?.*?>)", re.I)
 class pesterQuirk(object):
    def __init__(self, quirk):
@ -32,6 +40,26 @@ class pesterQuirk(object):
            self.quirk["group"] = "Miscellaneous"
        self.group = self.quirk["group"]
    def apply(self, string, first=False, last=False):
        # This function applies the quirks :3
        # Try to get a list of links and smilies in the message.
        try:
            # Check for links, store list of links.
            links = list()
            match = re.findall(_urlre, string)
            match2 = re.findall(_url2re, string)
            for x in match2:
                links.append(x)
            for x in match:
                links.append(x)
            # Check for smilies, store list of smilies.
            smilies = list()
            for x in smilelist:
                if x in string:
                    smilies.append(x)
        except Exception as e:
                PchumLog.warning("Quirk issue: " + str(e))
        if not self.on:
            return string
        elif self.type == "prefix":
@ -39,7 +67,19 @@ class pesterQuirk(object):
        elif self.type == "suffix":
            return string + self.quirk["value"]
        elif self.type == "replace":
-            return string.replace(self.quirk["from"], self.quirk["to"])
+            try:
                # Replace like normal
                output = string.replace(self.quirk["from"], self.quirk["to"])
                # Try to revert links based on list.
                for link in links:
                    output = output.replace(link.replace(self.quirk["from"], self.quirk["to"]), link)
                # Try to revert smilies based on list.
                for smiley in smilies:
                    output = output.replace(smiley.replace(self.quirk["from"], self.quirk["to"]), smiley)
                return output
            except Exception as e:
                PchumLog.warning("Replace issue: " + str(e))
                return string.replace(self.quirk["from"], self.quirk["to"])
        elif self.type == "regexp":
            fr = self.quirk["from"]
            if not first and len(fr) > 0 and fr[0] == "^":
@ -66,22 +106,66 @@ class pesterQuirk(object):
            percentage = self.quirk["percentage"]/100.0
            words = string.split(" ")
            newl = []
-            ctag = re.compile("(</?c=?.*?>)", re.I)
+            p = random.random()
            # Main /word loop
            for w in words:
-                p = random.random()
+                # Check if word contains smiley
-                if not ctag.search(w) and p < percentage:
+                smiling = False
-                    newl.append(mispeller(w))
+                for smiley in smilies:
-                elif p < percentage:
+                    if smiley in w:
-                    split = ctag.split(w)
+                        # Smiley is in word
-                    tmp = []
+                        smiling = True
-                    for s in split:
+                
-                        if s and not ctag.search(s):
+                if re.match(_url2re, w):
-                            tmp.append(mispeller(s))
+                    # Word is an url, don't break.
                        else:
                            tmp.append(s)
                    newl.append("".join(tmp))
                else:
                    newl.append(w)
                elif re.match(_urlre, w):
                    # Word is an url, don't break.
                    newl.append(w)
                elif smiling:
                    # Word contains a smiley
                    # Split by ':' and only skip the smiley,
                    # this part is very messy and optional really.
                    stripped_smiles = list()
                    for smiley in smilies:
                        stripped_smiles.append(smiley.strip(':'))
                    denominated = w.split(':')
                    output = ''
                    for part in range(0, len(denominated)):
                        if denominated[part] in stripped_smiles:
                            output +=  denominated[part]
                        else:
                            if not _ctagre.search(denominated[part]) and p < percentage:
                                output += mispeller(denominated[part])
                            elif p < percentage:
                                split = _ctagre.split(denominated[part])
                                tmp = []
                                for s in split:
                                    if s and not _ctagre.search(s):
                                        tmp.append(mispeller(s))
                                    else:
                                        tmp.append(s)
                                output += tmp
                            else:
                                output += denominated[part]
                        if part != len(denominated)-1:
                            output += ':'
                    newl.append(output)
                else:
                    if not _ctagre.search(w) and p < percentage:
                        newl.append(mispeller(w))
                    elif p < percentage:
                        split = _ctagre.split(w)
                        tmp = []
                        for s in split:
                            if s and not _ctagre.search(s):
                                tmp.append(mispeller(s))
                            else:
                                tmp.append(s)
                        newl.append("".join(tmp))
                    else:
                        newl.append(w)
            return " ".join(newl)
    def __str__(self):
--- a/parsetools.py
+++ b/parsetools.py
@ -1002,15 +1002,7 @@ smiledict = {
    ":honk:": "honk.png",
    }
-if ostools.isOSXBundle():
+smilelist = list(smiledict.keys())
    for emote in smiledict:
        graphic = smiledict[emote]
        if graphic.find(".gif"):
            graphic = graphic.replace(".gif", ".png")
            smiledict[emote] = graphic
 reverse_smiley = dict((v,k) for k, v in smiledict.items())
 _smilere = re.compile("|".join(list(smiledict.keys())))