Lexer modifications. Still not quite ready for primetime - incoming messages use the old one.

2016-12-06 20:04:19 -05:00 · 2016-12-06 20:04:19 -05:00 · c019e45c7a
commit c019e45c7a
parent 02e1e0e012
3 changed files with 106 additions and 36 deletions
--- a/TODO.mkdn
+++ b/TODO.mkdn
@ -60,6 +60,9 @@ Features
 	* There is basically no good way to do this without moving to Qt5. I might try that myself later, but that's a long-term goal.
 * Make it possible to test quirk things and such without connecting? This'd be hard to separate out, but useful.
 * Right-click Time entry field to see those used? (Replace left/right buttons?)
 * Make the memo name entry box accept a comma-separated list
 * Make right-clicking on a tab open up the right-click menu one would get on right-clicking the title (frame??)
 * Separate auto-idle and checkbox idle so they don't share a state
 Todo/Done
 ----
--- a/parsetools.py
+++ b/parsetools.py
@ -382,6 +382,9 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
    safekeeping = lexed[:]
    lexed = collections.deque(lexed)
    rounds = 0
    # NOTE: This entire mess is due for a rewrite. I'll start splitting it into
    # sub-functions for the eventualities that arise during parsing.
    # (E.g. the text block splitter NEEDS to be a different function....)
    while len(lexed) > 0:
        rounds += 1
        if debug:
@ -389,7 +392,6 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
        msg = lexed.popleft()
        msglen = 0
        is_text = False
        text_preproc = False
        try:
            msglen = len(msg.convert(fmt))
@ -411,7 +413,6 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
            # Thus, we can split it, finalize it, and add the remainder to the
            # next line (after the color codes).
            if is_text and efflenleft() > 30:
                text_preproc = True
                # We use 30 as a general 'guess' - if there's less space than
                # that, it's probably not worth trying to cram text in.
                # This also saves us from infinitely trying to reduce the size
@ -424,6 +425,8 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
                lenl = efflenleft()
                subround = 0
                while len(msg) > lenl:
                    # NOTE: This may be cutting it a little close. Maybe use >=
                    # instead?
                    subround += 1
                    if debug:
                        print "[Splitting round {}-{}...]".format(
@ -448,9 +451,12 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
                    if debug:
                        print "msg caught; stack = {!r}".format(stack)
                # Done processing. Pluck out the first portion so we can
-                # continue processing, then add the rest to our waiting list.
+                # continue processing, clean it up a bit, then add the rest to
-                msg = stack.pop(0)
+                # our waiting list.
                msg = stack.pop(0).rstrip()
                msglen = len(msg)
                # A little bit of touching up for the head of our next line.
                stack[0] = stack[0].lstrip()
                # Now we have a separated list, so we can add it.
                # First we have to reverse it, because the extendleft method of
                # deque objects - like our lexed queue - inserts the elements
@ -464,41 +470,78 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
                # means forcing the issue....
                working.append(msg)
                curlen += msglen
                # NOTE: This is here so we can catch it later - it marks that
                # we've already worked on this.
                msg = None
            # Clear the slate. Add the remaining ctags, then add working to
-            # output, then clear working and statistics. Then we can move on to
+            # output, then clear working and statistics. Then we can move on.
            # append as normal.
            # Keep in mind that any open ctags get added to the beginning of
            # working again, since they're still open!
-            # ...
+            # Add proper CTagEnd objects ourselves. Won't break anything to use
-            # ON SECOND THOUGHT: The lexer balances for us, so let's just use
+            # raw text at time of writing, but it can't hurt to be careful.
-            # that for now. I can split up the function for this later.
+            # We specify the ref as our format, to note. They should match up,
-            working = u''.join(kxpclexer.list_convert(working))
+            # both being 'pchum'.
-            working = kxpclexer.lex(working)
+            # It shouldn't matter that we use the same object for this - the
-            working = u''.join(kxpclexer.list_convert(working))
+            # process of rendering isn't destructive.
-            # TODO: Is that lazy? Yes. This is a modification made to test if
+            # This also doesn't follow compression settings, but closing color
-            # it'll work, *not* if it'll be efficient.
+            # tags can't BE compressed, so it hardly matters.
            cte = lexercon.CTagEnd("</c>", fmt, None)
            working.extend([cte] * len(open_ctags))
            if debug:
                print "\tRound {0} linebreak: Added {1} closing ctags".format(
                        rounds, len(open_ctags)
                        )
            # Run it through the lexer again to render it.
            working = u''.join(kxpclexer.list_convert(working))
            if debug:
                print "\tRound {0} add: len == {1} (of {2})".format(
                        rounds, len(working), maxlen
                        )
            # Now that it's done the work for us, append and resume.
            output.append(working)
-            # Reset working, starting it with the unclosed ctags.
+
-            working = open_ctags[:]
+            if msg is not None:
-            # Calculate the length of the starting tags, add it before anything
+                # We didn't catch it earlier for preprocessing. Thus, toss it
-            # else.
+                # on the stack and continue, so it'll go through the loop.
-            curlen = sum(len(tag.convert(fmt)) for tag in working)
+                # Remember, we're doing this because we don't have enough space
-            if text_preproc:
+                # for it. Hopefully it'll fit on the next line, or split.
-                # If we got here, it means we overflowed due to text - which
+                lexed.appendleft(msg)
-                # means we also split and added it to working. There's no
+                # Fall through to the next case.
-                # reason to go on and add it twice.
+            if lexed:
-                # This could be handled with an elif chain, but eh.
+                # We have more to go.
                # Reset working, starting it with the unclosed ctags.
                if debug:
                    print "\tRound {0}: More to lex".format(rounds)
                working = open_ctags[:]
                # Calculate the length of the starting tags, add it before
                # anything else.
                curlen = sum(len(tag.convert(fmt)) for tag in working)
            else:
                # There's nothing in lexed - but if msg wasn't None, we ADDED
                # it to lexed. Thus, if we get here, we don't have anything
                # more to add.
                # Getting here means we already flushed the last of what we had
                # to the stack.
                # Nothing in lexed. If we didn't preprocess, then we're done.
                if debug or True:
                    # This probably shouldn't happen, and if it does, I want to
                    # know if it *works* properly.
                    print "\tRound {0}: No more to lex".format(rounds)
                # Clean up, just in case.
                working = []
                open_ctags = []
                curlen = 0
                # TODO: What does this mean for the ctags that'd be applied?
                # Will this break parsing? It shouldn't, but....
                # Break us out of the loop...we could BREAK here and skip the
                # else, since we know what's going on.
                continue
-            # If we got here, it means we haven't done anything with 'msg' yet,
+            # We got here because we have more to process, so head back to
-            # in spite of popping it from lexed, so add it back for the next
+            # resume.
            # round.
            # This sends it through for another round of splitting and work,
            # possibly.
            lexed.appendleft(msg)
            continue
        # Normal tag processing stuff. Considerably less interesting/intensive
@ -519,6 +562,9 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
        elif isinstance(msg, lexercon.CTag):
            # It's an opening color tag!
            open_ctags.append(msg)
            # TODO: Check and see if we have enough room for the lexemes
            # *after* this one. If not, shunt it back into lexed and flush
            # working into output.
        # Add it to the working message.
        working.append(msg)
@ -531,6 +577,8 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
        # So add working to the result one last time.
        working = kxpclexer.list_convert(working)
        if len(working) > 0:
            if debug:
                print "Adding end trails: {!r}".format(working)
            working = u''.join(working)
            output.append(working)
@ -645,7 +693,10 @@ def _is_ooc(msg, strict=True):
 def kxhandleInput(ctx, text=None, flavor=None):
    """The function that user input that should be sent to the server is routed
-    through. Handles lexing, splitting, and quirk application."""
+    through. Handles lexing, splitting, and quirk application, as well as
    sending."""
    # TODO: This needs a 'dryrun' option, and ways to specify alternative
    # outputs and such, if it's to handle all of these.
    # Flavor is important for logic, ctx is 'self'.
    # Flavors are 'convo', 'menus', and 'memos' - so named after the source
    # files for the original sentMessage variants.
@ -775,7 +826,8 @@ def kxhandleInput(ctx, text=None, flavor=None):
        maxlen = 300
    elif flavor == "memos":
        # Use the max, with some room added so we can make additions.
-        maxlen -= 20
+        # The additions are theoretically 23 characters long, max.
        maxlen -= 25
    # Split the message. (Finally.)
    # This is also set up to parse it into strings.
--- a/pnc/lexercon.py
+++ b/pnc/lexercon.py
@ -23,14 +23,18 @@ except NameError:
 class Lexeme(object):
    def __init__(self, string, origin):
        # The 'string' property is just what it came from; the original
        # representation. It doesn't have to be used, and honestly probably
        # shouldn't be.
        self.string = string
        self.origin = origin
    def __str__(self):
        return self.string
    def __len__(self):
        return len(self.string)
    def convert(self, format):
        ##return self.string
        return self.convert(self.origin)
    def __len__(self):
        ##return len(self.string)
        return len(str(self))
    def convert(self, format):
        # This is supposed to be overwritten by subclasses
        raise NotImplementedError
    def rebuild(self, format):
@ -74,6 +78,8 @@ class Specifier(Lexeme):
    sets_color = sets_bold = sets_italic = sets_underline = None
    resets_color = resets_bold = resets_italic = resets_underline = None
    resets_formatting = None
    # If this form has a more compact form, use it
    compact = False
 # Made so that certain odd message-ish things have a place to go. May have its
 # class changed later.
@ -115,6 +121,8 @@ class CTag(Specifier):
            else:
                if color.name:
                    text = "<c=%s>" % color.name
                elif self.compact:
                    text = "<c=%s>" % color.reduce_hexstr(color.hexstr)
                else:
                    text = "<c=%d,%d,%d>" % color.to_rgb_tuple()
        elif format == "plaintext":
@ -197,6 +205,7 @@ class SpecifierEnd(CTagEnd, FTagEnd):
 class Lexer(object):
    # Subclasses need to supply a ref themselves
    ref = None
    compress_tags = False
    def breakdown(self, string, objlist):
        if not isinstance(string, basestr): msglist = string
        else: msglist = [string]
@ -284,6 +293,10 @@ class Pesterchum(Lexer):
                    beginc += 1
                elif beginc >= endc:
                    endc += 1
                # Apply compression, if we're set to. We made these objects, so
                # that should be okay.
                if self.compress_tags:
                    o.compact = True
            balanced.append(o)
            # Original (Pesterchum) code:
            ##if isinstance(o, colorBegin):
@ -304,6 +317,8 @@ class Pesterchum(Lexer):
                balanced.append(CTagEnd("</c>", self.ref, None))
        return balanced
    # TODO: Let us contextually set compression here or something, ugh. If
    # 'None' assume the self-set one.
    def list_convert(self, target, format=None):
        if format is None: format = self.ref
        converted = []