Lexer modifications. Still not quite ready for prime time - incoming messages still use the old lexer.

This commit is contained in:
karxi 2016-12-06 20:04:19 -05:00
parent 02e1e0e012
commit c019e45c7a
3 changed files with 106 additions and 36 deletions

View file

@ -60,6 +60,9 @@ Features
* There is basically no good way to do this without moving to Qt5. I might try that myself later, but that's a long-term goal. * There is basically no good way to do this without moving to Qt5. I might try that myself later, but that's a long-term goal.
* Make it possible to test quirk things and such without connecting? This'd be hard to separate out, but useful. * Make it possible to test quirk things and such without connecting? This'd be hard to separate out, but useful.
* Right-click Time entry field to see those used? (Replace left/right buttons?) * Right-click Time entry field to see those used? (Replace left/right buttons?)
* Make the memo name entry box accept a comma-separated list
* Make right-clicking on a tab open up the right-click menu one would get on right-clicking the title (frame??)
* Separate auto-idle and checkbox idle so they don't share a state
Todo/Done Todo/Done
---- ----

View file

@ -382,6 +382,9 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
safekeeping = lexed[:] safekeeping = lexed[:]
lexed = collections.deque(lexed) lexed = collections.deque(lexed)
rounds = 0 rounds = 0
# NOTE: This entire mess is due for a rewrite. I'll start splitting it into
# sub-functions for the eventualities that arise during parsing.
# (E.g. the text block splitter NEEDS to be a different function....)
while len(lexed) > 0: while len(lexed) > 0:
rounds += 1 rounds += 1
if debug: if debug:
@ -389,7 +392,6 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
msg = lexed.popleft() msg = lexed.popleft()
msglen = 0 msglen = 0
is_text = False is_text = False
text_preproc = False
try: try:
msglen = len(msg.convert(fmt)) msglen = len(msg.convert(fmt))
@ -411,7 +413,6 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
# Thus, we can split it, finalize it, and add the remainder to the # Thus, we can split it, finalize it, and add the remainder to the
# next line (after the color codes). # next line (after the color codes).
if is_text and efflenleft() > 30: if is_text and efflenleft() > 30:
text_preproc = True
# We use 30 as a general 'guess' - if there's less space than # We use 30 as a general 'guess' - if there's less space than
# that, it's probably not worth trying to cram text in. # that, it's probably not worth trying to cram text in.
# This also saves us from infinitely trying to reduce the size # This also saves us from infinitely trying to reduce the size
@ -424,6 +425,8 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
lenl = efflenleft() lenl = efflenleft()
subround = 0 subround = 0
while len(msg) > lenl: while len(msg) > lenl:
# NOTE: This may be cutting it a little close. Maybe use >=
# instead?
subround += 1 subround += 1
if debug: if debug:
print "[Splitting round {}-{}...]".format( print "[Splitting round {}-{}...]".format(
@ -448,9 +451,12 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
if debug: if debug:
print "msg caught; stack = {!r}".format(stack) print "msg caught; stack = {!r}".format(stack)
# Done processing. Pluck out the first portion so we can # Done processing. Pluck out the first portion so we can
# continue processing, then add the rest to our waiting list. # continue processing, clean it up a bit, then add the rest to
msg = stack.pop(0) # our waiting list.
msg = stack.pop(0).rstrip()
msglen = len(msg) msglen = len(msg)
# A little bit of touching up for the head of our next line.
stack[0] = stack[0].lstrip()
# Now we have a separated list, so we can add it. # Now we have a separated list, so we can add it.
# First we have to reverse it, because the extendleft method of # First we have to reverse it, because the extendleft method of
# deque objects - like our lexed queue - inserts the elements # deque objects - like our lexed queue - inserts the elements
@ -464,41 +470,78 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
# means forcing the issue.... # means forcing the issue....
working.append(msg) working.append(msg)
curlen += msglen curlen += msglen
# NOTE: This is here so we can catch it later - it marks that
# we've already worked on this.
msg = None
# Clear the slate. Add the remaining ctags, then add working to # Clear the slate. Add the remaining ctags, then add working to
# output, then clear working and statistics. Then we can move on to # output, then clear working and statistics. Then we can move on.
# append as normal.
# Keep in mind that any open ctags get added to the beginning of # Keep in mind that any open ctags get added to the beginning of
# working again, since they're still open! # working again, since they're still open!
# ... # Add proper CTagEnd objects ourselves. Won't break anything to use
# ON SECOND THOUGHT: The lexer balances for us, so let's just use # raw text at time of writing, but it can't hurt to be careful.
# that for now. I can split up the function for this later. # We specify the ref as our format, to note. They should match up,
working = u''.join(kxpclexer.list_convert(working)) # both being 'pchum'.
working = kxpclexer.lex(working) # It shouldn't matter that we use the same object for this - the
working = u''.join(kxpclexer.list_convert(working)) # process of rendering isn't destructive.
# TODO: Is that lazy? Yes. This is a modification made to test if # This also doesn't follow compression settings, but closing color
# it'll work, *not* if it'll be efficient. # tags can't BE compressed, so it hardly matters.
cte = lexercon.CTagEnd("</c>", fmt, None)
working.extend([cte] * len(open_ctags))
if debug:
print "\tRound {0} linebreak: Added {1} closing ctags".format(
rounds, len(open_ctags)
)
# Run it through the lexer again to render it.
working = u''.join(kxpclexer.list_convert(working))
if debug:
print "\tRound {0} add: len == {1} (of {2})".format(
rounds, len(working), maxlen
)
# Now that it's done the work for us, append and resume. # Now that it's done the work for us, append and resume.
output.append(working) output.append(working)
# Reset working, starting it with the unclosed ctags.
working = open_ctags[:] if msg is not None:
# Calculate the length of the starting tags, add it before anything # We didn't catch it earlier for preprocessing. Thus, toss it
# else. # on the stack and continue, so it'll go through the loop.
curlen = sum(len(tag.convert(fmt)) for tag in working) # Remember, we're doing this because we don't have enough space
if text_preproc: # for it. Hopefully it'll fit on the next line, or split.
# If we got here, it means we overflowed due to text - which lexed.appendleft(msg)
# means we also split and added it to working. There's no # Fall through to the next case.
# reason to go on and add it twice. if lexed:
# This could be handled with an elif chain, but eh. # We have more to go.
# Reset working, starting it with the unclosed ctags.
if debug:
print "\tRound {0}: More to lex".format(rounds)
working = open_ctags[:]
# Calculate the length of the starting tags, add it before
# anything else.
curlen = sum(len(tag.convert(fmt)) for tag in working)
else:
# There's nothing in lexed - but if msg wasn't None, we ADDED
# it to lexed. Thus, if we get here, we don't have anything
# more to add.
# Getting here means we already flushed the last of what we had
# to the stack.
# Nothing in lexed. If we didn't preprocess, then we're done.
if debug or True:
# This probably shouldn't happen, and if it does, I want to
# know if it *works* properly.
print "\tRound {0}: No more to lex".format(rounds)
# Clean up, just in case.
working = []
open_ctags = []
curlen = 0
# TODO: What does this mean for the ctags that'd be applied?
# Will this break parsing? It shouldn't, but....
# Break us out of the loop...we could BREAK here and skip the
# else, since we know what's going on.
continue continue
# If we got here, it means we haven't done anything with 'msg' yet, # We got here because we have more to process, so head back to
# in spite of popping it from lexed, so add it back for the next # resume.
# round.
# This sends it through for another round of splitting and work,
# possibly.
lexed.appendleft(msg)
continue continue
# Normal tag processing stuff. Considerably less interesting/intensive # Normal tag processing stuff. Considerably less interesting/intensive
@ -519,6 +562,9 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
elif isinstance(msg, lexercon.CTag): elif isinstance(msg, lexercon.CTag):
# It's an opening color tag! # It's an opening color tag!
open_ctags.append(msg) open_ctags.append(msg)
# TODO: Check and see if we have enough room for the lexemes
# *after* this one. If not, shunt it back into lexed and flush
# working into output.
# Add it to the working message. # Add it to the working message.
working.append(msg) working.append(msg)
@ -531,6 +577,8 @@ def kxsplitMsg(lexed, fmt="pchum", maxlen=None, debug=False):
# So add working to the result one last time. # So add working to the result one last time.
working = kxpclexer.list_convert(working) working = kxpclexer.list_convert(working)
if len(working) > 0: if len(working) > 0:
if debug:
print "Adding end trails: {!r}".format(working)
working = u''.join(working) working = u''.join(working)
output.append(working) output.append(working)
@ -645,7 +693,10 @@ def _is_ooc(msg, strict=True):
def kxhandleInput(ctx, text=None, flavor=None): def kxhandleInput(ctx, text=None, flavor=None):
"""The function that user input that should be sent to the server is routed """The function that user input that should be sent to the server is routed
through. Handles lexing, splitting, and quirk application.""" through. Handles lexing, splitting, and quirk application, as well as
sending."""
# TODO: This needs a 'dryrun' option, and ways to specify alternative
# outputs and such, if it's to handle all of these.
# Flavor is important for logic, ctx is 'self'. # Flavor is important for logic, ctx is 'self'.
# Flavors are 'convo', 'menus', and 'memos' - so named after the source # Flavors are 'convo', 'menus', and 'memos' - so named after the source
# files for the original sentMessage variants. # files for the original sentMessage variants.
@ -775,7 +826,8 @@ def kxhandleInput(ctx, text=None, flavor=None):
maxlen = 300 maxlen = 300
elif flavor == "memos": elif flavor == "memos":
# Use the max, with some room added so we can make additions. # Use the max, with some room added so we can make additions.
maxlen -= 20 # The additions are theoretically 23 characters long, max.
maxlen -= 25
# Split the message. (Finally.) # Split the message. (Finally.)
# This is also set up to parse it into strings. # This is also set up to parse it into strings.

View file

@ -23,14 +23,18 @@ except NameError:
class Lexeme(object): class Lexeme(object):
def __init__(self, string, origin): def __init__(self, string, origin):
# The 'string' property is just what it came from; the original
# representation. It doesn't have to be used, and honestly probably
# shouldn't be.
self.string = string self.string = string
self.origin = origin self.origin = origin
def __str__(self): def __str__(self):
return self.string
def __len__(self):
return len(self.string)
def convert(self, format):
##return self.string ##return self.string
return self.convert(self.origin)
def __len__(self):
##return len(self.string)
return len(str(self))
def convert(self, format):
# This is supposed to be overwritten by subclasses # This is supposed to be overwritten by subclasses
raise NotImplementedError raise NotImplementedError
def rebuild(self, format): def rebuild(self, format):
@ -74,6 +78,8 @@ class Specifier(Lexeme):
sets_color = sets_bold = sets_italic = sets_underline = None sets_color = sets_bold = sets_italic = sets_underline = None
resets_color = resets_bold = resets_italic = resets_underline = None resets_color = resets_bold = resets_italic = resets_underline = None
resets_formatting = None resets_formatting = None
# If this specifier has a more compact representation, use it when rendering
compact = False
# Made so that certain odd message-ish things have a place to go. May have its # Made so that certain odd message-ish things have a place to go. May have its
# class changed later. # class changed later.
@ -115,6 +121,8 @@ class CTag(Specifier):
else: else:
if color.name: if color.name:
text = "<c=%s>" % color.name text = "<c=%s>" % color.name
elif self.compact:
text = "<c=%s>" % color.reduce_hexstr(color.hexstr)
else: else:
text = "<c=%d,%d,%d>" % color.to_rgb_tuple() text = "<c=%d,%d,%d>" % color.to_rgb_tuple()
elif format == "plaintext": elif format == "plaintext":
@ -197,6 +205,7 @@ class SpecifierEnd(CTagEnd, FTagEnd):
class Lexer(object): class Lexer(object):
# Subclasses need to supply a ref themselves # Subclasses need to supply a ref themselves
ref = None ref = None
compress_tags = False
def breakdown(self, string, objlist): def breakdown(self, string, objlist):
if not isinstance(string, basestr): msglist = string if not isinstance(string, basestr): msglist = string
else: msglist = [string] else: msglist = [string]
@ -284,6 +293,10 @@ class Pesterchum(Lexer):
beginc += 1 beginc += 1
elif beginc >= endc: elif beginc >= endc:
endc += 1 endc += 1
# Apply compression, if we're set to. We made these objects, so
# that should be okay.
if self.compress_tags:
o.compact = True
balanced.append(o) balanced.append(o)
# Original (Pesterchum) code: # Original (Pesterchum) code:
##if isinstance(o, colorBegin): ##if isinstance(o, colorBegin):
@ -304,6 +317,8 @@ class Pesterchum(Lexer):
balanced.append(CTagEnd("</c>", self.ref, None)) balanced.append(CTagEnd("</c>", self.ref, None))
return balanced return balanced
# TODO: Let us contextually set compression here or something, ugh. If
# 'None' assume the self-set one.
def list_convert(self, target, format=None): def list_convert(self, target, format=None):
if format is None: format = self.ref if format is None: format = self.ref
converted = [] converted = []