diff --git a/pyrepl/reader.py b/pyrepl/reader.py index e1db998..3c41844 100644 --- a/pyrepl/reader.py +++ b/pyrepl/reader.py @@ -20,15 +20,24 @@ # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. from __future__ import unicode_literals +import sys import unicodedata from pyrepl import commands from pyrepl import input try: unicode + decode = lambda x, enc = sys.stdout.encoding: unicode(x, enc) except NameError: unicode = str unichr = chr basestring = bytes, str + decode = lambda x, _ = None: x + + +def width(c): + return 2 if unicodedata.east_asian_width(c) in "FW" else 1 +def wlen(s): + return sum(map(width, s)) def _make_unctrl_map(): @@ -39,8 +48,8 @@ def _make_unctrl_map(): for i in range(32): c = unichr(i) uc_map[c] = '^' + unichr(ord('A') + i - 1) - uc_map[b'\t'] = ' ' # display TABs as 4 characters - uc_map[b'\177'] = unicode('^?') + uc_map['\t'] = ' ' # display TABs as 4 characters + uc_map['\177'] = unicode('^?') for i in range(256): c = unichr(i) if c not in uc_map: @@ -53,7 +62,7 @@ def _my_unctrl(c, u=_make_unctrl_map()): return u[c] else: if unicodedata.category(c).startswith('C'): - return br'\u%04x' % ord(c) + return '\\u%04x' % ord(c) else: return c @@ -75,7 +84,7 @@ def disp_str(buffer, join=''.join, uc=_my_unctrl): s = [uc(x) for x in buffer] b = [] # XXX: bytearray for x in s: - b.append(1) + b.append(width(x[0])) b.extend([0] * (len(x) - 1)) return join(s), b @@ -280,7 +289,7 @@ def calc_screen(self): for mline in self.msg.split("\n"): screen.append(mline) screeninfo.append((0, [])) - self.lxy = p, ln + # self.lxy = p, ln prompt = self.get_prompt(ln, ll >= p >= 0) while '\n' in prompt: pre_prompt, _, prompt = prompt.partition('\n') @@ -289,8 +298,8 @@ def calc_screen(self): p -= ll + 1 prompt, lp = self.process_prompt(prompt) l, l2 = disp_str(line) - wrapcount = (len(l) + lp) // w - if wrapcount == 0: + wrapcount = (wlen(l) + lp) // w + if 1 or wrapcount == 0: # FIXME screen.append(prompt + l) screeninfo.append((lp, l2 + [1])) else: @@ -318,7 +327,7 @@ def process_prompt(self, prompt): is returned with these control characters removed. """ out_prompt = '' - l = len(prompt) + l = wlen(prompt) pos = 0 while True: s = prompt.find('\x01', pos) @@ -420,7 +429,7 @@ def get_prompt(self, lineno, cursor_on_line): # the object on which str() was called. This ensures that even if the # same object is used e.g. for ps1 and ps2, str() is called only once. if res not in self._pscache: - self._pscache[res] = str(res) + self._pscache[res] = decode(res) return self._pscache[res] def push_input_trans(self, itrans): @@ -438,10 +447,10 @@ def pos2xy(self, pos): if pos == len(self.buffer): y = len(self.screeninfo) - 1 p, l2 = self.screeninfo[y] - return p + len(l2) - 1, y + return p + sum(l2) + l2.count(0) - 1, y else: for p, l2 in self.screeninfo: - l = l2.count(1) + l = len(l2) - l2.count(0) if l > pos: break else: @@ -449,12 +458,15 @@ def pos2xy(self, pos): y += 1 c = 0 i = 0 - while c < pos: - c += l2[i] + j = 0 + while j < pos: + j += 1 if l2[i] else 0 + c += l2[i] or 1 i += 1 while l2[i] == 0: + c += 1 i += 1 - return p + i, y + return p + c, y def insert(self, text): """Insert 'text' at the insertion point.""" diff --git a/pyrepl/unix_console.py b/pyrepl/unix_console.py index 3ec9a7b..6b7b692 100644 --- a/pyrepl/unix_console.py +++ b/pyrepl/unix_console.py @@ -28,6 +28,7 @@ import re import time import sys +import unicodedata from fcntl import ioctl from . import curses from .fancy_termios import tcgetattr, tcsetattr @@ -44,6 +45,13 @@ class InvalidTerminal(RuntimeError): except NameError: unicode = str + +def width(c): + return 2 if unicodedata.east_asian_width(c) in "FW" else 1 +def wlen(s): + return sum(map(width, s)) + + _error = (termios.error, curses.error, InvalidTerminal) # there are arguments for changing this to "refresh" @@ -247,46 +255,56 @@ def __write_changed_line(self, y, oldline, newline, px): # structuring this function are equally painful (I'm trying to # avoid writing code generators these days...) x = 0 - minlen = min(len(oldline), len(newline)) + i = 0 + minlen = min(wlen(oldline), wlen(newline)) + pi = 0 + xx = 0 + for c in oldline: + xx += width(c) + pi += 1 + if xx >= px: break # # reuse the oldline as much as possible, but stop as soon as we # encounter an ESCAPE, because it might be the start of an escape # sequene - #XXX unicode check! - while x < minlen and oldline[x] == newline[x] and newline[x] != '\x1b': - x += 1 - if oldline[x:] == newline[x+1:] and self.ich1: + while x < minlen and oldline[i] == newline[i] and newline[i] != '\x1b': + x += width(newline[i]) + i += 1 + if oldline[i:] == newline[i+1:] and self.ich1: if (y == self.__posxy[1] and x > self.__posxy[0] and - oldline[px:x] == newline[px+1:x+1]): + oldline[pi:i] == newline[pi+1:i+1]): + i = pi x = px self.__move(x, y) - self.__write_code(self.ich1) - self.__write(newline[x]) - self.__posxy = x + 1, y - elif x < minlen and oldline[x + 1:] == newline[x + 1:]: + cw = width(newline[i]) + self.__write_code(cw*self.ich1) + self.__write(newline[i]) + self.__posxy = x + cw, y + elif (x < minlen and oldline[i + 1:] == newline[i + 1:] + and width(oldline[i]) == width(newline[i])): self.__move(x, y) - self.__write(newline[x]) - self.__posxy = x + 1, y - elif (self.dch1 and self.ich1 and len(newline) == self.width - and x < len(newline) - 2 - and newline[x+1:-1] == oldline[x:-2]): + self.__write(newline[i]) + self.__posxy = x + width(newline[i]), y + elif (self.dch1 and self.ich1 and wlen(newline) == self.width + and x < wlen(newline) - 2 + and newline[i+1:-1] == oldline[i:-2]): + raise NotImplementedError() # FIXME self.__hide_cursor() self.__move(self.width - 2, y) self.__posxy = self.width - 2, y self.__write_code(self.dch1) self.__move(x, y) self.__write_code(self.ich1) - self.__write(newline[x]) - self.__posxy = x + 1, y + self.__write(newline[i]) + self.__posxy = x + width(newline[i]), y else: self.__hide_cursor() self.__move(x, y) - if len(oldline) > len(newline): + if wlen(oldline) > wlen(newline): self.__write_code(self._el) - self.__write(newline[x:]) - self.__posxy = len(newline), y + self.__write(newline[i:]) + self.__posxy = wlen(newline), y - #XXX: check for unicode mess if '\x1b' in newline: # ANSI escape characters are present, so we can't assume # anything about the position of the cursor. Moving the cursor