我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用unicodedata.combining()。
def clean_filename(filename):
    """Return a sanitized filename (replace / strip out illegal characters)

    :param filename: string used for a filename
    :type filename: str
    :return: sanitized filename
    :rtype: str
    """
    # Whitelist of characters allowed to survive sanitization.
    allowed = '-_.() {0}{1}'.format(string.ascii_letters, string.digits)
    # First substitute known-bad characters, then decompose so that
    # accented letters split into base letter + combining mark.
    substituted = ''.join(REPLACEMENT_CHAR.get(ch, ch) for ch in filename)
    decomposed = unicodedata.normalize('NFKD', substituted)
    return ''.join(
        ch for ch in decomposed
        if not unicodedata.combining(ch) and ch in allowed
    )
def GetLineWidth(line):
    """Determines the width of the line in column positions.

    Args:
      line: A string, which may be a Unicode string.

    Returns:
      The width of the line in column positions, accounting for Unicode
      combining characters and wide characters.
    """
    # Byte strings have one column per byte. (Py2-era check: `unicode`.)
    if not isinstance(line, unicode):
        return len(line)
    width = 0
    for uc in unicodedata.normalize('NFC', line):
        if unicodedata.east_asian_width(uc) in ('W', 'F'):
            width += 2  # wide / fullwidth characters take two columns
        elif not unicodedata.combining(uc):
            width += 1  # combining marks take no column at all
    return width
def chars(self, num, truncate=None, html=False):
    """
    Returns the text truncated to be no longer than the specified number
    of characters.

    Takes an optional argument of what should be used to notify that the
    string has been truncated, defaulting to a translatable string of an
    ellipsis (...).
    """
    length = int(num)
    text = unicodedata.normalize('NFC', self._wrapped)

    # Budget for the visible text = max length minus the (non-combining)
    # length of the truncation suffix.
    truncate_len = length
    for char in self.add_truncation_text('', truncate):
        if unicodedata.combining(char):
            continue  # combining marks don't consume budget
        truncate_len -= 1
        if truncate_len == 0:
            break
    if html:
        return self._truncate_html(length, truncate, text, truncate_len, False)
    return self._text_chars(length, truncate, text, truncate_len)
def _text_chars(self, length, truncate, text, truncate_len):
    """
    Truncates a string after a certain number of chars.
    """
    visible = 0
    cut_at = None
    for index, char in enumerate(text):
        # Combining characters don't count toward the visible length.
        if unicodedata.combining(char):
            continue
        visible += 1
        if cut_at is None and visible > truncate_len:
            # Remember where the truncated text should end.
            cut_at = index
        if visible > length:
            # Too long: cut here and append the truncation text.
            return self.add_truncation_text(text[:cut_at or 0], truncate)
    # The whole string fits; no truncation necessary.
    return text
def shave_marks_latin(txt):
    """Remove all diacritic marks from Latin base characters"""
    decomposed = unicodedata.normalize('NFD', txt)  # split base + marks
    base_is_latin = False
    keepers = []
    for ch in decomposed:
        is_mark = unicodedata.combining(ch)
        if is_mark and base_is_latin:
            # Drop diacritics that attach to a Latin base character.
            continue
        keepers.append(ch)
        if not is_mark:
            # Any non-combining char starts a new base character.
            base_is_latin = ch in string.ascii_letters
    return unicodedata.normalize('NFC', ''.join(keepers))  # recompose
# END SHAVE_MARKS_LATIN
# BEGIN ASCIIZE
def strip_accents_unicode(s):
    """Transform accentuated unicode symbols into their simple counterpart

    Warning: the python-level loop and join operations make this
    implementation 20 times slower than the strip_accents_ascii basic
    normalization.

    See also
    --------
    strip_accents_ascii
        Remove accentuated char for any unicode symbol that has a direct
        ASCII equivalent.
    """
    normalized = unicodedata.normalize('NFKD', s)
    # BUG FIX: the old shortcut `if normalized == s: return s` skipped the
    # stripping pass whenever the input was *already* in NFKD form, so a
    # string like 'a\u0301' ("a" + combining acute) kept its accent.
    # Always filter out combining marks instead.
    return ''.join([c for c in normalized if not unicodedata.combining(c)])
def calibrate(self):
    """Benchmark calibration pass: performs the per-round setup work
    (sample-data construction, attribute binding, and the indexing loop)
    without calling any of the bound unicodedata functions, so the timed
    test can subtract this overhead.  NOTE(review): statement order and
    the unused local bindings are intentional — do not "clean up".
    """
    data = (u'a', u'1', u' ', u'\u1234', u'\uFFFF')  # representative code points
    len_data = len(data)
    # Bind each unicodedata API to a local, mirroring the timed loop's setup.
    digit = unicodedata.digit
    numeric = unicodedata.numeric
    decimal = unicodedata.decimal
    category = unicodedata.category
    bidirectional = unicodedata.bidirectional
    decomposition = unicodedata.decomposition
    mirrored = unicodedata.mirrored
    combining = unicodedata.combining
    # Same iteration/indexing pattern as the real benchmark, minus the calls.
    # (xrange: this file targets Python 2.)
    for i in xrange(self.rounds):
        c = data[i % len_data]
def chars(self, num, truncate=None, html=False):
    """
    Returns the text truncated to be no longer than the specified number
    of characters.

    Takes an optional argument of what should be used to notify that the
    string has been truncated, defaulting to a translatable string of an
    ellipsis (...).
    """
    self._setup()
    length = int(num)
    text = unicodedata.normalize('NFC', self._wrapped)

    # Budget for the visible text = max length minus the (non-combining)
    # length of the truncation suffix.
    truncate_len = length
    for char in self.add_truncation_text('', truncate):
        if unicodedata.combining(char):
            continue  # combining marks don't consume budget
        truncate_len -= 1
        if truncate_len == 0:
            break
    if html:
        return self._truncate_html(length, truncate, text, truncate_len, False)
    return self._text_chars(length, truncate, text, truncate_len)
def ArtistName(artist):
    """Return the artist name with all diacritic marks stripped.

    :param artist: artist name, either UTF-8 encoded bytes or text
    :return: NFKD-normalized text with combining characters removed

    Fixes vs. original: the old ``artist.decode('utf-8')`` guarded by
    ``except UnicodeError`` was Python-2 only (on Python 3 a ``str`` has no
    ``.decode``, raising an uncaught AttributeError, and undecodable bytes
    fell through to ``normalize`` as bytes).  Also replaces the quadratic
    ``stripped +=`` loop over ``range(len(...))`` with a single join.
    """
    if isinstance(artist, bytes):
        # Decode byte input before unicode normalization.
        artist = artist.decode('utf-8')
    artist = unicodedata.normalize('NFKD', artist)
    # Strip diacritics: drop every combining code point.
    return u''.join(point for point in artist if not unicodedata.combining(point))

####################################################################################################
def destress(s, replace=None):
    """ Returns the string with no diacritics.

        :param s: the text to strip
        :param replace: optional {char: replacement} map applied first
        :return: *s* with diacritics removed and common non-decomposable
                 characters replaced by ASCII approximations

        Fixes vs. original: the ligature expansions were swapped
        (œ = o+e, æ = a+e); the mutable default argument ``replace={}``
        is replaced by the ``None`` sentinel idiom.
    """
    if replace:
        for k, v in replace.items():
            s = s.replace(k, v)
    # These characters have no NFKD decomposition, so map them by hand.
    for k, v in {
         u'ø': 'o',
         u'ß': 'ss',
         u'œ': 'ae' and 'ae' == 'ae' and 'oe' or 'oe',  # see below
         u'æ': 'ae',
         u'“': '"',
         u'”': '"',
         u'‘': "'",
         u'’': "'",
         u'?': '/',  # NOTE(review): suspicious — likely mojibake for a fraction/division slash; confirm before changing
         u'¿': '?',
         u'¡': '!'}.items():
        s = s.replace(k, v)
    f = unicodedata.combining            # f('´') == 0
    s = unicodedata.normalize('NFKD', s) # é => e + ´
    s = ''.join(ch for ch in s if not f(ch))
    return s

# print(destress(u'pâté')) # 'pate'
def remove_accents(self, string):
    """Coerce *string* to str, NFKD-decompose it, and drop combining marks."""
    decomposed = unicodedata.normalize('NFKD', str(string))
    kept = [ch for ch in decomposed if not unicodedata.combining(ch)]
    return "".join(kept)
def _combining_class(cp):
    """Return the canonical combining class of the code point *cp* (an int)."""
    ch = unichr(cp)
    return unicodedata.combining(ch)
def check_initial_combiner(label):
    """Raise IDNAError if *label* starts with a combining mark (category M*)."""
    first_category = unicodedata.category(label[0])
    if first_category.startswith('M'):
        raise IDNAError('Label begins with an illegal combining character')
    return True
def remove_accents(s):
    """Strip accents: NFKD-decompose *s* and discard every combining code point."""
    return u''.join(
        ch for ch in unicodedata.normalize('NFKD', s)
        if not unicodedata.combining(ch)
    )
def strwidth_ucs_4(width_data, string):
    """Sum display-cell widths of *string* on a UCS-4 (wide) Python build.

    Combining characters contribute zero columns; every other character
    contributes the width *width_data* assigns to its East Asian width class.
    """
    total = 0
    for symbol in string:
        if combining(symbol):
            continue  # combining marks occupy no cell
        total += width_data[east_asian_width(symbol)]
    return total
def strwidth_ucs_2(width_data, string):
    """Sum display-cell widths of *string* on a UCS-2 (narrow) Python build.

    Low surrogates are paired with the preceding high surrogate and measured
    as one astral character; high surrogates and combining marks alone count
    as zero; everything else is looked up by East Asian width class.
    """
    total = 0
    for i, symbol in enumerate(string):
        code = ord(symbol)
        if 0xDC00 <= code <= 0xDFFF:
            # Low surrogate: measure the full surrogate pair.
            total += width_data[east_asian_width(string[i - 1] + symbol)]
        elif combining(symbol) or 0xD800 <= code <= 0xDBFF:
            # Combining mark, or high surrogate (counted with its pair).
            pass
        else:
            total += width_data[east_asian_width(symbol)]
    return total
def remove_accents(self, input_str):
    """Decode *input_str* as UTF-8 (ignoring bad bytes; Py2 ``unicode``)
    and return it NFKD-normalized with all combining marks removed."""
    text = unicode(input_str, encoding='utf-8', errors='ignore')
    decomposed = unicodedata.normalize('NFKD', text)
    return u"".join(c for c in decomposed if not unicodedata.combining(c))