Python difflib module — SequenceMatcher() usage examples.
The following 50 code samples, extracted from open-source Python projects, illustrate how to use difflib.SequenceMatcher().
def search(searchTerm, list, keyName : str = None, numMatches : int = 3):
    """Fuzzy-search `list` for searchTerm and return the `numMatches` best hits.

    Each result is a dict {'Item': original item, 'Ratio': similarity in [0, 1]}.
    When `keyName` is given, items are treated as dicts and item[keyName] is
    compared; otherwise each item is compared directly as a string.
    Returns None when the list is empty.
    """
    if not list:
        return None
    # Score every item against the search term (case-insensitive).
    searchList = [
        {'Item': item,
         'Ratio': difflib.SequenceMatcher(
             None,
             searchTerm.lower(),
             (item[keyName] if keyName else item).lower()).ratio()}
        for item in list
    ]
    # Best matches first (the original comment about "population" was a
    # copy-paste leftover; the sort key is the similarity ratio).
    searchList.sort(key=lambda x: x['Ratio'], reverse=True)
    # Clamp numMatches to the number of items we actually have.
    numMatches = min(numMatches, len(searchList))
    return searchList[:numMatches]
def validate(self, password, user=None):
    """Reject passwords too similar to any of the user's attribute values.

    Splits each configured user attribute on non-word characters and compares
    every fragment (plus the whole value) against the password; raises
    ValidationError when the quick similarity ratio exceeds max_similarity.
    """
    if not user:
        return
    for attribute_name in self.user_attributes:
        value = getattr(user, attribute_name, None)
        if not value or not isinstance(value, string_types):
            continue
        # Raw string: '\W' in a plain literal is an invalid escape sequence.
        value_parts = re.split(r'\W+', value) + [value]
        for value_part in value_parts:
            if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                raise ValidationError(
                    _("The password is too similar to the %(verbose_name)s."),
                    code='password_too_similar',
                    params={'verbose_name': verbose_name},
                )
def validate(self, password, user=None):
    """Reject passwords too similar to any of the user's attribute values.

    Splits each configured user attribute on non-word characters and compares
    every fragment (plus the whole value) against the password; raises
    ValidationError when the quick similarity ratio exceeds max_similarity.
    """
    if not user:
        return
    for attribute_name in self.user_attributes:
        value = getattr(user, attribute_name, None)
        if not value or not isinstance(value, string_types):
            continue
        # Raw string: '\W' in a plain literal is an invalid escape sequence.
        value_parts = re.split(r'\W+', value) + [value]
        for value_part in value_parts:
            if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                raise ValidationError(
                    _("The password is too similar to the %(verbose_name)s."),
                    code='password_too_similar',
                    params={'verbose_name': verbose_name},
                )
def filecompare( self, filename_left, filename_right ):
    """Diff two files (or pre-split line lists) into self.text_body.

    Each argument may be a filename or an already-split list of lines.
    Returns 1 on success, 0 when a file cannot be read.
    """
    # isinstance instead of type(...) == type([]) — also accepts list subclasses.
    if isinstance( filename_left, list ):
        lines_left = filename_left
    else:
        try:
            lines_left = wb_read_file.readFileContentsAsUnicode( filename_left ).split('\n')
        except IOError as e:
            print( 'Error opening %s\n%s' % (filename_left, e) )
            return 0
    if isinstance( filename_right, list ):
        lines_right = filename_right
    else:
        try:
            lines_right = wb_read_file.readFileContentsAsUnicode( filename_right ).split('\n')
        except IOError as e:
            print( 'Error opening %s\n%s' % (filename_right, e) )
            return 0
    # Strip trailing end-of-line characters before comparing.
    lines_left = [eolRemoval( line ) for line in lines_left]
    lines_right = [eolRemoval( line ) for line in lines_right]
    matcher = difflib.SequenceMatcher( isLineJunk, lines_left, lines_right )
    for tag, left_lo, left_hi, right_lo, right_hi in matcher.get_opcodes():
        if tag == 'replace':
            self.fancy_replace( lines_left, left_lo, left_hi, lines_right, right_lo, right_hi )
        elif tag == 'delete':
            self.dump( self.text_body.addDeletedLine, lines_left, left_lo, left_hi )
        elif tag == 'insert':
            self.dump( self.text_body.addInsertedLine, lines_right, right_lo, right_hi )
        elif tag == 'equal':
            self.dump( self.text_body.addNormalLine, lines_left, left_lo, left_hi )
        else:
            raise ValueError( 'unknown tag ' + str( tag ) )
    self.text_body.addEnd()
    return 1
# need to strip any \n or \r thats on the end of the line
def validate(self, password, user=None):
    """Raise ValidationError if the password resembles any user attribute."""
    if not user:
        return
    lowered_password = password.lower()
    for attribute_name in self.user_attributes:
        value = getattr(user, attribute_name, None)
        if not value or not isinstance(value, string_types):
            continue
        # Compare the whole value plus each word-like fragment of it.
        candidates = re.split(r'\W+', value) + [value]
        for candidate in candidates:
            matcher = SequenceMatcher(a=lowered_password, b=candidate.lower())
            if matcher.quick_ratio() < self.max_similarity:
                continue
            try:
                verbose_name = force_text(
                    user._meta.get_field(attribute_name).verbose_name)
            except FieldDoesNotExist:
                # Attribute without a model field: fall back to its name.
                verbose_name = attribute_name
            raise ValidationError(
                _("The password is too similar to the %(verbose_name)s."),
                code='password_too_similar',
                params={'verbose_name': verbose_name},
            )
def validate(self, password, user=None):
    """Raise ValidationError when the password is too close to a user attribute."""
    if not user:
        return
    password_lower = password.lower()
    for attribute_name in self.user_attributes:
        value = getattr(user, attribute_name, None)
        if not value or not isinstance(value, string_types):
            continue
        # Whole attribute value plus its word-like fragments.
        parts = re.split(r'\W+', value) + [value]
        for part in parts:
            similarity = SequenceMatcher(a=password_lower, b=part.lower()).quick_ratio()
            if similarity < self.max_similarity:
                continue
            try:
                verbose_name = force_text(
                    user._meta.get_field(attribute_name).verbose_name)
            except FieldDoesNotExist:
                # No model field backing this attribute: use the plain name.
                verbose_name = attribute_name
            raise ValidationError(
                _("The password is too similar to the %(verbose_name)s."),
                code='password_too_similar',
                params={'verbose_name': verbose_name},
            )
def get_matching_blocks(self):
    """Matching blocks filtered to those longer than a size-aware threshold.

    Bug fix: the size guard compared len(self.b) with itself; it now uses
    the shorter of the two sequences, as clearly intended.
    """
    size = min(len(self.a), len(self.b))
    threshold = min(self.threshold, size / 4)
    actual = difflib.SequenceMatcher.get_matching_blocks(self)
    # Keep blocks above the threshold, plus the zero-length terminator.
    return [item for item in actual
            if item[2] > threshold
            or not item[2]]
def get_scored_matches(word: str, possibilities: List[str], n: int=3, cutoff: float=0.6) -> List[Tuple[float, str]]:
    """Return up to n (ratio, candidate) pairs scoring at least cutoff, best first."""
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n,))
    if not (0.0 <= cutoff <= 1.0):
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))
    matcher: SequenceMatcher = SequenceMatcher()
    matcher.set_seq2(word)
    scored = []
    for candidate in possibilities:
        matcher.set_seq1(candidate)
        # Cheap upper-bound ratios first; the exact ratio only when both pass.
        if (matcher.real_quick_ratio() >= cutoff
                and matcher.quick_ratio() >= cutoff
                and matcher.ratio() >= cutoff):
            scored.append((matcher.ratio(), candidate))
    # Keep only the n best-scoring matches, highest ratio first.
    return heapq.nlargest(n, scored)
def build_token_counts(characterizer, texts):
    """Tokenize every text in-place and return a Counter of all tokens.

    Side effects: each dict in `texts` gains a 'tokens' list; URL display
    forms and (for retweets) the author's lowercased @handle are appended
    as extra tokens before counting.

    Fix: removed an unused difflib.SequenceMatcher local that was created
    but never referenced.
    """
    tokenizer = Tokenizer(characterizer=characterizer)
    tokenizer.train([t['text'] for t in texts])
    token_counts = Counter()
    for t in texts:
        t['tokens'] = tokenizer.tokenize(t['text'])
        if not t['tokens']:
            continue
        if 'urls' in t['entities'] and t['entities']['urls']:
            #TODO: replace those urls instead of adding them
            for url in t['entities']['urls']:
                t['tokens'].append(url['display_url'])
        if t['__is_rt__']:
            t['tokens'].append(u'@{0}'.format(t['user']['screen_name']).lower())
        token_counts.update(t['tokens'])
    return token_counts
def validate(self, password, user=None):
    """Reject passwords too similar to any of the user's attribute values.

    Splits each configured user attribute on non-word characters and compares
    every fragment (plus the whole value) against the password; raises
    ValidationError when the quick similarity ratio exceeds max_similarity.
    """
    if not user:
        return
    for attribute_name in self.user_attributes:
        value = getattr(user, attribute_name, None)
        if not value or not isinstance(value, string_types):
            continue
        # Raw string: '\W' in a plain literal is an invalid escape sequence.
        value_parts = re.split(r'\W+', value) + [value]
        for value_part in value_parts:
            if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                raise ValidationError(
                    _("The password is too similar to the %(verbose_name)s."),
                    code='password_too_similar',
                    params={'verbose_name': verbose_name},
                )
def validate(self, password, user=None):
    """Reject passwords too similar to any of the user's attribute values.

    Splits each configured user attribute on non-word characters and compares
    every fragment (plus the whole value) against the password; raises
    ValidationError when the quick similarity ratio exceeds max_similarity.
    """
    if not user:
        return
    for attribute_name in self.user_attributes:
        value = getattr(user, attribute_name, None)
        if not value or not isinstance(value, string_types):
            continue
        # Raw string: '\W' in a plain literal is an invalid escape sequence.
        value_parts = re.split(r'\W+', value) + [value]
        for value_part in value_parts:
            if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                raise ValidationError(
                    _("The password is too similar to the %(verbose_name)s."),
                    code='password_too_similar',
                    params={'verbose_name': verbose_name},
                )
def sededit(a, b, context=0):
    '''
    Take two strings and output a sed-like diff
    '''
    if a == b:
        return ''
    len_a, len_b = len(a), len(b)
    # Track the smallest span in each string that covers every change.
    lo1, hi1, lo2, hi2 = len_a, 0, len_b, 0
    for tag, i1, i2, j1, j2 in difflib.SequenceMatcher(None, a, b).get_opcodes():
        if tag == 'equal':
            continue
        # Pure insertions take one extra char of context so the target is unique.
        pad = context + (1 if tag == 'insert' else 0)
        lo1 = max(min(i1 - pad, lo1), 0)
        lo2 = max(min(j1 - pad, lo2), 0)
        hi1 = min(max(i2 + pad, hi1), len_a)
        hi2 = min(max(j2 + pad, hi2), len_b)
    anchor_start = '' if lo1 else '^'
    anchor_end = '$' if hi1 == len_a else ''
    return 's/%s%s%s/%s/' % (anchor_start, a[lo1:hi1], anchor_end, b[lo2:hi2])
def pick_best(title, item1, item2):
    """
    Pick best record among two items with identical scores.

    Prefers the item whose first title is closer to `title`; an item with no
    title loses outright. On a ratio tie, falls back to record type
    (e.g. prefer journal-articles).
    """
    def compare(x):
        # Case-insensitive similarity between the query title and a candidate.
        return difflib.SequenceMatcher(None, title.lower(), x.lower()).ratio()
    if not item1['title']:
        return item2
    elif not item2['title']:
        # Bug fix: this branch previously returned item2 even though item2
        # is the one without a title.
        return item1
    r1 = compare(item1['title'][0])
    r2 = compare(item2['title'][0])
    if r1 > r2:
        return item1
    elif r2 > r1:
        return item2
    else:
        # Try to find other discriminating criteria... e.g. prefer journal-articles
        if score_type(item1["type"]) > score_type(item2["type"]):
            return item1
        else:
            return item2
def validate(self, password, user=None):
    """Reject passwords too similar to any of the user's attribute values.

    Splits each configured user attribute on non-word characters and compares
    every fragment (plus the whole value) against the password; raises
    ValidationError when the quick similarity ratio exceeds max_similarity.
    """
    if not user:
        return
    for attribute_name in self.user_attributes:
        value = getattr(user, attribute_name, None)
        if not value or not isinstance(value, string_types):
            continue
        # Raw string: '\W' in a plain literal is an invalid escape sequence.
        value_parts = re.split(r'\W+', value) + [value]
        for value_part in value_parts:
            if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                raise ValidationError(
                    _("The password is too similar to the %(verbose_name)s."),
                    code='password_too_similar',
                    params={'verbose_name': verbose_name},
                )
def get_initial_matches(self):
    """
    This does the main work of finding matching n-gram sequences between
    the texts.
    """
    matcher = SequenceMatcher(None, self.textAgrams, self.textBgrams)
    all_blocks = matcher.get_matching_blocks()
    # Keep only sequences longer than the user-supplied threshold.
    high_matching_blocks = [
        block for block in all_blocks if block.size > self.threshold
    ]
    num_blocks = len(high_matching_blocks)
    if num_blocks > 0:
        print('%s total matches found.' % num_blocks, flush=True)
    return high_matching_blocks
def render_diff(old_text, new_text):
    """Render an HTML diff of old_text -> new_text.

    Unchanged text is emitted verbatim; insertions and deletions are wrapped
    in <span class="insert"> / <span class="delete"> elements.

    Fix: removed leftover debug print statements.
    NOTE(review): inputs are interpolated into HTML without escaping — do not
    feed untrusted text to this function without escaping it first.
    """
    sm = difflib.SequenceMatcher(a=old_text, b=new_text)
    out_toks = []
    for opcode, s1, e1, s2, e2 in sm.get_opcodes():
        if opcode == 'equal':
            out_toks.append(old_text[s1:e1])
        elif opcode == 'insert':
            out_toks.append('<span class="insert">' + new_text[s2:e2] + '</span>')
        elif opcode == 'delete':
            out_toks.append('<span class="delete">' + old_text[s1:e1] + '</span>')
        elif opcode == 'replace':
            out_toks.append('<span class="delete">' + old_text[s1:e1] + '</span>')
            out_toks.append('<span class="insert">' + new_text[s2:e2] + '</span>')
    return ''.join(out_toks)
def validate(self, password, user=None):
    """Reject passwords too similar to any of the user's attribute values.

    Splits each configured user attribute on non-word characters and compares
    every fragment (plus the whole value) against the password; raises
    ValidationError when the quick similarity ratio exceeds max_similarity.
    """
    if not user:
        return
    for attribute_name in self.user_attributes:
        value = getattr(user, attribute_name, None)
        if not value or not isinstance(value, string_types):
            continue
        # Raw string: '\W' in a plain literal is an invalid escape sequence.
        value_parts = re.split(r'\W+', value) + [value]
        for value_part in value_parts:
            if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                raise ValidationError(
                    _("The password is too similar to the %(verbose_name)s."),
                    code='password_too_similar',
                    params={'verbose_name': verbose_name},
                )
def validate(self, password, user=None):
    """Raise ValidationError when the password resembles a user attribute."""
    if not user:
        return
    lowered_password = password.lower()
    for attribute_name in self.user_attributes:
        value = getattr(user, attribute_name, None)
        if not value or not isinstance(value, string_types):
            continue
        # The full attribute value plus each word-like fragment of it.
        fragments = re.split(r'\W+', value) + [value]
        for fragment in fragments:
            quick = SequenceMatcher(a=lowered_password, b=fragment.lower()).quick_ratio()
            if quick <= self.max_similarity:
                continue
            try:
                verbose_name = force_text(
                    user._meta.get_field(attribute_name).verbose_name)
            except FieldDoesNotExist:
                # Attribute without a backing model field: use its plain name.
                verbose_name = attribute_name
            raise ValidationError(
                _("The password is too similar to the %(verbose_name)s."),
                code='password_too_similar',
                params={'verbose_name': verbose_name},
            )
def get_diff_lines(self):
    """Split self.post.code vs self.code into parallel diff segment lists.

    Returns (postdiffs, commentdiffs): lists of (kind, text) tuples where
    kind is 'mod' for changed segments and 'eq' for unchanged ones.
    """
    import difflib
    postdiffs = list()
    commentdiffs = list()
    # Junk predicate: whitespace characters are treated as junk when matching.
    s = difflib.SequenceMatcher(lambda x: x.isspace(), self.post.code, self.code)
    for o in s.get_opcodes():
        # o = (tag, i1, i2, j1, j2); s.a is the post's code, s.b is self.code.
        if o[0] in ('replace','delete'):
            postdiffs.append(('mod', s.a[o[1]:o[2]]))
        if o[0] in ('replace','insert'):
            commentdiffs.append(('mod', s.b[o[3]:o[4]]))
        if o[0] == 'equal':
            postdiffs.append(('eq', s.a[o[1]:o[2]]))
            commentdiffs.append(('eq', s.b[o[3]:o[4]]))
    # NOTE(review): __normalize__ is name-mangled when defined inside a class;
    # presumably it cleans/coalesces the segment lists in place — confirm in
    # the enclosing class.
    self.__normalize__(postdiffs)
    self.__normalize__(commentdiffs)
    return (postdiffs,commentdiffs)
def get_custom_path(self, searchtitle, title):
    '''locate custom folder on disk as pvrart location'''
    # Returns the matched directory path (with trailing delimiter) or "".
    title_path = ""
    custom_path = self._mutils.addon.getSetting("pvr_art_custom_path")
    if custom_path and self._mutils.addon.getSetting("pvr_art_custom") == "true":
        # Use backslashes when the configured path looks like Windows/SMB.
        delim = "\\" if "\\" in custom_path else "/"
        dirs = xbmcvfs.listdir(custom_path)[0]
        # Try progressively looser fuzzy-match thresholds until a directory hits.
        for strictness in [1, 0.95, 0.9, 0.8]:
            if title_path:
                break
            for directory in dirs:
                if title_path:
                    break
                # NOTE(review): decode assumes bytes directory names (Kodi on
                # Python 2); under Python 3 listdir yields str — confirm.
                directory = directory.decode("utf-8")
                curpath = os.path.join(custom_path, directory) + delim
                # Match either the display title or the search title.
                for item in [title, searchtitle]:
                    # SM: difflib.SequenceMatcher alias from the import site.
                    match = SM(None, item, directory).ratio()
                    if match >= strictness:
                        title_path = curpath
                        break
        if not title_path and self._mutils.addon.getSetting("pvr_art_download") == "true":
            # No existing folder matched: build a new one named after the title.
            title_path = os.path.join(custom_path, normalize_string(title)) + delim
    return title_path
def validate(self, password, user=None):
    """Reject passwords too similar to any of the user's attribute values.

    Splits each configured user attribute on non-word characters and compares
    every fragment (plus the whole value) against the password; raises
    ValidationError when the quick similarity ratio exceeds max_similarity.
    """
    if not user:
        return
    for attribute_name in self.user_attributes:
        value = getattr(user, attribute_name, None)
        if not value or not isinstance(value, string_types):
            continue
        # Raw string: '\W' in a plain literal is an invalid escape sequence.
        value_parts = re.split(r'\W+', value) + [value]
        for value_part in value_parts:
            if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                raise ValidationError(
                    _("The password is too similar to the %(verbose_name)s."),
                    code='password_too_similar',
                    params={'verbose_name': verbose_name},
                )
def ratio(s1, s2):
    """Return the SequenceMatcher similarity of s1 and s2 as an int 0-100."""
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")
    s1, s2 = utils.make_type_consistent(s1, s2)
    # An empty string cannot be meaningfully compared.
    if len(s1) == 0 or len(s2) == 0:
        return 0
    matcher = SequenceMatcher(None, s1, s2)
    return utils.intr(100 * matcher.ratio())
# todo: skip duplicate indexes for a little more speed
def P_update_tree(self, user, archive_path): # private, plex can't use _var
    """update the cache of the dir read state for everything between cb_path and archive_path."""
    Log.Debug('updating tree {}'.format(archive_path))
    # Configured comic-library root (Plex plugin preference).
    base = Prefs['cb_path']
    # Diff the root against the full archive path; the 'insert' opcode spans
    # the path suffix that lies below the root.
    x = difflib.SequenceMatcher(a=base, b=archive_path)
    for tag, i1, i2, j1, j2 in x.get_opcodes():
        if tag == 'insert':
            try:
                # Directory part of the inserted suffix, e.g. "/series/vol1".
                diff = os.path.split(archive_path[j1:j2])[0]
                # First path component below the root (separators normalized).
                d = diff.replace('\\', '/').split('/')[1]
                path = os.path.join(base, d)
                Log.Debug('archive root: {}'.format(path))
                if os.path.abspath(base) == os.path.abspath(path):
                    Log.Debug('item is in root dir. skipping.')
                else:
                    # NOTE(review): return value unused; presumably called for
                    # its cache-refresh side effect — confirm dir_read_state.
                    state = self.dir_read_state(user, path, True)
            except Exception as e:
                Log.Error('P_update_tree {}'.format(e))
    return
def validate(self, password, user=None):
    """Reject passwords too similar to any of the user's attribute values.

    Splits each configured user attribute on non-word characters and compares
    every fragment (plus the whole value) against the password; raises
    ValidationError when the quick similarity ratio exceeds max_similarity.
    """
    if not user:
        return
    for attribute_name in self.user_attributes:
        value = getattr(user, attribute_name, None)
        if not value or not isinstance(value, string_types):
            continue
        # Raw string: '\W' in a plain literal is an invalid escape sequence.
        value_parts = re.split(r'\W+', value) + [value]
        for value_part in value_parts:
            if SequenceMatcher(a=password.lower(), b=value_part.lower()).quick_ratio() > self.max_similarity:
                verbose_name = force_text(user._meta.get_field(attribute_name).verbose_name)
                raise ValidationError(
                    _("The password is too similar to the %(verbose_name)s."),
                    code='password_too_similar',
                    params={'verbose_name': verbose_name},
                )
def reset(self):
    """
    Resets thread data model
    """
    # Scalar defaults for the per-thread bookkeeping fields.
    defaults = {
        'disableStdOut': False,
        'hashDBCursor': None,
        'inTransaction': False,
        'lastComparisonPage': None,
        'lastComparisonHeaders': None,
        'lastErrorPage': None,
        'lastHTTPError': None,
        'lastRedirectMsg': None,
        'lastQueryDuration': 0,
        'lastRequestMsg': None,
        'lastRequestUID': 0,
        'lastRedirectURL': None,
        'resumed': False,
        'retriesCount': 0,
    }
    for name, value in defaults.items():
        setattr(self, name, value)
    # Fresh matcher and container state; must not be carried across resets.
    self.seqMatcher = difflib.SequenceMatcher(None)
    self.shared = shared
    self.valueStack = []
def get_relevant_entities(self, google_cloud_entities, target_entities, target_wikipedia_urls):
    """Return names of Google Cloud entities that match a target.

    An entity matches when its Wikipedia URL equals one of the target URLs
    (case-insensitive), or when any word of its name fuzzy-matches
    (ratio > 0.7) any word of any target entity name. The name-matching
    path appends each entity's name at most once.
    """
    entities_to_return = []
    target_wikipedia_urls_lower = [target_wikipedia_url.lower() for target_wikipedia_url in target_wikipedia_urls]
    for google_cloud_entity in google_cloud_entities:
        # Look at Wikipedia URLs
        if google_cloud_entity.wikipedia_url and google_cloud_entity.wikipedia_url.lower() in target_wikipedia_urls_lower:
            entities_to_return.append(google_cloud_entity.name)
            continue
        # Look at names
        a = google_cloud_entity.name.lower().split(" ")
        for target_entity in target_entities:
            b = target_entity.lower().split(" ")
            # Bug fix: the result list holds *names*, so membership must test
            # the name (the old code tested the entity object, which never
            # matched, so one entity could be appended once per target).
            if google_cloud_entity.name in entities_to_return:
                break
            for google_cloud_entity_part in a:
                for target_entity_part in b:
                    ratio = SequenceMatcher(None, google_cloud_entity_part, target_entity_part).ratio()
                    if ratio > 0.7:
                        entities_to_return.append(google_cloud_entity.name)
                        break
                if google_cloud_entity.name in entities_to_return:
                    break
    return entities_to_return
def closest_rule(self, adapter):
    """Return the registered rule most similar to this request, or None."""
    def score_rule(rule):
        # Endpoint similarity dominates; argument and method fit break ties.
        similarity = difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio()
        arg_fit = bool(set(self.values or ()).issubset(rule.arguments))
        method_fit = bool(rule.methods and self.method in rule.methods)
        return 0.98 * similarity + 0.01 * arg_fit + 0.01 * method_fit
    if not adapter or not adapter.map._rules:
        return None
    return max(adapter.map._rules, key=score_rule)
def similar(a, b):
    """Similarity ratio in [0, 1] between sequences a and b."""
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()
def closest_rule(self, adapter):
    """Return the known rule whose endpoint best matches this request."""
    def score_rule(rule):
        # Weighted score: endpoint text similarity plus small tie-breakers
        # for argument coverage and method compatibility.
        endpoint_sim = difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio()
        covers_args = bool(set(self.values or ()).issubset(rule.arguments))
        accepts_method = bool(rule.methods and self.method in rule.methods)
        return 0.98 * endpoint_sim + 0.01 * covers_args + 0.01 * accepts_method
    if not (adapter and adapter.map._rules):
        return None
    return max(adapter.map._rules, key=score_rule)
def global_search(cls, text, limit, menu='ir.ui.menu'):
    """
    Search on models for text including menu
    Returns a list of tuple (ratio, model, model_name, id, name, icon)
    The size of the list is limited to limit
    """
    pool = Pool()
    ModelAccess = pool.get('ir.model.access')
    if not limit > 0:
        raise ValueError('limit must be > 0: %r' % (limit,))
    # Search both globally-searchable models and the menu model itself.
    models = cls.search(['OR',
            ('global_search_p', '=', True),
            ('model', '=', menu),
            ])
    access = ModelAccess.get_access([m.model for m in models])
    # StringMatcher: SequenceMatcher-compatible matcher; query is seq2 so
    # only seq1 has to be reset per candidate.
    s = StringMatcher()
    # NOTE(review): str.decode only exists on Python 2 byte strings; under
    # Python 3 this would raise AttributeError — confirm target version.
    if isinstance(text, str):
        text = text.decode('utf-8')
    s.set_seq2(text)
    def generate():
        for model in models:
            # Skip models the current user cannot read.
            if not access[model.model]['read']:
                continue
            Model = pool.get(model.model)
            if not hasattr(Model, 'search_global'):
                continue
            for record, name, icon in Model.search_global(text):
                if isinstance(name, str):
                    name = name.decode('utf-8')
                s.set_seq1(name)
                # Rank every hit by its similarity to the query text.
                yield (s.ratio(), model.model, model.rec_name,
                    record.id, name, icon)
    return heapq.nlargest(int(limit), generate())
def closest_rule(self, adapter):
    """Find the rule closest to this request's endpoint; None if no rules."""
    def score_rule(rule):
        # Mostly endpoint similarity; argument/method fit nudge the score.
        sim = difflib.SequenceMatcher(None, rule.endpoint, self.endpoint).ratio()
        args_ok = bool(set(self.values or ()).issubset(rule.arguments))
        method_ok = bool(rule.methods and self.method in rule.methods)
        return 0.98 * sim + 0.01 * args_ok + 0.01 * method_ok
    if not adapter or not adapter.map._rules:
        return None
    return max(adapter.map._rules, key=score_rule)
def match_user(slack_users, author_name, threshold=0.6):
    """
    Do a fuzzy match of author name to full name. If it matches, return a formatted Slack handle. Else return original
    full name.
    Args:
        slack_users (list of dict): A list of slack users from their API
        author_name (str): The commit author's full name
        threshold (float): All matches must be at least this high to pass.
    Returns:
        str: The slack markup for the handle of that author.
        If one can't be found, the author's name is returned unaltered.
    """
    normalized_author = reformatted_full_name(author_name)

    def score(slack_user):
        """Similarity to the author's name, or 0 when below the threshold."""
        normalized_name = reformatted_full_name(slack_user['profile']['real_name'])
        similarity = SequenceMatcher(a=normalized_author, b=normalized_name).ratio()
        return similarity if similarity >= threshold else 0

    candidates = [(slack_user, score(slack_user)) for slack_user in slack_users]
    candidates = [pair for pair in candidates if pair[1] >= threshold]
    if candidates:
        best_user = max(candidates, key=lambda pair: pair[1])[0]
        return "<@{id}>".format(id=best_user['id'])
    return author_name
def opcodes(self):
    """Diff opcodes transforming the target's active uids into the new unit list."""
    matcher = difflib.SequenceMatcher(
        None, self.target.active_uids, self.new_unit_list)
    return matcher.get_opcodes()
def closest_rule(self, adapter):
    """Return the best-matching rule; None when the adapter has no rules."""
    def _score_rule(rule):
        # Endpoint similarity dominates; argument/method fit break ties.
        similarity = difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio()
        arg_fit = bool(set(self.values or ()).issubset(rule.arguments))
        method_fit = bool(rule.methods and self.method in rule.methods)
        return 0.98 * similarity + 0.01 * arg_fit + 0.01 * method_fit
    if not adapter or not adapter.map._rules:
        return None
    return max(adapter.map._rules, key=_score_rule)
def closest_rule(self, adapter):
    """Return the rule most like this request's endpoint (None if no rules)."""
    def _score_rule(rule):
        # Weighted combination: mostly endpoint text similarity, with small
        # bonuses for argument coverage and method compatibility.
        endpoint_sim = difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio()
        covers_args = bool(set(self.values or ()).issubset(rule.arguments))
        accepts_method = bool(rule.methods and self.method in rule.methods)
        return 0.98 * endpoint_sim + 0.01 * covers_args + 0.01 * accepts_method
    if not (adapter and adapter.map._rules):
        return None
    return max(adapter.map._rules, key=_score_rule)
def closest_rule(self, adapter):
    """Return the registered rule most similar to this request, or None."""
    def score_rule(rule):
        # Endpoint similarity dominates; argument and method fit break ties.
        similarity = difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio()
        arg_fit = bool(set(self.values or ()).issubset(rule.arguments))
        method_fit = bool(rule.methods and self.method in rule.methods)
        return 0.98 * similarity + 0.01 * arg_fit + 0.01 * method_fit
    if not adapter or not adapter.map._rules:
        return None
    return max(adapter.map._rules, key=score_rule)
def closest_rule(self, adapter):
    """Pick the rule whose endpoint looks most like this request's, or None."""
    def score_rule(rule):
        # Similarity of endpoints is the main signal; tiny bonuses reward
        # rules that cover the supplied arguments and accept the method.
        sim = difflib.SequenceMatcher(None, rule.endpoint, self.endpoint).ratio()
        args_ok = bool(set(self.values or ()).issubset(rule.arguments))
        method_ok = bool(rule.methods and self.method in rule.methods)
        return 0.98 * sim + 0.01 * args_ok + 0.01 * method_ok
    if not (adapter and adapter.map._rules):
        return None
    return max(adapter.map._rules, key=score_rule)
def closest_rule(self, adapter):
    """Return the best-scoring rule for this request, or None without rules."""
    def score_rule(rule):
        # Score = endpoint similarity (weight .98) + argument fit (.01)
        # + method fit (.01).
        endpoint_sim = difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio()
        covers_args = bool(set(self.values or ()).issubset(rule.arguments))
        accepts_method = bool(rule.methods and self.method in rule.methods)
        return 0.98 * endpoint_sim + 0.01 * covers_args + 0.01 * accepts_method
    if not adapter or not adapter.map._rules:
        return None
    return max(adapter.map._rules, key=score_rule)
def diff_text(a, b):
    """Render the diff from a to b as HTML (<strike> deletions, <strong> insertions)."""
    matcher = SequenceMatcher(None, a, b)
    # One renderer per opcode tag; each receives the opcode's four indices.
    renderers = {
        'replace': lambda i1, i2, j1, j2: "<strike>%s</strike><strong>%s</strong>" % (a[i1:i2], b[j1:j2]),
        'delete': lambda i1, i2, j1, j2: "<strike>%s</strike>" % (a[i1:i2], ),
        'insert': lambda i1, i2, j1, j2: "<strong>%s</strong>" % (b[j1:j2], ),
        'equal': lambda i1, i2, j1, j2: a[i1:i2],
    }
    pieces = [renderers[tag](*args) for tag, *args in matcher.get_opcodes()]
    return safe("".join(pieces))
def print_diffs(expected,actual):
    # Python 2 helper: report where `actual` first diverges from `expected`.
    a=expected
    b=actual
    s = SequenceMatcher(None,a,b)
    print '\n'
    ctr=0
    for block in s.get_matching_blocks():
        # block = (i, j, size): a[i:i+size] == b[j:j+size]
        # NOTE(review): bpos reuses block[0]; block[1] is the b-side index —
        # looks like a bug unless both inputs are expected to stay aligned.
        apos=block[0]
        bpos=block[0]
        aendpos=apos+block[2]
        bendpos=bpos+block[2]
        achunk=expected[apos:aendpos]
        bchunk=actual[bpos:bendpos]
        # print "a[%d] and b[%d] match for %d elements" % block
        # NOTE(review): the EXPECTED/ACTUAL labels below appear swapped
        # relative to the chunks they print (achunk comes from `expected`).
        print '\nACTUAL has matching Error at '+str(aendpos)
        print 'Expected ='+expected[bendpos:bendpos+100]+'\nFound ='+actual[aendpos:aendpos+100]
        print 'Matched values from 0 to '+str(aendpos-1)+' are'
        print ' EXPECTED='+bchunk
        print ' ACTUAL ='+achunk
        print ''
        # ctr starts at 0, so only the first matching block is ever reported.
        if ctr==0:
            break
        else:
            ctr+=1
###########################################################################
## Unit Tests - OPML to MM conversions
###########################################################################
#
# These tests are designed to run in the local project folder opmltomm
def similar(self, a, b):
    """True when a and b are more similar than the configured ratio."""
    score = SequenceMatcher(None, a, b).ratio()
    return score > self.similarity_ratio
def similar(a, b):
    """Return the difflib similarity ratio (0..1) between a and b."""
    return SequenceMatcher(a=a, b=b).ratio()
def get_best_similar(data):
    """Return (key, best) where best is the closest pool string or None.

    Scans similar_pool for the entry most similar to key[1]. Only candidates
    whose length lies strictly within (len/2, len*2) are considered — a valid
    shortcut as long as the cutoff stays above ~0.7 — and the cutoff is raised
    to each new best ratio found, so later candidates must beat it.
    """
    import difflib
    key, use_similar, similar_pool = data
    best_match = None
    matcher = difflib.SequenceMatcher()
    matcher.set_seq2(key[1])
    key_len = len(key[1])
    lower, upper = key_len // 2, key_len * 2
    for candidate in similar_pool:
        if not (lower < len(candidate) < upper):
            continue
        matcher.set_seq1(candidate)
        # Cheap upper-bound ratios first; compute the exact ratio only when
        # both quick estimates clear the bar.
        if (matcher.real_quick_ratio() >= use_similar
                and matcher.quick_ratio() >= use_similar):
            exact = matcher.ratio()
            if exact >= use_similar:
                best_match = candidate
                use_similar = exact
    return key, best_match
def similar(a, b):
    """Similarity ratio between a and b using difflib's SequenceMatcher."""
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()
def closest_rule(self, adapter):
    """Return the best-matching rule; None when the adapter has no rules."""
    def _score_rule(rule):
        # Endpoint similarity carries nearly all the weight; argument and
        # method compatibility act as tie-breakers.
        similarity = difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio()
        arg_fit = bool(set(self.values or ()).issubset(rule.arguments))
        method_fit = bool(rule.methods and self.method in rule.methods)
        return 0.98 * similarity + 0.01 * arg_fit + 0.01 * method_fit
    if not adapter or not adapter.map._rules:
        return None
    return max(adapter.map._rules, key=_score_rule)
def ratio(s1, s2):
    """Similarity of s1 and s2 as an integer percentage (0-100)."""
    s1, s2 = utils.make_type_consistent(s1, s2)
    matcher = SequenceMatcher(None, s1, s2)
    return utils.intr(100 * matcher.ratio())
def partial_ratio(s1, s2):
    """"Return the ratio of the most similar substring
    as a number between 0 and 100."""
    s1, s2 = utils.make_type_consistent(s1, s2)
    # Compare the shorter string against same-length windows of the longer.
    if len(s1) <= len(s2):
        shorter, longer = s1, s2
    else:
        shorter, longer = s2, s1
    blocks = SequenceMatcher(None, shorter, longer).get_matching_blocks()
    # each block represents a sequence of matching characters in a string
    # of the form (idx_1, idx_2, len)
    # the best partial match will block align with at least one of those blocks
    # e.g. shorter = "abcd", longer = XXXbcdeEEE
    # block = (1,3,3)
    # best score === ratio("abcd", "Xbcd")
    scores = []
    for block in blocks:
        # Align a window of the longer string with the start of the shorter.
        long_start = max(block[1] - block[0], 0)
        long_substr = longer[long_start:long_start + len(shorter)]
        window_ratio = SequenceMatcher(None, shorter, long_substr).ratio()
        if window_ratio > .995:
            # Near-perfect window: no better score is possible.
            return 100
        scores.append(window_ratio)
    return utils.intr(100 * max(scores))
##############################
# Advanced Scoring Functions #
##############################
def closest_rule(self, adapter):
    """Return the registered rule most similar to this request, or None."""
    def score_rule(rule):
        # Endpoint similarity dominates the score; argument coverage and
        # method compatibility contribute small tie-breaking bonuses.
        endpoint_sim = difflib.SequenceMatcher(
            None, rule.endpoint, self.endpoint
        ).ratio()
        covers_args = bool(set(self.values or ()).issubset(rule.arguments))
        accepts_method = bool(rule.methods and self.method in rule.methods)
        return 0.98 * endpoint_sim + 0.01 * covers_args + 0.01 * accepts_method
    if not (adapter and adapter.map._rules):
        return None
    return max(adapter.map._rules, key=score_rule)
def get_matching_blocks(self):
    """Matching blocks filtered to those longer than a size-aware threshold.

    Bug fix: the size guard compared len(self.b) with itself; it now uses
    the shorter of the two sequences, as clearly intended.
    """
    size = min(len(self.a), len(self.b))
    threshold = min(self.threshold, size / 4)
    actual = difflib.SequenceMatcher.get_matching_blocks(self)
    # Keep blocks above the threshold, plus the zero-length terminator.
    return [item for item in actual
            if item[2] > threshold
            or not item[2]]
def ratcliff_obershelp_similarity(a, b):
    """
    A kind of approximate string matching.
    Computes the generalized Ratcliff/Obershelp similarity of two strings
    as the number of matching characters divided by the total number of characters in the two strings.
    Matching characters are those in the longest common subsequence plus,
    recursively matching characters in the unmatched region on either side of the longest common subsequence.
    """
    # Empty or missing input has no defined similarity.
    if not a or not b:
        return None
    return SequenceMatcher(None, a, b).ratio()