Python html.entities 模块，entitydefs 实例源码


项目:nstock    作者:ybenitezf    | 项目源码 | 文件源码
def name2cp(k):
    """Return the integer code point for the HTML entity name *k*.

    ``'apos'`` is answered directly, before any table is consulted
    (presumably because the HTML entity tables do not define it).
    Any other name is looked up in ``htmlentitydefs.name2codepoint``
    when that table exists, otherwise it is derived from
    ``htmlentitydefs.entitydefs``.

    A name missing from the table in use propagates that table's
    lookup error (``KeyError`` for the stdlib dictionaries).
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]

    # Fallback for modules without name2codepoint. This must be reachable:
    # it previously sat behind the return above and never executed.
    k = htmlentitydefs.entitydefs[k]
    if k.startswith("&#") and k.endswith(";"):
        return int(k[2:-1])  # numeric reference, i.e. not in latin-1
    return ord(codecs.latin_1_decode(k)[0])
项目:ExptWizNote    作者:Ext4FAT    | 项目源码 | 文件源码
def name2cp(k):
    """Return the integer code point for the HTML entity name *k*.

    ``'apos'`` is answered directly, before any table is consulted
    (presumably because the HTML entity tables do not define it).
    Any other name is looked up in ``htmlentitydefs.name2codepoint``
    when that table exists, otherwise it is derived from
    ``htmlentitydefs.entitydefs``.

    A name missing from the table in use propagates that table's
    lookup error (``KeyError`` for the stdlib dictionaries).
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]

    # Fallback for modules without name2codepoint. This must be reachable:
    # it previously sat behind the return above and never executed.
    k = htmlentitydefs.entitydefs[k]
    if k.startswith("&#") and k.endswith(";"):
        return int(k[2:-1])  # numeric reference, i.e. not in latin-1
    return ord(codecs.latin_1_decode(k)[0])
项目:catchup4kodi    作者:catchup4kodi    | 项目源码 | 文件源码
def name2cp(k):
    """Return the integer code point for the HTML entity name *k*.

    ``'apos'`` is answered directly, before any table is consulted
    (presumably because the HTML entity tables do not define it).
    Any other name is looked up in ``htmlentitydefs.name2codepoint``
    when that table exists, otherwise it is derived from
    ``htmlentitydefs.entitydefs``.

    A name missing from the table in use propagates that table's
    lookup error (``KeyError`` for the stdlib dictionaries).
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]

    # Fallback for modules without name2codepoint. This must be reachable:
    # it previously sat behind the return above and never executed.
    k = htmlentitydefs.entitydefs[k]
    if k.startswith("&#") and k.endswith(";"):
        return int(k[2:-1])  # numeric reference, i.e. not in latin-1
    return ord(codecs.latin_1_decode(k)[0])
项目:localdocindex    作者:stcioc    | 项目源码 | 文件源码
def name2cp(k):
    """Return the integer code point for the HTML entity name *k*.

    ``'apos'`` is answered directly, before any table is consulted
    (presumably because the HTML entity tables do not define it).
    Any other name is looked up in ``htmlentitydefs.name2codepoint``
    when that table exists, otherwise it is derived from
    ``htmlentitydefs.entitydefs``.

    A name missing from the table in use propagates that table's
    lookup error (``KeyError`` for the stdlib dictionaries).
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]

    # Fallback for modules without name2codepoint. This must be reachable:
    # it previously sat behind the return above and never executed.
    k = htmlentitydefs.entitydefs[k]
    if k.startswith("&#") and k.endswith(";"):
        return int(k[2:-1])  # numeric reference, i.e. not in latin-1
    return ord(codecs.latin_1_decode(k)[0])
项目:acmpv    作者:Vayn    | 项目源码 | 文件源码
def unescape_html(string):
    """Decode HTML entity references found in *string*.

    Numeric references (``&#...;``) are rewritten first by the
    ``_sharp2uni`` callback. Every remaining ``&name;`` reference is
    then replaced with the value stored under ``name`` in
    ``entitydefs``.

    A name that is absent from ``entitydefs`` propagates the lookup
    error raised by that mapping.
    """
    string = re.sub(r'&#[^;]+;', _sharp2uni, string)
    # m.group(0) is the whole reference, e.g. "&amp;"; the [1:-1] slice
    # drops the leading "&" and trailing ";" to leave the bare name.
    string = re.sub(r'&[^;]+;', lambda m: entitydefs[m.group(0)[1:-1]], string)
    return string
项目:acmpv    作者:Vayn    | 项目源码 | 文件源码
def unescape_html(string):
    """Decode HTML entity references found in *string*.

    Numeric references (``&#...;``) are rewritten first by the
    ``_sharp2uni`` callback. Every remaining ``&name;`` reference is
    then replaced with the value stored under ``name`` in
    ``entitydefs``.

    A name that is absent from ``entitydefs`` propagates the lookup
    error raised by that mapping.
    """
    string = re.sub(r'&#[^;]+;', _sharp2uni, string)
    # m.group(0) is the whole reference, e.g. "&amp;"; the [1:-1] slice
    # drops the leading "&" and trailing ";" to leave the bare name.
    string = re.sub(r'&[^;]+;', lambda m: entitydefs[m.group(0)[1:-1]], string)
    return string
项目:script.reddit.reader    作者:gedisony    | 项目源码 | 文件源码
def name2cp(k):
    """Return the integer code point for the HTML entity name *k*.

    ``'apos'`` is answered directly, before any table is consulted
    (presumably because the HTML entity tables do not define it).
    Any other name is looked up in ``htmlentitydefs.name2codepoint``
    when that table exists, otherwise it is derived from
    ``htmlentitydefs.entitydefs``.

    A name missing from the table in use propagates that table's
    lookup error (``KeyError`` for the stdlib dictionaries).
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]

    # Fallback for modules without name2codepoint. This must be reachable:
    # it previously sat behind the return above and never executed.
    k = htmlentitydefs.entitydefs[k]
    if k.startswith("&#") and k.endswith(";"):
        return int(k[2:-1])  # numeric reference, i.e. not in latin-1
    return ord(codecs.latin_1_decode(k)[0])
项目:googMeow    作者:aaaddress1    | 项目源码 | 文件源码
def name2cp(k):
    """Return the integer code point for the HTML entity name *k*.

    ``'apos'`` is answered directly, before any table is consulted
    (presumably because the HTML entity tables do not define it).
    Any other name is looked up in ``htmlentitydefs.name2codepoint``
    when that table exists, otherwise it is derived from
    ``htmlentitydefs.entitydefs``.

    A name missing from the table in use propagates that table's
    lookup error (``KeyError`` for the stdlib dictionaries).
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]

    # Fallback for modules without name2codepoint. This must be reachable:
    # it previously sat behind the return above and never executed.
    k = htmlentitydefs.entitydefs[k]
    if k.startswith("&#") and k.endswith(";"):
        return int(k[2:-1])  # numeric reference, i.e. not in latin-1
    return ord(codecs.latin_1_decode(k)[0])
项目:gitsome    作者:donnemartin    | 项目源码 | 文件源码
def name2cp(k):
    """Return the integer code point for the HTML entity name *k*.

    ``'apos'`` is answered directly, before any table is consulted
    (presumably because the HTML entity tables do not define it).
    Any other name is looked up in ``htmlentitydefs.name2codepoint``
    when that table exists, otherwise it is derived from
    ``htmlentitydefs.entitydefs``.

    A name missing from the table in use propagates that table's
    lookup error (``KeyError`` for the stdlib dictionaries).
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]

    # Fallback for modules without name2codepoint. This must be reachable:
    # it previously sat behind the return above and never executed.
    k = htmlentitydefs.entitydefs[k]
    if k.startswith("&#") and k.endswith(";"):
        return int(k[2:-1])  # numeric reference, i.e. not in latin-1
    return ord(codecs.latin_1_decode(k)[0])
项目:machine-learning-python    作者:pspxiaochen    | 项目源码 | 文件源码
def lookslikehtml(s):
    """Heuristically decide whether the string *s* looks like HTML.

    Three checks are applied in order; failing any one of them returns
    ``None``:

    1. *s* contains a closing tag or an entity reference.
    2. Every tag name in *s* (compared in lower case) is listed in
       ``_HTMLSanitizer.acceptable_elements``.
    3. Every named entity in *s* is a key of ``entitydefs``.

    Returns ``1`` when all three checks pass.
    """
    # must have a close tag or an entity reference to qualify
    if not (re.search(r'</(\w+)>', s) or re.search(r'&#?\w+;', s)):
        return None

    # all tags must be in a restricted subset of valid HTML tags
    if any(t.lower() not in _HTMLSanitizer.acceptable_elements
           for t in re.findall(r'</?(\w+)', s)):
        return None

    # all entities must have been defined as valid HTML entities
    if any(e not in entitydefs for e in re.findall(r'&(\w+);', s)):
        return None

    return 1
项目:machine-learning-python    作者:pspxiaochen    | 项目源码 | 文件源码
def lookslikehtml(s):
    """Heuristically decide whether the string *s* looks like HTML.

    Three checks are applied in order; failing any one of them returns
    ``None``:

    1. *s* contains a closing tag or an entity reference.
    2. Every tag name in *s* (compared in lower case) is listed in
       ``_HTMLSanitizer.acceptable_elements``.
    3. Every named entity in *s* is a key of ``entitydefs``.

    Returns ``1`` when all three checks pass.
    """
    # must have a close tag or an entity reference to qualify
    if not (re.search(r'</(\w+)>', s) or re.search(r'&#?\w+;', s)):
        return None

    # all tags must be in a restricted subset of valid HTML tags
    if any(t.lower() not in _HTMLSanitizer.acceptable_elements
           for t in re.findall(r'</?(\w+)', s)):
        return None

    # all entities must have been defined as valid HTML entities
    if any(e not in entitydefs for e in re.findall(r'&(\w+);', s)):
        return None

    return 1
项目:mbox-to-csv    作者:jarrodparkes    | 项目源码 | 文件源码
def name2cp(k):
    """Return the integer code point for the HTML entity name *k*.

    ``'apos'`` is answered directly, before any table is consulted
    (presumably because the HTML entity tables do not define it).
    Any other name is looked up in ``htmlentitydefs.name2codepoint``
    when that table exists, otherwise it is derived from
    ``htmlentitydefs.entitydefs``.

    A name missing from the table in use propagates that table's
    lookup error (``KeyError`` for the stdlib dictionaries).
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]

    # Fallback for modules without name2codepoint. This must be reachable:
    # it previously sat behind the return above and never executed.
    k = htmlentitydefs.entitydefs[k]
    if k.startswith("&#") and k.endswith(";"):
        return int(k[2:-1])  # numeric reference, i.e. not in latin-1
    return ord(codecs.latin_1_decode(k)[0])