Python re module: re.VERBOSE usage examples
The following code examples, extracted from open-source Python projects, illustrate how to use re.VERBOSE.
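Before the project code, here is a minimal sketch (not taken from any of the projects below) of what the flag does: under re.VERBOSE, unescaped whitespace in the pattern is ignored and # starts a comment, so a pattern can be spread out and annotated.

import re

# Minimal sketch: whitespace and "#" comments inside the pattern are ignored.
phone = re.compile(r"""
    (\d{3})    # area prefix
    [-\s]?     # optional separator
    (\d{4})    # line number
    """, re.VERBOSE)

assert phone.match("555 0123").groups() == ("555", "0123")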
def str_flags_to_int(str_flags):
    flags = 0
    if "i" in str_flags:
        flags |= re.IGNORECASE
    if "l" in str_flags:
        flags |= re.LOCALE
    if "m" in str_flags:
        flags |= re.MULTILINE
    if "s" in str_flags:
        flags |= re.DOTALL
    if "u" in str_flags:
        flags |= re.UNICODE
    if "x" in str_flags:
        flags |= re.VERBOSE
    return flags
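A quick usage sketch of the helper above (my own addition; it assumes str_flags_to_int is in scope). The flag letters are the same single-character codes that _encode_regex below emits.

import re

# Hypothetical round-trip check for str_flags_to_int defined above.
assert str_flags_to_int("imx") == re.IGNORECASE | re.MULTILINE | re.VERBOSE
assert str_flags_to_int("") == 0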
def _encode_regex(name, value, dummy0, dummy1):
    """Encode a python regex or bson.regex.Regex."""
    flags = value.flags
    # Python 2 common case
    if flags == 0:
        return b"\x0B" + name + _make_c_string_check(value.pattern) + b"\x00"
    # Python 3 common case
    elif flags == re.UNICODE:
        return b"\x0B" + name + _make_c_string_check(value.pattern) + b"u\x00"
    else:
        sflags = b""
        if flags & re.IGNORECASE:
            sflags += b"i"
        if flags & re.LOCALE:
            sflags += b"l"
        if flags & re.MULTILINE:
            sflags += b"m"
        if flags & re.DOTALL:
            sflags += b"s"
        if flags & re.UNICODE:
            sflags += b"u"
        if flags & re.VERBOSE:
            sflags += b"x"
        sflags += b"\x00"
        return b"\x0B" + name + _make_c_string_check(value.pattern) + sflags
def execute(cls, ids, data):
    import pydot
    pool = Pool()
    Model = pool.get('ir.model')
    ActionReport = pool.get('ir.action.report')
    if not data['filter']:
        filter = None
    else:
        filter = re.compile(data['filter'], re.VERBOSE)
    action_report_ids = ActionReport.search([
            ('report_name', '=', cls.__name__)
            ])
    if not action_report_ids:
        raise Exception('Error', 'Report (%s) not find!' % cls.__name__)
    action_report = ActionReport(action_report_ids[0])
    models = Model.browse(ids)
    graph = pydot.Dot(fontsize="8")
    graph.set('center', '1')
    graph.set('ratio', 'auto')
    cls.fill_graph(models, graph, level=data['level'], filter=filter)
    data = graph.create(prog='dot', format='png')
    return ('png', fields.Binary.cast(data), False, action_report.name)
def remove_stack_traces(out):
    # this regexp taken from Python 2.5's doctest
    traceback_re = re.compile(r"""
        # Grab the traceback header. Different versions of Python have
        # said different things on the first traceback line.
        ^(?P<hdr> Traceback\ \(
            (?: most\ recent\ call\ last
            |   innermost\ last
            ) \) :
        )
        \s* $                    # toss trailing whitespace on the header.
        (?P<stack> .*?)          # don't blink: absorb stuff until...
        ^(?=\w)                  # a line *starts* with alphanum.
        .*?(?P<exception> \w+ )  # exception name
        (?P<msg> [:\n] .*)       # the rest
        """, re.VERBOSE | re.MULTILINE | re.DOTALL)
    blocks = []
    for block in blankline_separated_blocks(out):
        blocks.append(traceback_re.sub(r"\g<hdr>\n...\n\g<exception>\g<msg>", block))
    return "".join(blocks)
def set_memlimit(limit):
    global max_memuse
    global real_max_memuse
    sizes = {
        'k': 1024,
        'm': _1M,
        'g': _1G,
        't': 1024*_1G,
    }
    m = re.match(r'(\d+(\.\d+)?) (K|M|G|T)b?$', limit,
                 re.IGNORECASE | re.VERBOSE)
    if m is None:
        raise ValueError('Invalid memory limit %r' % (limit,))
    memlimit = int(float(m.group(1)) * sizes[m.group(3).lower()])
    real_max_memuse = memlimit
    if memlimit > MAX_Py_ssize_t:
        memlimit = MAX_Py_ssize_t
    if memlimit < _2G - 1:
        raise ValueError('Memory limit %r too low to be useful' % (limit,))
    max_memuse = memlimit
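One detail worth noting in the example above (my own observation; _limit_re is an illustrative name, not from the source): because the pattern is compiled with re.VERBOSE, the literal space before (K|M|G|T) is discarded, so accepted limit strings are written without a space.

import re

# Sketch only: the same pattern as above, under a hypothetical name.
_limit_re = re.compile(r'(\d+(\.\d+)?) (K|M|G|T)b?$', re.IGNORECASE | re.VERBOSE)
assert _limit_re.match('2.5Gb') is not None   # the space in the pattern is ignored
assert _limit_re.match('2.5 Gb') is None      # a space in the input has nothing to match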
def parse_title(title):
    """
    Returns parsed contents of a post's title
    """
    ro = re.compile(r"""
        (?P<artist>.+[^- ]+)    # The artist
        \s*-+\s*                # Skip some spaces and dashes
        (?P<title>.*)           # The title
        \s*\[                   # Skip some spaces and opening bracket
        (?P<genre>.*)           # The genre
        \]\s*\(                 # Skip closing bracket, spaces and opening parenthesis
        (?P<year>\d+)           # The year
        \)                      # Skip closing parenthesis
        """, re.VERBOSE | re.IGNORECASE)
    mo = ro.search(title)
    if mo is None:
        return
    return {'artist': mo.group('artist'), 'title': mo.group('title'),
            'genre': mo.group('genre'), 'year': mo.group('year')}
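A usage sketch for parse_title (my own; the title string is made up and the function above is assumed to be in scope):

parsed = parse_title("Some Artist - Some Song [Ambient] (2001)")
# parsed['artist'] == 'Some Artist'; parsed['genre'] == 'Ambient'; parsed['year'] == '2001'
# (the 'title' group keeps a trailing space: 'Some Song ')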
def wikilink(value):
    """
    Produce wiki style links to other pages within the database, for use in
    comments fields: {{ a_note|wikilink|truncatewords_html:5 }}
    Note that it's better to use truncatewords_html with this filter, rather
    than plain truncatewords
    """
    WIKILINK_RE = re.compile(r"""
        (?P<lead>\s|^)     # possible leading whitespace
        (?P<wikilink>/     # an initial /
            (\w+/)+        # multiples of any number of identifier chars + /
        )
        """,
        re.VERBOSE)

    def wikilink_sub_callback(match_obj):
        link = match_obj.group("wikilink")
        lead = match_obj.group("lead")
        return '%s<a href="%s">%s</a>' % (lead, escape(link), escape(link))
    return mark_safe(WIKILINK_RE.sub(wikilink_sub_callback, value))
def _real_extract(self, url):
    mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
    text_id = mobj.group('textid')
    page = self._download_json(
        'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
    info = page['info']
    formats = [
        {
            'format_id': f['type'],
            'filesize': int(f['filesize']),
            'url': f['url']
        } for f in info['rfiles']
    ]
    self._sort_formats(formats)
    return {
        'id': info['vid'],
        'title': info['Subject'],
        'duration': int(info['duration']) / 1000.0,
        'formats': formats,
        'thumbnail': info.get('bimg') or info.get('img'),
    }
def _real_extract(self, url):
    mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
    video_id = mobj.group('id')
    info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
    info = self._download_json(info_url, video_id)
    videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
    # Prefer sina video since they have thumbnails
    videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
    player_url = videos_urls[-1]
    m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
                      player_url)
    if m_sina is not None:
        self.to_screen('Sina video detected')
        sina_id = m_sina.group(1)
        player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
    return self.url_result(player_url)
def test_detect_verbose(self):
    """Test verbose."""
    pattern = bre.compile_search(
        r'''
        This is a # \Qcomment\E
        This is not a \# \Qcomment\E
        This is not a [#\ ] \Qcomment\E
        This is not a [\#] \Qcomment\E
        This\ is\ a # \Qcomment\E
        ''',
        re.VERBOSE
    )
    self.assertEqual(
        pattern.pattern,
        r'''
        This is a # \\Qcomment\\E
        This is not a \# comment
        This is not a [#\ ] comment
        This is not a [\#] comment
        This\ is\ a # \\Qcomment\\E
        '''
    )
def _apply_search_backrefs(pattern, flags=0):
    """Apply the search backrefs to the search pattern."""
    if isinstance(pattern, (compat.string_type, compat.binary_type)):
        re_verbose = bool(VERBOSE & flags)
        re_unicode = None
        if compat.PY3 and bool(ASCII & flags):
            re_unicode = False
        elif bool(UNICODE & flags):
            re_unicode = True
        pattern = SearchTemplate(pattern, re_verbose, re_unicode).apply()
    elif isinstance(pattern, RE_TYPE):
        if flags:
            raise ValueError("Cannot process flags argument with a compiled pattern!")
    else:
        raise TypeError("Not a string or compiled pattern!")
    return pattern
def stat_regexp_generator(data):
    """Generate a regular expression that will match swift-recon stats.
    Lines printed by swift-recon look like::
        [data] low: 0, high: 0, avg: 0.0, total: 0, Failed: 0.0%, no_result: 0, reported: 0
    Where data above is the value of the ``data`` parameter passed to the
    function.
    """
    expression = """\s+low:\s+(?P<low>\d+),             # parse out the low result
                    \s+high:\s+(?P<high>\d+),           # parse out the high result
                    \s+avg:\s+(?P<avg>\d+.\d+),         # you get the idea now
                    \s+total:\s+(?P<total>\d+),
                    \s+Failed:\s+(?P<failed>\d+.\d+%),
                    \s+no_result:\s+(?P<no_result>\d+),
                    \s+reported:\s+(?P<reported>\d+)"""
    return re.compile('\[' + data + '\]' + expression, re.VERBOSE)
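A usage sketch (my own; the ring name and numbers are made up) against the line format documented in the docstring above:

recon_line = "[async_pending] low: 0, high: 5, avg: 2.5, total: 10, Failed: 0.0%, no_result: 0, reported: 4"
match = stat_regexp_generator("async_pending").search(recon_line)
# match.group('high') == '5'; match.group('failed') == '0.0%'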
def rnc_markup_tokenizer(s):
    """
    [rn][mod1][num][\s-]
    """
    rn_re = re.compile(u"""(?P<p1>[b??#]?[ivIV]+)
                           (?P<p2>[^\d\s-]*)
                           (?P<p3>[^\s-]*)
                           (?P<sep>(\s*-\s*|\s*))""",
                       re.VERBOSE|re.UNICODE)
    i = 0
    retval = []
    while i < len(s):
        m = rn_re.match(s[i:])
        if not m:
            retval.append((u'ERR:%s' % s[i:], '', '', ''))
            break
        retval.append((m.group('p1'), m.group('p2'), m.group('p3'), m.group('sep')))
        i += m.end()
    return retval
def get_single_author_pattern():
    """Generates a simple, one-hit-only, author name pattern, matching just one author
    name in either of the 'S I' or 'I S' formats. The author patterns are the same
    ones used inside the main 'author group' pattern generator. This function is used
    not for reference extraction, but for author extraction. Numeration is appended
    to author patterns by default.
    @return (string): Just the author name pattern designed to identify single author names
    in both SI and IS formats. (NO 'et al', editors, 'and'... matching)
    @return: (string) the union of 'initial surname' and 'surname initial'
    authors"""
    return "(?:" + get_initial_surname_author_pattern(incl_numeration=True) + \
           "|" + get_surname_initial_author_pattern(incl_numeration=True) + ")"

# Targets single author names
# re_single_author_pattern = re.compile(get_single_author_pattern(), re.VERBOSE)
# pylint: enable=C0103
def __init__(cls, name, bases, dct):
    super(_TemplateMetaclass, cls).__init__(name, bases, dct)
    if 'pattern' in dct:
        pattern = cls.pattern
    else:
        pattern = _TemplateMetaclass.pattern % {
            'delim' : _re.escape(cls.delimiter),
            'id'    : cls.idpattern,
        }
    cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
def __compile_tokenize_pattern(self):
    """
    Compiles the regular expression used by self.tokenize() and stores
    a reference to it in self.tokenize_pattern. The full regular expression
    used here is a concatenation of several patterns (as written above
    self.__init__() and conditionally using either the word pattern that
    matches hyphen-broken words, or the pattern that only captures "whole"
    words.
    """
    # Capture hyphen-broken words as single tokens by default.
    word_pattern_str = self._pattern_str_word_with_hyphen_breaks
    # If we're not supposed to remove hyphen breaks, use the alternate word
    # pattern, which doesn't look for "hyphen breaks".
    if not self.remove_hyphen_breaks:
        word_pattern_str = self._pattern_str_word
    # Concatenate the separate pattern strings into the final pattern string.
    # The order here indicates group match priority (i.e. match "words"
    # first, etc.)
    # Join the regex pattern strings with the "or" character ("|").
    final_tokenize_pattern_str = r"|".join([
        word_pattern_str,
        self._pattern_str_entity,
        self._pattern_str_remnant,
        self._pattern_str_whitespace,
        self._pattern_str_newline
    ])
    # Compile the final pattern. Those strings have whitespace, so make
    # sure re.VERBOSE is one of the flags used!
    self.tokenize_pattern = re.compile(final_tokenize_pattern_str, re.I | re.VERBOSE)
def __init__(self, fileLoader, baseDir, varBase, sourceName):
    self.__pattern = re.compile(r"""
        \$<(?:
            (?P<escaped>\$) |
            (?P<named>[<'][^'>]+)['>]> |
            (?P<braced>[<'][^'>]+)['>]> |
            (?P<invalid>)
        )
        """, re.VERBOSE)
    self.__baseDir = baseDir
    self.__varBase = re.sub(r'[^a-zA-Z0-9_]', '_', varBase, flags=re.DOTALL)
    self.__fileLoader = fileLoader
    self.__sourceName = sourceName
def _env_var_constructor(loader, node):
    var = re.compile(r"\$\{([^}:\s]+):?([^}]+)?\}", re.VERBOSE)
    value = loader.construct_scalar(node)
    return var.sub(_replace_env_var, value)

def setup_yaml_parser():
    var = re.compile(r".*\$\{.*\}.*", re.VERBOSE)
    yaml.add_constructor('!env_var', _env_var_constructor)
    yaml.add_implicit_resolver('!env_var', var)
def test_ip_v4_pattern(self):
    ip_v4_pattern = self.patterns.IP_V4
    for ip_v4, result in IP_V4_DATA.items():
        if result:
            self.assertIsNotNone(re.match(ip_v4_pattern, ip_v4, re.VERBOSE | re.IGNORECASE | re.DOTALL))
        else:
            self.assertIsNone(re.match(ip_v4_pattern, ip_v4, re.VERBOSE | re.IGNORECASE | re.DOTALL))

def test_ip_v6_pattern(self):
    ip_v6_pattern = self.patterns.IP_V6
    for ip_v6, result in IP_V6_DATA.items():
        if result:
            self.assertIsNotNone(re.match(ip_v6_pattern, ip_v6, re.VERBOSE | re.IGNORECASE | re.DOTALL))
        else:
            self.assertIsNone(re.match(ip_v6_pattern, ip_v6, re.VERBOSE | re.IGNORECASE | re.DOTALL))
def check_name(self, name):
    pattern = re.compile(r"^[ a-zA-Z']+$", re.VERBOSE)
    if re.match(pattern, name):
        return True
    else:
        return False

# Check for valid Unix username
def check_username(self, username):
    pattern = re.compile(r"^\w{5,255}$", re.VERBOSE)
    if re.match(pattern, username):
        return True
    else:
        return False

# Check for valid Unix UID
def check_uid(self, uid):
    pattern = re.compile(r"^\d{1,10}$", re.VERBOSE)
    if re.match(pattern, uid):
        return True
    else:
        return False

# Check for valid IP address
def check_ip(self, ip):
    pattern = re.compile(r"\b(([01]?\d?\d|2[0-4]\d|25[0-5])\.){3}([01]?\d?\d|2[0-4]\d|25[0-5])\b", re.VERBOSE)
    if re.match(pattern, ip) and ip != "0.0.0.0":
        return True
    else:
        return False

# Check for valid system hostname
def check_hostname(self, hostname):
    pattern = re.compile(r"^[a-zA-Z0-9\-\.]{1,100}$", re.VERBOSE)
    if re.match(pattern, hostname):
        return True
    else:
        return False

# Display Menu
def _form_master_re(relist,reflags,ldict,toknames):
    if not relist: return []
    regex = "|".join(relist)
    try:
        lexre = re.compile(regex,re.VERBOSE | reflags)

        # Build the index to function map for the matching engine
        lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
        lexindexnames = lexindexfunc[:]

        for f,i in lexre.groupindex.items():
            handle = ldict.get(f,None)
            if type(handle) in (types.FunctionType, types.MethodType):
                lexindexfunc[i] = (handle,toknames[f])
                lexindexnames[i] = f
            elif handle is not None:
                lexindexnames[i] = f
                if f.find("ignore_") > 0:
                    lexindexfunc[i] = (None,None)
                else:
                    lexindexfunc[i] = (None, toknames[f])
        return [(lexre,lexindexfunc)],[regex],[lexindexnames]
    except Exception:
        m = int(len(relist)/2)
        if m == 0: m = 1
        llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames)
        rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames)
        return llist+rlist, lre+rre, lnames+rnames

# -----------------------------------------------------------------------------
# def _statetoken(s,names)
#
# Given a declaration name s of the form "t_" and a dictionary whose keys are
# state names, this function returns a tuple (states,tokenname) where states
# is a tuple of state names and tokenname is the name of the token. For example,
# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
# -----------------------------------------------------------------------------
def translate(string, lang_direction):
    """Takes a string that is to be translated and returns the translated string,
    doesn't translate the %(format)s parts, they must remain the same text as the msgid"""
    # simple format chars like %s can be 'translated' ok, they just pass through unaffected
    named_format_regex = re.compile(r"%\([^\)]+?\)[sd]", re.VERBOSE)
    matches = named_format_regex.findall(string)
    new = None
    if len(matches) == 0:
        # There are no format specifiers in this string, so just do a straight translation
        # this fails if we've missed a format specifier
        assert "%(" not in string, string
        new = translate_subpart(string, lang_direction)
    else:
        # we need to do complicate translation of the bits inside
        full_trans = translate_subpart(string, lang_direction)
        for match in matches:
            # then, for each format specifier, replace back in the string
            translated_match = translate_subpart(match, lang_direction)
            # during the translation some extra punctuation/spaces might have been added
            # remove them
            translated_match_match = named_format_regex.search(translated_match)
            assert translated_match_match
            translated_match = translated_match_match.group(0)
            # put back the format specifier, the case of the format specifier might have changed
            replace = re.compile(re.escape(translated_match), re.IGNORECASE)
            full_trans = replace.sub(match, full_trans)
        new = full_trans
    return new
def flags(key):
    flag = 0
    if 'a' in key:
        flag += re.ASCII
    if 'i' in key:
        flag += re.IGNORECASE
    if 'l' in key:
        flag += re.LOCALE
    if 'm' in key:
        flag += re.MULTILINE
    if 's' in key:
        flag += re.DOTALL
    if 'x' in key:
        flag += re.VERBOSE
    return flag
def extract_dependencies(file_path):
    """
    Parse the file contents and return the list of dependencies.
    """
    with open(file_path) as fh:
        file_contents = fh.read()
    match = re.search(r"""^\s+dependencies [^\[]+
                          \[
                          ([^\]]*)
                          \]""",
                      file_contents,
                      flags=re.VERBOSE | re.MULTILINE)
    if not match:
        return []
    deps = match.group(1).strip()
    if not deps:
        return []
    match_iter = re.finditer(r"""\(
                                 '([^']+)'
                                 ,\s*
                                 '([^_][^']+)'
                                 \)""",
                             deps,
                             flags=re.VERBOSE)
    return [(match.group(1), match.group(2)) for match in match_iter]
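A usage sketch for extract_dependencies (my own; the file contents and package names are made up) showing the block layout the two verbose patterns expect:

sample = """
    dependencies = [
        ('django', '1.8'),
        ('requests', '2.9'),
    ]
"""
# With this text written to a file, extract_dependencies(path) would return
# [('django', '1.8'), ('requests', '2.9')].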
def __init__(self, states, first):
    self.regexes = {}
    self.toks = {}
    for state, rules in states.items():
        parts = []
        for tok in rules:
            groupid = "t%d" % tok.id
            self.toks[groupid] = tok
            parts.append("(?P<%s>%s)" % (groupid, tok.regex))
        self.regexes[state] = re.compile("|".join(parts), re.MULTILINE | re.VERBOSE)
    self.state = first
def is_valid_ipv4(ip):
    """Validates IPv4 addresses.
    """
    pattern = re.compile(r"^\d{1,3}\.\d{1,3}.\d{1,3}.\d{1,3}$", re.VERBOSE | re.IGNORECASE)
    if pattern.match(ip) is None:
        return False
    for x in ip.split("."):
        val = int(x)
        if val < 0 or val > 255:
            return False
    return True
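Usage sketch for is_valid_ipv4 (my own addition; assumes the function above is in scope):

assert is_valid_ipv4("192.168.0.1")
assert not is_valid_ipv4("256.1.1.1")   # octet out of range
assert not is_valid_ipv4("10.0.0")      # too few octets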
def simplify_warnings(out):
    warn_re = re.compile(r"""
        # Cut the file and line no, up to the warning name
        ^.*:\d+:\s
        (?P<category>\w+): \s+    # warning category
        (?P<detail>.+) $ \n?      # warning message
        ^ .* $                    # stack frame
        """, re.VERBOSE | re.MULTILINE)
    return warn_re.sub(r"\g<category>: \g<detail>", out)
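A sketch of what simplify_warnings does to captured output (my own example; the file name and message are made up):

captured = "foo.py:3: UserWarning: something odd\n  x = compute()\n"
# simplify_warnings(captured) -> "UserWarning: something odd\n"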
def rcompile(pattern, flags=0, verbose=False):
    """A wrapper for re.compile that checks whether "pattern" is a regex object
    or a string to be compiled, and automatically adds the re.UNICODE flag.
    """
    if not isinstance(pattern, string_type):
        # If it's not a string, assume it's already a compiled pattern
        return pattern
    if verbose:
        flags |= re.VERBOSE
    return re.compile(pattern, re.UNICODE | flags)
def readtab(self,tabfile,fdict):
    if isinstance(tabfile,types.ModuleType):
        lextab = tabfile
    else:
        if sys.version_info[0] < 3:
            exec("import %s as lextab" % tabfile)
        else:
            env = { }
            exec("import %s as lextab" % tabfile, env,env)
            lextab = env['lextab']

    if getattr(lextab,"_tabversion","0.0") != __version__:
        raise ImportError("Inconsistent PLY version")

    self.lextokens = lextab._lextokens
    self.lexreflags = lextab._lexreflags
    self.lexliterals = lextab._lexliterals
    self.lexstateinfo = lextab._lexstateinfo
    self.lexstateignore = lextab._lexstateignore
    self.lexstatere = { }
    self.lexstateretext = { }
    for key,lre in lextab._lexstatere.items():
        titem = []
        txtitem = []
        for i in range(len(lre)):
            titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),_names_to_funcs(lre[i][1],fdict)))
            txtitem.append(lre[i][0])
        self.lexstatere[key] = titem
        self.lexstateretext[key] = txtitem
    self.lexstateerrorf = { }
    for key,ef in lextab._lexstateerrorf.items():
        self.lexstateerrorf[key] = fdict[ef]
    self.begin('INITIAL')

# ------------------------------------------------------------
# input() - Push a new string into the lexer
# ------------------------------------------------------------
def _real_extract(self, url):
    m = re.match(self._VALID_URL, url, re.VERBOSE)
    if m.group('type').startswith('embed'):
        desktop_url = m.group('proto') + 'www' + m.group('urlmain')
        return self.url_result(desktop_url, 'TED')
    name = m.group('name')
    if m.group('type_talk'):
        return self._talk_info(url, name)
    elif m.group('type_watch'):
        return self._watch_info(url, name)
    else:
        return self._playlist_videos_info(url, name)
def _real_extract(self, url):
    mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
    if mobj is None:
        raise ExtractorError('Invalid URL: %s' % url)
    track_id = mobj.group('track_id')
    token = None
    if track_id is not None:
        info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
        full_title = track_id
        token = mobj.group('secret_token')
        if token:
            info_json_url += "&secret_token=" + token
    elif mobj.group('player'):
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        real_url = query['url'][0]
        # If the token is in the query of the original url we have to
        # manually add it
        if 'secret_token' in query:
            real_url += '?secret_token=' + query['secret_token'][0]
        return self.url_result(real_url)
    else:
        # extract uploader (which is in the url)
        uploader = mobj.group('uploader')
        # extract simple title (uploader + slug of song title)
        slug_title = mobj.group('title')
        token = mobj.group('token')
        full_title = resolve_title = '%s/%s' % (uploader, slug_title)
        if token:
            resolve_title += '/%s' % token
        self.report_resolve(full_title)
        url = 'http://soundcloud.com/%s' % resolve_title
        info_json_url = self._resolv_url(url)
    info = self._download_json(info_json_url, full_title, 'Downloading info JSON')
    return self._extract_info_dict(info, full_title, secret_token=token)
def cds_from_gbk(gb_file):
    gb_record = SeqIO.read(open(gb_file, "rU"), "genbank")
    #if strain_id is not None:
    #    gb_record.id = strain_id
    output = pd.DataFrame()
    sign = lambda x: '+' if x > 0 else '-'
    for feature in gb_record.features:
        if feature.type == "CDS":
            tmp = {}
            tmp = {'BGC': gb_record.id,
                   'locus_tag': feature.qualifiers['locus_tag'][0],
                   'start': feature.location.start.position,
                   'stop': feature.location.end.position,
                   'strand': sign(feature.location.strand)}
            if 'note' in feature.qualifiers:
                for note in feature.qualifiers['note']:
                    product = re.search(r"""smCOG: \s (?P<product>.*?) \s+ \(Score: \s* (?P<score>.*); \s* E-value: \s (?P<e_value>.*?)\);""", note, re.VERBOSE)
                    if product is not None:
                        product = product.groupdict()
                        product['score'] = float(product['score'])
                        product['e_value'] = float(product['e_value'])
                        for p in product:
                            tmp[p] = product[p]
            output = output.append(pd.Series(tmp), ignore_index=True)
    return output
def find_id(self, contents=None):
    contents = self._load_url() if not contents else contents
    if not contents:
        return False
    pattern = r'(entity_id["\' ]{1,3}:["\' ]{1,3})([\d]+)'
    regex = re.compile(pattern, flags=re.VERBOSE)
    match = regex.search(contents)
    try:
        return match.group(2)
    except (IndexError, AttributeError):
        return False
def _parse(self):
    """Get axes and shape from file names."""
    if not self.pattern:
        raise self.ParseError("invalid pattern")
    pattern = re.compile(self.pattern, re.IGNORECASE | re.VERBOSE)
    matches = pattern.findall(self.files[0])
    if not matches:
        raise self.ParseError("pattern doesn't match file names")
    matches = matches[-1]
    if len(matches) % 2:
        raise self.ParseError("pattern doesn't match axis name and index")
    axes = ''.join(m for m in matches[::2] if m)
    if not axes:
        raise self.ParseError("pattern doesn't match file names")
    indices = []
    for fname in self.files:
        matches = pattern.findall(fname)[-1]
        if axes != ''.join(m for m in matches[::2] if m):
            raise ValueError("axes don't match within the image sequence")
        indices.append([int(m) for m in matches[1::2] if m])
    shape = tuple(numpy.max(indices, axis=0))
    start_index = tuple(numpy.min(indices, axis=0))
    shape = tuple(i-j+1 for i, j in zip(shape, start_index))
    if product(shape) != len(self.files):
        warnings.warn("files are missing. Missing data are zeroed")
    self.axes = axes.upper()
    self.shape = shape
    self._indices = indices
    self._start_index = start_index