Python re module: finditer() example source code
We collected the following 50 code examples from open-source Python projects to illustrate how re.finditer() is used.
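Before the project excerpts below, here is a minimal, self-contained sketch (not taken from any of the listed projects; the text and pattern are invented) showing what re.finditer() yields: an iterator of match objects, each exposing the matched text, its groups, and its position in the input.
import re

text = "width=200, height=100"
for m in re.finditer(r"(\w+)=(\d+)", text):
    # each m is a match object; span() gives the (start, end) offsets
    print(m.group(1), m.group(2), m.span())
# width 200 (0, 9)
# height 100 (11, 21)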
def fetch():
retval = {}
content = retrieve_content(__url__)
if __check__ in content:
for match in re.finditer(r"(\d+\.\d+\.\d+\.\d+)/(\d+)", content):
prefix, mask = match.groups()
mask = int(mask)
start_int = addr_to_int(prefix) & make_mask(mask)
end_int = start_int | ((1 << 32 - mask) - 1)
if 0 <= end_int - start_int <= 1024:
address = start_int
while start_int <= address <= end_int:
retval[int_to_addr(address)] = (__info__, __reference__)
address += 1
return retval
def split_arg_string(string):
"""Given an argument string this attempts to split it into small parts."""
rv = []
for match in re.finditer(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
r'|\S+)\s*', string, re.S):
arg = match.group().strip()
if arg[:1] == arg[-1:] and arg[:1] in '"\'':
arg = arg[1:-1].encode('ascii', 'backslashreplace') \
.decode('unicode-escape')
try:
arg = type(string)(arg)
except UnicodeError:
pass
rv.append(arg)
return rv
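A minimal usage sketch for split_arg_string() above (the input string is invented): quoted chunks stay together and have their quotes and escapes removed, while everything else splits on whitespace.
print(split_arg_string("convert 'my file.png' --size 100"))
# ['convert', 'my file.png', '--size', '100']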
def _GetEndOfTableIfNotAlias(self, query: str, column_name: str) -> int:
"""Get the start of the column if it is not an alias column
Args:
query (str): the query to be searched
column_name (str): the name to be searched for
Returns:
int: 0 if no column could be found, otherwise the starting position of the
column
"""
wrong_positions = [name.start() for name in
re.finditer('.{0} as'.format(column_name), query)]
found_positions = []
for space in self._POSSIBLEQUERYSEPERATOR:
found_positions += [name.start() for name in
re.finditer('.{0}{1}'.format(column_name, space),
query)]
position = set(found_positions) - set(wrong_positions)
if position:
return position.pop()
else:
return 0
def register_options(self):
# type: () -> None
"""Parse options from text like this:
Preferences:
[+|-]alignArguments Enable/disable ...
...
[+|-]spacesWithinPatternBinders Enable/disable ...
-alignSingleLineCaseStatements.maxArrowIndent=[1-100] Set Maximum number ...
-indentSpaces=[1-10] Set Number of spaces ...
"""
exeresult = run_executable(self.exe, ['--help'], cache=self.cache)
options = []
text = unistr(exeresult.stdout)
for m in re.finditer(r'^ (\[\+\|-\]|-)([a-z][a-zA-Z.]+)(?:=\[(\d+)-(\d+)\])?', text,
re.MULTILINE):
optionprefix, optionname, start, end = m.groups()
if start is None:
optiontype = 'bool'
configs = [True, False] # type: List[OptionValue]
else:
optiontype = 'int'
configs = list(inclusiverange(int(start), int(end)))
options.append(option_make(optionname, optiontype, configs))
self.styledefinition = styledef_make(options)
def filter_output(self, output, regex):
""" Filter output from a command """
result = {}
result_list = []
if isinstance(output, str):
lines = [output]
else:
lines = output
for line in lines:
iterator = re.finditer(regex, line)
try:
while True:
cur = next(iterator)  # use the next() builtin; iterator.next() is Python 2 only
result = cur.groupdict()
result['hostname'] = self.host
result_list.append(result)
except StopIteration:
pass
return result_list
def split_into_sentences(text):
potential_end_pat = re.compile(r"".join([
r"([\w\.'’&\]\)]+[\.\?!])", # A word that ends with punctuation
r"([‘’“”'\"\)\]]*)", # Followed by optional quote/parens/etc
r"(\s+(?![a-z\-–—]))", # Followed by whitespace + non-(lowercase or dash)
]),
re.U
)
dot_iter = re.finditer(potential_end_pat, text)
end_indices = [
(x.start() + len(x.group(1)) + len(x.group(2)))
for x in dot_iter
if is_sentence_ender(x.group(1))
]
spans = zip([None] + end_indices, end_indices + [None])
sentences = [
text[start:end].strip() for start, end in spans
]
return sentences
def create_ad_hoc_field(cls, db_type):
'''
Give an SQL column description such as "Enum8('apple' = 1, 'banana' = 2, 'orange' = 3)"
this method returns a matching enum field.
'''
import re
try:
Enum # exists in Python 3.4+
except NameError:
from enum import Enum # use the enum34 library instead
members = {}
for match in re.finditer("'(\w+)' = (\d+)", db_type):
members[match.group(1)] = int(match.group(2))
enum_cls = Enum('AdHocEnum', members)
field_class = Enum8Field if db_type.startswith('Enum8') else Enum16Field
return field_class(enum_cls)
def tokenize(token_specification, text):
Token = collections.namedtuple('Token', ['typ', 'value', 'line', 'column', 'mo'])
token_specification.extend((
('NEWLINE', r'\n'), # Line endings
('SKIP', r'.'), # Any other character
))
tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
line_num = 1
line_start = 0
for mo in re.finditer(tok_regex, text):
kind = mo.lastgroup
value = [g for g in mo.groups() if g is not None]  # materialize the non-None groups (filter() is lazy on Python 3)
if kind == 'NEWLINE':
line_start = mo.end()
line_num += 1
elif kind == 'SKIP':
pass
else:
column = mo.start() - line_start
yield Token(kind, value, line_num, column, mo)
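A minimal usage sketch for the tokenize() generator above (the token specification and the input string are invented): re.finditer() walks the combined named-group pattern, and the line/column numbers come from the match offsets.
import collections
import re

spec = [('NUMBER', r'\d+'), ('IDENT', r'[A-Za-z_]\w+'), ('OP', r'[+*=-]')]
for token in tokenize(spec, "x1 = 42 +\ny2"):
    print(token.typ, token.line, token.column)
# IDENT 1 0
# OP 1 3
# NUMBER 1 5
# OP 1 8
# IDENT 2 0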
def fetch():
retval = {}
content = retrieve_content(__url__)
if __check__ in content:
for match in re.finditer(r"(?m)^([\d.]+),IP used by ([^,]+) C&C", content):
retval[match.group(1)] = ("%s (malware)" % match.group(2).lower(), __reference__)
for row in re.finditer(r"(?s)<tr>(.+?)</tr>", content):
if "<span>100%</span>" in row.group(1):
domain = re.search(r"get_data_domain\('([^']+)", row.group(1))
if domain:
tag = re.search(r">(trojan|spyware|adware)\.([^<]+)", row.group(1))
retval[domain.group(1)] = (("%s (malware)" % tag.group(2)) if tag else "malware", __reference__)
return retval
def remove_job(self, job_guid):
"""
Remove a job given its GUID, including decreasing the job counter of the queue
:param job_guid: the GUID of the job to remove from queue
:return: void
"""
global _os_ver
occurs = [(oc.start(), oc.end()) for oc in
list(re.finditer('%s' % (BITSStateFile.JOB_HEADER_FOOTER_HEX[_os_ver].decode('hex')),
self._original_data))
]
if occurs:
self.set_jobs_counter(self.get_jobs_counter() - 1)
state_off = self._get_job_state_off(job_guid)
new_data_list = list(self._new_data)
job_start_off, job_end_off = BITSStateFile._get_job_limits_by_index_in_between(occurs, state_off)
new_data_list = new_data_list[:job_start_off + 1] + new_data_list[job_end_off + 1:]
self._update_new_data(0, "".join(new_data_list))
self.commit()
def post(self, request, pk, **kwargs):
instance = CognateClassCitation.objects.get(id=pk)
form = EditCognateClassCitationForm(request.POST, instance=instance)
try:
# validate {ref foo ...}
s = Source.objects.all().filter(deprecated=False)
pattern = re.compile(r'(\{ref +([^\{]+?)(:[^\{]+?)? *\})')
for m in re.finditer(pattern, form.data['comment']):
foundSet = s.filter(shorthand=m.group(2))
if not foundSet.count() == 1:
raise ValidationError('In field “Comment” source shorthand “%(name)s” is unknown.',
params={'name': m.group(2)})
form.save()
except ValidationError as e:
messages.error(
request,
'Sorry, the server had problems updating the cognate citation. %s' % e)
return self.render_to_response({"form": form})
return HttpResponseRedirect(reverse('cognate-class-citation-detail', args=[pk]))
def after_compile(self):
if self.engine.positional:
self.positiontup = []
match = r'%\(([\w_]+)\)s'
params = re.finditer(match, self.strings[self.statement])
for p in params:
self.positiontup.append(p.group(1))
if self.engine.paramstyle=='qmark':
self.strings[self.statement] = re.sub(match, '?', self.strings[self.statement])
elif self.engine.paramstyle=='format':
self.strings[self.statement] = re.sub(match, '%s', self.strings[self.statement])
elif self.engine.paramstyle=='numeric':
i = [0]
def getnum(x):
i[0] += 1
return str(i[0])
self.strings[self.statement] = re.sub(match, getnum, self.strings[self.statement])
def test_patterns(text, patterns):
"""Given source text and a list of patterns, look for
matches for each pattern within the text and print
them to stdout.
"""
# Look for each pattern in the text and print the results
for pattern, desc in patterns:
print("'{}' ({})\n".format(pattern, desc))
print(" '{}'".format(text))
for match in re.finditer(pattern, text):
s = match.start()
e = match.end()
substr = text[s:e]
n_backslashes = text[:s].count('\\')
prefix = '.' * (s + n_backslashes)
print(" {}'{}'".format(prefix, substr))
print()
return
def pun(self, ctx):
'''
Gives a random pun from the depths of the internet
'''
# Read from page
async with self.session.get('http://www.punoftheday.com/cgi-bin/randompun.pl') as r:
page = await r.text()
# Scrape the raw HTML
r = r'(<div class=\"dropshadow1\">\n<p>).*(</p>\n</div>)'
foundPun = [i for i in finditer(r, page)][0].group()
# Filter out the pun
r = r'(>).*(<)'
filteredPun = [i for i in finditer(r, foundPun)][0].group()
# Boop it out
fullPun = filteredPun[1:-1]
await self.sparcli.say(fullPun)
def steamid(self, ctx, *, gameURL:str):
'''
Gets the information of a game from Steam URL
'''
await self.sparcli.send_typing(ctx.message.channel)
# Grab the game ID from the user input
regexMatches = finditer(r'\d+', gameURL)
regexList = [i for i in regexMatches]
# Parse it as a group
if len(regexList) == 0:
await self.sparcli.say('I was unable to find the ID of that game on the Steam API.')
return
else:
await self.getSteamGameInfo(regexList[0].group())
def messageToEmbed(message):
# Get some default values that'll be in the embed
author = message.author
description = message.content
image = False
# Check to see if any images were added
regexMatch = r'.+(.png)|.+(.jpg)|.+(.jpeg)|.+(.gif)'
if len(message.attachments) > 0:
attachment = message.attachments[0]
matchList = [i for i in finditer(regexMatch, attachment['filename'])]
if len(matchList) > 0:
image = attachment['url']
# Get the time the message was created
createdTime = '.'.join(str(message.timestamp).split('.')[:-1])
# Make and return the embed
return makeEmbed(user=author, description=description, image=image, footer=createdTime)
def match_and_replace(
text = None,
rule = None,
phoneme = None
):
"""
Replace found text from a single rule.
"""
# Find all rule matches.
matches = [(match.start(), match.end()) for \
match in re.finditer(rule, text)]
# Start from behind, so replace in-place.
matches.reverse()
# Convert to characters because strings are immutable.
characters = list(text)
for start, end in matches:
characters[start:end] = phoneme
# Convert back to string.
return "".join(characters)
def unpack_dword(line):
outs = ''
i = 0
for m in re.finditer(r'((?:0x[0-9a-f]{8},?\s*))', line):
l = m.group(0)
l = l.replace(',', '')
l = l.replace(' ', '')
dword = int(l, 16)
unpack = reversed([
(dword & 0xff000000) >> 24,
(dword & 0x00ff0000) >> 16,
(dword & 0x0000ff00) >> 8,
(dword & 0x000000ff)
])
i += 4
for b in unpack:
outs += '%02x' % b
out(dbg("After callback ('%s')" % outs))
return BytesParser.formats_compiled['hexstring'].match(outs)
def extract_videos_from_page(self, page):
ids_in_page = []
titles_in_page = []
for mobj in re.finditer(self._VIDEO_RE, page):
# The link with index 0 is not the first video of the playlist (not sure if this is still the case)
if 'index' in mobj.groupdict() and mobj.group('id') == '0':
continue
video_id = mobj.group('id')
video_title = unescapeHTML(mobj.group('title'))
if video_title:
video_title = video_title.strip()
try:
idx = ids_in_page.index(video_id)
if video_title and not titles_in_page[idx]:
titles_in_page[idx] = video_title
except ValueError:
ids_in_page.append(video_id)
titles_in_page.append(video_title)
return zip(ids_in_page, titles_in_page)
def format_to_regex(self, fmt):
"""
Converts a string like
'%(title)s - %(artist)s'
to a regex like
'(?P<title>.+)\ \-\ (?P<artist>.+)'
"""
lastpos = 0
regex = ""
# replace %(..)s with regex group and escape other string parts
for match in re.finditer(r'%\((\w+)\)s', fmt):
regex += re.escape(fmt[lastpos:match.start()])
regex += r'(?P<' + match.group(1) + '>.+)'
lastpos = match.end()
if lastpos < len(fmt):
regex += re.escape(fmt[lastpos:len(fmt)])
return regex
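A minimal usage sketch for format_to_regex() above, using the template from its docstring (obj stands for a hypothetical instance of the class that defines the method; the title/artist string is invented):
import re

# obj: hypothetical instance providing format_to_regex()
pattern = obj.format_to_regex('%(title)s - %(artist)s')
m = re.match(pattern, 'Blue Train - John Coltrane')
print(m.group('title'), '/', m.group('artist'))
# Blue Train / John Coltrane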
def extract_object(self, objname):
obj = {}
obj_m = re.search(
(r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' +
r'\}\s*;',
self.code)
fields = obj_m.group('fields')
# Currently, it only supports function definitions
fields_m = re.finditer(
r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
fields)
for f in fields_m:
argnames = f.group('args').split(',')
obj[f.group('key')] = self.build_function(argnames, f.group('code'))
return obj
def find_links(file):
"""Find all markdown links in a file object.
Yield (lineno, regexmatch) tuples.
"""
# don't yield same link twice
seen = set()
# we need to loop over the file two lines at a time to support
# multi-line (actually two-line) links, so this is kind of a mess
firsts, seconds = itertools.tee(file)
next(seconds) # first line is never second line
# we want 1-based indexing instead of 0-based and one-line links get
# caught from linepair[1], so we need to start at two
for lineno, linepair in enumerate(zip(firsts, seconds), start=2):
lines = linepair[0] + linepair[1]
for match in re.finditer(_LINK_REGEX, lines, flags=re.DOTALL):
if match.group(0) not in seen:
seen.add(match.group(0))
yield match, lineno
def split_problematic_endpoints_line(line):
"""
If a host line contains more than one ":",
for example: 10.99.184.69:900010.37.170.125:9006
this splits the line and returns a list of correct endpoints
Args:
``line``: the problematic line which contains more than one endpoint string.
Returns:
the split list of the problematic line, with correct endpoint strings.
"""
colon_parts = line.strip().split(":")
offset = len(colon_parts[-1])
colon_positions = [m.start() for m in re.finditer(':', line)]
start = 0
split_parts = []
for colon_position in colon_positions:
end = colon_position + offset + 1
split_part = line[start:end]
split_parts.append(split_part)
start = end
return split_parts
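A minimal usage sketch for split_problematic_endpoints_line() above, reusing the concatenated example from its docstring: the ':' positions found by re.finditer() plus the width of the last port are used to cut the line.
print(split_problematic_endpoints_line("10.99.184.69:900010.37.170.125:9006"))
# ['10.99.184.69:9000', '10.37.170.125:9006']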
def get_classes(self, folder, class_suffix, selected = None):
classes = []
for file in glob.glob(folder + "/*.py"):
handle = open(file, "r")
content = handle.read()
handle.close()
module = folder.replace('/', '.').replace('\\', '.') + '.' + os.path.basename(file).replace('.py', '')
regexp = "\sclass\s+([\w\d]+"+class_suffix+")\s*\(([\w\d]*)\)\s*:\s"
for m in re.finditer(regexp, content):
parent_class = m.group(2)
if len(parent_class) == 0 or parent_class == 'object':
continue
class_name = m.group(1)
classes.append(module + '.' + class_name)
return classes
def analyze(line, linenum, lang):
annotations = []
if lang is Language.en_EN or lang is None:
weasel_words = WEASEL_WORDS_EN
elif lang is Language.de_DE:
weasel_words = WEASEL_WORDS_DE
for weasel_pattern in weasel_words:
for match in re.finditer(weasel_pattern, line, flags=re.IGNORECASE):
if (not verify_match(match, line)) or (not matches_whole_words(match, line)):
continue
index = match.start()
annotation = WeaselWord(linenum, line, index, word=match.group(0))
annotations.append(annotation)
return annotations
def analyze(line, linenum, lang):
annotations = []
if lang is Language.en_EN or lang is None:
contractions = CONTRACTIONS_EN
elif lang is Language.de_DE:
contractions = CONTRACTIONS_DE
for pattern in contractions:
for match in re.finditer(pattern, line, flags=re.IGNORECASE):
if (not verify_match(match, line)) or (not matches_whole_words(match, line)):
continue
index = match.start()
replaced_contraction = re.sub(pattern, contractions[pattern], match.group(0), flags=re.IGNORECASE)
annotation = Contraction(linenum, line, index, word=match.group(0), contraction=replaced_contraction)
annotations.append(annotation)
return annotations
def paged_github_json_request(url, headers=None):
response = requests.get(url, headers=headers)
assert response.ok, response.content
results = response.json()
if 'Link' in response.headers:
links = response.headers['Link']
# There are likely better ways to parse/extract the link information
# but here we just find the last page number mentioned in the header
# 'Link' section and then loop over all pages to get the comments
last_match = list(re.finditer('page=[0-9]+', links))[-1]
last_page = int(links[last_match.start():last_match.end()].split('=')[1])
# If there are other pages, just loop over them and get all the
# comments
if last_page > 1:
for page in range(2, last_page + 1):
response = requests.get(url + '?page={0}'.format(page), headers=headers)
assert response.ok, response.content
results += response.json()
return results
def getOfflineMediaList(self, folderName=False, title=False, contentType=7):
mediaFiles = []
for r1 in re.finditer('\{(.*?)\"spaces\"\:' , entryS, re.DOTALL):
entry = r1.group(1)
media = self.getMediaPackage(entry, folderName=folderName, contentType=contentType, fanart=folderFanart, icon=folderIcon)
if media is not None:
mediaFiles.append(media)
return mediaFiles
##
# retrieve a list of videos, using playback type stream
# parameters: prompt for video quality (optional), cache type (optional)
# returns: list of videos
##
def get_occurences(self, pattern, overlap=False):
"""Return position of the input pattern in the sequence
::
>>> from sequana import Sequence
>>> s = Sequence('ACGTTTTACGT')
>>> s.get_occurences("ACGT")
[0, 7]
"""
if overlap is False:
res = [m.start() for m in re.finditer(pattern, self.sequence)]
elif overlap is True:
res = [m.start() for m in re.finditer('(?=%s)'%pattern, self.sequence)]
return res
# reverse find-all without overlaps, you can combine positive and
# negative lookahead into an expression like this:
#res = [m.start() for m in re.finditer('(?=%s)(?!.{1,%d}%s)' % (search,
# len(pattern)-1, pattern), 'ttt')]
def find_cute(url):
# fetch the page
r = requests.get(url)
# detect the page charset and fix r.encoding accordingly
encoding = re.search('content="text/html;\s*charset=(.*?)"', r.text).group(1)
r.encoding = encoding
# print(r.text)
finds = re.finditer(r'<p>\s*([^>]*?)\s*\n', r.text)
i = random.randrange(sum(1 for _ in finds))  # pick a random match index; randrange excludes the count itself
start = 0
finds = re.finditer(r'<p>\s*([^>]*?)\s*\n', r.text)
for f in finds:
if start == i:
print(f.group(1))
break
start += 1
def _proc_gnusparse_00(self, next, pax_headers, buf):
"""Process a GNU tar extended sparse header, version 0.0.
"""
offsets = []
for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
offsets.append(int(match.group(1)))
numbytes = []
for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
numbytes.append(int(match.group(1)))
next.sparse = list(zip(offsets, numbytes))
def get_selects_from_text(content):
sqls = []
select_keyword = '@Select\s*\('
for m in re.finditer(select_keyword, content):
rparen_pos = MybatisInlineSqlExtractor.find_right_paren_pos(content[m.end():])
if rparen_pos < 0:
continue
sqls.append(SQL('', eval(content[m.end():m.end() + rparen_pos].replace('\r', '').replace('\n', '')).strip()))
return sqls
def get_properties_dict(self, properties_str):
if isinstance(properties_str, dict):
return properties_str
properties = collections.OrderedDict()
pattern = re.compile('(\S+?)\s*"(.*?)"')
for m in re.finditer(pattern, properties_str):
key = m.group(1)
value = m.group(2)
properties[key] = value
return properties
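A minimal usage sketch for get_properties_dict() above (obj stands for a hypothetical instance of the class that defines the method; the properties string is invented): each key/quoted-value pair found by re.finditer() lands in an OrderedDict.
# obj: hypothetical instance providing get_properties_dict()
props = obj.get_properties_dict('name "demo" version "1.2"')
print(props)
# OrderedDict([('name', 'demo'), ('version', '1.2')])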
def get_unambiguous_regions(reference_path):
'''Calculate regions corresponding to unambiguous bases'''
chrom_map = {}
for chrom, seq in open_reference(reference_path).items():
regions = [(m.start(), m.end()) for m in re.finditer('[acgtACGT]+', seq[:])]
chrom_map[chrom] = Regions(regions=regions)
return chrom_map
def tokenize_sents(string):
"""
Tokenize input text to sentences.
:param string: Text to tokenize
:type string: str or unicode
:return: sentences
:rtype: list of strings
"""
string = six.text_type(string)
spans = []
for match in re.finditer('[^\s]+', string):
spans.append(match)
spans_count = len(spans)
rez = []
off = 0
for i in range(spans_count):
tok = string[spans[i].start():spans[i].end()]
if i == spans_count - 1:
rez.append(string[off:spans[i].end()])
elif tok[-1] in ['.', '!', '?', '…', '»']:
tok1 = tok[re.search('[.!?…»]', tok).start()-1]
next_tok = string[spans[i + 1].start():spans[i + 1].end()]
if (next_tok[0].isupper()
and not tok1.isupper()
and not (tok[-1] != '.'
or tok1[0] == '('
or tok in ABBRS)):
rez.append(string[off:spans[i].end()])
off = spans[i + 1].start()
return rez
def register_options(self):
# type: () -> None
"""Parse options from text like this:
# Uncrustify 0.63
#
# General options
#
newlines { Auto, LF, CR, CRLF }
The type of line endings
input_tab_size Number
The original size of tabs in the input
indent_align_string { False, True }
Whether to indent strings broken by '\' so that they line up
"""
exeresult = run_executable(self.exe, ['--show-config'], cache=self.cache)
options = []
text = unistr(exeresult.stdout)
for m in re.finditer(r'^(\w+)\s+(.*?)\s*$', text, re.MULTILINE):
optionname, optiondesc = m.group(1), m.group(2)
if optiondesc.startswith('{'):
optiontype = 'Enum'
configs = optiondesc[1:-1].strip().split(', ')
configs = [c.lower() for c in configs]
else:
optiontype = optiondesc
configs = []
options.append(option_make(optionname, optiontype, configs))
self.styledefinition = styledef_make(options)
def __handle_tostring(self):
for match in re.finditer('(\d+)\[t\+o\+S\+t\+r\+i\+n\+g\](\d+)', self.js):
repl = to_base(match.group(1), match.group(2))
self.js = self.js.replace(match.group(0), repl)
def __get_attribs(element):
attribs = {}
for match in re.finditer('''\s+(?P<key>[^=]+)=\s*(?:(?P<delim>["'])(?P<value1>.*?)(?P=delim)|(?P<value2>[^"'][^>\s]*))''', element):
match = match.groupdict()
value1 = match.get('value1')
value2 = match.get('value2')
value = value1 if value1 is not None else value2
if value is None: continue
attribs[match['key'].lower().strip()] = value
return attribs
def album(URL):
track_list=[]
if (URL.find('?index=')>0):
all_track_nr=((html.count('?index='))//2)-1
a1=URL[:URL.find('?index=')]
current_track_no=int(URL[len(a1)+len('?index='):])
ID=a1[a1.find('/album/')+len('/album/'):]
track_list.append('%s'%current_track_no)
elif (URL.find('?start')>0):
all_track_nr=((html.count('?index='))//2)-1
a1=URL[:URL.find('?start')]
current_track_no=int(URL[len(a1)+len('?start'):])
ID=a1[a1.find('/album/')+len('/album/'):]
track_list.append('%s'%current_track_no)
else:
all_track_nr=(html.count('?index='))//2
a1=URL
current_track_no='null'
ID=a1[a1.find('/album/')+len('/album/'):]
track_list.append('%s'%current_track_no)
i=0
b=html[html.find('<span class="song_name">'):html.rfind('<span class="song_name">')]
b_len=len('<span class="song_name">')
iter=re.finditer(r'<span class="song_name">', b)
indices=[m.start(0) for m in iter]
while i<all_track_nr:
track_list.append('%s?index=%d'%(a1,i))
d=(b[indices[i]:].find('</span>'))
track_name=b[indices[i]+b_len:indices[i]+d]
track_list.append(track_name)
i+=1
return(track_list)
def legacy_format_json(original):
# save state
states = []
text = original
# save position for double-quoted texts
for i, pos in enumerate(re.finditer('"', text)):
# pos.start() is a double-quote
p = pos.start() + 1
if i % 2 == 0:
nxt = text.find('"', p)
states.append((p, text[p:nxt]))
# replace all weired characters in text
while text.find(',,') > -1:
text = text.replace(',,', ',null,')
while text.find('[,') > -1:
text = text.replace('[,', '[null,')
# recover state
for i, pos in enumerate(re.finditer('"', text)):
p = pos.start() + 1
if i % 2 == 0:
j = int(i / 2)
nxt = text.find('"', p)
# replacing a portion of a string
# use slicing to extract those parts of the original string to be kept
text = text[:p] + states[j][1] + text[nxt:]
converted = json.loads(text)
return converted
def make_RefCmap(fasta_file, enz=None, min_len=20, min_nsite=5, path=None):
name = fasta_file.rsplit('.',1)[0].split('/')[-1]
index = 0
enzymes = {'BspQI':'GCTCTTC',
'BbvCI':'CCTCAGC',
'Bsml':'GAATGC',
'BsrDI':'GCAATG',
'bseCI':'ATCGAT',
'BssSI':'CACGAG'}
try:
cmap_file='%s/%s_%s.cmap'%(path,name,enz)
forwards = enzymes[enz]
reverse = str(Seq(forwards).reverse_complement())
with open (cmap_file,'a') as ref_cmap:
ref_cmap.write('# CMAP File Version:\t0.1\n')
ref_cmap.write('# Label Channels:\t1\n')
ref_cmap.write('# Nickase Recognition Site 1:\t%s\n'%forwards)
ref_cmap.write('# Enzyme1:\tNt.%s\n'%enz)
ref_cmap.write('# Number of Consensus Nanomaps:\tN/A\n')
ref_cmap.write('#h CMapId\tContigLength\tNumSites\tSiteID\tLabelChannel\tPosition\tStdDev\tCoverage\tOccurrence\n')
ref_cmap.write('#f int\tfloat\tint\tint\tint\tfloat\tfloat\tint\tint\n')
for seqs in SeqIO.parse(fasta_file,'fasta'):
seq = str(seqs.seq.upper())
seq_len = len(seq)
index+=1
if seq_len >= min_len*1000:
nsites = len(re.findall('%s|%s'%(forwards,reverse),seq))
if nsites >=min_nsite:
j=1
for o in re.finditer('%s|%s'%(forwards,reverse),seq):
ref_cmap.write('%s\t%.1f\t%d\t%d\t1\t%.1f\t1.0\t1\t1\n'%(index,seq_len,nsites,j,o.start()+1))
j+=1
ref_cmap.write('%s\t%.1f\t%d\t%d\t0\t%.1f\t0.0\t1\t0\n'%(index,seq_len,nsites,j,seq_len))
except:
pass
def formatString(self, text, stats, not_found_replacement = None):
#try:
values = stats['values']
for m in re.finditer("{{([gc]:)?([^}:]*)((:d)|(:1f)|:(\d+)|:(\d+)\.(\d+)f|(:\+d)|(:\+1f))?}}", text):
g, g1, key, g2, sg1, sg2, sg3, sg4a, sg4b, sg5, sg6 = m.group(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
if not key in values:
if not_found_replacement is None:
if d: LOG_NOTE('No key in values of %s (%s)' % (stats.get('_type', 'unknown'), key))
else:
text = text.replace('%s' % g, not_found_replacement)
elif g1 is None:
if g2 is None:
text = text.replace('{{%s}}' % key, self.applyMacros(values[key]))
elif sg1:
text = text.replace('{{%s:d}}' % key, self.applyMacros(values[key], 0))
elif sg2:
text = text.replace('{{%s:1f}}' % key, self.applyMacros(values[key], 1))
elif sg3:
xx = int(sg3)
text = text.replace('{{%s:%d}}' % (key, xx), self.applyMacros2(values[key], xx))
elif sg4a:
xx, yy = int(sg4a), int(sg4b)
text = text.replace('{{%s:%d.%df}}' % (key, xx, yy), self.applyMacros2(values[key], xx, yy))
elif sg5:
text = text.replace('{{%s:+d}}' % key, self.applyMacros(values[key], 0, '+'))
elif sg6:
text = text.replace('{{%s:+1f}}' % key, self.applyMacros(values[key], 1, '+'))
elif g1=="g:":
text = text.replace('{{g:%s}}' % key, stats['gradient'][key])
elif g1=="c:":
text = text.replace('{{c:%s}}' % key, stats['palette'][key])
#except:
# LOG_CURRENT_EXCEPTION()
#finally:
return text
def __call__(self, text):
for match in re.finditer(self.regexp, text):
name = match.lastgroup
value = match.group(0)
span = match.span()
rule = self.mapping[name]
token = rule(value, span)
yield token
def load(self, model_name='main'):
checkpoint_dir = os.path.join(self.checkpoint_dir, self.model_dir)
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
if ckpt and ckpt.model_checkpoint_path:
ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
counter = int(next(re.finditer("(\d+)(?!.*\d)", ckpt_name)).group(0))
return True, counter
else:
print("Failed to find a checkpoint")
return False, 0