Python os.path 模块,commonprefix() 实例源码
我们从Python开源项目中,提取了以下32个代码示例,用于说明如何使用os.path.commonprefix()。
def _common_shorten_repr(*args):
    """Return the ``safe_repr`` of every argument, shortened as a group.

    If the longest representation is at most ``_MAX_LENGTH`` characters the
    representations are returned unchanged.  Otherwise the prefix shared by
    all of them is condensed with ``_shorten``; when the room left for that
    prefix is no more than ``_MIN_COMMON_LEN``, every differing tail is
    condensed with ``_shorten`` as well.

    Returns:
        A tuple of strings, one per argument, in argument order.
    """
    reprs = tuple(safe_repr(arg) for arg in args)
    longest = max(len(text) for text in reprs)
    if longest <= _MAX_LENGTH:
        return reprs

    shared = commonprefix(reprs)
    shared_len = len(shared)
    tail_len = longest - shared_len

    # Room left for the shared prefix once the longest tail is kept whole.
    budget = _MAX_LENGTH - (tail_len + _MIN_BEGIN_LEN + _PLACEHOLDER_LEN)
    if budget > _MIN_COMMON_LEN:
        assert (_MIN_BEGIN_LEN + _PLACEHOLDER_LEN + _MIN_COMMON_LEN
                + tail_len) < _MAX_LENGTH
        shared = _shorten(shared, _MIN_BEGIN_LEN, budget)
        return tuple(shared + text[shared_len:] for text in reprs)

    shared = _shorten(shared, _MIN_BEGIN_LEN, _MIN_COMMON_LEN)
    return tuple(
        shared + _shorten(text[shared_len:], _MIN_DIFF_LEN, _MIN_END_LEN)
        for text in reprs
    )
def _traverse_paths_for(self, filename):
# Current directory
if path.isfile(path.abspath(filename)):
base_directory = path.commonprefix([
path.dirname(path.abspath(filename)),
getcwd()
])
self.dirs.append(base_directory)
return base_directory
# Try base dir's for files we've previously loaded
# Lets definitions refer to each other relatively
for known_file in self.files:
full_known_file_path = path.abspath(known_file)
known_directory = path.dirname(full_known_file_path)
if path.isfile(known_directory + "/" + filename):
return known_directory
raise KeyError("Definition not found for %s" % filename)
def slotClipboardChange(self):
    """
    called if clipboard changes

    When ``self.checkClipboard`` is set, every http/https/ftp link found in
    the clipboard text is collected and handed to ``self.slotAddPackage``.
    The package name is the common prefix of the links' last path segments,
    or the first such segment when they share no prefix.  Nothing happens
    when no link is found.
    """
    if not self.checkClipboard:
        return

    clipboard_text = self.clipboard.text()
    url_pattern = re.compile(r"(http|https|ftp)://[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(([0-9]{1,5})?/.*)?")

    # thanks to: jmansour //#139
    links = []
    for found in url_pattern.finditer(clipboard_text):
        links.append(str(found.group(0)))
    if not links:
        return

    # Text after the last "/" of each link.
    filenames = [link.rpartition("/")[2] for link in links]
    packagename = commonprefix(filenames) or filenames[0]
    self.slotAddPackage(packagename, links)
def _fill_ot_entry(self, row, col):
"""
Fill an entry of the observation table.
Only save the part of the output generated by the col parameter.
Args:
row(tuple(int)): A tuple of integers specifiying the row to fill.
col(tuple(int)): A tuple of integers specifying the column to fill.
"""
prefix = self.membership_query(row)
full_output = self.membership_query(row + col)
common_prefix_len = len(commonprefix([prefix, full_output]))
self.ot[row, col] = full_output[common_prefix_len:]
#########################################################################
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def _common_shorten_repr(*args):
    """Return the ``safe_repr`` of every argument, shortened as a group.

    If the longest representation is at most ``_MAX_LENGTH`` characters the
    representations are returned unchanged.  Otherwise the prefix shared by
    all of them is condensed with ``_shorten``; when the room left for that
    prefix is no more than ``_MIN_COMMON_LEN``, every differing tail is
    condensed with ``_shorten`` as well.

    Returns:
        A tuple of strings, one per argument, in argument order.
    """
    reprs = tuple(safe_repr(arg) for arg in args)
    longest = max(len(text) for text in reprs)
    if longest <= _MAX_LENGTH:
        return reprs

    shared = commonprefix(reprs)
    shared_len = len(shared)
    tail_len = longest - shared_len

    # Room left for the shared prefix once the longest tail is kept whole.
    budget = _MAX_LENGTH - (tail_len + _MIN_BEGIN_LEN + _PLACEHOLDER_LEN)
    if budget > _MIN_COMMON_LEN:
        assert (_MIN_BEGIN_LEN + _PLACEHOLDER_LEN + _MIN_COMMON_LEN
                + tail_len) < _MAX_LENGTH
        shared = _shorten(shared, _MIN_BEGIN_LEN, budget)
        return tuple(shared + text[shared_len:] for text in reprs)

    shared = _shorten(shared, _MIN_BEGIN_LEN, _MIN_COMMON_LEN)
    return tuple(
        shared + _shorten(text[shared_len:], _MIN_DIFF_LEN, _MIN_END_LEN)
        for text in reprs
    )
def _common_shorten_repr(*args):
    """Return the ``safe_repr`` of every argument, shortened as a group.

    If the longest representation is at most ``_MAX_LENGTH`` characters the
    representations are returned unchanged.  Otherwise the prefix shared by
    all of them is condensed with ``_shorten``; when the room left for that
    prefix is no more than ``_MIN_COMMON_LEN``, every differing tail is
    condensed with ``_shorten`` as well.

    Returns:
        A tuple of strings, one per argument, in argument order.
    """
    reprs = tuple(safe_repr(arg) for arg in args)
    longest = max(len(text) for text in reprs)
    if longest <= _MAX_LENGTH:
        return reprs

    shared = commonprefix(reprs)
    shared_len = len(shared)
    tail_len = longest - shared_len

    # Room left for the shared prefix once the longest tail is kept whole.
    budget = _MAX_LENGTH - (tail_len + _MIN_BEGIN_LEN + _PLACEHOLDER_LEN)
    if budget > _MIN_COMMON_LEN:
        assert (_MIN_BEGIN_LEN + _PLACEHOLDER_LEN + _MIN_COMMON_LEN
                + tail_len) < _MAX_LENGTH
        shared = _shorten(shared, _MIN_BEGIN_LEN, budget)
        return tuple(shared + text[shared_len:] for text in reprs)

    shared = _shorten(shared, _MIN_BEGIN_LEN, _MIN_COMMON_LEN)
    return tuple(
        shared + _shorten(text[shared_len:], _MIN_DIFF_LEN, _MIN_END_LEN)
        for text in reprs
    )
def _common_shorten_repr(*args):
    """Return the ``safe_repr`` of every argument, shortened as a group.

    If the longest representation is at most ``_MAX_LENGTH`` characters the
    representations are returned unchanged.  Otherwise the prefix shared by
    all of them is condensed with ``_shorten``; when the room left for that
    prefix is no more than ``_MIN_COMMON_LEN``, every differing tail is
    condensed with ``_shorten`` as well.

    Returns:
        A tuple of strings, one per argument, in argument order.
    """
    reprs = tuple(safe_repr(arg) for arg in args)
    longest = max(len(text) for text in reprs)
    if longest <= _MAX_LENGTH:
        return reprs

    shared = commonprefix(reprs)
    shared_len = len(shared)
    tail_len = longest - shared_len

    # Room left for the shared prefix once the longest tail is kept whole.
    budget = _MAX_LENGTH - (tail_len + _MIN_BEGIN_LEN + _PLACEHOLDER_LEN)
    if budget > _MIN_COMMON_LEN:
        assert (_MIN_BEGIN_LEN + _PLACEHOLDER_LEN + _MIN_COMMON_LEN
                + tail_len) < _MAX_LENGTH
        shared = _shorten(shared, _MIN_BEGIN_LEN, budget)
        return tuple(shared + text[shared_len:] for text in reprs)

    shared = _shorten(shared, _MIN_BEGIN_LEN, _MIN_COMMON_LEN)
    return tuple(
        shared + _shorten(text[shared_len:], _MIN_DIFF_LEN, _MIN_END_LEN)
        for text in reprs
    )
def select_dir_to_delete(transfer_client, task):
    """Find the common directory under which all the transferred files live.

    The successfully transferred files of ``task`` are looked up through
    ``transfer_client``, a summary of the task and the file list are printed,
    and the directory shared by all source paths is computed.  This function
    itself deletes nothing.  Per the original note, the caller is expected to
    delete the common directory recursively when one exists (even if not all
    files under it were transferred) and each transferred file otherwise.

    Args:
        transfer_client: Object providing
            ``endpoint_manager_task_successful_transfers(task_id)``.
        task: Mapping with the keys ``task_id``, ``source_endpoint``,
            ``destination_endpoint`` and ``owner_string``.

    Returns:
        A ``(files_list, common_dir)`` tuple; ``common_dir`` is ``''`` when
        the paths share no directory.
    """
    task_id = task["task_id"]
    transfers = transfer_client.endpoint_manager_task_successful_transfers(
        task_id)

    summary = "Transfer Task({}): {} -> {}\n was submitted by {}\n".format(
        task_id,
        task["source_endpoint"],
        task["destination_endpoint"],
        task["owner_string"],
    )
    print(summary)

    files_list = []
    for transfer in transfers:
        files_list.append(transfer["source_path"])
    print("files list is ", files_list)

    # dirname() trims the character-wise prefix back to a whole directory.
    return files_list, dirname(commonprefix(files_list))
def __getMatchingVersion(version, versions_dates):
date = None
longest_match = []
longest_match_v = None
for v, d in versions_dates:
match = commonprefix([v.split('.'), str(version).split('.')])
if len(match) > 0 and (len(match) > len(longest_match) or (len(match) == len(longest_match) and int(v[-1]) <= int(longest_match_v[-1]))):
longest_match = match
longest_match_v = v
date = d
return date
def strip_root(self, d_path):
    """Strip the root component of d_path.

    Relative paths are returned unchanged.  An absolute path must be
    ``self.root`` itself or lie below it; ``self.root`` and the following
    slash are then removed with ``strip_prefix``.

    Args:
        d_path: Relative path, or absolute path under ``self.root``.

    Returns:
        ``d_path`` expressed relative to ``self.root``.

    Raises:
        ValueError: If ``d_path`` is absolute but not under ``self.root``.
    """
    # the path should be either relative, or a child of root
    if not d_path.startswith('/'):
        return d_path

    root = self.root
    # Compare at a path boundary.  A character-wise prefix test would accept
    # "/database/x" for root "/data" and then return "base/x".
    root_dir = root if root.endswith('/') else root + '/'
    if d_path != root and not d_path.startswith(root_dir):
        raise ValueError('Path should be a subdirectory of root')
    return strip_prefix(strip_prefix(d_path, self.root), '/')
def score_model(self, model_txn, txn):
    """Score an existing transaction for its ability to provide a model
    for an incomplete transaction.

    The score is the length of the common prefix of the two transactions'
    descriptions (payee and narration joined by a space, then stripped)
    divided by the length of the incomplete transaction's description.

    Args:
      model_txn: The transaction to be scored.
      txn: The incomplete transaction.
    Returns:
      A float in [0, 1], or the integer 0 when the incomplete transaction's
      description has at most one character or the signs differ.
    """
    def describe(entry):
        text = '{} {}'.format(entry.payee or '', entry.narration or '')
        return text.strip()

    # If the target transaction does not have a description, there is
    # nothing we can do
    target_text = describe(txn)
    target_len = len(target_text)
    if target_len <= 1:
        return 0

    # Only consider model transactions whose posting to the target
    # account has the same sign as the transaction to be completed
    account_postings = [
        p for p in model_txn.postings if p.account == self.account]
    model_posting = account_postings[0]
    if not number.same_sign(model_posting.units.number,
                            txn.postings[0].units.number):
        return 0

    shared = path.commonprefix([describe(model_txn), target_text])
    return float(len(shared)) / float(target_len)
def relpath(path, start=curdir):
    """Return a relative version of a path.

    Args:
        path: Path to express relative to ``start``.
        start: Directory the result is relative to; defaults to the current
            directory.

    Returns:
        The relative path, or ``curdir`` when both resolve to the same place.

    Raises:
        ValueError: If ``path`` is empty.
    """
    if not path:
        raise ValueError("no path specified")

    # Drop the empty components produced by the leading separator.  Without
    # this, a root start splits to ['', ''] and relpath('/a', '/') comes out
    # as '../a' instead of 'a'.
    start_list = [part for part in abspath(start).split(sep) if part]
    path_list = [part for part in abspath(path).split(sep) if part]

    # Work out how much of the filepath is shared by start and path.
    i = len(commonprefix([start_list, path_list]))

    rel_list = [pardir] * (len(start_list) - i) + path_list[i:]
    if not rel_list:
        return curdir
    return join(*rel_list)
def _remove_common_prefix(main, prefix):
"""
Return the suffix of main after removing its common prefix with "prefix"
Args:
main (list): List to return suffix
prefix (list): list to match prefix
Returns:
list: suffix of main after removing common prefix with prefix list.
"""
common_part = commonprefix([main, prefix])
return main[len(common_part):]
def _fill_ot_entry(self, row, col):
"""
Fill an entry of the observation table.
Only save the part of the output generated by the col parameter.
Args:
row(tuple(int)): A tuple of integers specifiying the row to fill.
col(tuple(int)): A tuple of integers specifying the column to fill.
"""
prefix = self.membership_query(row)
full_output = self.membership_query(row + col)
prefix_len = len(commonprefix([prefix, full_output]))
self.ot[row, col] = full_output[prefix_len:]
def _process_counterexample(self, ce):
"""
Counterexample processing method. The method is similar to the
Shabaz-Groz counterexample processing, with an additional step that
checks for counterexamples resulting from lookahead transitions.
The counterexample is first passed to self._check_lookahead. The part of
it that follows its longest common prefix with any non-empty access
string is then walked from the end, and the growing suffixes are added
to self.ot.dist_strings.
Args:
ce (list): counterexample input
"""
# NOTE(review): indentation was lost in this copy of the code, so the
# nesting of the loops and conditions below must be confirmed against the
# upstream source.
# Process lookaheads
self._check_lookahead(ce)
# Find the longest prefix ce shares with any string in access_strings
maxlen = 0
for row in self.ot.access_strings:
# Empty access strings are skipped
if not row:
continue
# Seems that commonprefix works for tuple/list pairs but convert
# just to be sure.
prefix = commonprefix([ce, list(row)])
if len(prefix) > maxlen:
maxlen = len(prefix)
# Add all the suffixes as experiments in distinguishing strings
suff = ()
# Walking ce[maxlen:] backwards and prepending grows suff one symbol at a
# time, from the shortest suffix to the longest.
for c in reversed(ce[maxlen:]):
suff = (c,) + suff
# Add the experiment if not already there
if suff not in self.ot.dist_strings:
self.ot.dist_strings.append(suff)
# Fill the entries in the observation table
for row in self.ot.access_strings + self.ot.transitions:
self._fill_ot_entry(row, suff)
# Fill the lookahead transitions
# Each lookahead's row is its source concatenated with its input.
for (src, inp, _) in self.ot.lookaheads:
self._fill_ot_entry(src+inp, suff)
def _process_ce_sg(self, ce):
"""
Counterexample processing using the Shabaz-Groz algorithm.
The part of the counterexample that follows its longest common prefix
with any non-empty access string is walked from the end, and the growing
suffixes are added to self.ot.dist_strings.
Args:
ce (list): counterexample input
"""
# NOTE(review): indentation was lost in this copy of the code, so the
# nesting of the loops and conditions below must be confirmed against the
# upstream source.
# Find the longest prefix ce shares with any string in S_m
maxlen = 0
for row in self.ot.access_strings:
# Empty access strings are skipped
if not row:
continue
# Seems that commonprefix works for tuple/list pairs but convert
# just to be sure.
prefix = commonprefix([ce, list(row)])
if len(prefix) > maxlen:
maxlen = len(prefix)
# Add all the suffixes as experiments in E_m
suff = ()
# Walking ce[maxlen:] backwards and prepending grows suff one symbol at a
# time, from the shortest suffix to the longest.
for c in reversed(ce[maxlen:]):
suff = (c,) + suff
# Add the experiment if not already there
if suff not in self.ot.dist_strings:
self.ot.dist_strings.append(suff)
# Fill the entries in the observation table
for row in self.ot.access_strings + self.ot.transitions:
self._fill_ot_entry(row, suff)
#########################################################################
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def refresh(self):
    """Reload the macro path list and derive its base and relative forms.

    Reads the ``"MacroPath"`` property from the object returned by
    ``self._ms()`` and stores:

    * ``self.macro_path`` - the paths as read;
    * ``self.base_macro_path`` - their character-wise common prefix;
    * ``self.rel_macro_path`` - each path relative to that prefix.
    """
    self.macro_path = mp = self._ms().get_property("MacroPath")[
        "MacroPath"]
    # NOTE(review): commonprefix() works character by character, so the
    # base is not guaranteed to end on a directory boundary.
    self.base_macro_path = osp.commonprefix(self.macro_path)
    # Call relpath() for every path.  The previous comprehension iterated
    # over the tuple (mp, base) and stored the relpath function itself.
    self.rel_macro_path = [osp.relpath(p, self.base_macro_path) for p in mp]
def makeRelativePath(self, path):
    """Rewrite the stored source location relative to ``path``.

    Only sources whose type equals ``self.LOCAL`` are touched, and only
    when the source is absolute or shares a leading run of characters with
    ``path``.

    Args:
        path: Directory the source should become relative to.
    """
    if not (self.__sourceType == self.LOCAL):
        return
    if isabs(self.__source) or commonprefix([self.__source, path]) != "":
        self.__source = relpath(self.__source, path)
def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    Args:
        strings: Sorted list of plain (unescaped) strings to match.
        open_paren: Text opening the group wrapped around the result, such
            as ``'('`` or ``'(?:'``; an empty string means no group.

    Returns:
        The regular expression source, or ``''`` when ``strings`` is empty.
    """
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''
    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren
    if not first:
        # An empty first string makes the remaining alternatives optional.
        return (open_paren + regex_opt_inner(strings[1:], '(?:')
                + '?' + close_paren)
    if len(first) == 1:
        # multiple one-char strings? make a charset
        oneletter = [s for s in strings if len(s) == 1]
        rest = [s for s in strings if len(s) != 1]
        if len(oneletter) > 1:  # do we have more than one oneletter string?
            if rest:
                return (open_paren + regex_opt_inner(rest, '') + '|'
                        + make_charset(oneletter) + close_paren)
            # Wrap the charset as well: returning it bare would drop the
            # group the caller requested through open_paren.
            return open_paren + make_charset(oneletter) + close_paren
    prefix = commonprefix(strings)
    if prefix:
        # we have a prefix for all strings
        plen = len(prefix)
        return (open_paren + escape(prefix)
                + regex_opt_inner([s[plen:] for s in strings], '(?:')
                + close_paren)
    # is there a suffix?  Compare the reversed strings to find out.
    suffix = commonprefix([s[::-1] for s in strings])
    if suffix:
        slen = len(suffix)
        return (open_paren
                + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:')
                + escape(suffix[::-1]) + close_paren)
    # Last resort: split into the strings that start with the same character
    # as the first one and the rest, and recurse on each part.
    return (open_paren
            + '|'.join(regex_opt_inner(list(group[1]), '')
                       for group in groupby(strings,
                                            lambda s: s[0] == first[0]))
            + close_paren)
def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    Args:
        strings: Sorted list of plain (unescaped) strings to match.
        open_paren: Text opening the group wrapped around the result, such
            as ``'('`` or ``'(?:'``; an empty string means no group.

    Returns:
        The regular expression source, or ``''`` when ``strings`` is empty.
    """
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''
    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren
    if not first:
        # An empty first string makes the remaining alternatives optional.
        return (open_paren + regex_opt_inner(strings[1:], '(?:')
                + '?' + close_paren)
    if len(first) == 1:
        # multiple one-char strings? make a charset
        oneletter = [s for s in strings if len(s) == 1]
        rest = [s for s in strings if len(s) != 1]
        if len(oneletter) > 1:  # do we have more than one oneletter string?
            if rest:
                return (open_paren + regex_opt_inner(rest, '') + '|'
                        + make_charset(oneletter) + close_paren)
            # Wrap the charset as well: returning it bare would drop the
            # group the caller requested through open_paren.
            return open_paren + make_charset(oneletter) + close_paren
    prefix = commonprefix(strings)
    if prefix:
        # we have a prefix for all strings
        plen = len(prefix)
        return (open_paren + escape(prefix)
                + regex_opt_inner([s[plen:] for s in strings], '(?:')
                + close_paren)
    # is there a suffix?  Compare the reversed strings to find out.
    suffix = commonprefix([s[::-1] for s in strings])
    if suffix:
        slen = len(suffix)
        return (open_paren
                + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:')
                + escape(suffix[::-1]) + close_paren)
    # Last resort: split into the strings that start with the same character
    # as the first one and the rest, and recurse on each part.
    return (open_paren
            + '|'.join(regex_opt_inner(list(group[1]), '')
                       for group in groupby(strings,
                                            lambda s: s[0] == first[0]))
            + close_paren)
def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    Args:
        strings: Sorted list of plain (unescaped) strings to match.
        open_paren: Text opening the group wrapped around the result, such
            as ``'('`` or ``'(?:'``; an empty string means no group.

    Returns:
        The regular expression source, or ``''`` when ``strings`` is empty.
    """
    # A conditional expression replaces the fragile ``x and a or b`` idiom.
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''
    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren
    if not first:
        # An empty first string makes the remaining alternatives optional.
        return (open_paren + regex_opt_inner(strings[1:], '(?:')
                + '?' + close_paren)
    if len(first) == 1:
        # multiple one-char strings? make a charset
        oneletter = [s for s in strings if len(s) == 1]
        rest = [s for s in strings if len(s) != 1]
        if len(oneletter) > 1:  # do we have more than one oneletter string?
            if rest:
                return (open_paren + regex_opt_inner(rest, '') + '|'
                        + make_charset(oneletter) + close_paren)
            # Only one-character strings: the charset alone covers them.
            return open_paren + make_charset(oneletter) + close_paren
    prefix = commonprefix(strings)
    if prefix:
        # we have a prefix for all strings
        plen = len(prefix)
        return (open_paren + escape(prefix)
                + regex_opt_inner([s[plen:] for s in strings], '(?:')
                + close_paren)
    # is there a suffix?  Compare the reversed strings to find out.
    suffix = commonprefix([s[::-1] for s in strings])
    if suffix:
        slen = len(suffix)
        return (open_paren
                + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:')
                + escape(suffix[::-1]) + close_paren)
    # Last resort: split into the strings that start with the same character
    # as the first one and the rest, and recurse on each part.
    return (open_paren
            + '|'.join(regex_opt_inner(list(group[1]), '')
                       for group in groupby(strings,
                                            lambda s: s[0] == first[0]))
            + close_paren)
def build_grammars(out, antlr):
    """
    Build lexer and grammar from ANTLRv4 grammar files in Python3 target.

    The bundled grammar resources are first written into ``out``.  ANTLR is
    then run there through the shell, and the generated lexer, parser and
    listener modules are located by file name in ``out``.  Any exception is
    logged and re-raised.

    :param out: Output directory.
    :param antlr: Path to the ANTLR4 tool (Java jar binary).
    :return: Generator yielding the lexer, parser and listener classes of
        the target; each module is imported only when the generator reaches it.
    """
    try:
        # TODO: support Java parsers too.
        languages = {
            'python': {'antlr_arg': '-Dlanguage=Python3',
                       'ext': 'py',
                       'listener_format': 'Listener',
                       'sources': ['ANTLRv4Lexer.g4', 'ANTLRv4Parser.g4', 'LexBasic.g4', 'LexerAdaptor.py']}
        }
        target = languages['python']

        # Copy the grammars from the package to the given working directory.
        for resource in target['sources']:
            with open(join(out, resource), 'wb') as f:
                f.write(get_data(__package__, join('resources', 'antlr', resource)))

        grammars = tuple(name for name in target['sources'] if name.endswith('.g4'))

        # Generate parser and lexer in the target language and return either with
        # python class ref or the name of java classes.
        cmd = 'java -jar {antlr} {lang} {grammars}'.format(
            antlr=antlr,
            lang=target['antlr_arg'],
            grammars=' '.join(grammars))

        with Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True, cwd=out) as proc:
            stdout, stderr = proc.communicate()
            if proc.returncode:
                logger.error('Building grammars %r failed!\n%s\n%s\n', grammars,
                             stdout.decode('utf-8', 'ignore'),
                             stderr.decode('utf-8', 'ignore'))
                raise CalledProcessError(returncode=proc.returncode, cmd=cmd, output=stdout + stderr)

        files = listdir(out)
        filename = basename(grammars[0])

        def file_endswith(end_pattern):
            # First generated file that shares a leading character run with
            # the first grammar's name and ends with end_pattern; the
            # directory part and extension are stripped.
            candidates = [x for x in files
                          if len(commonprefix([filename, x])) > 0 and x.endswith(end_pattern)]
            return splitext(split(candidates[0])[1])[0]

        ext = target['ext']
        # Extract the name of lexer and parser from their path.
        lexer = file_endswith('Lexer.{ext}'.format(ext=ext))
        parser = file_endswith('Parser.{ext}'.format(ext=ext))
        # The name of the generated listeners differs if Python or other language target is used.
        listener = file_endswith('{listener_format}.{ext}'.format(
            listener_format=target['listener_format'], ext=ext))

        return (getattr(__import__(x, globals(), locals(), [x], 0), x) for x in [lexer, parser, listener])
    except Exception as e:
        logger.error('Exception while loading parser modules', exc_info=e)
        raise e
def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    Args:
        strings: Sorted list of plain (unescaped) strings to match.
        open_paren: Text opening the group wrapped around the result, such
            as ``'('`` or ``'(?:'``; an empty string means no group.

    Returns:
        The regular expression source, or ``''`` when ``strings`` is empty.
    """
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''
    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren
    if not first:
        # An empty first string makes the remaining alternatives optional.
        return (open_paren + regex_opt_inner(strings[1:], '(?:')
                + '?' + close_paren)
    if len(first) == 1:
        # multiple one-char strings? make a charset
        oneletter = [s for s in strings if len(s) == 1]
        rest = [s for s in strings if len(s) != 1]
        if len(oneletter) > 1:  # do we have more than one oneletter string?
            if rest:
                return (open_paren + regex_opt_inner(rest, '') + '|'
                        + make_charset(oneletter) + close_paren)
            # Wrap the charset as well: returning it bare would drop the
            # group the caller requested through open_paren.
            return open_paren + make_charset(oneletter) + close_paren
    prefix = commonprefix(strings)
    if prefix:
        # we have a prefix for all strings
        plen = len(prefix)
        return (open_paren + escape(prefix)
                + regex_opt_inner([s[plen:] for s in strings], '(?:')
                + close_paren)
    # is there a suffix?  Compare the reversed strings to find out.
    suffix = commonprefix([s[::-1] for s in strings])
    if suffix:
        slen = len(suffix)
        return (open_paren
                + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:')
                + escape(suffix[::-1]) + close_paren)
    # Last resort: split into the strings that start with the same character
    # as the first one and the rest, and recurse on each part.
    return (open_paren
            + '|'.join(regex_opt_inner(list(group[1]), '')
                       for group in groupby(strings,
                                            lambda s: s[0] == first[0]))
            + close_paren)
def _check_lookahead(self, inp):
"""
Check a counterexample for lookahead transitions using prefix-closed
queries. If an unknown lookahead is found it is added to the observation
table.
A lookahead is flagged at position i when the output for the first i-1
input symbols is not a prefix of the output for the first i symbols.
Args:
inp (list): Counterexample input.
"""
# NOTE(review): indentation was lost in this copy of the code, so the
# nesting of the loops and conditions below must be confirmed against the
# upstream source. xrange makes this block Python 2 only.
# Make a prefix closed membership query and gather the result
prefix = []
prefix_set = [[]]
prefix_set_input = [[]]
for c in inp:
prefix.append(c)
# NOTE(review): the same list object is appended on every pass, and
# prefix_set_input is not read again in this block.
prefix_set_input.append(prefix)
prefix_set.append(self.membership_query(prefix))
# prefix_set[i] holds the output for the first i input symbols
for i in xrange(1, len(prefix_set)):
if commonprefix([prefix_set[i], prefix_set[i-1]]) != prefix_set[i-1]:
logging.debug('Lookahead detected at position %s : %s, %s',
i, prefix_set[i-1], prefix_set[i])
# NOTE(review): la_out is assigned again further down; whether this first
# value is ever read depends on the lost nesting.
la_out = _remove_common_prefix(prefix_set[i], prefix_set[i-1])
j = None
# Search backwards for the closest earlier output that is a prefix of
# the output at position i
for j in reversed(xrange(i)):
if commonprefix([prefix_set[i], prefix_set[j]]) == prefix_set[j]:
la_inp = inp[j:i]
break
la_out = _remove_common_prefix(prefix_set[i], prefix_set[j])
access_string = self._run_in_hypothesis(inp, j)
out_as = self.membership_query(access_string)
out_complete = self.membership_query(list(access_string)+la_inp)
# If the access string for the lookahead state is wrong, we will
# add the lookahead path once this is fixed in a next iteration.
if _remove_common_prefix(out_complete, out_as) != la_out:
logging.debug('Lookahead detected but access string is '+ \
'wrong, skipping.')
continue
if self.ot.add_lookahead_transition(access_string,
tuple(la_inp),
tuple(la_out)):
# Fill all table entries for the lookahead transition
for col in self.ot.dist_strings:
self._fill_ot_entry(access_string + tuple(la_inp), col)
# New lookahead added, no need for further processing.
break
def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    Args:
        strings: Sorted list of plain (unescaped) strings to match.
        open_paren: Text opening the group wrapped around the result, such
            as ``'('`` or ``'(?:'``; an empty string means no group.

    Returns:
        The regular expression source, or ``''`` when ``strings`` is empty.
    """
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''
    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren
    if not first:
        # An empty first string makes the remaining alternatives optional.
        return (open_paren + regex_opt_inner(strings[1:], '(?:')
                + '?' + close_paren)
    if len(first) == 1:
        # multiple one-char strings? make a charset
        oneletter = [s for s in strings if len(s) == 1]
        rest = [s for s in strings if len(s) != 1]
        if len(oneletter) > 1:  # do we have more than one oneletter string?
            if rest:
                return (open_paren + regex_opt_inner(rest, '') + '|'
                        + make_charset(oneletter) + close_paren)
            # Wrap the charset as well: returning it bare would drop the
            # group the caller requested through open_paren.
            return open_paren + make_charset(oneletter) + close_paren
    prefix = commonprefix(strings)
    if prefix:
        # we have a prefix for all strings
        plen = len(prefix)
        return (open_paren + escape(prefix)
                + regex_opt_inner([s[plen:] for s in strings], '(?:')
                + close_paren)
    # is there a suffix?  Compare the reversed strings to find out.
    suffix = commonprefix([s[::-1] for s in strings])
    if suffix:
        slen = len(suffix)
        return (open_paren
                + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:')
                + escape(suffix[::-1]) + close_paren)
    # Last resort: split into the strings that start with the same character
    # as the first one and the rest, and recurse on each part.
    return (open_paren
            + '|'.join(regex_opt_inner(list(group[1]), '')
                       for group in groupby(strings,
                                            lambda s: s[0] == first[0]))
            + close_paren)
def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    Args:
        strings: Sorted list of plain (unescaped) strings to match.
        open_paren: Text opening the group wrapped around the result, such
            as ``'('`` or ``'(?:'``; an empty string means no group.

    Returns:
        The regular expression source, or ``''`` when ``strings`` is empty.
    """
    # A conditional expression replaces the fragile ``x and a or b`` idiom.
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''
    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren
    if not first:
        # An empty first string makes the remaining alternatives optional.
        return (open_paren + regex_opt_inner(strings[1:], '(?:')
                + '?' + close_paren)
    if len(first) == 1:
        # multiple one-char strings? make a charset
        oneletter = [s for s in strings if len(s) == 1]
        rest = [s for s in strings if len(s) != 1]
        if len(oneletter) > 1:  # do we have more than one oneletter string?
            if rest:
                return (open_paren + regex_opt_inner(rest, '') + '|'
                        + make_charset(oneletter) + close_paren)
            # Only one-character strings: the charset alone covers them.
            return open_paren + make_charset(oneletter) + close_paren
    prefix = commonprefix(strings)
    if prefix:
        # we have a prefix for all strings
        plen = len(prefix)
        return (open_paren + escape(prefix)
                + regex_opt_inner([s[plen:] for s in strings], '(?:')
                + close_paren)
    # is there a suffix?  Compare the reversed strings to find out.
    suffix = commonprefix([s[::-1] for s in strings])
    if suffix:
        slen = len(suffix)
        return (open_paren
                + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:')
                + escape(suffix[::-1]) + close_paren)
    # Last resort: split into the strings that start with the same character
    # as the first one and the rest, and recurse on each part.
    return (open_paren
            + '|'.join(regex_opt_inner(list(group[1]), '')
                       for group in groupby(strings,
                                            lambda s: s[0] == first[0]))
            + close_paren)
def prefixes(self, max_prefixes=1, **fields_dict):
"""Return at most max_prefixes string prefixes built from the template.
Each keyword argument names a field (it must be in self.field_names) and
gives either None or a list of candidate values for that field. The
prefixes are assembled by make_prefixes from self.template_pieces, whose
items are unpacked as (literal, field_name) pairs.
"""
# NOTE(review): indentation was lost in this copy of the code, so the
# nesting of the branches in make_prefixes must be confirmed against the
# upstream source.
for (key, value) in fields_dict.items():
assert key in self.field_names, key
assert value is None or isinstance(value, list), type(value)
# Recursive helper: returns a list of prefixes, each a list of string pieces.
def make_prefixes(
template_pieces,
max_prefixes=max_prefixes,
fields_dict=fields_dict):
result = [[]]
if not template_pieces:
return result
(literal, field_name) = template_pieces[0]
if literal:
for piece in result:
piece.append(literal)
# Explicitly passed values win; otherwise fall back to self.valid_values.
values = fields_dict.get(field_name)
if values is None:
values = self.valid_values.get(field_name)
if values is not None:
# Expanding every value would exceed the budget: append only what all
# values have in common.
if len(result) * len(values) > max_prefixes:
common_prefix = commonprefix(values)
for piece in result:
piece.append(common_prefix)
else:
new_result = []
for value in values:
# Pin this field to one value and recurse on the remaining pieces.
new_fields_dict = dict(fields_dict)
new_fields_dict[field_name] = [value]
# NOTE(review): "/" is true division on Python 3, so the budget passed
# down may be a float.
rest = make_prefixes(
template_pieces[1:],
max_prefixes=max_prefixes / (
len(result) * len(values)),
fields_dict=new_fields_dict)
for some_rest in rest:
new_result.extend(
[x + [value] + some_rest for x in result])
result = new_result
return result
prefix_components = make_prefixes(self.template_pieces)
assert len(prefix_components) <= max_prefixes
# Join each prefix's pieces into a single string.
return [''.join(x) for x in prefix_components]
def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    Args:
        strings: Sorted list of plain (unescaped) strings to match.
        open_paren: Text opening the group wrapped around the result, such
            as ``'('`` or ``'(?:'``; an empty string means no group.

    Returns:
        The regular expression source, or ``''`` when ``strings`` is empty.
    """
    # A conditional expression replaces the fragile ``x and a or b`` idiom.
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''
    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren
    if not first:
        # An empty first string makes the remaining alternatives optional.
        return (open_paren + regex_opt_inner(strings[1:], '(?:')
                + '?' + close_paren)
    if len(first) == 1:
        # multiple one-char strings? make a charset
        oneletter = [s for s in strings if len(s) == 1]
        rest = [s for s in strings if len(s) != 1]
        if len(oneletter) > 1:  # do we have more than one oneletter string?
            if rest:
                return (open_paren + regex_opt_inner(rest, '') + '|'
                        + make_charset(oneletter) + close_paren)
            # Only one-character strings: the charset alone covers them.
            return open_paren + make_charset(oneletter) + close_paren
    prefix = commonprefix(strings)
    if prefix:
        # we have a prefix for all strings
        plen = len(prefix)
        return (open_paren + escape(prefix)
                + regex_opt_inner([s[plen:] for s in strings], '(?:')
                + close_paren)
    # is there a suffix?  Compare the reversed strings to find out.
    suffix = commonprefix([s[::-1] for s in strings])
    if suffix:
        slen = len(suffix)
        return (open_paren
                + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:')
                + escape(suffix[::-1]) + close_paren)
    # Last resort: split into the strings that start with the same character
    # as the first one and the rest, and recurse on each part.
    return (open_paren
            + '|'.join(regex_opt_inner(list(group[1]), '')
                       for group in groupby(strings,
                                            lambda s: s[0] == first[0]))
            + close_paren)
def build_grammars(grammars, out, antlr, lang='python'):
    """
    Build lexer and grammar from ANTLRv4 grammar files in the chosen target.

    Results are memoised in the module-level ``grammar_cache`` per target
    language and grammar tuple, so each combination is built only once.
    ANTLR is run through the shell with ``out`` as working directory, and
    the generated files are located by name in ``out``.  Any exception is
    logged and re-raised.

    :param grammars: Tuple of grammar files.
    :param out: Output directory.
    :param antlr: Path to the ANTLR4 tool (Java jar binary).
    :param lang: The target language of the parser.
    :return: List of references/names of the lexer, parser and listener classes of the target.
    """
    # Generate parser and lexer in the target language and return either with
    # python class ref or the name of java classes.
    global grammar_cache
    if lang not in grammar_cache:
        grammar_cache[lang] = {}
    if grammars in grammar_cache[lang]:
        logger.debug('%r is already built with %s target.', grammars, lang)
        return grammar_cache[lang][grammars]

    try:
        languages = {
            'python': {'antlr_arg': '-Dlanguage=Python3', 'ext': 'py', 'listener_format': 'Listener'},
            'java': {'antlr_arg': '', 'ext': 'java', 'listener_format': 'BaseListener'},
        }
        target = languages[lang]

        cmd = 'java -jar {antlr} {lang} -o {out} {grammars}'.format(
            antlr=antlr,
            lang=target['antlr_arg'],
            out=out,
            grammars=' '.join(grammars))

        with Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True, cwd=out) as proc:
            stdout, stderr = proc.communicate()
            if proc.returncode:
                logger.error('Building grammars %r failed!\n%s\n%s\n', grammars, stdout, stderr)
                raise CalledProcessError(returncode=proc.returncode, cmd=cmd, output=stdout + stderr)

        files = listdir(out)
        filename = basename(grammars[0])

        def file_endswith(end_pattern):
            # First generated file that shares a leading character run with
            # the first grammar's name and ends with end_pattern; the
            # directory part and extension are stripped.
            candidates = [x for x in files
                          if len(commonprefix([filename, x])) > 0 and x.endswith(end_pattern)]
            return splitext(split(candidates[0])[1])[0]

        ext = target['ext']
        # Extract the name of lexer and parser from their path.
        lexer = file_endswith('Lexer.{ext}'.format(ext=ext))
        parser = file_endswith('Parser.{ext}'.format(ext=ext))
        # The name of the generated listeners differs if Python or other language target is used.
        listener = file_endswith('{listener_format}.{ext}'.format(
            listener_format=target['listener_format'], ext=ext))

        if lang == 'python':
            # Python targets are imported and returned as class objects.
            grammar_cache[lang][grammars] = [getattr(__import__(x, globals(), locals(), [x], 0), x) for x in [lexer, parser, listener]]
        else:
            # Other targets are returned by name only.
            grammar_cache[lang][grammars] = [lexer, parser, listener]

        return grammar_cache[lang][grammars]
    except Exception as e:
        logger.error('Exception while loading parser modules', exc_info=e)
        raise e
def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    Args:
        strings: Sorted list of plain (unescaped) strings to match.
        open_paren: Text opening the group wrapped around the result, such
            as ``'('`` or ``'(?:'``; an empty string means no group.

    Returns:
        The regular expression source, or ``''`` when ``strings`` is empty.
    """
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''
    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren
    if not first:
        # An empty first string makes the remaining alternatives optional.
        return (open_paren + regex_opt_inner(strings[1:], '(?:')
                + '?' + close_paren)
    if len(first) == 1:
        # multiple one-char strings? make a charset
        oneletter = [s for s in strings if len(s) == 1]
        rest = [s for s in strings if len(s) != 1]
        if len(oneletter) > 1:  # do we have more than one oneletter string?
            if rest:
                return (open_paren + regex_opt_inner(rest, '') + '|'
                        + make_charset(oneletter) + close_paren)
            # Wrap the charset as well: returning it bare would drop the
            # group the caller requested through open_paren.
            return open_paren + make_charset(oneletter) + close_paren
    prefix = commonprefix(strings)
    if prefix:
        # we have a prefix for all strings
        plen = len(prefix)
        return (open_paren + escape(prefix)
                + regex_opt_inner([s[plen:] for s in strings], '(?:')
                + close_paren)
    # is there a suffix?  Compare the reversed strings to find out.
    suffix = commonprefix([s[::-1] for s in strings])
    if suffix:
        slen = len(suffix)
        return (open_paren
                + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:')
                + escape(suffix[::-1]) + close_paren)
    # Last resort: split into the strings that start with the same character
    # as the first one and the rest, and recurse on each part.
    return (open_paren
            + '|'.join(regex_opt_inner(list(group[1]), '')
                       for group in groupby(strings,
                                            lambda s: s[0] == first[0]))
            + close_paren)
def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    Args:
        strings: Sorted list of plain (unescaped) strings to match.
        open_paren: Text opening the group wrapped around the result, such
            as ``'('`` or ``'(?:'``; an empty string means no group.

    Returns:
        The regular expression source, or ``''`` when ``strings`` is empty.
    """
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''
    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren
    if not first:
        # An empty first string makes the remaining alternatives optional.
        return (open_paren + regex_opt_inner(strings[1:], '(?:')
                + '?' + close_paren)
    if len(first) == 1:
        # multiple one-char strings? make a charset
        oneletter = [s for s in strings if len(s) == 1]
        rest = [s for s in strings if len(s) != 1]
        if len(oneletter) > 1:  # do we have more than one oneletter string?
            if rest:
                return (open_paren + regex_opt_inner(rest, '') + '|'
                        + make_charset(oneletter) + close_paren)
            # Wrap the charset as well: returning it bare would drop the
            # group the caller requested through open_paren.
            return open_paren + make_charset(oneletter) + close_paren
    prefix = commonprefix(strings)
    if prefix:
        # we have a prefix for all strings
        plen = len(prefix)
        return (open_paren + escape(prefix)
                + regex_opt_inner([s[plen:] for s in strings], '(?:')
                + close_paren)
    # is there a suffix?  Compare the reversed strings to find out.
    suffix = commonprefix([s[::-1] for s in strings])
    if suffix:
        slen = len(suffix)
        return (open_paren
                + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:')
                + escape(suffix[::-1]) + close_paren)
    # Last resort: split into the strings that start with the same character
    # as the first one and the rest, and recurse on each part.
    return (open_paren
            + '|'.join(regex_opt_inner(list(group[1]), '')
                       for group in groupby(strings,
                                            lambda s: s[0] == first[0]))
            + close_paren)
def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    Args:
        strings: Sorted list of plain (unescaped) strings to match.
        open_paren: Text opening the group wrapped around the result, such
            as ``'('`` or ``'(?:'``; an empty string means no group.

    Returns:
        The regular expression source, or ``''`` when ``strings`` is empty.
    """
    # A conditional expression replaces the fragile ``x and a or b`` idiom.
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''
    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren
    if not first:
        # An empty first string makes the remaining alternatives optional.
        return (open_paren + regex_opt_inner(strings[1:], '(?:')
                + '?' + close_paren)
    if len(first) == 1:
        # multiple one-char strings? make a charset
        oneletter = [s for s in strings if len(s) == 1]
        rest = [s for s in strings if len(s) != 1]
        if len(oneletter) > 1:  # do we have more than one oneletter string?
            if rest:
                return (open_paren + regex_opt_inner(rest, '') + '|'
                        + make_charset(oneletter) + close_paren)
            # Only one-character strings: the charset alone covers them.
            return open_paren + make_charset(oneletter) + close_paren
    prefix = commonprefix(strings)
    if prefix:
        # we have a prefix for all strings
        plen = len(prefix)
        return (open_paren + escape(prefix)
                + regex_opt_inner([s[plen:] for s in strings], '(?:')
                + close_paren)
    # is there a suffix?  Compare the reversed strings to find out.
    suffix = commonprefix([s[::-1] for s in strings])
    if suffix:
        slen = len(suffix)
        return (open_paren
                + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:')
                + escape(suffix[::-1]) + close_paren)
    # Last resort: split into the strings that start with the same character
    # as the first one and the rest, and recurse on each part.
    return (open_paren
            + '|'.join(regex_opt_inner(list(group[1]), '')
                       for group in groupby(strings,
                                            lambda s: s[0] == first[0]))
            + close_paren)