Python xml.etree 模块,ElementTree() 实例源码
我们从Python开源项目中,提取了以下11个代码示例,用于说明如何使用xml.etree.ElementTree()。
def __call__(self, driver, session: requests.Session, element: ElementTree):
    """Accept a driver, a requests session and an element, and do nothing."""
    return None
def extract_trust_anchors_from_xml(trust_anchor_xml):
    """Takes a bytestring with the XML from IANA; returns a list of trust anchors.

    Args:
        trust_anchor_xml: Bytestring holding the XML document. It is turned
            into text with ``bytes_to_string`` before being parsed.

    Returns:
        A list with one dict per ``KeyDigest`` element. Each dict maps
        "KeyTag", "Algorithm", "DigestType" and "Digest" to the text of the
        sub-element of that name, and "validFrom" / "validUntil" to the value
        of the attribute of that name ("" when the attribute is absent).

    ``die`` is called when the text is shorter than 100 characters, when a
    KeyDigest lacks one of the four sub-elements, or when no KeyDigest is found.
    """
    # Turn the bytes from trust_anchor_xml into a string
    trust_anchor_xml_string = bytes_to_string(trust_anchor_xml)
    # Sanity check: make sure there is enough text in the returned stuff
    if len(trust_anchor_xml_string) < 100:
        die("The XML was too short: {} chars.".format(len(trust_anchor_xml_string)))
    # ElementTree requires a file so use StringIO to turn the string into a file
    try:
        trust_anchor_as_file = StringIO(trust_anchor_xml_string)  # This works for Python 3
    except TypeError:
        # Needed for Python 2 (presumably StringIO is io.StringIO, which only
        # accepts unicode there). Only TypeError is caught so that SystemExit
        # and KeyboardInterrupt are no longer swallowed.
        trust_anchor_as_file = StringIO(unicode(trust_anchor_xml_string))  # noqa: F821
    # Get the tree
    trust_anchor_tree = xml.etree.ElementTree.ElementTree(file=trust_anchor_as_file)
    # Get all the KeyDigest elements
    digest_elements = trust_anchor_tree.findall(".//KeyDigest")
    print("There were {} KeyDigest elements in the trust anchor file.".format(
        len(digest_elements)))
    trust_anchors = []  # List of dicts that is taken from the XML file
    # Collect the values for the KeyDigest subelements and attributes
    for (count, this_digest_element) in enumerate(digest_elements):
        digest_value_dict = {}
        for this_subelement in ["KeyTag", "Algorithm", "DigestType", "Digest"]:
            found_subelement = this_digest_element.find(this_subelement)
            # find() returns None for a missing child; test for that directly
            # instead of catching whatever the attribute access would raise.
            if found_subelement is None:
                die("Did not find {} element in a KeyDigest in a trust anchor.".format(
                    this_subelement))
            digest_value_dict[this_subelement] = found_subelement.text
        for this_attribute in ["validFrom", "validUntil"]:
            # Missing attributes get empty values
            digest_value_dict[this_attribute] = this_digest_element.attrib.get(
                this_attribute, "")
        # Save this to the trust_anchors list
        print("Added the trust anchor {} to the list:\n{}".format(
            count, pprint.pformat(digest_value_dict)))
        trust_anchors.append(digest_value_dict)
    if not trust_anchors:
        die("There were no trust anchors found in the XML file.")
    return trust_anchors
def extract_trust_anchors_from_xml(trust_anchor_xml):
    """Takes a bytestring with the XML from IANA; returns a list of trust anchors.

    Args:
        trust_anchor_xml: Bytestring holding the XML document. It is turned
            into text with ``bytes_to_string`` before being parsed.

    Returns:
        A list with one dict per ``KeyDigest`` element. Each dict maps
        "KeyTag", "Algorithm", "DigestType" and "Digest" to the text of the
        sub-element of that name, and "validFrom" / "validUntil" to the value
        of the attribute of that name ("" when the attribute is absent).

    ``die`` is called when the text is shorter than 100 characters, when a
    KeyDigest lacks one of the four sub-elements, or when no KeyDigest is found.
    """
    # Turn the bytes from trust_anchor_xml into a string
    trust_anchor_xml_string = bytes_to_string(trust_anchor_xml)
    # Sanity check: make sure there is enough text in the returned stuff
    if len(trust_anchor_xml_string) < 100:
        die("The XML was too short: {} chars.".format(len(trust_anchor_xml_string)))
    # ElementTree requires a file so use StringIO to turn the string into a file
    try:
        trust_anchor_as_file = StringIO(trust_anchor_xml_string)  # This works for Python 3
    except TypeError:
        # Needed for Python 2 (presumably StringIO is io.StringIO, which only
        # accepts unicode there). Only TypeError is caught so that SystemExit
        # and KeyboardInterrupt are no longer swallowed.
        trust_anchor_as_file = StringIO(unicode(trust_anchor_xml_string))  # noqa: F821
    # Get the tree
    trust_anchor_tree = xml.etree.ElementTree.ElementTree(file=trust_anchor_as_file)
    # Get all the KeyDigest elements
    digest_elements = trust_anchor_tree.findall(".//KeyDigest")
    print("There were {} KeyDigest elements in the trust anchor file.".format(
        len(digest_elements)))
    trust_anchors = []  # List of dicts that is taken from the XML file
    # Collect the values for the KeyDigest subelements and attributes
    for (count, this_digest_element) in enumerate(digest_elements):
        digest_value_dict = {}
        for this_subelement in ["KeyTag", "Algorithm", "DigestType", "Digest"]:
            found_subelement = this_digest_element.find(this_subelement)
            # find() returns None for a missing child; test for that directly
            # instead of catching whatever the attribute access would raise.
            if found_subelement is None:
                die("Did not find {} element in a KeyDigest in a trust anchor.".format(
                    this_subelement))
            digest_value_dict[this_subelement] = found_subelement.text
        for this_attribute in ["validFrom", "validUntil"]:
            # Missing attributes get empty values
            digest_value_dict[this_attribute] = this_digest_element.attrib.get(
                this_attribute, "")
        # Save this to the trust_anchors list
        print("Added the trust anchor {} to the list:\n{}".format(
            count, pprint.pformat(digest_value_dict)))
        trust_anchors.append(digest_value_dict)
    if not trust_anchors:
        die("There were no trust anchors found in the XML file.")
    return trust_anchors
def trainSVMTK(docs, pairs, dditype, model="svm_tk_classifier.model", excludesentences=None):
    """Write the SVM-light-TK training file for ``pairs`` and run ``svm_learn``.

    Args:
        docs: Indexed as ``docs[sid][relations.SENTENCE_PAIRS][p]``; that entry
            is handed to ``get_svm_train_line`` for every pair written.
        pairs: Mapping from pair id to a record indexed with
            ``relations.PAIR_DDI``, ``relations.PAIR_TYPE`` and
            ``relations.PAIR_DEP_TREE``.
        dditype: Interaction type to train for, or ``"all"``. When it is not
            ``"all"``, pairs flagged ``PAIR_DDI`` whose ``PAIR_TYPE`` differs
            are left out of the training file.
        model: File name of the model; the training data is written next to
            it with a ``.txt`` suffix under ``ddi_models/``.
        excludesentences: Sentence ids whose pairs are skipped. Defaults to
            no exclusions.

    Exits the process through ``sys.exit()`` when the model file does not
    exist after ``svm_learn`` has run.
    """
    # A None default replaces the shared mutable ``[]`` default.
    if excludesentences is None:
        excludesentences = []
    model_path = "ddi_models/" + model
    train_path = model_path + ".txt"
    # Remove leftovers first so the existence check after training really
    # reflects this run.
    for stale_path in (model_path, train_path):
        if os.path.isfile(stale_path):
            os.remove(stale_path)
    with open(train_path, 'w') as train:
        for p in pairs:
            pair = pairs[p]
            if dditype != "all" and pair[relations.PAIR_DDI] and pair[relations.PAIR_TYPE] != dditype:
                continue
            sid = relations.getSentenceID(p)
            if sid in excludesentences:
                continue
            tree = pair[relations.PAIR_DEP_TREE][:]
            line = get_svm_train_line(tree, pair, sid,
                                      docs[sid][relations.SENTENCE_PAIRS][p])
            # Pairs without an interaction become negative examples. Positive
            # pairs of another type never get here (skipped by the `continue`
            # above), so no second negation branch is needed.
            if not pair[relations.PAIR_DDI]:
                line = '-' + line
            train.write(line)
    # NOTE(review): the files above live under "ddi_models/" while svm_learn is
    # given basedir-based paths; this only works if basedir == "ddi_models/" -
    # confirm against the module-level definition of basedir.
    svmlightcall = Popen(["./svm-light-TK-1.2/svm-light-TK-1.2.1/svm_learn", "-t", "5",
                          "-L", "0.4", "-T", "2", "-S", "2", "-g", "10",
                          "-D", "0", "-C", "T", basedir + model + ".txt", basedir + model],
                         stdout=PIPE, stderr=PIPE)
    res = svmlightcall.communicate()
    if not os.path.isfile(model_path):
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print("failed training model " + basedir + model)
        print(res)
        sys.exit()
def get_act(self, scraper):
    """Creates Parse_XML_Action. Higher order function.

    Args:
        scraper: Object whose ``text`` is parsed and whose ``xml_tree``
            attribute receives the resulting ``ElementTree``.

    Returns:
        A zero-argument callable. When ``scraper.text`` is empty it first runs
        ``Get_Action`` on the scraper; if the text is still empty it does
        nothing, otherwise it stores the parsed tree in ``scraper.xml_tree``.
    """
    def act():
        if not scraper.text:
            Get_Action().execute(scraper)
        if not scraper.text:
            return
        source = scraper.text
        # ElementTree.parse() interprets a string as a *file name*, so XML
        # markup held in scraper.text must go through fromstring() instead.
        # Anything that is not markup (a path or a file object) keeps using
        # parse(), exactly as before.
        is_markup = False
        if isinstance(source, (str, bytes)):
            opening = b"<" if isinstance(source, bytes) else "<"
            is_markup = source.lstrip().startswith(opening)
        if is_markup:
            root = xml.etree.ElementTree.fromstring(source)
            scraper.xml_tree = xml.etree.ElementTree.ElementTree(root)
        else:
            scraper.xml_tree = xml.etree.ElementTree.parse(source)
    return act
def get_act(self, scraper):
    """Creates Find_XML_Elements_Action. Higher-order function.

    Args:
        scraper: Object providing ``xml_tree`` (the parsed document) and
            ``xml_elements`` (the list that matching elements are appended to).

    Returns:
        A zero-argument callable. It runs ``Parse_XML_Action`` first when
        ``scraper.xml_tree`` is empty and gives up if it is still empty.
        Otherwise it walks either the tree (``self.find_subelements`` false)
        or every element already in ``scraper.xml_elements`` (true) in
        document order and appends each matching element.
    """
    def matches(element):
        """Tell whether ``element`` satisfies the tag/attribute pairs.

        ``self.attributes[i]`` holds the required attribute values for
        ``self.tags[i]``; the first occurrence of the tag is the one used.
        """
        if not self.tags:
            # No tag filter: every element qualifies. Previously this case
            # fell through to ``self.tags.index`` and raised ValueError.
            return True
        if element.tag not in self.tags:
            return False
        required = self.attributes[self.tags.index(element.tag)]
        return all(key in element.attrib and element.attrib[key] == required[key]
                   for key in required)

    def act():
        if not scraper.xml_tree:
            Parse_XML_Action().execute(scraper)
        if not scraper.xml_tree:
            return
        if self.find_subelements:
            # Work on a snapshot: matches are appended to the live list.
            start_points = scraper.xml_elements[:]
        else:
            start_points = [scraper.xml_tree]
        for start in start_points:
            # iter() exists on both Element and ElementTree and yields the
            # start element followed by its descendants in document order, so
            # a whole ElementTree (which has no ``.tag``) can be searched too.
            for element in start.iter():
                if matches(element):
                    scraper.xml_elements.append(element)
    return act
def get_act(self, scraper):
    """Creates Parse_XML_Action. Higher order function.

    Args:
        scraper: Object whose ``text`` is parsed and whose ``xml_tree``
            attribute receives the resulting ``ElementTree``.

    Returns:
        A zero-argument callable. When ``scraper.text`` is empty it first runs
        ``Get_Action`` on the scraper; if the text is still empty it does
        nothing, otherwise it stores the parsed tree in ``scraper.xml_tree``.
    """
    def act():
        if not scraper.text:
            Get_Action().execute(scraper)
        if not scraper.text:
            return
        source = scraper.text
        # ElementTree.parse() interprets a string as a *file name*, so XML
        # markup held in scraper.text must go through fromstring() instead.
        # Anything that is not markup (a path or a file object) keeps using
        # parse(), exactly as before.
        is_markup = False
        if isinstance(source, (str, bytes)):
            opening = b"<" if isinstance(source, bytes) else "<"
            is_markup = source.lstrip().startswith(opening)
        if is_markup:
            root = xml.etree.ElementTree.fromstring(source)
            scraper.xml_tree = xml.etree.ElementTree.ElementTree(root)
        else:
            scraper.xml_tree = xml.etree.ElementTree.parse(source)
    return act
def get_act(self, scraper):
    """Creates Find_XML_Elements_Action. Higher-order function.

    Args:
        scraper: Object providing ``xml_tree`` (the parsed document) and
            ``xml_elements`` (the list that matching elements are appended to).

    Returns:
        A zero-argument callable. It runs ``Parse_XML_Action`` first when
        ``scraper.xml_tree`` is empty and gives up if it is still empty.
        Otherwise it walks either the tree (``self.find_subelements`` false)
        or every element already in ``scraper.xml_elements`` (true) in
        document order and appends each matching element.
    """
    def matches(element):
        """Tell whether ``element`` satisfies the tag/attribute pairs.

        ``self.attributes[i]`` holds the required attribute values for
        ``self.tags[i]``; the first occurrence of the tag is the one used.
        """
        if not self.tags:
            # No tag filter: every element qualifies. Previously this case
            # fell through to ``self.tags.index`` and raised ValueError.
            return True
        if element.tag not in self.tags:
            return False
        required = self.attributes[self.tags.index(element.tag)]
        return all(key in element.attrib and element.attrib[key] == required[key]
                   for key in required)

    def act():
        if not scraper.xml_tree:
            Parse_XML_Action().execute(scraper)
        if not scraper.xml_tree:
            return
        if self.find_subelements:
            # Work on a snapshot: matches are appended to the live list.
            start_points = scraper.xml_elements[:]
        else:
            start_points = [scraper.xml_tree]
        for start in start_points:
            # iter() exists on both Element and ElementTree and yields the
            # start element followed by its descendants in document order, so
            # a whole ElementTree (which has no ``.tag``) can be searched too.
            for element in start.iter():
                if matches(element):
                    scraper.xml_elements.append(element)
    return act
def preprocess(self, **k):
    """Read the login and the hex-encoded credentials from an XML document.

    Keyword Args:
        login: Stored unchanged in ``self.login`` (``None`` when absent).
        xmlfile: File name or file object with the XML; used when not None.
        xml: XML markup as a string; used when ``xmlfile`` is missing or None.

    Sets ``self.cred`` to the hex-decoded text of the first
    ``.//Account/Credentials2`` element, falling back to
    ``.//Account/Credentials3``, or to ``None`` when neither exists.
    """
    # Local import keeps the block self-contained; unhexlify works on both
    # Python 2 and 3, unlike the Python-2-only ``str.decode('hex')``.
    import binascii

    self.login = k.get('login')
    if k.get('xmlfile') is not None:
        tree = etree.ElementTree()
        tree.parse(k['xmlfile'])
    else:
        # ElementTree instances have no ``fromstring`` method; the
        # module-level function builds the root element to wrap.
        tree = etree.ElementTree(etree.fromstring(k['xml']))
    self.cred = tree.find(".//Account/Credentials2")
    if self.cred is None:
        self.cred = tree.find(".//Account/Credentials3")
    if self.cred is not None:
        # An empty element has ``text`` None; treat it as an empty credential.
        self.cred = binascii.unhexlify((self.cred.text or "").strip())
def get_svm_train_line(tree, pair, sid):
    """Build one SVM-light-TK example line from a bracketed parse tree.

    Args:
        tree: Parse tree as a string. When it does not start with ``(`` it is
            wrapped as ``(S (<tree> NN))``.
        pair: Object exposing ``entities``; the text of the first token of
            ``entities[0]`` and ``entities[1]`` is replaced by the placeholder
            ``candidatedrug`` in the tree.
        sid: Sentence id. Not used by the current implementation; kept so
            existing callers keep working.

    Returns:
        The string ``"1 |BT|" + tree + "|ET| \\n"``; callers prepend ``-`` to
        turn the label into ``-1``.
    """
    # NOTE(review): str.replace substitutes every occurrence of the token text,
    # including inside longer words, and the second call can also hit the
    # placeholder inserted by the first one - confirm this is acceptable.
    tree = tree.replace(pair.entities[0].tokens[0].text, 'candidatedrug')
    tree = tree.replace(pair.entities[1].tokens[0].text, 'candidatedrug')
    # TODO: replace other entities
    if tree[0] != '(':
        tree = '(S (' + tree + ' NN))'
    # The parse result is not used for the output. The call is kept because it
    # presumably rejects a malformed bracketing before the line is written
    # (this depends on the version of nltk). The ids and per-leaf lemmas that
    # were computed here never reached the returned line and were removed.
    Tree.fromstring(tree)
    logging.debug("tree:%s", tree)
    return '1 |BT|' + tree + '|ET| \n'
def testSVMTK(sentence, pairs, pairs_list, model="svm_tk_classifier.model", tag=""):
    """Classify ``pairs`` with a trained SVM-light-TK model.

    Args:
        sentence: Object whose ``parsetree`` is used as the tree for every pair.
        pairs: Mapping from pair id to a pair object exposing ``sid``,
            ``entities`` and a ``recognized_by`` mapping.
        pairs_list: Its length is the number of prediction lines expected in
            the classifier output.
        model: Model file name, looked up under ``basedir``.
        tag: Prefix for the temporary data and output file names.

    Returns:
        ``pairs``, with ``recognized_by[relations.SST_PRED]`` of every pair set
        to -1 for a negative score and 1 otherwise.

    Exits the process through ``sys.exit()`` when the classifier produced no
    output file or an unexpected number of lines.
    """
    data_name = tag + "svm_test_data.txt"
    output_name = tag + "svm_test_output.txt"
    data_path = temp_dir + data_name
    output_path = temp_dir + output_name
    # The files are written to temp_dir, but only basedir used to be cleaned.
    # A stale output file in temp_dir could therefore hide a failed run.
    # Both locations are cleaned now.
    for directory in (basedir, temp_dir):
        for name in (data_name, output_name):
            if os.path.isfile(directory + name):
                os.remove(directory + name)
    with open(data_path, 'w') as test:
        for pid in pairs:
            line = get_svm_train_line(sentence.parsetree, pairs[pid], pairs[pid].sid)
            # Every test example is written with the label -1.
            test.write('-' + line)
    svmtklightargs = ["./bin/svm-light-TK-1.2/svm-light-TK-1.2.1/svm_classify",
                      data_path, basedir + model, output_path]
    svmlightcall = Popen(svmtklightargs, stdout=PIPE, stderr=PIPE)
    res = svmlightcall.communicate()
    if not os.path.isfile(output_path):
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print("something went wrong with SVM-light-TK")
        print(res)
        sys.exit()
    with open(output_path, 'r') as out:
        lines = out.readlines()
    # NOTE(review): the count is checked against pairs_list while the scores
    # are matched to `pairs` by position - confirm both always have the same
    # length and order.
    if len(lines) != len(pairs_list):
        print("check " + tag + "svm_test_output.txt! something is wrong")
        print(res)
        sys.exit()
    for p, pid in enumerate(pairs):
        score = float(lines[p])
        pairs[pid].recognized_by[relations.SST_PRED] = -1 if score < 0 else 1
        # Log both entities of the pair (the first one used to be printed twice).
        logging.info("{} - {} SST: {}".format(pairs[pid].entities[0], pairs[pid].entities[1], score))
    return pairs