我们从 Python 开源项目中提取了以下 14 个代码示例,用于说明如何使用 lxml.html.Element()。
def parse_rsc_html(htmlstring):
    """Parse messy RSC HTML, fixing known problems before a selector is created.

    The encoding is detected with ``UnicodeDammit`` and the markup is parsed
    with a recovering ``HTMLParser``. Afterwards, orphan text that trails a
    block-level child of the element with id ``wrapper`` is moved, together
    with the non-block siblings that follow it, into a new ``p`` element
    carrying ``class="otherpara"``.

    :param htmlstring: The HTML document to parse.
    :returns: The root element of the parsed and repaired tree.
    :raises UnicodeDecodeError: If ``UnicodeDammit`` yields no unicode markup.
    """
    converted = UnicodeDammit(htmlstring)
    if not converted.unicode_markup:
        # UnicodeDecodeError requires exactly five arguments (encoding, object,
        # start, end, reason). Constructing it from a lone message raises
        # TypeError instead and hides the real failure, so build it properly
        # and actually fill in the list of attempted encodings.
        tried = ', '.join(
            str(entry[0] if isinstance(entry, tuple) else entry)
            for entry in converted.tried_encodings
        )
        # The "object" argument must be bytes; fall back to an empty payload
        # when the caller handed us text.
        raw = htmlstring if isinstance(htmlstring, bytes) else b''
        raise UnicodeDecodeError(
            converted.original_encoding or 'unknown',
            raw,
            0,
            len(raw),
            'Failed to detect encoding, tried [%s]' % tried,
        )

    root = fromstring(
        htmlstring,
        parser=HTMLParser(recover=True, encoding=converted.original_encoding),
    )

    # Add p.otherpara tags around orphan text
    wrapper = root.get_element_by_id('wrapper')
    newp = None
    for child in wrapper:
        if newp is not None:
            closes_paragraph = (
                child.tag in BLOCK_ELEMENTS
                or child.get('id', '').startswith('sect')
                or child.getnext() is None
            )
            if closes_paragraph:
                # The pending paragraph ends here: insert it before this child.
                child.addprevious(newp)
                newp = None
            else:
                # Still inside the orphan run: move the child into the paragraph.
                newp.append(child)
        if newp is None and child.tag in BLOCK_ELEMENTS and child.tail and child.tail.strip():
            # Text directly after a block element has no paragraph of its own;
            # start one and take the text over from the element's tail.
            newp = Element('p', **{'class': 'otherpara'})
            newp.text = child.tail
            child.tail = ''

    if newp is not None:
        # The last child was a block element with trailing text. Its tail has
        # already been cleared, so attach the paragraph or the text is lost.
        wrapper.append(newp)
    return root
def fragment_fromstring(html, create_parent=False, guess_charset=False,
                        parser=None):
    """Parse a string holding exactly one HTML element and return that element.

    Without ``create_parent`` the input must contain a single element, and
    only whitespace may follow it; anything else raises ``etree.ParserError``.

    With a truthy ``create_parent`` the parsed content is placed inside a
    newly created parent element, and leading or trailing text is accepted.
    If ``create_parent`` is a string it is used as the parent's tag name,
    otherwise the parent is a ``div``.

    Raises ``TypeError`` when ``html`` is not a string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    wrap_result = bool(create_parent)
    parsed = fragments_fromstring(
        html,
        guess_charset=guess_charset,
        parser=parser,
        no_leading_text=not wrap_result,
    )

    if not wrap_result:
        # Strict mode: exactly one element, nothing but whitespace after it.
        count = len(parsed)
        if count == 0:
            raise etree.ParserError('No elements found')
        if count > 1:
            raise etree.ParserError('Multiple elements found')
        only = parsed[0]
        trailing = only.tail
        if trailing and trailing.strip():
            raise etree.ParserError('Element followed by text: %r' % trailing)
        only.tail = None
        return only

    # Wrapping mode: any truthy non-string value selects the default tag.
    parent_tag = create_parent if isinstance(create_parent, _strings) else 'div'
    container = Element(parent_tag)
    if parsed:
        first = parsed[0]
        if isinstance(first, _strings):
            # A leading string entry becomes the container's own text.
            container.text = first
            del parsed[0]
        container.extend(parsed)
    return container
def __init__(self, failure):
    """Set up the page reporting that startup failed.

    The traceback text obtained from ``failure.getTraceback()`` is placed in
    a ``pre`` element, serialised to a string, and passed as the page detail
    alongside a SERVICE_UNAVAILABLE status.

    :param failure: Object providing ``getTraceback()``
        (presumably a Twisted ``Failure`` -- confirm against callers).
    """
    pre_node = html.Element("pre")
    pre_node.text = failure.getTraceback()
    rendered_detail = html.tostring(pre_node, encoding=str)
    status_code = int(SERVICE_UNAVAILABLE)
    super(StartFailedPage, self).__init__(
        status=status_code,
        brief="MAAS failed to start",
        detail=rendered_detail,
    )
def _add_tracking_pixel(tree, extra_metadata, configuration):
    """Append an ``img`` element pointing at the open-tracking URL to the body.

    The URL comes from ``get_open_tracking_url(extra_metadata, configuration)``
    and the new element is added as the last child of ``tree.body``. The tree
    is modified in place and nothing is returned.

    :param tree: Parsed HTML document exposing a ``body`` element.
    :param extra_metadata: Passed through to ``get_open_tracking_url``.
    :param configuration: Passed through to ``get_open_tracking_url``.
    """
    tracking_src = get_open_tracking_url(extra_metadata, configuration)
    attributes = {"src": tracking_src}
    img_node = html.Element("img", attributes)
    body = tree.body
    body.append(img_node)