我们从Python开源项目中,提取了以下37个代码示例,用于说明如何使用sgmllib.SGMLParseError()。
def parse_declaration(self, i):
    """Handle the declaration that starts at offset i of the raw buffer.

    A CDATA section becomes a CData object; a declaration the SGML
    parser rejects is passed through as raw data.  Returns the offset
    at which the caller should resume.
    """
    cdata_open = '<![CDATA['
    if self.rawdata[i:i + len(cdata_open)] == cdata_open:
        close_at = self.rawdata.find(']]>', i)
        if close_at == -1:
            # No terminator: the section runs to the end of the buffer.
            close_at = len(self.rawdata)
        section = self.rawdata[i + len(cdata_open):close_at]
        self._toStringSubclass(section, CData)
        # Step over the three characters of ']]>'.
        return close_at + 3

    try:
        return SGMLParser.parse_declaration(self, i)
    except SGMLParseError:
        # Bogus declaration: emit everything from i onward as text.
        remainder = self.rawdata[i:]
        self.handle_data(remainder)
        return i + len(remainder)
def parse_declaration(self, i):
    """Parse the declaration at offset i, tolerating malformed ones.

    Delegates to sgmllib.SGMLParser.parse_declaration.  If that raises
    SGMLParseError, the opening '<' is emitted as character data and
    the returned offset points one character further on.
    """
    try:
        resume_at = sgmllib.SGMLParser.parse_declaration(self, i)
    except sgmllib.SGMLParseError:
        # Escape the doctype declaration and continue parsing.
        self.handle_data('<')
        resume_at = i + 1
    return resume_at
def get_description_urls(html):
    """Collect the anchor URLs found in html.

    Only URLs whose parsed scheme component is non-empty are kept, and
    each one is passed through xmlescape().  Returns an empty list when
    the markup raises SGMLParseError.
    """
    from htmllib import HTMLParser
    from formatter import NullFormatter
    import urlparse
    import sgmllib

    try:
        link_parser = HTMLParser(NullFormatter())
        link_parser.feed(html)
        link_parser.close()
    except sgmllib.SGMLParseError:
        return []

    # Element 0 of the urlparse() result is the scheme.
    return [
        xmlescape(href)
        for href in link_parser.anchorlist
        if urlparse.urlparse(href)[0]
    ]
def dopage(self, url_pair):
    """Check one (url, fragment) pair and queue the links on its page.

    The page is fetched with getpage() and stored in name_table under
    its URL.  A fragment that is not among the page's names is reported
    through setbad(), and every link the page reports is handed to
    newlink().  When nonames is set and the pair carries a fragment,
    the pair is only marked done.  A parse failure is logged with
    note() and the page is recorded as None rather than marked bad.
    """
    # All printing of URLs uses format_url(); argument changed to
    # url_pair for clarity.
    if self.verbose > 2:
        self.show("Check ", self.format_url(url_pair),
                  " from", self.todo[url_pair])
    elif self.verbose > 1:
        self.message("Check %s", self.format_url(url_pair))

    url, local_fragment = url_pair
    if local_fragment and self.nonames:
        self.markdone(url_pair)
        return

    try:
        page = self.getpage(url_pair)
    except sgmllib.SGMLParseError as err:
        # The "as" form is valid on Python 2.6+ and Python 3; the old
        # comma form is a syntax error on Python 3.
        msg = self.sanitize(err)
        self.note(0, "Error parsing %s: %s",
                  self.format_url(url_pair), msg)
        # Don't actually mark the URL as bad - it exists, just
        # we can't parse it!
        page = None

    if page:
        # Store the page which corresponds to this URL.
        self.name_table[url] = page
        # If there is a fragment in this url_pair, and it's not
        # in the list of names for the page, call setbad(), since
        # it's a missing anchor.
        if local_fragment and local_fragment not in page.getnames():
            self.setbad(url_pair,
                        ("Missing name anchor `%s'" % local_fragment))
        # getlinkinfos() returns the fragment as well, and that
        # fragment is stored in the "todo" dictionary via newlink().
        for link, rawlink, fragment in page.getlinkinfos():
            # The fragment is not part of the origin, since the
            # origin is logically a page.
            origin = url, rawlink
            self.newlink((link, fragment), origin)
    else:
        # If no page has been created yet, we want to
        # record that fact.
        self.name_table[url] = None
    self.markdone(url_pair)