我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用html.parser.HTMLParser.__init__()。
def __init__(self, data_def = None, warnaction = "default", warngoal = sys.stderr, caller_id = 0):
    """Initialise the data-definition converter and register warning handling.

    :param data_def: optional dict holding the data definition to convert
    :param warnaction: warnings filter action (None keeps the current setting)
    :param warngoal: stream warnings are written to
    :param caller_id: id registered with the shared module-level _Warnings
    """
    self.tree_lock = RLock()
    with self.tree_lock:
        self.dtc = DataTreeConstants()
        # node ids known to carry url / link data (project-specific tables)
        self.known_urlid = (0, 4, 11, 14)
        self.known_linkid = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
        self.errorcode = dte.dtDataDefOK
        self.caller_id = caller_id
        self.cdata_def = {}
        self.ddtype = ""
        # Share one module-wide _Warnings instance between all callers.
        # (fixed: None comparisons now use "is"/"is not" instead of ==/!=)
        if sys.modules['DataTreeGrab']._warnings is None:
            sys.modules['DataTreeGrab']._warnings = _Warnings(warnaction, warngoal, caller_id)
        elif caller_id not in sys.modules['DataTreeGrab']._warnings._ids or warnaction is not None:
            sys.modules['DataTreeGrab']._warnings.set_warnaction(warnaction, caller_id)
        if isinstance(data_def, dict):
            self.data_def = data_def
            self.convert_data_def()
        else:
            self.data_def = {}
def __init__(self, dtree, data = None, parent = None, key = None):
    """Build a JSON tree node, recursively wrapping nested lists/dicts.

    :param dtree: the owning tree object
    :param data: the raw JSON value for this node (list, dict or scalar)
    :param parent: the parent node (None for the root)
    :param key: key (dict) or index (list) under which this node lives
    """
    self.type = "value"
    self.key = key
    self.keys = []
    self.key_index = {}
    self.value = None
    # base class wires this node into the tree (parent/child links, lock)
    DATAnode.__init__(self, dtree, parent)
    with self.node_lock:
        if isinstance(data, list):
            # each list element becomes a child node keyed by its index
            self.type = "list"
            for k in range(len(data)):
                JSONnode(self.dtree, data[k], self, k)
        elif isinstance(data, dict):
            # each dict item becomes a child node keyed by its dict key
            self.type = "dict"
            for k, item in data.items():
                JSONnode(self.dtree, item, self, k)
        else:
            # leaf: store the scalar value directly
            self.type = "value"
            self.value = data
def __init__(self, data, output = sys.stdout, warnaction = "default", warngoal = sys.stderr, caller_id = 0):
    """Build a JSON data tree around an already-parsed JSON structure.

    :param data: parsed JSON structure (dict/list/scalar) to wrap
    :param output: stream used for tree output
    :param warnaction: warnings filter action, passed on to DATAtree
    :param warngoal: stream warnings are written to
    :param caller_id: id registered with the warning machinery
    """
    DATAtree.__init__(self, output, warnaction, warngoal, caller_id)
    with self.tree_lock:
        self.tree_type = 'json'
        self.extract_from_parent = True
        self.data = data
        # Read the json data into the tree
        try:
            self.root = JSONnode(self, data, key = 'ROOT')
            self.start_node = self.root
        except Exception:
            # fixed: was a bare "except:" which also swallowed
            # SystemExit / KeyboardInterrupt
            self.warn('Unable to parse the JSON data. Invalid dataset!', dtDataWarning, 1)
            self.start_node = NULLnode()
# end JSONtree
def __init__(self, data_def, data = None, warnaction = "default", warngoal = sys.stderr, caller_id = 0):
    """Initialise the tree wrapper from a data definition and optional data.

    :param data_def: data definition dict (converted via init_data_def)
    :param data: optional raw data to load immediately
    :param warnaction: warnings filter action
    :param warngoal: stream warnings are written to
    :param caller_id: id registered with the shared _Warnings instance
    """
    self.tree_lock = RLock()
    with self.tree_lock:
        self.dtc = DataTreeConstants()
        self.ddconv = DataDef_Convert(warnaction = warnaction, warngoal = warngoal, caller_id = caller_id)
        self.caller_id = caller_id
        # debugging / output switches
        self.print_tags = False
        self.print_searchtree = False
        self.show_result = False
        self.fle = sys.stdout
        # share one module-wide _Warnings instance
        # (fixed: "== None"/"!= None" -> identity comparisons)
        if sys.modules['DataTreeGrab']._warnings is None:
            sys.modules['DataTreeGrab']._warnings = _Warnings(warnaction, warngoal, caller_id)
        else:
            sys.modules['DataTreeGrab']._warnings.set_warnaction(warnaction, caller_id)
        self.searchtree = None
        self.timezone = pytz.utc
        self.errorcode = dte.dtDataInvalid
        self.result = []
        self.data_def = None
        self.init_data_def(data_def)
        if data is not None:
            self.init_data(data)
def __init__(self, base_url, url=None):
    """Initializer

    :param base_url: site base url
    :param url: current url
    """
    HTMLParser.__init__(self)
    # parser working state
    self.cache = {}
    self.items = []
    self.pages = {}
    self.total_pages = 0
    self.site_total_pages = 0
    self.CACHE_SIZE = 500
    # current location: fall back to the site base url when none is given
    self.base_url = base_url
    self.url = url if url else self.base_url
def __init__(self, name, outbox, max_task):
    '''
    Worker process that fetches urls concurrently.

    @name: process name
    @outbox: shared queue that collected urls are pushed to
    @max_task: maximum number of concurrent tasks (coroutines)

    NOTE(review): the original comments were mojibake; the descriptions
    above are reconstructed from the code -- verify against the caller.
    '''
    multiprocessing.Process.__init__(self)
    self.name = name
    self.inbox = multiprocessing.Queue()  # urls queued for this worker
    self.outbox = outbox
    self.max_task = max_task
    # shared integer counter of tasks currently in flight
    self.doing = multiprocessing.Value('i', 0)
    self._doing = set()
    self.result = set()  # urls already handled
    self.loop = None
def __init__(self): """Initialize attributes.""" if sys.version.startswith('3.'): # Python 3.x super().__init__(convert_charrefs=False) else: # use HTMLParser.__init__ because HTMLParser is an 'old' style class, which cannot be passed to super() # see http://codependentcodr.blogspot.com/2012/02/python-htmlparser-and-super.html HTMLParser.__init__(self) self._root = _HtmlHeaderNode(level=0) # root node with no data of itself, only 'children' matters self._curr_node = self._root # most recently handled header node self._in_header = False self._header_id_count = {} # record header ids to avoid collisions self._html = '' # full HTML string parsed self._temp_start_tag = '' # temporary HTML start tag of this current header node
def __init__(self, results, url):
    """Collect torrent rows parsed from a search-results page.

    :param results: shared list the finished items are appended to
    :param url: base url of the site being parsed
    """
    HTMLParser.__init__(self)
    self.results = results
    self.url = url
    # state for the torrent currently being assembled
    self.current_item = {}  # One torrent result
    self.add_query = True
    # meta-data bookkeeping while walking the result table
    self.torrent_info_index = 0  # Count of the meta data encountered
    self.torrent_info_array = []
    self.meta_data_grabbing = 0
    self.meta_data_array = []
    self.torrent_no_files = 0
    self.torrent_date_added = 0
    self.torrent_popularity = 0
    # NOTE: "mangnet" is a long-standing typo kept because other
    # handlers read this attribute by that name
    self.mangnet_link = ""
    self.desc_link = ""
    self.torrent_name = ""
def __init__(self, model, label, data=()):
    """ Returns a new Model calibrated on the given data,
        which is a set of (vector, label)-tuples.
    """
    # (fixed: the default used to be a shared mutable list, data=[];
    #  data is only iterated, so an empty tuple is backward compatible)
    self._model = model
    self._label = label
    # Isotonic regression: sort (score, hit) pairs by score, then apply
    # pool-adjacent-violators to make the hit rate monotonic.
    y = ((model.predict(v)[label], label == x) for v, x in data)
    y = sorted(y)  # monotonic
    # NOTE(review): "y or ((),())" looks like a Python 2 idiom; on
    # Python 3 an exhausted zip object is still truthy -- verify if this
    # module is ever run with empty data under Python 3.
    y = zip(*y)
    y = list(y or ((), ()))
    x = list(y[0])
    y = list(y[1])
    y = pav(y)
    x = [0] + x + [1]
    y = [0] + y + [1]
    f = {}
    i = 0
    # Linear interpolation of the calibrated probability at p = 0.00 .. 1.00:
    for p in range(100 + 1):
        p *= 0.01
        while x[i] < p:
            i += 1
        f[p] = (y[i-1] * (x[i] - p) + y[i] * (p - x[i-1])) / (x[i] - x[i-1])
    self._f = f
def __init__(self, path='WordNet-3.0'):
    """ Opens the WordNet database from the given path
        (that contains dict/index.noun, dict/data.noun, ...)
    """
    self._f = {}  # {'n': <open file 'dict/data.noun'>} -- data files stay open
    for k, v in (('n', 'noun'), ('v', 'verb'), ('a', 'adj'), ('r', 'adv')):
        # data file: kept open for later on-demand synset lookups by offset
        f = cd(path, 'dict', 'data.%s' % v)
        f = open(f, 'rb')
        self._f[k] = f
        # index file: read fully here, then closed
        f = cd(path, 'dict', 'index.%s' % v)
        f = open(f, 'r')
        for s in f:
            if not s.startswith(' '):  # leading-space lines are the license header
                s = s.strip()
                s = s.split(' ')
                # s[2] is the synset count; that many trailing fields are
                # the synset offsets for this lemma
                p = s[-int(s[2]):]
                w = s[0]
                w = w.replace('_', ' ')  # lemmas use '_' for spaces on disk
                self[w, k] = p  # {('grasp', 'n'): (offset1, ...)}
        f.close()
def __init__(self, decode_html_entities=False, data_separator=' '):
    """Table-parsing state.

    :param decode_html_entities: whether HTML entities should be decoded
    :param data_separator: string used to join pieces of cell text
    """
    HTMLParser.__init__(self)
    self._parse_html_entities = decode_html_entities
    self._data_separator = data_separator
    # cursor flags: are we currently inside a <td> / <th>?
    self._in_td = False
    self._in_th = False
    # accumulators for the table being built
    self._current_cell = []
    self._current_row = []
    self._current_table = []
    self.tables = []
def get_links(html):
    """Return the href targets of all <a> tags found in *html*.

    (fixed: removed leftover debug prints that dumped the url list to
    stdout on every call; the original garbled comments were replaced)
    """
    class URLSeeker(HTMLParser):
        # note: HTMLParser.__init__, not super().__init__ -- kept as in
        # the original (old-style-class compatibility)
        def __init__(self):
            HTMLParser.__init__(self)
            self.urls = []

        def handle_starttag(self, tag, attrs):
            href = dict(attrs).get('href')
            if href and tag == 'a':
                self.urls.append(href)

    url_seeker = URLSeeker()
    url_seeker.feed(html)
    return url_seeker.urls
def __init__(self):
    """Reset all table-parsing state."""
    HTMLParser.__init__(self)
    # cell-type flags
    self._in_th = False
    self._in_td = False
    # per-cell / per-row / per-table accumulators
    self._current_cell = []
    self._current_row = []
    self._current_table = []
    # completed tables
    self.tables = []
def __init__(self):
    """Initialise the text-extraction state."""
    HTMLParser.__init__(self)
    self.buf = []             # output fragments
    self.last_text = []       # text gathered for the current tag
    self.hide_output = False  # suppress output while inside hidden content
    self.tag_count = 0
    self.current_tag = None
def __init__(self):
    """Track which event field (time/title/location) is being read."""
    HTMLParser.__init__(self)
    # collected values
    self.event_time = []
    self.event_title = []
    self.event_location = []
    # section flags toggled by the tag handlers
    self.in_time = False
    self.in_title = False
    self.in_location = False
def __init__(self, *args, **kwargs):
    """Forward construction to HTMLParser and add bookkeeping for
    empty-element tags."""
    HTMLParser.__init__(self, *args, **kwargs)
    # Closing tags of empty-element tags that were already handled
    # without an explicit close.  Order does not matter (hence a plain
    # list, not a stack): a later closing tag of one of these types is
    # matched against an entry here and ignored.
    self.already_closed_empty_element = []
def __init__(self, *args, **kwargs):
    """Store the constructor arguments for deferred parser creation,
    normalising them across HTMLParser API versions.

    The CONSTRUCTOR_* names are module-level capability flags --
    presumably probed from HTMLParser at import time; verify where
    they are defined.
    """
    # 'strict' still exists and is not yet deprecated: disable it explicitly
    if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
        kwargs['strict'] = False
    # newer HTMLParser defaults convert_charrefs to True; force it off so
    # entity references reach the handlers unconverted
    if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
        kwargs['convert_charrefs'] = False
    self.parser_args = (args, kwargs)
def __init__(self, *args, **kwargs):
    """Initialise the base class, then attach a StyledNoteFormatter
    bound to this instance."""
    super().__init__(*args, **kwargs)
    self.snf = StyledNoteFormatter(self)
def __init__(self, form):
    """Wrap *form* and set up a WebAppBackend whose link building is
    delegated to this object's build_link method.

    :param form: form object; must expose a .database attribute
    """
    self.form = form
    self.database = form.database
    self._backend = WebAppBackend()
    # route the backend's link construction through our own build_link
    self._backend.build_link = self.build_link
def __init__(self, form):
    """Reset the parser and remember the form being processed."""
    HTMLParser.__init__(self)
    self.form = form
    self.__text = ""   # accumulated character data
    self.__tags = {}   # tag name -> recorded data
    self.__stack = []  # currently open tags
def __init__(self):
    """Collect document links of the form /doc/<number>."""
    self.data = []      # collected (link, name) results
    self.href = 0       # inside-an-anchor flag/counter
    self.linkname = ''  # text of the anchor being read
    # matches paths like "/doc/123"
    self.patt = re.compile(r'^/doc/\d+$')
    HTMLParser.__init__(self)
def __init__(self):
    """Collect pagination links of the form ?p=<number>."""
    self.data = set([])  # unique links found
    self.href = 0        # inside-an-anchor flag/counter
    # matches query strings like "?p=2"
    self.patt = re.compile(r'^\?p=\d+$')
    HTMLParser.__init__(self)
def __init__(self, allows=()):
    """Tag filter: keep only tags listed in *allows*.

    :param allows: iterable of tag names to keep; when empty, the
        class-level default ``allow_tags`` is used.
        (fixed: the default used to be a shared mutable list,
        ``allows = []`` -- it is only read here, so an empty tuple is
        backward compatible)
    """
    HTMLParser.__init__(self)
    self.allow_tags = allows if allows else self.allow_tags
    self.result = []
    self.start = []
    self.data = []
def __init__(self, warnaction = None, warngoal = sys.stderr, caller_id = 0):
    """Warning registry shared between callers.

    :param warnaction: warnings filter action; None selects "default"
    :param warngoal: stream warnings are written to
    :param caller_id: id to register with this registry
    """
    self.warn_lock = RLock()
    self.onceregistry = {}
    self.filters = []
    self._ids = []
    # (fixed: "not x in y" -> "x not in y"; "== None" -> "is None")
    if caller_id not in self._ids:
        self._ids.append(caller_id)
    self.warngoal = warngoal
    if warnaction is None:
        warnaction = "default"
    self.set_warnaction(warnaction, caller_id)
def __init__(self, dtree, parent = None):
    """Base node: wire this node into *dtree* under *parent*.

    :param dtree: the tree this node belongs to
    :param parent: parent DATAnode, or None for the root
    """
    self.node_lock = RLock()
    with self.node_lock:
        self.dtc = DataTreeConstants()
        self.children = []
        self.dtree = dtree
        self.parent = parent
        self.value = None
        self.child_index = 0
        self.level = 0
        # registries of links discovered while walking the tree
        self.links = {}
        self.links["values"] = {}
        self.links["nodes"] = {}
        self.end_links = {}
        self.end_links["values"] = {}
        self.end_links["nodes"] = {}
        # a node without a parent is the root; cache the root by walking up
        self.is_root = bool(self.parent == None)
        n = self
        while not n.is_root:
            n = n.parent
        self.root = n
        if isinstance(parent, DATAnode):
            # register with the parent and derive our depth from it
            self.parent.append_child(self)
            self.level = parent.level + 1
def __init__(self, dtree, data = None, parent = None):
    """HTML tree node: tag name, text/tail and attributes.

    NOTE(review): uses the Python 2 'unicode' type -- this module
    appears to target Python 2 (or defines a compat alias elsewhere).

    :param dtree: the owning tree
    :param data: either a tag-name string or a list
        [tag, [(attr, value), ...]] as delivered by the HTML parser
    :param parent: parent node, or None for the root
    """
    self.tag = u''
    self.text = u''
    self.tail = u''
    self.attributes = {}
    self.attr_names = []
    DATAnode.__init__(self, dtree, parent)
    with self.node_lock:
        if isinstance(data, (str, unicode)):
            self.tag = data.lower().strip()
        elif isinstance(data, list):
            if len(data) > 0:
                self.tag = data[0].lower().strip()
            if len(data) > 1 and isinstance(data[1], (list, tuple)):
                # normalise attribute names to lower case; strip string values
                for a in data[1]:
                    if isinstance(a[1], (str, unicode)):
                        self.attributes[a[0].lower().strip()] = a[1].strip()
                    else:
                        self.attributes[a[0].lower().strip()] = a[1]
                # keep 'class' and 'id' first in the ordered name list
                if 'class' in self.attributes.keys():
                    self.attr_names.append('class')
                if 'id' in self.attributes.keys():
                    self.attr_names.append('id')
                for a in self.attributes.keys():
                    if a not in self.attr_names:
                        self.attr_names.append(a)
def __init__(self, data, autoclose_tags=None, print_tags = False, output = sys.stdout, warnaction = "default", warngoal = sys.stderr, caller_id = 0):
    """Parse an HTML page into a node tree.

    :param data: the HTML text to parse
    :param autoclose_tags: tags to treat as self-closing
        (fixed: default was a shared mutable list, ``autoclose_tags=[]``;
        a None sentinel now yields a fresh list per call)
    :param print_tags: when True, print tags while parsing
    :param output: stream used for tree output
    :param warnaction: warnings filter action
    :param warngoal: stream warnings are written to
    :param caller_id: id registered with the warning machinery
    """
    HTMLParser.__init__(self)
    DATAtree.__init__(self, output, warnaction, warngoal, caller_id)
    with self.tree_lock:
        self.tree_type = 'html'
        self.print_tags = print_tags
        self.autoclose_tags = [] if autoclose_tags is None else autoclose_tags
        self.is_tail = False
        self.root = HTMLnode(self, 'root')
        self.current_node = self.root
        self.last_node = None
        self.text = u''
        self.open_tags = {}
        self.count_tags(data)
        # read the html page into the tree
        try:
            # Cover for incomplete reads where the essential body part is retrieved
            for ctag in ('body', 'BODY', 'html', 'HTML', 'xml', 'XML'):
                if u'<%s>' % (ctag, ) in data and not u'</%s>' % (ctag, ) in data:
                    data = u'%s</%s>' % (data, ctag)
            self.feed(data)
            self.reset()
            self.start_node = self.root
        except Exception:
            # fixed: was a bare "except:" which also swallowed
            # SystemExit / KeyboardInterrupt
            self.warn('Unable to parse the HTML data. Invalid dataset!', dtDataWarning, 1)
            self.start_node = NULLnode()
def get_links(html):
    """Return all anchor href values found in *html*, in document order."""
    class _AnchorCollector(HTMLParser):
        def __init__(self):
            HTMLParser.__init__(self)
            self.urls = []

        def handle_starttag(self, tag, attrs):
            if tag == 'a':
                target = dict(attrs).get('href')
                if target:
                    self.urls.append(target)

    collector = _AnchorCollector()
    collector.feed(html)
    return collector.urls
def __init__(self):
    """Set up the entity/attribute scraping state machine."""
    HTMLParser.__init__(self)
    self.entity = None
    self.state = 'IDLE'  # current state-machine state
    # attribute buckets filled while parsing
    self.data = {
        'rwattrs': [],
        'roattrs': [],
        'addattrs': [],
        'updateattrs': [],
    }
    self.new_current_attr()
    # pulls the argument name out of '"name": <type>' JSON samples
    self.re_json_arg = re.compile(r'"([^"]+)": *<[^>]+>')
def __init__(self) -> None:
    """Track gallery links found while scanning the page."""
    HTMLParser.__init__(self)
    self.galleries: typing.Set[str] = set()  # gallery urls collected so far
    self.stop_at_favorites: int = 0          # flag flipped by the tag handlers
def __init__(self) -> None:
    """Start with an empty-search flag of zero (set by the handlers)."""
    HTMLParser.__init__(self)
    self.empty_search = 0
def __init__(self) -> None:
    """State used while hunting for the torrent/gallery links."""
    HTMLParser.__init__(self, convert_charrefs=True)
    # flags flipped by the tag handlers
    self.stop_at_found: int = 0
    self.found_non_final_gallery: int = 0
    self.found_parent_gallery: int = 0
    self.found_gallery_link: int = 0
    # link targets collected along the way
    self.torrent_link = ''
    self.parent_gallery: str = ''
    self.non_final_gallery: str = ''
def __init__(self) -> None:
    """State for scraping seed counts and post dates."""
    HTMLParser.__init__(self, convert_charrefs=True)
    self.torrent = ''
    self.seeds = 0
    self.posted_date = ''
    # markers set while the matching cells are being read
    self.found_seed_data = 0
    self.found_posted_data = 0
def __init__(self) -> None:
    """Start with an empty archive link (filled in by the handlers)."""
    HTMLParser.__init__(self)
    self.archive = ''
def __init__(self, **kwargs):
    """Table-extraction parser.

    :param kwargs: options stored for later use by the handlers
    """
    HTMLParser.__init__(self)
    self.kwargs = kwargs
    # accumulated output
    self.tables = []
    self.rows = []
    self.last_row = []
    self.max_row_width = 0
    # parsing cursor
    self.active = None
    self.last_content = ""
    self.is_last_row_header = False
def __init__(self, lang):
    """Definition scraper scoped to one language.

    :param lang: lang of the word we are looking up
    """
    self.lang = lang
    # parsing flags
    self.in_lang = False       # are we in the appropriate language section?
    self.getting_defs = False  # are we collecting definitions?
    self.pos = ""              # part of speech we are in
    self.trans = {}            # each key is the pos, each entry the translations
    HTMLParser.__init__(self)
def __init__(self):
    """Initialise the counter and the id holder."""
    HTMLParser.__init__(self)
    self.count = 0
    self.id = None  # attribute name shadows builtin id(); kept for compatibility
def __init__(self, url, with_subdomain=False):
    """Link collector scoped to *url*'s domain.

    :param url: page url; split into protocol/domain/path via parse_url
    :param with_subdomain: whether subdomain links count as in-scope
    """
    HTMLParser.__init__(self)
    self.with_subdomain = with_subdomain
    self.protocol, self.domain, self.path = self.parse_url(url)
    self.links = set()
def __init__(self, level): """Initialize attributes.""" self.level = level # header level of the element, e.g. 1 for <h1>, 2 for <h2>, etc self.id = '' # anchor id (in-page link), used in 'id' and 'href' attribute of 'a' tag self.text = '' # pure text content of header tag, e.g. 'Title' for '<h1>Title</h1>' self.inner_html = '' # inner HTML self.father = None # point to the direct father node self.children = [] # elements with lower levels that directly follows the current elem
def __init__(self):
    """Prettifier state: nesting depth, last tag, and the output buffer."""
    HTMLParser.__init__(self)
    self._level = 0        # current nesting depth
    self._last = ''        # last tag handled
    self._in_code = False  # inside a code section? -- toggled by the handlers
    # output lines, seeded with the module-level header constant
    self._prettified = [_BASE_HTML_HEADER]
def __init__(self, value=None):
    """Store *value* (defaults to None) as the wrapped value."""
    self._value = value
def __init__(self):
    """Start with a lecture count of zero."""
    HTMLParser.__init__(self)
    self.lectures = 0
def __init__(self):
    """Plain HTMLParser initialisation; no extra state added."""
    HTMLParser.__init__(self)