Python re module: findall() example source code
We extracted the following 49 code examples from open-source Python projects to illustrate how to use re.findall().
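Before the project code, here is a minimal usage sketch of the call itself (illustrative only, not taken from any of the projects below): re.findall() returns every non-overlapping match as a list of strings, or as a list of tuples when the pattern contains more than one capturing group.

import re

text = 'CREATE TABLE `users` (`id` INT, `name` VARCHAR(30))'
# one capturing group -> list of strings
print(re.findall(r'`(\w+)`', text))        # ['users', 'id', 'name']
# two capturing groups -> list of tuples
print(re.findall(r'`(\w+)` (\w+)', text))  # [('id', 'INT'), ('name', 'VARCHAR')]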
def create_table(self, table_string):
    lines = table_string.split("\n")
    table = Table()
    for line in lines:
        if 'TABLE' in line:
            table_name = re.search("`(\w+)`", line)
            table.name = table_name.group(1)
            if self.thesaurus_object is not None:
                table.equivalences = self.thesaurus_object.get_synonyms_of_a_word(table.name)
        elif 'PRIMARY KEY' in line:
            primary_key_columns = re.findall("`(\w+)`", line)
            for primary_key_column in primary_key_columns:
                table.add_primary_key(primary_key_column)
        else:
            column_name = re.search("`(\w+)`", line)
            if column_name is not None:
                column_type = self.predict_type(line)
                if self.thesaurus_object is not None:
                    equivalences = self.thesaurus_object.get_synonyms_of_a_word(column_name.group(1))
                else:
                    equivalences = []
                table.add_column(column_name.group(1), column_type, equivalences)
    return table
def getDetailList(self, content):
    s2 = r'<h2><a target="_blank" href="(.*?)" title="(.*?)"'
    pattern = re.compile(s2, re.S)
    result = re.findall(pattern, content)
    with open('file.txt', 'w', encoding='gbk') as f:
        f.write(content)
    if not result:
        print('no matches found')  # original non-ASCII message was garbled
    threadsList = []
    for item in result:
        t = threading.Thread(target=workthread, args=(item, self.user_agent, self.path))
        threadsList.append(t)
        t.start()
    for threadid in threadsList:
        threadid.join()
def ParseHtml(self, html):
    soup = BeautifulSoup(html)
    links = soup.findAll('a', attrs={'class': 'ulink'})
    #print len(links)
    if len(links) == 0:  # the js return
        # tmp_js = soup.find(name='script', attrs={'language': 'javascript'})
        js_str = soup.script.string  # two ways to get the <script></script>
        new_url = js_str[16:-1]  # get the new url
        new_url = eval(new_url)  # eval the quoted string literal to get the plain url
        self.ParseHtml(self.LoadPage(new_url))
    else:
        # print type(links)
        for link in links:
            # print type(link)
            # print type(link.string)
            # print unicode(link.string)
            titles = re.findall(r'《(.+?)》', str(link.string))  # full-width 《》 brackets assumed; the original characters were garbled
            if len(titles) != 0:
                print titles[0]
                # print 'url is %s, title is %s.' %(link['href'], titles[0])
def get_profiles():
    passwd = ''
    netsh_output = run_command("netsh wlan show profiles")
    if "not running" in netsh_output:
        net_wlan = run_command("net start wlansvc")
        if "started successfully" in net_wlan:
            netsh_output = run_command("netsh wlan show profiles")
        else:
            return net_wlan
    if "no wireless interface" in netsh_output:
        return netsh_output
    else:
        profiles = re.findall(': (.*)\r', netsh_output)
        for x in profiles:
            output = run_command('netsh wlan show profiles "{}" key=clear'.format(x))
            #output=re.findall('(Key Content.*)\r',proc)
            if output:
                passwd += "\n{}\n{}\n\n".format(x, output)
        return passwd
def reassign_atom_mapping(transform):
    '''This function takes an atom-mapped reaction and reassigns
    the atom-mapping labels (numbers) from left to right, once
    that transform has been canonicalized.'''
    all_labels = re.findall('\:([0-9]+)\]', transform)
    # Define list of replacements which matches all_labels *IN ORDER*
    replacements = []
    replacement_dict = {}
    counter = 1
    for label in all_labels:  # keep in order! this is important
        if label not in replacement_dict:
            replacement_dict[label] = str(counter)
            counter += 1
        replacements.append(replacement_dict[label])
    # Perform replacements in order
    transform_newmaps = re.sub('\:[0-9]+\]',
                               lambda match: (':' + replacements.pop(0) + ']'),
                               transform)
    return transform_newmaps
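As a quick illustration of the remapping (made-up atom-mapped reaction string, not from the project): label 7 is seen first and becomes 1, label 3 becomes 2.

print(reassign_atom_mapping('[CH3:7][OH:3]>>[CH3:7][O-:3]'))
# '[CH3:1][OH:2]>>[CH3:1][O-:2]'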
def _read_from_header(self):
    a, b, c = self._get_header()
    header = a
    header['data_offset'] = b
    header['nb_channels'] = c
    #header['dtype_offset'] = int(header['ADC zero'])
    header['gain'] = float(re.findall("\d+\.\d+", header['El'])[0])
    header['data_dtype'] = self.params['data_dtype']
    self.data = numpy.memmap(self.file_name, offset=header['data_offset'], dtype=header['data_dtype'], mode='r')
    self.size = len(self.data)
    self._shape = (self.size//header['nb_channels'], header['nb_channels'])
    del self.data
    return header
def title_command(bot, trigger):
    """
    Show the title or URL information for the given URL, or the last URL seen
    in this channel.
    """
    if not trigger.group(2):
        if trigger.sender not in bot.memory['last_seen_url']:
            return
        matched = check_callbacks(bot, trigger,
                                  bot.memory['last_seen_url'][trigger.sender],
                                  True)
        if matched:
            return
        else:
            urls = [bot.memory['last_seen_url'][trigger.sender]]
    else:
        urls = re.findall(url_finder, trigger)
    results = process_urls(bot, trigger, urls)
    for title, domain in results[:4]:
        bot.reply('[ %s ] - %s' % (title, domain))
def test_trigger_single_event(self):
    """Test: Trigger click event on button, validate dispatched"""
    regex = '([0-9]{1,3})'
    original = eval(re.findall(regex, self.page.counter_label.text)[0])
    self.page.js.trigger_event(
        element=self.page.add_counter_button,
        event='click'
    )
    for i in range(10):
        if (original == eval(re.findall(regex, self.page.counter_label.text)[0])):
            time.sleep(1)
        else:
            break
    modified = eval(re.findall(regex, self.page.counter_label.text)[0])
    self.assertEqual(
        modified, original+1,
        'Counter label was not modified as expected; %s clicks' % modified
    )
def test_trigger_multiple_events(self):
    """Test: Trigger click event on button twice, validate dispatched"""
    regex = '([0-9]{1,3})'
    original = eval(re.findall(regex, self.page.counter_label.text)[0])
    self.page.js.trigger_event(
        element=self.page.add_counter_button,
        event=('click', 'click')
    )
    for i in range(10):
        if (original == eval(re.findall(regex, self.page.counter_label.text)[0])):
            time.sleep(1)
        else:
            break
    modified = eval(re.findall(regex, self.page.counter_label.text)[0])
    self.assertEqual(
        modified, original+2,
        'Counter label was not modified as expected; %s clicks' % modified
    )
def test_trigger_multiple_events_multiple_elements(self):
    """Test: Trigger click event on two buttons twice, validate dispatched"""
    regex = '([0-9]{1,3})'
    num_counter_original = eval(re.findall(regex, self.page.counter_label.text)[0])
    num_users_original = len(self.page.user_cards)
    self.page.js.trigger_event(
        element=(self.page.add_counter_button, self.page.add_user_button),
        event=('click', 'click')
    )
    for i in range(10):
        if (num_counter_original == eval(re.findall(regex, self.page.counter_label.text)[0])):
            time.sleep(1)
        else:
            break
    num_counter_modified = eval(re.findall(regex, self.page.counter_label.text)[0])
    self.assertEqual(
        num_counter_modified, num_counter_original+2,
        'Counter label was not modified as expected; %s clicks' % num_counter_modified
    )
    self.assertEqual(
        len(self.page.user_cards), num_users_original+2,
        'Expected %s user cards found %s' % (
            num_users_original+2, len(self.page.user_cards)
        )
    )
def calc_rs_modality(self) -> Dict[str, float]:
    modality_counter = Counter()
    for i, s in enumerate(self.sentences):
        chunks = []
        for bnst in self.knp.parse(s).bnst_list():
            chunk = Chunk(chunk_id=bnst.bnst_id,
                          link=bnst.parent,
                          description=bnst.fstring)
            chunks.append(chunk)
        s = "".join([chunk.description for chunk in chunks])
        # KNP marks modality in the feature string as <モダリティ-...> (reconstructed; the original characters were garbled)
        ms = set(re.findall("<モダリティ-(.+?)>", s))
        modality_counter += Counter(ms)
    n = len(self.sentences)
    return dict([(k, float(c) / n)
                 for k, c in modality_counter.items()])
def get_transcript_gc_content(self, transcript_obj):
    pattern = re.compile('[cCgG]')
    gc, length = 0, 0
    for interval in transcript_obj.intervals:
        if interval.chrom not in self.chroms:
            continue
        seq = self.chroms[interval.chrom][interval.start:interval.end]
        gc += len(re.findall(pattern, seq))
        length += interval.length
    if length > 0:
        return float(gc) / float(length)
    else:
        return 0

# NOTE: these stub classes are necessary to maintain backwards compatibility with old refdata (1.2 or older)
def get_info(self):
    '''
    Get information about the comic.
    return:
        comic title, description, cover url, chapters' urls
    '''
    headers = {'user-agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36", 'Referer': 'http://manhua.dmzj.com/tags/s.shtml'}
    root = 'http://manhua.dmzj.com'
    r_title = r'<span class="anim_title_text"><a href=".*?"><h1>(.*?)</h1></a></span>'
    r_des = r'<meta name=\'description\' content=".*?(??.*?)"/>'  # description
    r_cover = r'src="(.*?)" id="cover_pic"/></a>'  # cover image url
    r_cb = r'<div class="cartoon_online_border" >([\s\S]*?)<div class="clearfix"></div>'  # chapter list block
    r_cs = r'<li><a title="(.*?)" href="(.*?)" .*?>.*?</a>'  # chapter title and link
    try:
        text = requests.get(self.comic_url, headers=headers).text
    except ConnectionError:
        traceback.print_exc()
        raise ConnectionError
    title = re.findall(r_title, text)[0]
    cb = re.findall(r_cb, text)[0]
    chapter_urls = [(c[0], root + c[1] + '#@page=1') for c in re.findall(r_cs, cb)]
    cover_url = re.findall(r_cover, text)[0]
    des = re.findall(r_des, text)
    return title, des, cover_url, chapter_urls
def make_html_content_and_add_tags(self):
    # compile a pattern that matches hashtags like "#tag"
    p = re.compile(r'(#\w+)')
    # findall returns the list of hashtag strings found in the content
    tag_name_list = re.findall(p, self.content)
    # keep the original content before substitution
    ori_content = self.content
    # for each hashtag found
    for tag_name in tag_name_list:
        # get or create the Tag object; the "created" flag is discarded as _
        tag, _ = Tag.objects.get_or_create(name=tag_name.replace('#', ''))
        # build the anchor tag that will replace the plain hashtag in the content
        change_tag = '<a href="{url}" class="hash-tag">{tag_name}</a>'.format(
            # url=reverse('post:hashtag_post_list', args=[tag_name.replace('#', '')]),
            url=reverse('post:hashtag_post_list',
                        kwargs={'tag_name': tag_name.replace('#', '')}),
            tag_name=tag_name
        )
        ori_content = re.sub(r'{}(?![<\w])'.format(tag_name), change_tag, ori_content, count=1)
        # add the Tag to this object's tags if it is not there yet
        if not self.tags.filter(pk=tag.pk).exists():
            self.tags.add(tag)
    # store the converted markup in html_content and save only that field
    self.html_content = ori_content
    super().save(update_fields=['html_content'])
def parse_sitemap(content):
    if not isinstance(content, six.text_type):
        content = content.decode('utf-8')
    urlset_match = re.search(
        r'<urlset[^>]*>(?P<urls>[\s\S]*)</urlset>', content
    )
    if urlset_match:
        results = []
        urlset_content = urlset_match.groupdict()['urls']
        for url_content in re.findall(r'<url>([\s\S]+)</url>', urlset_content):
            results.append(
                dict(
                    re.findall(r'<([^>]+)>([^<]*)</[^>]+>', url_content)
                )
            )
    else:
        results = None
    return results
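A quick usage sketch with a hand-written sitemap fragment (illustrative input, not from the project; assumes six is importable as in the function's module): the outer findall isolates each <url> block and the inner findall turns its child tags into key/value pairs.

sitemap = ('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
           '<url><loc>http://example.com/</loc><lastmod>2017-01-01</lastmod></url>'
           '</urlset>')
print(parse_sitemap(sitemap))
# [{'loc': 'http://example.com/', 'lastmod': '2017-01-01'}]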
def get_players():
    with open("../../junk/iemoakland.html") as f:
        html = f.read()
    output = []
    teams = re.findall('(?s)<div class="influencer-card">(.*?)<!-- Card End -->', html)
    counter = 1
    for t in teams:
        team_name = re.search('<h1 class="influencer-name">([^<]+)</h1>', t).group(1)
        player_section = re.search('(?s)<p class="influencer-description">(.*?)</p>', t).group(1)
        players = re.findall('(?:<a[^>]+>)?\s*(.*?)(?:</a>)?<br />', player_section)
        if len(players) < 4:
            print(team_name)
            print(players)
        for player in players:
            if '<a hre' in player:
                player = re.search('<a[^>]+>([^<]+)', player).group(1)
            output.append({"id": counter, "name": player, "team": team_name, "value": 10.0})
            counter += 1
    with open("../../lib/pubg_players.py", "w+") as f:
        f.write("pubg_init = " + repr(output))
    return
def get_summary(url):
    """ Get the course summary: term id and directory name. """
    res = CONNECTION.get(url).text
    # term id of the course
    term_id = re.search(r'termId : "(\d+)"', res).group(1)
    names = re.findall(r'name:"(.+)"', res)
    # course name
    course_name = names[0]
    # institution name
    institution = names[1]
    # directory name built from course name and institution
    dir_name = REG_FILE.sub('', course_name + ' - ' + institution)
    print(dir_name)
    return term_id, dir_name
def get_announce(term_id):
    """ Get the course announcements. """
    # batchId is the current timestamp in milliseconds: str(int(time.time() * 1000))
    post_data = {'callCount': '1', 'scriptSessionId': '${scriptSessionId}190', 'httpSessionId': 'dba4977be78d42a78a6e2c2dd2b9bb42', 'c0-scriptName': 'CourseBean', 'c0-methodName': 'getAllAnnouncementByTerm', 'c0-id': '0', 'c0-param0': 'number:' + term_id, 'c0-param1': 'number:1', 'batchId': str(int(time.time() * 1000))}
    res = CONNECTION.post('http://mooc.study.163.com/dwr/call/plaincall/CourseBean.getAllAnnouncementByTerm.dwr', data=post_data).text
    announcements = re.findall(r'content="(.*?[^\\])".*title="(.*?[^\\])"', res)
    with open(os.path.join(BASE_DIR, 'Announcements.html'), 'w', encoding='utf-8') as announce_file:
        for announcement in announcements:
            # announcement content
            announce_content = announcement[0].encode('utf-8').decode('unicode_escape')
            # announcement title
            announce_title = announcement[1].encode('utf-8').decode('unicode_escape')
            announce_file.write('<h1>' + announce_title + '</h1>\n' + announce_content + '\n')
def pdf_as_matrix(buff, border):
    """\
    Reads the path in the PDF and returns it as list of 0, 1 lists.

    :param io.BytesIO buff: Buffer to read the matrix from.
    """
    pdf = buff.getvalue()
    h, w = re.search(br'/MediaBox \[0 0 ([0-9]+) ([0-9]+)\]', pdf,
                     flags=re.MULTILINE).groups()
    if h != w:
        raise ValueError('Expected equal height/width, got height="{}" width="{}"'.format(h, w))
    size = int(w) - 2 * border
    graphic = _find_graphic(buff)
    res = [[0] * size for i in range(size)]
    for x1, y1, x2, y2 in re.findall(r'\s*(\-?\d+)\s+(\-?\d+)\s+m\s+'
                                     r'(\-?\d+)\s+(\-?\d+)\s+l', graphic):
        x1, y1, x2, y2 = [int(i) for i in (x1, y1, x2, y2)]
        y = abs(y1)
        res[y][x1:x2] = [1] * (x2 - x1)
    return res
def get(self, netloc, ua, timeout):
    try:
        headers = {'User-Agent': ua, 'Referer': netloc}
        result = _basic_request(netloc, headers=headers, timeout=timeout)
        match = re.findall('xhr\.open\("GET","([^,]+),', result)
        if not match:
            return False
        url_Parts = match[0].split('"')
        url_Parts[1] = '1680'
        url = urlparse.urljoin(netloc, ''.join(url_Parts))
        match = re.findall('rid=([0-9a-zA-Z]+)', url_Parts[0])
        if not match:
            return False
        headers['Cookie'] = 'rcksid=%s' % match[0]
        result = _basic_request(url, headers=headers, timeout=timeout)
        return self.getCookieString(result, headers['Cookie'])
    except:
        return

# not very robust but laziness...
def get(self, result):
    try:
        s = re.compile("S\s*=\s*'([^']+)").findall(result)[0]
        s = base64.b64decode(s)
        s = s.replace(' ', '')
        s = re.sub('String\.fromCharCode\(([^)]+)\)', r'chr(\1)', s)
        s = re.sub('\.slice\((\d+),(\d+)\)', r'[\1:\2]', s)
        s = re.sub('\.charAt\(([^)]+)\)', r'[\1]', s)
        s = re.sub('\.substr\((\d+),(\d+)\)', r'[\1:\1+\2]', s)
        s = re.sub(';location.reload\(\);', '', s)
        s = re.sub(r'\n', '', s)
        s = re.sub(r'document\.cookie', 'cookie', s)
        cookie = ''
        exec(s)
        self.cookie = re.compile('([^=]+)=(.*)').findall(cookie)[0]
        self.cookie = '%s=%s' % (self.cookie[0], self.cookie[1])
        return self.cookie
    except:
        pass
def odnoklassniki(url):
    try:
        media_id = re.compile('//.+?/.+?/([\w]+)').findall(url)[0]
        result = client.request('http://ok.ru/dk', post={'cmd': 'videoPlayerMetadata', 'mid': media_id})
        result = re.sub(r'[^\x00-\x7F]+', ' ', result)
        result = json.loads(result).get('videos', [])
        hd = []
        for name, quali in {'ultra': '4K', 'quad': '1440p', 'full': '1080p', 'hd': 'HD'}.items():
            hd += [{'quality': quali, 'url': i.get('url')} for i in result if i.get('name').lower() == name]
        sd = []
        for name, quali in {'sd': 'SD', 'low': 'SD', 'lowest': 'SD', 'mobile': 'SD'}.items():
            sd += [{'quality': quali, 'url': i.get('url')} for i in result if i.get('name').lower() == name]
        url = hd + sd[:1]
        if not url == []: return url
    except:
        return
def cldmailru(url):
    try:
        v = url.split('public')[-1]
        r = client.request(url)
        r = re.sub(r'[^\x00-\x7F]+', ' ', r)
        tok = re.findall('"tokens"\s*:\s*{\s*"download"\s*:\s*"([^"]+)', r)[0]
        url = re.findall('"weblink_get"\s*:\s*\[.+?"url"\s*:\s*"([^"]+)', r)[0]
        url = '%s%s?key=%s' % (url, v, tok)
        return url
    except:
        return
def yandex(url):
    try:
        cookie = client.request(url, output='cookie')
        r = client.request(url, cookie=cookie)
        r = re.sub(r'[^\x00-\x7F]+', ' ', r)
        sk = re.findall('"sk"\s*:\s*"([^"]+)', r)[0]
        idstring = re.findall('"id"\s*:\s*"([^"]+)', r)[0]
        idclient = binascii.b2a_hex(os.urandom(16))
        post = {'idClient': idclient, 'version': '3.9.2', 'sk': sk, '_model.0': 'do-get-resource-url', 'id.0': idstring}
        post = urllib.urlencode(post)
        r = client.request('https://yadi.sk/models/?_m=do-get-resource-url', post=post, cookie=cookie)
        r = json.loads(r)
        url = r['models'][0]['data']['file']
        return url
    except:
        return
def geturl(url):
    try:
        r = client.request(url, output='geturl')
        if r == None: return r
        host1 = re.findall('([\w]+)[.][\w]+$', urlparse.urlparse(url.strip().lower()).netloc)[0]
        host2 = re.findall('([\w]+)[.][\w]+$', urlparse.urlparse(r.strip().lower()).netloc)[0]
        if host1 == host2: return r
        proxies = sorted(get(), key=lambda x: random.random())
        proxies = sorted(proxies, key=lambda x: random.random())
        proxies = proxies[:3]
        for p in proxies:
            p += urllib.quote_plus(url)
            r = client.request(p, output='geturl')
            if not r == None: return parse(r)
    except:
        pass
def movie(self, imdb, title, localtitle, aliases, year):
    try:
        t = 'http://www.imdb.com/title/%s' % imdb
        t = client.request(t, headers={'Accept-Language': 'es-AR'})
        t = client.parseDOM(t, 'title')[0]
        t = re.sub('(?:\(|\s)\d{4}.+', '', t).strip().encode('utf-8')
        q = self.search_link % urllib.quote_plus(t)
        q = urlparse.urljoin(self.base_link, q)
        r = client.request(q)
        r = client.parseDOM(r, 'div', attrs = {'class': 'item'})
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'span', attrs = {'class': 'tt'}), client.parseDOM(i, 'span', attrs = {'class': 'year'})) for i in r]
        r = [(i[0][0], i[1][0], i[2][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]
        r = [i[0] for i in r if cleantitle.get(t) == cleantitle.get(i[1]) and year == i[2]][0]
        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        pass
def searchMovie(self, title, year, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
        r = client.request(url, headers=headers, timeout='15')
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
        results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
        try:
            r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
            url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
        except:
            url = None
            pass
        if (url == None):
            url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
        return url
    except:
        return
def searchMovie(self, title, year, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
        r = client.request(url, headers=headers, timeout='15')
        r = client.parseDOM(r, 'div', attrs={'class': 'item-detail'})
        r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
        results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
        try:
            r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
            url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
        except:
            url = None
            pass
        if (url == None):
            url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
        url = '%s/watch' % url
        return url
    except:
        return
def movie(self, imdb, title, localtitle, aliases, year):
    try:
        t = cleantitle.get(title)
        p = self.post_link % urllib.quote_plus(cleantitle.query(title))
        q = urlparse.urljoin(self.base_link, self.search_link)
        r = proxy.request(q, 'playing top', post=p, XHR=True)
        r = client.parseDOM(r, 'li')
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a')) for i in r]
        r = [(i[0][0], i[1][0]) for i in r if i[0] and i[1]]
        r = [(i[0], re.findall('(.+?)\((\d{4})', i[1])) for i in r]
        r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if i[1]]
        r = [i for i in r if t == cleantitle.get(i[1]) and str(year) == i[2]]
        url = proxy.parse(r[0][0])
        url = re.findall('(?://.+?|)(/.+)', url)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        pass
def searchMovie(self, title, year, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        url = urlparse.urljoin(self.base_link, self.search_link % cleantitle.geturl(title))
        r = client.request(url, headers=headers, timeout='15')
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
        results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
        try:
            r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
            url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
        except:
            url = None
            pass
        if (url == None):
            url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
        url = '%s/watch' % url
        return url
    except:
        return
def searchMovie(self, title, year, aliases):
    try:
        url = '%s/%s-%s/' % (self.base_link, cleantitle.geturl(title), year)
        url = client.request(url, output='geturl')
        if url == None:
            t = cleantitle.get(title)
            q = '%s %s' % (title, year)
            q = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(q))
            r = client.request(q)
            r = client.parseDOM(r, 'div', attrs={'class': 'inner'})
            r = client.parseDOM(r, 'div', attrs={'class': 'info'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            r = [(i[0], re.findall('(?:^Watch Movie |^Watch movies |^Watch |)(.+?)\((\d{4})', i[1])) for i in r]
            r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if i[1]]
            url = [i[0] for i in r if self.matchAlias(i[1], aliases) and year == i[2]][0]
        if url == None: raise Exception()
        return url
    except:
        return
def searchMovie(self, title, year, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        url = urlparse.urljoin(self.base_link, self.search_link % cleantitle.geturl(title))
        r = client.request(url, headers=headers, timeout='15')
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
        results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
        try:
            r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
            url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
        except:
            url = None
            pass
        if (url == None):
            url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
        url = urlparse.urljoin(self.base_link, '%s/watching.html' % url)
        return url
    except:
        return
def searchMovie(self, title, year, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
        r = client.request(url, headers=headers, timeout='15')
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
        results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
        try:
            r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
            url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
        except:
            url = None
            pass
        if (url == None):
            url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
        return url
    except:
        return
def movie(self, imdb, title, localtitle, aliases, year):
    try:
        query = urlparse.urljoin(self.base_link, self.search_link)
        query = query % urllib.quote_plus(title)
        t = cleantitle.get(title)
        r = client.request(query)
        r = client.parseDOM(r, 'div', attrs = {'class': 'thumb'})
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title'), re.findall('(\d{4})', i)) for i in r]
        r = [(i[0][0], i[1][0], i[2][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]
        url = [i[0] for i in r if t in cleantitle.get(i[1]) and year == i[2]][0]
        return url
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    try:
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        query = urlparse.urljoin(self.base_link, self.search_link)
        query = query % urllib.quote_plus(data['tvshowtitle'])
        t = cleantitle.get(data['tvshowtitle'])
        r = client.request(query)
        r = client.parseDOM(r, 'div', attrs = {'class': 'thumb'})
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title'), re.findall('(\d{4})', i)) for i in r]
        r = [(i[0][0], i[1][0], i[2][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]
        url = [i[0] for i in r if t in cleantitle.get(i[1]) and ('Season %s' % season) in i[1]][0]
        url += '?episode=%01d' % int(episode)
        return url
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    try:
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        year = re.findall('(\d{4})', premiered)[0]
        season = '%01d' % int(season)
        episode = '%01d' % int(episode)
        tvshowtitle = '%s %s: Season %s' % (data['tvshowtitle'], year, season)
        url = cache.get(self.pidtv_tvcache, 120, tvshowtitle)
        if url == None: raise Exception()
        url += '?episode=%01d' % int(episode)
        url = url.encode('utf-8')
        return url
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    try:
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        headers = eval(data['headers'])
        aliases = eval(data['aliases'])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = cleantitle.getsearch(title)
        query = self.search_link % (urllib.quote_plus(title))
        query = urlparse.urljoin(self.base_link, query)
        r = client.request(query, headers=headers, timeout='30', mobile=True)
        match = re.compile('alias=(.+?)\'">(.+?)</a>').findall(r)
        r = [(i[0], re.findall('(.+?)\s+-\s+Season\s+(\d+)', i[1])) for i in match]
        r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
        r = [i[0] for i in r if self.matchAlias(i[1], aliases) and int(season) == int(i[2])][0]
        url = {'type': 'tvshow', 'id': r, 'episode': episode, 'season': season, 'headers': headers}
        url = urllib.urlencode(url)
        return url
    except:
        return
def movie(self, imdb, title, localtitle, aliases, year):
    try:
        if debrid.status() == False: raise Exception()
        t = cleantitle.get(title)
        query = self.search_link + urllib.quote_plus(title)
        query = urlparse.urljoin(self.base_link, query)
        r = client.request(query, XHR=True)
        r = json.loads(r)
        r = [i for i in r if 'category' in i and 'movie' in i['category'].lower()]
        r = [(i['url'], i['label']) for i in r if 'label' in i and 'url' in i]
        r = [(i[0], re.findall('(.+?) \((\d{4})', i[1])) for i in r]
        r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
        r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]
        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    try:
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        year = re.findall('(\d{4})', premiered)[0]
        if int(year) >= 2016: raise Exception()
        url = re.sub('[^A-Za-z0-9]', '-', data['tvshowtitle']).lower()
        url = self.tvsearch_link % (url, data['year'], '%01d' % int(season), '%01d' % int(episode))
        r = urlparse.urljoin(self.base_link, url)
        r = client.request(r, output='geturl')
        if not data['year'] in r: raise Exception()
        return url
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    try:
        if url == None: return
        tv_maze = tvmaze.tvMaze()
        num = tv_maze.episodeAbsoluteNumber(tvdb, int(season), int(episode))
        num = str(num)
        url = urlparse.urljoin(self.base_link, url)
        r = client.request(url)
        r = client.parseDOM(r, 'tr', attrs = {'class': ''})
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'td', attrs = {'class': 'epnum'})) for i in r]
        r = [(i[0][0], i[1][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
        r = [i[0] for i in r if num == i[1]][0]
        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        return
def movie(self, imdb, title, localtitle, aliases, year):
    try:
        query = self.search_link % (urllib.quote_plus(title))
        query = urlparse.urljoin(self.base_link, query)
        c, h = self.__get_cookies(query)
        t = cleantitle.get(title)
        r = client.request(query, headers=h, cookie=c)
        r = client.parseDOM(r, 'div', attrs={'class': 'cell_container'})
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in r]
        r = [(i[0][0], i[1][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
        r = [(i[0], re.findall('(.+?) \((\d{4})', i[1])) for i in r]
        r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
        r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]
        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    try:
        if not url:
            return
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        url = self.__search([data['tvshowtitle']] + source_utils.aliases_to_array(eval(data['aliases'])), data['year'], season)
        if not url: return
        r = client.request(urlparse.urljoin(self.base_link, url))
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'ep_link'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [(i.attrs['href'], i.content) for i in r if i]
        r = [(i[0], re.findall("^(?:episode)\s*(\d+)$", i[1], re.I)) for i in r]
        r = [(i[0], i[1][0] if i[1] else '0') for i in r]
        r = [i[0] for i in r if int(i[1]) == int(episode)][0]
        return source_utils.strip_domain(r)
    except:
        return
def searchMovie(self, title, year, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
        r = client.request(url, headers=headers, timeout='15')
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
        results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
        try:
            r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
            url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
        except:
            url = None
            pass
        if (url == None):
            url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
        url = '%s/watch/' % url
        return url
    except:
        return
def resolve(self, url):
    try:
        b = urlparse.urlparse(url).netloc
        b = re.compile('([\w]+[.][\w]+)$').findall(b)[0]
        if not b in base64.b64decode(self.b_link): return url
        u, p, h = url.split('|')
        r = urlparse.parse_qs(h)['Referer'][0]
        #u += '&app_id=Exodus'
        c = self.request(r, output='cookie', close=False)
        result = self.request(u, post=p, referer=r, cookie=c)
        url = result.split('url=')
        url = [urllib.unquote_plus(i.strip()) for i in url]
        url = [i for i in url if i.startswith('http')]
        url = url[-1]
        return url
    except:
        return
def searchMovie(self, title, year, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        url = urlparse.urljoin(self.base_link, self.search_link % (cleantitle.geturl(title.replace('\'', '-'))))
        r = client.request(url, timeout='10', headers=headers)
        r = client.parseDOM(r, 'h2', attrs={'class': 'tit'})
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in r]
        r = [(i[0][0], i[1][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
        r = [(i[0], re.findall('(.+?) \((\d{4})', i[1])) for i in r]
        r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
        try:
            match = [i[0] for i in r if self.matchAlias(i[1], aliases) and year == i[2]][0]
        except:
            match = [i[0] for i in r if self.matchAlias(i[1], aliases)][0]
        url = re.findall('(?://.+?|)(/.+)', match)[0]
        url = client.replaceHTMLCodes(url)
        return url.encode('utf-8')
    except:
        return
def __search(self, title, localtitle, year, content_type):
    try:
        t = cleantitle.get(title)
        tq = cleantitle.get(localtitle)
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        query = urlparse.urljoin(self.base_link, self.search_link)
        post = urllib.urlencode({'k': "%s"}) % tq
        r = client.request(query, post=post)
        r = json.loads(r)
        r = [i.get('result') for i in r if i.get('type', '').encode('utf-8') == content_type]
        r = [(i.get('url'), i.get('originalTitle'), i.get('title'), i.get('anneeProduction', 0), i.get('dateStart', 0)) for i in r]
        r = [(i[0], re.sub('<.+?>|</.+?>', '', i[1] if i[1] else ''), re.sub('<.+?>|</.+?>', '', i[2] if i[2] else ''), i[3] if i[3] else re.findall('(\d{4})', i[4])[0]) for i in r if i[3] or i[4]]
        r = sorted(r, key=lambda i: int(i[3]), reverse=True)  # with year > no year
        r = [i[0] for i in r if i[3] in y and (t.lower() == cleantitle.get(i[1].lower()) or tq.lower() == cleantitle.query(i[2].lower()))][0]
        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return url
    except:
        return
def __search(self, titles, year, season='0'):
    try:
        query = urlparse.urljoin(self.base_link, self.search_link)
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        r = client.request(query, post={'do': 'search', 'subaction': 'search', 'search_start': 0, 'full_search': 0, 'result_from': 1, 'story': cleantitle.query(titles[0])})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'fullstream'})
        r = [(dom_parser.parse_dom(i, 'h3', attrs={'class': 'mov-title'}), dom_parser.parse_dom(i, 'div', attrs={'class': 'fullmask'})) for i in r]
        r = [(dom_parser.parse_dom(i[0], 'a', req='href'), dom_parser.parse_dom(i[1], 'a', attrs={'href': re.compile('.*/year/\d+')})) for i in r]
        r = [(i[0][0].attrs['href'], i[0][0].content, i[1][0].content if i[1] else '0') for i in r if i[0]]
        r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:\s*-\s*saison)\s+(\d+)', i[1], re.I)) for i in r]
        r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        r = [(i[0], i[1], i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]
        return source_utils.strip_domain(r)
    except:
        return