The following 50 code examples, extracted from open-source Python projects, illustrate how to use pandas.read_excel().
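Before the extracted examples, here is a minimal sketch of the call they all build on (the file and sheet names here are placeholders, not from any of the projects below):

import pandas as pd

# Read one sheet of a workbook into a DataFrame.
# "report.xlsx" and "Sheet1" are hypothetical names.
df = pd.read_excel("report.xlsx", sheet_name="Sheet1")
print(df.head())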
def getExcelData(self):
    """
    Get data from 'hsi_futures.xlsx'.

    Date | Open | High | Low | Close | SMAVG5 | SMAVG10 | SMAVG15 | Volume | VolumeSMAVG5

    :return: data table
    """
    df = pd.DataFrame()
    xl = pd.ExcelFile("../dataManager/hsi_futures.xlsx")
    # print xl.sheet_names
    sheets = xl.sheet_names
    for sheet in sheets:
        df = df.append(pd.read_excel("../dataManager/hsi_futures.xlsx", sheet))
    df['Date'] = pd.to_datetime(df['Date'])
    df.sort_values("Date", ascending=True, inplace=True)
    data = df.reset_index(drop=True)  # renumber rows 0..n-1
    return data
def get_hs300s():
    """
    Get the current HS300 (CSI 300) constituents and their index weights.

    Return
    --------
    DataFrame
        code  : stock code
        name  : stock name
        date  : date
        weight: index weight
    """
    from tushare.stock.fundamental import get_stock_basics
    try:
        wt = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP % (ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
                                                        ct.PAGES['hs300w']), parse_cols=[0, 3, 6])
        wt.columns = ct.FOR_CLASSIFY_W_COLS
        wt['code'] = wt['code'].map(lambda x: str(x).zfill(6))
        df = get_stock_basics()[['name']]
        df = df.reset_index()
        return pd.merge(df, wt)
    except Exception as er:
        print(str(er))
def get_sz50s():
    """
    Get the SZ50 (SSE 50) constituents.

    Return
    --------
    DataFrame
        code: stock code
        name: stock name
    """
    try:
        df = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP % (ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
                                                        ct.PAGES['sz50b']), parse_cols=[0, 1])
        df.columns = ct.FOR_CLASSIFY_B_COLS
        df['code'] = df['code'].map(lambda x: str(x).zfill(6))
        return df
    except Exception as er:
        print(str(er))
def get_zz500s():
    """
    Get the ZZ500 (CSI 500) constituents.

    Return
    --------
    DataFrame
        code: stock code
        name: stock name
    """
    from tushare.stock.fundamental import get_stock_basics
    try:
        # df = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP % (ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
        #                                                 ct.PAGES['zz500b']), parse_cols=[0, 1])
        # df.columns = ct.FOR_CLASSIFY_B_COLS
        # df['code'] = df['code'].map(lambda x: str(x).zfill(6))
        wt = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP % (ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
                                                        ct.PAGES['zz500wt']), parse_cols=[0, 3, 6])
        wt.columns = ct.FOR_CLASSIFY_W_COLS
        wt['code'] = wt['code'].map(lambda x: str(x).zfill(6))
        df = get_stock_basics()[['name']]
        df = df.reset_index()
        return pd.merge(df, wt)
    except Exception as er:
        print(str(er))
def search():
    # The original Chinese column names and labels were lost to encoding
    # and are kept as placeholders below.
    df = pd.read_excel("huatai2.xls")
    input_m = 0.0
    output_m = 0.0
    for index, row in df.iterrows():
        if row[u'??'] == u'??':
            each_input = row[u'?????']
            print(u"??", each_input)
            input_m = input_m + each_input
        if row[u'??'] == u'??':
            each_output = row[u'?????']
            print(u"??", each_output)
            output_m = output_m + each_output
    print("Summary is %f" % (input_m - output_m))
def replace_test():
    df = pd.read_excel("huatai2.xls")
    s1 = pd.Series(['a', 'b', 'c', 'd', 'e'])
    s2 = pd.Series(['1', '2', '3', '4', '5'])
    s3 = s1.replace(1, 'k')
    print(df)
    # The original Chinese labels in the replacement lists were lost to encoding.
    df.replace(['20160722', u'????', 2431.0, u'????', 13.00, 300.0, 3891.10, 3905.71, u'??'],
               ['20160722', '0', '0', '0', 0, 0, 0, 0, '0'], inplace=True)
    # df.replace(['20160722'], ['20160725', '0', '0', '0', 0, 0, 0, 0, '0'], inplace=True)
    print(df)
def load_articles(self):
    """
    Loads the DataFrame with all the articles.

    Return: DataFrame with the title, content, tags and author of all articles
    """
    path = os.path.join(self.ip_file_path, self.ip_file_name)
    # Load articles in a DataFrame
    self.df = pd.read_excel(path, na_values=['NA'], parse_cols="A,B,C")
    # self.df = self.df[['Sno', 'title', 'content_text']]  # slice to remove redundant columns
    print(self.df)
    logging.debug("Number of articles: {0} and no of columns are {1} \n".format(len(self.df), self.df.shape))
def load_articles(self):
    """
    Loads the DataFrame with all the articles.

    Return: DataFrame with the title, content, tags and author of all articles
    """
    file_path = '/Users/shwetanknagar/Downloads/Personal/Project Eventstreet/Boconni Project'
    file_name = os.path.basename("TestSet300_User_Ratings.xlsx")
    path = os.path.join(file_path, file_name)
    # Load articles in a DataFrame
    self.df = pd.read_excel(path, na_values=['NA'], parse_cols="A,B,C")
    # self.df = self.df[['Sno', 'title', 'content_text']]  # slice to remove redundant columns
    print(self.df)
    logging.debug("Number of articles: {0} and no of columns are {1} \n".format(len(self.df), self.df.shape))
def test_load_xlsx(self):
    url = 'http://test.com/the.xlsx'
    self.url_pval.set_value(url)
    self.url_pval.save()

    xlsx_bytes = open(mock_xslx_path, "rb").read()
    xlsx_table = pd.read_excel(mock_xslx_path)

    # success case
    with requests_mock.Mocker() as m:
        m.get(url, content=xlsx_bytes,
              headers={'content-type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'})
        self.press_fetch_button()
        response = self.get_render()
        self.assertEqual(response.content, make_render_json(xlsx_table))

    # malformed file should put module in error state
    with requests_mock.Mocker() as m:
        m.get(url, content=b"there's just no way this is xlsx",
              headers={'content-type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'})
        self.press_fetch_button()
        self.wfmodule.refresh_from_db()
        self.assertEqual(self.wfmodule.status, WfModule.ERROR)
def main():
    df = pd.read_excel('../data/Tariffs.xlsx')
    df.loc[df['Tariff'] == 'Low', 'Tariff'] = 0.0399
    df.loc[df['Tariff'] == 'Normal', 'Tariff'] = 0.1176
    df.loc[df['Tariff'] == 'High', 'Tariff'] = 0.6720

    ets = ExtractTimeSeries(datetime_col='TariffDateTime', yt_col='Tariff')
    df = ets.transform(df)

    day = pd.to_datetime('2013-12-27').date()
    next_day = day + timedelta(days=1)
    df_out = df.query('index >= @day and index < @next_day')
    df_out.columns = ['Tariff (UK Pounds)']

    print_process('Saving Post-Processed Data')
    path_to_price = '../clean_data/price_data_London.csv'
    df_out.to_csv(path_to_price)
    print('Tariff data saved into: {}'.format(path_to_price))
    print()
def _load_table(self, filepath):
    """
    Load table from file system.

    :param str filepath: Path to table in CSV, TSV, XLSX or Pandas pickle format.
    :return: Pandas table
    :rtype: pandas.core.frame.DataFrame
    """
    _, ext = os.path.splitext(filepath.lower())
    if ext == '.tsv':
        return pd.read_table(filepath, **self.kwargs)
    if ext == '.csv':
        return pd.read_csv(filepath, **self.kwargs)
    if ext == '.xlsx':
        return pd.read_excel(filepath, **self.kwargs)
    return pd.read_pickle(filepath, **self.kwargs)
def read_excel(self, file):
    # TODO: add iterator and return columns
    excel_tab = pd.read_excel(file, dtype=str)
    columns = excel_tab.columns

    def make_gen(excel_tab, chunksize):
        # Slice the already-loaded frame into successive chunks.
        cursor = 0
        chunk = excel_tab.iloc[:chunksize]
        while chunk.shape[0]:
            yield chunk
            cursor += chunksize
            chunk = excel_tab.iloc[cursor:cursor + chunksize]

    tab = make_gen(excel_tab, self.CHUNKSIZE)
    tab = (self._clean_header(tab_part) for tab_part in tab)
    return tab, None, None, self._clean_column_names(columns)
def GetAllTodayData(self):
    # Fetch today's quotes for every stock and cache them to an Excel file;
    # if the file already exists, read it back instead.
    filename = self.today + '_all_.xls'
    filename = os.path.join(self.path, filename)
    if not os.path.exists(filename):
        self.df_today_all = ts.get_today_all()
        # Drop suspended stocks (zero turnover ratio).
        self.df_today_all.drop(self.df_today_all[self.df_today_all['turnoverratio'] == 0].index,
                               inplace=True)
        print(self.df_today_all)
        self.df_today_all.to_excel(filename, sheet_name='All')
    else:
        self.df_today_all = pd.read_excel(filename, sheet_name='All')
        print("File existed")
def count_up_down(filename):
    total = []
    df = pd.read_excel(filename)
    count = len(df[(df['changepercent'] >= -10.2) & (df['changepercent'] < -9)])
    total.append(count)
    for i in range(-9, 9, 1):
        count = len(df[(df['changepercent'] >= i * 1.00) & (df['changepercent'] < (i + 1) * 1.00)])
        total.append(count)
    count = len(df[(df['changepercent'] >= 9)])
    total.append(count)
    print(total)
    df_figure = pd.Series(total, index=range(-10, 10))
    print(df_figure)
    fg = df_figure.plot(kind='bar', table=True)
    plt.show()
def read_res(file):
    # Read model data from xls files.
    Size, R, xr, xl, xc, yu, yd, yc = [], [], [], [], [], [], [], []  # areas, Fij's, dept coordinates
    out = pd.read_excel(file, sheetname="Out")       # model results
    Rout = pd.read_excel(file, sheetname="R")        # Fij's
    Sizeout = pd.read_excel(file, sheetname="Size")  # departments' wanted sizes
    Wout = pd.read_excel(file, sheetname="W")        # w1 and w2
    w1 = float(Wout['w1'][0])
    w2 = 1.0 - w1
    totx = float(out['totx'][0])  # total length in x axis
    toty = float(out['toty'][0])  # total length in y axis
    for d in range(len(Sizeout)):
        # Insert result data into Python lists.
        R.append([])
        Size.append(float(Sizeout['Area'][d]))
        xr.append(float(out['Xr'][d]))
        xl.append(float(out['Xl'][d]))
        xc.append((float(out['Xl'][d]) + float(out['Xr'][d])) / 2)
        yu.append(float(out['Yu'][d]))
        yd.append(float(out['Yd'][d]))
        yc.append((float(out['Yu'][d]) + float(out['Yd'][d])) / 2)
        for i in range(len(Rout)):
            R[d].append(float(Rout.iloc[d, i]))
    return Size, R, totx, toty, xr, xl, xc, yu, yd, yc, w1, w2
def groups(ofname):
    df = pandas.read_excel('GC-VTPR.xlsx', sheetname='Groups')
    entries = []
    for i, row in df.iterrows():
        entry = {
            "Q_k": row['Qk'],
            "R_k": row['Rk'],
            "maingroup_name": row["main group name"],
            "mgi": row['main group index'],
            "sgi": row['sub group index'],
            "subgroup_name": row["sub group name"]
        }
        entries.append(entry)
    with open(ofname, 'w') as fp:
        json.dump(entries, fp, indent=2, sort_keys=True)
def interaction_parameters(ofname):
    df = pandas.read_excel('GC-VTPR.xlsx', sheetname='InteractionParameters')
    df = df.fillna(0.0)
    entries = []
    for i, row in df.iterrows():
        entry = {
            "a_ij": row['aij / K'],
            "a_ji": row['aji / K'],
            "b_ij": row['bij'],
            "b_ji": row['bji'],
            "c_ij": row['cij / K-1'],
            "c_ji": row['cji / K-1'],
            "mgi1": row['i'],
            "mgi2": row['j']
        }
        entries.append(entry)
    with open(ofname, 'w') as fp:
        json.dump(entries, fp, indent=2, sort_keys=True)
def _load_powerplant():
    """
    Attribute information: features consist of hourly average ambient variables
    - temperature (t) in the range 1.81 C to 37.11 C,
    - ambient pressure (ap) in the range 992.89-1033.30 millibar,
    - relative humidity (rh) in the range 25.56% to 100.16%
    - exhaust vacuum (v) in the range 25.36-81.56 cm Hg
    - net hourly electrical energy output (ep) 420.26-495.76 mw
    The averages are taken from various sensors located around the plant that
    record the ambient variables every second. The variables are given without
    normalization.
    """
    data_file = os.path.join(data_dir, 'power-plant/Folds5x2_pp.xlsx')
    data = pd.read_excel(data_file)
    x = data.values[:, :-1]
    y = data.values[:, -1]
    return x, y
def _fetch_data(self, dataset, query=None):
    files = [(y, m) for y in query['years'] for m in query['months']]
    frames = []

    # Download and clean every monthly Excel file
    for file in files:
        year, month = file
        url = self.BASE_URL.format(year=year, month=MONTHS[month])
        frame = self._clean_data(pd.read_excel(url), year, month)
        frames.append(frame)

    # Yield individual rows of type Result from the dataframe
    raw_data = pd.concat(frames)
    for i, row in raw_data.iterrows():
        val = row.pop('value')
        yield Result(val, json.loads(row.to_json()))
def get_exceldf(self, basename, *args, **kwds):
    """
    Return test data DataFrame. Test data path is defined by
    pandas.util.testing.get_data_path().

    Parameters
    ----------
    basename : str
        File base name, excluding file extension.

    Returns
    -------
    df : DataFrame
    """
    pth = os.path.join(self.dirpath, basename + self.ext)
    return read_excel(pth, *args, **kwds)
def test_read_one_empty_col_no_header(self):
    df = pd.DataFrame(
        [["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]]
    )
    with ensure_clean(self.ext) as path:
        df.to_excel(path, 'no_header', index=False, header=False)
        actual_header_none = read_excel(
            path, 'no_header', parse_cols=[0], header=None
        )
        actual_header_zero = read_excel(
            path, 'no_header', parse_cols=[0], header=0
        )
    expected = DataFrame()
    tm.assert_frame_equal(actual_header_none, expected)
    tm.assert_frame_equal(actual_header_zero, expected)
def test_read_from_file_url(self):
    # FILE
    if sys.version_info[:2] < (2, 6):
        raise nose.SkipTest("file:// not supported with Python < 2.6")

    localtable = os.path.join(self.dirpath, 'test1' + self.ext)
    local_table = read_excel(localtable)

    try:
        url_table = read_excel('file://localhost/' + localtable)
    except URLError:
        # fails on some systems
        import platform
        raise nose.SkipTest("failing on %s" % ' '.join(platform.uname()).strip())

    tm.assert_frame_equal(url_table, local_table)
def test_read_excel_skiprows_list(self):
    # GH 4903
    actual = pd.read_excel(os.path.join(self.dirpath,
                                        'testskiprows' + self.ext),
                           'skiprows_list', skiprows=[0, 2])
    expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True],
                          [2, 3.5, pd.Timestamp('2015-01-02'), False],
                          [3, 4.5, pd.Timestamp('2015-01-03'), False],
                          [4, 5.5, pd.Timestamp('2015-01-04'), True]],
                         columns=['a', 'b', 'c', 'd'])
    tm.assert_frame_equal(actual, expected)

    actual = pd.read_excel(os.path.join(self.dirpath,
                                        'testskiprows' + self.ext),
                           'skiprows_list', skiprows=np.array([0, 2]))
    tm.assert_frame_equal(actual, expected)
def test_read_excel_squeeze(self):
    # GH 12157
    f = os.path.join(self.dirpath, 'test_squeeze' + self.ext)

    actual = pd.read_excel(f, 'two_columns', index_col=0, squeeze=True)
    expected = pd.Series([2, 3, 4], [4, 5, 6], name='b')
    expected.index.name = 'a'
    tm.assert_series_equal(actual, expected)

    actual = pd.read_excel(f, 'two_columns', squeeze=True)
    expected = pd.DataFrame({'a': [4, 5, 6], 'b': [2, 3, 4]})
    tm.assert_frame_equal(actual, expected)

    actual = pd.read_excel(f, 'one_column', squeeze=True)
    expected = pd.Series([1, 2, 3], name='a')
    tm.assert_series_equal(actual, expected)
def test_int_types(self):
    _skip_if_no_xlrd()

    for np_type in (np.int8, np.int16, np.int32, np.int64):
        with ensure_clean(self.ext) as path:
            # Test np.int values read come back as int (rather than float
            # which is Excel's format).
            frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)),
                              dtype=np_type)
            frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = read_excel(reader, 'test1')
            int_frame = frame.astype(np.int64)
            tm.assert_frame_equal(int_frame, recons)
            recons2 = read_excel(path, 'test1')
            tm.assert_frame_equal(int_frame, recons2)

            # test with convert_float=False comes back as float
            float_frame = frame.astype(float)
            recons = read_excel(path, 'test1', convert_float=False)
            tm.assert_frame_equal(recons, float_frame,
                                  check_index_type=False,
                                  check_column_type=False)
def test_sheets(self):
    _skip_if_no_xlrd()

    with ensure_clean(self.ext) as path:
        self.frame['A'][:5] = nan

        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', columns=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)

        # Test writing to separate sheets
        writer = ExcelWriter(path)
        self.frame.to_excel(writer, 'test1')
        self.tsframe.to_excel(writer, 'test2')
        writer.save()
        reader = ExcelFile(path)
        recons = read_excel(reader, 'test1', index_col=0)
        tm.assert_frame_equal(self.frame, recons)
        recons = read_excel(reader, 'test2', index_col=0)
        tm.assert_frame_equal(self.tsframe, recons)
        np.testing.assert_equal(2, len(reader.sheet_names))
        np.testing.assert_equal('test1', reader.sheet_names[0])
        np.testing.assert_equal('test2', reader.sheet_names[1])
def test_colaliases(self):
    _skip_if_no_xlrd()

    with ensure_clean(self.ext) as path:
        self.frame['A'][:5] = nan

        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', columns=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)

        # column aliases
        col_aliases = Index(['AA', 'X', 'Y', 'Z'])
        self.frame2.to_excel(path, 'test1', header=col_aliases)
        reader = ExcelFile(path)
        rs = read_excel(reader, 'test1', index_col=0)
        xp = self.frame2.copy()
        xp.columns = col_aliases
        tm.assert_frame_equal(xp, rs)
def test_to_excel_multiindex(self):
    _skip_if_no_xlrd()

    frame = self.frame
    arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
    new_index = MultiIndex.from_arrays(arrays, names=['first', 'second'])
    frame.index = new_index

    with ensure_clean(self.ext) as path:
        frame.to_excel(path, 'test1', header=False)
        frame.to_excel(path, 'test1', columns=['A', 'B'])

        # round trip
        frame.to_excel(path, 'test1', merge_cells=self.merge_cells)
        reader = ExcelFile(path)
        df = read_excel(reader, 'test1', index_col=[0, 1],
                        parse_dates=False)
        tm.assert_frame_equal(frame, df)

    # Test for Issue 11328. If column indices are integers, make
    # sure they are handled correctly for either setting of
    # merge_cells
def test_to_excel_multiindex_dates(self):
    _skip_if_no_xlrd()

    # try multiindex with dates
    tsframe = self.tsframe.copy()
    new_index = [tsframe.index, np.arange(len(tsframe.index))]
    tsframe.index = MultiIndex.from_arrays(new_index)

    with ensure_clean(self.ext) as path:
        tsframe.index.names = ['time', 'foo']
        tsframe.to_excel(path, 'test1', merge_cells=self.merge_cells)
        reader = ExcelFile(path)
        recons = read_excel(reader, 'test1', index_col=[0, 1])

        tm.assert_frame_equal(tsframe, recons)
        self.assertEqual(recons.index.names, ('time', 'foo'))
def test_to_excel_multiindex_no_write_index(self):
    _skip_if_no_xlrd()

    # Test writing and re-reading a MI without the index. GH 5616.

    # Initial non-MI frame.
    frame1 = DataFrame({'a': [10, 20], 'b': [30, 40], 'c': [50, 60]})

    # Add a MI.
    frame2 = frame1.copy()
    multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)])
    frame2.index = multi_index

    with ensure_clean(self.ext) as path:
        # Write out to Excel without the index.
        frame2.to_excel(path, 'test1', index=False)

        # Read it back in.
        reader = ExcelFile(path)
        frame3 = read_excel(reader, 'test1')

        # Test that it is the same as the initial frame.
        tm.assert_frame_equal(frame1, frame3)
def test_datetimes(self):
    # Test writing and reading datetimes. For issue #9139. (xref #9185)
    _skip_if_no_xlrd()

    datetimes = [datetime(2013, 1, 13, 1, 2, 3),
                 datetime(2013, 1, 13, 2, 45, 56),
                 datetime(2013, 1, 13, 4, 29, 49),
                 datetime(2013, 1, 13, 6, 13, 42),
                 datetime(2013, 1, 13, 7, 57, 35),
                 datetime(2013, 1, 13, 9, 41, 28),
                 datetime(2013, 1, 13, 11, 25, 21),
                 datetime(2013, 1, 13, 13, 9, 14),
                 datetime(2013, 1, 13, 14, 53, 7),
                 datetime(2013, 1, 13, 16, 37, 0),
                 datetime(2013, 1, 13, 18, 20, 52)]

    with ensure_clean(self.ext) as path:
        write_frame = DataFrame.from_items([('A', datetimes)])
        write_frame.to_excel(path, 'Sheet1')
        read_frame = read_excel(path, 'Sheet1', header=0)

        tm.assert_series_equal(write_frame['A'], read_frame['A'])

    # GH7074
def pingan_trust():
    result = get_total(start_date=START_DATE, end_date='2016-05-10')
    # issuers = my_db.getCompanyList()
    # The original Chinese file and column names below were lost to encoding.
    issuers = pd.read_excel('../peace/??????.xlsx', sheetname=[0], header=0)[0]
    issuers.columns = ['name']
    focus = issuers.merge(result, on='name', how='left')
    focus = focus.sort_values('??', axis=0, ascending=False)
    import time
    time_str = time.strftime('%Y%m%d', time.localtime(time.time()))
    focus['rptDate'] = time_str
    insert_into_db(focus)
    report = focus.dropna(axis=0, how='any', thresh=3)
    report.to_excel("shixin_zhixing_bank.xlsx")
def getModle():
    fixedEvaluation = pd.read_excel(MODLE_FILE_NAME, sheetname=[0], header=0, skiprows=[0])
    industryTbl = pd.read_excel(MODLE_FILE_NAME, sheetname=[1], index_col=2, parse_cols="B:L", header=3)
    trendTbl = pd.read_excel(MODLE_FILE_NAME, sheetname=[2], header=2, skiprows=[0])
    fluctuationTbl = pd.read_excel(MODLE_FILE_NAME, sheetname=[3], header=2, skiprows=[0])
    fixedScoreTble = pd.read_excel(MODLE_FILE_NAME, sheetname=[4], header=0, skiprows=[0])
    df = pd.read_excel(DATA_FILE, sheetname=[1], header=0, index_col=0, verbose=True)
    df[1].head().index
    df[1].head().columns
    df[1].head().describe()
    # The original Chinese column labels were lost to encoding.
    df[1].head().loc[:, ['????', '?????']]
    for i in range(df[1].head().iloc[1].count()):
        print(df[1].head().iloc[1][i])
    head = df[1].head()
    head.values[0][1:40].reshape(13, 3)
def get_history_bar(field_names, start_date, end_date, **kwargs):
    # The original Chinese sheet name was lost to encoding.
    field_info = pd.read_excel(argInfoWB, sheetname='????', engine='xlrd')
    if not isinstance(field_names, list):
        field_names = [field_names]
    _l = []
    w.start()
    for fieldName in field_names:
        # Look up the Wind field name and arguments for this factor.
        field_name = field_info[field_info['FactorName'] == fieldName]['FieldName'].iat[0]
        args = field_info[field_info['FactorName'] == fieldName]['Args'].iat[0]
        params = _parse_args(args, **kwargs)
        all_days = data_api.tc.get_trade_days(start_date, end_date)
        all_ids = data_api.get_history_ashare(all_days).index.levels[1].unique()
        data = w.wsd(list(map(tradecode_to_windcode, all_ids)),
                     field_name, start_date, end_date, params)
        _l.append(_bar_to_dataframe(data))
    data = pd.concat(_l, axis=1)
    w.close()
    return data
def read_annotated_files(dirname):
    messages = []
    labels = np.zeros(0)
    filenames = glob.glob(os.path.join(dirname, '*.xls*'))
    for filename in filenames:
        print('Reading %s' % filename, end='. ', flush=True)
        df = pd.read_excel(filename)
        print("Found %d new samples" % df[df.LABEL.notnull()].shape[0])
        labels = np.hstack((labels, np.array(df[df.LABEL.notnull()].LABEL.tolist(), dtype=int)))
        messages += df[df.LABEL.notnull()].text.tolist()
    return messages, labels
def setUp(self):
    trans = {
        'Linear': ['A'],
        'Random': ['B'],
        'Wave': ['C', 'D']}

    system_name = 'Simple'
    file_name = join(simpleexampledir, 'simple.xlsx')
    df = pd.read_excel(file_name)

    self.pm = pecos.monitoring.PerformanceMonitoring()
    self.pm.add_dataframe(df, system_name)
    self.pm.add_translation_dictionary(trans, system_name)
    self.pm.check_timestamp(900)
    clock_time = self.pm.get_clock_time()
    time_filter = (clock_time > 3 * 3600) & (clock_time < 21 * 3600)
    self.pm.add_time_filter(time_filter)
def unet_cross_val(data_dir, out_dir, mapping, splits, unet_conf):
    # Load spreadsheet
    with pd.ExcelFile(mapping) as xls:
        df = pd.read_excel(xls, 'Sheet1').set_index('index')
        df['class'] = df['class'].map({'preplus': 'pre-plus', 'normal': 'normal', 'plus': 'plus'})

    img_dir = join(data_dir, 'images')
    seg_dir = join(data_dir, 'manual_segmentations')
    mask_dir = join(data_dir, 'masks')

    # Check whether all images exist
    check_images_exist(df, img_dir, seg_dir, mask_dir)

    # Now split into training and testing
    CVFile = sio.loadmat(splits)

    # Combining No and Pre-Plus
    trainPrePIndex = CVFile['trainPrePIndex'][0]
    testPrePIndex = CVFile['testPrePIndex'][0]
    prep_dir = make_sub_dir(out_dir, 'trainTestPreP')

    print("Generating splits for combined Pre-Plus and Plus")
    generate_splits(trainPrePIndex, testPrePIndex, df, img_dir, mask_dir, seg_dir, prep_dir)

    # Train models
    train_and_test(prep_dir, unet_conf, processes=1)
def pca_augmentation(data_h5, excel_path):
    f = h5py.File(data_h5, 'r')

    df1 = pd.read_excel(excel_path, sheetname=0, header=1)
    df1 = df1.rename(columns=lambda x: x.strip()).set_index('Image')  # strip whitespace
    df2 = pd.read_excel(excel_path, sheetname=1, header=1)
    df2 = df2.rename(columns=lambda x: x.strip()).set_index('Image')  # strip whitespace
    df = pd.concat([df1, df2])

    X = preprocess_data(f)
    X_mean = np.mean(X, axis=0)
    X = X - X_mean

    # PCA
    pca = PCA().fit(X)
def setDataFrameFromFile(self, filepath, **kwargs):
    """
    Sets the model's dataFrame by reading a file.
    Accepted file formats:
        - .xlsx (sheet1 is read unless specified in kwargs)
        - .csv (comma separated unless specified in kwargs)
        - .txt (any separator)

    :param filepath: (str)
        The path to the file to be read.
    :param kwargs:
        pandas.read_csv(**kwargs) or pandas.read_excel(**kwargs)

    :return: None
    """
    df = superReadFile(filepath, **kwargs)
    self.setDataFrame(df, filePath=filepath)
def get_dataset_rows(meta):
    """
    If filename is a xlsx file, returns a list of lists of dicts;
    else, returns a list of dicts.

    Relies on get_dataset_local_filename() and meta['slug'].
    Right now wishing I had made this all OOP...
    """
    srcpath = meta['local_filepath']
    if meta['filetype'] == 'workbook':
        import pandas as pd
        sheetindices = list(range(len(meta['gsheet']['sheets'])))
        dfs = pd.read_excel(str(srcpath), sheetname=sheetindices)
        return [x.to_dict('records') for x in dfs.values()]
    else:
        # assume CSV
        return list(DictReader(srcpath.open('r')))
def initGraph(self, name):
    # file reading using pandas
    df = pd.read_excel(name, sheetname='Sheet1')
    dim = df['Dimension']
    x = df['X']
    y = df['Y']
    self.numCities = len(dim)

    # set and fill the adjMatrix
    self.adjMatrix = [[1.0 for i in range(self.numCities)]
                      for j in range(self.numCities)]
    for i in range(self.numCities):
        for j in range(self.numCities):
            # fill the adjMatrix with euclidean distances between city coordinates
            self.adjMatrix[i][j] = self.calEdge(x[i], x[j], y[i], y[j])
def extract_lstm_test(dictionary, file_name, tag_num=CLASS_NUM, col_tag=0, col_content=1, length=MAX_LENGTH):
    contents = pd.read_excel(file_name, header=None)
    cw = lambda x: [word.encode('utf-8') for word in jieba.cut(x)
                    if word not in stopwords and word.strip() != ''
                    and word.encode('utf-8') in dictionary.index]
    contents['words'] = contents[col_content].apply(cw)
    get_sent = lambda x: list(dictionary['id'][x])
    contents['sent'] = contents['words'].apply(get_sent)

    # Pad every sequence to the same fixed length.
    print("Pad sequences (samples x time)")
    contents['sent'] = list(sequence.pad_sequences(contents['sent'], maxlen=length))
    x = np.array(list(contents['sent']))

    # One-hot encode the labels.
    y = np.zeros((len(list(contents[col_tag])), tag_num))
    for i in range(len(list(contents[col_tag]))):
        for j in range(tag_num):
            if contents[col_tag][i] == j:
                y[i][j] = 1
    return x, y
def extract_dictionary_feature(file_name, col_tag=0, col_content=1):
    # Load the sentiment vocabularies.
    adv = codecs.open('./data/vocabulary/adv.txt', 'rb', encoding='utf-8').read().split('\n')
    inverse = codecs.open('./data/vocabulary/inverse.txt', 'rb', encoding='utf-8').read().split('\n')
    negdict = codecs.open('./data/vocabulary/negdict.txt', 'rb', encoding='utf-8').read().split('\n')
    posdict = codecs.open('./data/vocabulary/posdict.txt', 'rb', encoding='utf-8').read().split('\n')

    contents = pd.read_excel(file_name, header=None)
    print('cut words...')
    cw = lambda x: [pair for pair in psg.lcut(x) if pair.word not in stopwords]
    contents['pairs'] = contents[col_content].apply(cw)
    matrix = reviews2matrix(list(contents['pairs']), posdict, negdict, inverse, adv)
    x = matrix2vec(matrix)
    y = list(contents[col_tag])
    return x, y
def read():
    df = pd.read_excel("jjs1.xlsx")
    data = list(df.iloc[:, 7])
    prices = []
    for item in data:
        if not str(item) == "nan":
            prices.append(int(item))
    print(prices[0:100])
    matplotlib.style.use('ggplot')  # use the ggplot style
    ts = pd.Series(prices[0:100], index=pd.date_range('1/1/2000', periods=100))
    plt.figure()
    df.plot.hist(alpha=0.5)
    plt.legend()
    plt.show()
def loadfile():
    neg = pd.read_excel(datadir + '/neg.xls', header=None)
    pos = pd.read_excel(datadir + '/pos.xls', header=None)

    cw = lambda x: list(jieba.cut(x))
    pos['words'] = pos[0].apply(cw)
    neg['words'] = neg[0].apply(cw)

    # use 1 for positive sentiment, 0 for negative
    y = np.concatenate((np.ones(len(pos)), np.zeros(len(neg))))

    x_train, x_test, y_train, y_test = train_test_split(
        np.concatenate((pos['words'], neg['words'])), y, test_size=0.2)

    np.save(modeldir + '/y_train.npy', y_train)
    np.save(modeldir + '/y_test.npy', y_test)
    return x_train, x_test
def load(self, pth):
    with open(pth, 'rb') as fh:  # Excel files must be opened in binary mode
        data = read_excel(fh, sheetname='data')
    print(data.as_matrix(columns=data.columns[1:]))
    return data.as_matrix()

# todo: pandas formats - http://pandas.pydata.org/pandas-docs/stable/io.html
#   hdf5
#   sql
# todo: hdf5 - http://stackoverflow.com/a/9619713/723090
# todo: bloscpack http://stackoverflow.com/a/22225337/723090
# todo: pytables
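Many of the examples above were written against older pandas releases: the sheetname keyword was renamed to sheet_name in pandas 0.21, and parse_cols was replaced by usecols around the same time, with the old names later removed. A minimal sketch of the modern equivalents (the file name is a placeholder, not taken from any project above):

import pandas as pd

# sheet_name and usecols replace the older sheetname and parse_cols
# keywords seen in several examples above; "report.xlsx" is hypothetical.
df = pd.read_excel("report.xlsx", sheet_name=0, usecols=[0, 1, 2])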