6.6 pandas

以下是pandas的常用方法:
pandas 是一个用于数据处理与分析的库,可以读取 xls/xlsx、csv 等格式的文件。使用前需要导入(下文示例均使用别名 pd):import pandas as pd
| 返回类型 | 函数名及参数 | 说明 | 示例 |
| --- | --- | --- | --- |
| DataFrame | `pandas.DataFrame()` | 创建一个空的数据框架(DataFrame) | `df = pd.DataFrame()` |
| DataFrame | `pandas.read_csv(filepath_or_buffer, sep=',', header='infer', names=None)` | 从CSV文件创建数据框架 | `df = pd.read_csv('data.csv')` |
| DataFrame | `pandas.read_excel(io, sheet_name=0, header=0, names=None)` | 从Excel文件创建数据框架 | `df = pd.read_excel('data.xlsx')` |
| DataFrame | `pandas.concat(objs, axis=0, join='outer', ignore_index=False, keys=None)` | 沿行方向(axis=0)或列方向(axis=1)拼接多个数据框架 | `df = pd.concat([df1, df2])` |
| Series | `pandas.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)` | 创建一个系列(Series) | `s = pd.Series([1,2,3,4], index=['a','b','c','d'])` |
| DataFrame | `pandas.DataFrame.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)` | 删除缺失值 | `df = df.dropna()` |
| DataFrame | `pandas.DataFrame.fillna(value=None, method=None, axis=None, inplace=False, limit=None, downcast=None)` | 填充缺失值 | `df = df.fillna(0)` |
| DataFrame | `pandas.DataFrame.pivot(index=None, columns=None, values=None)` | 按指定的索引列和列名列重塑数据(透视,不做聚合;需要聚合时使用 pivot_table) | `df = df.pivot(index='date', columns='variable', values='value')` |
| DataFrameGroupBy | `pandas.DataFrame.groupby(by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False, dropna=True)` | 按列分组,返回分组对象;再调用聚合方法(如 sum)才得到数据框架 | `grouped_df = df.groupby('group_col').sum()` |
| DataFrame | `pandas.DataFrame.merge(right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False, validate=None)` | 按键合并两个数据框架 | `merged_df = df1.merge(df2, on='key')`(等价于 `merged_df = pd.merge(df1, df2, on='key')`) |
| DataFrame | `pandas.DataFrame.apply(func, axis=0, raw=False, result_type=None, args=(), **kwds)` | 沿指定轴应用函数(axis=0 逐列,axis=1 逐行) | `df = df.apply(lambda x: x**2)` |
| DataFrame | `pandas.DataFrame.sort_values(by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last', ignore_index=False, key=None)` | 按值排序 | `df = df.sort_values(by='value_col')` |
| DataFrame | `pandas.DataFrame.corr(method='pearson', min_periods=1)` | 计算列之间的相关性 | `corr_matrix = df.corr()` |
| Axes(matplotlib) | `pandas.DataFrame.plot(kind='line', x=None, y=None, ax=None, subplots=False, sharex=None, sharey=False, layout=None, figsize=None, use_index=True, title=None, grid=None, legend=True, style=None, logx=False, logy=False, loglog=False, xticks=None, yticks=None, xlim=None, ylim=None, rot=None, fontsize=None, colormap=None, table=False, yerr=None, xerr=None, secondary_y=False, sort_columns=False, **kwds)` | 可视化数据框架(需要安装 matplotlib) | `df.plot(kind='line', x='date_col', y='value_col')` |