pandas.read_csv(filepath_or_buffer, na_values='NAN', parse_dates=['Last Update'])
从CSV文件中读取数据并创建一个DataFrame对象,na_values用于设置缺失值形式,parse_dates用于将指定的列解析成时间日期格式。dataframe.to_csv("xxx.csv", mode='a', header=False)
import pandas as pdif __name__ == "__main__": df = pd.read_csv("temp.csv") print(df) print(df.info()) df.to_csv("temp2.csv")# output:# S.No Name Age City Salary# 0 1 Tom 28 Toronto 20000# 1 2 Lee 32 HongKong 3000# 2 3 Steven 43 Bay Area 8300# 3 4 Ram 38 Hyderabad 3900# <class 'pandas.core.frame.DataFrame'># RangeIndex: 4 entries, 0 to 3# Data columns (total 5 columns):# S.No 4 non-null int64# Name 4 non-null object# Age 4 non-null int64# City 4 non-null object# Salary 4 non-null int64# dtypes: int64(3), object(2)# memory usage: 240.0+ bytes# None
import pandas as pdif __name__ == "__main__": df = pd.read_csv("temp.csv", index_col=['S.No']) print(df)# output:# Name Age City Salary# S.No# 1 Tom 28 Toronto 20000# 2 Lee 32 HongKong 3000# 3 Steven 43 Bay Area 8300# 4 Ram 38 Hyderabad 3900
HDF5(Hierarchical Data Format)是用于存储大规模数值数据的较为理想的存储格式,文件后缀名为h5,存储读取速度非常快,且可在文件内部按照明确的层次存储数据,同一个HDF5可以看做一个高度整合的文件夹,其内部可存放不同类型的数据。在Python中操作HDF5文件的方式主要有两种,一是利用pandas中内建的一系列HDF5文件操作相关的方法来将pandas中的数据结构保存在HDF5文件中,二是利用h5py模块来完成从Python原生数据结构向HDF5格式的保存。pandas.HDFStore()
import pandas as pdimport numpy as npif __name__ == "__main__": store = pd.HDFStore("demo.h6") s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e']) df = pd.DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C']) store['s'] = s store.put(key='df', value=df) print(store.items) print(store.keys()) store.close()# output:# <bound method HDFStore.items of <class ''># File path: demo.h6# ># ['/df', '/s']
import pandas as pdimport numpy as npif __name__ == "__main__": store = pd.HDFStore("demo.h6") s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e']) df = pd.DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C']) store['s'] = s store.put(key='df', value=df) print(store.keys()) store.remove('s') print(store.keys()) store.close()# output:# ['/df', '/s']# ['/df']
Pandas提供了便利方法可以将Pandas的数据结构直接导出到本地h5文件中或从h5文件中读取。pd.read_hdf('demo.h6', key='df')
从hdf文件中读取键的值df.to_hdf(path_or_buf='demo.h6', key='df')
import pandas as pdimport numpy as npif __name__ == "__main__": # 创建新的数据框 df_ = pd.DataFrame(np.random.randn(5, 5)) # 导出到已存在的h6文件中 df_.to_hdf(path_or_buf='demo.h6', key='df') # 创建于本地demo.h6进行IO连接的store对象 store = pd.HDFStore('demo.h6') # 查看指定h6对象中的所有键 print(store.keys()) store.close() print(store.is_open) df = pd.read_hdf('demo.h6', key='df') print(df)# output:# ['/df']# False# 0 1 2 3 4# 0 0.262806 -0.146832 -0.219655 0.553608 -0.278420# 1 -0.057369 -1.662138 -0.757119 -2.000140 1.659584# 2 1.030621 0.421785 -0.239423 0.814709 -1.596752# 3 -1.538354 0.988993 -1.460490 0.846775 1.073998# 4 0.092367 -0.042897 -0.034253 0.299312 0.970190
3、Excel文件pd.read_excel(io, sheet_name=0, header=0, names=None, index_col=None, usecols=None)
import pandas as pdif __name__ == "__main__": df = pd.read_excel("test.xls", sheet_name=None) print(df['Sheet1']) print(df['Sheet2'])# output:# No Name Age Score# 0 1 Bauer 26 89# 1 2 Bob 24 87# 2 3 Jack 25 80# 3 4 Alex 30 90# No Name Age# 0 1 Bauer 26# 1 2 Bob 24# 2 3 Jack 25# 3 4 Alex 30
读取excel主要通过read_excel函数实现,除了pandas还需要安装第三方库xlrd。data.to_excel(io, sheet_name='Sheet1', index=False, header=True)
import pandas as pdimport numpy as npif __name__ == "__main__": df = pd.DataFrame(np.random.randn(3, 3), columns=['A', 'B', 'C']) print(df) df.to_excel("test1.xls", sheet_name='Sheet3', index=False) df = pd.read_excel("test1.xls") print(df)# output:# A B C# 0 1.066504 0.807083 -0.213006# 1 0.247025 -1.129131 -0.130942# 2 0.090071 -0.358951 0.266514# A B C# 0 1.066504 0.807083 -0.213006# 1 0.247025 -1.129131 -0.130942# 2 0.090071 -0.358951 0.266514
pandas.read_sql(sql, con, index_col=None, coerce_float=True, params=None, parse_dates=None, columns=None, chunksize=None)
将SQL查询或数据库表读入DataFrame,是对read_sql_table和 read_sql_query的封装,将根据提供的输入委托给特定的功能。SQL查询将被路由到read_sql_query,而数据库表名将被路由到read_sql_table。pandas.read_sql_table(table_name, con, schema=None, index_col=None, coerce_float=True, parse_dates=None, columns=None, chunksize=None)
import MySQLdbmysql_cn= MySQLdb.connect(host='host', port=3306,user='username', passwd='password', db='information_schema')df_mysql = pd.read_sql('select * from VIEWS;', con=mysql_cn) print('loaded dataframe from MySQL. records:', len(df_mysql))mysql_cn.close()
DataFrame.to_sql (name,con,schema = None,if_exists ='fail',index = True,index_label = None,chunksize = None,dtype = None )
index:布尔值,默认为True,将DataFrame index写为列。使用index_label作为表中的列名。
index_label:字符串或序列,默认为None,index列的列标签。如果给出None(默认)且 index为True,则使用index名称。如果DataFrame使用MultiIndex,则应该给出一个sequence。
pandas.read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, convert_axes=True, convert_dates=True, keep_default_dates=True, numpy=False, precise_float=False, date_unit=None, encoding=None, lines=False, chunksize=None, compression='infer')
‘split’ : JSON字符串以字典格式,如{index -> [index], columns -> [columns], data -> [values]}
‘records’ : JSON字符串以列表格式,如[{column -> value}, … , {column -> value}]
‘index’ : JSON字符串以字典格式,如 {index -> {column -> value}}
‘columns’ : JSON字符串以字典格式,如 {column -> {index -> value}}
‘values’ : JSON字符串为数组格式。
data.to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=10, force_ascii=True, date_unit='ms', default_handler=None, lines=False, compression='infer', index=True)
import pandas as pdimport numpy as npif __name__ == "__main__": df = pd.DataFrame(np.random.randn(3,8), index=['A', 'B', 'C']) print(df) df.to_json("test.json") df = pd.read_json("test.json") print(df)# output:# 0 1 2 ... 5 6 7# A -0.305526 -0.696618 0.796365 ... -0.195769 -1.669797 0.548616# B -1.598829 1.104907 -1.969812 ... 1.590904 1.372927 0.766009# C -1.424199 0.717892 0.728426 ... 0.358646 0.742373 -0.820586## [3 rows x 8 columns]# 0 1 2 ... 5 6 7# A -0.305526 -0.696618 0.796365 ... -0.195769 -1.669797 0.548616# B -1.598829 1.104907 -1.969812 ... 1.590904 1.372927 0.766009# C -1.424199 0.717892 0.728426 ... 0.358646 0.742373 -0.820586## [3 rows x 8 columns]
import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20130101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) print(df.head(3)) print(df.tail(3))# output:# A B C# 2013-01-01 0.768917 -0.963290 -0.159038# 2013-01-02 -0.023267 -0.292786 0.652954# 2013-01-03 0.176760 0.137241 1.301041# 2013-01-04 -0.071628 -1.371969 0.774005# 2013-01-05 -0.793016 -0.178345 0.035532# 2013-01-06 0.407762 0.241827 1.170372# A B C# 2013-01-01 0.768917 -0.963290 -0.159038# 2013-01-02 -0.023267 -0.292786 0.652954# 2013-01-03 0.176760 0.137241 1.301041# A B C# 2013-01-04 -0.071628 -1.371969 0.774005# 2013-01-05 -0.793016 -0.178345 0.035532# 2013-01-06 0.407762 0.241827 1.170372
import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20130101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) print(df.index) print(df.columns) print(list(df)) print(df.values)# output:# A B C# 2013-01-01 0.971426 0.403905 0.304562# 2013-01-02 -2.404873 -0.222086 0.444464# 2013-01-03 -0.144014 -0.513883 -0.468732# 2013-01-04 0.065060 0.460675 -0.633609# 2013-01-05 -1.322018 2.128932 1.099606# 2013-01-06 -0.220413 -0.086348 -0.289723# DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',# '2013-01-05', '2013-01-06'],# dtype='datetime64[ns]', freq='D')# Index(['A', 'B', 'C'], dtype='object')# ['A', 'B', 'C']# [[ 0.97142634 0.40390521 0.30456152]# [-2.4048735 -0.22208588 0.44446443]# [-0.14401362 -0.51388305 -0.46873214]# [ 0.06505955 0.46067507 -0.63360907]# [-1.32201785 2.12893236 1.09960613]# [-0.22041327 -0.08634845 -0.28972288]]
import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20130101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) print(df.shape) print(df.shape[0]) print(df.shape[1])# output:# A B C# 2013-01-01 1.571635 0.740456 -0.789674# 2013-01-02 0.534758 0.372924 1.139897# 2013-01-03 0.419329 0.097288 -0.061034# 2013-01-04 0.292189 -0.805046 -0.512478# 2013-01-05 2.293956 -0.310201 -0.661519# 2013-01-06 0.890370 0.190517 0.306458# (6, 3)# 6# 3
import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20130101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) print(df.info())# output:# A B C# 2013-01-01 0.145529 -0.299115 -0.360462# 2013-01-02 2.203913 -0.619418 2.478992# 2013-01-03 -1.106605 1.114359 -0.653225# 2013-01-04 1.409313 2.198673 -1.663985# 2013-01-05 -0.917697 0.645962 -1.323553# 2013-01-06 0.729082 0.043500 -1.932772# <class 'pandas.core.frame.DataFrame'># DatetimeIndex: 6 entries, 2013-01-01 to 2013-01-06# Freq: D# Data columns (total 3 columns):# A 6 non-null float64# B 6 non-null float64# C 6 non-null float64# dtypes: float64(3)# memory usage: 192.0 bytes# None
import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20130101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) print(df.count())# output:# A B C# 2013-01-01 0.160293 0.298212 0.572019# 2013-01-02 1.046787 0.559711 -0.259907# 2013-01-03 0.208801 1.018917 -1.165052# 2013-01-04 -0.080998 1.268477 -1.038384# 2013-01-05 -0.413563 0.101436 0.215154# 2013-01-06 0.266813 0.945366 1.726588# A 6# B 6# C 6# dtype: int64
import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20130101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) print(df.A.nunique()) print(len(set(df.A)))# output:# A B C# 2013-01-01 0.256037 -0.096629 -0.224575# 2013-01-02 0.220131 0.460777 -0.191140# 2013-01-03 0.957422 0.584076 -1.548418# 2013-01-04 -0.913387 -1.056598 0.201946# 2013-01-05 -0.076716 0.337379 2.560821# 2013-01-06 1.244448 1.241131 0.232319# 6# 6
import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20130101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) print(df.T)# output:# A B C# 2013-01-01 -0.622806 1.461436 -1.133845# 2013-01-02 1.408834 -1.117877 0.922919# 2013-01-03 -0.492947 -1.063588 1.702908# 2013-01-04 -0.401612 -0.206524 0.843514# 2013-01-05 0.064999 0.106151 0.733977# 2013-01-06 -2.219718 -0.972984 0.466263# 2013-01-01 2013-01-02 2013-01-03 2013-01-04 2013-01-05 2013-01-06# A -0.622806 1.408834 -0.492947 -0.401612 0.064999 -2.219718# B 1.461436 -1.117877 -1.063588 -0.206524 0.106151 -0.972984# C -1.133845 0.922919 1.702908 0.843514 0.733977 0.466263
df.idxmax(self, axis=0, skipna=True)
# -*- coding=utf-8 -*-import pandas as pdimport numpy as npif __name__ == "__main__": df = pd.DataFrame(np.random.randn(4, 3), index=['rank2', 'rank1', 'rank4', 'rank3'], columns=['col3', 'col2', 'col1']) print(df) print(df.idxmax(0)) print(df.col2.idxmax(0)) print(df.idxmax(1)) print(df.idxmin(0)) print(df.col2.idxmin(0)) print(df.idxmin(1))# output:# col3 col2 col1# rank2 -0.139445 -1.239773 -0.280064# rank1 0.170190 1.093101 1.697052# rank4 -0.174857 -0.526127 -1.197490# rank3 -0.190417 0.241660 1.206216# col3 rank1# col2 rank1# col1 rank1# dtype: object# rank1# rank2 col3# rank1 col1# rank4 col3# rank3 col1# dtype: object# col3 rank3# col2 rank2# col1 rank4# dtype: object# rank2# rank2 col2# rank1 col3# rank4 col1# rank3 col3# dtype: object
import pandas as pdimport numpy as npif __name__ == "__main__": # 百分数 print('{:.2%}'.format(0.12354)) # 金额千位分隔符 print('{:,}'.format(123456789)) # 小数精度 print('{:.2f}'.format(31.31412))# output:# 12.35%# 123,456,789# 31.31
pandas.set_option('display.expand_frame_repr', False)
True表示可以换行显示,False表示不允许换行。pandas.set_option('display.max_rows', 10)
pandas.set_option('display.max_columns', 10)
显示的最大行数和列数,如果超额就显示省略号。pandas.set_option('display.precision', 5)
显示小数点后的位数,浮点数的精度。pandas.set_option('display.large_repr', 'truncate')
truncate表示截断,info表示查看信息,默认选truncate。pandas.set_option('display.max_colwidth', 5)
设定每一列的最大宽度pandas.set_option('display.chop_threshold', 0.5)
绝对值小于0.5的显示0.0pandas.set_option('display.colheader_justify', 'left')
设置列标题的对齐方式,可选'left'(左对齐)或'right'(右对齐,默认值)。pandas.set_option('display.width', 200)
横向最多显示多少个字符, 一般80不适合横向的屏幕,平时多用200。
# -*- coding=utf-8 -*-import pandas as pdimport numpy as npif __name__ == "__main__": df = pd.DataFrame(np.random.randn(4, 3), index=['rank2', 'rank1', 'rank4', 'rank3'], columns=['col3', 'col2', 'col1']) print(df) print(df.loc['rank1', 'col2']) print(df.loc[:, 'col3']) print(df.loc[:, ['col1', 'col3']]) print(df.loc['rank1':'rank3', :])# output:# col3 col2 col1# rank2 1.113696 -1.412935 -0.806799# rank1 0.107469 1.086778 -0.971733# rank4 -0.135899 -0.753419 -0.569671# rank3 1.416578 1.230413 0.795368# 1.086777931461885# rank2 1.113696# rank1 0.107469# rank4 -0.135899# rank3 1.416578# Name: col3, dtype: float64# col1 col3# rank2 -0.806799 1.113696# rank1 -0.971733 0.107469# rank4 -0.569671 -0.135899# rank3 0.795368 1.416578# col3 col2 col1# rank1 0.107469 1.086778 -0.971733# rank4 -0.135899 -0.753419 -0.569671# rank3 1.416578 1.230413 0.795368
# -*- coding=utf-8 -*-import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20190101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) print(df.loc[dates[0]]) print(df.loc[:, ['A', 'B']]) print(df.loc['2019-01-03':'2019-01-05', ['A', 'B']]) print(df.loc['2019-01-03', ['A', 'B']]) print(df.loc['2019-01-03', 'A'])# output:# A B C# 2019-01-01 -0.640586 0.296498 0.758321# 2019-01-02 -0.219330 0.377097 0.353152# 2019-01-03 0.857294 1.255778 1.797687# 2019-01-04 -1.271955 -1.675781 0.484156# 2019-01-05 1.223988 1.200979 1.074488# 2019-01-06 -0.722830 -0.525681 0.294155# A -0.640586# B 0.296498# C 0.758321# Name: 2019-01-01 00:00:00, dtype: float64# A B# 2019-01-01 -0.640586 0.296498# 2019-01-02 -0.219330 0.377097# 2019-01-03 0.857294 1.255778# 2019-01-04 -1.271955 -1.675781# 2019-01-05 1.223988 1.200979# 2019-01-06 -0.722830 -0.525681# A B# 2019-01-03 0.857294 1.255778# 2019-01-04 -1.271955 -1.675781# 2019-01-05 1.223988 1.200979# A 0.857294# B 1.255778# Name: 2019-01-03 00:00:00, dtype: float64# 0.8572941113047045
# -*- coding=utf-8 -*-import pandas as pdimport numpy as npif __name__ == "__main__": df = pd.DataFrame(np.random.randn(4, 3), index=['rank2', 'rank1', 'rank4', 'rank3'], columns=['col3', 'col2', 'col1']) print(df) print(df.iloc[0:3]) print(df.iloc[[1, 2], 0:2])# output:# col3 col2 col1# rank2 -0.483500 -1.073882 -1.081589# rank1 -0.753271 -1.434796 -0.946916# rank4 0.125635 0.570554 -2.454738# rank3 1.949820 -1.464900 -0.171653# col3 col2 col1# rank2 -0.483500 -1.073882 -1.081589# rank1 -0.753271 -1.434796 -0.946916# rank4 0.125635 0.570554 -2.454738# col3 col2# rank1 -0.753271 -1.434796# rank4 0.125635 0.570554
# -*- coding=utf-8 -*-import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20190101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) print(df.iloc[3]) # 选取除最后两列外的所有列 print(df.iloc[:, :-2]) print(df.iloc[1:4, 1:3]) print(df.iloc[:, [1, 2]]) # 获取标量 print(df.iloc[1, 2])# output:# A B C# 2019-01-01 -1.348715 -0.184542 -0.290333# 2019-01-02 0.177905 0.876349 0.371486# 2019-01-03 1.368759 1.399392 -0.000577# 2019-01-04 1.855882 0.564528 -0.089876# 2019-01-05 0.530389 -1.292908 0.681160# 2019-01-06 -0.286435 -0.461200 0.864096# A 1.855882# B 0.564528# C -0.089876# Name: 2019-01-04 00:00:00, dtype: float64# A# 2019-01-01 -1.348715# 2019-01-02 0.177905# 2019-01-03 1.368759# 2019-01-04 1.855882# 2019-01-05 0.530389# 2019-01-06 -0.286435# B C# 2019-01-02 0.876349 0.371486# 2019-01-03 1.399392 -0.000577# 2019-01-04 0.564528 -0.089876# B C# 2019-01-01 -0.184542 -0.290333# 2019-01-02 0.876349 0.371486# 2019-01-03 1.399392 -0.000577# 2019-01-04 0.564528 -0.089876# 2019-01-05 -1.292908 0.681160# 2019-01-06 -0.461200 0.864096# 0.3714863793190553
# -*- coding=utf-8 -*-import pandas as pdimport numpy as npif __name__ == "__main__": df = pd.DataFrame(np.random.randn(4, 3), index=['rank2', 'rank1', 'rank4', 'rank3'], columns=['col3', 'col2', 'col1']) print(df) print(df['col2']) print(df.col2)# output:# col3 col2 col1# rank2 -0.010866 -1.438301 1.008284# rank1 -0.633372 0.951618 0.190146# rank4 -0.158926 -2.016063 0.456099# rank3 -1.028975 -0.144202 -0.077525# rank2 -1.438301# rank1 0.951618# rank4 -2.016063# rank3 -0.144202# Name: col2, dtype: float64# rank2 -1.438301# rank1 0.951618# rank4 -2.016063# rank3 -0.144202# Name: col2, dtype: float64
# -*- coding=utf-8 -*-import pandas as pdimport numpy as npif __name__ == "__main__": df = pd.DataFrame(np.random.randn(4, 3), index=['rank2', 'rank1', 'rank4', 'rank3'], columns=['col3', 'col2', 'col1']) print(df) print(df[['col2', 'col3']])# output:# col3 col2 col1# rank2 -0.190013 0.775020 -2.243045# rank1 0.884000 1.347191 -0.388117# rank4 -1.401332 0.228368 -1.475148# rank3 0.369793 0.813368 -0.428450# col2 col3# rank2 0.775020 -0.190013# rank1 1.347191 0.884000# rank4 0.228368 -1.401332# rank3 0.813368 0.369793
# -*- coding=utf-8 -*-import pandas as pdimport numpy as npif __name__ == "__main__": df = pd.DataFrame(np.random.randn(4, 3), index=['rank2', 'rank1', 'rank4', 'rank3'], columns=['col3', 'col2', 'col1']) print(df) print(df[0:3]) print(df['rank1':'rank4'])# output:# col3 col2 col1# rank2 -0.868999 0.852147 0.346300# rank1 1.975817 0.633193 -0.157873# rank4 0.271203 -0.681425 0.227320# rank3 0.173491 -0.225134 -0.750217# col3 col2 col1# rank2 -0.868999 0.852147 0.346300# rank1 1.975817 0.633193 -0.157873# rank4 0.271203 -0.681425 0.227320# col3 col2 col1# rank1 1.975817 0.633193 -0.157873# rank4 0.271203 -0.681425 0.227320
# -*- coding=utf-8 -*-import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20190101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) print(df[df.A > 0])# output:# A B C# 2019-01-01 -0.419116 0.370122 -2.026854# 2019-01-02 -1.041050 0.356879 1.166706# 2019-01-03 -0.853631 -0.115552 -0.859882# 2019-01-04 -0.725505 -0.424321 0.218010# 2019-01-05 1.087608 1.135607 -0.191611# 2019-01-06 -0.630319 1.033699 -0.153894# A B C# 2019-01-05 1.087608 1.135607 -0.191611
# -*- coding=utf-8 -*-import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20190101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) print(df[df > 0])# output:# A B C# 2019-01-01 -0.562408 0.394501 0.516874# 2019-01-02 -0.589820 -0.902871 -0.395223# 2019-01-03 0.009566 -0.817079 1.620771# 2019-01-04 0.307311 0.392733 0.090025# 2019-01-05 0.469306 -0.563045 -1.402386# 2019-01-06 0.554762 -0.023549 1.889080# A B C# 2019-01-01 NaN 0.394501 0.516874# 2019-01-02 NaN NaN NaN# 2019-01-03 0.009566 NaN 1.620771# 2019-01-04 0.307311 0.392733 0.090025# 2019-01-05 0.469306 NaN NaN# 2019-01-06 0.554762 NaN 1.889080
# -*- coding=utf-8 -*-import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20190101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) df.loc['2019-01-04', 'B'] = 3.1415 print(df)# output:# A B C# 2019-01-01 0.950116 0.147263 1.049792# 2019-01-02 0.305393 -0.235960 -0.385073# 2019-01-03 -0.024728 -0.581566 -0.343492# 2019-01-04 2.384613 0.256359 0.422368# 2019-01-05 -0.941046 0.259252 0.559688# 2019-01-06 -0.138191 -1.055116 -1.268404# A B C# 2019-01-01 0.950116 0.147263 1.049792# 2019-01-02 0.305393 -0.235960 -0.385073# 2019-01-03 -0.024728 -0.581566 -0.343492# 2019-01-04 2.384613 3.141500 0.422368# 2019-01-05 -0.941046 0.259252 0.559688# 2019-01-06 -0.138191 -1.055116 -1.268404
# -*- coding=utf-8 -*-import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20190101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) df.iloc[0, 0] = 3.1415 print(df)# output:# A B C# 2019-01-01 1.141077 0.102785 -1.243796# 2019-01-02 -0.100035 -0.468026 -1.230186# 2019-01-03 -1.361605 0.603181 0.009779# 2019-01-04 0.094592 0.377274 -0.743773# 2019-01-05 0.756191 0.254951 -0.032884# 2019-01-06 1.029874 0.377550 -1.061605# A B C# 2019-01-01 3.141500 0.102785 -1.243796# 2019-01-02 -0.100035 -0.468026 -1.230186# 2019-01-03 -1.361605 0.603181 0.009779# 2019-01-04 0.094592 0.377274 -0.743773# 2019-01-05 0.756191 0.254951 -0.032884# 2019-01-06 1.029874 0.377550 -1.061605
# -*- coding=utf-8 -*-import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20190101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) df.loc[:, 'D']= np.array([3]*len(df)) print(df)# output:# A B C# 2019-01-01 -0.377629 -0.792364 -0.030633# 2019-01-02 0.034738 -0.121923 0.159174# 2019-01-03 0.288188 2.671207 -0.670135# 2019-01-04 0.626814 0.669742 0.017105# 2019-01-05 -0.127686 -0.643768 0.000738# 2019-01-06 0.524352 -0.228057 -0.896196# A B C D# 2019-01-01 -0.377629 -0.792364 -0.030633 3# 2019-01-02 0.034738 -0.121923 0.159174 3# 2019-01-03 0.288188 2.671207 -0.670135 3# 2019-01-04 0.626814 0.669742 0.017105 3# 2019-01-05 -0.127686 -0.643768 0.000738 3# 2019-01-06 0.524352 -0.228057 -0.896196 3
# -*- coding=utf-8 -*-import pandas as pdimport numpy as npif __name__ == "__main__": dates = pd.date_range('20190101', periods=6) df = pd.DataFrame(np.random.randn(6, 3), index=dates, columns=list('ABC')) print(df) df2 = df.copy() # 将正数转化为负数 df2[df2 > 0] = -df2 print(df2)# output:# A B C# 2019-01-01 0.691983 0.489286 -1.632002# 2019-01-02 1.212439 0.854812 -0.292094# 2019-01-03 -0.365872 0.738098 -0.494800# 2019-01-04 0.548706 0.066543 0.242601# 2019-01-05 0.656829 0.155872 0.262424# 2019-01-06 -0.085094 1.392970 -0.214890# A B C# 2019-01-01 -0.691983 -0.489286 -1.632002# 2019-01-02 -1.212439 -0.854812 -0.292094# 2019-01-03 -0.365872 -0.738098 -0.494800# 2019-01-04 -0.548706 -0.066543 -0.242601# 2019-01-05 -0.656829 -0.155872 -0.262424# 2019-01-06 -0.085094 -1.392970 -0.214890