使用Pandas和plotnine可视化数据
目标: 之前分享过很多pandas可视化、plotnine可视化的内容,但还没有把两者放在一起对比过。今天我们尝试分别用pandas和plotnine绘制直方图和散点图。
导入数据
%matplotlib inline
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
ecoli = pd.read_csv('ecoli.csv')
ecoli.head()
直方图
pandas直方图
ecoli['genome_size'].plot.hist()
#带标题
ecoli['genome_size'].plot.hist(title='Histogram with pandas')
plotnine直方图
from plotnine import ggplot, aes, geom_histogram
(ggplot(data=ecoli,                    #dataframe数据
        mapping=aes(x='genome_size'))  #需要统计的x轴
 + geom_histogram(bins=15)             #直方图分为15个区间
)
from plotnine import ggplot, aes, geom_histogram, labs
(ggplot(data=ecoli, mapping=aes(x='genome_size'))
 + geom_histogram(bins=15)
 + labs(title='Histogram with Plotnine')
)
散点图
pandas散点图
ecoli.plot.scatter(x='generation',y='genome_size')
#带标题、点的颜色、点的形状marker
ecoli.plot.scatter(x='generation',y='genome_size', title='Scatter plot with pandas', c='green', marker='+')
plotnine散点图
from plotnine import ggplot, aes, geom_point, labs
(ggplot(data=ecoli, mapping=aes(x = 'sample', y= 'genome_size'))
 + geom_point(alpha=0.5, color='blue')  #散点图
 + labs(title='scatter plot with plotnine')
)
from plotnine import ggplot, aes, geom_point, labs, theme, element_text
(ggplot(data=ecoli, mapping=aes(x = 'sample', y= 'genome_size'))
 + geom_point(alpha=0.5, color='blue')  #透明度alpha
 + labs(title='scatter plot with plotnine')
 + theme(axis_text_x=element_text(angle=45, hjust=1))  #设置x轴刻度文本的旋转角度和水平对齐方式
)
from plotnine import ggplot, aes, geom_point, labs, theme, element_text
#除x、y轴外,增加一个颜色维度
(ggplot(data=ecoli, mapping=aes(x = 'sample', y= 'genome_size', color='generation'))
 + geom_point(alpha=0.5)
 + labs(title='scatter plot with plotnine')
 + theme(axis_text_x = element_text(angle=45, hjust=1))
)
近期文章
|