Python data mining – I analyzed the contestants of "Sisters Riding the Wind and Waves" and found these secrets

Time:2021-10-16

To get the dataset and source code: follow, like, take a screenshot, and join QQ group 606115027

Read data

import pandas as pd

import matplotlib.pyplot as plt

# Load the contestant table. The file is decoded with the GBK codec.
df = pd.read_csv("/home/kesci/input/sister5122/final_data.csv", encoding="gbk")

# Remove stray carriage-return / newline characters from both ends of each name.
df["names"] = [name.strip("\r\n") for name in df["names"]]

# Bare expression: displays the frame when this is run as a notebook cell.
df

age birth hometown names primaryScore jobs picUrl

0 (51 years old) 1969 / 3 / 4 Taiwan Yi Nengjing 74 singer and actor,bkimg.cdn.bcebos.com/pic/c2fdfc039…

1 (35 years old) October 15, 1984, Henan Hailu 68 actor,bkimg.cdn.bcebos.com/pic/9345d688d…

2 (34 years old) 1985 / 10 / 21 Jilin Xu Fei 75 singer,bkimg.cdn.bcebos.com/pic/f703738da…

3 (33 years old) August 8, 1986 Shandong Zhang Yuqi 72 actors,bkimg.cdn.bcebos.com/pic/b21c8701a…

4 (31 years old) 1989 / 4 / 9 Sichuan Zhang Hanyun 79 singer and actor,bkimg.cdn.bcebos.com/pic/0823dd545…

5 (37 years old) 1983 / 1 / 29 Liaoning Wu Xin 74 actor, host,bkimg.cdn.bcebos.com/pic/8644ebf81…

6 (35 years old) March 22, 1985 Inner Mongolia Wang Likun 72 actor,bkimg.cdn.bcebos.com/pic/37d3d539b…

7 (37 years old) 1983 / 3 / 14 Shanghai Jinsha 68 singer and actor,bkimg.cdn.bcebos.com/pic/5366d0160…

8 (30 years old) April 16, 1990 Shanghai Lan Yingying 91 actor,bkimg.cdn.bcebos.com/pic/10dfa9ec8…

9 (37 years old) 1983 / 2 / 11 Shanghai Huang Shengyi 80 singer and actor,bkimg.cdn.bcebos.com/pic/64380cd79…

10 (39 years old) 1981 / 3 / 6 Tianjin Zhang Meng 77 actor,bkimg.cdn.bcebos.com/pic/6d81800a1…

11 (29 years old) September 5, 1990 Shandong Jinchen 80 models, actors,bkimg.cdn.bcebos.com/pic/5bafa40f4…

12 (32 years old) April 18, 1988, Yunnan Zhu Jingxi 76 singer and musician,bkimg.cdn.bcebos.com/pic/728da9773…

13 (48 years old) April 27, 1972 Guizhou Ning Jing 84 actor,bkimg.cdn.bcebos.com/pic/574e9258d…

14 (30 years old) 1990 / 2 / 3 Hunan Mengjia 87 singer and actor,bkimg.cdn.bcebos.com/pic/2934349b0…

15 (36 years old) 1983 / 10 / 23 Sichuan Yu Kewei 85 singer,bkimg.cdn.bcebos.com/pic/42166d224…

16 (33 years old) April 27, 1987 Hainan Wang Feifei 84 singer and actor,bkimg.cdn.bcebos.com/pic/267f9e2f0…

17 (42 years old) April 17, 1978, Hunan Addo 79 singer and actor,bkimg.cdn.bcebos.com/pic/f7246b600…

18 (49 years old) 1970 / 9 / 19 Canada Christy Chung 78 actor,bkimg.cdn.bcebos.com/pic/ac4bd1137…

19 (38 years old) September 6, 1981 Shanghai Zheng Xiyi 84 singer and actor,bkimg.cdn.bcebos.com/pic/a8014c086…

20 (30 years old) April 26, 1990 Sichuan Li sidani 87 singer, actor,bkimg.cdn.bcebos.com/pic/4e4a20a44…

21 (37 years old) 1982 / 12 / 26 Hunan Liu Yun 74 actor,bkimg.cdn.bcebos.com/pic/d8f9d72a6…

22 (34 years old) may 2, 1986 Shaanxi Bai Bing 79 actor,bkimg.cdn.bcebos.com/pic/c8ea15ce3…

23 (37 years old) July 29, 1982 Liaoning Wang Zhi 68 actor,bkimg.cdn.bcebos.com/pic/d1160924a…

24 (33 years old) 1987 / 2 / 13 Shanghai Huang Ling 89 singer and actor,bkimg.cdn.bcebos.com/pic/4610b912c…

25 (33 years old) 1986 / 11 / 14 United States Yuan Yonglin 83 singer and actor,bkimg.cdn.bcebos.com/pic/060828381…

26 (38 years old) April 17, 1982, Zhejiang Dingdang 75 singer,bkimg.cdn.bcebos.com/pic/d01373f08…

27 (38 years old) May 14, 1982 Hunan Wanqian 77 singer and actor,bkimg.cdn.bcebos.com/pic/5bafa40f4…

28 (31 years old) 1989 / 6 / 13 Hunan Shen Mengchen 86 actor and host,bkimg.cdn.bcebos.com/pic/3801213fb…

29 (49 years old) 1971 / 1 / 21 Hong Kong Chen Songling 73 singer and actor,bkimg.cdn.bcebos.com/pic/b90e7bec5…

# Convert the raw age column to integers.
# The raw entries wrap the number in extra text (the table above shows values
# such as "(51 years old)"), so take the first run of digits in each entry
# rather than stripping specific decoration characters.
# An entry with no digits at all still fails loudly at the final astype(int).
df["age"] = df["age"].astype(str).str.extract(r"(\d+)", expand=False).astype(int)

# NOTE(review): importing chart classes from the package top level is the
# pre-1.0 pyecharts layout, while everything below uses the options-based API
# (opts.InitOpts, set_global_opts). The legacy import is kept for older
# installs but guarded so it cannot abort the script on newer ones.
try:
    from pyecharts import Pie, Bar, Line
except ImportError:
    pass

from pyecharts.charts import Pie, Bar, Line, Funnel

from pyecharts.options.global_options import ThemeType

from pyecharts import options as opts

# The chart class is `Line`; a lowercase `line` is not exported by pyecharts.charts.
from pyecharts.charts import Pie, Bar, Line

Age distribution of sisters

# Bucket the ages into four bands; pd.cut intervals are right-inclusive,
# so the bands are (26, 33], (33, 40], (40, 47], (47, 54].
age_cut = pd.cut(
    df.age,
    [26, 33, 40, 47, 54],
    labels=["26-33", "33-40", "40-47", "47-54"],
)

# Band labels and head-counts as plain Python str/int (not pandas/numpy
# scalars) before they are handed to the chart.
age_counts = age_cut.value_counts()
attr = [str(label) for label in age_counts.index]
count = [int(n) for n in age_counts.values]

# Rose-type ring chart: one slice per age band, labelled "<band>: <percent>%".
pie = (
    Pie(init_opts=opts.InitOpts(theme=ThemeType.CHALK))
    .add(
        "",
        [list(z) for z in zip(attr, count)],
        radius=["30%", "75%"],
        rosetype="radius",
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="sister riding the wind and waves",
            subtitle="age distribution",
        )
    )
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
)

pie.render_notebook()

Occupation distribution of the sisters

from collections import Counter

# Count how many contestants list each occupation.
# Rows are joined with "," (not "") so the last job of one row can never fuse
# with the first job of the next when a value lacks a trailing comma; empty
# fragments are dropped and surrounding whitespace is stripped so that
# "actor" and " actor" count as the same label.
# NOTE(review): assumes "," is the delimiter inside the jobs column — confirm
# against the raw CSV.
jobsClass = Counter(
    job.strip()
    for job in ",".join(df.jobs.dropna().astype(str)).split(",")
    if job.strip()
)

# Funnel chart of occupation -> count, smallest band at the top.
funnel = (
    Funnel(init_opts=opts.InitOpts(theme=ThemeType.CHALK))
    .add(
        "sister riding the wind and waves",
        [list(item) for item in jobsClass.items()],
        sort_="ascending",
        label_opts=opts.LabelOpts(position="inside"),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="sister riding the wind and waves",
            subtitle="occupation distribution",
        ),
    )
)

funnel.render_notebook()

Distribution of the sisters' home provinces

from pyecharts.charts import Map

import random

# Number of contestants per hometown value.
provinces = Counter(df.hometown)

print(provinces)

# (hometown, count) pairs in the shape Map.add expects.
area = list(provinces.items())

# Choropleth of hometowns, with a piecewise colour scale capped at 5.
# The map type is the lowercase key "china".
# NOTE(review): regions are only coloured when the hometown strings match the
# region names used by the map asset — verify against the raw CSV values.
maps = (
    Map(init_opts=opts.InitOpts(theme=ThemeType.ROMANTIC))
    .add("birthplace", area, "china")
    .set_global_opts(
        title_opts=opts.TitleOpts(title="map basic example"),
        legend_opts=opts.LegendOpts(is_show=False),
        visualmap_opts=opts.VisualMapOpts(max_=5, is_piecewise=True),
    )
)

maps.render_notebook()

Counter({'Shanghai': 5, 'Hunan': 5, 'Sichuan': 3, 'Shandong': 2, 'Liaoning': 2, 'Taiwan': 1, 'Henan': 1, 'Jilin': 1, 'Inner Mongolia': 1, 'Tianjin': 1, 'Yunnan': 1, 'Guizhou': 1, 'Hainan': 1, 'Canada': 1, 'Shaanxi': 1, 'United States': 1, 'Zhejiang': 1, 'Hong Kong': 1})

Relationship between the sisters' ages and initial stage scores

from pyecharts import options as opts

from pyecharts.charts import Bar, Line

# First five rows of the table (file order, not a ranking).
top5 = df[:5]

names = top5.names.values.tolist()
ages = top5.age.values.tolist()
scores = top5.primaryScore.values.tolist()

# Bars show age on the primary y-axis.
# Exactly one extra y-axis is added, because the overlaid line series below
# targets yaxis_index=1; a second identical extend_axis call would only add
# an unused third axis.
bar = (
    Bar(init_opts=opts.InitOpts(theme=ThemeType.ROMANTIC))
    .add_xaxis(names)
    .add_yaxis("age", ages)
    .extend_axis(
        yaxis=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(formatter="{value} points"),
            interval=20,
        )
    )
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
)

# Line of initial stage scores, drawn against the secondary axis.
line = (
    Line()
    .add_xaxis(names)
    .add_yaxis("initial stage score", scores, yaxis_index=1)
)

bar.overlap(line)

bar.render_notebook()

Average age of the sisters (first five rows only)

# Mean age of the contestants held in `ages`.
# Divide by the actual list length instead of a hard-coded 5 so the figure
# stays correct if the selection size changes.
print(sum(ages) / len(ages))

36.8

This work is licensed under a CC agreement; reprints must credit the author and link to this article