sql语句
from .basedao import BaseDao # 定义一个操作职位数据的数据库访问类 class JobPositionDao(BaseDao): def __init__(self): super().__init__() pass # 向数据库插入职位信息 def create(self, params): a = 1 sql = "insert into job_position (job_position, job_company, job_address, job_salary, job_date," \ "job_taskid, job_lowsalary, job_highsalary, job_meansalary, job_city) values (%s, %s, %s, %s, %s, %s,%s, %s, %s, %s)" result = self.execute(sql, params) lastRowId = self.getLastRowId() self.commit() return result, lastRowId def findPositionClassify(self): sql = 'select avg(job_meansalary), job_taskid, task_title from job_position, job_collect_task' \ ' where job_position.job_taskid=job_collect_task.task_id group by job_taskid, task_title' result = self.execute(sql, params=None) self.commit() return self.fetch() pass def findCityPositionClassify(self): sql = 'select avg(t1.job_meansalary) as m, t1.job_taskid,t2.task_title,t1.job_city' \ ' from job_position t1 left join job_collect_task t2 on t1.job_taskid = t2.task_id' \ ' group by job_taskid,job_city,t2.task_title order by t1.job_taskid asc,m desc' result = self.execute(sql, params=None) self.commit() return self.fetch() pass def findPositionClassify(self): sql = "select avg(job_meansalary),job_taskid,task_title from job_position,job_collect_task where " + \ "job_position.job_taskid = job_collect_task.task_id group by job_taskid,task_title" result = self.execute(sql, params=None) self.commit() return self.fetch() pass def findCityPositionClassify(self): sql = "select avg(t1.job_meansalary) as m,t1.job_taskid,t2.task_title,t1.job_city from " + \ "job_position t1 left join job_collect_task t2 " + \ "on t1.job_taskid = t2.task_id group by job_taskid,job_city,t2.task_title order by " + \ "t1.job_taskid asc,m desc" result = self.execute(sql, params=None) self.commit() return self.fetch() pass def bingshuju(self): sql = "select avg(job_position.job_meansalary),job_city from job_position where job_taskid = '1' group by job_taskid,job_city order by avg(job_position.job_meansalary) desc;" result = self.execute(sql, params=None) self.commit() return self.fetch() pass def sandiantu(self): sql = "select job_taskid,job_city,avg(job_position.job_meansalary) from job_position group by job_taskid,job_city;" result = self.execute(sql, params=None) self.commit() return self.fetch() pass
调用函数并存成json文件
from day22.spiderproject.spiderproject.dao.jobpostiondao import JobPositionDao jp = JobPositionDao() print(jp.findPositionClassify()) print(jp.findCityPositionClassify()) jp.close() import json jp = JobPositionDao() print(jp.findCityPositionClassify()) #点线图的数据 print(jp.findPositionClassify()) #3个语言的平均工资(柱状图) print(jp.bingshuju()) #前五个是饼图数据(城市的平均工资占比) print(jp.sandiantu()) #每个语言每个城市的平均薪资 print(jp.textfunc()) #工作需求,用于jieba with open('F:\\py\\pythonsientific\\day05\\2.json','w')as fp: dict1 = {} for i in jp.findCityPositionClassify(): dict1[i[0]]={i[2]:i[3]} a = json.dumps(dict1) json.dump(a,fp) with open('F:\\py\\pythonsientific\\day05\\3.json','w',encoding='utf-8')as fp: dict3 = {} for i in jp.findPositionClassify(): dict3[i[0]] = {i[1]:i[2]} c = json.dumps(dict3) json.dump(c,fp) with open('F:\\py\\pythonsientific\\day05\\4.json','w',encoding='utf-8')as fp: dict4 = {} for i in jp.bingshuju(): dict4[i[0]] = i[1] d = json.dumps(dict4) json.dump(d,fp) with open('F:\\py\\pythonsientific\\day05\\5.json','w',encoding='utf-8')as fp: dict5 = {} for i in jp.sandiantu(): dict5[i[1]] = i[2] e = json.dumps(dict5) json.dump(e,fp) with open('F:\\py\\pythonsientific\\day05\\6.json','w',encoding='utf-8')as fp: list1 = [] for i in jp.textfunc(): list1.append(i[0]) f = json.dumps(list1) json.dump(f,fp) jp.close()
matplotlib画四种图表的实例
#引入需要的模块 import numpy as np import matplotlib.pyplot as plt import json #用来正常显示中文标签 plt.rcParams['font.sans-serif'] = ['SimHei'] #用来正常显示负号 plt.rcParams['axes.unicode_minus'] = False figure = plt.figure() #创建画布 #点线图--上海和北京不同城市的python薪资 subplot1 = figure.add_subplot(2,2,1) #图标占位,2行2列第1个(下面的3幅图一样) with open('2.json','r')as fp: #打开文件 a = json.load(fp) data = json.loads(a) print(data) list1 = [] list2 = [] list3 = [] list4 = [] list5 = [] list6 = [] list7 = [] for i in data.items(): a = i[1] for j in a.items(): b = j[0] if j[0]=='Python职位数据采集': list1.append(i) elif j[0]=='Java职位数据采集': list2.append(i) else: pass print(list1) print(list2) for i in list1: a = i[0] for j in i[1].items(): b = j[1] list3.append((a,b)) array2d = np.array(list3) array2d = array2d.T for i in list2: a = i[0] for j in i[1].items(): b = j[1] list4.append((a,b)) array2e = np.array(list4) array2e = array2e.T print(array2d) print(array2e) for i in array2d[1]: for j in array2e[1]: if i == j: list5.append(i) xdata = np.array(list5) print(xdata) for i in xdata: for j in array2d.T: if i==j[1]: list6.append(j[0]) for i in xdata: for j in array2e.T: if i==j[1]: list7.append(j[0]) ydata = np.array(list6).astype(np.float) print(ydata) zdata = np.array(list7).astype(np.float) print(zdata) subplot1.plot(xdata,ydata,'b-',label='python') subplot1.plot(xdata,zdata,'r-',label='java') subplot1.scatter(xdata,ydata,s=50,c='g',alpha=0.5) subplot1.scatter(xdata,zdata,s=50,c='g',alpha=0.5) subplot1.legend(loc='best') #柱状图--python、Java、php三种语言的薪资 subplot2 = figure.add_subplot(2,2,2) with open('3.json','r')as fp: a = json.load(fp) data = json.loads(a) print(data) lista = [] listb = [] listc = [] for i in data.items(): lista.append(i[0]) for j in i[1].items(): listb.append(j[0]) listc.append(j[1]) # print(lista) # print(listb) # print(listc) xlabel = np.array(listc) x = np.array(listb).astype(np.float) y = np.array(lista).astype(np.float) bars = plt.bar(x,y,width=0.3) #width:柱子宽度 #返回的是bar柱子对象 subplot2.set_ylabel("salary/month") subplot2.set_xlabel("position") subplot2.set_xticks(x) subplot2.set_xticklabels(xlabel) subplot2.grid(linestyle='--') #添加背景网格线 #设置颜色 i = 0 for bar in bars: bar.set_color('#'+str(111111 + i)) i += 225540 pass for x,y in zip(x,y): subplot2.text(x,y+0.05,'{0}yuan/month'.format(y),ha='center',va='bottom') #饼图--python语言5个城市的薪资占比 subplot3 = figure.add_subplot(2,2,3) with open('4.json','r')as fp: a = json.load(fp) data = json.loads(a) print(data) listx = [] listy = [] i = 0 for j in data.items(): if i<5: i += 1 listx.append(j[0]) listy.append(j[1]) # print(listx) # print(listy) result = np.array(listx).astype(np.float)/np.sum(np.array(listx).astype(np.float)) print(result) labels = ['%s %.2f %%' %(y,np.float(x * 100)) for x,y in zip(result,np.array(listy))] subplot3.pie(result,colors=['#008888','#006600','#FF0000','#00FF00','#0000FF'],labels=labels) #散点图--不同语言不同城市的平均薪资 subplot4 = figure.add_subplot(2,2,4) with open('5.json','r')as fp: a = json.load(fp) data = json.loads(a) print(data) listm = [] listn = [] for i in data.items(): listm.append(i[0]) listn.append(i[1]) x = np.array(listm) y = np.array(listn) subplot4.scatter(x,y,s=20,c='b',marker='*') #s大小,c颜色,alpha透明度,marker点的形状默认o plt.show()
图片展示