# Standard library
import os
import re
from datetime import datetime

# Third-party
import numpy as np
import pandas as pd
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose

# Matplotlib configuration so CJK text renders correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']  # display Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False    # display the minus sign correctly
# Load the borrowing records. Each row links a reader (USERID) to a
# borrowed book (BOOK_ID).
data = pd.read_excel("Book(1)111.xls")  # columns 1, 3, 7 are the prediction columns

# Encode the raw identifiers as consecutive integer codes so downstream
# processing works on compact numeric labels.
data['USERID'] = pd.factorize(data['USERID'])[0]
data['BOOK_ID'] = pd.factorize(data['BOOK_ID'])[0]
data = data[['USERID', 'BOOK_ID']]
print(data.columns)
print(data.head(5))

# Build one "transaction" per reader: the list of books they borrowed.
# Readers with 3 or fewer borrowings are skipped so the transactions
# carry enough signal for association mining.
df = pd.DataFrame(data)
grouped = df.groupby('USERID')
datas = []
for group_name, group_data in grouped:
    if len(group_data['BOOK_ID'].values) > 3:
        datas.append(group_data['BOOK_ID'].values.tolist())
print(datas)
# Mine frequent itemsets and association rules from the per-reader
# borrowing transactions collected in `datas` (one list of book ids
# per reader).
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# One-hot encode the transactions into the boolean item matrix that
# the Apriori implementation expects.
te = TransactionEncoder()
te.fit(datas)
te_ary = te.transform(datas)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Frequent itemsets: the support threshold is very low because the
# borrowing data is sparse (few co-borrowed titles per pair of readers).
frequent_itemsets = apriori(df, min_support=0.001, use_colnames=True)
print('frequent_itemsets', frequent_itemsets)

# Derive association rules from the frequent itemsets, keeping any rule
# whose confidence clears the (very permissive) threshold.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.001)

# Report both results.
print("频繁项集:")
print(frequent_itemsets)
print("\n关联规则:")
print(rules)
# Apriori对于作者借阅图书关联数据挖掘
# (Apriori association mining of readers' book-borrowing data.)
# 最新推荐文章于 2024-11-27 08:29:13 发布