import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from time import time
%matplotlib inline
with open('train/train/train_status.txt','r') as f:
lines = f.readlines()
status=[]
for line in lines:
status.append(line.strip().split(','))
tr_status = pd.DataFrame(status).loc[:,:5]
tr_status.columns=['uid','retweet','review','source','time','content']
tr_status.to_csv('train_status.csv',index=False)
display(tr_status.head())
with open('valid/valid_status.txt','r') as f:
lines = f.readlines()
status=[]
for line in lines:
status.append(line.strip().split(','))
v_status = pd.DataFrame(status).loc[:,:5]
v_status.columns=['uid','retweet','review','source','time','content']
v_status.to_csv('valid_status.csv',index=False)
display(v_status.head())