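# 读取 txt 中的字段 key,为每个 key 编号后再输出
# (Read the key fields from a txt file, assign each key a number, then write the result out.)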

import pandas as pd
# Field delimiter used to split each input line into columns.
sep="|"

def read_key(dict_key, arr_fileld, idx=None):
    """Register every new key found in the selected fields of one record.

    Each selected field is treated as a comma-separated list of keys. A key
    that is not yet in ``dict_key`` is added with the value
    ``len(dict_key) + 1``, so an initially empty dict hands out 1, 2, 3, ...
    in first-seen order. ``dict_key`` is updated in place.

    Args:
        dict_key: Mapping from key string to integer ID; mutated in place.
        arr_fileld: Sequence of field strings for one record.
        idx: Positions in ``arr_fileld`` whose fields hold keys. ``None`` or
            an empty sequence makes the call a no-op.

    Returns:
        None.

    Raises:
        IndexError: If a position in ``idx`` is outside ``arr_fileld``.
    """
    # ``None`` replaces the former mutable ``[]`` default; both mean
    # "no key columns", so existing callers see no difference.
    if idx is None or len(idx) == 0:
        return
    for index in idx:
        for key in arr_fileld[index].split(","):
            # setdefault only inserts when the key is absent, and the ID is
            # computed before the insertion, so it equals the old size + 1.
            dict_key.setdefault(key, len(dict_key) + 1)

def replace_key(dict_key, arr_fileld, idx=None):
    """Rebuild one record with the keys in the selected fields replaced by IDs.

    Fields at the positions listed in ``idx`` are treated as comma-separated
    lists of keys; every key is replaced by ``str(dict_key[key])`` and the
    IDs are joined with ``","`` again. All other fields are copied through
    unchanged. The resulting fields are joined with ``"|"``.

    Args:
        dict_key: Mapping from key string to ID.
        arr_fileld: Sequence of field strings for one record.
        idx: Positions in ``arr_fileld`` whose fields hold keys. ``None`` or
            an empty sequence yields an empty string.

    Returns:
        The rebuilt record as a single string, or ``""`` when ``idx`` is
        ``None`` or empty.

    Raises:
        KeyError: If a key in a selected field is missing from ``dict_key``.
    """
    # ``None`` replaces the former mutable ``[]`` default; both mean
    # "no key columns", so existing callers see no difference.
    if idx is None or len(idx) == 0:
        return ""
    # Build the lookup set once instead of scanning ``idx`` for every field.
    key_positions = set(idx)
    res = []
    for i, field in enumerate(arr_fileld):
        if i in key_positions:
            field = ",".join(str(dict_key[key]) for key in field.split(","))
        res.append(field)
    return "|".join(res)


def read_file(file_name, file_out, idx):
    """Replace the keys in selected columns of a delimited file with IDs.

    The input file is read twice. The first pass collects every key that
    occurs in the columns listed in ``idx`` and assigns it an ID via
    ``read_key``. The second pass writes each record to ``file_out`` with
    those keys replaced via ``replace_key``.

    The line terminator is removed before a line is split and put back when
    the record is written. Previously the ``"\\n"`` stayed attached to the
    last field, so when the last column was a key column the key was stored
    with the newline glued on and the written ID lost its line break,
    running consecutive records together.

    Args:
        file_name: Path of the UTF-8 input file, one record per line, fields
            separated by the module-level ``sep``.
        file_out: Path of the UTF-8 output file; overwritten.
        idx: Column positions whose fields hold comma-separated keys.
    """
    dict_key = {}

    # Pass 1: build the key -> ID table.
    with open(file_name, "r", encoding="utf-8") as f:
        for line in f:
            read_key(dict_key, line.rstrip("\n").split(sep), idx)

    # Pass 2: rewrite every record using the completed table.
    with open(file_name, "r", encoding="utf-8") as f:
        with open(file_out, "w", encoding="utf-8") as fout:
            for line in f:
                body = line.rstrip("\n")
                res = replace_key(dict_key, body.split(sep), idx)
                # replace_key returns "" when there are no key columns; keep
                # writing nothing in that case, exactly as before.
                if res:
                    # line[len(body):] is the terminator stripped above
                    # ("" for an unterminated final line).
                    fout.write(res + line[len(body):])


import json
from datetime import datetime

# Load the list of names; presumably one name per column of the data file,
# in column order -- TODO confirm against the data layout.
# The encoding is given explicitly so the result does not depend on the
# platform's default locale, matching how read_file opens its files.
with open("../conf/names.json", "r", encoding="utf-8") as f:
    names = json.load(f)

# Positions of the names that contain "room_id", or both "room" and "idx".
idx = [
    i
    for i, name in enumerate(names)
    if "room_id" in name or ("room" in name and "idx" in name)
]

# Run the conversion and report how long it took.
cur_time1 = datetime.now()
read_file("../data/xxx", "../data/out", idx)
cur_time2 = datetime.now()

time_span = cur_time2 - cur_time1
print("time", time_span)

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值