您的当前位置：首页 MOOC-人工智能实践：Tensorflow笔记4.2自制数据集txt文件和数据集不匹配解决办法（读取文件名并提取内容并合并原始内容与提取内容）

MOOC-人工智能实践：Tensorflow笔记4.2自制数据集txt文件和数据集不匹配解决办法（读取文件名并提取内容并合并原始内容与提取内容）

来源：刀刀网

1、读取文件名称生成txt文件

# P01 批量读取文件名，并将读取的文件名保存到指定路径下的txt中（带.*** 后缀）

import os
def ListFilesToTxt(dir, file, wildcard, recursion):
    """Write the names of entries under *dir* whose suffix matches *wildcard*.

    Args:
        dir: Path of the directory to scan.
        file: Writable text stream. Each matching name is written on its own
            line as a bare name (the directory part is not included).
        wildcard: Whitespace-separated suffixes, e.g. ".jpg .txt". An empty
            or all-blank string matches every name.
        recursion: Truthy to descend into sub-directories. When falsy, a
            sub-directory is treated like a file and its own name is tested
            against the suffixes.
    """
    # split() without an argument drops empty tokens, so repeated spaces no
    # longer produce an "" suffix that would match every name. A wildcard
    # with no tokens at all keeps its historical "match everything" meaning.
    exts = tuple(wildcard.split()) or ("",)
    for name in os.listdir(dir):
        fullname = os.path.join(dir, name)
        # Logical `and` (not bitwise `&`) so any truthy flag enables recursion.
        if recursion and os.path.isdir(fullname):
            ListFilesToTxt(fullname, file, wildcard, recursion)
        elif name.endswith(exts):
            file.write(name + "\n")

def ReadName():
    """Write the names of the .jpg files in the test-image folder to Image1.txt."""
    dir = "F:/1.Research/2022/Tensorflow/class4/MNIST_FC//mnist_image_label/mnist_test_jpg_10000"  # folder holding the test-set images
    outfile = "F:/1.Research/2022/Tensorflow/class4/MNIST_FC//mnist_image_label/Image1.txt"  # the test-set file names are written here
    wildcard = ".jpg"  # only list jpg images
    #   wildcard = ".jpg .txt .exe .dll .lib"      # example: several file types at once
    # open() raises OSError on failure instead of returning a falsy value, so
    # no explicit check is needed; `with` closes the file even if listing fails.
    with open(outfile, "w") as file:
        ListFilesToTxt(dir, file, wildcard, 1)

# Run the listing step immediately so Image1.txt is produced when the script executes.
ReadName()

借鉴：https://blog.csdn.net/GeekYao/article/details/105074574
运行结果：

import re

# Read Image1.txt (one file name per line), pull out the text that sits between
# an underscore and the next dot in each name, and save those values to
# train1.txt, one per line. For example, a name such as "0_7.jpg" yields "7".
# NOTE(review): train1.txt is written relative to the current working directory,
# while the merge step further down reads it from the mnist_image_label folder —
# run the script from that folder, or confirm the intended location.

# Collected values, in the same order as the lines of Image1.txt
temp = []

# `with` guarantees the input file is closed once every line has been read
with open("F:/1.Research/2022/Tensorflow/class4/MNIST_FC/mnist_image_label/Image1.txt", "r", encoding="utf-8") as read_txt:
    for line_no, line in enumerate(read_txt, start=1):
        # Non-greedy capture of everything between "_" and the following "."
        b = re.findall(r'[_](.*?)[.]', line)
        if not b:
            # Fail with a clear message rather than an opaque IndexError;
            # silently skipping the line would misalign names and values later.
            raise ValueError(
                "line %d of Image1.txt has no '_<value>.' part: %r" % (line_no, line)
            )
        temp.append(b[0])  # the first match is the value we want

# Write the collected values; the context manager closes the file on exit
with open("train1.txt", "w", encoding='utf-8') as file:
    for i in temp:
        file.write(i + '\n')

'''train1.txt 文件内容'''
# 0.079
# 0.015
# 0.015

# 于2022-05-15这里插入代码片

运行结果：

# Merge Image1.txt and train1.txt line by line into Merge1.txt: each output
# line is the Image1.txt entry, four spaces, then the matching train1.txt entry.

# Entries from Image1.txt with surrounding whitespace removed
with open('F:/1.Research/2022/Tensorflow/class4/MNIST_FC/mnist_image_label/Image1.txt', 'r') as f_:
    list1 = [i.strip() for i in f_]
n = len(list1)

# Entries from train1.txt with surrounding whitespace removed
with open('F:/1.Research/2022/Tensorflow/class4/MNIST_FC/mnist_image_label/train1.txt', 'r') as ff_:
    list2 = [i.strip() for i in ff_]
m = len(list2)

# Check up front so a short train1.txt is reported clearly instead of failing
# with an IndexError after Merge1.txt has been partially written.
if m < n:
    raise ValueError(
        "train1.txt has %d lines but Image1.txt has %d; cannot pair them" % (m, n)
    )

with open('Merge1.txt', 'w') as fff_:
    # zip stops after the n entries of list1; extra train1.txt lines are ignored
    for name, value in zip(list1, list2):
        fff_.write(name + '    ' + value + '\n')

# python在文本开头插入一行
#with open('1221.txt', 'r+') as f:
 #   content = f.read()
  #  f.seek(0, 0)
   # f.write('QGC WPL 110\n'+content)

运行结果：

因篇幅问题不能全部显示，请点此查看更多更全内容

查看全文