很多人对“Python如何操作目标检测数据集xml”这个知识点还不太熟悉,因此小编总结了以下内容,内容详细、步骤清晰,具有一定的借鉴价值,希望大家阅读完这篇文章后能有所收获。下面我们一起来看看这篇“Python如何操作目标检测数据集xml”文章吧。

1. 根据xml文件统计目标种类以及数量

# -*- coding: utf-8 -*-
"""Count how many objects of each class appear in a directory of XML annotations.

Each annotation file is expected to hold ``<object>`` elements directly under
the root, each with a ``<name>`` child carrying the class label.
"""
import os
import sys
import xml.etree.ElementTree as ET
from collections import Counter

import numpy as np

# threshold=np.nan is rejected by current NumPy ("threshold must be non-NAN");
# sys.maxsize is the documented way to request untruncated array output.
np.set_printoptions(suppress=True, threshold=sys.maxsize)


def parse_obj(xml_path, filename):
    """Return the objects listed in one annotation file.

    Args:
        xml_path: Directory containing the annotation file (a trailing
            separator is optional).
        filename: File name of the annotation, including the extension.

    Returns:
        A list with one ``{'name': <class label>}`` dict per ``<object>``
        element, in document order.

    Raises:
        AttributeError: If an ``<object>`` element has no ``<name>`` child.
    """
    tree = ET.parse(os.path.join(xml_path, filename))
    return [{'name': obj.find('name').text} for obj in tree.findall('object')]


def read_image(image_path, filename):
    """Return ``[width, height, width * height]`` for one image file.

    Args:
        image_path: Directory containing the image.
        filename: File name of the image, including the extension.
    """
    # Imported lazily so the XML-only statistics run without Pillow installed.
    from PIL import Image

    # The context manager closes the underlying file handle promptly.
    with Image.open(os.path.join(image_path, filename)) as im:
        width, height = im.size
    return [width, height, width * height]


def count_objects(xml_path):
    """Count objects per class over every ``.xml`` file in ``xml_path``.

    Entries that do not end in ``.xml`` are ignored. Files are visited in
    sorted order so the class order of the result is reproducible.

    Returns:
        A ``collections.Counter`` mapping class label to object count, with
        classes in first-seen order.
    """
    counts = Counter()
    for filename in sorted(os.listdir(xml_path)):
        if not filename.endswith('.xml'):
            continue
        counts.update(rec['name'] for rec in parse_obj(xml_path, filename))
    return counts


if __name__ == '__main__':
    xml_path = '/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/Annotations/'
    num_objs = count_objects(xml_path)
    for name, count in num_objs.items():
        print('{}:{}个'.format(name, count))
    print('信息统计算完毕。')

# 2. 根据xml文件统计目标的平均长度、宽度、面积以及每一个目标在原图中的占比

# -*- coding: utf-8 -*-
"""Per-class size statistics for objects in XML annotations.

For every object this computes its width, height and area, plus each of those
relative to the image it belongs to, and then reports the per-class averages.
"""
import os
import xml.etree.ElementTree as ET

import numpy as np

# Raise the summarisation threshold (10,000,000 elements) so arrays print in full.
np.set_printoptions(suppress=True, threshold=10000000)

_BBOX_TAGS = ('xmin', 'ymin', 'xmax', 'ymax')


def _to_int(text):
    """Parse a coordinate string, accepting float notation such as ``'24.0'``.

    Fractional parts are truncated toward zero.
    """
    return int(float(text))


def parse_obj(xml_path, filename):
    """Return the labelled boxes listed in one annotation file.

    Args:
        xml_path: Directory containing the annotation file (a trailing
            separator is optional).
        filename: File name of the annotation, including the extension.

    Returns:
        A list with one dict per ``<object>`` element, in document order:
        ``{'name': <class label>, 'bbox': [xmin, ymin, xmax, ymax]}``.

    Raises:
        AttributeError: If an ``<object>`` lacks ``<name>``, ``<bndbox>`` or
            one of the four coordinate children.
        ValueError: If a coordinate is not numeric.
    """
    tree = ET.parse(os.path.join(xml_path, filename))
    objects = []
    for obj in tree.findall('object'):
        bbox = obj.find('bndbox')
        objects.append({
            'name': obj.find('name').text,
            'bbox': [_to_int(bbox.find(tag).text) for tag in _BBOX_TAGS],
        })
    return objects


def read_image(image_path, filename):
    """Return ``[width, height, width * height]`` for one image file.

    Args:
        image_path: Directory containing the image.
        filename: File name of the image, including the extension.
    """
    # Imported lazily so the XML parsing helpers stay usable without Pillow.
    from PIL import Image

    # The context manager closes the underlying file handle promptly.
    with Image.open(os.path.join(image_path, filename)) as im:
        width, height = im.size
    return [width, height, width * height]


def collect_object_shapes(recs, ims_info):
    """Group per-object measurements by class.

    Args:
        recs: Mapping of sample name to the list returned by ``parse_obj``.
        ims_info: Mapping of sample name to the list returned by
            ``read_image``; must contain every key of ``recs``.

    Returns:
        A dict mapping class label to a list of rows
        ``[w, h, area, w / image_w, h / image_h, area / image_area]``,
        with classes in first-seen order.
    """
    obs_shape = {}
    for name, objects in recs.items():
        im_w, im_h, im_area = ims_info[name]
        for obj in objects:
            xmin, ymin, xmax, ymax = obj['bbox']
            ob_w = xmax - xmin
            ob_h = ymax - ymin
            ob_area = ob_w * ob_h
            # float() keeps the ratios fractional even under Python 2, where
            # int / int would truncate to 0.
            row = [
                ob_w,
                ob_h,
                ob_area,
                ob_w / float(im_w),
                ob_h / float(im_h),
                ob_area / float(im_area),
            ]
            obs_shape.setdefault(obj['name'], []).append(row)
    return obs_shape


def average_object_shapes(obs_shape):
    """Average the rows produced by ``collect_object_shapes`` per class.

    Returns:
        A dict mapping class label to a length-6 NumPy array holding the
        column-wise mean of that class's rows.
    """
    return {
        name: np.asarray(rows, dtype=float).mean(axis=0)
        for name, rows in obs_shape.items()
    }


if __name__ == '__main__':
    image_path = '/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/JPEGImages/'
    xml_path = '/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/Annotations/'

    # Only real annotation files; splitext strips just the extension.
    filenames = sorted(
        os.path.splitext(entry)[0]
        for entry in os.listdir(xml_path)
        if entry.endswith('.xml')
    )
    print(filenames)

    recs = {}
    ims_info = {}
    for name in filenames:
        print('正在处理{}.xml'.format(name))
        recs[name] = parse_obj(xml_path, name + '.xml')
        print('正在处理{}.jpg'.format(name))
        ims_info[name] = read_image(image_path, name + '.jpg')
    print('所有信息收集完毕。')
    print('正在处理信息......')

    obj_avg = average_object_shapes(collect_object_shapes(recs, ims_info))
    for name, avg in obj_avg.items():
        print('{}的情况如下:*******'.format(name))
        print('目标平均W={}'.format(avg[0]))
        print('目标平均H={}'.format(avg[1]))
        print('目标平均area={}'.format(avg[2]))
        print('目标平均与原图的W比例={}'.format(avg[3]))
        print('目标平均与原图的H比例={}'.format(avg[4]))
        print('目标平均原图面积占比={}'.format(avg[5]))
    print('信息统计计算完毕。')

# 3. 修改xml文件中某个目标的名字为另一个名字

"""Rename object classes inside a directory of XML annotation files."""
import glob
import os
import sys
from xml.etree import ElementTree as ET

# Old class label -> new class label.
RENAME_MAP = {
    'PinNormal': 'normalbolt',
    'PinDefect': 'defectbolt-1',
}


def rename_objects(xml_file, rename_map=None):
    """Rewrite the ``<name>`` of matching ``<object>`` elements in one file.

    Args:
        xml_file: Path of the annotation file; it is modified in place.
        rename_map: Mapping of old label to new label. Defaults to
            ``RENAME_MAP``.

    Returns:
        The number of objects that were renamed. The file is only written
        back when this is non-zero.
    """
    if rename_map is None:
        rename_map = RENAME_MAP

    tree = ET.parse(xml_file)
    renamed = 0
    # A relative path avoids the deprecated absolute form '/object'.
    for obj in tree.findall('object'):
        # Look the label up by tag: Element.getchildren() was removed in
        # Python 3.9, and <name> is not guaranteed to be the first child.
        name_node = obj.find('name')
        if name_node is None:
            continue
        new_name = rename_map.get(name_node.text)
        if new_name is None:
            continue
        name_node.text = new_name
        renamed += 1
        print(name_node.tag, ':', name_node.text)

    if renamed:
        tree.write(xml_file)
    return renamed


if __name__ == '__main__':
    # Process every XML file under the Annotations directory.
    xml_dir = r'/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/Annotations'
    for xml in sorted(glob.glob(os.path.join(xml_dir, '*.xml'))):
        print(xml)
        rename_objects(xml)

以上就是“Python如何操作目标检测数据集xml”这篇文章的全部内容,相信大家已经有了一定的了解,希望小编分享的内容对大家有所帮助。若想了解更多相关的知识内容,请关注亿速云行业资讯频道。