参考资料

https://www.cnblogs.com/tina-python/p/5508402.html

原数据

\"在这里插入图片描述\"

目标

将如上混杂在一列的数据按特征分隔开。

代码

import xlrd
import xlwt
xlsfile=r\'C:\\Users\\Administrator\\Desktop/SN_air.xlsx\'
book=xlrd.open_workbook(xlsfile)
##按索引获取表数据
sheet0=book.sheet_by_index(0)
print(\"1、\",sheet0)
##获取工作表名称
sheet_name=book.sheet_names()[0]
print(\"2、\",sheet_name)
nrows=sheet0.nrows
print(\"3、\",nrows)
ncols=sheet0.ncols
print(\"4\",ncols)
workbook=xlwt.Workbook()   #写EXCEL文件
worksheet = workbook.add_sheet(\'My Sheet\')
name=(\"品牌\",\"型号\",\"功率等级\",\"类型\",\"变频or定频\",\"冷暖or单冷\",\"备注\",\"价格\")
for i in range(0,8):
    worksheet.write(0,i,name[i])
    print(name[i])


import re
##备注信息
pattern = r\'(【[^【]+】)|(\\w+】)\'
#循环打印每一行的内容
for i in range(0,int(nrows)):
    print(i,sheet0.cell_value(i, 0))
    sadd1=re.search(pattern,sheet0.cell_value(i,0),re.M|re.I)##提取备注内容
    if sadd1:
        sadd=sadd1.group()
    else:
        sadd=\"\"
    print(\"备注:\",sadd)
    s=re.sub(pattern,r\'\', sheet0.cell_value(i, 0))##替换备注内容
    print(i,s)
    #筛选名字

    matchObj = re.search( r\'(.*?) (.*)\',s, re.I)##对大小写不敏感
    if matchObj:
        print(\"Name: \", matchObj.group(1))#group(1) 列出第一个括号匹配部分
        sname = matchObj.group(1)
        s2 = matchObj.group(2)
        print(\"s2\",s2)

    #空调功率等级
    levelp = r\'(([正大小]\\d+|[1-9]|[正大小1-9]\\.\\d+)(匹|P)).*?\'
    matchObj2 = re.search(levelp ,s2, re.M|re.I)
    if(matchObj2):
        slevel=matchObj2.group(1)
    else:
        slevel=\'\'
    print(\"Level is\",slevel)
    s3= re.sub(levelp, r\'\', s2)
    #print(\"s3\",s3)

    # 类型是挂机还是柜机
    stype = re.search(r\'挂机\', s, re.M | re.I)
    if (stype):
        stype = \'壁挂式\'
    elif re.search(r\'柜机\', s, re.M | re.I):
        stype = \'柜式\'
    else:
        stype = \'\'
    print(\"Type is: \", stype)

    ###型号
    idp = r\'((\\w+(\\-)\\w+[a-zA-Z1-9])|(\\w+(\\-)\\w+(\\/)\\w+[a-zA-Z1-9])|([a-zA-Z]+\\d+)|([a-zA-Z]+\\s\\w+\\d+)).*\'
    matchObj3 = re.search(idp, s3, re.M | re.I)
    if matchObj3:
        print(\"standNo. is: \", matchObj3.group(1))
        sid = matchObj3.group(1)
    else:
        sid=\' \'
        print(i,sname,sid)
    ##变频or定频
    smode = re.search(r\'变频\', s, re.M | re.I)
    if (smode):
        smode = \'变频\'
    elif re.search(r\'定频\', s, re.M | re.I):
        smode = \"定频\"
    else:
        smode = \'\'
    print(\"Mode is: \", smode)
    ##冷暖or单冷
    suse = re.search(r\'冷暖\', s, re.M | re.I)
    if (suse):
        suse = \'冷暖\'
    elif re.search(r\'单冷\', s, re.M | re.I):
        suse = \"单冷\"
    else:
        suse = \'\'
    print(\"Use is: \", suse)

    worksheet.write(i+1, 0,sname)
    worksheet.write(i+1, 1,sid)
    worksheet.write(i+1, 2,slevel)
    worksheet.write(i+1, 3,stype)
    worksheet.write(i+1, 4,smode)
    worksheet.write(i+1, 5,suse)
    worksheet.write(i+1, 6,sadd)
    worksheet.write(i+1, 7,sheet0.cell_value(i, 1))
workbook.save(r\"C:\\Users\\Administrator\\Desktop/zuoye_air.xls\")
收藏 打印