表格内容如下:

\"python读取word,插入mysql\"

1、实现批量导入word文档,取文档标题中的数字作为编号

2、除取上面打钩的内容需要匹配出来入库入库,其他内容全部直接入库mysql

# wuyanfeng
# -*- coding:utf-8 -*-
# 读取docx中的文本代码示例
import docx
import pymysql
import re
import os

# 创建数据库链接
conn = pymysql.connect(
 host=\'rm-bp1vu5d84dg12c6d59o.mysql.rds.aliyuncs.com\',
 port=3306,
 user=\'root\',
 passwd=\'wYf092415*\',
 db=\'pays\',
 charset=\'utf8\',
)
# 创建游标
cursor = conn.cursor()

#切片函数
def section(info,key,len11):
 a = len(info)
 print(a, type(a))
 d = []
 e = 0
 g = -1
 i = 0
 task_class=[]
 while i < len(info):
  # for i in range(len(info)):
  # i+=1
  print(\"i::::\", i)
  try:
   #c = info.index(\"a\", e)
   #print(\"c:::::\", c)
   c = info.index(key, e)
   #print(\"c:::::\", c)

   print(\"c类型判断\",type(c))
  except ValueError:
   print(ValueError)
  try:
   if (c != \'\') & (g < int(c)):
    d.append(c)
    g = c
    i = c + 1
    print(\"illlldddd:\", i)
    e = c + 1
    continue

   elif (c == \'\'):
     break
  except UnboundLocalError:
   print(UnboundLocalError)

   return task_class
  break
 print(\"d\", d, type(d))
 print(d[0], type(d[0]))
 print(\"d的长度:\",len(d))
 #开始切片
 if len(d) != 0:
  for j in range(len(d)):
   print(\"info11:::\", info, type(info))
   info = \'\'.join(info)
   print(\"info222:::\",info,type(info))
   print(\"d[%d]\"%j,d[j])
   #print(\"d[j]:5\"%j,info[d[j]:5])

   llll = info[d[j]+1:d[j]+5]
   print(\"d[%d]:5\" % j, llll)
   task_class.append(llll)
   print(\"task_class::11\", task_class)

 task_class=\",\".join(task_class)
 print(\"str1112222\",task_class)
 return task_class


def insettable(file):
 print(\"file::::::::::::::::::::\", file)
 print(\"type::::::::::::::::::::\", type(file))
 # file1 = file
 # file1 = str(file1)
 ddd = re.findall(\"知识库\\\\\\(\\d+)\", file)
 print(\"ddd:::::::::::\", ddd)
 print(\"ddd[0]:::\", ddd[0])
 ddd = int(ddd[0])
 print(\"ddd::::\", type(ddd))

 file = docx.Document(file)
 # 读取表格:
 t = file.tables[0]
 print(t)
 print(\"1:\", t.cell(0, 0).text) # 1
 cell1 = t.cell(0, 0).text
 print(\"tyep::::\", type(t.cell(0, 0).text))

 print(\"2:\", t.cell(0, 1).text) # 2
 cell2 = t.cell(0, 1).text

 print(\"2:\", t.cell(0, 2).text) # 2
 cell3 = t.cell(0, 2).text

 print(\"2:\", t.cell(0, 3).text) # 2
 cell4 = t.cell(0, 3).text
 print(\"cell4:::::::::\", cell4)

 print(\"3:\", t.cell(1, 0).text) # 3
 cell5 = t.cell(1, 0).text

 print(\"4:\", t.cell(1, 2).text) # 4
 cell6 = t.cell(1, 2).text

 print(\"5:\", t.cell(1, 3).text) # 5
 task_type = t.cell(1, 3).text
 # task_type = re.findall(\'.*[☑√](.*)$\', cell7)
 # task_type = \'\'.join(cell7)
 print(\"task_type111111:\", task_type)
 # task_class = task_class[0:4]
 \'\'\'低级处理方式
 a = int(task_type.count(\"☑\"))
 print(\"a|||||||\", a, type(a))
 b = int(task_type.count(\"√\"))
 print(\"b|||||||\", b, type(a))
 if (a == 1) | (b == 1):
  print(\"111111111111111111\")
  # task_type = re.findall(\'.*[☑√](.*)$\', task_type)
  task_type = re.findall(\'.*[☑√](.*)$\', task_type)
  print(\"task_type1\", task_type)
  task_type = \'\'.join(task_type)
  print(\"task_type2\", task_type)
  task_type = task_type[0:4]
  print(\"task_type3:d:%s,b=%d\" % (a, b), task_type)
 elif (a == 0) & (b == 0):
  print(\"2222222222222222222\")
  task_type = \'法定职责\'
  print(\"a:%s,b=%s\" % (a, b), task_type)
 elif (a == 2) | (b == 2):
  print(\"333333333333333333333\")
  task_type = \'法定职责,工作职责 \'
  print(\"a:%s,b=%s\" % (a, b), task_type)
 \'\'\'
 #调用切片函数
 task_type1 = section(task_type, \"√\", 4)
 task_type2 = section(task_type, \"☑\", 4)
 task_type1 = \"\".join(task_type1)
 task_type2 = \"\".join(task_type2)
 print(\"task_type1:::\", task_type1,type(task_type1))
 print(\"task_type2:::\", task_type2,type(task_type2))
 if task_type1.strip()!=\"\":
  task_type = task_type1
  print(\"task_type111:::\", task_type1)
 elif task_type2.strip()!=\"\":
  task_type = task_type2
  print(\"task_type222:::\", task_type2)

 print(\"6:\", t.cell(1, 4).text) # 6
 cell8 = t.cell(1, 4).text

 print(\"7:\", t.cell(2, 1).text) # 7
 cell9 = t.cell(2, 1).text

 # 获取文档对象
 # file = docx.Document(\"D:\\\\配置库\\\\公案APP\\\\1.2 系统规格\\\\知识库\\\\14人员死亡先期处置.docx\")
 print(\"段落数:\" + str(len(file.paragraphs))) # 段落数为13,每个回车隔离一段
 lenn = len(file.paragraphs)
 print(\"len:\", lenn)
 # 输出每一段的内容
 for para in file.paragraphs:
  print(para.text)

 # 输出段落编号及段落内容
 for i in range(len(file.paragraphs)):
  print(\"第\" + str(i) + \"段的内容是:\" + file.paragraphs[i].text)

 list6 = []
 for i in range(len(file.paragraphs)):
  if 0 == i:
   print(\"i:\", i)
   lis0 = file.paragraphs[i].text
   print(\"list0:\", lis0)
   print(type(lis0))

  elif 1 == i:
   print(\"i:\", i)
   task_class = file.paragraphs[i].text
   print(\"lis1\", task_class,type(task_class))
   \'\'\'低级处理方式
   print(\"task_class111111:\", task_class)

   c = int(task_class.count(\"☑\"))
   task_class = \'\'.join(task_class)
   #print(task_class.index(\'☑\'))
   print(\"c|||||||\", c, type(c))
   d = int(task_class.count(\"√\"))
   print(task_class.index(\'√\'))

   print(\"d|||||||\", d, type(d))
   task_class = re.findall(r\'[☑√](?:.*)\', task_class)
   task_class = \'\'.join(task_class)
   task_class = task_class[1:5]
   print(\"task_class\", task_class)
  \'\'\'
   #调用切片函数
   task_class1 = section(task_class, \"√\", 4)
   task_class2 = section(task_class, \"☑\", 4)
   task_class1 = \"\".join(task_class1)
   task_class2 = \"\".join(task_class2)
   print(\"task_class1:::\", task_class1,type(task_class1))
   print(\"task_class2:::\", task_class2,type(task_class2))
   if task_class1.strip()!=\"\":
    task_class = task_class1
    print(\"task_class11:::\", task_class1)
   elif task_class2.strip()!=\"\":
    task_class = task_class2
    print(\"task_class22:::\", task_class2)


  if 2 == i:
   print(\"i:\", i)
   lis2 = file.paragraphs[i].text

   print(\"lis2\", lis2)
   print(type(lis2))
   preparer = re.findall(\'填表单位:(.*?)$\', lis2)
   preparer = \'\'.join(preparer)
   print(\"preparer:%s\" % preparer)

  # elif 3 == i:
  #  print(\"i:\", i)
  #  lis3 = file.paragraphs[i].text
  elif 3 == i:
   print(\"i:\", i)
   lis4 = file.paragraphs[i].text
   print(\"lis4\", lis4)
   print(type(lis4))
  elif 3 < i < lenn - 1:
   print(\"i:\", i)
   print(file.paragraphs[i].text)
   print(type(file.paragraphs[i].text))
   # list6[i-5] = list6.append(file.paragraphs[i].text)
   list6.append(str(file.paragraphs[i].text).strip(\'\\xa0\'))
   # list6.append(\"%s\\n\" % str(file.paragraphs[i].text).strip(\'\\xa0\'))
   print(list6)
 key_steps = \"\\n\".join(list6)
 # print(\"key_steps:\\n\",key_steps.strip(\'\\n\'))

 cursor.execute(
  \"insert into `t_knowledge_ ` (`no`, `preparer`, `task_class`, `task_name`, `task_specification`, `task_type`, `task_desc`, `task_basis`, `key_steps`) values (\'%d\',\'%s\',\'%s\',\'%s\',\'%s\',\'%s\',\'%s\',NULL,\'%s\')\" % (
   ddd, preparer, task_class, cell2, cell4, task_type, cell9, key_steps))
 conn.commit()


def traverse(f):
 fs = os.listdir(f)
 for f1 in fs:
  tmp_path = os.path.join(f, f1)
  if not os.path.isdir(tmp_path):
   print(\'文件: %s\' % tmp_path)
   insettable(tmp_path)
  else:
   print(\'文件夹:%s\' % tmp_path)
   traverse(tmp_path)


path = \'D:\\\\配置库\\公案APP\\\\1.2 系统规格\\\\知识库\'
traverse(path)

#单文件调测
# path = \'D:\\\\配置库\\\\公案APP\\\\1.2 系统规格\\\\知识库\\\\14人员死亡先期处置.docx\'
# insettable(path)

# 关闭游标
cursor.close()
# 关闭连接
conn.close()

以上这篇python读取word文档,插入mysql数据库的示例代码就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持脚本之家。

收藏 打印