I see, does this make it better?
The input file:
My Document = some_doc.docx
The input file:
My Document = some_doc.docx
import os import re from docx import Document f = open("sometext.txt", "w") def get_inputs(): global my_doc, my_doc_file input_file = open('input.txt','r') #reading inputs text file names = input_file.readlines() input_file.close() for i in range(0,len(names)): if re.search(r'My document',names[i],re.IGNORECASE): pos = str(names[i]).rfind("=") my_doc = str(names[i][pos+1:]).strip() if os.stat(my_doc).st_size == 0: #checking the size of my_doc, if zero then message displays print("Empty Document! Please check and retry!") my_doc_file = Document(my_doc) #reading the .docx file, throws error if it does not exist print(my_doc, my_doc_file) def extract(my_doc): tlist = [] tab_list = [] #global my_doc, my_doc_file for table in my_doc.tables: #looping through all tables in the .docx file if re.search("mystring", table.cell(0,1).text, re.IGNORECASE): for row in table.rows: #looping through all rows in the table under consideration for cell in row.cells: #looping through all cells(grid cells are considered, not actual) in the row under consideration tabdata = cell.text tabdata = re.sub(r'\s+',"",tabdata) #cell.text is the text in each cell tlist.append(tabdata) #appending to tlist(tlist is a list/array) a list of all the text in a row tab_list.append(tlist) #list of all rows tlist = [] f.write(tab_list) tab_list=[] def main(): global my_doc, my_doc_file get_inputs() extract(my_doc_file) print("DONE!") main()and the error is:
Error:Traceback (most recent call last):
File "gen_scr.py", line 44, in <module>
main()
File "gen_scr.py", line 41, in main
extract(my_doc_file)
File "gen_scr.py", line 27, in extract
if re.search("mystring", table.cell(0,1).text, re.IGNORECASE):
File "$PYTHONPATH/python3.6/site-packages/docx/table.py", line 81, in cell
return self._cells[cell_idx]
IndexError: list index out of range