Hi Expert,
I am trying to use a model ID in my Form Recognizer Python script. Currently I use only the endpoint URL and API key, and the data is exported correctly, but when I add the model ID it is not picked up. How can I use it? Here is the code I tried:
from django.shortcuts import render
import os
from django.http import HttpResponse
import csv
import re
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient
from azure.storage.blob import BlobClient
# Create your views here.
def download_blob(blob_name, output_path):
    """
    Download a blob from the ``demo`` container to a local file.

    The local file keeps the base name of ``blob_name`` (any directory-like
    prefix is dropped) and is written inside ``output_path``.

    :param blob_name: Name of the blob inside the container.
    :param output_path: Local directory to write into; ``''`` yields a path
        relative to the current working directory.
    :return: Path of the local file that was written.
    """
    # SECURITY(review): a storage account key is committed in source here.
    # Rotate it and supply the connection string through the environment;
    # the literal is kept only as a fallback so existing setups keep working.
    conn_str = os.environ.get(
        'AZURE_STORAGE_CONNECTION_STRING',
        'DefaultEndpointsProtocol=https;AccountName=demoretail;AccountKey=jSZtsbMoGpmViFuWtTXDwEJEktIs24oUAIPSz9tSiZ25zCPe0mFRWC6V0gvlZCcGU0HcxCTdV1GsAl5vMwnanA==;EndpointSuffix=core.windows.net',
    )

    _, filename = os.path.split(blob_name)
    destination_file = os.path.join(output_path, filename)

    blob_client = BlobClient.from_connection_string(
        conn_str=conn_str,
        container_name='demo',
        blob_name=blob_name,
    )

    # Start the download before creating the local file, so that a failure to
    # start it does not leave an empty file behind.
    blob_data = blob_client.download_blob()
    with open(destination_file, "wb") as my_blob:
        blob_data.readinto(my_blob)
    return destination_file
# Compiled once: a line starting a header block, and the full header layout.
_PORT_RE = re.compile(r'^col1:(.*)')
_HEADER_RE = re.compile(
    r'.*col1:(.*)Area Name:(.*)Month Reporting:\s*([A-Za-z]{3}-[0-9]{2}).*'
)
# Number of consecutive text lines joined together to form one header string.
_HEADER_LINE_SPAN = 10


def _extract_header_labels(line_texts):
    """Return the header value triples found in one page's text lines.

    Starting at every line that begins with ``col1:``, the next
    ``_HEADER_LINE_SPAN`` lines are joined with spaces and matched against
    ``_HEADER_RE``; each match contributes a list of its three captured
    values (the text after ``col1:``, ``Area Name:`` and ``Month Reporting:``).
    """
    labels = []
    header_text = ''
    collecting = False
    lines_seen = 0
    for text in line_texts:
        if _PORT_RE.match(text):
            lines_seen = 0
            collecting = True
        if collecting and lines_seen < _HEADER_LINE_SPAN:
            header_text += text + ' '
        if lines_seen == _HEADER_LINE_SPAN:
            header_match = _HEADER_RE.match(header_text)
            if header_match:
                labels.append(list(header_match.groups()))
            header_text = ''
            collecting = False
        lines_seen += 1
    # NOTE(review): a header that starts within the last _HEADER_LINE_SPAN
    # lines of a page is never evaluated; kept as in the original logic.
    return labels


def recognize_form_tables(form_path, model_id=None):
    """
    Send a local document to Form Recognizer and collect tables and header labels.

    The file is read fully into memory and then deleted from disk before the
    service is called.

    :param form_path: Path of the local document; the file is removed after reading.
    :param model_id: Optional ID of a custom trained model. When omitted, the
        content/layout API is used. When given, the custom-form API is used
        and the pages of every recognized form are processed the same way.
    :return: Tuple ``(tables, table_label_data)``: every table of every page
        in order, and the header value triples found in the page text.
    """
    # SECURITY(review): an API key is committed in source here. Rotate it and
    # supply it through the environment; the literals are only fallbacks.
    endpoint = os.environ.get(
        'AZURE_FORM_RECOGNIZER_ENDPOINT',
        'https://Test1.cognitiveservices.azure.com/',
    )
    credential = AzureKeyCredential(
        os.environ.get('AZURE_FORM_RECOGNIZER_KEY', 'c82cdbb3ad62438b9e77e7d0dffdf')
    )
    form_recognizer_client = FormRecognizerClient(endpoint, credential)

    with open(form_path, "rb") as fd:
        form = fd.read()
    os.remove(form_path)

    if model_id is None:
        poller = form_recognizer_client.begin_recognize_content(form)
        form_pages = poller.result()
    else:
        # include_field_elements=True is requested so that page.lines is
        # populated for the header scan below.
        poller = form_recognizer_client.begin_recognize_custom_forms(
            model_id=model_id,
            form=form,
            include_field_elements=True,
        )
        form_pages = [
            page
            for recognized_form in poller.result()
            for page in recognized_form.pages
        ]

    tables = []
    table_label_data = []
    for content in form_pages:
        tables.extend(content.tables)
        # "or ()" guards against a page object whose lines attribute is None.
        table_label_data.extend(
            _extract_header_labels(line.text for line in (content.lines or ()))
        )
    return tables, table_label_data
def create_csv(table, path):
    """
    Append the usable rows of ``table`` to the CSV file at ``path``.

    Rows with fewer than 10 cells, or whose fourth cell is empty, are skipped.
    The file is opened in append mode, so repeated calls accumulate rows.

    :param table: Iterable of rows, each a sequence of cell values.
    :param path: Path of the CSV file to append to (created if missing).
    :return: None
    """
    # newline='' leaves line endings to the csv module; without it each row
    # is followed by a blank line on Windows.
    with open(path, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(
            row for row in table if len(row) >= 10 and row[3]
        )
def create_csv_data(tables, table_label_data):
    """
    Flatten recognized tables into rows and append them to ``table.csv``.

    For the first table, a row whose first cell is flagged as a header starts
    with the column names ``Port``, ``Area Name`` and ``Month Reporting``.
    Header cells of later tables are skipped. Every other row starts with the
    header values belonging to its table, when such an entry exists.

    :param tables: Recognized tables; each exposes ``cells`` whose items
        provide ``to_dict()`` with ``row_index``, ``text`` and ``is_header``.
    :param table_label_data: One list of header values per table, in table order.
    :return: None
    """
    for count, t in enumerate(tables, start=1):
        table_data = []
        # Service row index of the output row currently being filled.
        current_row = None
        for cell in t.cells:
            cell = cell.to_dict()
            is_header = bool(cell.get('is_header'))
            if count > 1 and is_header:
                continue
            # Start a new output row whenever the cell's row index changes.
            # Tracking the real index avoids indexing an empty list when a
            # later table has no flagged header row.
            if cell['row_index'] != current_row:
                current_row = cell['row_index']
                if is_header:
                    table_data.append(['Port', 'Area Name', 'Month Reporting'])
                elif len(table_label_data) >= count:
                    # ">=" so the last table also receives its labels; the
                    # entry for table number `count` sits at index count - 1.
                    table_data.append(list(table_label_data[count - 1]))
                else:
                    table_data.append([])
            table_data[-1].append(cell['text'])
        create_csv(table_data, 'table.csv')
        print('Created or updated table.csv file.')
def index(request):
    """
    Django view that downloads the report PDF, extracts its tables and exports CSV.

    The recognized rows are appended to ``table.csv`` and then copied to
    ``Test.csv`` with empty rows removed.

    :param request: Incoming request object (not inspected).
    :return: ``HttpResponse`` with the text ``Load Succeeded``.
    """
    form_path = download_blob('Test_for_MARCH_2022.pdf', '')
    tables, table_label_data = recognize_form_tables(form_path)
    print('form recognize success')
    create_csv_data(tables, table_label_data)

    # NOTE(review): table.csv is only ever appended to, so rows from earlier
    # requests are presumably copied again here; confirm whether it should be
    # cleared per request.
    with open('table.csv', newline='') as in_file, \
            open('Test.csv', 'w', newline='') as out_file:
        writer = csv.writer(out_file)
        # Drop empty rows while copying.
        writer.writerows(row for row in csv.reader(in_file) if row)
    return HttpResponse("Load Succeeded")
I am trying to use a model ID in my Form Recognizer Python script. Currently I use only the endpoint URL and API key, and the data is exported correctly, but when I add the model ID it is not picked up. How can I use it? Here is the code I tried:
from django.shortcuts import render
import os
from django.http import HttpResponse
import csv
import re
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient
from azure.storage.blob import BlobClient
# Create your views here.
def download_blob(blob_name, output_path):
    """
    Download a blob from the ``demo`` container to a local file.

    The local file keeps the base name of ``blob_name`` (any directory-like
    prefix is dropped) and is written inside ``output_path``.

    :param blob_name: Name of the blob inside the container.
    :param output_path: Local directory to write into; ``''`` yields a path
        relative to the current working directory.
    :return: Path of the local file that was written.
    """
    # SECURITY(review): a storage account key is committed in source here.
    # Rotate it and supply the connection string through the environment;
    # the literal is kept only as a fallback so existing setups keep working.
    conn_str = os.environ.get(
        'AZURE_STORAGE_CONNECTION_STRING',
        'DefaultEndpointsProtocol=https;AccountName=demoretail;AccountKey=jSZtsbMoGpmViFuWtTXDwEJEktIs24oUAIPSz9tSiZ25zCPe0mFRWC6V0gvlZCcGU0HcxCTdV1GsAl5vMwnanA==;EndpointSuffix=core.windows.net',
    )

    _, filename = os.path.split(blob_name)
    destination_file = os.path.join(output_path, filename)

    blob_client = BlobClient.from_connection_string(
        conn_str=conn_str,
        container_name='demo',
        blob_name=blob_name,
    )

    # Start the download before creating the local file, so that a failure to
    # start it does not leave an empty file behind.
    blob_data = blob_client.download_blob()
    with open(destination_file, "wb") as my_blob:
        blob_data.readinto(my_blob)
    return destination_file
# Compiled once: a line starting a header block, and the full header layout.
_PORT_RE = re.compile(r'^col1:(.*)')
_HEADER_RE = re.compile(
    r'.*col1:(.*)Area Name:(.*)Month Reporting:\s*([A-Za-z]{3}-[0-9]{2}).*'
)
# Number of consecutive text lines joined together to form one header string.
_HEADER_LINE_SPAN = 10


def _extract_header_labels(line_texts):
    """Return the header value triples found in one page's text lines.

    Starting at every line that begins with ``col1:``, the next
    ``_HEADER_LINE_SPAN`` lines are joined with spaces and matched against
    ``_HEADER_RE``; each match contributes a list of its three captured
    values (the text after ``col1:``, ``Area Name:`` and ``Month Reporting:``).
    """
    labels = []
    header_text = ''
    collecting = False
    lines_seen = 0
    for text in line_texts:
        if _PORT_RE.match(text):
            lines_seen = 0
            collecting = True
        if collecting and lines_seen < _HEADER_LINE_SPAN:
            header_text += text + ' '
        if lines_seen == _HEADER_LINE_SPAN:
            header_match = _HEADER_RE.match(header_text)
            if header_match:
                labels.append(list(header_match.groups()))
            header_text = ''
            collecting = False
        lines_seen += 1
    # NOTE(review): a header that starts within the last _HEADER_LINE_SPAN
    # lines of a page is never evaluated; kept as in the original logic.
    return labels


def recognize_form_tables(form_path, model_id=None):
    """
    Send a local document to Form Recognizer and collect tables and header labels.

    The file is read fully into memory and then deleted from disk before the
    service is called.

    :param form_path: Path of the local document; the file is removed after reading.
    :param model_id: Optional ID of a custom trained model. When omitted, the
        content/layout API is used. When given, the custom-form API is used
        and the pages of every recognized form are processed the same way.
    :return: Tuple ``(tables, table_label_data)``: every table of every page
        in order, and the header value triples found in the page text.
    """
    # SECURITY(review): an API key is committed in source here. Rotate it and
    # supply it through the environment; the literals are only fallbacks.
    endpoint = os.environ.get(
        'AZURE_FORM_RECOGNIZER_ENDPOINT',
        'https://Test1.cognitiveservices.azure.com/',
    )
    credential = AzureKeyCredential(
        os.environ.get('AZURE_FORM_RECOGNIZER_KEY', 'c82cdbb3ad62438b9e77e7d0dffdf')
    )
    form_recognizer_client = FormRecognizerClient(endpoint, credential)

    with open(form_path, "rb") as fd:
        form = fd.read()
    os.remove(form_path)

    if model_id is None:
        poller = form_recognizer_client.begin_recognize_content(form)
        form_pages = poller.result()
    else:
        # include_field_elements=True is requested so that page.lines is
        # populated for the header scan below.
        poller = form_recognizer_client.begin_recognize_custom_forms(
            model_id=model_id,
            form=form,
            include_field_elements=True,
        )
        form_pages = [
            page
            for recognized_form in poller.result()
            for page in recognized_form.pages
        ]

    tables = []
    table_label_data = []
    for content in form_pages:
        tables.extend(content.tables)
        # "or ()" guards against a page object whose lines attribute is None.
        table_label_data.extend(
            _extract_header_labels(line.text for line in (content.lines or ()))
        )
    return tables, table_label_data
def create_csv(table, path):
    """
    Append the usable rows of ``table`` to the CSV file at ``path``.

    Rows with fewer than 10 cells, or whose fourth cell is empty, are skipped.
    The file is opened in append mode, so repeated calls accumulate rows.

    :param table: Iterable of rows, each a sequence of cell values.
    :param path: Path of the CSV file to append to (created if missing).
    :return: None
    """
    # newline='' leaves line endings to the csv module; without it each row
    # is followed by a blank line on Windows.
    with open(path, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(
            row for row in table if len(row) >= 10 and row[3]
        )
def create_csv_data(tables, table_label_data):
    """
    Flatten recognized tables into rows and append them to ``table.csv``.

    For the first table, a row whose first cell is flagged as a header starts
    with the column names ``Port``, ``Area Name`` and ``Month Reporting``.
    Header cells of later tables are skipped. Every other row starts with the
    header values belonging to its table, when such an entry exists.

    :param tables: Recognized tables; each exposes ``cells`` whose items
        provide ``to_dict()`` with ``row_index``, ``text`` and ``is_header``.
    :param table_label_data: One list of header values per table, in table order.
    :return: None
    """
    for count, t in enumerate(tables, start=1):
        table_data = []
        # Service row index of the output row currently being filled.
        current_row = None
        for cell in t.cells:
            cell = cell.to_dict()
            is_header = bool(cell.get('is_header'))
            if count > 1 and is_header:
                continue
            # Start a new output row whenever the cell's row index changes.
            # Tracking the real index avoids indexing an empty list when a
            # later table has no flagged header row.
            if cell['row_index'] != current_row:
                current_row = cell['row_index']
                if is_header:
                    table_data.append(['Port', 'Area Name', 'Month Reporting'])
                elif len(table_label_data) >= count:
                    # ">=" so the last table also receives its labels; the
                    # entry for table number `count` sits at index count - 1.
                    table_data.append(list(table_label_data[count - 1]))
                else:
                    table_data.append([])
            table_data[-1].append(cell['text'])
        create_csv(table_data, 'table.csv')
        print('Created or updated table.csv file.')
def index(request):
    """
    Django view that downloads the report PDF, extracts its tables and exports CSV.

    The recognized rows are appended to ``table.csv`` and then copied to
    ``Test.csv`` with empty rows removed.

    :param request: Incoming request object (not inspected).
    :return: ``HttpResponse`` with the text ``Load Succeeded``.
    """
    form_path = download_blob('Test_for_MARCH_2022.pdf', '')
    tables, table_label_data = recognize_form_tables(form_path)
    print('form recognize success')
    create_csv_data(tables, table_label_data)

    # NOTE(review): table.csv is only ever appended to, so rows from earlier
    # requests are presumably copied again here; confirm whether it should be
    # cleared per request.
    with open('table.csv', newline='') as in_file, \
            open('Test.csv', 'w', newline='') as out_file:
        writer = csv.writer(out_file)
        # Drop empty rows while copying.
        writer.writerows(row for row in csv.reader(in_file) if row)
    return HttpResponse("Load Succeeded")
Larz60+ wrote May-11-2022, 12:46 AM:
Please post all code, output and errors (in their entirety) between their respective tags. Refer to the BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
Please, 2nd notice.
Please post all code, output and errors (in their entirety) between their respective tags. Refer to the BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
Please, 2nd notice.