|
- import numpy as np
- import matplotlib.pyplot as plt
- import matplotlib.image as mpimg
- import matplotlib.patches as patches
- import pickle
- import os
- import bin.caracter_recognition as ocr
-
-
# Identifiers of the supported kinds of information. The order matches the
# numbered menu shown by Template_File.add_template_information (choice 1..4);
# presumably meant to be indexed by Coord_data.type -- confirm against callers.
informations_types = [
    "text_box",
    "text_begin",
    "1case",
    "xcases",
]
-
-
class Coord():
    """Mutable pair of coordinates (x, y), used for the corners of a box."""

    def __init__(self):
        self.x = 0
        self.y = 0

    def __repr__(self):
        return "{}(x={!r}, y={!r})".format(type(self).__name__, self.x, self.y)

    def modifier(self, x, y):
        """Overwrite both coordinates with the given values."""
        self.x = x
        self.y = y

    def affine(self, multx, multy, offsetx, offsety):
        """Apply ``v * mult + offset`` on each axis and truncate to int."""
        self.x = int(self.x * multx + offsetx)
        self.y = int(self.y * multy + offsety)

    def reset(self):
        """Put the point back at the origin (0, 0)."""
        self.x = 0
        self.y = 0

    def test_null(self):
        """Reset the point to (0, 0) if either coordinate is None."""
        # Identity test: `== None` may be overridden by the stored value's type.
        if self.x is None or self.y is None:
            self.reset()
-
-
class Coord_data():
    """A named zone of the template, made of one or several boxes.

    ``type`` is an integer code: 1 means only the beginning of the zone is
    delimited (the box has zero width), any value greater than 1 is a
    check-box zone for which the user is asked the number of boxes and a
    label for each of them. Every other value is handled as a plain
    two-corner box.

    ``box`` holds the boxes as ``[[Coord, Coord], ...]`` (two opposite
    corners per box) and is filled either programmatically or from mouse
    clicks through ``mouse_event``.
    """

    def __init__(self, name, type):
        self.name = name
        self.type = type
        self.content_if_checkbox = []
        if self.type > 1:
            self.nb_boxes = self._ask_box_count()
            for i in range(self.nb_boxes):
                text = "Intitule case n°" + str(i + 1) + " : "
                # Append every label: assigning here used to replace the
                # list with the last label typed.
                self.content_if_checkbox.append(str(input(text)))
        else:
            self.nb_boxes = 1  # default
        # Two corners per box, format [[Coord1(), Coord2()], ...]
        self.box = []
        # First corner of the box being drawn, None between two boxes.
        self.temps_coordinates = None

    @staticmethod
    def _ask_box_count():
        """Prompt until the user enters a strictly positive integer."""
        while True:
            try:
                count = int(input("Nombre de cases pouvant etre cochees : "))
            except ValueError:
                continue  # not a number: ask again instead of crashing
            if count > 0:
                return count

    @staticmethod
    def _event_point(event):
        """Return the event position as a Coord with integer coordinates.

        A missing position (``xdata``/``ydata`` is None) becomes (0, 0).
        """
        point = Coord()
        point.modifier(event.xdata, event.ydata)
        point.test_null()
        point.modifier(int(point.x), int(point.y))
        return point

    def box_coords_min_max(self):
        """Return ``(minx, miny, maxx, maxy)`` over the corners of all boxes.

        Raises:
            IndexError: if no box has been defined yet.
        """
        if not self.box:
            raise IndexError("no box defined for " + repr(self.name))
        xs = [corner.x for subbox in self.box for corner in subbox[:2]]
        ys = [corner.y for subbox in self.box for corner in subbox[:2]]
        return min(xs), min(ys), max(xs), max(ys)

    def define_box(self, x1, x2, y1, y2, n=1):
        """Store the box with corners (x1, y1) and (x2, y2).

        ``n`` is the 1-based rank of the box: ``n <= 1`` replaces the whole
        list with this single box, ``n > 1`` appends the box to the list.
        """
        corners = [Coord(), Coord()]
        corners[0].modifier(x1, y1)
        corners[1].modifier(x2, y2)
        if n > 1:
            self.box.append(corners)
        else:
            self.box = [corners]

    def define_box_begin(self, x1, y1, y2):
        """Store a zero-width box at abscissa ``x1`` spanning ``y1``..``y2``."""
        self.define_box(x1, x1, y1, y2)

    def define_ckeck_marks(self, liste, n):
        """Define ``n`` boxes from ``liste``.

        ``liste`` is a (n, 4) list of coordinates; each row is forwarded to
        ``define_box`` in the order ``x1, x2, y1, y2``.
        """
        for i in range(n):
            row = liste[i]
            # define_box expects a 1-based rank, hence i + 1.
            self.define_box(row[0], row[1], row[2], row[3], i + 1)

    def mouse_one_event(self, event):
        """Handle one click: the first sets a corner, the second ends a box."""
        if self.temps_coordinates is None:  # first point of the box
            self.temps_coordinates = self._event_point(event)
            return
        # second point of the box
        first = self.temps_coordinates
        second = self._event_point(event)
        if self.type != 1:
            self.define_box(first.x, second.x, first.y, second.y, len(self.box) + 1)
        else:
            self.define_box_begin(first.x, first.y, second.y)
        self.temps_coordinates = None

    def mouse_event(self, event):
        """Click callback: record the click, then close every figure once all
        the expected boxes are complete."""
        if len(self.box) < self.nb_boxes or self.temps_coordinates is not None:
            self.mouse_one_event(event)
        if len(self.box) == self.nb_boxes and self.temps_coordinates is None:
            plt.close("all")
-
-
-
class Template_File():
    """A template image together with the list of zones defined on it.

    The zones (``informations_template_objects``) are stored on disk with
    pickle; the image is copied next to them by ``save_template``.
    """

    def __init__(self):
        self.path_template_img = ""  # complete path of the template image
        self.template_img = 0  # will be an image after init
        self.informations_template_objects = []
        self.data_path_dir = ""
        self.template_name = ""

    def open_files(self, path_template_img, path_template_obj):
        """Load the template image and its pickled list of zones."""
        self.path_template_img = path_template_img
        self.data_path_dir = path_template_obj
        self.template_img = plt.imread(path_template_img)
        # Sum of the first three channels, scaled by 255 and floor-divided
        # by 3. NOTE(review): assumes a 3-channel image with values in
        # [0, 1] -- confirm for non-PNG inputs.
        self.template_img = (self.template_img[:, :, 0]
                             + self.template_img[:, :, 1]
                             + self.template_img[:, :, 2]) * 255 // 3
        # SECURITY: pickle.load can execute arbitrary code; only open
        # template files coming from a trusted source.
        with open(path_template_obj, 'rb') as infos_object:
            loaded = pickle.load(infos_object)
        # Files overwritten by an earlier version of save_data_file hold the
        # list wrapped in another list: unwrap it.
        if isinstance(loaded, list) and len(loaded) == 1 and isinstance(loaded[0], list):
            loaded = loaded[0]
        self.informations_template_objects = loaded
        self.template_name = path_template_obj[path_template_obj.rfind("/") + 1:]

    def define_template_img(self):
        """Ask the user for the image path and the template name."""
        self.path_template_img = str(input("Chemin complet de l'image PNG du modèle : "))
        self.template_img = mpimg.imread(self.path_template_img)
        self.template_name = str(input("Nom du template : "))

    def _draw_template_boxes(self):
        """Draw the bounding rectangle and the name of every zone on the
        current axes. Zones without any box are skipped."""
        for object_data in self.informations_template_objects:
            if not object_data.box:
                continue
            minx, miny, maxx, maxy = object_data.box_coords_min_max()
            rect = patches.Rectangle((minx, miny), maxx - minx, maxy - miny,
                                     linewidth=1, edgecolor='r', facecolor='none')
            plt.text(minx, miny, str(object_data.name), verticalalignment='top')
            plt.gca().add_patch(rect)

    @staticmethod
    def _ask_information_type():
        """Prompt until a menu entry (1..4) is chosen; return it 0-based."""
        menu = " 1. Information manuscrite delimitee\n 2. Début d'information manuscrite\n 3. Case exclusive\n 4. Cases à choix multiples\n-> "
        while True:
            try:
                choice = int(input(menu))
            except ValueError:
                continue  # not a number: ask again
            if 1 <= choice <= 4:
                return choice - 1

    def add_template_information(self):
        """Interactively add one zone: ask its type and name, then let the
        user click its corners on the template image."""
        fig = plt.figure(num="Emplacement de la donnée")
        plt.imshow(self.template_img)
        plt.axis('off')
        self._draw_template_boxes()

        information_type = self._ask_information_type()
        information_name = str(input("Catégorie de la donnée : "))

        new_object = Coord_data(information_name, information_type)
        self.informations_template_objects.append(new_object)

        # The zone closes the figure itself once all its boxes are clicked,
        # which makes plt.show() return.
        cid = fig.canvas.mpl_connect("button_press_event", new_object.mouse_event)
        plt.show()
        fig.canvas.mpl_disconnect(cid)

    def show_template_boxes(self):
        """Display the template image with every zone outlined."""
        plt.figure(num="Emplacement des données")
        plt.imshow(self.template_img)
        self._draw_template_boxes()
        plt.show()

    def save_data_file(self, folder_path, file_name, extension):
        """Pickle the list of zones to ``folder_path + file_name + extension``.

        folder_path is the absolute path of the folder. If the target file
        already exists the user is asked before it is overwritten.
        """
        target_path = folder_path + file_name + extension
        # Test the very path that is written, not a rebuilt file name.
        if os.path.exists(target_path):
            answer = str(input("Le fichier "+file_name+extension+" existe déjà. Le remplacer? O/N : "))
            if answer != "O":
                print("Fichier non enregistre.\n")
                return
        # Same payload whether the file is new or overwritten, so that
        # open_files can always read it back.
        with open(target_path, "wb") as file:  # overwrite binary mode
            pickle.dump(self.informations_template_objects, file)

    def save_img_template(self, folder_path, file_name):
        """Copy the template image to ``folder_path + file_name``.

        If a file already exists there, both images are displayed side by
        side and the user is asked before it is overwritten.
        """
        import shutil  # local import: only needed by this method

        source_file = self.path_template_img
        destination_file = folder_path + file_name

        if os.path.exists(destination_file):
            if os.path.abspath(source_file) == os.path.abspath(destination_file):
                return  # the image is already in place, nothing to copy
            figure = plt.figure()
            ax1 = figure.add_subplot(121)
            ax2 = figure.add_subplot(122)
            ax1.title.set_text("Image-template non-enregistree")
            ax2.title.set_text("Image deja enregistree")
            ax1.axis('off')
            ax2.axis('off')
            ax1.imshow(self.template_img)
            ax2.imshow(mpimg.imread(destination_file))
            figure.suptitle("Une images-templates existe deja avec ce nom. Fermez la fenetre.")
            plt.show()
            answer = str(input("Une images-templates existe deja avec ce nom. La remplacer? O/N : "))
            if answer != "O":
                print("Fichier non enregistre.\n")
                return
        # Portable copy, without building a shell command from the paths.
        shutil.copyfile(source_file, destination_file)

    def save_template(self, folder_path, img_file_name, templ_file_name, extension):
        """Save the zones and the image into ``folder_path``.

        folder_path is the absolute path of the folder; it is created when
        missing. The file names are appended to it as they are.
        """
        os.makedirs(folder_path, exist_ok=True)
        self.save_data_file(folder_path, templ_file_name, extension)
        self.save_img_template(folder_path, img_file_name)
-
-
-
class Handwritten_Content():
    """Crops the zones of a scanned form and runs the OCR on them.

    ``images``, ``name`` and ``result`` are parallel lists: the cropped
    image, the zone name and the OCR output of each processed zone.
    """

    def __init__(self, FilesNames):
        self.images = []
        self.name = []
        self.result = []
        self.File_Names = FilesNames

    def _recognise(self, image, name):
        """Record ``image`` under ``name`` and append its OCR output."""
        self.images.append(image)
        self.name.append(name)
        sentence = ocr.ocr_run(img_file=self.images[-1], FileNames=self.File_Names)
        # ocr_run may return 0; store an empty string in that case.
        if sentence == 0:
            sentence = ""
        self.result.append(sentence)

    def extract_handwritten_content(self, template_object, img_template_resized, img_scanned, ratio, offset):
        """Crop every text zone of ``template_object`` out of ``img_scanned``
        and run the OCR on it.

        The coordinates of the zones are first mapped with
        ``coord * ratio + offset`` (``offset[1]`` on x, ``offset[0]`` on y).
        NOTE: this mapping is applied in place on ``template_object``, so
        calling the method twice on the same object applies it twice.

        Zones of type 0 are cropped between their two corners, zones of
        type 1 from their first corner up to the last column (excluded) of
        the scan. Other types are ignored. The first two cropped images are
        then displayed.
        """
        if img_template_resized.shape[0] < template_object.template_img.shape[0]:
            ratio = 1 / ratio

        zones = template_object.informations_template_objects
        for zone in zones:  # Coord_data objects
            for box in zone.box:
                for coord in box:
                    coord.affine(ratio, ratio, offset[1], offset[0])

        for zone in zones:
            if zone.type not in (0, 1) or not zone.box:
                continue  # check-box zones are not handled yet
            top_left, bottom_right = zone.box[0]
            if zone.type == 0:  # fully delimited
                x_end = bottom_right.x
            else:  # only the beginning is delimited
                x_end = img_scanned.shape[1] - 1
            crop = img_scanned[top_left.y:bottom_right.y, top_left.x:x_end]
            self._recognise(crop, zone.name)

        # Visual check of the first two crops; no IndexError when fewer
        # than two zones were extracted.
        for image in self.images[:2]:
            plt.imshow(image)
            plt.show()
-
-
-
-
def main():
    """Interactive manual test: load a template image, define two zones
    with the mouse and display the result."""
    template = Template_File()
    template.define_template_img()
    template.add_template_information()
    template.add_template_information()
    template.show_template_boxes()
    # To save the result (folder, image file name, data file name, extension):
    # template.save_template(folder_path, "/formulaire1.png", "/formulaire1", ".opdf")

    print("Execution success")


if __name__ == "__main__":
    # used for tests
    main()
|