"""Form-template definition and handwritten-content extraction.

A template is a picture of an empty form plus a list of ``Coord_data``
objects describing where each piece of information is located.  The
locations are selected with mouse clicks on a matplotlib figure, saved with
``pickle``, and later used to crop the matching regions out of a scanned
form so they can be passed to the OCR module.
"""
import os
import pickle
import shutil

import matplotlib.image as mpimg
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np

import bin.caracter_recognition as ocr

# Kinds of information a template can hold, listed in the same order as the
# menu shown by Template_File.add_template_information (menu choice - 1).
informations_types = ["text_box", "text_begin", "1case", "xcases"]


def _confirm_overwrite(prompt):
    """Ask *prompt* on stdin and return True when the user answers "O"."""
    # Tolerate surrounding blanks and a lower-case answer.
    return str(input(prompt)).strip().upper() == "O"


class Coord():
    """Mutable 2-D point with ``x`` and ``y`` attributes."""

    def __init__(self):
        self.x = 0
        self.y = 0

    def modifier(self, x, y):
        """Set both coordinates."""
        self.x = x
        self.y = y

    def affine(self, multx, multy, offsetx, offsety):
        """Scale then translate the point in place, truncating to ``int``."""
        self.x = int(self.x * multx + offsetx)
        self.y = int(self.y * multy + offsety)

    def reset(self):
        """Move the point back to the origin."""
        self.x = 0
        self.y = 0

    def test_null(self):
        """Reset the point to the origin if either coordinate is ``None``."""
        if self.x is None or self.y is None:
            self.reset()


class Coord_data():
    """Location of one piece of information on a template.

    Attributes:
        name: label of the information.
        type: integer kind; values above 1 are check-box kinds for which the
            number of boxes and their labels are asked on stdin.
        content_if_checkbox: list of check-box labels (empty otherwise).
        nb_boxes: number of boxes that must be drawn for this information.
        box: list of ``[Coord, Coord]`` pairs, one per box (two corners).
        temps_coordinates: first corner of the box being drawn, or ``None``
            when no box is in progress.
    """

    def __init__(self, name, type):
        self.name = name
        self.type = type
        self.content_if_checkbox = []
        if self.type > 1:
            self.nb_boxes = 0
            while self.nb_boxes <= 0:
                try:
                    self.nb_boxes = int(input("Nombre de cases pouvant etre cochees : "))
                except ValueError:
                    # Not a number: ask again, like for a non-positive count.
                    self.nb_boxes = 0
            for i in range(self.nb_boxes):
                text = "Intitule case n°" + str(i + 1) + " : "
                # Keep every label (the list used to be overwritten by the
                # last answer).
                self.content_if_checkbox.append(str(input(text)))
        else:
            self.nb_boxes = 1  # default
        # Corners of the boxes, format [[Coord1(), Coord2()], ...]
        self.box = []
        self.temps_coordinates = None

    def box_coords_min_max(self):
        """Return ``(minx, miny, maxx, maxy)`` enclosing every box.

        Raises:
            IndexError: if no box has been defined yet.
        """
        if not self.box:
            raise IndexError("no box defined for " + str(self.name))
        corners = [corner for subbox in self.box
                   for corner in (subbox[0], subbox[1])]
        xs = [corner.x for corner in corners]
        ys = [corner.y for corner in corners]
        return min(xs), min(ys), max(xs), max(ys)

    def define_box(self, x1, x2, y1, y2, n=1):
        """Store the corners ``(x1, y1)`` and ``(x2, y2)`` as box number *n*.

        *n* is 1-based.  ``n <= 1`` discards every existing box and starts a
        new list; ``n > 1`` appends a box, which is then addressed as
        ``self.box[n - 1]`` (so *n* must be ``len(self.box) + 1``).
        """
        if n > 1:
            self.box.append([Coord(), Coord()])
        else:
            self.box = [[Coord(), Coord()]]
        self.box[n - 1][0].modifier(x1, y1)
        self.box[n - 1][1].modifier(x2, y2)

    def define_box_begin(self, x1, y1, y2):
        """Store a zero-width box marking where a free-text field begins."""
        self.define_box(x1, x1, y1, y2)

    def define_ckeck_marks(self, liste, n):
        """Define *n* boxes from *liste*.

        *liste* is a (n, 4) list of coordinates; each row is passed to
        ``define_box`` as ``x1, x2, y1, y2``.
        """
        for i in range(n):
            # define_box counts boxes from 1: the first call resets the
            # list, the following ones append.
            self.define_box(liste[i][0], liste[i][1], liste[i][2],
                            liste[i][3], i + 1)

    @staticmethod
    def _event_point(event):
        """Return the event position as an integer ``Coord``.

        ``xdata``/``ydata`` set to ``None`` are mapped to the origin.
        """
        point = Coord()
        point.modifier(event.xdata, event.ydata)
        point.test_null()
        point.modifier(int(point.x), int(point.y))
        return point

    def mouse_one_event(self, event):
        """Handle one click: the first one stores a corner, the second one
        completes the current box."""
        if self.temps_coordinates is None:
            # First corner of a new box.
            self.temps_coordinates = self._event_point(event)
            return
        # Second corner: the box can now be stored.
        first = self.temps_coordinates
        second = self._event_point(event)
        if self.type != 1:
            self.define_box(first.x, second.x, first.y, second.y,
                            len(self.box) + 1)
        else:
            # Type 1 only records where the text begins.
            self.define_box_begin(first.x, first.y, second.y)
        self.temps_coordinates = None

    def mouse_event(self, event):
        """Matplotlib ``button_press_event`` callback.

        Feeds clicks to ``mouse_one_event`` until ``nb_boxes`` boxes are
        complete, then closes every figure.
        """
        if len(self.box) < self.nb_boxes or self.temps_coordinates is not None:
            self.mouse_one_event(event)
        if len(self.box) == self.nb_boxes and self.temps_coordinates is None:
            plt.close("all")


class Template_File():
    """Template image together with the list of its ``Coord_data`` fields.

    The saving methods build paths by plain concatenation
    (``folder_path + file_name + extension``), so *file_name* is expected to
    carry its leading separator and *extension* its leading dot.
    """

    def __init__(self):
        self.path_template_img = ""  # complete path of the template image
        self.template_img = 0  # will be an image after init
        self.informations_template_objects = []
        self.data_path_dir = ""
        self.template_name = ""

    def open_files(self, path_template_img, path_template_obj):
        """Load the template image and its pickled field list.

        The image is reduced to one channel by averaging its first three
        channels and scaling by 255.
        NOTE(review): the scaling presumably expects float pixels in [0, 1]
        (what ``imread`` returns for PNG files) - confirm for other formats.
        """
        self.path_template_img = path_template_img
        self.data_path_dir = path_template_obj
        image = plt.imread(path_template_img)
        self.template_img = (image[:, :, 0] + image[:, :, 1] + image[:, :, 2]) * 255 // 3
        # SECURITY: pickle can run arbitrary code while loading; only open
        # template files coming from a trusted source.
        with open(path_template_obj, 'rb') as infos_object:
            loaded = pickle.load(infos_object)
        # Files written by the former "overwrite" branch of save_data_file
        # wrapped the list in another list; unwrap them transparently.
        if (isinstance(loaded, list) and len(loaded) == 1
                and isinstance(loaded[0], list)):
            loaded = loaded[0]
        self.informations_template_objects = loaded
        self.template_name = path_template_obj[path_template_obj.rfind("/") + 1:]

    def define_template_img(self):
        """Ask for the template image path and the template name on stdin."""
        self.path_template_img = str(input("Chemin complet de l'image PNG du modèle : "))
        self.template_img = mpimg.imread(self.path_template_img)
        self.template_name = str(input("Nom du template : "))

    def _draw_boxes(self):
        """Outline every recorded field on the current axes and label it."""
        axes = plt.gca()
        for object_data in self.informations_template_objects:
            if not object_data.box:
                # Field whose selection was never completed: nothing to draw.
                continue
            minx, miny, maxx, maxy = object_data.box_coords_min_max()
            rect = patches.Rectangle((minx, miny), maxx - minx, maxy - miny,
                                     linewidth=1, edgecolor='r',
                                     facecolor='none')
            plt.text(minx, miny, str(object_data.name),
                     verticalalignment='top')
            axes.add_patch(rect)

    def add_template_information(self):
        """Interactively add one field to the template.

        Shows the template with the fields already recorded, asks for the
        kind and name of the new field on stdin, then records the mouse
        clicks made on the figure until the field is fully defined.
        """
        fig = plt.figure(num="Emplacement de la donnée")
        plt.imshow(self.template_img)
        plt.axis('off')
        self._draw_boxes()
        information_type = int(input(" 1. Information manuscrite delimitee\n 2. Début d'information manuscrite\n 3. Case exclusive\n 4. Cases à choix multiples\n-> "))
        information_type = information_type - 1  # menu is 1-based
        information_name = str(input("Catégorie de la donnée : "))
        new_field = Coord_data(information_name, information_type)
        self.informations_template_objects.append(new_field)
        cid = fig.canvas.mpl_connect("button_press_event",
                                     new_field.mouse_event)
        plt.show()
        fig.canvas.mpl_disconnect(cid)

    def show_template_boxes(self):
        """Display the template image with every recorded field outlined."""
        plt.figure(num="Emplacement des données")
        plt.imshow(self.template_img)
        self._draw_boxes()
        plt.show()

    def save_data_file(self, folder_path, file_name, extension):
        """Pickle the field list to ``folder_path + file_name + extension``.

        folder_path is the absolute path of the folder.  If the target file
        already exists the user is asked before it is replaced.
        """
        target = folder_path + file_name + extension
        # Test the very path that is going to be written.
        if os.path.exists(target):
            if not _confirm_overwrite("Le fichier "+file_name+extension+" existe déjà. Le remplacer? O/N : "):
                print("Fichier non enregistre.\n")
                return
        # Same payload whether or not the file existed, so open_files can
        # always read it back.
        with open(target, "wb") as file:  # overwrite, binary mode
            pickle.dump(self.informations_template_objects, file)

    def save_img_template(self, folder_path, file_name):
        """Copy the template image to ``folder_path + file_name``.

        If a file already exists there, both images are displayed side by
        side and the user is asked before the existing one is replaced.
        """
        source_file = self.path_template_img
        destination_file = folder_path + file_name
        if os.path.exists(destination_file):
            figure = plt.figure()
            ax1 = figure.add_subplot(121)
            ax2 = figure.add_subplot(122)
            ax1.title.set_text("Image-template non-enregistree")
            ax2.title.set_text("Image deja enregistree")
            ax1.axis('off')
            ax2.axis('off')
            ax1.imshow(self.template_img)
            saved_image = mpimg.imread(destination_file)
            ax2.imshow(saved_image)
            figure.suptitle("Une images-templates existe deja avec ce nom. Fermez la fenetre.")
            plt.show()
            if not _confirm_overwrite("Une images-templates existe deja avec ce nom. La remplacer? O/N : "):
                print("Fichier non enregistre.\n")
                return
        # Portable copy: no shell involved, works whatever os.name is.
        shutil.copy(source_file, destination_file)

    def save_template(self, folder_path, img_file_name, templ_file_name, extension):
        """Save the field list and the template image into *folder_path*.

        folder_path is the absolute path of the folder; it is created when
        missing.
        """
        os.makedirs(folder_path, exist_ok=True)
        self.save_data_file(folder_path, templ_file_name, extension)
        self.save_img_template(folder_path, img_file_name)


class Handwritten_Content():
    """Regions cropped from a scanned form and their OCR results.

    ``images``, ``name`` and ``result`` are parallel lists: one entry per
    extracted field.
    """

    def __init__(self, FilesNames):
        self.images = []
        self.name = []
        self.result = []
        self.File_Names = FilesNames

    @staticmethod
    def _span(first, second):
        """Return ``(low, high)`` slice bounds, ordered and clamped at 0."""
        low, high = sorted((first, second))
        return max(0, low), max(0, high)

    def _run_ocr(self, image):
        """Run the OCR on *image*; a ``0`` result becomes an empty string."""
        sentence = ocr.ocr_run(img_file=image, FileNames=self.File_Names)
        if sentence == 0:
            sentence = ""
        return sentence

    def extract_handwritten_content(self, template_object, img_template_resized, img_scanned, ratio, offset):
        """Crop each text field of the template out of *img_scanned* and OCR it.

        The coordinates stored in *template_object* are rescaled IN PLACE by
        *ratio* (inverted when the resized template is smaller than the
        original one) and shifted by ``offset[1]`` along x and ``offset[0]``
        along y, so this method must be called only once per loaded
        template.  Only field types 0 (delimited text) and 1 (beginning of
        text) are processed; up to the first two crops are then displayed.
        """
        if img_template_resized.shape[0] < template_object.template_img.shape[0]:
            ratio = 1 / ratio
        fields = template_object.informations_template_objects
        for field in fields:  # Coord_data objects
            for box in field.box:
                for coord in box:
                    coord.affine(ratio, ratio, offset[1], offset[0])

        for field in fields:
            if field.type not in (0, 1):
                # Check-box kinds are not handled yet.
                continue
            first, second = field.box[0][0], field.box[0][1]
            # Clicks may have been made in any order: sort the bounds.
            top, bottom = self._span(first.y, second.y)
            if field.type == 0:  # fully delimited
                left, right = self._span(first.x, second.x)
            else:  # only the beginning is delimited
                left = max(0, first.x)
                right = img_scanned.shape[1] - 1
            self.images.append(img_scanned[top:bottom, left:right])
            self.name.append(field.name)
            self.result.append(self._run_ocr(self.images[-1]))

        # Visual check of the first crops (there may be fewer than two).
        for image in self.images[:2]:
            plt.imshow(image)
            plt.show()


if __name__ == "__main__":  # used for tests
    template = Template_File()
    template.define_template_img()
    template.add_template_information()
    template.add_template_information()
    template.show_template_boxes()
    # NOTE(review): the call below is kept from the original as an example;
    # it lacks the image file name argument save_template now requires.
    #template.save_template("/home/inc0nnu-rol/Documents/La Gemme/OCR_paper_form/files", "/formulaire1", ".opdf")
    print("Execution success")