You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

296 lines
12KB

import os
import pickle
import shutil

import matplotlib.image as mpimg
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np

import bin.caracter_recognition as ocr
  8. informations_types = ["text_box", "text_begin", "1case", "xcases"]
  9. class Coord():
  10. def __init__(self):
  11. self.x = 0
  12. self.y = 0
  13. def modifier(self, x ,y):
  14. self.x=x
  15. self.y=y
  16. def affine(self, multx, multy, offsetx, offsety):
  17. self.x = int(self.x*multx + offsetx)
  18. self.y = int(self.y*multy + offsety)
  19. def reset(self):
  20. self.x=0
  21. self.y=0
  22. def test_null(self):
  23. if(self.x == None or self.y == None):
  24. self.reset()
  25. class Coord_data():
  26. def __init__(self, name, type):
  27. self.name = name
  28. self.type = type
  29. self.content_if_checkbox = []
  30. if(self.type > 1):
  31. self.nb_boxes = 0
  32. while(self.nb_boxes <=0):
  33. self.nb_boxes = int(input("Nombre de cases pouvant etre cochees : "))
  34. for i in range(0,self.nb_boxes):
  35. text = "Intitule case n°" + str(i+1) + " : "
  36. self.content_if_checkbox = str(input(text))
  37. else:
  38. self.nb_boxes = 1 #default
  39. self.box = [] #upper left and lower right corners of the boxes, format [[Coord1(), Coord2()],...]
  40. self.temps_coordinates = None
  41. def box_coords_min_max(self):
  42. minx, miny, maxx, maxy = self.box[0][0].x, self.box[0][0].y, self.box[0][1].x, self.box[0][1].y
  43. for subbox in self.box:
  44. minx = min(minx, subbox[0].x, subbox[1].x)
  45. miny = min(miny, subbox[0].y, subbox[1].y)
  46. maxx = max(maxx, subbox[0].x, subbox[1].x)
  47. maxy = max(maxy, subbox[0].y, subbox[1].y)
  48. return minx, miny, maxx, maxy
  49. def define_box(self, x1, x2, y1, y2, n=1):
  50. if n > 1:
  51. self.box.append([Coord(), Coord()])
  52. else:
  53. self.box = [[Coord(), Coord()]]
  54. self.box[n-1][0].modifier(x1, y1)
  55. self.box[n-1][1].modifier(x2, y2)
  56. def define_box_begin(self, x1, y1, y2):
  57. self.define_box(x1, x1, y1, y2)
  58. def define_ckeck_marks(self, liste, n):
  59. """liste is a (n,4) list of coordinates, n the number
  60. of boxes"""
  61. for i in range(0,n):
  62. define_box(liste[i][0], liste[i][1], liste[i][2], liste[i][3], i)
  63. def mouse_one_event(self, event):
  64. if(self.temps_coordinates == None): # if only 1 point
  65. self.temps_coordinates = Coord()
  66. self.temps_coordinates.modifier(event.xdata, event.ydata)
  67. self.temps_coordinates.test_null()
  68. self.temps_coordinates.modifier(int(self.temps_coordinates.x), int(self.temps_coordinates.y))
  69. else: #if second point
  70. temp2 = Coord()
  71. temp2.modifier(event.xdata, event.ydata)
  72. temp2.test_null()
  73. x, y = int(temp2.x), int(temp2.y)
  74. if(self.type != 1):
  75. self.define_box(self.temps_coordinates.x, x, self.temps_coordinates.y, y, len(self.box)+1)
  76. else:
  77. self.define_box_begin(self.temps_coordinates.x, self.temps_coordinates.y, y)
  78. self.box[-1][1].test_null()
  79. self.temps_coordinates = None
  80. def mouse_event(self, event):
  81. if(len(self.box) < self.nb_boxes or self.temps_coordinates != None):
  82. self.mouse_one_event(event)
  83. if(len(self.box) == self.nb_boxes and self.temps_coordinates == None):
  84. plt.close("all")
  85. class Template_File():
  86. def __init__(self):
  87. self.path_template_img = "" #complete path of the template image
  88. self.template_img = 0 #will be an image after init
  89. self.informations_template_objects = []
  90. self.data_path_dir = ""
  91. self.template_name = ""
  92. def open_files(self, path_template_img, path_template_obj):
  93. self.path_template_img = path_template_img
  94. self.data_path_dir = path_template_obj
  95. self.template_img = plt.imread(path_template_img)
  96. self.template_img = (self.template_img[:,:,0]+self.template_img[:,:,1]+self.template_img[:,:,2])*255//3
  97. infos_object = open(path_template_obj, 'rb')
  98. self.informations_template_objects = pickle.load(infos_object)
  99. infos_object.close()
  100. self.template_name = path_template_obj[path_template_obj.rfind("/")+1:]
  101. def define_template_img(self):
  102. self.path_template_img = str(input("Chemin complet de l'image PNG du modèle : "))
  103. self.template_img = mpimg.imread(self.path_template_img)
  104. self.template_name = str(input("Nom du template : "))
  105. def add_template_information(self):
  106. fig = plt.figure(num="Emplacement de la donnée")
  107. plt.imshow(self.template_img)
  108. plt.axis('off')
  109. if(self.informations_template_objects != []):
  110. boxes_lists = self.informations_template_objects
  111. rects = []
  112. for object_data in boxes_lists:
  113. minx, miny, maxx, maxy = object_data.box_coords_min_max()
  114. rect = patches.Rectangle((minx, miny), maxx-minx, maxy-miny, linewidth=1, edgecolor='r', facecolor='none')
  115. plt.text(minx, miny, str(object_data.name), verticalalignment='top')
  116. rects.append(rect)
  117. plt.gca().add_patch(rect)
  118. information_type = int(input(" 1. Information manuscrite delimitee\n 2. Début d'information manuscrite\n 3. Case exclusive\n 4. Cases à choix multiples\n-> "))
  119. information_type = information_type-1
  120. information_name = str(input("Catégorie de la donnée : "))
  121. self.informations_template_objects.append(Coord_data(information_name, information_type))
  122. mng = plt.get_current_fig_manager()
  123. cid = fig.canvas.mpl_connect("button_press_event", self.informations_template_objects[-1].mouse_event)#self.fig
  124. plt.show()
  125. fig.canvas.mpl_disconnect(cid) #self.fig
  126. #for i in range(0,self.informations_template_objects[0].nb_boxes):
  127. # print(self.informations_template_objects[0].box[i][0].x, self.informations_template_objects[0].box[i][0].y,
  128. # self.informations_template_objects[0].box[i][1].x, self.informations_template_objects[0].box[i][1].y)
  129. def show_template_boxes(self):
  130. boxes_lists = self.informations_template_objects
  131. fig = plt.figure(num="Emplacement des données")
  132. plt.imshow(self.template_img)
  133. rects = []
  134. for object_data in boxes_lists:
  135. minx, miny, maxx, maxy = object_data.box_coords_min_max()
  136. rect = patches.Rectangle((minx, miny), maxx-minx, maxy-miny, linewidth=1, edgecolor='r', facecolor='none')
  137. plt.text(minx, miny, str(object_data.name), verticalalignment='top')
  138. rects.append(rect)
  139. plt.gca().add_patch(rect)
  140. plt.show()
  141. def save_data_file(self, folder_path, file_name, extension):
  142. """folder_path is the absolute path of the folder"""
  143. file_list = os.listdir(folder_path)
  144. if(file_name+"."+extension in file_list):
  145. answer = str(input("Le fichier "+file_name+extension+" existe déjà. Le remplacer? O/N : "))
  146. if(answer == "O"):
  147. file = open(folder_path+file_name+extension,"wb") #overwrite binary mode
  148. data = [self.informations_template_objects]
  149. pickle.dump(data, file)
  150. file.close()
  151. else:
  152. print("Fichier non enregistre.\n")
  153. else:
  154. file = open(folder_path+file_name+extension,"wb") #overwrite binary mode
  155. data = self.informations_template_objects
  156. pickle.dump(data, file)
  157. file.close()
  158. def save_img_template(self, folder_path, file_name):
  159. file_list = os.listdir(folder_path)
  160. img_file_name = self.path_template_img[self.path_template_img.rfind("/")+1:]
  161. source_file = self.path_template_img
  162. destination_file = folder_path + file_name
  163. if(file_name[1:] in file_list):
  164. figure = plt.figure()
  165. ax1 = figure.add_subplot(121)
  166. ax2 = figure.add_subplot(122)
  167. ax1.title.set_text("Image-template non-enregistree")
  168. ax2.title.set_text("Image deja enregistree")
  169. ax1.axis('off')
  170. ax2.axis('off')
  171. ax1.imshow(self.template_img)
  172. saved_image = mpimg.imread(destination_file)
  173. ax2.imshow(saved_image)
  174. figure.suptitle("Une images-templates existe deja avec ce nom. Fermez la fenetre.")
  175. plt.show()
  176. answer = str(input("Une images-templates existe deja avec ce nom. La remplacer? O/N : "))
  177. if(answer == "O"):
  178. if(os.name == "posix"):
  179. os.popen("cp \"" + source_file + "\" \"" + destination_file+"\"")
  180. elif(os.name == "win32" or os.name == "windows"):
  181. os.popen("copy \"" + source_file + "\" \"" + destination_file + "\"")
  182. else:
  183. print("Fichier non enregistre.\n")
  184. else:
  185. if(os.name == "posix"):
  186. os.popen("cp \"" + source_file + "\" \"" + destination_file+"\"")
  187. elif(os.name == "win32" or os.name == "windows"):
  188. os.popen("copy \"" + source_file + "\" \"" + destination_file + "\"")
  189. def save_template(self, folder_path, img_file_name, templ_file_name, extension):
  190. """folder_path is the absolute path of the folder"""
  191. test_file_list = os.listdir(folder_path[:folder_path.rfind("/")+1])
  192. if(folder_path[folder_path.rfind("/")+1:] not in test_file_list):
  193. os.mkdir(folder_path)
  194. self.save_data_file(folder_path, templ_file_name, extension)
  195. self.save_img_template(folder_path, img_file_name)
  196. class Handwritten_Content():
  197. def __init__(self, FilesNames):
  198. self.images = []
  199. self.name = []
  200. self.result = []
  201. self.File_Names = FilesNames
  202. def extract_handwritten_content(self, template_object, img_template_resized, img_scanned, ratio, offset):
  203. if(img_template_resized.shape[0] < template_object.template_img.shape[0]):
  204. ratio = 1/ratio
  205. list_coord_data_objects = template_object.informations_template_objects
  206. for boxes_list in list_coord_data_objects: # list of Coord_data objects
  207. for box in boxes_list.box: #for each box
  208. for coord in box: #for each coordinate
  209. coord.affine(ratio, ratio, offset[1], offset[0])
  210. for boxes_list in list_coord_data_objects:
  211. if(boxes_list.type == 0): #if fully delimited
  212. self.images.append(img_scanned[boxes_list.box[0][0].y:boxes_list.box[0][1].y,boxes_list.box[0][0].x:boxes_list.box[0][1].x])
  213. self.name.append(boxes_list.name)
  214. sentence = ocr.ocr_run(img_file=self.images[-1], FileNames=self.File_Names)
  215. if sentence == 0:
  216. sentence = ""
  217. self.result.append(sentence)
  218. elif(boxes_list.type == 1): #if beginning delimited
  219. xlimit = img_scanned.shape[1]-1
  220. self.images.append(img_scanned[boxes_list.box[0][0].y:boxes_list.box[0][1].y,boxes_list.box[0][0].x:xlimit])
  221. self.name.append(boxes_list.name)
  222. sentence = ocr.ocr_run(img_file=self.images[-1], FileNames=self.File_Names)
  223. self.result.append(sentence)
  224. # elif(boxes_list.type == 2): #if exclusive box
  225. plt.imshow(self.images[0])
  226. plt.show()
  227. plt.imshow(self.images[1])
  228. plt.show()
  229. if __name__ == "__main__":
  230. #used for tests
  231. template = Template_File()
  232. template.define_template_img()
  233. template.add_template_information()
  234. template.add_template_information()
  235. template.show_template_boxes()
  236. #template.save_template("/home/inc0nnu-rol/Documents/La Gemme/OCR_paper_form/files", "/formulaire1", ".opdf")
  237. print("Execution success")