You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

form_data_places.py 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. import numpy as np
  2. import matplotlib.pyplot as plt
  3. import matplotlib.image as mpimg
  4. import matplotlib.patches as patches
  5. import pickle
  6. import os
  7. import bin.caracter_recognition as ocr
  8. informations_types = ["text_box", "text_begin", "1case", "xcases"]
  9. class Coord():
  10. def __init__(self):
  11. self.x = 0
  12. self.y = 0
  13. def modifier(self, x ,y):
  14. self.x=x
  15. self.y=y
  16. def affine(self, multx, multy, offsetx, offsety):
  17. self.x = int(self.x*multx + offsetx)
  18. self.y = int(self.y*multy + offsety)
  19. def reset(self):
  20. self.x=0
  21. self.y=0
  22. def test_null(self):
  23. if(self.x == None or self.y == None):
  24. self.reset()
  25. class Coord_data():
  26. def __init__(self, name, type):
  27. self.name = name
  28. self.type = type
  29. self.content_if_checkbox = []
  30. if(self.type > 1):
  31. self.nb_boxes = 0
  32. while(self.nb_boxes <=0):
  33. self.nb_boxes = int(input("Nombre de cases pouvant etre cochees : "))
  34. for i in range(0,self.nb_boxes):
  35. text = "Intitule case n°" + str(i+1) + " : "
  36. self.content_if_checkbox = str(input(text))
  37. else:
  38. self.nb_boxes = 1 #default
  39. self.box = [] #upper left and lower right corners of the boxes, format [[Coord1(), Coord2()],...]
  40. self.temps_coordinates = None
  41. def box_coords_min_max(self):
  42. minx, miny, maxx, maxy = self.box[0][0].x, self.box[0][0].y, self.box[0][1].x, self.box[0][1].y
  43. for subbox in self.box:
  44. minx = min(minx, subbox[0].x, subbox[1].x)
  45. miny = min(miny, subbox[0].y, subbox[1].y)
  46. maxx = max(maxx, subbox[0].x, subbox[1].x)
  47. maxy = max(maxy, subbox[0].y, subbox[1].y)
  48. return minx, miny, maxx, maxy
  49. def define_box(self, x1, x2, y1, y2, n=1):
  50. if n > 1:
  51. self.box.append([Coord(), Coord()])
  52. else:
  53. self.box = [[Coord(), Coord()]]
  54. self.box[n-1][0].modifier(x1, y1)
  55. self.box[n-1][1].modifier(x2, y2)
  56. def define_box_begin(self, x1, y1, y2):
  57. self.define_box(x1, x1, y1, y2)
  58. def define_ckeck_marks(self, liste, n):
  59. """liste is a (n,4) list of coordinates, n the number
  60. of boxes"""
  61. for i in range(0,n):
  62. define_box(liste[i][0], liste[i][1], liste[i][2], liste[i][3], i)
  63. def mouse_one_event(self, event):
  64. if(self.temps_coordinates == None): # if only 1 point
  65. self.temps_coordinates = Coord()
  66. self.temps_coordinates.modifier(event.xdata, event.ydata)
  67. self.temps_coordinates.test_null()
  68. self.temps_coordinates.modifier(int(self.temps_coordinates.x), int(self.temps_coordinates.y))
  69. else: #if second point
  70. temp2 = Coord()
  71. temp2.modifier(event.xdata, event.ydata)
  72. temp2.test_null()
  73. x, y = int(temp2.x), int(temp2.y)
  74. if(self.type != 1):
  75. self.define_box(self.temps_coordinates.x, x, self.temps_coordinates.y, y, len(self.box)+1)
  76. else:
  77. self.define_box_begin(self.temps_coordinates.x, self.temps_coordinates.y, y)
  78. self.box[-1][1].test_null()
  79. self.temps_coordinates = None
  80. def mouse_event(self, event):
  81. if(len(self.box) < self.nb_boxes or self.temps_coordinates != None):
  82. self.mouse_one_event(event)
  83. if(len(self.box) == self.nb_boxes and self.temps_coordinates == None):
  84. plt.close("all")
  85. class Template_File():
  86. def __init__(self):
  87. self.path_template_img = "" #complete path of the template image
  88. self.template_img = 0 #will be an image after init
  89. self.informations_template_objects = []
  90. self.data_path_dir = ""
  91. self.template_name = ""
  92. def open_files(self, path_template_img, path_template_obj):
  93. self.path_template_img = path_template_img
  94. self.data_path_dir = path_template_obj
  95. self.template_img = plt.imread(path_template_img)
  96. self.template_img = (self.template_img[:,:,0]+self.template_img[:,:,1]+self.template_img[:,:,2])*255//3
  97. infos_object = open(path_template_obj, 'rb')
  98. self.informations_template_objects = pickle.load(infos_object)
  99. infos_object.close()
  100. self.template_name = path_template_obj[path_template_obj.rfind("/")+1:]
  101. def define_template_img(self):
  102. self.path_template_img = str(input("Chemin complet de l'image PNG du modèle : "))
  103. self.template_img = mpimg.imread(self.path_template_img)
  104. self.template_name = str(input("Nom du template : "))
  105. def add_template_information(self):
  106. fig = plt.figure(num="Emplacement de la donnée")
  107. plt.imshow(self.template_img)
  108. plt.axis('off')
  109. if(self.informations_template_objects != []):
  110. boxes_lists = self.informations_template_objects
  111. rects = []
  112. for object_data in boxes_lists:
  113. minx, miny, maxx, maxy = object_data.box_coords_min_max()
  114. rect = patches.Rectangle((minx, miny), maxx-minx, maxy-miny, linewidth=1, edgecolor='r', facecolor='none')
  115. plt.text(minx, miny, str(object_data.name), verticalalignment='top')
  116. rects.append(rect)
  117. plt.gca().add_patch(rect)
  118. information_type = int(input(" 1. Information manuscrite delimitee\n 2. Début d'information manuscrite\n 3. Case exclusive\n 4. Cases à choix multiples\n-> "))
  119. information_type = information_type-1
  120. information_name = str(input("Catégorie de la donnée : "))
  121. self.informations_template_objects.append(Coord_data(information_name, information_type))
  122. mng = plt.get_current_fig_manager()
  123. cid = fig.canvas.mpl_connect("button_press_event", self.informations_template_objects[-1].mouse_event)#self.fig
  124. plt.show()
  125. fig.canvas.mpl_disconnect(cid) #self.fig
  126. #for i in range(0,self.informations_template_objects[0].nb_boxes):
  127. # print(self.informations_template_objects[0].box[i][0].x, self.informations_template_objects[0].box[i][0].y,
  128. # self.informations_template_objects[0].box[i][1].x, self.informations_template_objects[0].box[i][1].y)
  129. def show_template_boxes(self):
  130. boxes_lists = self.informations_template_objects
  131. fig = plt.figure(num="Emplacement des données")
  132. plt.imshow(self.template_img)
  133. rects = []
  134. for object_data in boxes_lists:
  135. minx, miny, maxx, maxy = object_data.box_coords_min_max()
  136. rect = patches.Rectangle((minx, miny), maxx-minx, maxy-miny, linewidth=1, edgecolor='r', facecolor='none')
  137. plt.text(minx, miny, str(object_data.name), verticalalignment='top')
  138. rects.append(rect)
  139. plt.gca().add_patch(rect)
  140. plt.show()
  141. def save_data_file(self, folder_path, file_name, extension):
  142. """folder_path is the absolute path of the folder"""
  143. file_list = os.listdir(folder_path)
  144. if(file_name+"."+extension in file_list):
  145. answer = str(input("Le fichier "+file_name+extension+" existe déjà. Le remplacer? O/N : "))
  146. if(answer == "O"):
  147. file = open(folder_path+file_name+extension,"wb") #overwrite binary mode
  148. data = [self.informations_template_objects]
  149. pickle.dump(data, file)
  150. file.close()
  151. else:
  152. print("Fichier non enregistre.\n")
  153. else:
  154. file = open(folder_path+file_name+extension,"wb") #overwrite binary mode
  155. data = self.informations_template_objects
  156. pickle.dump(data, file)
  157. file.close()
  158. def save_img_template(self, folder_path, file_name):
  159. file_list = os.listdir(folder_path)
  160. img_file_name = self.path_template_img[self.path_template_img.rfind("/")+1:]
  161. source_file = self.path_template_img
  162. destination_file = folder_path + file_name
  163. if(file_name[1:] in file_list):
  164. figure = plt.figure()
  165. ax1 = figure.add_subplot(121)
  166. ax2 = figure.add_subplot(122)
  167. ax1.title.set_text("Image-template non-enregistree")
  168. ax2.title.set_text("Image deja enregistree")
  169. ax1.axis('off')
  170. ax2.axis('off')
  171. ax1.imshow(self.template_img)
  172. saved_image = mpimg.imread(destination_file)
  173. ax2.imshow(saved_image)
  174. figure.suptitle("Une images-templates existe deja avec ce nom. Fermez la fenetre.")
  175. plt.show()
  176. answer = str(input("Une images-templates existe deja avec ce nom. La remplacer? O/N : "))
  177. if(answer == "O"):
  178. if(os.name == "posix"):
  179. os.popen("cp \"" + source_file + "\" \"" + destination_file+"\"")
  180. elif(os.name == "win32" or os.name == "windows"):
  181. os.popen("copy \"" + source_file + "\" \"" + destination_file + "\"")
  182. else:
  183. print("Fichier non enregistre.\n")
  184. else:
  185. if(os.name == "posix"):
  186. os.popen("cp \"" + source_file + "\" \"" + destination_file+"\"")
  187. elif(os.name == "win32" or os.name == "windows"):
  188. os.popen("copy \"" + source_file + "\" \"" + destination_file + "\"")
  189. def save_template(self, folder_path, img_file_name, templ_file_name, extension):
  190. """folder_path is the absolute path of the folder"""
  191. test_file_list = os.listdir(folder_path[:folder_path.rfind("/")+1])
  192. if(folder_path[folder_path.rfind("/")+1:] not in test_file_list):
  193. os.mkdir(folder_path)
  194. self.save_data_file(folder_path, templ_file_name, extension)
  195. self.save_img_template(folder_path, img_file_name)
  196. class Handwritten_Content():
  197. def __init__(self, FilesNames):
  198. self.images = []
  199. self.name = []
  200. self.result = []
  201. self.File_Names = FilesNames
  202. def extract_handwritten_content(self, template_object, img_template_resized, img_scanned, ratio, offset):
  203. if(img_template_resized.shape[0] < template_object.template_img.shape[0]):
  204. ratio = 1/ratio
  205. list_coord_data_objects = template_object.informations_template_objects
  206. for boxes_list in list_coord_data_objects: # list of Coord_data objects
  207. for box in boxes_list.box: #for each box
  208. for coord in box: #for each coordinate
  209. coord.affine(ratio, ratio, offset[1], offset[0])
  210. for boxes_list in list_coord_data_objects:
  211. if(boxes_list.type == 0): #if fully delimited
  212. self.images.append(img_scanned[boxes_list.box[0][0].y:boxes_list.box[0][1].y,boxes_list.box[0][0].x:boxes_list.box[0][1].x])
  213. self.name.append(boxes_list.name)
  214. sentence = ocr.ocr_run(img_file=self.images[-1], FileNames=self.File_Names)
  215. if sentence == 0:
  216. sentence = ""
  217. self.result.append(sentence)
  218. elif(boxes_list.type == 1): #if beginning delimited
  219. xlimit = img_scanned.shape[1]-1
  220. self.images.append(img_scanned[boxes_list.box[0][0].y:boxes_list.box[0][1].y,boxes_list.box[0][0].x:xlimit])
  221. self.name.append(boxes_list.name)
  222. sentence = ocr.ocr_run(img_file=self.images[-1], FileNames=self.File_Names)
  223. self.result.append(sentence)
  224. # elif(boxes_list.type == 2): #if exclusive box
  225. plt.imshow(self.images[0])
  226. plt.show()
  227. plt.imshow(self.images[1])
  228. plt.show()
  229. if __name__ == "__main__":
  230. #used for tests
  231. template = Template_File()
  232. template.define_template_img()
  233. template.add_template_information()
  234. template.add_template_information()
  235. template.show_template_boxes()
  236. #template.save_template("/home/inc0nnu-rol/Documents/La Gemme/OCR_paper_form/files", "/formulaire1", ".opdf")
  237. print("Execution success")