You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

28 lines
768B

  1. import argparse
  2. import pickle
  3. import cv2
  4. import lmdb
  5. from path import Path
  6. parser = argparse.ArgumentParser()
  7. parser.add_argument('--data_dir', type=Path, required=True)
  8. args = parser.parse_args()
  9. # 2GB is enough for IAM dataset
  10. assert not (args.data_dir / 'lmdb').exists()
  11. env = lmdb.open(str(args.data_dir / 'lmdb'), map_size=1024 * 1024 * 1024 * 2)
  12. # go over all png files
  13. fn_imgs = list((args.data_dir / 'img').walkfiles('*.png'))
  14. # and put the imgs into lmdb as pickled grayscale imgs
  15. with env.begin(write=True) as txn:
  16. for i, fn_img in enumerate(fn_imgs):
  17. print(i, len(fn_imgs))
  18. img = cv2.imread(fn_img, cv2.IMREAD_GRAYSCALE)
  19. basename = fn_img.basename()
  20. txn.put(basename.encode("ascii"), pickle.dumps(img))
  21. env.close()