|
"""Pack the PNG images of a dataset directory into an LMDB database.

Every ``*.png`` file found under ``<data_dir>/img`` is read as a grayscale
image, pickled, and stored in a new LMDB environment at ``<data_dir>/lmdb``,
keyed by the image's file name.
"""
import argparse
import pickle

import cv2
import lmdb
from path import Path

# 2GB is enough for IAM dataset
MAP_SIZE_BYTES = 2 * 1024 ** 3


def main():
    """Parse ``--data_dir`` and write its PNG images into a fresh LMDB.

    Raises:
        FileExistsError: if ``<data_dir>/lmdb`` already exists.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=Path, required=True)
    args = parser.parse_args()

    lmdb_dir = args.data_dir / 'lmdb'
    # Explicit check instead of `assert`: asserts are stripped under
    # `python -O`, which would let the script write into an existing database.
    if lmdb_dir.exists():
        raise FileExistsError(
            '{} already exists, refusing to write into it'.format(lmdb_dir))

    # go over all png files; collected before the environment is created so a
    # failure while scanning does not leave a new lmdb directory behind
    fn_imgs = list((args.data_dir / 'img').walkfiles('*.png'))
    num_imgs = len(fn_imgs)

    # and put the imgs into lmdb as pickled grayscale imgs.
    # Both context managers unwind on error: the write transaction is
    # committed (or aborted on an exception) first, then the environment is
    # closed.
    with lmdb.open(str(lmdb_dir), map_size=MAP_SIZE_BYTES) as env, \
            env.begin(write=True) as txn:
        for i, fn_img in enumerate(fn_imgs):
            print(i, num_imgs)
            # cv2.imread returns None when a file cannot be read; that None
            # is pickled and stored as-is so the key still exists.
            # NOTE(review): readers of this database presumably handle a
            # None image - confirm before changing this.
            img = cv2.imread(fn_img, cv2.IMREAD_GRAYSCALE)
            # The key is the bare file name, so files sharing a name in
            # different subdirectories end up under the same key.
            basename = fn_img.basename()
            txn.put(basename.encode("ascii"), pickle.dumps(img))


if __name__ == '__main__':
    main()
|