randy/randy/scripts/scan.py

87 lines
2.4 KiB
Python

import glob
import hashlib
import os
import sys
import time
import transaction
from pyramid.paster import (
get_appsettings,
setup_logging,
)
from pyramid.scripts.common import parse_vars
from ..models import (
get_engine,
get_session_factory,
get_tm_session,
Picture
)
def usage(argv):
    """Print a short usage message for this script and exit with status 1.

    The command name shown in the message is the basename of ``argv[0]``.
    """
    program = os.path.basename(argv[0])
    message = ('usage: %s <config_uri> [var=value]\n'
               '(example: "%s development.ini")') % (program, program)
    print(message)
    sys.exit(1)
def main(argv=sys.argv):
    """Script entry point: load settings from a config URI and run a scan.

    ``argv[1]`` is the config URI; any further arguments are parsed with
    ``parse_vars`` and passed to ``get_appsettings`` as options.  When fewer
    than two arguments are given, ``usage`` is called instead.

    The scan itself runs inside one ``transaction.manager`` block and is fed
    the lines of the ``directories`` setting.
    """
    if len(argv) < 2:
        usage(argv)

    config_uri, extra_args = argv[1], argv[2:]
    options = parse_vars(extra_args)
    setup_logging(config_uri)
    settings = get_appsettings(config_uri, options=options)

    session_factory = get_session_factory(get_engine(settings))

    with transaction.manager:
        dbsession = get_tm_session(session_factory, transaction.manager)
        directory_entries = settings['directories'].splitlines()
        scan(directory_entries, dbsession)
def scan(directories, dbsession):
    """Scan every configured directory for pictures.

    Args:
        directories: iterable of ``name:path`` strings, one per directory.
            Surrounding whitespace is stripped and blank entries are
            skipped.
        dbsession: database session, passed through to ``getFiles``.

    Raises:
        ValueError: if a non-blank entry contains no ``:`` separator.
    """
    for raw_entry in directories:
        entry = raw_entry.strip()
        if not entry:
            # A multi-line ini value ("directories =" followed by indented
            # lines) produces an empty first line once split; skip it
            # instead of failing on the unpack below.
            continue
        # Split on the first ':' only, so the path itself may contain colons.
        name, separator, path = entry.partition(':')
        if not separator:
            raise ValueError(
                "invalid directory entry %r: expected 'name:path'" % (entry,))
        getFiles(name, path, dbsession)
def getFiles(name, path, dbsession):
    """Add a ``Picture`` row for every not-yet-known ``*.jpg`` under *path*.

    Each file's URI is ``name + ':' + <path relative to the root>``.  Files
    whose URI already has a ``Picture`` row are reported and skipped; new
    files are hashed with ``getHash`` and added to the session.

    Args:
        name: prefix used to build picture URIs.
        path: directory to search recursively; a trailing path separator is
            optional.
        dbsession: database session used to look up and add ``Picture`` rows.
    """
    # os.path.join(path, '') appends a separator only when one is missing.
    # Without it, '**' would fuse with the last path component and behave
    # like '*': the search would stop being recursive and would also match
    # sibling directories that merely share the prefix.
    root = os.path.join(path, '')
    for filename in glob.glob(root + '**/*.jpg', recursive=True):
        uri = name + ':' + filename[len(root):]
        old = dbsession.query(Picture).filter(Picture.uri == uri).first()
        if old is not None:
            # NOTE: existing rows are never refreshed; a file whose size or
            # modification time changed since it was added is still skipped.
            print('Old file with ' + old.file_hash + ' exists!')
            continue

        # Stat only new files, and only once for both size and mtime.
        stat_result = os.stat(filename)
        size = stat_result.st_size
        modified_time = time.gmtime(stat_result.st_mtime)
        file_hash = getHash(filename)
        # NOTE(review): the meaning of the 4096 and the two empty dicts is
        # defined by the Picture model, which is not visible here -- confirm.
        picture = Picture(uri, 4096, file_hash, {}, {}, size, modified_time)
        dbsession.add(picture)
        print('Added file with ' + file_hash)
def getHash(file):
    """Return the SHA-256 digest of *file* formatted as ``"SHA256:<hex>"``.

    The file is opened in binary mode and consumed in fixed-size pieces so
    that it is never loaded into memory as a whole.
    """
    chunk_size = 1024 * 1024  # 1 MiB per read; the exact size is arbitrary
    digest = hashlib.sha256()
    with open(file, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return "SHA256:{0}".format(digest.hexdigest())