Skip to content

Commit

Permalink
Allow grouping scans by arbitrary numbers
Browse files Browse the repository at this point in the history
Previously, it was only possible to mass-scan 1- or 2-page documents.
That restriction is lifted, with the caveat that the `-z` argument
now requires the number of pages by which to group scans.
  • Loading branch information
Ben Morgan committed Mar 12, 2019
1 parent fe54ca1 commit 9797305
Showing 1 changed file with 71 additions and 68 deletions.
139 changes: 71 additions & 68 deletions scanbro.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,7 @@ def with_filepath(filename, name):

class Processor:
binary = "false"
multiple_in = False
multiple_of = None
multiple_in = 1
multiple_out = False

def __init__(self):
Expand All @@ -211,11 +210,14 @@ def run_cmd(cmd, stdin=None, stdout=None):
def suffix(self, file):
return with_suffix(file, self.binary + '.' + self.filetype)

def command(self, input_file, output_file):
return [self.binary]
def command(self, input_files, output_file):
assert(type(input_files) is list and len(input_files) > 0)
assert(self.multiple_in == 1)
return [self.binary, input_files[0], output_file]

def process(self, input_file, output_file, dryrun=False, stdin=None, stdout=None):
cmd = self.command(input_file, output_file)
def process(self, input_files, output_file, dryrun=False, stdin=None, stdout=None):
assert(type(input_files) is list and len(input_files) > 0)
cmd = self.command(input_files, output_file)
Color.debug(" ".join(cmd), dryrun)
if not dryrun:
self.run_cmd(cmd, stdin, stdout)
Expand Down Expand Up @@ -301,13 +303,14 @@ def output(self, prototype):
else:
return [prototype] if os.path.exists(prototype) else []

def command(self, input_file, output_file):
def command(self, input_device, output_file):
assert(input_device is None or type(input_device) is str)
self.assert_output_format(output_file)
if input_file is None:
input_file = self.device
if input_device is None:
input_device = self.device
cmd = [
self.binary,
'--device-name', input_file,
'--device-name', input_device,
'--format', self.filetype,
]
if self.is_adf():
Expand All @@ -322,16 +325,20 @@ def command(self, input_file, output_file):
cmd.extend(self.sources.args(self.config['source']))
return cmd

def process(self, input_file, output_file, dryrun=False, stdin=None, stdout=None):
def process(self, input_device, output_file, dryrun=False, stdin=None, stdout=None):
assert(input_device is None or type(input_device) is str)
self.assert_output_format(output_file)
if not self.is_adf():
cmd = self.command(input_file, output_file)
cmd = self.command(input_device, output_file)
Color.debug(' '.join(cmd) + f' > {output_file}', dryrun)
if not dryrun:
with open(output_file, 'w') as file:
self.run_cmd(cmd, stdout=file)
else:
Processor.process(self, input_file, output_file, dryrun, stdin, stdout)
cmd = self.command(input_device, output_file)
Color.debug(" ".join(cmd), dryrun)
if not dryrun:
self.run_cmd(cmd, stdin, stdout)

def scan(self, output_file, clobber=False, trim=False, batch=False, dryrun=False):
def scan_once(output_file):
Expand Down Expand Up @@ -365,12 +372,12 @@ def scan_once(output_file):
Color.error("Invalid choice, try again.")
elif "source".startswith(answer):
scanner.config["source"] = Color.input(
f"Select one of {scanner.sources.choices}", prefix="<<",
f"Select one of {scanner.sources.choices.keys()}", prefix="<<",
)
continue
elif "papersize".startswith(answer):
scanner.config["papersize"] = Color.input(
f"Select one of {scanner.papersizes.choices}", prefix="<<",
f"Select one of {scanner.papersizes.choices.keys()}", prefix="<<",
)
continue
elif "continue".startswith(answer):
Expand Down Expand Up @@ -433,8 +440,8 @@ class Brother_MFC_J5730DW(Scanner):
sources = Option('auto', {
'auto': [],
'flatbed': ['--source', 'FlatBed'],
'adf': ['--source', 'Automatic Document Feeder(left aligned)'],
'duplex': ['--source', 'Automatic Document Feeder(left aligned,Duplex)'],
'adf': ['--source', 'Automatic Document Feeder(centrally aligned)'],
'duplex': ['--source', 'Automatic Document Feeder(centrally aligned,Duplex)'],
'adf-left': ['--source', 'Automatic Document Feeder(left aligned)'],
'adf-left-duplex': ['--source', 'Automatic Document Feeder(left aligned,Duplex)'],
'adf-center': ['--source', 'Automatic Document Feeder(centrally aligned)'],
Expand All @@ -449,7 +456,9 @@ class Unpaper(Processor):
binary = 'unpaper'
filetype = 'pnm'

def command(self, input_file, output_file):
def command(self, input_files, output_file):
assert(type(input_files) is list and len(input_files) == 1)
input_file = input_files[0]
cmd = [
self.binary,
input_file,
Expand All @@ -468,7 +477,9 @@ def __init__(self, language='deu'):
Processor.__init__(self)
self.language = language

def command(self, input_file, output_file):
def command(self, input_files, output_file):
assert(type(input_files) is list and len(input_files) == 1)
input_file = input_files[0]
if has_suffix(output_file, self.filetype):
output_file = output_file[:-(len(self.filetype)+1)]
cmd = [self.binary, input_file, output_file]
Expand Down Expand Up @@ -510,7 +521,9 @@ def __init__(self, profile, quality):
self.profile = profile
self.quality = quality

def command(self, input_file, output_file):
def command(self, input_files, output_file):
assert(type(input_files) is list and len(input_files) == 1)
input_file = input_files[0]
if input_file == output_file:
raise Exception(f"input {input_file} and output {output_file} are the same")
cmd = [self.binary, input_file]
Expand All @@ -531,7 +544,7 @@ class Ghostscript(Processor):

binary = 'gs'
filetype = 'pdf'
multiple_in = True
multiple_in = 0
profiles = Option('high', {
# Default profiles:
# 'default': ['-dPDFSETTINGS=/default'],
Expand Down Expand Up @@ -581,6 +594,7 @@ def __init__(self, profile='high', benchmark=False):
self.benchmark = benchmark

def command(self, input_files, output_file):
assert(type(input_files) is list and len(input_files) > 0)
cmd = [
self.binary,
'-dNOPAUSE',
Expand All @@ -592,13 +606,11 @@ def command(self, input_files, output_file):
f'-sOutputFile={output_file}',
]
cmd.extend(self.profiles.args(self.profile))
if self.multiple_in:
cmd.extend(input_files)
else:
cmd.append(input_files)
cmd.extend(input_files)
return cmd

def process(self, input_file, output_file, dryrun=False, stdin=None, stdout=None):
def process(self, input_files, output_file, dryrun=False, stdin=None, stdout=None):
assert(type(input_files) is list and len(input_files) > 0)
if self.benchmark:
Color.print('Ghostscript benchmark requested.')
Color.print('--------------------------------')
Expand All @@ -607,14 +619,14 @@ def process(self, input_file, output_file, dryrun=False, stdin=None, stdout=None
self.profile = profile
profile_output = with_presuffix(output_file, profile)
Color.print(f'Create {profile_output}')
cmd = self.command(input_file, profile_output)
cmd = self.command(input_files, profile_output)
Color.debug(' '.join(cmd), dryrun)
if not dryrun:
self.run_cmd(cmd, None, None)
Color.print('--------------------------------')
self.profile = original_profile
else:
Processor.process(self, input_file, output_file, dryrun, stdin, stdout)
Processor.process(self, input_files, output_file, dryrun, stdin, stdout)


def scanbro(scanner, pipeline, output_name, clean=0, trim=False, batch=False, dryrun=False):
Expand Down Expand Up @@ -663,39 +675,30 @@ def scanbro(scanner, pipeline, output_name, clean=0, trim=False, batch=False, dr
input_files = scanned_files
for p in pipeline:
stage += 1
if p.multiple_in:
if p.multiple_of is None:
# Currently, only the scanner can create multiple output files,
# so we assume that multiple in means single out.
assert(not p.multiple_out)
part = input_files[0].rpartition('.1')
output_files = [ p.suffix(part[0] + part[2]) ]
Color.info(f'Transform [{input_files[0]} ...] => {prototype}')
p.process(input_files, output_files[0], dryrun)
else:
# In this case, we will be creating multiple_out even if it's
# not explicitly specified. Instead, we will be partitioning
# the input files in multiples and converting these into
# output.
output_files = []
prototype = p.suffix(input_files[0])
n = len(input_files)
if n % p.multiple_of != 0:
raise Exception(f'input files {n} cannot be cleanly partitioned in {p.multiple_of}')
Color.info(f'Transform by {p.multiple_of}s [{input_files[0]} ...] => [{prototype} ...]')
partitioned_files = [input_files[i:i + p.multiple_of] for i in range(0, n, p.multiple_of)]
for in_files in partitioned_files:
out_file = p.suffix(in_files[0])
output_files.append(out_file)
p.process(in_files, out_file, dryrun)
if p.multiple_in <= 0:
# Currently, only the scanner can create multiple output files,
# so we assume that multiple in means single out.
assert(not p.multiple_out)
part = input_files[0].rpartition('.1')
output_files = [ p.suffix(part[0] + part[2]) ]
Color.info(f'Transform [{input_files[0]} ...] => {prototype}')
p.process(input_files, output_files[0], dryrun)
else:
# In this case, we will be creating multiple_out even if it's
# not explicitly specified. Instead, we will be partitioning
# the input files in multiples and converting these into
# output.
output_files = []
prototype = p.suffix(input_files[0])
Color.info(f'Transform [{input_files[0]} ...] => [{prototype} ...]')
for in_file in input_files:
out_file = p.suffix(in_file)
n = len(input_files)
if n % p.multiple_in != 0:
raise Exception(f'input files {n} cannot be cleanly partitioned in {p.multiple_in}')
Color.info(f'Transform /{p.multiple_in} [{input_files[0]} ...] => [{prototype} ...]')
partitioned_files = [input_files[i:i + p.multiple_in] for i in range(0, n, p.multiple_in)]
for in_files in partitioned_files:
out_file = p.suffix(in_files[0])
output_files.append(out_file)
p.process(in_file, out_file, dryrun)
p.process(in_files, out_file, dryrun)

# Remove intermediate files if requested
if stage > 1 and clean > 0:
Expand Down Expand Up @@ -871,10 +874,11 @@ def make_scanner(args):
help='scan multiple times interactively',
)
parser.add_argument(
'-z', '--separate',
dest='separate',
action='store_true',
help='save each scanned file separately',
'-z', '--group-by',
dest='group_by',
type=int,
default=0,
help='group input files by N and save separately',
)
parser.add_argument(
'-t', '--trim-last',
Expand All @@ -892,11 +896,10 @@ def make_tesseract(scanner, args):
return Tesseract(args.language)
def make_ghostscript(scanner, args):
gs = Ghostscript(args.gs_profile, args.gs_benchmark)
if args.separate:
if scanner.is_duplex():
gs.multiple_of = 2
else:
gs.multiple_in = False
if args.group_by != 0:
gs.multiple_in = args.group_by;
if args.group_by % 2 == 1 and scanner.is_duplex():
raise Exception('duplex scanning requires group-by to be an even number');
return gs

FILTERS = {
Expand Down Expand Up @@ -962,8 +965,8 @@ def make_ghostscript(scanner, args):
# We need to convert from PNM to PNG. The default options for
# ImageMagick should result in a lossless conversion.
args.filters.append('imagemagick')
if args.separate and args.trim:
raise Exception('cannot specify --separate and --trim simultaneously')
if args.group_by != 0 and args.trim:
raise Exception('cannot specify --group-by and --trim simultaneously')

# Create scanner and pipeline. The order is not customizable.
scanner = make_scanner(args)
Expand Down

0 comments on commit 9797305

Please sign in to comment.