-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdetect-vendor
executable file
·70 lines (62 loc) · 1.88 KB
/
detect-vendor
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env perl
#
# detect-vendor - OCR an image and report which known vendor it came from.
#
# Usage: detect-vendor IMAGE_FILE
#
# Copies IMAGE_FILE to a private temporary file under /var/tmp, runs
# Tesseract OCR on the copy and prints, in order:
#   1. the raw OCR text,
#   2. "Matched date pattern: <key>" for the first known date format found,
#   3. the name of every vendor that uses that date format and whose name
#      pattern matches the OCR text (one per line).
#
# Dies (non-zero exit) when no argument is given, when the copy fails, or
# when no known date format is found in the text.
use strict;
use warnings;
use File::Copy;
use File::Temp ();

# Hazel doesn't have /usr/local/bin in PATH, so we put it there.  Empty or
# undefined PATH is handled so we never create an empty PATH entry (which
# the shell would treat as the current directory).
$ENV{'PATH'} = join(
    ':',
    grep { defined($_) && length($_) } ($ENV{'PATH'}, '/usr/local/bin')
);

# Per the original author's note, the module can't simply be `use`d because
# of the PATH issue: `use` runs at compile time, before the PATH fix above.
# So it is loaded at runtime instead.
my $ocr_loaded = eval {
    require Image::OCR::Tesseract;
    Image::OCR::Tesseract->import();
    1;
};
unless ($ocr_loaded) {
    # Say why nothing happened instead of exiting silently.  The exit
    # status is deliberately left as it was (a plain exit).
    warn "Image::OCR::Tesseract could not be loaded, nothing done: $@";
    exit;
}

my $file = shift(@ARGV);
die("Usage: $0 IMAGE_FILE\n") unless defined($file) && length($file);

# Handle the tempfile so we don't accidentally trigger Hazel.
# File::Temp picks an unpredictable, unique name (two runs in the same
# second no longer collide) and removes the file when the object goes out
# of scope, even if OCR dies part-way through.
my $ocr_text;
{
    my $tempfile = File::Temp->new(
        TEMPLATE => 'tesseract-XXXXXXXX',
        DIR      => '/var/tmp',
        SUFFIX   => '.jpg',
    );
    copy($file, $tempfile->filename) or die("Copy failed: $!");
    $ocr_text = Image::OCR::Tesseract::get_ocr($tempfile->filename);
}
$ocr_text = '' unless defined $ocr_text;
print $ocr_text;

# Known date formats, tried in this order.  An ordered list (rather than
# iterating hash keys) keeps the result stable when more than one format
# could match the same text.
my @date_patterns = (
    [ 'mmddyyyy'    => qr/(\d\d?)(\/|-)(\d\d?)(\/|-)((20)?\d\d)/ ],
    [ 'dd_mon_yyyy' => qr/(\d\d?) (\w+),?\s+((20)?\d\d)/ ],
    [ 'mon_dd_yyyy' => qr/(\w+) (\d\d?),?\s+((20)?\d\d)/ ],
);

# Known vendors.  The /i flag must live on the qr// itself: a compiled
# pattern keeps its own flags, so /i at the match site has no effect on it.
my @vendors = (
    {
        'name'         => 'Cee Kay Supply',
        'name_regex'   => qr/CEE KAY SUPPLY/i,
        'date_pattern' => 'mmddyyyy',
    },
    {
        'name'         => 'DeVine Wine and Spirits',
        'name_regex'   => qr/devine wine & spirits/i,
        'date_pattern' => 'mmddyyyy',
    },
    {
        'name'         => 'St. Louis Wine and Beermaking',
        # Was an empty placeholder pattern, which matches every document.
        # NOTE(review): derived from the display name above; tune it
        # against real OCR output for this vendor.
        'name_regex'   => qr/st\.?\s+louis\s+wine\s+(?:&|and)\s+beermaking/i,
        'date_pattern' => 'mmddyyyy',
    },
);

# First, let's see if we can find the date in a format we recognize.
# No /g here: in scalar context /g would leave pos() set on $ocr_text and
# make the vendor matches below start searching after the date.
my $found_date_pattern;
foreach my $candidate (@date_patterns) {
    my ($pattern_name, $pattern_regex) = @{$candidate};
    if ($ocr_text =~ $pattern_regex) {
        $found_date_pattern = $pattern_name;
        last;
    }
}
die("No matching date patterns found!") unless $found_date_pattern;
print "Matched date pattern: $found_date_pattern\n";

# Now test for vendor names based on the matching date pattern.
foreach my $vendor_hashref (@vendors) {
    next unless $vendor_hashref->{'date_pattern'} eq $found_date_pattern;
    next unless $ocr_text =~ $vendor_hashref->{'name_regex'};
    # One name per line so multiple hits don't run together.
    print $vendor_hashref->{'name'}, "\n";
}
exit;