From 68ecf9fd7fd7ba36893018196321ded80207f2fc Mon Sep 17 00:00:00 2001
From: John Mertz
Date: Mon, 2 Mar 2020 12:22:27 +0100
Subject: [PATCH 1/6] Tesseract Installation and Setup

Required for corresponding branch in MailCleaner
---
Review notes (commentary only; not part of the change):
* Adds updates/33_TesseractOcr.update. The file is removed again by 2/6 and
  re-added as updates/36_TesseractOcr.update by 3/6; see the notes on 3/6 for
  remarks on the script body.

 updates/33_TesseractOcr.update | 37 ++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 updates/33_TesseractOcr.update

diff --git a/updates/33_TesseractOcr.update b/updates/33_TesseractOcr.update
new file mode 100644
index 0000000..fd34209
--- /dev/null
+++ b/updates/33_TesseractOcr.update
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+SRCDIR=`grep 'SRCDIR' /etc/mailcleaner.conf | cut -d ' ' -f3`
+
+echo "Installing TesseractOcr binary and OpenCV dependency"
+apt-get update
+apt-get --force-yes -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" -o Dpkg::Options::="--force-confmiss" install tesseract-ocr libopencv-dev
+
+echo "Installing TesseractOCR plugin"
+cd /root
+git clone https://github.com/MailCleaner/TesseractOcr.git
+cd TesseractOcr
+git checkout 3.00
+perl ./Makefile.PL INSTALLSITEARCH=/usr/local/share/perl/5.20.2
+make
+make install
+
+echo "Renaming DB column use_fuzzyocr to generic use_ocr"
+cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
+ALTER TABLE antispam CHANGE use_fuzzyocr use_ocr tinyint(1);
+EOF
+
+echo "Adding tocr_config table"
+cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
+CREATE TABLE IF NOT EXISTS tocr_config (preprocess tinyint, msg_timeout int, img_timeout int, skip_jpg tinyint, skip_png tinyint, skip_gif tinyint, skip_bmp tinyint, skip_tif tinyint, skip_pdf tinyint, min_size int, max_size int, min_x int, min_y int, max_x int, max_y int, min_area int, max_area int);
+EOF
+
+echo "Initializing tocr_config table"
+cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
+INSERT INTO tocr_config (preprocess, msg_timeout, img_timeout, skip_jpg, skip_png, skip_gif, skip_bmp, skip_tif, skip_pdf, min_size, max_size, min_x, min_y, max_x, max_y, min_area, max_area) VALUES (1, 15, 5, 0, 0, 0, 0, 0, 0, 1024, 4096000, 16, 16, 2048, 2048, 512, 2073600);
+EOF
+
+echo "Resyncing DB"
+/usr/mailcleaner/bin/resync_db.sh
+
+echo "Generating new config files"
+/usr/mailcleaner/bin/dump_mailscanner_config.pl

From c14a650b3b953a968095057b978e6aa6d4cde357 Mon Sep 17 00:00:00 2001
From: John Mertz
Date: Fri, 1 May 2020 19:59:28 +0200
Subject: [PATCH 2/6] Increment update number to be the next.

Also throw in libswitch-perl to fix DMARC reporting.
---
Review notes (commentary only; not part of the change):
* This patch only deletes updates/33_TesseractOcr.update. The libswitch-perl
  package named in the message is added to the apt-get line by 3/6.

 updates/33_TesseractOcr.update | 37 ----------------------------------
 1 file changed, 37 deletions(-)
 delete mode 100644 updates/33_TesseractOcr.update

diff --git a/updates/33_TesseractOcr.update b/updates/33_TesseractOcr.update
deleted file mode 100644
index fd34209..0000000
--- a/updates/33_TesseractOcr.update
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-
-SRCDIR=`grep 'SRCDIR' /etc/mailcleaner.conf | cut -d ' ' -f3`
-
-echo "Installing TesseractOcr binary and OpenCV dependency"
-apt-get update
-apt-get --force-yes -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" -o Dpkg::Options::="--force-confmiss" install tesseract-ocr libopencv-dev
-
-echo "Installing TesseractOCR plugin"
-cd /root
-git clone https://github.com/MailCleaner/TesseractOcr.git
-cd TesseractOcr
-git checkout 3.00
-perl ./Makefile.PL INSTALLSITEARCH=/usr/local/share/perl/5.20.2
-make
-make install
-
-echo "Renaming DB column use_fuzzyocr to generic use_ocr"
-cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
-ALTER TABLE antispam CHANGE use_fuzzyocr use_ocr tinyint(1);
-EOF
-
-echo "Adding tocr_config table"
-cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
-CREATE TABLE IF NOT EXISTS tocr_config (preprocess tinyint, msg_timeout int, img_timeout int, skip_jpg tinyint, skip_png tinyint, skip_gif tinyint, skip_bmp tinyint, skip_tif tinyint, skip_pdf tinyint, min_size int, max_size int, min_x int, min_y int, max_x int, max_y int, min_area int, max_area int);
-EOF
-
-echo "Initializing tocr_config table"
-cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
-INSERT INTO tocr_config (preprocess, msg_timeout, img_timeout, skip_jpg, skip_png, skip_gif, skip_bmp, skip_tif, skip_pdf, min_size, max_size, min_x, min_y, max_x, max_y, min_area, max_area) VALUES (1, 15, 5, 0, 0, 0, 0, 0, 0, 1024, 4096000, 16, 16, 2048, 2048, 512, 2073600);
-EOF
-
-echo "Resyncing DB"
-/usr/mailcleaner/bin/resync_db.sh
-
-echo "Generating new config files"
-/usr/mailcleaner/bin/dump_mailscanner_config.pl

From e7cbee82dcdf02a1e20ef9fda21021640340cf94 Mon Sep 17 00:00:00 2001
From: John Mertz
Date: Fri, 1 May 2020 20:15:59 +0200
Subject: [PATCH 3/6] Added TOcr update under new name

---
Review notes (commentary only; not part of the change):
* Same script as the former updates/33_TesseractOcr.update plus an explicit
  PATH export, a comment, and libswitch-perl on the apt-get install line.
* The script checks no exit status. If "git clone" fails, "cd TesseractOcr"
  fails too and the following "git checkout", "perl ./Makefile.PL", "make"
  and "make install" run from /root.
* "ALTER TABLE antispam CHANGE use_fuzzyocr use_ocr" has no guard, and the
  INSERT into tocr_config is unconditional while the table declares no key.
  Presumably a second run would error on the ALTER and add a second
  tocr_config row -- TODO confirm whether update scripts can be re-run.
* SRCDIR is read from /etc/mailcleaner.conf for the mc_mysql calls, but
  resync_db.sh and dump_mailscanner_config.pl are invoked through the
  hard-coded /usr/mailcleaner/bin path.
* NOTE(review): INSTALLSITEARCH pins /usr/local/share/perl/5.20.2 -- confirm
  this matches the Perl version on every target host.

 updates/36_TesseractOcr.update | 39 ++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 updates/36_TesseractOcr.update

diff --git a/updates/36_TesseractOcr.update b/updates/36_TesseractOcr.update
new file mode 100644
index 0000000..aa21d84
--- /dev/null
+++ b/updates/36_TesseractOcr.update
@@ -0,0 +1,39 @@
+#!/bin/bash
+export PATH="/usr/mailcleaner/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+
+SRCDIR=`grep 'SRCDIR' /etc/mailcleaner.conf | cut -d ' ' -f3`
+
+# Bonus: Throwing in opendmarc dependency libswitch-perl
+echo "Installing TesseractOcr binary and OpenCV dependency"
+apt-get update
+apt-get --force-yes -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" -o Dpkg::Options::="--force-confmiss" install tesseract-ocr libopencv-dev libswitch-perl
+
+echo "Installing TesseractOCR plugin"
+cd /root
+git clone https://github.com/MailCleaner/TesseractOcr.git
+cd TesseractOcr
+git checkout 3.00
+perl ./Makefile.PL INSTALLSITEARCH=/usr/local/share/perl/5.20.2
+make
+make install
+
+echo "Renaming DB column use_fuzzyocr to generic use_ocr"
+cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
+ALTER TABLE antispam CHANGE use_fuzzyocr use_ocr tinyint(1);
+EOF
+
+echo "Adding tocr_config table"
+cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
+CREATE TABLE IF NOT EXISTS tocr_config (preprocess tinyint, msg_timeout int, img_timeout int, skip_jpg tinyint, skip_png tinyint, skip_gif tinyint, skip_bmp tinyint, skip_tif tinyint, skip_pdf tinyint, min_size int, max_size int, min_x int, min_y int, max_x int, max_y int, min_area int, max_area int);
+EOF
+
+echo "Initializing tocr_config table"
+cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
+INSERT INTO tocr_config (preprocess, msg_timeout, img_timeout, skip_jpg, skip_png, skip_gif, skip_bmp, skip_tif, skip_pdf, min_size, max_size, min_x, min_y, max_x, max_y, min_area, max_area) VALUES (1, 15, 5, 0, 0, 0, 0, 0, 0, 1024, 4096000, 16, 16, 2048, 2048, 512, 2073600);
+EOF
+
+echo "Resyncing DB"
+/usr/mailcleaner/bin/resync_db.sh
+
+echo "Generating new config files"
+/usr/mailcleaner/bin/dump_mailscanner_config.pl

From cfe9a3c17ddd979c6b223962b693351d9c5dcaa1 Mon Sep 17 00:00:00 2001
From: John Mertz
Date: Mon, 2 Mar 2020 12:22:27 +0100
Subject: [PATCH 4/6] Tesseract Installation and Setup

Required for corresponding branch in MailCleaner
---
Review notes (commentary only; not part of the change):
* Patches 4/6-6/6 repeat 1/6-3/6: same subjects, dates and blob ids, with
  different commit ids. 6/6 creates updates/36_TesseractOcr.update, which 3/6
  has already created, so applying all six in order presumably stops at 6/6
  -- TODO confirm which half of the series is intended.

 updates/33_TesseractOcr.update | 37 ++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 updates/33_TesseractOcr.update

diff --git a/updates/33_TesseractOcr.update b/updates/33_TesseractOcr.update
new file mode 100644
index 0000000..fd34209
--- /dev/null
+++ b/updates/33_TesseractOcr.update
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+SRCDIR=`grep 'SRCDIR' /etc/mailcleaner.conf | cut -d ' ' -f3`
+
+echo "Installing TesseractOcr binary and OpenCV dependency"
+apt-get update
+apt-get --force-yes -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" -o Dpkg::Options::="--force-confmiss" install tesseract-ocr libopencv-dev
+
+echo "Installing TesseractOCR plugin"
+cd /root
+git clone https://github.com/MailCleaner/TesseractOcr.git
+cd TesseractOcr
+git checkout 3.00
+perl ./Makefile.PL INSTALLSITEARCH=/usr/local/share/perl/5.20.2
+make
+make install
+
+echo "Renaming DB column use_fuzzyocr to generic use_ocr"
+cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
+ALTER TABLE antispam CHANGE use_fuzzyocr use_ocr tinyint(1);
+EOF
+
+echo "Adding tocr_config table"
+cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
+CREATE TABLE IF NOT EXISTS tocr_config (preprocess tinyint, msg_timeout int, img_timeout int, skip_jpg tinyint, skip_png tinyint, skip_gif tinyint, skip_bmp tinyint, skip_tif tinyint, skip_pdf tinyint, min_size int, max_size int, min_x int, min_y int, max_x int, max_y int, min_area int, max_area int);
+EOF
+
+echo "Initializing tocr_config table"
+cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
+INSERT INTO tocr_config (preprocess, msg_timeout, img_timeout, skip_jpg, skip_png, skip_gif, skip_bmp, skip_tif, skip_pdf, min_size, max_size, min_x, min_y, max_x, max_y, min_area, max_area) VALUES (1, 15, 5, 0, 0, 0, 0, 0, 0, 1024, 4096000, 16, 16, 2048, 2048, 512, 2073600);
+EOF
+
+echo "Resyncing DB"
+/usr/mailcleaner/bin/resync_db.sh
+
+echo "Generating new config files"
+/usr/mailcleaner/bin/dump_mailscanner_config.pl

From 442c4617547ac2dda70f28d3a746ccd9beda8d52 Mon Sep 17 00:00:00 2001
From: John Mertz
Date: Fri, 1 May 2020 19:59:28 +0200
Subject: [PATCH 5/6] Increment update number to be the next.

Also throw in libswitch-perl to fix DMARC reporting.
---
Review notes (commentary only; not part of the change):
* Repeats 2/6: deletes updates/33_TesseractOcr.update and changes nothing else.

 updates/33_TesseractOcr.update | 37 ----------------------------------
 1 file changed, 37 deletions(-)
 delete mode 100644 updates/33_TesseractOcr.update

diff --git a/updates/33_TesseractOcr.update b/updates/33_TesseractOcr.update
deleted file mode 100644
index fd34209..0000000
--- a/updates/33_TesseractOcr.update
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-
-SRCDIR=`grep 'SRCDIR' /etc/mailcleaner.conf | cut -d ' ' -f3`
-
-echo "Installing TesseractOcr binary and OpenCV dependency"
-apt-get update
-apt-get --force-yes -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" -o Dpkg::Options::="--force-confmiss" install tesseract-ocr libopencv-dev
-
-echo "Installing TesseractOCR plugin"
-cd /root
-git clone https://github.com/MailCleaner/TesseractOcr.git
-cd TesseractOcr
-git checkout 3.00
-perl ./Makefile.PL INSTALLSITEARCH=/usr/local/share/perl/5.20.2
-make
-make install
-
-echo "Renaming DB column use_fuzzyocr to generic use_ocr"
-cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
-ALTER TABLE antispam CHANGE use_fuzzyocr use_ocr tinyint(1);
-EOF
-
-echo "Adding tocr_config table"
-cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
-CREATE TABLE IF NOT EXISTS tocr_config (preprocess tinyint, msg_timeout int, img_timeout int, skip_jpg tinyint, skip_png tinyint, skip_gif tinyint, skip_bmp tinyint, skip_tif tinyint, skip_pdf tinyint, min_size int, max_size int, min_x int, min_y int, max_x int, max_y int, min_area int, max_area int);
-EOF
-
-echo "Initializing tocr_config table"
-cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
-INSERT INTO tocr_config (preprocess, msg_timeout, img_timeout, skip_jpg, skip_png, skip_gif, skip_bmp, skip_tif, skip_pdf, min_size, max_size, min_x, min_y, max_x, max_y, min_area, max_area) VALUES (1, 15, 5, 0, 0, 0, 0, 0, 0, 1024, 4096000, 16, 16, 2048, 2048, 512, 2073600);
-EOF
-
-echo "Resyncing DB"
-/usr/mailcleaner/bin/resync_db.sh
-
-echo "Generating new config files"
-/usr/mailcleaner/bin/dump_mailscanner_config.pl

From 8066addc57d8c4e839b420fc8e0d6dbbbe52294c Mon Sep 17 00:00:00 2001
From: John Mertz
Date: Fri, 1 May 2020 20:15:59 +0200
Subject: [PATCH 6/6] Added TOcr update under new name

---
Review notes (commentary only; not part of the change):
* Repeats 3/6 with the same blob id (aa21d84); the remarks on the script body
  given there apply here unchanged.

 updates/36_TesseractOcr.update | 39 ++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 updates/36_TesseractOcr.update

diff --git a/updates/36_TesseractOcr.update b/updates/36_TesseractOcr.update
new file mode 100644
index 0000000..aa21d84
--- /dev/null
+++ b/updates/36_TesseractOcr.update
@@ -0,0 +1,39 @@
+#!/bin/bash
+export PATH="/usr/mailcleaner/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+
+SRCDIR=`grep 'SRCDIR' /etc/mailcleaner.conf | cut -d ' ' -f3`
+
+# Bonus: Throwing in opendmarc dependency libswitch-perl
+echo "Installing TesseractOcr binary and OpenCV dependency"
+apt-get update
+apt-get --force-yes -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" -o Dpkg::Options::="--force-confmiss" install tesseract-ocr libopencv-dev libswitch-perl
+
+echo "Installing TesseractOCR plugin"
+cd /root
+git clone https://github.com/MailCleaner/TesseractOcr.git
+cd TesseractOcr
+git checkout 3.00
+perl ./Makefile.PL INSTALLSITEARCH=/usr/local/share/perl/5.20.2
+make
+make install
+
+echo "Renaming DB column use_fuzzyocr to generic use_ocr"
+cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
+ALTER TABLE antispam CHANGE use_fuzzyocr use_ocr tinyint(1);
+EOF
+
+echo "Adding tocr_config table"
+cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
+CREATE TABLE IF NOT EXISTS tocr_config (preprocess tinyint, msg_timeout int, img_timeout int, skip_jpg tinyint, skip_png tinyint, skip_gif tinyint, skip_bmp tinyint, skip_tif tinyint, skip_pdf tinyint, min_size int, max_size int, min_x int, min_y int, max_x int, max_y int, min_area int, max_area int);
+EOF
+
+echo "Initializing tocr_config table"
+cat << EOF | ${SRCDIR}/bin/mc_mysql -m mc_config
+INSERT INTO tocr_config (preprocess, msg_timeout, img_timeout, skip_jpg, skip_png, skip_gif, skip_bmp, skip_tif, skip_pdf, min_size, max_size, min_x, min_y, max_x, max_y, min_area, max_area) VALUES (1, 15, 5, 0, 0, 0, 0, 0, 0, 1024, 4096000, 16, 16, 2048, 2048, 512, 2073600);
+EOF
+
+echo "Resyncing DB"
+/usr/mailcleaner/bin/resync_db.sh
+
+echo "Generating new config files"
+/usr/mailcleaner/bin/dump_mailscanner_config.pl