about summary refs log tree commit diff stats
path: root/community/tesseract-ocr
diff options
context:
space:
mode:
author Carlo Landmeter <clandmeter@gmail.com> 2017-08-01 14:36:13 +0200
committer Carlo Landmeter <clandmeter@gmail.com> 2017-08-01 14:37:43 +0200
commit f3028937426b9a0cea16640e2789e6eb0e1cda0f (patch)
tree bf3d8401ccbca2c837ddca19b5e464382f6553cb /community/tesseract-ocr
parent b6189305eb1c942a2568b556aa0f0f03f1717867 (diff)
download aports-f3028937426b9a0cea16640e2789e6eb0e1cda0f.tar.bz2
aports-f3028937426b9a0cea16640e2789e6eb0e1cda0f.tar.xz
community/tesseract-ocr: add ocr languages
and training tools/data.
Diffstat (limited to 'community/tesseract-ocr')
-rw-r--r-- community/tesseract-ocr/APKBUILD 46
1 files changed, 36 insertions, 10 deletions
diff --git a/community/tesseract-ocr/APKBUILD b/community/tesseract-ocr/APKBUILD
index 1b4f1365c9..19726233b2 100644
--- a/community/tesseract-ocr/APKBUILD
+++ b/community/tesseract-ocr/APKBUILD
@@ -2,22 +2,35 @@
# Maintainer: Francesco Colista <fcolista@alpinelinux.org>
pkgname=tesseract-ocr
pkgver=3.05.01
-pkgrel=0
+_tdver=3.04.00
+pkgrel=1
pkgdesc="open source OCR engine"
url="https://github.com/tesseract-ocr/tesseract/releases"
arch="all"
license="Apache"
depends=""
depends_dev=""
-makedepends="automake autoconf libtool leptonica-dev pango-dev icu-dev cairo-dev"
+makedepends="automake autoconf libtool leptonica-dev pango-dev icu-dev
+ cairo-dev"
subpackages="$pkgname-dev $pkgname-doc"
-source="$pkgname-$pkgver.tar.gz::https://github.com/$pkgname/${pkgname/-*}/archive/$pkgver.tar.gz"
+source="$pkgname-$pkgver.tar.gz::https://github.com/$pkgname/${pkgname/-*}/archive/$pkgver.tar.gz
+ https://github.com/tesseract-ocr/tessdata/archive/${_tdver}/tessdata-${_tdver}.tar.gz
+ "
+
+_langs="afr ara aze bel ben bul cat ces chi_sim chi_tra chr dan deu ell
+enm epo equ est eus fin fra frk frm glg grc heb hin hrv hun ind isl ita
+ita_old jpn kan kor lav lit mal mkd mlt msa nld nor pol por ron rus slk
+slv spa spa_old sqi srp swa swe tam tel tgl tha tur ukr vie"
+
+for _lang in $_langs; do
+ subpackages="$subpackages $pkgname-data-$_lang:_lang_data:noarch"
+done
builddir="$srcdir"/${pkgname/-*}-$pkgver
build() {
cd "$builddir"
- ./autogen.sh || return 1
+ ./autogen.sh
./configure \
--build=$CBUILD \
--host=$CHOST \
@@ -26,15 +39,28 @@ build() {
--mandir=/usr/share/man \
--infodir=/usr/share/info \
--localstatedir=/var \
- --disable-static \
- --disable-graphics \
- || return 1
- make || return 1
+ --disable-static
+ make
+ make training
}
package() {
cd "$builddir"
- make DESTDIR="$pkgdir" install || return 1
+ make DESTDIR="$pkgdir" install
+ make DESTDIR="$pkgdir" training-install
+ install -D "$srcdir"/tessdata-$_tdver/eng.* \
+ "$srcdir"/tessdata-$_tdver/osd.* \
+ "$pkgdir"/usr/share/tessdata/
+}
+
+_lang_data() {
+ local lang="${subpkgname#$pkgname-data-}"
+ pkgdesc="Tesseract language data for $lang"
+ depends="$pkgname"
+ mkdir -p "$subpkgdir"/usr/share/tessdata
+ mv "$srcdir"/tessdata-$_tdver/$lang.* \
+ "$subpkgdir"/usr/share/tessdata/
}
-sha512sums="a49c20c98386684cd89582e57b772811204fad8e5ff18214fb0da109f73629c70845054985e31e8deeb49107fbcf56e546aff661f08eb5dd60fbf83dbe976e81 tesseract-ocr-3.05.01.tar.gz"
+sha512sums="a49c20c98386684cd89582e57b772811204fad8e5ff18214fb0da109f73629c70845054985e31e8deeb49107fbcf56e546aff661f08eb5dd60fbf83dbe976e81 tesseract-ocr-3.05.01.tar.gz
+4fbb66137c729e16c7a9e35b09916a45c1bb5ec5a7002a22647e0b10975362cb44c6d6c0c997baf25866f78749ec2d4a86317ec3fb664bd963243e230516d162 tessdata-3.04.00.tar.gz"