tesseract-ocr: update to 4.0.0.

Closes: #5380 [via git-merge-pr]
Signed-off-by: Jürgen Buchmüller <pullmoll@t-online.de>
This commit is contained in:
Piotr Wójcik 2018-12-03 21:46:48 +01:00 committed by Jürgen Buchmüller
parent 21b98df42a
commit 7f7ff14e13
No known key found for this signature in database
GPG key ID: 6764EC32352D0647
22 changed files with 194 additions and 34 deletions

View file

@ -2077,7 +2077,7 @@ libhttp_parser.so.2.8 http-parser-2.8.0_1
libmaa.so.4 libmaa-1.4.2_1 libmaa.so.4 libmaa-1.4.2_1
libcodeblocks.so.0 codeblocks-13.12_1 libcodeblocks.so.0 codeblocks-13.12_1
liblept.so.5 leptonica-1.73_1 liblept.so.5 leptonica-1.73_1
libtesseract.so.3 tesseract-ocr-3.02.02_1 libtesseract.so.4 tesseract-ocr-4.0.0_1
libffmpegthumbnailer.so.4 ffmpegthumbnailer-2.0.10_1 libffmpegthumbnailer.so.4 ffmpegthumbnailer-2.0.10_1
libopenraw.so.7 libopenraw-0.1.0_1 libopenraw.so.7 libopenraw-0.1.0_1
libopenrawgnome.so.7 libopenraw-0.1.0_1 libopenrawgnome.so.7 libopenraw-0.1.0_1

1
srcpkgs/tesseract-ocr-bre Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-cos Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-div Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-fao Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-fil Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-fry Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-gla Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-hye Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-kur_ara Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-ltz Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-mon Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-mri Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-oci Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-que Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-snd Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-sun Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-tat Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-ton Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

1
srcpkgs/tesseract-ocr-yor Symbolic link
View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -1,12 +0,0 @@
Add include required to build with musl libc
--- viewer/svutil.cpp 2012-03-03 12:53:33.000000000 +0100
+++ viewer/svutil.cpp 2015-05-28 17:51:46.917525843 +0200
@@ -39,6 +39,7 @@
#include <string.h>
#include <netdb.h>
#include <sys/socket.h>
+#include <sys/select.h>
#ifdef __linux__
#include <sys/prctl.h>
#endif

View file

@ -1,12 +1,11 @@
# Template file for 'tesseract-ocr' # Template file for 'tesseract-ocr'
pkgname=tesseract-ocr pkgname=tesseract-ocr
version=3.05.02 version=4.0.0
revision=3 revision=1
wrksrc=tesseract-${version} wrksrc="tesseract-${version}"
_tessdata_ver=074c37215b01ab8cc47a0e06ff7356383883d775
build_style=gnu-configure build_style=gnu-configure
configure_args="LIBLEPT_HEADERSDIR=${XBPS_CROSS_BASE}/usr/include" configure_args="LIBLEPT_HEADERSDIR=${XBPS_CROSS_BASE}/usr/include"
hostmakedepends="automake libtool pkg-config leptonica" hostmakedepends="automake libtool pkg-config leptonica libxslt asciidoc"
makedepends="cairo-devel pango-devel leptonica-devel icu-devel" makedepends="cairo-devel pango-devel leptonica-devel icu-devel"
short_desc="Tesseract Open Source OCR engine" short_desc="Tesseract Open Source OCR engine"
maintainer="Jürgen Buchmüller <pullmoll@t-online.de>" maintainer="Jürgen Buchmüller <pullmoll@t-online.de>"
@ -14,24 +13,25 @@ license="Apache-2.0"
homepage="https://github.com/tesseract-ocr/tesseract" homepage="https://github.com/tesseract-ocr/tesseract"
distfiles=" distfiles="
https://github.com/tesseract-ocr/tesseract/archive/${version}.tar.gz>${pkgname}-${version}.tar.gz https://github.com/tesseract-ocr/tesseract/archive/${version}.tar.gz>${pkgname}-${version}.tar.gz
https://github.com/tesseract-ocr/tessdata/archive/${_tessdata_ver}.tar.gz>tessdata-${_tessdata_ver}.tar.gz" https://github.com/tesseract-ocr/tessdata/archive/${version}.tar.gz>tessdata-${version}.tar.gz"
checksum=" checksum="a1f5422ca49a32e5f35c54dee5112b11b99928fc9f4ee6695cdc6768d69f61dd
494d64ffa7069498a97b909a0e65a35a213989e0184f1ea15332933a90d43445 38c637d3a1763f6c3d32e8f1d979f045668676ec5feb8ee1869ee77cedd31b08"
e33dea2118f447848a76e0fa5d50d45a2b8630cccc6adeb8d58221a1d09d6007"
# Create a package for one specific language $1 # Create a package for one specific language $1
pkg_lang() { pkg_lang() {
local f lang=$1 local f lang=$1
vmkdir usr/share/tessdata vmkdir usr/share/tessdata
for f in $(find ${wrksrc}/tessdata -name "${lang}.*" -o -name "${lang}_frak.*"); do for f in $(find ${wrksrc}/tessdata -name "${lang}.*" \
-o -name "${lang}_frak.*" \
-o -name "${lang}_vert.*" ); do
vinstall $f 644 usr/share/tessdata vinstall $f 644 usr/share/tessdata
rm $f rm $f
done done
} }
post_extract() { post_extract() {
mv ${XBPS_BUILDDIR}/tessdata-${_tessdata_ver}/* ${wrksrc}/tessdata mv ${XBPS_BUILDDIR}/tessdata-${version}/* ${wrksrc}/tessdata
rmdir ${XBPS_BUILDDIR}/tessdata-${_tessdata_ver} rmdir ${XBPS_BUILDDIR}/tessdata-${version}
} }
pre_configure() { pre_configure() {
NOCONFIGURE=1 ./autogen.sh NOCONFIGURE=1 ./autogen.sh
@ -39,6 +39,9 @@ pre_configure() {
post_build() { post_build() {
make ${makejobs} training make ${makejobs} training
} }
# Intentionally a no-op: submodule not in tarball, so no checks are run here.
do_check() {
	true
}
post_install() { post_install() {
local lang local lang
# Rename binary to avoid conflict with tesseract package # Rename binary to avoid conflict with tesseract package
@ -46,8 +49,6 @@ post_install() {
mv ${DESTDIR}/usr/share/man/man1/tesseract{,-ocr}.1 mv ${DESTDIR}/usr/share/man/man1/tesseract{,-ocr}.1
vdoc ChangeLog vdoc ChangeLog
vdoc README.md vdoc README.md
vdoc testing/eurotext.tif
vdoc testing/phototest.tif
vlicense ${FILESDIR}/COPYING LICENSE-tessdata vlicense ${FILESDIR}/COPYING LICENSE-tessdata
# Move the pseudo languages "equ" (math / equation detection) and # Move the pseudo languages "equ" (math / equation detection) and
# "osd" (orientation and script detection) to the main package # "osd" (orientation and script detection) to the main package
@ -113,13 +114,13 @@ tesseract-ocr-all_package() {
conflicts="tesseract-ocr-basic>=0" conflicts="tesseract-ocr-basic>=0"
short_desc+=" - all languages data" short_desc+=" - all languages data"
# All available languages # All available languages
for lang in afr amh ara asm aze aze_cyrl bel ben bod bos bul cat ceb \ for lang in afr amh ara asm aze aze_cyrl bel ben bod bos bre bul cat ceb \
ces chi_sim chi_tra chr cym dan deu dzo ell eng enm epo est eus \ ces chi_sim chi_tra chr cos cym dan deu div dzo ell eng enm epo est eus fao \
fas fin fra frk frm gle glg grc guj hat heb hin hrv hun iku ind isl ita \ fas fil fin fra frk frm fry gla gle glg grc guj hat heb hin hrv hun hye iku ind isl ita \
ita_old jav jpn kan kat kat_old kaz khm kir kor kur lao lat lav lit mal mar \ ita_old jav jpn kan kat kat_old kaz khm kir kor kur kur_ara lao lat lav lit ltz mal mar \
mkd mlt msa mya nep nld nor ori pan pol por pus ron rus san sin slk slv \ mkd mlt mon mri msa mya nep nld nor oci ori pan pol por que pus ron rus san sin slk slv \
spa spa_old sqi srp srp_latn swa swe syr tam tel tgk tgl tha tir tur \ snd spa spa_old sqi srp srp_latn sun swa swe syr tam tat tel tgk tgl tha tir ton tur \
uig ukr urd uzb uzb_cyrl vie yid; do uig ukr urd uzb uzb_cyrl vie yor yid; do
depends+=" tesseract-ocr-${lang}>=${version}_${revision}" depends+=" tesseract-ocr-${lang}>=${version}_${revision}"
done done
} }
@ -203,6 +204,14 @@ tesseract-ocr-bos_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Breton ("bre") language data.
tesseract-ocr-bre_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Breton language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-bul_package() { tesseract-ocr-bul_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -259,6 +268,14 @@ tesseract-ocr-chr_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Corsican ("cos") language data.
tesseract-ocr-cos_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Corsican language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-cym_package() { tesseract-ocr-cym_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -283,6 +300,14 @@ tesseract-ocr-deu_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Dhivehi ("div") language data.
tesseract-ocr-div_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Dhivehi language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-dzo_package() { tesseract-ocr-dzo_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -339,6 +364,14 @@ tesseract-ocr-eus_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Faroese ("fao") language data.
tesseract-ocr-fao_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Faroese language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-fas_package() { tesseract-ocr-fas_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -347,6 +380,14 @@ tesseract-ocr-fas_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Filipino ("fil") language data.
tesseract-ocr-fil_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Filipino language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-fin_package() { tesseract-ocr-fin_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -379,6 +420,22 @@ tesseract-ocr-frm_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Frisian ("fry") language data.
tesseract-ocr-fry_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Frisian language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the Scottish Gaelic ("gla") language data.
tesseract-ocr-gla_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Scottish Gaelic language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-gle_package() { tesseract-ocr-gle_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -451,6 +508,14 @@ tesseract-ocr-hun_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Armenian ("hye") language data.
tesseract-ocr-hye_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Armenian language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-iku_package() { tesseract-ocr-iku_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -571,6 +636,14 @@ tesseract-ocr-kur_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Kurdish (Arabic) ("kur_ara") language data.
tesseract-ocr-kur_ara_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Kurdish (Arabic) language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-lao_package() { tesseract-ocr-lao_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -603,6 +676,14 @@ tesseract-ocr-lit_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Luxembourgish ("ltz") language data.
tesseract-ocr-ltz_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Luxembourgish language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-mal_package() { tesseract-ocr-mal_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -635,6 +716,22 @@ tesseract-ocr-mlt_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Mongolian ("mon") language data.
tesseract-ocr-mon_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Mongolian language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the Maori ("mri") language data.
tesseract-ocr-mri_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Maori language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-msa_package() { tesseract-ocr-msa_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -675,6 +772,14 @@ tesseract-ocr-nor_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Occitan (post 1500) ("oci") language data.
tesseract-ocr-oci_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Occitan (post 1500) language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-ori_package() { tesseract-ocr-ori_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -715,6 +820,14 @@ tesseract-ocr-pus_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Quechua ("que") language data.
tesseract-ocr-que_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Quechua language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-ron_package() { tesseract-ocr-ron_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -763,6 +876,14 @@ tesseract-ocr-slv_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Sindhi ("snd") language data.
tesseract-ocr-snd_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Sindhi language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-spa_package() { tesseract-ocr-spa_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -803,6 +924,14 @@ tesseract-ocr-srp_latn_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Sundanese ("sun") language data.
tesseract-ocr-sun_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Sundanese language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-swa_package() { tesseract-ocr-swa_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -835,6 +964,14 @@ tesseract-ocr-tam_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Tatar ("tat") language data.
tesseract-ocr-tat_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Tatar language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-tel_package() { tesseract-ocr-tel_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -875,6 +1012,14 @@ tesseract-ocr-tir_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Tonga ("ton") language data.
tesseract-ocr-ton_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Tonga language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
tesseract-ocr-tur_package() { tesseract-ocr-tur_package() {
noarch=yes noarch=yes
depends="${sourcepkg}>=${version}_${revision}" depends="${sourcepkg}>=${version}_${revision}"
@ -939,3 +1084,11 @@ tesseract-ocr-yid_package() {
$(pkg_lang ${pkgname#tesseract-ocr-}) $(pkg_lang ${pkgname#tesseract-ocr-})
} }
} }
# Subpackage carrying the Yoruba ("yor") language data.
tesseract-ocr-yor_package() {
	noarch=yes
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Yoruba language data"
	pkg_install() {
		# The language code is pkgname with the "tesseract-ocr-" prefix removed.
		# Call pkg_lang directly: wrapping it in $(...) would run it in a
		# subshell and then execute whatever it printed as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}