tesseract-ocr: update to 4.1.1

Also add all script languages as separate subpackages.
This commit is contained in:
Jürgen Buchmüller 2019-12-28 15:10:27 +01:00
parent 56f113ec21
commit aee0563a01
34 changed files with 322 additions and 12 deletions

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -0,0 +1 @@
tesseract-ocr

View file

@ -1,11 +1,12 @@
# Template file for 'tesseract-ocr'
pkgname=tesseract-ocr
version=4.1.0
revision=2
version=4.1.1
revision=1
# Version of the separate tessdata archive (language/script data); it is
# fetched as its own distfile and is not tied to ${version}.
_tessdataver=4.0.0
wrksrc="tesseract-${version}"
build_style=gnu-configure
configure_args="LIBLEPT_HEADERSDIR=${XBPS_CROSS_BASE}/usr/include $(vopt_enable openmp)"
# Build the "training" make target in addition to the default "all" target.
make_build_args="all training"
hostmakedepends="automake libtool pkg-config leptonica libxslt asciidoc"
makedepends="cairo-devel pango-devel leptonica-devel $(vopt_if openmp libgomp-devel) icu-devel"
short_desc="Tesseract Open Source OCR engine"
@ -15,7 +16,7 @@ homepage="https://github.com/tesseract-ocr/tesseract"
distfiles="
https://github.com/tesseract-ocr/tesseract/archive/${version}.tar.gz>${pkgname}-${version}.tar.gz
https://github.com/tesseract-ocr/tessdata/archive/${_tessdataver}.tar.gz>tessdata-${_tessdataver}.tar.gz"
checksum="5c5ed5f1a76888dc57a83704f24ae02f8319849f5c4cf19d254296978a1a1961
checksum="2a66ff0d8595bff8f04032165e6c936389b1e5727c3ce5a27b3e059d218db1cb
38c637d3a1763f6c3d32e8f1d979f045668676ec5feb8ee1869ee77cedd31b08"
build_options="openmp"
@ -24,12 +25,21 @@ desc_option_openmp="Enable Open MP (gomp)"
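# Install the tessdata files for one language or script $1 into the package.
# A "script-" prefix on $1 selects the usr/share/tessdata/script subdirectory.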
pkg_lang() {
local f lang=$1
vmkdir usr/share/tessdata
local f script lang=$1
case "$1" in
script-*)
script=/script
lang=${1#script-}
;;
*) script=
lang=$1
;;
esac
vmkdir usr/share/tessdata${script}
for f in $(find ${wrksrc}/tessdata -name "${lang}.*" \
-o -name "${lang}_frak.*" \
-o -name "${lang}_vert.*" ); do
vinstall $f 644 usr/share/tessdata
-o -name "${lang}_frak.*" \
-o -name "${lang}_vert.*" ); do
vinstall $f 644 usr/share/tessdata${script}
rm $f
done
}
@ -41,9 +51,6 @@ post_extract() {
# Run upstream's autogen.sh before the configure phase.
pre_configure() {
# NOCONFIGURE=1 is passed in the environment of this one invocation only
# (presumably so autogen.sh does not run ./configure itself — TODO confirm).
NOCONFIGURE=1 ./autogen.sh
}
post_build() {
make ${makejobs} training
}
# Check phase is intentionally a no-op for this package.
do_check() {
: # nothing to run: a required submodule is not included in the tarball
}
@ -125,7 +132,13 @@ tesseract-ocr-all_package() {
ita_old jav jpn kan kat kat_old kaz khm kir kor kur kur_ara lao lat lav lit ltz mal mar \
mkd mlt mon mri msa mya nep nld nor oci ori pan pol por que pus ron rus san sin slk slv \
snd spa spa_old sqi srp srp_latn sun swa swe syr tam tat tel tgk tgl tha tir ton tur \
uig ukr urd uzb uzb_cyrl vie yor yid; do
uig ukr urd uzb uzb_cyrl vie yid yor \
script-Arabic script-Armenian script-Bengali script-Canadian_Aboriginal script-Cherokee \
script-Cyrillic script-Devanagari script-Ethiopic script-Fraktur script-Georgian \
script-Greek script-Gujarati script-Gurmukhi script-HanS script-HanT script-Hangul \
script-Hebrew script-Japanese script-Kannada script-Khmer script-Lao script-Latin \
script-Malayalam script-Myanmar script-Oriya script-Sinhala script-Syriac script-Tamil \
script-Telugu script-Thaana script-Thai script-Tibetan script-Vietnamese; do
depends+=" tesseract-ocr-${lang}>=${version}_${revision}"
done
}
@ -1097,3 +1110,267 @@ tesseract-ocr-yor_package() {
$(pkg_lang ${pkgname#tesseract-ocr-})
}
}
# Subpackage carrying the trained data for the Arabic script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Arabic_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Arabic script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Armenian script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Armenian_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Armenian script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Bengali script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Bengali_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Bengali script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Canadian Aboriginal script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Canadian_Aboriginal_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Canadian Aboriginal script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Cherokee script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Cherokee_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Cherokee script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Cyrillic script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Cyrillic_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Cyrillic script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Devanagari script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Devanagari_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Devanagari script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Ethiopic script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Ethiopic_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Ethiopic script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Fraktur script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Fraktur_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Fraktur script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Georgian script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Georgian_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Georgian script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Greek script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Greek_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Greek script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Gujarati script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Gujarati_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Gujarati script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Gurmukhi script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Gurmukhi_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Gurmukhi script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the HanS script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-HanS_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - HanS script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the HanT script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-HanT_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - HanT script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Hangul script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Hangul_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Hangul script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Hebrew script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Hebrew_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Hebrew script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Japanese script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Japanese_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Japanese script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Kannada script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Kannada_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Kannada script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Khmer script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Khmer_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Khmer script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Lao script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Lao_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Lao script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Latin script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Latin_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Latin script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Malayalam script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Malayalam_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Malayalam script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Myanmar script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Myanmar_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Myanmar script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Oriya script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Oriya_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Oriya script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Sinhala script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Sinhala_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Sinhala script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Syriac script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Syriac_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Syriac script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Tamil script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Tamil_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Tamil script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Telugu script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Telugu_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Telugu script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Thaana script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Thaana_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Thaana script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Thai script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Thai_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Thai script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Tibetan script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Tibetan_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Tibetan script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}
# Subpackage carrying the trained data for the Vietnamese script.
# The "script-" part of the package name makes pkg_lang install the files
# under usr/share/tessdata/script.
tesseract-ocr-script-Vietnamese_package() {
	archs=noarch
	depends="${sourcepkg}>=${version}_${revision}"
	short_desc+=" - Vietnamese script data"
	pkg_install() {
		# Invoke the helper as a plain command; wrapping it in $(...) would
		# run it in a subshell and execute its output as a command.
		pkg_lang "${pkgname#tesseract-ocr-}"
	}
}