tesseract-ocr: update to 4.1.1
Also add all script languages as separate subpackages.
This commit is contained in:
parent
56f113ec21
commit
aee0563a01
34 changed files with 322 additions and 12 deletions
1
srcpkgs/tesseract-ocr-script-Arabic
Symbolic link
1
srcpkgs/tesseract-ocr-script-Arabic
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Armenian
Symbolic link
1
srcpkgs/tesseract-ocr-script-Armenian
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Bengali
Symbolic link
1
srcpkgs/tesseract-ocr-script-Bengali
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Canadian_Aboriginal
Symbolic link
1
srcpkgs/tesseract-ocr-script-Canadian_Aboriginal
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Cherokee
Symbolic link
1
srcpkgs/tesseract-ocr-script-Cherokee
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Cyrillic
Symbolic link
1
srcpkgs/tesseract-ocr-script-Cyrillic
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Devanagari
Symbolic link
1
srcpkgs/tesseract-ocr-script-Devanagari
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Ethiopic
Symbolic link
1
srcpkgs/tesseract-ocr-script-Ethiopic
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Fraktur
Symbolic link
1
srcpkgs/tesseract-ocr-script-Fraktur
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Georgian
Symbolic link
1
srcpkgs/tesseract-ocr-script-Georgian
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Greek
Symbolic link
1
srcpkgs/tesseract-ocr-script-Greek
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Gujarati
Symbolic link
1
srcpkgs/tesseract-ocr-script-Gujarati
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Gurmukhi
Symbolic link
1
srcpkgs/tesseract-ocr-script-Gurmukhi
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-HanS
Symbolic link
1
srcpkgs/tesseract-ocr-script-HanS
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-HanT
Symbolic link
1
srcpkgs/tesseract-ocr-script-HanT
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Hangul
Symbolic link
1
srcpkgs/tesseract-ocr-script-Hangul
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Hebrew
Symbolic link
1
srcpkgs/tesseract-ocr-script-Hebrew
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Japanese
Symbolic link
1
srcpkgs/tesseract-ocr-script-Japanese
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Kannada
Symbolic link
1
srcpkgs/tesseract-ocr-script-Kannada
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Khmer
Symbolic link
1
srcpkgs/tesseract-ocr-script-Khmer
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Lao
Symbolic link
1
srcpkgs/tesseract-ocr-script-Lao
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Latin
Symbolic link
1
srcpkgs/tesseract-ocr-script-Latin
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Malayalam
Symbolic link
1
srcpkgs/tesseract-ocr-script-Malayalam
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Myanmar
Symbolic link
1
srcpkgs/tesseract-ocr-script-Myanmar
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Oriya
Symbolic link
1
srcpkgs/tesseract-ocr-script-Oriya
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Sinhala
Symbolic link
1
srcpkgs/tesseract-ocr-script-Sinhala
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Syriac
Symbolic link
1
srcpkgs/tesseract-ocr-script-Syriac
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Tamil
Symbolic link
1
srcpkgs/tesseract-ocr-script-Tamil
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Telugu
Symbolic link
1
srcpkgs/tesseract-ocr-script-Telugu
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Thaana
Symbolic link
1
srcpkgs/tesseract-ocr-script-Thaana
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Thai
Symbolic link
1
srcpkgs/tesseract-ocr-script-Thai
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Tibetan
Symbolic link
1
srcpkgs/tesseract-ocr-script-Tibetan
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
1
srcpkgs/tesseract-ocr-script-Vietnamese
Symbolic link
1
srcpkgs/tesseract-ocr-script-Vietnamese
Symbolic link
|
@@ -0,0 +1 @@
|
|||
tesseract-ocr
|
|
@@ -1,11 +1,12 @@
|
|||
# Template file for 'tesseract-ocr'
|
||||
pkgname=tesseract-ocr
|
||||
version=4.1.0
|
||||
revision=2
|
||||
version=4.1.1
|
||||
revision=1
|
||||
_tessdataver=4.0.0
|
||||
wrksrc="tesseract-${version}"
|
||||
build_style=gnu-configure
|
||||
configure_args="LIBLEPT_HEADERSDIR=${XBPS_CROSS_BASE}/usr/include $(vopt_enable openmp)"
|
||||
make_build_args="all training"
|
||||
hostmakedepends="automake libtool pkg-config leptonica libxslt asciidoc"
|
||||
makedepends="cairo-devel pango-devel leptonica-devel $(vopt_if openmp libgomp-devel) icu-devel"
|
||||
short_desc="Tesseract Open Source OCR engine"
|
||||
|
@@ -15,7 +16,7 @@ homepage="https://github.com/tesseract-ocr/tesseract"
|
|||
distfiles="
|
||||
https://github.com/tesseract-ocr/tesseract/archive/${version}.tar.gz>${pkgname}-${version}.tar.gz
|
||||
https://github.com/tesseract-ocr/tessdata/archive/${_tessdataver}.tar.gz>tessdata-${_tessdataver}.tar.gz"
|
||||
checksum="5c5ed5f1a76888dc57a83704f24ae02f8319849f5c4cf19d254296978a1a1961
|
||||
checksum="2a66ff0d8595bff8f04032165e6c936389b1e5727c3ce5a27b3e059d218db1cb
|
||||
38c637d3a1763f6c3d32e8f1d979f045668676ec5feb8ee1869ee77cedd31b08"
|
||||
|
||||
build_options="openmp"
|
||||
|
@@ -24,12 +25,21 @@ desc_option_openmp="Enable Open MP (gomp)"
|
|||
|
||||
# Create a package for one specific language $1
|
||||
pkg_lang() {
|
||||
local f lang=$1
|
||||
vmkdir usr/share/tessdata
|
||||
local f script lang=$1
|
||||
case "$1" in
|
||||
script-*)
|
||||
script=/script
|
||||
lang=${1#script-}
|
||||
;;
|
||||
*) script=
|
||||
lang=$1
|
||||
;;
|
||||
esac
|
||||
vmkdir usr/share/tessdata${script}
|
||||
for f in $(find ${wrksrc}/tessdata -name "${lang}.*" \
|
||||
-o -name "${lang}_frak.*" \
|
||||
-o -name "${lang}_vert.*" ); do
|
||||
vinstall $f 644 usr/share/tessdata
|
||||
-o -name "${lang}_frak.*" \
|
||||
-o -name "${lang}_vert.*" ); do
|
||||
vinstall $f 644 usr/share/tessdata${script}
|
||||
rm $f
|
||||
done
|
||||
}
|
||||
|
@@ -41,9 +51,6 @@ post_extract() {
|
|||
pre_configure() {
|
||||
NOCONFIGURE=1 ./autogen.sh
|
||||
}
|
||||
post_build() {
|
||||
make ${makejobs} training
|
||||
}
|
||||
do_check() {
|
||||
: # submodule not in tarball
|
||||
}
|
||||
|
@@ -125,7 +132,13 @@ tesseract-ocr-all_package() {
|
|||
ita_old jav jpn kan kat kat_old kaz khm kir kor kur kur_ara lao lat lav lit ltz mal mar \
|
||||
mkd mlt mon mri msa mya nep nld nor oci ori pan pol por que pus ron rus san sin slk slv \
|
||||
snd spa spa_old sqi srp srp_latn sun swa swe syr tam tat tel tgk tgl tha tir ton tur \
|
||||
uig ukr urd uzb uzb_cyrl vie yor yid; do
|
||||
uig ukr urd uzb uzb_cyrl vie yid yor \
|
||||
script-Arabic script-Armenian script-Bengali script-Canadian_Aboriginal script-Cherokee \
|
||||
script-Cyrillic script-Devanagari script-Ethiopic script-Fraktur script-Georgian \
|
||||
script-Greek script-Gujarati script-Gurmukhi script-HanS script-HanT script-Hangul \
|
||||
script-Hebrew script-Japanese script-Kannada script-Khmer script-Lao script-Latin \
|
||||
script-Malayalam script-Myanmar script-Oriya script-Sinhala script-Syriac script-Tamil \
|
||||
script-Telugu script-Thaana script-Thai script-Tibetan script-Vietnamese; do
|
||||
depends+=" tesseract-ocr-${lang}>=${version}_${revision}"
|
||||
done
|
||||
}
|
||||
|
@@ -1097,3 +1110,267 @@ tesseract-ocr-yor_package() {
|
|||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Arabic_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Arabic script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Armenian_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Armenian script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Bengali_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Bengali script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Canadian_Aboriginal_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Canadian Aboriginal script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Cherokee_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Cherokee script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Cyrillic_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Cyrillic script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Devanagari_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Devanagari script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Ethiopic_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Ethiopic script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Fraktur_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Fraktur script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Georgian_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Georgian script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Greek_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Greek script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Gujarati_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Gujarati script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Gurmukhi_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Gurmukhi script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-HanS_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - HanS script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-HanT_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - HanT script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Hangul_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Hangul script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Hebrew_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Hebrew script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Japanese_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Japanese script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Kannada_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Kannada script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Khmer_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Khmer script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Lao_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Lao script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Latin_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Latin script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Malayalam_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Malayalam script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Myanmar_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Myanmar script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Oriya_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Oriya script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Sinhala_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Sinhala script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Syriac_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Syriac script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Tamil_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Tamil script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Telugu_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Telugu script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Thaana_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Thaana script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Thai_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Thai script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Tibetan_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Tibetan script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
tesseract-ocr-script-Vietnamese_package() {
|
||||
archs=noarch
|
||||
depends="${sourcepkg}>=${version}_${revision}"
|
||||
short_desc+=" - Vietnamese script data"
|
||||
pkg_install() {
|
||||
$(pkg_lang ${pkgname#tesseract-ocr-})
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue