Sprach- und literaturwissenschaftliche Fakultät - Korpuslinguistik und Morphologie

normalizeDipl.sh

text/x-sh normalizeDipl.sh — 1 KB

Dateiinhalt

#!/bin/bash
# normalizeDipl.sh - apply the RIDGES substitution rules to a UTF-8 text file
# and convert it to Unicode Normalization Form C (NFC). The file is rewritten
# in place, and only after every processing step has succeeded.
#  2014-11-13, Uwe Springmann
#
# usage: ./normalizeDipl.sh <filename>
#
# Needs perl and ICU's uconv (looked up as uconv.exe first, then as uconv).
# Exit status: 0 on success, 1 if a step failed, 2 on a usage error.

# Perl substitutions applied to every input line, in this order.
# All non-ASCII characters are written as \x{...} code point escapes: a
# precomposed letter (e.g. U+012B) and its combining spelling (i + U+0304)
# look identical in an editor but are different sequences, and an editor that
# normalizes this script would silently merge them. Every accented letter is
# therefore matched in both spellings. Replacements may be left decomposed;
# the NFC step that follows composes them.
readonly SUBSTITUTION_RULES='
  # RIDGES specific rules: a macron becomes a tilde.
  # Combining spelling: base letter + COMBINING MACRON -> + COMBINING TILDE.
  s/([aeimnou])\x{0304}/$1\x{0303}/g;
  # Precomposed a/e/i/o/u WITH MACRON -> the same letter WITH TILDE.
  tr/\x{0101}\x{0113}\x{012B}\x{014D}\x{016B}/\x{00E3}\x{1EBD}\x{0129}\x{00F5}\x{0169}/;

  s/\x{20AC}/\x{00F0}/g;      # EURO SIGN -> LATIN SMALL LETTER ETH
  s/v\x{0364}/v\x{0309}/g;    # v + COMBINING SMALL E -> v + COMBINING HOOK ABOVE
  s/\$/\x{A770}/g;            # DOLLAR SIGN -> MODIFIER LETTER US

  # u WITH DOUBLE ACUTE and i WITH ACUTE are hard to distinguish from
  # u WITH DIAERESIS and plain i in recognition.
  s/\x{0171}|u\x{030B}/\x{00FC}/g;
  s/\x{00ED}|i\x{0301}/i/g;
'

# Print a diagnostic, prefixed with the script name, to stderr.
err() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

# Print the name of the first available uconv command; fail if there is none.
find_uconv() {
  local candidate
  for candidate in uconv.exe uconv; do
    if command -v "$candidate" >/dev/null 2>&1; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 1
}

#######################################
# Apply SUBSTITUTION_RULES to a file and normalize the result to NFC.
# Arguments: $1 - path of the UTF-8 text file to rewrite
# Outputs:   diagnostics on stderr
# Returns:   0 on success; 1 on failure, in which case the file is unchanged
#            unless the final copy-back itself was interrupted
#######################################
normalize_dipl() {
  local file=$1 uconv_bin subst_tmp nfc_tmp status=0

  # A leading dash would make the path look like an option to the tools below.
  if [[ "$file" == -* ]]; then
    file=./$file
  fi

  if [[ ! -f "$file" || ! -r "$file" || ! -w "$file" ]]; then
    err "not a readable and writable regular file: $file"
    return 1
  fi
  if ! command -v perl >/dev/null 2>&1; then
    err "perl not found"
    return 1
  fi
  if ! uconv_bin=$(find_uconv); then
    err "neither uconv.exe nor uconv found"
    return 1
  fi

  # Scratch files live next to the input so that uconv receives paths of the
  # same form as the one the caller supplied; mktemp avoids clobbering an
  # existing file (the fixed "<file>.neu" name used previously could).
  if ! subst_tmp=$(mktemp -- "${file}.XXXXXX"); then
    err "cannot create a temporary file next to $file"
    return 1
  fi
  if ! nfc_tmp=$(mktemp -- "${file}.XXXXXX"); then
    err "cannot create a temporary file next to $file"
    rm -f -- "$subst_tmp"
    return 1
  fi

  # -CS makes perl decode STDIN and encode STDOUT as UTF-8, so the \x{...}
  # escapes in the rules match whole characters.
  if ! perl -CS -pe "$SUBSTITUTION_RULES" <"$file" >"$subst_tmp"; then
    err "substitution failed for $file"
    status=1
  elif ! "$uconv_bin" -f utf8 -t utf8 -x nfc -o "$nfc_tmp" "$subst_tmp"; then
    err "NFC normalization failed for $file"
    status=1
  # Copy the content back instead of renaming, so the file keeps its
  # permissions; this is reached only when both steps above succeeded.
  elif ! cat -- "$nfc_tmp" >"$file"; then
    err "cannot write result to $file"
    status=1
  fi

  rm -f -- "$subst_tmp" "$nfc_tmp"
  return "$status"
}

# Entry point: validate the command line and process the file named by $1.
# As before, arguments after the first are ignored.
main() {
  if (($# < 1)); then
    printf 'usage: %s <filename>\n' "${0##*/}" >&2
    return 2
  fi
  normalize_dipl "$1"
}

# Last command of the script: its status becomes the script's exit status.
main "$@"