normalizeDipl.sh
normalizeDipl.sh
— 1 KB
Dateiinhalt
#!/bin/bash
# normalizeDipl.sh -- apply the RIDGES-specific character replacements to a
# text file in place, then normalize the file to Unicode NFC.
# 2014-11-13, Uwe Springmann
#
# usage: ./normalizeDipl.sh <filename>
#
# Requires perl and ICU's uconv in PATH (looked up as uconv.exe first, then
# uconv). The file named by the first argument is rewritten in place; a
# temporary "<filename>.neu" is created next to it during the NFC step.
# Exit status: 0 on success, 2 on missing argument, 1 on any other error.

set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if (( $# < 1 )); then
  printf 'usage: %s <filename>\n' "${0##*/}" >&2
  exit 2
fi

file=$1

# A leading "-" would be parsed as an option by the tools below.
case $file in
  -*) file=./$file ;;
esac

[[ -f "$file" ]] || die "not a regular file: $file"

command -v perl >/dev/null 2>&1 || die "perl not found in PATH"

# Prefer the Windows binary name, fall back to the plain name used elsewhere.
uconv_bin=
for candidate in uconv.exe uconv; do
  if command -v "$candidate" >/dev/null 2>&1; then
    uconv_bin=$candidate
    break
  fi
done
[[ -n "$uconv_bin" ]] || die "uconv (ICU) not found in PATH"

# All replacement rules run in one in-place perl pass. Every input line gets
# the substitutions in the order listed, so later rules see the output of
# earlier ones.
# NOTE: the perl program is single-quoted for the shell; do not put an
# apostrophe inside it.
perl -C1 -i -pe '
  # RIDGES specific rules
  s/m̄/m̃/g;
  s/n̄/ñ/g;
  s/ā/ã/g;
  s/ē/ẽ/g;
  s/ī/ĩ/g;
  s/ī/ĩ/g;   # additive Unicode (presumably the combining-character form -- confirm)
  s/ō/õ/g;
  s/ō/õ/g;   # additive Unicode
  s/ū/ũ/g;
  s/ū/ũ/g;   # additive Unicode
  s/€/ð/g;
  s/vͤ/v̉/g;
  # $ character needs to be escaped
  s/\$/ꝰ/g;
  # ű, í are hard to distinguish from ü,i in recognition
  s/ű/ü/g;
  s/í/i/g;
' "$file"

# Normalization to NFC. The result is written to a temporary file first and
# only moved over the original when uconv succeeded (set -e aborts before the
# mv otherwise); the trap removes a leftover temporary file on failure.
tmp="${file}.neu"
trap 'rm -f -- "$tmp"' EXIT
"$uconv_bin" -f utf8 -t utf8 -x nfc -o "$tmp" "$file"
mv -- "$tmp" "$file"