[linux] Re: file encoding

Wybo Dekker wybo op servalys.nl
Za Jul 14 13:52:47 CEST 2007


Voor de liefhebber - dit is wat ik er via Hugo's file(1)-suggestie 
voor gemaakt heb (jammer dat file voor latin1, en misschien ook voor 
de andere ISO-8859-sets, ISO-8859 geeft en niet ISO-8859-1):

#!/bin/sh
# guessencoding - guess the encoding of an input file or stdin
# Usage:
#    guessencoding file
#    guessencoding <file

# die MESSAGE...
# Print MESSAGE (all arguments joined by single spaces) on stderr, run the
# cleanup command held in $delete (if any), and exit with status 1.
die() {
   # printf with "$*" keeps the message verbatim: no word splitting, no glob
   # expansion, and a leading "-n"/"-e" is not taken as an echo option.
   printf '%s\n' "$*" >&2
   # Deliberately unquoted: $delete holds a whole command line or is
   # empty/unset, in which case this expands to nothing.
   ${delete-}
   exit 1
}
f="$1"
# Start with no cleanup command, so that a "delete" variable inherited from
# the environment can never be executed as a command by die.
delete=
if [ -z "$f" ]; then
   # No file argument: save stdin in a temporary file so that it can be
   # tested for emptiness and handed to file(1).
   f=$(mktemp) || die "cannot create temporary file"
   # Remove the temporary file on every exit path, including die.
   trap 'rm -f -- "$f"' EXIT
   # Turn the usual termination signals into a normal exit, because some
   # shells do not run the EXIT trap when they are killed by a signal.
   trap 'exit 129' HUP
   trap 'exit 130' INT
   trap 'exit 143' TERM
   cat >"$f" || die "cannot save standard input"
elif [ ! -f "$f" ]; then
   die not a regular file
fi
if [ -s "$f" ]; then # not empty?
   # -b drops the "NAME: " prefix, so the checks below look only at the
   # description and cannot be fooled by a file name containing "text".
   description=$(file -b -- "$f") || die "file(1) failed on $f"
   case "$description" in
   *text*) ;;
   *) die "not a text file ($f: $description)" ;;
   esac
   # The encoding is taken to be the first word of the description.
   # NOTE(review): this depends on the wording of the installed file(1);
   # confirm that it still puts the encoding first.
   encoding=${description%% *}
   # file(1) does not distinguish between ISO-8859-x :-((
   if [ "$encoding" = ISO-8859 ]; then encoding=$encoding-1; fi
   printf '%s\n' "$encoding"
else # denote empty file as ASCII:
   printf '%s\n' ASCII
fi

-- 
Wybo



More information about the Linux mailing list