UTF7.sa
Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details
-------------------------> GNU Sather - sourcefile <-------------------------
-- Copyright (C) 2000 by K Hopper, University of Waikato, New Zealand --
-- This file is part of the GNU Sather library. It is free software; you may --
-- redistribute and/or modify it under the terms of the GNU Library General --
-- Public License (LGPL) as published by the Free Software Foundation; --
-- either version 2 of the license, or (at your option) any later version. --
-- This library is distributed in the hope that it will be useful, but --
-- WITHOUT ANY WARRANTY without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. See Doc/LGPL for more details. --
-- The license text is also available from: Free Software Foundation, Inc., --
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --
--------------> Please email comments to <bug-sather@gnu.org> <--------------
class UTF7 < $IS_EQ, $IMMUTABLE
class UTF7 < $IS_EQ, $IMMUTABLE is
-- This class provides for conversion of encoding sequences (ie text
-- strings in some encoding) to a sequence of one or more octets in which
-- a clear octet is only found when the original encoding was void.
-- The algorithm provided is designed to achieve the translation given
-- in the following table in which v signifies a value bit of the original
-- code.
-- Bits Hex Min Hex Max Octet Sequence in Binary
--
-- 7 00000000 0000007f 0vvvvvvv
-- 11 00000080 000007FF 110vvvvv 10vvvvvv ** needed for 8-bit!!
-- 16 00000800 0000FFFF 1110vvvv 10vvvvvv 10vvvvvv
-- 21 00010000 001FFFFF 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
-- 26 00200000 03FFFFFF 111110vv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
-- 31 04000000 7FFFFFFF 1111110v 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
--
-- The original code value is just the concatenation of the v bits in
-- the multiple octet encoding. When there are multiple ways to encode a
-- value, for example UCS 0, only the shortest encoding is legal.
-- Version 1.3 Apr 2001. Copyright K Hopper, U of Waikato
-- Development History
-- -------------------
-- Date Who By Detail
-- ---- ------ ------
-- 11 Dec 96 kh Original
-- 30 Oct 98 kh Added pre/post conditions
-- 26 Mar 99 kh Revised for restructured text library
-- 27 Apr 01 kh Changed hex_str to str, eight_bit_str to
-- tgt_str for uniformity.
include AREF{OCTET} ;
include COMPARABLE ;
include BINARY ;
private const code_mask : OCTET := OCTET::create(0xA0) ;
private const code_value : OCTET := OCTET::create(0x80) ;
private const Six_Bits : CARD := 6 ;
private const Six_Bits_Val : CARD := 0x40 ;
private const Six_Bit_Mask : OCTET := OCTET::create(0x3F) ;
private const Valid_Bit_Limit : CARD := 5 ;
private const Seven_Bit_Max : CARD := 0x7F ;
private const Eleven_Bit_Max : CARD := 0x7FF ;
private const Sixteen_Bit_Max : CARD := 0xFFFF ;
private const Twenty_One_Bit_Max : CARD := 0x1FFFFF ;
private const Twenty_Six_Bit_Max : CARD := 0x3FFFFFF ;
private const Thirty_One_Bit_Max : CARD := 0x7FFFFFFF ;
private const lengths : ARRAY{OCTET} := |
OCTET::create(0),
OCTET::create(0xC0),
OCTET::create(0xE0),
OCTET::create(0xF0),
OCTET::create(0xF8),
OCTET::create(0xFC)
| ;
private count(
oct : OCTET
) : CARD
pre true
post (result = 0)
or ((result > 1)
and (result <= 6))
is
-- This routine returns the number of following code octets in a single
-- encoding representation or CARD::nil if invalid.
res : CARD := oct.highest(clearbit) ;
case res
when 0, CARD::nil then -- Oops! invalid!
return CARD::nil
when 7 then
return 0
else
return OCTET::Octet_Bits - res - 1
end
end ;
private code_size(
num : CARD
) : CARD
pre (num < Thirty_One_Bit_Max)
and (num > 0)
post (result > 1)
and (result <= 6)
is
-- This private routine returns the number of octets needed to encode
-- the value contained in num.
if (num > Thirty_One_Bit_Max) then
SYS_ERROR::create.error(SYS::str_for_tp(SYS::tp(self)),
SYS_EXCEPT::Bad_Value,num.str) ;
return void -- to keep compiler happy
elsif num <= Seven_Bit_Max then
return 1
elsif num <= Eleven_Bit_Max then
return 2
elsif num <= Sixteen_Bit_Max then
return 3
elsif num <= Twenty_One_Bit_Max then
return 4
elsif num <= Twenty_Six_Bit_Max then
return 5
else
return 6
end
end ;
create(
num : CARD
) : SAME is
-- This routine creates a valid UTF7 encoding corresponding to the given
-- numeric value treated as a bit pattern.
length : CARD := code_size(num) ;
me : SAME := new(length) ;
length := length - 1 ;
if length = 0 then
me[0] := OCTET::create(num)
else
val : CARD := num ;
loop
index : CARD := length.downto!(1) ;
code : OCTET := OCTET::create(val % Six_Bits_Val) ;
val := val / Six_Bits_Val ;
me[index] := code.bit_or(code_value)
end ;
me[0] := lengths[length].bit_or(OCTET::create(val))
end ;
return me
end ;
private build_list(
octets : BINSTR,
chunk_size : CARD
) : SAME
pre ~void(octets)
and (chunk_size > 0)
post ~void(result)
is
-- This private routine returns the UTF7 value of the binary encoding
-- which is processed in chunk_size 'bites'.
res : FLIST{OCTET} := FLIST{OCTET}::create ;
loop
numval : CARD := CARD::create(octets.chunk!(chunk_size)) ;
bits : QUADBITS := QUADBITS::create(numval) ;
if numval <= Seven_Bit_Max then
res := res.push(OCTET::create(numval))
else
loc_list : FLIST{OCTET} := FLIST{OCTET}::create ;
octet_cnt : CARD ; -- of six-bit octets!
if numval <= Eleven_Bit_Max then
octet_cnt := 1
elsif numval <= Sixteen_Bit_Max then
octet_cnt := 2
elsif numval <= Twenty_One_Bit_Max then
octet_cnt := 3
elsif numval <= Twenty_Six_Bit_Max then
octet_cnt := 4
else
octet_cnt := 5
end ;
loop
octet_cnt.times! ;
loc_val : OCTET := bits.octet.bit_and(Six_Bit_Mask) ;
loc_list := loc_list.push(loc_val.bit_or(code_value)) ;
bits := bits.right(Six_Bits)
end ;
loc_list := loc_list.push(lengths[octet_cnt].bit_or(
OCTET::create(bits.card))) ;
loop
(octet_cnt + 1).times! ;
res := res.push(loc_list.pop)
end
end
end ;
me : SAME := new(res.size) ;
loop
me.aset!(res.elt!)
end ;
return me
end ;
create(
rn : RUNE
) : SAME is
-- This routine creates a new encoding from the given rune, provided
-- that there is a valid translation, otherwise void is returned!
return build_list(rn.binstr,rn.lib.my_size)
end ;
create(
rns : RUNES
) : SAME is
-- This routine converts the given binary string which is expected to be
-- a character encoding sequence for conversion into a valid UTF7 code.
if void(rns) then
return void
else
return build_list(rns.binstr,rns.index_lib.my_size)
end
end ;
build(
octets : BIN_CURSOR,
lib : LIBCHARS
) : SAME
pre ~void(octets)
and octets.remaining % lib.my_size = 0
and ~void(lib)
post ~void(result)
is
-- This routine converts the indicated binary string into a UTF7 coding
-- sequence, using the given repertoire and encoding.
return build_list(octets.get_remainder,lib.my_size)
end ;
build(
octets : BIN_CURSOR
) : SAME
pre ~void(octets)
and octets.remaining % LIBCHARS::default.my_size = 0
post ~void(result)
is
-- This routine converts the indicated binary string into a UTF7 coding
-- sequence, using the default repertoire and encoding.
return build(octets,LIBCHARS::default)
end ;
private length(
first_octet : OCTET
) : CARD is
-- This private routine returns the length in octets of the code which
-- begins with the given octet. If this is not a valid leading octet then
-- CARD::nil is returned.
res : CARD := first_octet.highest(clearbit) ;
if res /= CARD::nil then
res := res + 1
end ;
return res
end ;
is_eq(
other : SAME
) : BOOL is
-- This predicate returns true if and only if all components of this
-- coding are equal to all those of other.
loop
if aelt! /= other.aelt! then
return false
end
end ;
return true
end ;
is_singleton : BOOL is
-- This predicate returns true if and only if self is the transformed
-- coding of a single character/rune.
return asize > length([0])
end ;
card : CARD
pre ~void(self)
and (asize = length([0]))
post true
is
-- This routine returns the encoded bits of self as a numeric value --
-- providing that self is merely a single code.
loc_lgth : CARD := length([0]) - 1 ;
res : CARD := (lengths[loc_lgth].bit_invert.bit_and([0])).card ;
loop
index : CARD := 1.upto!(loc_lgth) ;
res := res * Six_Bits_Val + [index].bit_and(Six_Bit_Mask).card
end ;
return res
end ;
tgt_str : STR
pre ~void(self)
post result.asize = asize
is
-- This routine is the one which turns a UTF7 string into a locally
-- defined default character string using eight, sixteen or thirty-two bit
-- code spaces as defined in the environment.
res : STR := STR::create(asize,LIBCHARS::default) ;
loop
index : CARD := 0.upto!(asize - 1) ;
loc_val : CARD := aelt!.card ;
res[index] := CHAR::create(CHAR_CODE::create(loc_val,
LIBCHARS::default))
end ;
return res
end ;
binstr : BINSTR
pre ~void(self)
post (result.size <= asize)
is
-- This routine returns the value of self as a decoded binary string,
-- whose individual codes occupy the given number of octets -- for conversion
-- as desired into characters, etc.
res : BINSTR := BINSTR::create ;
octet_cnt : CARD ;
to_do : CARD ;
loc_val : OCTET ;
bits_so_far : CARD ;
started : BOOL := false ;
loop
oct : OCTET := aelt! ;
if ~started then
started := true ;
octet_cnt := count(oct) ;
to_do := octet_cnt - 1 ; -- one has already been 'read'
if octet_cnt = 0 then
loc_val := oct ;
bits_so_far := OCTET::Octet_Bits
else
loc_val := oct.bit_xor(lengths[octet_cnt]) ;
bits_so_far := Valid_Bit_Limit + 1 - octet_cnt
end
else
to_do := to_do - 1 ;
if (bits_so_far % OCTET::Octet_Bits) = 0 then
res := res + loc_val ;
loc_val := OCTET::null
end ;
loc_val := loc_val.left(Six_Bits).bit_or(
oct.bit_and(Six_Bit_Mask)) ;
if to_do = 0 then
res := res + loc_val ;
started := false
end
end
end ;
return res
end ;
runes(
lib : LIBCHARS
) : RUNES
pre true
post (void(self)
and void(result))
or ~void(result)
is
-- This routine returns a string consisting of the individual codes of
-- self treated as a character string in the given repertoire and encoding.
res : RUNES := RUNES::create(lib) ;
loop
res := res + rune!(lib)
end ;
return res
end ;
runes : RUNES
pre ~void(self)
post ~void(result)
is
-- This routine returns a string consisting of the individual codes of
-- self treated as a character string in the default repertoire and encoding.
return runes(LIBCHARS::default)
end ;
rune!(
once lib : LIBCHARS
) : RUNE
pre ~void(lib)
and ~void(self)
post ~void(result)
is
-- This iter yields successive octets converted to character form in
-- the given repertoire and encoding.
cursor : BIN_CURSOR := binstr.cursor ;
loop
yield RUNE::build(cursor.get_upto(lib.my_size).cursor,lib) ;
if cursor.is_done then
quit
end
end
end ;
rune! : RUNE
pre true
post ~void(result)
is
-- This iter yields successive octets converted to character form in
-- the current repertoire and encoding.
loop
yield rune!(LIBCHARS::default)
end
end ;
code! : SAME
pre true
post ~void(result)
is
-- This iter yields successive individual UTF codes in self
if void(self) then
quit
end ;
started : BOOL := false ;
res : SAME ;
loc_cnt : CARD ;
index : CARD ;
loop
oct : OCTET := aelt! ;
if ~started then
loc_cnt := count(oct) ;
res := new(loc_cnt + 1) ;
index := 0 ;
started := true
end ;
res[index] := oct ;
if index = loc_cnt then
yield res ;
started := false
else
index := index - 1
end
end
end ;
str(
lib : LIBCHARS
) : STR
pre ~void(lib)
and ~void(self)
post true
is
-- This routine produces a character string from the octets of which
-- the value is encoded as space separated hexadecimal numbers in the
-- given repertoire and encoding.
res : STR := STR::create ;
loop
res := res + lib.Space.char.str(lib).separate!(aelt!.hex_str(lib))
end ;
return res
end ;
str : STR
pre ~void(self)
post true
is
-- This routine produces a character string from the octets of which
-- the value is encoded as space separated hexadecimal numbers in the
-- default repertoire and encoding.
return str(LIBCHARS::default)
end ;
end ; -- UTF7