-- chars.sa
-- Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details.
-------------------------> GNU Sather - sourcefile <-------------------------
-- Copyright (C) 2000 by K Hopper, University of Waikato, New Zealand --
-- This file is part of the GNU Sather library. It is free software; you may --
-- redistribute and/or modify it under the terms of the GNU Library General --
-- Public License (LGPL) as published by the Free Software Foundation; --
-- either version 2 of the license, or (at your option) any later version. --
-- This library is distributed in the hope that it will be useful, but --
-- WITHOUT ANY WARRANTY without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. See Doc/LGPL for more details. --
-- The license text is also available from: Free Software Foundation, Inc., --
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --
--------------> Please email comments to <bug-sather@gnu.org> <--------------
-- immutable class CHAR < $CHAR{CHAR}
immutable class CHAR < $CHAR{CHAR} is
-- This class implements the notion of a logical character (ie having
-- the semantics associated with such a token in the cultural environment
-- concerned) having a single encoding!
-- NOTE For characters which require multiple encodings see the class RUNE
-- in this section of the required library.
-- Version 1.8 Apr 99. Copyright K Hopper, U of Waikato
-- Development History
-- -------------------
-- Date Who By Detail
-- ---- ------ ------
-- 6 Dec 96 kh Original heavily adapted from the
-- Sather 1.1 immutable CHAR class.
-- 19 Feb 97 kh Modified to become array of encoding.
-- 27 May 97 kh 'Automated' culture/repertoire 'loading'
-- 27 Jun 97 kh Modified to handle multiple encodings.
-- 22 Jul 98 kh Added $BINARY facilities
-- 4 Nov 98 kh Revised for added conversion facilities.
-- 12 Feb 99 kh Rewrite for efficiency.
-- 21 Feb 99 kh Further efficiency improvements
-- 9 Apr 99 kh Revised for Version 8 of text classes.
include AVAL{OCTET} ;
include COMPARABLE ;
include BINARY ;
include CHAR_STR ;
include CHAR_INCL ; -- The common implementations!
-- Number of octets in the AVAL{OCTET} storage of a CHAR; valid_number
-- walks the octets 0 .. asize - 1 when checking a value against a library.
private const asize : CARD := 4 ; -- Always!
-- NOTE(review): presumably the number of codes making up one CHAR (a CHAR
-- has a single encoding, see the class comment) - confirm against RUNE.
const num_codes : CARD := 1 ;
inspect_pat is
   -- Debugging aid.  Writes one mark per octet of self to standard output:
   -- "0" for a null octet and "*" for any other value, followed by a
   -- newline.
   #OUT + "CHAR code pattern: " ;
   loop
      loc_oct : OCTET := aelt! ;
      if loc_oct /= OCTET::null then
         #OUT + "*"
      else
         #OUT + "0"
      end
   end ;
   #OUT + "\n"
end ;
inspect is
   -- Debugging aid.  Writes the code of self (default repertoire and
   -- encoding) in hexadecimal form to standard output on a line of its own.
   #OUT + "CHAR code:" ;
   #OUT + code.hex_str ;
   #OUT + "\n"
end ;
build(cursor : BIN_CURSOR, lib : LIBCHARS) : SAME
   pre ~void(lib) and (cursor.remaining >= lib.my_size)
   post ((initial(cursor.remaining) - lib.my_size) = cursor.remaining)
is
   -- This routine takes the number of octets required to form a single
   -- character in the indicated repertoire and encoding (lib.my_size) from
   -- the cursor.  If the bit-pattern found is a valid encoding in the
   -- indicated repertoire (or no character map exists yet) it is returned
   -- as a character, otherwise the character whose code is CHAR_CODE::null
   -- is returned.  As the postcondition requires, the cursor has been moved
   -- on by exactly lib.my_size octets in either case.
   loc_start ::= cursor.index ;
   loc_code : CHAR_CODE := CHAR_CODE::raw_build(cursor,lib) ;
   if void(lib.culture.charmap) -- only occurs during cult set-up
         or lib.culture.charmap.valid(loc_code) then
      return loc_code.char
   else
      -- Position the cursor relative to where this routine started rather
      -- than relative to wherever raw_build left it, so that the rejected
      -- encoding is skipped exactly once and the postcondition holds.
      cursor.set_index(loc_start + lib.my_size) ;
      -- create(CHAR_CODE) must not be used here: its postcondition forbids
      -- a result whose code is CHAR_CODE::null.
      return CHAR_CODE::null.char
   end
end ;
build(index : BIN_CURSOR) : SAME
   pre (index.remaining >= LIBCHARS::default.culture.kind.size)
   post ~(result.code = CHAR_CODE::null) or (index.remaining < initial(index.remaining))
is
   -- Convenience form of build(cursor,lib) which reads one character from
   -- the binary string attached to index using the default repertoire and
   -- encoding.
   loc_default : LIBCHARS := LIBCHARS::default ;
   return build(index,loc_default)
end ;
create(code : CHAR_CODE) : SAME
   pre ~code.is_combining
   post ~(result.code = CHAR_CODE::null)
is
   -- This creates the character corresponding to the given character code,
   -- which must not be a combining code.  The result never has the null
   -- code.
   loc_res : SAME := code.char ;
   return loc_res
end ;
create(code : CONTROL_CODES, lib : LIBCHARS) : SAME
   pre ~void(lib)
   post true -- ~void(result) but may be 'zero'
is
   -- This creates a 'control character' in the given repertoire and
   -- encoding from the numeric value of the given control code.
   return #CHAR_CODE(code.card,lib).char
end ;
nil : SAME is
   -- This routine returns the nil value, built from the bit-pattern of
   -- CARD::nil - which cannot be a valid character.
   return #QUADBITS(CARD::nil).char
end ;
null : SAME is
   -- This routine returns the character corresponding to the control
   -- function name NUL, built from the null octet.
   loc_nul : OCTET := OCTET::null ;
   return loc_nul.quad.char
end ;
valid(num : CARD, lib : LIBCHARS) : BOOL
   pre ~void(lib)
   post true
is
   -- This predicate returns true if and only if the character map of the
   -- culture of lib accepts the character code built from num as a valid
   -- bit-pattern in the given encoding and repertoire.
   loc_map ::= lib.culture.charmap ;
   return loc_map.valid(CHAR_CODE::create(num,lib))
end ;
valid(num : CARD) : BOOL is
   -- Convenience form of valid(num,lib) which tests num against the default
   -- repertoire and encoding.
   loc_default : LIBCHARS := LIBCHARS::default ;
   return valid(num,loc_default)
end ;
valid_number(lib : LIBCHARS) : BOOL is
   -- This predicate tests whether self fits into the lib.my_size octets
   -- available for codes using lib.  Every octet of self lying outside that
   -- part must be null: on a little-endian machine these are the octets at
   -- positions lib.my_size and above, on a big-endian machine the leading
   -- (asize - lib.my_size) octets.  On the first offending octet a
   -- diagnostic and the octet pattern are written to standard output and
   -- false is returned; otherwise true is returned.
   loc_little : BOOL := SYS::is_little_endian ;
   loop
      loc_pos : CARD := 0.upto!(asize - 1) ;
      loc_oct : OCTET := aelt! ;
      loc_outside : BOOL ;
      if loc_little then
         loc_outside := loc_pos >= lib.my_size
      else -- big_endian
         loc_outside := loc_pos + lib.my_size < asize
      end ;
      if loc_outside and (loc_oct /= OCTET::null) then
         #OUT + "CHAR::valid_number. Too long bits for codes using lib.\n" ;
         inspect_pat ;
         return false
      end
   end ;
   return true
end ;
is_eq(other : SAME) : BOOL is
   -- This predicate returns true if and only if self and other are
   -- the same character irrespective of encoding.  The collating repertoire
   -- of the default culture decides when it exists and the library has
   -- combining characters; otherwise (including while the culture is still
   -- being built) the two codes are compared directly.
   loc_culture : CULTURE := CULTURE::default ;
   if ~void(loc_culture.collating)
         and loc_culture.sather_lib.has_combining then
      return loc_culture.collating.same(self,other)
   end ;
   return code = other.code
end ;
is_lt(other : SAME) : BOOL is
   -- This predicate returns true if and only if self is earlier than
   -- other in the ordering defined by the collating repertoire of the
   -- default culture.  While that repertoire is still being built the
   -- codes themselves are compared instead.
   loc_order : REPERTOIRE := CULTURE::default.collating ;
   if ~void(loc_order) then
      return loc_order.earlier(self,other)
   end ;
   return code < other.code -- still being built
end ;
is_nil : BOOL is
   -- This predicate returns true if and only if self compares equal (using
   -- is_eq) to the nil character.
   loc_nil : SAME := nil ;
   return is_eq(loc_nil)
end ;
binstr(lib : LIBCHARS) : BINSTR
   pre true
   post (lib.my_size = result.size)
is
   -- This routine returns a representation of self as a binary string of
   -- lib.my_size octets: the tail of the binary string form of the
   -- four-octet bit-pattern of self.
   return QUADBITS::create(self).binstr.tail(lib.my_size)
end ;
binstr : BINSTR
   pre true
   post (LIBCHARS::default.my_size = result.size)
is
   -- Convenience form of binstr(lib) which gives the binary string
   -- representation of self in the default repertoire and encoding.
   loc_default : LIBCHARS := LIBCHARS::default ;
   return binstr(loc_default)
end ;
convert( from_lib : LIBCHARS, to_lib : LIBCHARS ) : CHAR
   pre ~void(from_lib) and ~void(to_lib)
   post true
is
   -- This routine converts self, which is assumed to be in the from_lib
   -- encoding, into the to_lib encoding if this is possible, otherwise void
   -- is returned (character not in the to_lib kind).  When both libraries
   -- use the same kind of code self is returned unchanged.  Otherwise the
   -- conversion goes by way of a rune.
   loc_from ::= from_lib.culture.kind ;
   loc_to ::= to_lib.culture.kind ;
   if loc_from = loc_to then
      return self
   end ;

   -- Step 1 : obtain the rune for self.
   loc_from_ucs : BOOL := (loc_from = CODE_KINDS::Unicode)
         or (loc_from = CODE_KINDS::UCS2)
         or (loc_from = CODE_KINDS::UCS4) ;
   loc_rune : RUNE ;
   if loc_from_ucs then
      loc_rune := rune(from_lib)
   else
      loc_rune := CODE_CONVERTER::rune(from_lib,self)
   end ;
   -- NOTE The above conversion is into the default rune encoding of Unicode!

   -- Step 2 : turn the rune into a character of the target kind.
   loc_to_ucs : BOOL := (loc_to = CODE_KINDS::Unicode)
         or (loc_to = CODE_KINDS::UCS2)
         or (loc_to = CODE_KINDS::UCS4) ;
   if loc_to_ucs then
      return loc_rune.char
   end ;
   return CODE_CONVERTER::char(to_lib.culture.kind,loc_rune)
end ;
convert(lib : LIBCHARS) : CHAR
   pre ~void(lib) and (lib /= LIBCHARS::default)
   post true -- void if the character cannot be converted
is
   -- Convenience form of convert(from_lib,to_lib): self is assumed to be in
   -- the default encoding and repertoire and is converted to the given
   -- encoding and repertoire, which must differ from the default.
   loc_default : LIBCHARS := LIBCHARS::default ;
   return convert(loc_default,lib)
end ;
char : SAME is
   -- Identity conversion: a CHAR is already a character, so self is
   -- returned unchanged.
   loc_me : SAME := self ;
   return loc_me
end ;
code(lib : LIBCHARS) : CHAR_CODE
   pre ~void(lib)
   post true
is
   -- This routine returns the bit-pattern of self as a character code
   -- belonging to the given repertoire and encoding.
   loc_code : CHAR_CODE := CHAR_CODE::create(self,lib) ;
   return loc_code
end ;
code : CHAR_CODE is
   -- This routine returns the bit-pattern of self as a character code
   -- belonging to the default repertoire and encoding.  Any bit-pattern
   -- will do, so there are no pre/post conditions.
   loc_default : LIBCHARS := LIBCHARS::default ;
   return CHAR_CODE::create(self,loc_default)
end ;
rune(lib : LIBCHARS) : RUNE
   pre ~void(lib)
   post true
is
   -- This routine returns self as a rune built from the character code of
   -- self in the given repertoire and encoding.
   loc_code : CHAR_CODE := CHAR_CODE::create(self,lib) ;
   return RUNE::create(loc_code)
end ;
rune : RUNE is
   -- This routine returns self as a rune built from the character code of
   -- self in the default repertoire and encoding.  Any bit-pattern may be
   -- used, so there are no pre/post conditions.
   loc_code : CHAR_CODE := CHAR_CODE::create(self,LIBCHARS::default) ;
   return RUNE::create(loc_code)
end ;
hash : CARD
   pre ~(code = CHAR_CODE::null)
   post true
is
   -- This routine returns the hash value of the code of self (default
   -- repertoire and encoding) for the purposes of mapping etc.  It must not
   -- be applied to a character whose code is null.
   loc_code : CHAR_CODE := code ;
   return loc_code.hash
end ;
upper(lib : LIBCHARS) : SAME
   pre ~void(lib)
   post ~is_lower(lib) or result.is_upper(lib)
is
   -- Providing that self is a lower case character in the given repertoire
   -- and encoding then the character it maps to under the To_Upper mapping
   -- of the culture of lib is returned, otherwise the current value of self.
   --
   -- The postcondition only requires an upper case result when self was
   -- lower case: a character which is not lower case (a digit, say) is
   -- returned unchanged and need not be upper case.
   if is_lower(lib) then
      loc_map : CHAR_MAPPINGS := CHAR_MAPPINGS::To_Upper ;
      return loc_map.to_range(self,lib.culture)
   else
      return self
   end
end ;
upper : SAME
   pre self.is_lower
   post result.is_upper
is
   -- Convenience form of upper(lib) for the default repertoire and
   -- encoding.  Self must be a lower case character; its upper case
   -- version is returned.
   loc_default : LIBCHARS := LIBCHARS::default ;
   return upper(loc_default)
end ;
lower(lib : LIBCHARS) : SAME
   pre ~void(lib)
   post ~is_upper(lib) or result.is_lower(lib)
is
   -- Providing that self is an upper case character in the given repertoire
   -- and encoding then the character it maps to under the To_Lower mapping
   -- of the culture of lib is returned, otherwise the current value of self.
   --
   -- The postcondition is checked against lib (not the default library),
   -- and only requires a lower case result when self was upper case: a
   -- character which is not upper case is returned unchanged.
   if is_upper(lib) then
      loc_map : CHAR_MAPPINGS := CHAR_MAPPINGS::To_Lower ;
      return loc_map.to_range(self,lib.culture)
   else
      return self
   end
end ;
lower : SAME
   pre self.is_upper
   post result.is_lower
is
   -- Convenience form of lower(lib) for the default repertoire and
   -- encoding.  Self must be an upper case character; its lower case
   -- version is returned.
   loc_default : LIBCHARS := LIBCHARS::default ;
   return lower(loc_default)
end ;
code!(once lib : LIBCHARS) : CHAR_CODE is
   -- This iter yields exactly once: the single code of self in the given
   -- repertoire and encoding.  It then quits.
   loc_code : CHAR_CODE := code(lib) ;
   yield loc_code
end ;
code! : CHAR_CODE is
   -- This iter yields exactly once: the single code of self in the default
   -- repertoire and encoding.  It then quits.
   loc_code : CHAR_CODE := code ;
   yield loc_code
end ;
end ; -- CHAR