chars.sa


Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details
 
------------------------->  GNU Sather - sourcefile  <-------------------------
-- Copyright (C) 2000 by K Hopper, University of Waikato, New Zealand        --
-- This file is part of the GNU Sather library. It is free software; you may --
-- redistribute  and/or modify it under the terms of the GNU Library General --
-- Public  License (LGPL)  as published  by the  Free  Software  Foundation; --
-- either version 2 of the license, or (at your option) any later version.   --
-- This  library  is distributed  in the  hope that it will  be  useful, but --
-- WITHOUT ANY WARRANTY without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. See Doc/LGPL for more details.       --
-- The license text is also available from:  Free Software Foundation, Inc., --
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA                     --
-------------->  Please email comments to <bug-sather@gnu.org>  <--------------


immutable class CHAR < $CHAR{CHAR}

immutable class CHAR < $CHAR{CHAR} is
   -- This class implements the notion of a logical character (ie having
   -- the semantics associated with such a token in the cultural environment
   -- concerned) having a single encoding!
   --
   -- NOTE For characters which require multiple encodings see the class RUNE
   --      in this section of the required library.

   -- Version 1.8 Apr 99.  Copyright K Hopper, U of Waikato
   --                       Development History
   --                       -------------------
   --   Date     Who By   Detail
   --   ----     ------   ------
   --  6 Dec 96    kh     Original heavily adapted from the
   --                     Sather 1.1 immutable CHAR class.
   -- 19 Feb 97    kh     Modified to become array of encoding.
   -- 27 May 97    kh     'Automated' culture/repertoire 'loading'
   -- 27 Jun 97    kh     Modified to handle multiple encodings.
   -- 22 Jul 98    kh     Added $BINARY facilities
   --  4 Nov 98    kh     Revised for added conversion facilities.
   -- 12 Feb 99    kh     Rewrite for efficiency.
   -- 21 Feb 99    kh     Further efficiency improvements
   --  9 Apr 99    kh     Revised for Version 8 of text classes.

   include AVAL{OCTET} ;
   include COMPARABLE ;
   include BINARY ;
   include CHAR_STR ;
   include CHAR_INCL ;        -- The common implementations!

   private const asize : CARD := 4 ;      -- Always!  (octets per CHAR value)

   const num_codes : CARD := 1 ;          -- A CHAR carries exactly one code.


   inspect_pat is
      -- Debugging aid.  Writes one mark per octet of self to #OUT : "0" for
      -- an octet equal to OCTET::null and "*" for any other octet, followed
      -- by a newline.

      #OUT + "CHAR code pattern: " ;
      loop
         loc_oct : OCTET := aelt! ;
         if loc_oct = OCTET::null then
            #OUT + "0" ;
         else
            #OUT + "*" ;
         end ;
      end ;
      #OUT + "\n" ;
   end ;


   inspect is
      -- Debugging aid.  Writes the default-library code of self to #OUT
      -- using the code's hex_str form.

      #OUT + "CHAR code:" + code.hex_str + "\n" ;
   end ;


   build(
         cursor : BIN_CURSOR,
         lib : LIBCHARS
         ) : SAME
      pre ~void(lib)
         and (cursor.remaining >= lib.my_size)
      post ((initial(cursor.remaining) - lib.my_size) = cursor.remaining)
   is
      -- This routine takes the number of octets required to form a single
      -- character in the indicated repertoire and encoding.  If the
      -- bit-pattern found is a valid encoding in the indicated repertoire
      -- (or the culture's charmap is still void, as happens during culture
      -- set-up) then it is returned as a character, otherwise the null
      -- character is returned.  In either case exactly lib.my_size octets
      -- are to have been consumed from the cursor, as the postcondition
      -- requires.

      loc_code : CHAR_CODE := CHAR_CODE::raw_build(cursor,lib) ;

      if void(lib.culture.charmap)   -- only occurs during cult set-up
            or lib.culture.charmap.valid(loc_code) then
         return loc_code.char
      else
         -- The cursor is NOT moved again here.  The success branch above
         -- never touches the cursor, so the postcondition can only hold if
         -- raw_build has already consumed lib.my_size octets; a further
         -- advance on this branch would consume twice that amount.
         -- NOTE(review): assumes CHAR_CODE::raw_build advances the cursor by
         -- lib.my_size - confirm against its implementation.
         --
         -- The null character is produced directly from the code :
         -- create(CHAR_CODE) must not be used because its postcondition
         -- forbids a null result.
         return CHAR_CODE::null.char
      end
   end ;


   build(
         index : BIN_CURSOR
         ) : SAME
      pre (index.remaining >= LIBCHARS::default.culture.kind.size)
      post ~(result.code = CHAR_CODE::null)
         or (index.remaining < initial(index.remaining))
   is
      -- This routine builds a character from the binary string attached
      -- to the given cursor in the default repertoire and encoding.

      return build(index,LIBCHARS::default)
   end ;


   create(
          code : CHAR_CODE
          ) : SAME
      pre ~code.is_combining
      post ~(result.code = CHAR_CODE::null)
   is
      -- This creates the character corresponding to the given character
      -- code, which must not be a combining code.  The contract excludes a
      -- result whose code is CHAR_CODE::null.

      return code.char
   end ;


   create(
          code : CONTROL_CODES,
          lib : LIBCHARS
          ) : SAME
      pre ~void(lib)
      post true     -- ~void(result) but may be 'zero'
   is
      -- This creates a 'control character' from the given control code,
      -- interpreted in the given repertoire and encoding.

      return CHAR_CODE::create(code.card,lib).char
   end ;


   nil : SAME is
      -- This routine returns a nil value - which cannot be a valid character.

      return QUADBITS::create(CARD::nil).char
   end ;


   null : SAME is
      -- This routine returns the character corresponding to the control
      -- function name NUL.

      return OCTET::null.quad.char
   end ;


   valid(
         num : CARD,
         lib : LIBCHARS
         ) : BOOL
      pre ~void(lib)
      post true
   is
      -- This routine returns true if and only if the value of num may be
      -- interpreted as a character bit-pattern valid in the given encoding
      -- and repertoire, otherwise false.
      --
      -- NOTE(review): unlike build, this does not guard against a void
      -- lib.culture.charmap - presumably it is never called during culture
      -- set-up; confirm against callers.

      return lib.culture.charmap.valid(CHAR_CODE::create(num,lib))
   end ;


   valid(
         num : CARD
         ) : BOOL
   is
      -- This predicate returns true if and only if the value of num may be
      -- interpreted as a character bit-pattern in the default repertoire and
      -- encoding, otherwise false.

      return valid(num,LIBCHARS::default)
   end ;


   valid_number(
                lib : LIBCHARS
                ) : BOOL
   is
      -- This predicate is used to test if self CHAR will fit into the
      -- number of bits available for codes using lib.  It returns true if
      -- and only if the value will fit, ie every octet lying outside the
      -- lib.my_size octets used by lib is OCTET::null.  Which end of the
      -- value holds the unused octets depends on SYS::is_little_endian.
      -- On failure a diagnostic and the octet pattern are written to #OUT.

      if SYS::is_little_endian then
         -- Octets at positions my_size .. asize - 1 must be unused.
         loop
            index : CARD := 0.upto!(asize - 1) ;
            loc_oct : OCTET := aelt! ;
            if index >= lib.my_size then
               if loc_oct /= OCTET::null then
                  #OUT + "CHAR::valid_number. Too long bits for codes using lib.\n" ;
                  inspect_pat ;
                  return false
               end
            end
         end ;
      else -- big_endian
         -- Octets at positions 0 .. asize - my_size - 1 must be unused.
         loop
            index : CARD := 0.upto!(asize - 1) ;
            loc_oct : OCTET := aelt! ;
            if index + lib.my_size < asize then
               if loc_oct /= OCTET::null then
                  #OUT + "CHAR::valid_number. Too long bits for codes using lib.\n" ;
                  inspect_pat ;
                  return false
               end
            end
         end ;
      end ;

      return true
   end ;


   is_eq(
         other : SAME
         ) : BOOL
   is
      -- This predicate returns true if and only if self and other are
      -- the same character irrespective of encoding!!  While the default
      -- culture's collating repertoire is still void, or when its library
      -- has no combining characters, the default codes are compared
      -- directly; otherwise the collating repertoire decides.

      loc_cult : CULTURE := CULTURE::default ;

      if void(loc_cult.collating)    -- still being built
            or ~loc_cult.sather_lib.has_combining then
         return code = other.code
      else
         return loc_cult.collating.same(self,other)
      end
   end ;


   is_lt(
         other : SAME
         ) : BOOL
   is
      -- This predicate returns true if and only if self is earlier than
      -- other in the repertoire defined ordering of characters.  While the
      -- collating repertoire is still void the default codes are compared.

      loc_coll : REPERTOIRE := CULTURE::default.collating ;

      if void(loc_coll) then   -- still being built
         return code < other.code
      else
         return loc_coll.earlier(self,other)
      end
   end ;


   is_nil : BOOL is
      -- This routine returns true if and only if self is the nil character.

      return is_eq(nil)
   end ;


   binstr(
          lib : LIBCHARS
          ) : BINSTR
      pre true
      post (lib.my_size = result.size)
   is
      -- This routine returns a representation of self as a binary string,
      -- using the given repertoire and encoding : the last lib.my_size
      -- octets of the four-octet binary form of self.

      loc_res : BINSTR := QUADBITS::create(self).binstr ;
      return loc_res.tail(lib.my_size)
   end ;


   binstr : BINSTR
      pre true
      post (LIBCHARS::default.my_size = result.size)
   is
      -- This routine returns a representation of self as a binary string,
      -- using the default repertoire and encoding.

      return binstr(LIBCHARS::default)
   end ;


   convert(
           from_lib : LIBCHARS,
           to_lib : LIBCHARS
           ) : CHAR
      pre ~void(from_lib)
         and ~void(to_lib)
      post true     -- void if character not in to_lib kind
   is
      -- This routine converts self, which is assumed to be in the from_lib
      -- encoding, into the to_lib encoding if this is possible, otherwise
      -- void is returned.  When both libraries use the same code kind self
      -- is returned unchanged.

      if (from_lib.culture.kind = to_lib.culture.kind) then
         return self ;
      end ;

      -- Step 1 : obtain the rune for self.
      loc_rune : RUNE ;
      if (from_lib.culture.kind = CODE_KINDS::Unicode)
            or (from_lib.culture.kind = CODE_KINDS::UCS2)
            or (from_lib.culture.kind = CODE_KINDS::UCS4) then
         loc_rune := rune(from_lib)
      else
         loc_rune := CODE_CONVERTER::rune(from_lib,self)
      end ;

      -- NOTE The above conversion is into the default rune encoding
      --      of Unicode!!!!

      -- Step 2 : map the rune into the target kind.
      if (to_lib.culture.kind = CODE_KINDS::Unicode)
            or (to_lib.culture.kind = CODE_KINDS::UCS2)
            or (to_lib.culture.kind = CODE_KINDS::UCS4) then
         return loc_rune.char
      else
         return CODE_CONVERTER::char(to_lib.culture.kind,loc_rune)
      end ;
   end ;


   convert(
           lib : LIBCHARS
           ) : CHAR
      pre ~void(lib)
         and (lib /= LIBCHARS::default)
      post true     -- void if character not in lib
   is
      -- This routine converts self, which is assumed to be in the default
      -- encoding and repertoire, to be in the given encoding and
      -- repertoire.

      return convert(LIBCHARS::default,lib) ;
   end ;


   char : SAME is
      -- This routine returns self.

      return self ;
   end ;


   code(
        lib : LIBCHARS
        ) : CHAR_CODE
      pre ~void(lib)
      post true
   is
      -- This routine returns the bit-pattern of self as a character code
      -- belonging to the given repertoire and encoding.

      return CHAR_CODE::create(self,lib)
   end ;


   code : CHAR_CODE is
      -- This routine returns the bit-pattern of self as a character code
      -- belonging to the default repertoire and encoding.  Since any
      -- bit-pattern will do there can neither be sensible nor meaningful
      -- pre/post conditions!

      return CHAR_CODE::create(self,LIBCHARS::default)
   end ;


   rune(
        lib : LIBCHARS
        ) : RUNE
      pre ~void(lib)
      post true
   is
      -- This routine returns the bit-pattern of self as a rune in the given
      -- repertoire and encoding.

      return RUNE::create(CHAR_CODE::create(self,lib))
   end ;


   rune : RUNE is
      -- This routine returns the bit-pattern of self as a rune in the default
      -- repertoire and encoding.  Since any bit-pattern may be used, there
      -- can be neither sensible nor meaningful pre/post conditions.

      return RUNE::create(CHAR_CODE::create(self,LIBCHARS::default))
   end ;


   hash : CARD
      pre ~(code = CHAR_CODE::null)
      post true
   is
      -- This routine returns a hash value of the encoding for the purposes
      -- of mapping etc.  Note that the precondition excludes a character
      -- whose default code is CHAR_CODE::null.

      return code.hash
   end ;


   upper(
         lib : LIBCHARS
         ) : SAME
      pre ~void(lib)
      post (~is_lower(lib)) or result.is_upper(lib)
   is
      -- Providing that there exists an upper case version of self then
      -- an encoding giving that character is returned, otherwise the current
      -- value of self.
      --
      -- The postcondition only promises an upper case result when self is
      -- lower case in lib : any other character is returned unchanged and
      -- so need not be upper case.

      if is_lower(lib) then
         loc_map : CHAR_MAPPINGS := CHAR_MAPPINGS::To_Upper ;
         return loc_map.to_range(self,lib.culture)
      else
         return self
      end
   end ;


   upper : SAME
      pre self.is_lower
      post result.is_upper
   is
      -- Returns the upper case version of self in the default repertoire
      -- and encoding.  The precondition requires self to be lower case.

      return upper(LIBCHARS::default)
   end ;


   lower(
         lib : LIBCHARS
         ) : SAME
      pre ~void(lib)
      post (~is_upper(lib)) or result.is_lower(lib)
   is
      -- Providing that there exists a lower case version of self then
      -- an encoding giving that character is returned, otherwise the current
      -- value of self.
      --
      -- The postcondition is checked in lib (not the default library) and
      -- only promises a lower case result when self is upper case in lib :
      -- any other character is returned unchanged.

      if is_upper(lib) then
         loc_map : CHAR_MAPPINGS := CHAR_MAPPINGS::To_Lower ;
         return loc_map.to_range(self,lib.culture)
      else
         return self
      end
   end ;


   lower : SAME
      pre self.is_upper
      post result.is_lower
   is
      -- Returns the lower case version of self in the default repertoire
      -- and encoding.  The precondition requires self to be upper case.

      return lower(LIBCHARS::default)
   end ;


   code!(
         once lib : LIBCHARS
         ) : CHAR_CODE
   is
      -- This iter merely yields the single code - in the given encoding -
      -- corresponding to self!

      yield code(lib)
   end ;


   code! : CHAR_CODE is
      -- This iter merely yields the single code!

      yield code
   end ;

end ; -- CHAR