-- runes.sa
-- Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details
-------------------------> GNU Sather - sourcefile <-------------------------
-- Copyright (C) 2000 by K Hopper, University of Waikato, New Zealand --
-- This file is part of the GNU Sather library. It is free software; you may --
-- redistribute and/or modify it under the terms of the GNU Library General --
-- Public License (LGPL) as published by the Free Software Foundation; --
-- either version 2 of the license, or (at your option) any later version. --
-- This library is distributed in the hope that it will be useful, but --
-- WITHOUT ANY WARRANTY without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. See Doc/LGPL for more details. --
-- The license text is also available from: Free Software Foundation, Inc., --
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --
--------------> Please email comments to <bug-sather@gnu.org> <--------------
-- class RUNES < $TEXT_STRING{RUNE,FRUNES,RUNES}, $IMMUTABLE   (summary line; the definition follows)
class RUNES < $TEXT_STRING{RUNE,FRUNES,RUNES}, $IMMUTABLE is
-- This class provides a string encoding of arbitrary codes of 32-bits.
-- It provides most of the operations which are provided in the class STR
-- in respect of runes which are single encodings.
-- Version 1.4 Apr 99. Copyright K Hopper, U of Waikato
-- Development History
-- -------------------
-- Date Who By Detail
-- ---- ------ ------
-- 11 May 96 kh Original from Sather STR class
-- 8 Nov 96 kh Now uses generic base class.
-- 5 Apr 97 kh Modified for INT to CARD
-- 13 Oct 98 kh Revised and added pre/post conditions
-- 13 Apr 99 kh Completely re-written for V8 of text classes
include TEXT_STRING{RUNE,FRUNES}
plus -> private raw_plus,
from_fstr -> private from_fstr,
set! -> ; -- NOT for runes!
private shared width : CARD := 4 ; -- merely to keep strings happy!
private attr indices : FLIST{CARD} ;
-- NOTE The usage of the indices array in this class is different
-- from that in the class FRUNES. The reason for this is
-- that the latter has a feature 'loc' which indicates the next
-- element to be filled. The last element of the index
-- array serves that purpose in this class.
private create(
   size : CARD,          -- storage to allocate, measured in OCTETS (not runes)
   lib : LIBCHARS
   ) : SAME is
   -- Allocate storage of the requested number of octets and return it as
   -- an empty rune string bound to the repertoire lib.  The index list starts
   -- with the single entry 0, which marks where the next rune will be stored.
   res : SAME := new(size) ;
   res.priv_lib := REP_LIB_LIST::index(lib) ;
   res.indices := FLIST{CARD}::create.push(0) ;
   return res
end ;
create(
   sz : CARD
   ) : SAME is
   -- Creation variant taking only a storage size (in octets).  The
   -- default cultural repertoire and environment are used.
   default_lib : LIBCHARS := LIBCHARS::default ;
   return create(sz,default_lib)
end ;
create(
   lib : LIBCHARS
   ) : SAME is
   -- Creation variant yielding an empty rune string (no storage octets
   -- allocated) in the encoding and repertoire given by lib.
   no_octets : CARD := 0 ;
   return create(no_octets,lib)
end ;
create : SAME is
   -- Creation variant yielding an empty rune string (no storage octets
   -- allocated) in the default repertoire.
   default_lib : LIBCHARS := LIBCHARS::default ;
   return create(0,default_lib)
end ;
create_from_ucs2(
   str : BINSTR,
   lib : LIBCHARS
   ) : SAME
   pre ~void(lib)
      and (lib.culture.kind = CODE_KINDS::UCS2)
      and (str.size > 0)
      and ((str.size % 2) = 0)      -- UCS2 codes occupy two octets each
   post ~void(result)
is
   -- Build a rune string from the octets of str, which the precondition
   -- requires to be a non-empty, even-length sequence for a UCS2 (not UCS4)
   -- repertoire.  The work is done by 'build' reading from a cursor on str.
   src ::= str.cursor ;
   return build(src,lib)
end ;
create(
   rune : RUNE
   ) : SAME is
   -- Return a rune string holding exactly the one given rune.  An empty
   -- string in the rune's own repertoire is created and the rune appended.
   return create(0,rune.lib) + rune
end ;
create(
   ch_code : CHAR_CODE
   ) : SAME is
   -- Return a rune string holding the single rune obtained from the
   -- given character code.
   single : RUNE := ch_code.rune ;
   return create(single)
end ;
create(
   ch : CHAR,
   lib : LIBCHARS
   ) : SAME
   pre ~void(lib)
   post true
is
   -- This routine converts a single character - presumed to be in the
   -- given repertoire and encoding - into a string of runes using
   -- CODE_CONVERTER.
   --
   -- NOTE The contract used to test 'str', which is not a parameter of
   --      this routine (it was copied from the STR creation routine) and so
   --      referred to the 'str' feature of a void self.  The precondition now
   --      checks the argument actually used.  No size is promised for the
   --      result because the converter's behaviour for unconvertible
   --      characters is not visible from this class.
   return CODE_CONVERTER::runes(lib,ch)
end ;
create(
   str : STR
   ) : SAME
   pre ~void(str)
   post (result.size = str.size)
is
   -- Convert a (non-void) character string into a rune string in the same
   -- repertoire as str.  Storage for one code of the repertoire's size per
   -- character is allocated, then each character code of str is stored in
   -- turn as a rune.
   count : CARD := str.size ;
   src_lib : LIBCHARS := str.index_lib ;
   res : SAME := create(count * src_lib.my_size,src_lib) ;
   pos : CARD := 0 ;
   loop
      res.aset(pos,str.code!.rune) ;
      pos := pos + 1
   end ;
   return res
end ;
from_frunes(
   fstr : FRUNES
   ) : SAME
   pre ~void(fstr)
   post (result.size = fstr.size)
is
   -- Produce the immutable string form of the given fast rune string.
   -- This is a public, specifically named wrapper around the (private)
   -- generic 'from_fstr' routine obtained from the included implementation.
   res : SAME := from_fstr(fstr) ;
   return res
end ;
cursor : RUNES_CURSOR
   pre ~void(self)
   post ~void(result)
is
   -- Return a newly created cursor object over the contents of self.
   res : RUNES_CURSOR := RUNES_CURSOR::create(self) ;
   return res
end ;
size : CARD is
   -- Return the number of runes held in self; a void self holds none.
   -- The index list always has one more entry than there are runes (its
   -- last entry marks the next free storage position).
   if ~void(self) then
      return indices.size - 1
   end ;
   return 0
end ;
private buffer_scan
   pre ~void(self)
   post (indices.size = size + 1)
is
   -- Rebuild the index list from the octets actually stored in self.
   -- The storage is read one code (index_lib.my_size octets) at a time;
   -- the offset of every code which is NOT a combining code starts a new
   -- rune and is recorded.  Finally the offset just past the last code is
   -- recorded as the end marker.  To be called within this class after any
   -- alteration of the contents.
   octets : BINSTR := binstr ;
   offset : CARD := 0 ;
   fresh : FLIST{CARD} := FLIST{CARD}::create ;      -- a new list!
   loop
      code : CHAR_CODE := CHAR_CODE::create(
                           octets.chunk!(index_lib.my_size),index_lib) ;
      if ~code.is_combining then
         fresh := fresh.push(offset)
      end ;
      offset := offset + index_lib.my_size
   end ;
   indices := fresh.push(offset)
end ;
plus(
   elem : RUNE
   ) : SAME
   pre void(self)
      or (elem.lib = index_lib)
   post ~void(result)
      and (result.size = initial(size) + 1)
is
   -- This routine returns a new string consisting of the contents of self
   -- followed by the given element.  Self may be void or empty, in which
   -- case the result holds just elem (in elem's repertoire).
   --
   -- NOTE 1 The postcondition joins its two clauses with 'and'; with 'or'
   --        the size clause was never checked for a non-void result.
   -- NOTE 2 Self is deliberately NOT destroyed.  This class is $IMMUTABLE
   --        and a caller evaluating 'b := a + r' is entitled to go on
   --        using 'a'; reclaiming it is left to the garbage collector.
   res : SAME ;
   loc_index : CARD := size ;      -- which may, of course, be zero
   if void(self)
         or (asize = 0) then
      res := create(elem.asize,elem.lib)
   else
      res := new(asize + elem.asize) ;
      res.indices := FLIST{CARD}::create ;
      res.priv_lib := priv_lib ;
      loop      -- copy every index entry, including the end marker
         res.indices := res.indices.push(indices.elt!)
      end ;
      res.acopy(self)
   end ;
   res.aset(loc_index,elem) ;      -- stores at the end marker and extends it
   return res
end ;
plus(
   fstr : FRUNES
   ) : SAME
   pre (index_lib = fstr.index_lib)
   post ~void(result)
      and ((void(self)
            and (result.size = fstr.size))
         or (result.size = initial(size) + fstr.size))
is
   -- This routine returns the string formed by appending the contents of
   -- the fast rune string fstr to self.
   --
   -- The octets are joined by the included generic routine (raw_plus).
   -- The index list of the RESULT is then rebuilt from its contents.
   -- Previously the scan was applied to self, which left the result without
   -- valid indices and violated the precondition of buffer_scan for a void
   -- self.  The postcondition now names the parameter fstr, not 'str'.
   res : SAME := raw_plus(fstr) ;
   if ~void(res) then
      res.buffer_scan
   end ;
   return res
end ;
plus(
   str : SAME
   ) : SAME
   pre ~void(self)
      and ~void(str)
      and (priv_lib = str.priv_lib)
   post ~void(result)
      and ((void(self)
            and (result.size = str.size))
         or (result.size = initial(size) + str.size))
is
   -- This routine returns the string formed by appending str to self.
   --
   -- The octets are joined by the included generic routine (raw_plus).
   -- The index list of the RESULT is then rebuilt from its contents.
   -- Previously the scan was applied to self, leaving the result without
   -- valid indices.
   res : SAME := raw_plus(str) ;
   if ~void(res) then
      res.buffer_scan
   end ;
   return res
end ;
private store_index(
   elem_index : CARD
   ) : CARD
   pre (elem_index < indices.size)
   post result = indices[elem_index]
is
   -- Map an element (rune) index to the storage index recorded for it in
   -- the index list; the two may differ in this class.
   res : CARD := indices[elem_index] ;
   return res
end ;
aget(
   index : CARD
   ) : RUNE
   pre ~void(self)
      and (index < indices.size - 1)
   post true
is
   -- 'Array' read access: return the rune at the given rune index.  The
   -- octets lying between this rune's storage index and the next one are
   -- gathered into a binary string from which the rune is built.
   first : CARD := indices[index] ;
   last : CARD := indices[index + 1] - 1 ;
   octets : BINSTR := BINSTR::create ;
   loop
      octets := octets + oct_aget(first.upto!(last))
   end ;
   return RUNE::build(octets.cursor,index_lib)
end ;
aset(
   index : CARD,
   elem : RUNE
   )
   pre ~void(self)
      and (((index < (indices.size - 1))
            and ((indices[index + 1] - indices[index]) = elem.asize))
         or ((index = (indices.size - 1))
            and ((indices[index] + elem.asize) <= asize)))
   post true
is
   -- This routine is the 'array' write facility for a rune string.  It is
   -- only applicable if either
   --    (a) elem replaces an existing rune occupying exactly the same number
   --        of storage octets, or
   --    (b) elem is stored at the end of the current contents and there is
   --        room for it in the storage already allocated.
   --
   -- NOTE The index list holds OCTET offsets and the body advances the end
   --      marker by elem.asize, so the precondition measures elem in octets
   --      (asize) too, rather than by elem.size as before.
   if index = (indices.size - 1) then      -- case (b): extend the end marker
      indices := indices.push(indices[index] + elem.asize)
   end ;
   loop      -- copy the octets of elem into place
      loc_index : CARD := indices[index].up! ;
      oct_aset(loc_index,elem.aelt!)
   end
end ;
vset(
   index : CARD,
   elem : RUNE
   ) : SAME
   pre ~void(self)
      and (index <= (indices.size - 1))
   post true
is
   -- This routine is the 'array' write facility for the case where the
   -- storage needed by elem may differ from that of the rune currently at
   -- index.  When elem is being placed at the end and fits in the storage
   -- already allocated it is stored in place and self is returned; otherwise
   -- a new string is produced.
   --
   -- NOTE(review) head(n)/tail(n) are taken to yield the first/last n runes,
   --      as in the Sather STR class - confirm against TEXT_STRING.  On that
   --      reading the former 'head(index - 1) + elem + tail(size - index)'
   --      underflowed for index zero, dropped the rune BEFORE index and
   --      kept the rune that was meant to be replaced.
   count : CARD := indices.size - 1 ;      -- runes currently held
   if index = count then      -- placing at the end: nothing is replaced
      if (indices[index] + elem.asize) < asize then
         aset(index,elem) ;
         return self
      end ;
      return self + elem
   end ;
   res : SAME ;
   if index = 0 then
      res := create(elem)
   else
      res := head(index) + elem
   end ;
   remaining : CARD := count - (index + 1) ;      -- runes after the replaced one
   if remaining > 0 then
      res := res + tail(remaining)
   end ;
   return res
end ;
rune(
   index : CARD
   ) : RUNE
   pre ~void(self)
      and (index < size)
   post result = [index]
is
   -- This routine returns the value of the rune at the given rune index.
   --
   -- NOTE The bound is the number of runes (size), matching the
   --      precondition of aget.  The former bound 'asize - 1' was an octet
   --      count: it underflowed for an empty string and excluded the last
   --      rune when each rune occupies a single octet.
   return [index]
end ;
char(
   index : CARD
   ) : RUNE
   pre ~void(self)
      and (index < size)
   post result = [index]
is
   -- This routine is a synonym for 'rune', included to match the required
   -- abstract interface definition.  It returns the rune at the given rune
   -- index.
   --
   -- NOTE The bound is the number of runes (size), matching the
   --      precondition of aget.  The former bound 'asize - 1' was an octet
   --      count: it underflowed for an empty string and excluded the last
   --      rune when each rune occupies a single octet.
   return [index]
end ;
binstr : BINSTR
   pre ~void(self)
   post ~void(result)
is
   -- Return the stored octets of self as a binary string, for use where a
   -- text string has to be put into a binary stream of some kind.  The
   -- octets are accumulated one at a time in a fast binary string.
   buf : FBINSTR := FBINSTR::create ;
   loop
      octet ::= aelt! ;
      buf := buf + octet
   end ;
   return buf.binstr
end ;
ucs2 : BINSTR
   pre ~void(self)
   post void(result)
      or (result.size = 2 * size)      -- two octets per code
is
   -- This routine returns a copy of self in UCS2 binary form, for cases
   -- where the string has to be put into that form.  No count is included.
   -- If any code is too large to be represented in UCS2 (greater than
   -- HEXTET::Hextet_Max) then void is returned - the postcondition now
   -- admits that outcome, which it previously could not.
   --
   -- NOTE(review) The size clause counts runes whereas the loop emits two
   --      octets per CODE; confirm that these agree when self contains
   --      combining codes.
   res : BINSTR := BINSTR::create ;
   loop
      ucs4_code : CARD := code!.card ;
      if ucs4_code > HEXTET::Hextet_Max then      -- not representable
         return void
      end ;
      res := res + HEXTET::create(ucs4_code).binstr
   end ;
   return res
end ;
convert(
   lib : LIBCHARS
   ) : SAME
   pre ~void(self)
      and ~void(lib)
   post true
is
   -- Return self converted by CODE_CONVERTER into the encoding and
   -- repertoire given by lib.  Void is returned if some character has no
   -- corresponding code.
   converted : SAME := CODE_CONVERTER::runes(lib,self) ;
   return converted
end ;
private do_replace(
   old_ch,
   new_ch : RUNE
   ) : SAME is
   -- Return a copy of self in which each rune equal to old_ch has been
   -- replaced by new_ch.  The copy is assembled as a code string in the
   -- repertoire of self and then turned back into runes.
   codes : CODE_STR := CODE_STR::create(index_lib) ;
   loop
      current : RUNE := elt! ;
      if current = old_ch then
         codes := codes + new_ch.code
      else
         codes := codes + current.code
      end
   end ;
   return codes.tgt_runes
end ;
replace(
   set : SAME,
   new_ch : RUNE
   ) : SAME
   pre ~void(set)
      and ~void(self)
      and (set.priv_lib = priv_lib)
      and (new_ch.lib = index_lib)
   post (result.size = self.size)      -- and replacement done!
is
   -- Return a copy of self in which each rune that occurs in set has been
   -- replaced by new_ch.  Self, set and new_ch must share one repertoire.
   -- The copy is assembled as a code string and then turned back into runes.
   codes : CODE_STR := CODE_STR::create(index_lib) ;
   loop
      current : RUNE := elt! ;
      if set.contains(current) then
         codes := codes + new_ch.code
      else
         codes := codes + current.code
      end
   end ;
   return codes.tgt_runes
end ;
escape(
   esc : RUNE,
   elist : SAME
   ) : SAME
   pre ~void(self)
      and ~void(elist)
      and (priv_lib = elist.priv_lib)
      and (esc.lib = index_lib)
   post (result.contains(esc)
         or (result = self))
is
   -- Return a copy of self in which every rune occurring in elist, and
   -- every occurrence of esc itself, is preceded by the escape rune esc.
   -- The copy is assembled in a fast rune string and converted at the end.
   initial_size : CARD := asize / index_lib.my_size ;
   buf : FRUNES := FRUNES::create(initial_size) ;
   loop
      current : RUNE := elt! ;
      needs_escape : BOOL := elist.contains(current)
                             or (current = esc) ;
      if needs_escape then
         buf := buf + esc
      end ;
      buf := buf + current
   end ;
   return from_frunes(buf)
end ;
set!(
   rn : RUNE
   )
   pre ~void(self)
      and (rn.lib = index_lib)
   post true
is
   -- This iter sets successive elements of self to the given value,
   -- yielding after each one.  The octets of rn are written one after another
   -- from the start of the storage.  When the next rune would no longer fit
   -- in the allocated storage the loop ends and the index list is rebuilt
   -- from the new contents.
   --
   -- NOTE The fit test is '<=': a rune ending exactly at the end of the
   --      storage does fit (its last octet is at asize - 1).  With '<' the
   --      final slot was never written.
   loc_oct_index : CARD := 0 ;
   loop
      loc_size : CARD := rn.num_codes * index_lib.my_size ;      -- octets in rn
      if loc_size + loc_oct_index <= asize then
         loop
            aset!(loc_oct_index,rn.aelt!)
         end ;
         loc_oct_index := loc_oct_index + loc_size ;
         yield
      else
         break!
      end
   end ;
   buffer_scan
end ;
split!(
   once rn : RUNE
   ) : RUNES
   pre ~void(self)
      and (rn.lib = index_lib)
   post (result.size >= 0)      -- may be zero if two adjacent rn runes found.
is
   -- Yield, in order, the substrings of self lying between occurrences of
   -- the separator rune rn.  The separators themselves are not included.
   -- The final substring runs from just after the last separator found to
   -- the end of self, after which the iter quits.
   start : CARD := 0 ;      -- where the next substring begins
   loop
      hit : CARD := search(rn,start) ;
      if hit = CARD::nil then      -- no further separator: yield the rest
         yield substring(start,size - start) ;
         quit
      end ;
      yield substring(start,hit - start) ;
      start := hit + 1      -- step over the separator
   end
end ;
separate!(
   rns : SAME
   ) : SAME
   pre ~void(rns)
      and ~void(self)
      and (priv_lib = rns.priv_lib)
   post (result = rns)
      or (result = self + rns)
is
   -- The first value yielded is rns alone; every later value is self
   -- followed by rns.  With self as the separator this is handy for forming
   -- lists, eg
   --
   --    loop
   --       #OUT + comma.separate!(a.elt!)
   --    end ;
   yield rns ;
   loop
      joined : SAME := self + rns ;
      yield joined
   end
end ;
text_str(
   lib : LIBCHARS
   ) : STR
   pre ~void(self)
      and ~void(lib)
   post (size = 0)
      or (result.size > 0)
is
   -- This routine returns a textual representation of self in the given
   -- repertoire and encoding.  Each rune is rendered as the hexadecimal
   -- values of its codes separated by spaces, and each rune is followed by
   -- a comma.
   --
   -- NOTE 1 The result is created in lib (as is done in str(lib)), not in
   --        the default repertoire, since everything appended to it comes
   --        from lib.
   -- NOTE 2 An empty self gives an empty result, which the postcondition
   --        now permits.
   res : STR := STR::create(lib) ;
   loop
      loc_rune : RUNE := elt! ;
      loop      -- the codes of this rune, space separated
         res := res + lib.Space.str.separate!(loc_rune.code!.card.hex_str)
      end ;
      res := res + lib.Comma.char
   end ;
   return res
end ;
text_str : STR
   pre ~void(self)
   post result.size > 0
is
   -- Return the hexadecimal textual representation of self produced by
   -- text_str(lib) for the default repertoire and encoding.
   default_lib : LIBCHARS := LIBCHARS::default ;
   return text_str(default_lib)
end ;
str(
   lib : LIBCHARS
   ) : STR
   pre ~void(self)
      and ~void(lib)
   post (result.size <= size)
is
   -- This routine creates a new string which is a literal copy of the
   -- codes of self, interpreted as being in the given encoding and
   -- repertoire.  No conversion is carried out.  An element which gives no
   -- character, or whose code is not valid in lib's character map, is
   -- silently omitted - so the result may be shorter than self (or empty),
   -- which the postcondition now reflects.
   --
   -- The unused counting local (index := 0.up!) has been removed.
   res : STR := STR::create(lib) ;
   loop
      loc_elem : CHAR := elt!.char ;
      if ~void(loc_elem)
            and lib.culture.charmap.valid(loc_elem.code) then
         res := res + loc_elem
      end
   end ;
   return res
end ;
str : STR
   pre ~void(self)
   post (result.size <= size)
is
   -- This routine creates a character string from the runes of self,
   -- interpreting the existing codes in the default repertoire and encoding.
   --
   -- NOTE This routine does NOT convert any encoding.  An element which
   --      gives no character, or whose code is not valid in the default
   --      character map, is silently omitted.  The result may therefore be
   --      shorter than self, which the postcondition now reflects (it used
   --      to demand equal sizes).
   --
   -- The unused counting local (index := 0.up!) has been removed.
   loc_lib : LIBCHARS := LIBCHARS::default ;
   res : STR := STR::create(loc_lib) ;
   loop
      loc_elem : CHAR := elt!.char ;
      if ~void(loc_elem)
            and loc_lib.culture.charmap.valid(loc_elem.code) then
         res := res + loc_elem
      end
   end ;
   return res
end ;
end ; -- RUNES