repmap.sa
Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details
-------------------------> GNU Sather - sourcefile <-------------------------
-- Copyright (C) 2000 by K Hopper, University of Waikato, New Zealand --
-- This file is part of the GNU Sather library. It is free software; you may --
-- redistribute and/or modify it under the terms of the GNU Library General --
-- Public License (LGPL) as published by the Free Software Foundation; --
-- either version 2 of the license, or (at your option) any later version. --
-- This library is distributed in the hope that it will be useful, but --
-- WITHOUT ANY WARRANTY without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. See Doc/LGPL for more details. --
-- The license text is also available from: Free Software Foundation, Inc., --
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --
--------------> Please email comments to <bug-sather@gnu.org> <--------------
class REP_MAP
class REP_MAP is
-- This class embodies the repertoire map described in ISO/IEC 14652 and
-- provides facilities for encoding and decoding either individual named
-- code-points or 'strings' of them.
--
-- The repertoire map (used to be known as a charmap) itself included
-- here is automatically read in if not already created when any of the other
-- operations are performed.
-- NOTE Where synonyms are involved the two maps will have different sizes!
-- Version 1.2 Oct 98. Copyright K Hopper, U of Waikato
-- Development History
-- -------------------
-- Date Who By Detail
-- ---- ------ ------
-- 29 May 97 kh Original for Extended Regular Expressions
-- 26 Jun 97 kh Converted to a bi-map.
-- 5 Oct 98 kh Added pre/post conditions
private attr lib : LIBCHARS ; -- for code/char creation use.
private attr outmap : FMAP{TOKEN,CHAR_CODE} ;
private attr inmap : FMAP{CHAR_CODE,FLIST{TOKEN}} ;
private attr synonyms : FMAP{TOKEN,FLIST{TOKEN}} ;
-- These are the three maps held by a repertoire map:
--   outmap   converts a character name token to its encoding,
--   inmap    is the inverse, converting a code to the list of every token
--            that names it (one code may have several names),
--   synonyms associates a token with a list of synonym tokens.
-- Code is always in the culture specified encoding.
Leaf_Name_ref : STR is
   -- Answers the leaf name "rep_map" of the repertoire map resource file.
   -- The name is assembled one code at a time in the default culture and
   -- encoding, because it is destined to be used as a file name.
   dflt : LIBCHARS := LIBCHARS::default ;
   leaf : CODE_STR := CODE_STR::create(dflt) ;
   leaf := leaf + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_R.card,dflt) ;
   leaf := leaf + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_E.card,dflt) ;
   leaf := leaf + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_P.card,dflt) ;
   leaf := leaf + CHAR_CODE::create(UNICODE::LOW_LINE.card,dflt) ;
   leaf := leaf + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_M.card,dflt) ;
   leaf := leaf + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_A.card,dflt) ;
   leaf := leaf + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_P.card,dflt) ;
   return leaf.tgt_str
end ;
create(lib : LIBCHARS) : SAME
   pre ~void(lib)
   post ~void(result.lib) and void(result.inmap) and void(result.outmap)
is
   -- Answers a brand new bi-map bound to the culture encoding given by
   -- lib. None of the maps is allocated yet - only the library reference
   -- is set.
   fresh : SAME := new ;
   fresh.lib := lib ;
   return fresh
end ;
create : SAME is
   -- Answers a new empty bi-map which uses the default encoding and
   -- repertoire (LIBCHARS::default).
   dflt : LIBCHARS := LIBCHARS::default ;
   return create(dflt)
end ;
private get_synonyms(cursor : BIN_CURSOR,token_size : CARD) : SAME
   pre ~void(cursor) and ~cursor.is_done
   post true
is
   -- Reads the synonym table from cursor into the synonyms map and answers
   -- self. The table starts with an entry count; each entry is a key token,
   -- a one-item list length and then that many tokens. Every token is built
   -- using token_size. A zero count leaves synonyms untouched.
   count : CARD := cursor.card ;
   if count /= 0 then
      synonyms := FMAP{TOKEN,FLIST{TOKEN}}::create(count) ;
      loop
         count.times! ;
         key : TOKEN := TOKEN::build(cursor,token_size) ;
         entries : CARD := cursor.get_item.card ;
         alts : FLIST{TOKEN} := FLIST{TOKEN}::create(entries) ;
         loop
            entries.times! ;
            alts := alts.push(TOKEN::build(cursor,token_size))
         end ;
         synonyms := synonyms.insert(key,alts)
      end
   end ;
   return self
end ;
private build_map(cursor : BIN_CURSOR,token_size : CARD) : SAME
   pre ~void(cursor) and ~cursor.is_done
   post true
is
   -- Reads the binary token/code table from cursor and fills BOTH maps
   -- from it, answering self. The table starts with an entry count; each
   -- entry is a token (built using token_size) followed by a raw code in
   -- the encoding of lib. A code met more than once accumulates all of its
   -- tokens in the inmap list.
   entries : CARD := cursor.card ;
   inmap := FMAP{CHAR_CODE,FLIST{TOKEN}}::create(entries) ;
   outmap := FMAP{TOKEN,CHAR_CODE}::create(entries) ;
   loop
      entries.times! ;
      key : TOKEN := TOKEN::build(cursor,token_size) ;
      code_val : CHAR_CODE := CHAR_CODE::raw_build(cursor,lib) ;
      names : FLIST{TOKEN} ;
      if inmap.test(code_val) then -- extend the existing list of names
         names := inmap.get(code_val)
      else -- first name seen for this code
         names := FLIST{TOKEN}::create
      end ;
      inmap := inmap.insert(code_val,names.push(key)) ;
      outmap := outmap.insert(key,code_val)
   end ;
   return self
end ;
create(cult : CULTURE) : SAME
   pre ~void(cult) and ~void(cult.resource_path)
   post ~void(result)
is
   -- Answers the repertoire map belonging to the culture cult. If the
   -- culture state is already beyond cult.Culture its existing charmap is
   -- answered. Otherwise the binary resource file named by Leaf_Name_ref
   -- is read from the culture's binary resource path and decoded into the
   -- maps. The file holds a token size octet, a code size octet (which
   -- must equal lib.my_size), the token/code table and the synonym table.
   -- Any failure is reported through SYS_ERROR::blind_error.
   if cult.state > cult.Culture then -- only one per culture
      return cult.charmap
   end ;
   rmap : SAME := create(cult.sather_lib) ;
   map_path : FILE_PATH := cult.bin_resource_path.append(rmap.Leaf_Name_ref) ;
   source : BIN_FILE := BIN_FILE::open_for_read(map_path.str) ;
   if void(source) then -- no file, the program cannot proceed!
      SYS_ERROR::blind_error(rmap,map_path.str,LIBCHARS::default)
   elsif source.error then
      SYS_ERROR::blind_error(rmap,source.error_message + map_path.str,LIBCHARS::default)
   end ;
   contents : FBINSTR := source.buffer ;
   source.close ; -- everything needed is now in contents
   reader : BIN_CURSOR := contents.binstr.cursor ;
   tok_octets : CARD := reader.get_item.card ;
   code_octets : CARD := reader.get_item.card ;
   if code_octets /= rmap.lib.my_size then -- built for another encoding
      SYS_ERROR::blind_error(rmap,map_path.str,LIBCHARS::default)
   end ;
   rmap := rmap.build_map(reader,tok_octets) ;
   rmap := rmap.get_synonyms(reader,tok_octets) ;
   if ~reader.is_done then -- unexpected trailing data
      SYS_ERROR::blind_error(rmap,map_path.str,LIBCHARS::default)
   end ;
   return rmap
end ;
insert(name : TOKEN,list : FLIST{TOKEN})
   pre ~void(self) and ~(name.card = 0) and ~void(list)
   post ~void(synonyms) and synonyms.test(name)
is
   -- This routine records list as the synonym list of the token name,
   -- creating the synonym map on first use. If name already has a synonym
   -- list then it is replaced by list.
   --
   -- NOTE The postcondition speaks about the synonym map only. This routine
   -- never touches inmap or outmap, so the former condition (that inmap
   -- grows by one) could not be satisfied.
   if void(synonyms) then
      synonyms := FMAP{TOKEN,FLIST{TOKEN}}::create
   end ;
   synonyms := synonyms.insert(name,list)
end ;
insert(name : TOKEN,val : CHAR_CODE)
   pre ~void(self)
      and ~(name.card = 0)
      and (void(inmap) or ~inmap.test(val))
   post (inmap.size = (initial(inmap.size) + 1))
is
   -- Adds the pairing of the token name with the code val to both maps,
   -- creating the two maps together on first use. The outmap entry for
   -- name is set to val and name is appended to the inmap list for val.
   -- The precondition demands that val is not yet an inmap key, so the
   -- 'already present' branch matters only when preconditions are not
   -- being checked.
   if void(outmap) then
      outmap := FMAP{TOKEN,CHAR_CODE}::create ;
      inmap := FMAP{CHAR_CODE,FLIST{TOKEN}}::create
   end ;
   names : FLIST{TOKEN} ;
   if inmap.test(val) then
      names := inmap.get(val)
   else
      names := FLIST{TOKEN}::create
   end ;
   inmap := inmap.insert(val,names.push(name)) ;
   outmap := outmap.insert(name,val)
end ;
valid(name : TOKEN) : BOOL is
   -- Answers true if and only if name is a non-zero token which is a key
   -- of the outmap. A token whose card is zero is never valid and the map
   -- is not consulted for it.
   return ~(name.card = 0) and outmap.test(name)
end ;
valid(val : CHAR_CODE) : BOOL is
   -- Answers true if and only if the code val is a key of the inmap.
   known : BOOL := inmap.test(val) ;
   return known
end ;
is_valid_encoding(bits : BINSTR) : BOOL is
   -- The vital link between encodings and characters: answers true if and
   -- only if the code made from the bit-pattern bits (in the encoding of
   -- lib) is a key of the inmap - ie bits encodes a character or ligature
   -- of this repertoire.
   return inmap.test(CHAR_CODE::create(bits,lib))
end ;
is_valid_encoding(bits : FBINSTR) : BOOL is
   -- The FBINSTR flavour of the encoding test: the binary string held by
   -- bits is turned into a code (in the encoding of lib) and true is
   -- answered if and only if that code is a key of the inmap.
   raw ::= bits.binstr ;
   return inmap.test(CHAR_CODE::create(raw,lib))
end ;
char(bits : BINSTR) : CHAR
   pre is_valid_encoding(bits)
   post true
is
   -- Builds a character from the bit-pattern bits, which must be a valid
   -- encoding in this repertoire. This is the one place where characters
   -- are made from bit-patterns.
   reader ::= bits.cursor ;
   return CHAR::build(reader,lib)
end ;
token_list(name : CHAR_CODE) : FLIST{TOKEN}
   pre ~void(inmap) and inmap.test(name)
   post result.size > 0
is
   -- Answers the complete list of tokens which the inmap holds for the
   -- character code given (despite its name, the argument is a code).
   names : FLIST{TOKEN} := inmap.get(name) ;
   return names
end ;
code(val : TOKEN) : CHAR_CODE
   pre ~void(outmap) and outmap.test(val)
   post true
is
   -- Answers the code which the outmap associates with the token val.
   -- The precondition requires val to be a key of the outmap.
   found : CHAR_CODE := outmap.get(val) ;
   return found
end ;
token(val : CHAR_CODE) : TOKEN
   pre ~void(inmap) and inmap.test(val)
   post true
is
   -- Answers the first token 'name' listed in the inmap for the code val.
   -- This need not be the token used in text string ordering.
   names : FLIST{TOKEN} := inmap.get(val) ;
   return names[0]
end ;
codes(str : STR) : CODE_STR
   pre ~void(self)
      and ~void(inmap)
      and (str.index_lib = lib)
   post result.size = str.size
is
   -- Answers the code string made by appending, in order, the code of
   -- every element of str to an initially empty code string created for
   -- lib.
   encoded : CODE_STR := CODE_STR::create(lib) ;
   loop
      ch ::= str.elt! ;
      encoded := encoded + ch.code
   end ;
   return encoded
end ;
tokens(codes : ARRAY{CHAR_CODE}) : ARRAY{TOKEN}
   pre codes.size > 0
   post result.size = codes.size -- or an exception has been raised!
is
   -- This routine returns an array holding, for each code in codes and in
   -- the same order, the first token which the inmap lists for that code.
   -- A code which has no inmap entry (or whose first token is void) is
   -- reported through SYS_ERROR::blind_error.
   --
   -- NOTE The inmap is tested BEFORE the list is indexed. Previously the
   -- result of the lookup was indexed with [0] unconditionally, so a code
   -- missing from the map indexed a missing list and never reached the
   -- error report below.
   res : ARRAY{TOKEN} := ARRAY{TOKEN}::create(codes.asize) ;
   loop
      loc_code : CHAR_CODE := codes.elt! ;
      tok : TOKEN := void ; -- stays void unless the code is mapped
      if inmap.test(loc_code) then
         tok := inmap.get(loc_code)[0]
      end ;
      if void(tok) then -- can't do anything more!
         SYS_ERROR::blind_error(self,tok.card.str,LIBCHARS::default)
      else
         res.set!(tok)
      end
   end ;
   return res
end ;
private get_size(val : CARD,size : CARD) : CARD is
   -- Answers the greater of size and the number of octets used to hold
   -- val: 1 when val is no more than OCTET::Octet_Max, 2 when it is no
   -- more than HEXTET::Hextet_Max and 4 otherwise.
   needed : CARD := 1 ;
   if val > OCTET::Octet_Max then
      needed := 2
   end ;
   if val > HEXTET::Hextet_Max then
      needed := 4
   end ;
   return size.max(needed)
end ;
private get_token_size : CARD is
   -- Answers the number of octets needed for the binary form of the
   -- widest token which is a key of the outmap (never less than one).
   -- The keys have to be searched because some token values used when
   -- the maps were made do not appear explicitly in them.
   widest : CARD := 1 ; -- the default minimum!
   loop
      key : TOKEN := outmap.keys! ;
      widest := get_size(key.card,widest)
   end ;
   return widest
end ;
private put_synonyms(tok_size : CARD) : BINSTR
   pre ~void(self)
   post ~void(result)
is
   -- This routine creates the binary representation of the synonym map
   -- for appending to the end of the token/code map table. Every token is
   -- written using tok_size octets. The layout is the one which
   -- get_synonyms reads back:
   --    the number of entries, then for each entry
   --       the key token,
   --       one octet giving the length of its synonym list,
   --       the synonym tokens themselves.
   --
   -- NOTE Two faults have been put right here. The body referred to an
   -- undeclared name token_size instead of the parameter tok_size, and
   -- the key token of each entry was never written although get_synonyms
   -- expects to read it before the list length.
   res : BINSTR := synonyms.size.binstr ;
   loop
      -- The count is tested before keys! is called - presumably so that
      -- an empty map yields no entries at all (TODO confirm for a void
      -- synonyms map).
      synonyms.size.times! ;
      tok : TOKEN := synonyms.keys! ;
      loc_list : FLIST{TOKEN} := synonyms.get(tok) ;
      res := res + tok.binstr(tok_size) + OCTET::create(loc_list.size) ;
      loop
         res := res + loc_list.elt!.binstr(tok_size)
      end
   end ;
   return res
end ;
binstr : BINSTR
   pre ~void(self)
   post ~void(result)
is
   -- Answers the binary string form of self for external storage: one
   -- octet giving the token size, one octet giving lib.my_size, the number
   -- of outmap entries, each token/code pair of the outmap and finally the
   -- synonym table produced by put_synonyms.
   width : CARD := get_token_size ;
   image : BINSTR := BINSTR::create + OCTET::create(width) +
                     OCTET::create(lib.my_size) + outmap.size.binstr ;
   loop
      key : TOKEN := outmap.keys! ;
      image := image + key.binstr(width) + outmap.get(key).binstr
   end ;
   return image + put_synonyms(width)
end ;
token!(once val : CHAR_CODE ) : TOKEN
   pre ~void(inmap)
      and inmap.test(val)
   post true
is
   -- Yields, one after another, every token 'name' which the inmap lists
   -- for the character code val.
   names : FLIST{TOKEN} := inmap.get(val) ;
   loop
      yield names.elt!
   end
end ;
synonym!(once ch : CHAR) : CHAR_CODE
   pre ~void(ch)
   post ~void(result)
is
   -- Yields the encoding of ch itself first and then, in list order, the
   -- outmap code of each token in the synonym list recorded for the first
   -- inmap token of ch.
   own_code ::= ch.code(lib) ;
   first_name : TOKEN := inmap.get(own_code)[0] ;
   alts : FLIST{TOKEN} := synonyms.get(first_name) ;
   yield CHAR_CODE::create(ch,lib) ;
   loop
      yield outmap.get(alts.elt!)
   end
end ;
synonym!(once ch : RUNE) : CHAR_CODE
   pre ~void(ch)
   post ~void(result)
is
   -- Yields the code of the rune ch itself first and then, in list order,
   -- the outmap code of each token in the synonym list recorded for the
   -- first inmap token of ch.
   first_name : TOKEN := inmap.get(ch.code)[0] ;
   alts : FLIST{TOKEN} := synonyms.get(first_name) ;
   yield ch.code ;
   loop
      yield outmap.get(alts.elt!)
   end
end ;
sequence! : TOKEN
   pre ~void(self) and (outmap.size > 0)
   post true
is
   -- Yields one token for each element of the inmap - the first token of
   -- that element's list.
   loop
      names ::= inmap.elt! ;
      yield names[0]
   end
end ;
private do_str(codes : ARRAY{TOKEN}) : CODE_STR
   pre ~void(self)
      and ~void(outmap)
      and ~void(codes)
   post result.size > 0 -- or an exception has been raised!
is
   -- Answers the code string formed by looking up each token of codes in
   -- the outmap, in order, in the encoding of lib. A token for which the
   -- lookup gives a void code is reported through SYS_ERROR::blind_error
   -- together with the text built so far.
   built : CODE_STR := CODE_STR::create(lib) ;
   loop
      next_code : CHAR_CODE := outmap.get(codes.elt!) ;
      if ~void(next_code) then
         built := built + next_code
      else -- can't do anything more!
         SYS_ERROR::blind_error(self,built.tgt_str,LIBCHARS::default)
      end
   end ;
   return built
end ;
str(codes : ARRAY{TOKEN}) : STR
   pre ~void(self)
      and ~void(outmap)
      and ~void(codes)
   post result.size > 0 -- or an exception has been raised!
is
   -- Answers a string form of the token array codes: the code string
   -- produced by do_str, converted with tgt_str.
   encoded : CODE_STR := do_str(codes) ;
   return encoded.tgt_str
end ;
end ; -- REP_MAP