repmap.sa


Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details
 
------------------------->  GNU Sather - sourcefile  <-------------------------
-- Copyright (C) 2000 by K Hopper, University of Waikato, New Zealand        --
-- This file is part of the GNU Sather library. It is free software; you may --
-- redistribute  and/or modify it under the terms of the GNU Library General --
-- Public  License (LGPL)  as published  by the  Free  Software  Foundation; --
-- either version 2 of the license, or (at your option) any later version.   --
-- This  library  is distributed  in the  hope that it will  be  useful, but --
-- WITHOUT ANY WARRANTY without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. See Doc/LGPL for more details.       --
-- The license text is also available from:  Free Software Foundation, Inc., --
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA                     --
-------------->  Please email comments to <bug-sather@gnu.org>  <--------------


class REP_MAP

class REP_MAP is
   -- This class embodies the repertoire map described in ISO/IEC 14652 and
   -- provides facilities for encoding and decoding either individual or
   -- 'strings' of named code-points.
   --
   -- The repertoire map (used to be known as a charmap) itself included
   -- here is automatically read in if not already created when any of the
   -- other operations are performed.
   --
   -- NOTE Where synonyms are involved the two maps will have different sizes!
   --
   -- External (binary) layout, as written by binstr and read by create(cult):
   --      octet            token size in octets
   --      octet            lib.my_size (code size check value)
   --      card             number of token/code pairs
   --      { token code }   the pairs themselves
   --      card             number of synonym entries
   --      { token octet { token } }   key token, list length, list tokens

   -- Version 1.2 Oct 98.  Copyright K Hopper, U of Waikato
   -- Development History
   -- -------------------
   -- Date       Who By    Detail
   -- ----       ------    ------
   -- 29 May 97    kh      Original for Extended Regular Expressions
   -- 26 Jun 97    kh      Converted to a bi-map.
   --  5 Oct 98    kh      Added pre/post conditions

   private attr lib : LIBCHARS ;      -- for code/char creation use.

   private attr outmap : FMAP{TOKEN,CHAR_CODE} ;
   private attr inmap : FMAP{CHAR_CODE,FLIST{TOKEN}} ;
   private attr synonyms : FMAP{TOKEN,FLIST{TOKEN}} ;
   -- outmap converts a character name token to its encoding, inmap is the
   -- inverse map which converts a code to the list of tokens naming it and
   -- synonyms maps a token to its list of synonym tokens.  Code is always
   -- in the culture specified encoding.


   Leaf_Name_ref : STR is
      -- This routine creates and returns the repertoire map file name
      -- "rep_map", using the local default culture and encoding - since it
      -- is for a file name!!  The name is built code by code from the
      -- UNICODE constants so that it does not depend on the encoding of
      -- this source text.

      loc_lib : LIBCHARS := LIBCHARS::default ;
      loc_res : CODE_STR := CODE_STR::create(loc_lib)
            + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_R.card,loc_lib)
            + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_E.card,loc_lib)
            + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_P.card,loc_lib)
            + CHAR_CODE::create(UNICODE::LOW_LINE.card,loc_lib)
            + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_M.card,loc_lib)
            + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_A.card,loc_lib)
            + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_P.card,loc_lib) ;
      return loc_res.tgt_str
   end ;


   create(lib : LIBCHARS) : SAME
      pre ~void(lib)
      post ~void(result.lib)
         and void(result.inmap)
         and void(result.outmap)
   is
      -- This routine creates a new empty bi-map with the given culture
      -- encoding.  None of the maps is allocated until something is
      -- inserted or read in.

      me : SAME := new ;
      me.lib := lib ;
      return me
   end ;


   create : SAME is
      -- This routine creates a new empty bi-map for use with the default
      -- encoding and repertoire.

      return create(LIBCHARS::default)
   end ;


   private get_synonyms(cursor : BIN_CURSOR,token_size : CARD) : SAME
      pre ~void(cursor)
         and ~cursor.is_done
      post true
   is
      -- This routine reads the synonym data from the stream and creates
      -- the synonym map required.  Each entry is read as a key token, a
      -- one item list length and then that many tokens, which is the
      -- inverse of what put_synonyms writes.

      res : SAME := self ;
      size : CARD := cursor.card ;      -- number of synonym entries
      if size = 0 then      -- nothing to do!
         return res
      end ;

      loc_list : FLIST{TOKEN} ;
      res.synonyms := FMAP{TOKEN,FLIST{TOKEN}}::create(size) ;
      loop
         size.times! ;
         tok : TOKEN := TOKEN::build(cursor,token_size) ;
         list_size : CARD := cursor.get_item.card ;
         loc_list := FLIST{TOKEN}::create(list_size) ;
         loop
            list_size.times! ;
            list_item : TOKEN := TOKEN::build(cursor,token_size) ;
            loc_list := loc_list.push(list_item)
         end ;
         res.synonyms := res.synonyms.insert(tok,loc_list)
      end ;
      return res
   end ;


   private build_map(cursor : BIN_CURSOR,token_size : CARD) : SAME
      pre ~void(cursor)
         and ~cursor.is_done
      post true
   is
      -- This routine reads the binary map data from the stream and creates
      -- both the outmap and the inmap required.  Tokens which share a code
      -- are accumulated in that code's inmap list.

      res : SAME := self ;
      size : CARD := cursor.card ;      -- number of token/code pairs
      loc_list : FLIST{TOKEN} ;
      res.inmap := FMAP{CHAR_CODE,FLIST{TOKEN}}::create(size) ;
      res.outmap := FMAP{TOKEN,CHAR_CODE}::create(size) ;
      loop
         size.times! ;
         tok : TOKEN := TOKEN::build(cursor,token_size) ;
         loc_code : CHAR_CODE := CHAR_CODE::raw_build(cursor,lib) ;
         if res.inmap.test(loc_code) then      -- another name for this code
            loc_list := res.inmap.get(loc_code).push(tok)
         else
            loc_list := FLIST{TOKEN}::create.push(tok)
         end ;
         res.inmap := res.inmap.insert(loc_code,loc_list) ;
         res.outmap := res.outmap.insert(tok,loc_code)
      end ;
      return res
   end ;


   create(cult : CULTURE) : SAME
      pre ~void(cult)
         and ~void(cult.resource_path)
      post ~void(result)
   is
      -- This routine creates the locale specific version of this map.
      -- Given the culture, the appropriate resource file is found and then
      -- the contents read into the individual maps.  A missing, unreadable
      -- or malformed file is reported through SYS_ERROR::blind_error.

      if cult.state > cult.Culture then      -- only one per culture
         return cult.charmap
      end ;

      me : SAME := create(cult.sather_lib) ;      -- the empty map as above
      loc_path : FILE_PATH := cult.bin_resource_path.append(me.Leaf_Name_ref) ;
      fyle : BIN_FILE := BIN_FILE::open_for_read(loc_path.str) ;
      if void(fyle) then      -- the program cannot proceed!
         SYS_ERROR::blind_error(me,loc_path.str,LIBCHARS::default)
      elsif fyle.error then
         SYS_ERROR::blind_error(me,fyle.error_message + loc_path.str,
                                 LIBCHARS::default)
      end ;

      file_text : FBINSTR := fyle.buffer ;
      fyle.close ;
      file_cursor : BIN_CURSOR := file_text.binstr.cursor ;

      -- The header is two items - the token size and then the code size,
      -- which must agree with the encoding in use.
      tok_size : CARD := file_cursor.get_item.card ;
      if file_cursor.get_item.card /= me.lib.my_size then
         SYS_ERROR::blind_error(me,loc_path.str,LIBCHARS::default)
      end ;

      me := me.build_map(file_cursor,tok_size) ;
      me := me.get_synonyms(file_cursor,tok_size) ;
      if ~file_cursor.is_done then      -- trailing data - file is malformed
         SYS_ERROR::blind_error(me,loc_path.str,LIBCHARS::default)
      end ;
      return me
   end ;


   insert(name : TOKEN,list : FLIST{TOKEN})
      pre ~void(self)
         and ~(name.card = 0)
         and ~void(list)
      post ~void(synonyms)
         and synonyms.test(name)
   is
      -- This routine inserts the maplet from name to list into the synonym
      -- map, creating that map if it does not yet exist.  If name is already
      -- in the synonym map then the existing list for it is over-written.
      -- Neither inmap nor outmap is touched, so the post-condition is
      -- expressed in terms of the synonym map only.

      if void(synonyms) then
         synonyms := FMAP{TOKEN,FLIST{TOKEN}}::create
      end ;
      synonyms := synonyms.insert(name,list)
   end ;


   insert(name : TOKEN,val : CHAR_CODE)
      pre ~void(self)
         and ~(name.card = 0)
         and (void(inmap) or ~inmap.test(val))
      post (inmap.size = (initial(inmap.size) + 1))
   is
      -- This routine inserts the maplet from name to val in the outmap and
      -- adds name to the list of tokens for val in the inmap, creating both
      -- maps if they do not yet exist.  If name is already in the outmap then
      -- the existing value for it is over-written with val.

      loc_list : FLIST{TOKEN} ;
      if void(outmap) then
         outmap := FMAP{TOKEN,CHAR_CODE}::create ;
         inmap := FMAP{CHAR_CODE,FLIST{TOKEN}}::create
      end ;

      -- The pre-condition excludes an already mapped val; the first branch
      -- is retained for when pre-condition checking is not enabled.
      if inmap.test(val) then
         loc_list := inmap.get(val).push(name)
      else
         loc_list := FLIST{TOKEN}::create.push(name)
      end ;
      inmap := inmap.insert(val,loc_list) ;
      outmap := outmap.insert(name,val)
   end ;


   valid(name : TOKEN) : BOOL is
      -- This predicate returns true if and only if the given name is
      -- non-zero and is in the domain of the outmap, otherwise false.

      if (name.card = 0) then
         return false
      else
         return outmap.test(name)
      end
   end ;


   valid(val : CHAR_CODE) : BOOL is
      -- This predicate returns true if and only if the given code is in
      -- the domain of the inmap, otherwise false.

      return inmap.test(val)
   end ;


   is_valid_encoding(bits : BINSTR) : BOOL is
      -- This predicate is the vital link between encodings and characters.
      -- It returns true if and only if the bit-pattern in bits is a valid
      -- inmap key -- ie the bit-pattern is the encoding of a character or
      -- ligature in this repertoire.

      code : CHAR_CODE := CHAR_CODE::create(bits,lib) ;
      return inmap.test(code)
   end ;


   is_valid_encoding(bits : FBINSTR) : BOOL is
      -- This predicate is the vital link between encodings and characters.
      -- It returns true if and only if the bit-pattern in bits is a valid
      -- inmap key -- ie the bit-pattern is the encoding of a character or
      -- ligature in this repertoire.

      code : CHAR_CODE := CHAR_CODE::create(bits.binstr,lib) ;
      return inmap.test(code)
   end ;


   char(bits : BINSTR) : CHAR
      pre is_valid_encoding(bits)
      post true
   is
      -- This routine is the only one which actually creates characters
      -- from bit-patterns.

      return CHAR::build(bits.cursor,lib)
   end ;


   token_list(name : CHAR_CODE) : FLIST{TOKEN}
      pre ~void(inmap)
         and inmap.test(name)
      post result.size > 0
   is
      -- This routine returns the list of tokens recorded in the inmap for
      -- the character with the given code.

      return inmap.get(name)
   end ;


   code(val : TOKEN) : CHAR_CODE
      pre ~void(outmap)
         and outmap.test(val)
      post true
   is
      -- This routine returns the code which corresponds to the given token.
      -- The pre-condition requires the token to be present in the outmap.

      return outmap.get(val)
   end ;


   token(val : CHAR_CODE) : TOKEN
      pre ~void(inmap)
         and inmap.test(val)
      post true
   is
      -- This routine returns a token 'name' corresponding to the given code
      -- (the first in the inmap list for that code).  The token returned may
      -- not necessarily be the one used in text string ordering.

      return inmap.get(val)[0]
   end ;


   codes(str : STR) : CODE_STR
      pre ~void(self)
         and ~void(inmap)
         and (str.index_lib = lib)
      post result.size = str.size
   is
      -- This routine returns the sequence of codes which are the equivalent
      -- of the string in the encoding defined for this bi-map.

      res : CODE_STR := CODE_STR::create(lib) ;
      loop
         res := res + str.elt!.code
      end ;
      return res
   end ;


   tokens(codes : ARRAY{CHAR_CODE}) : ARRAY{TOKEN}
      pre codes.size > 0
      post result.size = codes.size      -- or an exception has been raised!
   is
      -- This routine returns an array of tokens represented by the given
      -- array of codes, taking the first inmap token for each code.  A code
      -- which yields a void token is reported through
      -- SYS_ERROR::blind_error.

      res : ARRAY{TOKEN} := ARRAY{TOKEN}::create(codes.asize) ;
      loop
         tok : TOKEN := inmap.get(codes.elt!)[0] ;
         if void(tok) then      -- can't do anything more!
            SYS_ERROR::blind_error(self,tok.card.str,LIBCHARS::default)
         else
            res.set!(tok)
         end
      end ;
      return res
   end ;


   private get_size(val : CARD,size : CARD) : CARD is
      -- This routine returns the greater of size and the number of octets
      -- (1, 2 or 4) needed to represent val.

      loc_octs : CARD ;
      if val > HEXTET::Hextet_Max then
         loc_octs := 4
      elsif val > OCTET::Octet_Max then
         loc_octs := 2
      else
         loc_octs := 1
      end ;
      return size.max(loc_octs)
   end ;


   private get_token_size : CARD is
      -- This private routine returns the number of octets needed to provide
      -- a binary representation of the tokens in the maps.  Although it
      -- could be expected that this should be determinable without searching
      -- the maps, in practice there are a number of token values which were
      -- used in creating the maps which do not appear explicitly in them.

      res : CARD := 1 ;      -- the default minimum!
      loop
         res := get_size(outmap.keys!.card,res)
      end ;
      return res
   end ;


   private put_synonyms(tok_size : CARD) : BINSTR
      pre ~void(self)
      post ~void(result)
   is
      -- This routine creates the binary representation of the synonym map
      -- for appending to the end of the token/code map table.  For each
      -- entry the key token is written first, then the list length and then
      -- the list tokens - exactly the sequence which get_synonyms reads.
      --
      -- NOTE(review): the list length is stored via OCTET::create, so a
      --      synonym list presumably has to fit in one octet - confirm.

      res : BINSTR := synonyms.size.binstr ;
      loop
         synonyms.size.times! ;
         tok : TOKEN := synonyms.keys! ;
         loc_list : FLIST{TOKEN} := synonyms.get(tok) ;
         res := res + tok.binstr(tok_size) ;      -- key token, read back first
         res := res + OCTET::create(loc_list.size) ;
         loop
            res := res + loc_list.elt!.binstr(tok_size)
         end
      end ;
      return res
   end ;


   binstr : BINSTR
      pre ~void(self)
      post ~void(result)
   is
      -- This routine returns the binary string form of self, in a form
      -- suitable for external storage.  The header (token size and code
      -- size) is followed by the token/code pairs of the outmap and then by
      -- the synonym table.

      loc_token : TOKEN ;
      loc_code : CHAR_CODE ;
      token_size : CARD := get_token_size ;
      res : BINSTR := BINSTR::create + OCTET::create(token_size)
                     + OCTET::create(lib.my_size) + outmap.size.binstr ;
      loop
         loc_token := outmap.keys! ;
         loc_code := outmap.get(loc_token) ;
         res := res + loc_token.binstr(token_size) + loc_code.binstr
      end ;
      return res + put_synonyms(token_size)
   end ;


   token!(once val : CHAR_CODE ) : TOKEN
      pre ~void(inmap)
         and inmap.test(val)
      post true
   is
      -- This iter yields in turn each token 'name' corresponding to the
      -- given character code.

      loc_list : FLIST{TOKEN} := inmap.get(val) ;
      loop
         yield loc_list.elt!
      end
   end ;


   synonym!(once ch : CHAR) : CHAR_CODE
      pre ~void(ch)
      post ~void(result)
   is
      -- This iter returns an arbitrary sequence of all those encodings for
      -- the character ch, including the encoding of ch itself as one of the
      -- elements returned.  The encoding of ch is yielded first, followed
      -- by the code of each token in the synonym list of its first token.

      list : FLIST{TOKEN} := synonyms.get(inmap.get(ch.code(lib))[0]) ;
      yield CHAR_CODE::create(ch,lib) ;
      loop
         yield outmap.get(list.elt!)
      end
   end ;


   synonym!(once ch : RUNE) : CHAR_CODE
      pre ~void(ch)
      post ~void(result)
   is
      -- This iter returns an arbitrary sequence of all those encodings for
      -- the character ch, including the encoding of ch itself as one of the
      -- elements returned.  The encoding of ch is yielded first, followed
      -- by the code of each token in the synonym list of its first token.

      list : FLIST{TOKEN} := synonyms.get(inmap.get(ch.code)[0]) ;
      yield ch.code ;
      loop
         yield outmap.get(list.elt!)
      end
   end ;


   sequence! : TOKEN
      pre ~void(self)
         and (outmap.size > 0)
      post true
   is
      -- This iter yields the first token of each element of the inmap.

      loop
         yield inmap.elt![0]
      end
   end ;


   private do_str(codes : ARRAY{TOKEN}) : CODE_STR
      pre ~void(self)
         and ~void(outmap)
         and ~void(codes)
      post result.size > 0      -- or an exception has been raised!
   is
      -- This routine returns a possible code string representation of the
      -- given token array in the encoding defined for this repertoire map.
      -- A token which yields a void code is reported through
      -- SYS_ERROR::blind_error together with the text built so far.

      res : CODE_STR := CODE_STR::create(lib) ;
      loop
         loc_code : CHAR_CODE := outmap.get(codes.elt!) ;
         if void(loc_code) then      -- can't do anything more!
            SYS_ERROR::blind_error(self,res.tgt_str,LIBCHARS::default)
         else
            res := res + loc_code
         end
      end ;
      return res
   end ;


   str(codes : ARRAY{TOKEN}) : STR
      pre ~void(self)
         and ~void(outmap)
         and ~void(codes)
      post result.size > 0      -- or an exception has been raised!
   is
      -- This routine returns a possible string representation of the given
      -- token array in the encoding defined for this repertoire map.  It is
      -- the text string form of the result of do_str.

      return do_str(codes).tgt_str
   end ;

end ; -- REP_MAP