repertoire.sa


Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details
 
------------------------->  GNU Sather - sourcefile  <-------------------------
-- Copyright (C) 2000 by K Hopper, University of Waikato, New Zealand        --
-- This file is part of the GNU Sather library. It is free software; you may --
-- redistribute  and/or modify it under the terms of the GNU Library General --
-- Public  License (LGPL)  as published  by the  Free  Software  Foundation; --
-- either version 2 of the license, or (at your option) any later version.   --
-- This  library  is distributed  in the  hope that it will  be  useful, but --
-- WITHOUT ANY WARRANTY without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. See Doc/LGPL for more details.       --
-- The license text is also available from:  Free Software Foundation, Inc., --
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA                     --
-------------->  Please email comments to <bug-sather@gnu.org>  <--------------


class REPERTOIRE

class REPERTOIRE is -- This class implements the tables needed for use when carrying out -- ordering in accordance with ISO/IEC 14651/2 standards. It permits -- individual weights to be determined by some ordering mechanism, in -- addition to providing for equality testing. -- Version 1.4 Apr 99. Copyright K Hopper, U of Waikato -- Development History -- ------------------- -- Date Who By Detail -- ---- ------ ------ -- 06 Dec 96 kh Original -- 19 Feb 97 kh Modified to use CARD -- 10 Apr 97 kh Added 'char' constants -- 2 Jul 97 kh Separated 'consts' to LIBCHARS -- 6 Apr 99 kh Modified for text class V8! private shared priv_default : SAME ; readonly attr charmap : REP_MAP ; readonly attr maps : FLIST{ORDERING} ; -- At repertoire creation time these maps are created, keyed on the -- code independent character tokens, yielding an array of weights which is -- to be handled in accordance with the ordering rules. readonly attr undefined : RANGE_ORDERING ; -- This provides a default order for a complete character code -- repertoire, provided that a token is not found in any other rule! Leaf_Name_ref : STR is -- This routine creates and returns the name of the default file name "order" -- in the Resources sub-directory as a reference string. loc_lib : LIBCHARS := LIBCHARS::default ; loc_res : CODE_STR := CODE_STR::create(loc_lib) + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_O.card,loc_lib) + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_R.card,loc_lib) + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_D.card,loc_lib) + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_E.card,loc_lib) + CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_R.card,loc_lib) ; return loc_res.tgt_str end ; private build_rules(cursor : BIN_CURSOR) : ARRAY{ORDER_RULE} pre ~void(cursor) and ~cursor.is_done post true is --This private routine builds an order rule set from the binary string -- indicated. rule_cnt : CARD := cursor.get_item.card ; res : ARRAY{ORDER_RULE} := ARRAY{ORDER_RULE}::create(rule_cnt) ; loop loc_index : CARD := 0.upto!(rule_cnt - 1) ; res[loc_index] := ORDER_RULE::create( BOOL::create(cursor.get_item), BOOL::create(cursor.get_item)) end ; return res end ; private build_range_map(index : BIN_CURSOR, loc_rules : ARRAY{ORDER_RULE}) : RANGE_ORDERING pre ~void(index) and ~index.is_done post ~void(result) is -- This routine builds a range ordering from the contents of the -- file indicated by index. loc_str : BINSTR := index.get_sized ; -- the initial char code. loc_low : CARD := CARD::create(loc_str) ; -- treated as range low! loc_base : ORDER_WEIGHT := ORDER_WEIGHT::build(index) ; loc_high : CARD := loc_low + index.card - 1 ; loc_range : RANGE := RANGE::create(loc_low,loc_high) ; loc_wts : ARRAY{ORDER_WEIGHT} := ARRAY{ORDER_WEIGHT}::create(loc_rules.asize) ; loop loc_index : CARD := 0.upto!(loc_rules.size - 1) ; loc_wts[loc_index] := ORDER_WEIGHT::build(index) end ; return RANGE_ORDERING::create(loc_range,loc_base,loc_rules,loc_wts) end ; private build_a_map(index : BIN_CURSOR) : ORDERING pre ~void(index) and ~index.is_done post ~void(result) -- or an exception has been raised is -- This routine builds a single ordering from the contents of the -- file indicated by index. loc_rules : ARRAY{ORDER_RULE} := build_rules(index) ; res : ORDERING := ORDERING::create(loc_rules) ; token_cnt : CARD := index.card ; loc_wts : ARRAY{ORDER_WEIGHT} ; loop token_cnt.times! ; loc_wts := ARRAY{ORDER_WEIGHT}::create(loc_rules.size) ; loc_binstr : BINSTR := index.get_sized ; tok : TOKEN := TOKEN::create(loc_binstr) ; loop loc_index : CARD := 0.upto!(loc_rules.size - 1) ; loc_wts[loc_index] := ORDER_WEIGHT::build(index) end ; if ~res.insert(tok,loc_wts) then SYS_ERROR::blind_error(self,tok.str,LIBCHARS::default) end end ; range_cnt : CARD := index.card ; loop range_cnt.times! ; if ~res.insert(build_range_map(index,loc_rules)) then SYS_ERROR::blind_error(self,range_cnt.str,LIBCHARS::default) end end ; return res end ; create(cult : CULTURE) : SAME pre ~void(cult) and ~void(cult.charmap) and ~void(cult.resource_path) post true is -- This creation routine treats name as the name of a file which contains -- character encodings of a repertoire, together with synonym and ordering -- information. This is known as a repertoire map in ISO/IEC 14651. The -- repertoire file is expected to be a binary image, not the source text -- which specifies the data. if cult.state > cult.Charmap then return cult.collating end ; me : SAME := new ; me.charmap := cult.charmap ; -- Now try to open the file, read it and unhatch the contents! loc_path : FILE_PATH := cult.bin_resource_path.append(me.Leaf_Name_ref) ; fyle : BIN_FILE := BIN_FILE::open_for_read(loc_path.str) ; if void(fyle) then -- error reported in CULTURE! return void end ; index : BIN_CURSOR := fyle.buffer.binstr.cursor ; fyle.close ; if index.get_item.card /= cult.sather_lib.my_size then -- The wrong file!! return void end ; if void(priv_default) then priv_default := me end ; -- Now the character order maps. me.maps := FLIST{ORDERING}::create ; scripts : CARD := index.get_item.card ; -- No of different groups of rules loop scripts.times! ; loc_order : ORDERING := me.build_a_map(index) ; me.maps := me.maps.push(loc_order) ; end ; -- The undefined rules come at the end of the file! undef_rules : ARRAY{ORDER_RULE} := me.build_rules(index) ; me.undefined := me.build_range_map(index,undef_rules) ; if ~index.is_done then SYS_ERROR::blind_error(self,loc_path.str,LIBCHARS::default) end ; return me end ; private initialise : SAME is -- This routine returns the default repertoire object by creating it! cult : CULTURE := CULTURE::default ; cult.collating.charmap := cult.charmap ; return cult.collating end ; default : SAME is -- This routine returns a default object - which may involve retrieving -- data from the Operating System current culture specifications. if void(priv_default) then return initialise else return priv_default end end ; private init : SAME is -- This routine returns the current object or default if self is void. if void(self) then return default else return self end end ; weights(tok : TOKEN) : TUP{ARRAY{ORDER_WEIGHT},ARRAY{ORDER_RULE}} pre ~(tok.card = 0) and ~void(self) post true is --This routine searches the tables as needed to find the weight array -- corresponding to token, returning its value as a copy of the table -- contents. If the token is not found then void is returned. loc_code : CHAR_CODE := charmap.code(tok) ; loc_list : FLIST{TOKEN} := charmap.token_list(loc_code) ; loop -- over the individual maps loc_order : ORDERING := maps.elt! ; if loc_order.contains(loc_list) then return TUP{ARRAY{ORDER_WEIGHT},ARRAY{ORDER_RULE}}::create(loc_order.weights(loc_list),loc_order.rule) end end ; return TUP{ARRAY{ORDER_WEIGHT},ARRAY{ORDER_RULE}}::create(undefined.weights(tok),undefined.rules) end ; private make_tokens(str : CODE_STR) : FLIST{TOKEN} pre true post (result.size = str.size) is --This private routine returns a list of tokens which correspond to -- the character codes in the argument string. res : FLIST{TOKEN} := FLIST{TOKEN}::create ; loop loc_ch : CHAR_CODE := str.elt! ; loc_tok : TOKEN := charmap.token(loc_ch) ; res := res.push(loc_tok) end ; return res end ; private make_weights(toks : FLIST{TOKEN},out rules : ARRAY{ORDER_RULE}) : FLIST{ARRAY{ORDER_WEIGHT}} pre ~void(toks) post void(result) or (result.size = toks.size) is --This routine retrieves the order weights corresponding to the token -- list in the same order as that list. res : FLIST{ARRAY{ORDER_WEIGHT}} := FLIST{ARRAY{ORDER_WEIGHT}}::create ; loc_res : TUP{ARRAY{ORDER_WEIGHT},ARRAY{ORDER_RULE}} ; rules := void ; loop loc_tok : TOKEN := toks.elt! ; loc_res := weights(loc_tok) ; if void(loc_res) then return void elsif void(rules) then rules := loc_res.t2 elsif ~rules.equals(loc_res.t2) then -- impossible match wanted! return void end ; res := res.push(loc_res.t1) ; end ; return res end ; private one_pass(first : FLIST{ARRAY{ORDER_WEIGHT}},second : FLIST{ARRAY{ORDER_WEIGHT}},index : CARD,rule : ORDER_RULE,for_equality : BOOL) : BOOL is --This private predicate carries out a single pass through the two -- arrays with the given index and direction - either for equality or first -- being earlier than second. True is returned if and only if the required -- relation is satisfied by the two lists. loc_index : CARD ; loop if rule.left_to_right then loc_index := 0.upto!(first.size - 1) else loc_index := (first.size - 1).downto!(0) ; end ; loc_left : ORDER_WEIGHT := first[loc_index][index] ; loc_right : ORDER_WEIGHT := second[loc_index][index] ; if ~(loc_left = loc_right) then if for_equality then return false else return (loc_left < loc_right) end end end ; return for_equality -- they are equal end ; private compare(first,second : CODE_STR,equality : BOOL) : BOOL is --This predicate returns true if and only if first and second are -- either equal (if equality is true) or first is less than second if -- equality is false! if equality -- a quick short-cut!! and (first.size /= second.size) then return false end ; res : SAME ; if void(self) then res := init else res := self end ; loc_rules_1 : ARRAY{ORDER_RULE} ; loc_rules_2 : ARRAY{ORDER_RULE} ; loc_first : FLIST{ARRAY{ORDER_WEIGHT}} := res.make_weights(res.make_tokens(first),out loc_rules_1) ; loc_second : FLIST{ARRAY{ORDER_WEIGHT}} := res.make_weights(res.make_tokens(second),out loc_rules_2) ; if void(loc_first) or void(loc_second) or ~loc_rules_1.equals(loc_rules_2) then -- incomparable! return false end ; loc_res : BOOL ; loop index : CARD := 0.upto!(res.undefined.rules.size - 1) ; loc_res := res.one_pass(loc_first,loc_second,index, loc_rules_1.elt!,equality) ; if (equality and ~loc_res) or (~equality and loc_res) then return loc_res end end ; return equality end ; earlier(low,high : CHAR) : BOOL is -- This predicate returns true if and only if first is earlier than -- second in the collating sequence irrespective of their encoding. return compare(CODE_STR::create(low.code), CODE_STR::create(high.code),false) end ; earlier(low,high : STR) : BOOL is -- This predicate returns true if and only if first is earlier in the -- collating sequence than second, irrespective of their encoding. return compare(CODE_STR::create(low),CODE_STR::create(high),false) end ; same(first,second : CHAR) : BOOL is -- This predicate returns true if and only if first is earlier than -- second in the collating sequence irrespective of their encoding. -- In order to simplify encoding the individual characters are treated as -- being strings of single characters. return compare(CODE_STR::create(first.code), CODE_STR::create(second.code),true) end ; same(first,second : STR) : BOOL is -- This predicate returns true if and only if first and second are -- synonymous characters irrespective of their encoding. return compare(CODE_STR::create(first),CODE_STR::create(second),true) end ; earlier(low,high : RUNE) : BOOL is -- This predicate returns true if and only if first is earlier than -- second in the collating sequence irrespective of their encoding. loc_low : CODE_STR := CODE_STR::create(low.lib) ; loop loc_low := loc_low + low.code! end ; loc_high : CODE_STR := CODE_STR::create(high.lib) ; loop loc_high := loc_high + high.code! end ; return compare(loc_low,loc_high,false) end ; earlier(low,high : RUNES) : BOOL is -- This predicate returns true if and only if first is earlier in the -- collating sequence than second, irrespective of their encoding. loc_low : CODE_STR := CODE_STR::create(low.index_lib) ; loop loc_low := loc_low + low.code! end ; loc_high : CODE_STR := CODE_STR::create(high.index_lib) ; loop loc_high := loc_high + high.code! end ; return compare(loc_low,loc_high,false) end ; same(first,second : RUNE) : BOOL is -- This predicate returns true if and only if first is earlier than -- second in the collating sequence irrespective of their encoding. -- In order to simplify encoding they individual characters are treated as -- being strings of single characters. loc_first : CODE_STR := CODE_STR::create(first.lib) ; loop loc_first := loc_first.push(first.code!) end ; loc_second : CODE_STR := CODE_STR::create(second.lib) ; loop loc_second := loc_second.push(second.code!) end ; return compare(loc_first,loc_second,true) end ; same(first,second : RUNES) : BOOL is -- This predicate returns true if and only if first and second are -- synonymous characters irrespective of their encoding. loc_first : CODE_STR := CODE_STR::create(first.index_lib) ; loop loc_first := loc_first + first.code! end ; loc_second : CODE_STR := CODE_STR::create(second.index_lib) ; loop loc_second := loc_second + second.code! end ; return compare(loc_first,loc_second,true) end ; earlier(low,high : CHAR_CODE) : BOOL is -- This predicate returns true if and only if first is earlier than -- second in the collating sequence irrespective of their encoding. return compare(CODE_STR::create(low), CODE_STR::create(high),false) end ; earlier(low,high : CODE_STR) : BOOL is -- This predicate returns true if and only if first is earlier in the -- collating sequence than second, irrespective of their encoding. return compare(low,high,false) end ; same(first,second : CHAR_CODE) : BOOL is -- This predicate returns true if and only if first is the same character -- irrespective of their encoding. In order to simplify encoding the -- individual characters are treated as being strings of single characters. return compare(CODE_STR::create(first), CODE_STR::create(second),true) end ; same(first,second : CODE_STR) : BOOL is -- This predicate returns true if and only if first and second are -- synonymous characters irrespective of their encoding. return compare(first,second,true) end ; end ; -- REPERTOIRE