# GUI Application automation and testing library # Copyright (C) 2006-2018 Mark Mc Mahon and Contributors # https://github.com/pywinauto/pywinauto/graphs/contributors # http://pywinauto.readthedocs.io/en/latest/credits.html # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # * Neither the name of pywinauto nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """Module to find the closest match of a string in a list""" from __future__ import unicode_literals import re import difflib import six #import ctypes #import ldistance #levenshtein_distance = ctypes.cdll.levenshtein.levenshtein_distance #levenshtein_distance = ldistance.distance find_best_control_match_cutoff = .6 #==================================================================== class MatchError(IndexError): """A suitable match could not be found""" def __init__(self, items = None, tofind = ''): """Init the parent with the message""" self.tofind = tofind self.items = items if self.items is None: self.items = [] IndexError.__init__(self, "Could not find '{0}' in '{1}'".format(tofind, self.items)) _cache = {} # given a list of texts return the match score for each # and the best score and text with best score #==================================================================== def _get_match_ratios(texts, match_against): """Get the match ratio of how each item in texts compared to match_against""" # now time to figure out the matching ratio_calc = difflib.SequenceMatcher() ratio_calc.set_seq1(match_against) ratios = {} best_ratio = 0 best_text = '' for text in texts: if 0: pass if (text, match_against) in _cache: ratios[text] = _cache[(text, match_against)] elif(match_against, text) in _cache: ratios[text] = _cache[(match_against, text)] else: # set up the SequenceMatcher with other text ratio_calc.set_seq2(text) # try using the levenshtein distance instead #lev_dist = levenshtein_distance(six.text_type(match_against), six.text_type(text)) #ratio = 1 - lev_dist / 10.0 #ratios[text] = ratio # calculate ratio and store it ratios[text] = ratio_calc.ratio() _cache[(match_against, text)] = ratios[text] # if this is the best so far then update best stats if ratios[text] > best_ratio: best_ratio = ratios[text] best_text = text return ratios, best_ratio, best_text #==================================================================== def find_best_match(search_text, item_texts, items, limit_ratio = .5): """Return the item that best matches the search_text * **search_text** The text to search for * **item_texts** The list of texts to search through * **items** The list of items corresponding (1 to 1) to the list of texts to search through. * **limit_ratio** How well the text has to match the best match. If the best match matches lower then this then it is not considered a match and a MatchError is raised, (default = .5) """ search_text = _cut_at_eol(_cut_at_tab(search_text)) text_item_map = UniqueDict() # Clean each item, make it unique and map to # to the item index for text, item in zip(item_texts, items): text_item_map[_cut_at_eol(_cut_at_tab(text))] = item ratios, best_ratio, best_text = \ _get_match_ratios(text_item_map.keys(), search_text) if best_ratio < limit_ratio: raise MatchError(items = text_item_map.keys(), tofind = search_text) return text_item_map[best_text] #==================================================================== _after_tab = re.compile(r"\t.*", re.UNICODE) _after_eol = re.compile(r"\n.*", re.UNICODE) _non_word_chars = re.compile(r"\W", re.UNICODE) def _cut_at_tab(text): """Clean out non characters from the string and return it""" # remove anything after the first tab return _after_tab.sub("", text) def _cut_at_eol(text): """Clean out non characters from the string and return it""" # remove anything after the first EOL return _after_eol.sub("", text) def _clean_non_chars(text): """Remove non word characters""" # should this also remove everything after the first tab? # remove non alphanumeric characters return _non_word_chars.sub("", text) def is_above_or_to_left(ref_control, other_ctrl): """Return true if the other_ctrl is above or to the left of ref_control""" text_r = other_ctrl.rectangle() ctrl_r = ref_control.rectangle() # skip controls where text win is to the right of ctrl if text_r.left >= ctrl_r.right: return False # skip controls where text win is below ctrl if text_r.top >= ctrl_r.bottom: return False # text control top left corner is below control # top left corner - so not to the above or left :) if text_r.top >= ctrl_r.top and text_r.left >= ctrl_r.left: return False return True #==================================================================== distance_cuttoff = 999 def get_non_text_control_name(ctrl, controls, text_ctrls): """ return the name for this control by finding the closest text control above and to its left """ names = [] # simply look for an instance of the control in the list, # we don't use list.index() method as it invokes __eq__ ctrl_index = 0 for i, c in enumerate(controls): if c is ctrl: ctrl_index = i break ctrl_friendly_class_name = ctrl.friendly_class_name() if ctrl_index != 0: prev_ctrl = controls[ctrl_index-1] prev_ctrl_text = prev_ctrl.window_text() if prev_ctrl.friendly_class_name() == "Static" and \ prev_ctrl.is_visible() and prev_ctrl_text and \ is_above_or_to_left(ctrl, prev_ctrl): names.append( prev_ctrl_text + ctrl_friendly_class_name) best_name = '' closest = distance_cuttoff # now for each of the visible text controls for text_ctrl in text_ctrls: # get aliases to the control rectangles text_r = text_ctrl.rectangle() ctrl_r = ctrl.rectangle() # skip controls where text win is to the right of ctrl if text_r.left >= ctrl_r.right: continue # skip controls where text win is below ctrl if text_r.top >= ctrl_r.bottom: continue # calculate the distance between the controls # at first I just calculated the distance from the top left # corner of one control to the top left corner of the other control # but this was not best, so as a text control should either be above # or to the left of the control I get the distance between # the top left of the non text control against the # Top-Right of the text control (text control to the left) # Bottom-Left of the text control (text control above) # then I get the min of these two # We do not actually need to calculate the difference here as we # only need a comparative number. As long as we find the closest one # the actual distance is not all that important to us. # this reduced the unit tests run on my by about 1 second # (from 61 ->60 s) # (x^2 + y^2)^.5 #distance = ( # (text_r.left - ctrl_r.left) ** 2 + # (x^2 + y^2) # (text_r.bottom - ctrl_r.top) ** 2) \ # ** .5 # ^.5 #distance2 = ( # (text_r.right - ctrl_r.left) ** 2 + # (x^2 + y^2) # (text_r.top - ctrl_r.top) ** 2) \ # ** .5 # ^.5 distance = abs(text_r.left - ctrl_r.left) + abs(text_r.bottom - ctrl_r.top) distance2 = abs(text_r.right - ctrl_r.left) + abs(text_r.top - ctrl_r.top) distance = min(distance, distance2) # UpDown control should use Static text only because edit box text is often useless if ctrl_friendly_class_name == "UpDown" and \ text_ctrl.friendly_class_name() == "Static" and distance < closest: # TODO: use search in all text controls for all non-text ones # (like Dijkstra algorithm vs Floyd one) closest = distance ctrl_text = text_ctrl.window_text() if ctrl_text is None: # the control probably doesn't exist so skip it continue best_name = ctrl_text + ctrl_friendly_class_name # if this distance was closer than the last one elif distance < closest: closest = distance #if text_ctrl.window_text() == '': # best_name = ctrl_friendly_class_name + ' '.join(text_ctrl.texts()[1:2]) #else: ctrl_text = text_ctrl.window_text() if ctrl_text is None: # the control probably doesn't exist so skip it continue best_name = ctrl_text + ctrl_friendly_class_name names.append(best_name) return names #==================================================================== def get_control_names(control, allcontrols, textcontrols): """Returns a list of names for this control""" names = [] # if it has a reference control - then use that #if hasattr(control, 'ref') and control.ref: # control = control.ref # Add the control based on it's friendly class name friendly_class_name = control.friendly_class_name() names.append(friendly_class_name) # if it has some character text then add it base on that # and based on that with friendly class name appended cleaned = control.window_text() # Todo - I don't like the hardcoded classnames here! if cleaned and control.has_title: names.append(cleaned) names.append(cleaned + friendly_class_name) elif control.has_title and friendly_class_name != 'TreeView': try: for text in control.texts()[1:]: names.append(friendly_class_name + text) except Exception: #import traceback #from .actionlogger import ActionLogger pass #ActionLogger().log('Warning! Cannot get control.texts()') #\nTraceback:\n' + traceback.format_exc()) # so find the text of the nearest text visible control non_text_names = get_non_text_control_name(control, allcontrols, textcontrols) # and if one was found - add it if non_text_names: names.extend(non_text_names) # it didn't have visible text else: # so find the text of the nearest text visible control non_text_names = get_non_text_control_name(control, allcontrols, textcontrols) # and if one was found - add it if non_text_names: names.extend(non_text_names) # return the names - and make sure there are no duplicates or empty values cleaned_names = set(names) - set([None, ""]) return cleaned_names #==================================================================== class UniqueDict(dict): """A dictionary subclass that handles making its keys unique""" def __setitem__(self, text, item): """Set an item of the dictionary""" # this text is already in the map # so we need to make it unique if text in self: # find next unique text after text1 unique_text = text counter = 2 while unique_text in self: unique_text = text + str(counter) counter += 1 # now we also need to make sure the original item # is under text0 and text1 also! if text + '0' not in self: dict.__setitem__(self, text+'0', self[text]) dict.__setitem__(self, text+'1', self[text]) # now that we don't need original 'text' anymore # replace it with the uniq text text = unique_text # add our current item dict.__setitem__(self, text, item) def find_best_matches( self, search_text, clean = False, ignore_case = False): """Return the best matches for search_text in the items * **search_text** the text to look for * **clean** whether to clean non text characters out of the strings * **ignore_case** compare strings case insensitively """ # now time to figure out the matching ratio_calc = difflib.SequenceMatcher() if ignore_case: search_text = search_text.lower() ratio_calc.set_seq1(search_text) ratios = {} best_ratio = 0 best_texts = [] ratio_offset = 1 if clean: ratio_offset *= .9 if ignore_case: ratio_offset *= .9 for text_ in self: # make a copy of the text as we need the original later text = text_ if clean: text = _clean_non_chars(text) if ignore_case: text = text.lower() # check if this item is in the cache - if yes, then retrieve it if (text, search_text) in _cache: ratios[text_] = _cache[(text, search_text)] elif(search_text, text) in _cache: ratios[text_] = _cache[(search_text, text)] # not in the cache - calculate it and add it to the cache else: # set up the SequenceMatcher with other text ratio_calc.set_seq2(text) # if a very quick check reveals that this is not going # to match then ratio = ratio_calc.real_quick_ratio() * ratio_offset if ratio >= find_best_control_match_cutoff: ratio = ratio_calc.quick_ratio() * ratio_offset if ratio >= find_best_control_match_cutoff: ratio = ratio_calc.ratio() * ratio_offset # save the match we got and store it in the cache ratios[text_] = ratio _cache[(text, search_text)] = ratio # try using the levenshtein distance instead #lev_dist = levenshtein_distance(six.text_type(search_text), six.text_type(text)) #ratio = 1 - lev_dist / 10.0 #ratios[text_] = ratio #print "%5s" %("%0.2f"% ratio), search_text, `text` # if this is the best so far then update best stats if ratios[text_] > best_ratio and \ ratios[text_] >= find_best_control_match_cutoff: best_ratio = ratios[text_] best_texts = [text_] elif ratios[text_] == best_ratio: best_texts.append(text_) #best_ratio *= ratio_offset return best_ratio, best_texts #==================================================================== def build_unique_dict(controls): """Build the disambiguated list of controls Separated out to a different function so that we can get the control identifiers for printing. """ name_control_map = UniqueDict() # get the visible text controls so that we can get # the closest text if the control has no text text_ctrls = [ctrl_ for ctrl_ in controls if ctrl_.can_be_label and ctrl_.is_visible() and ctrl_.window_text()] # collect all the possible names for all controls # and build a list of them for ctrl in controls: ctrl_names = get_control_names(ctrl, controls, text_ctrls) # for each of the names for name in ctrl_names: name_control_map[name] = ctrl return name_control_map #==================================================================== def find_best_control_matches(search_text, controls): """Returns the control that is the the best match to search_text This is slightly differnt from find_best_match in that it builds up the list of text items to search through using information from each control. So for example for there is an OK, Button then the following are all added to the search list: "OK", "Button", "OKButton" But if there is a ListView (which do not have visible 'text') then it will just add "ListView". """ name_control_map = build_unique_dict(controls) #print ">>>>>>>", repr(name_control_map).decode("ascii", "ignore") # # collect all the possible names for all controls # # and build a list of them # for ctrl in controls: # ctrl_names = get_control_names(ctrl, controls) # # # for each of the names # for name in ctrl_names: # name_control_map[name] = ctrl search_text = six.text_type(search_text) best_ratio, best_texts = name_control_map.find_best_matches(search_text) best_ratio_ci, best_texts_ci = \ name_control_map.find_best_matches(search_text, ignore_case = True) best_ratio_clean, best_texts_clean = \ name_control_map.find_best_matches(search_text, clean = True) best_ratio_clean_ci, best_texts_clean_ci = \ name_control_map.find_best_matches( search_text, clean = True, ignore_case = True) if best_ratio_ci > best_ratio: best_ratio = best_ratio_ci best_texts = best_texts_ci if best_ratio_clean > best_ratio: best_ratio = best_ratio_clean best_texts = best_texts_clean if best_ratio_clean_ci > best_ratio: best_ratio = best_ratio_clean_ci best_texts = best_texts_clean_ci if best_ratio < find_best_control_match_cutoff: raise MatchError(items = name_control_map.keys(), tofind = search_text) return [name_control_map[best_text] for best_text in best_texts] # #def GetControlMatchRatio(text, ctrl): # # get the texts for the control # ctrl_names = get_control_names(ctrl) # # #get the best match for these # matcher = UniqueDict() # for name in ctrl_names: # matcher[name] = ctrl # # best_ratio, unused = matcher.find_best_matches(text) # # return best_ratio # # # #def get_controls_ratios(search_text, controls): # name_control_map = UniqueDict() # # # collect all the possible names for all controls # # and build a list of them # for ctrl in controls: # ctrl_names = get_control_names(ctrl) # # # for each of the names # for name in ctrl_names: # name_control_map[name] = ctrl # # match_ratios, best_ratio, best_text = \ # _get_match_ratios(name_control_map.keys(), search_text) # # return match_ratios, best_ratio, best_text,