# Dictionaries and functions (24/2-2021)

## Exercise

Make a function ``print_words(words)`` that, given a list of words ``words``, prints each word on a separate line (in an arbitrary order).

In [6]:
# Warning: Don't modify the argument when it is a list

def print_words(words):
    """Print every word of ``words`` on its own line, last word first.

    Deliberately destructive: each word is removed with ``pop()``, so the
    list passed in is empty when the function returns. Before each word a
    trace line shows the global ``zoo`` next to the argument ``words``; when
    called as ``print_words(zoo)`` both names refer to the same list, which
    is why the two shrink together.
    """
    while words:
        print(f'{zoo=} {words=}')  # trace: global list next to the argument
        print(words.pop())         # pop() removes and returns the last word

zoo = ['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']

print(f'{zoo=}')  # before the call
print_words(zoo)
print(f'{zoo=}')  # after the call: the caller's list has been emptied

zoo=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']
zoo=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python'] words=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']
python
zoo=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra'] words=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra']
cobra
zoo=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus'] words=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus']
hippopotamus
zoo=['zoo', 'crocodile', 'elephant', 'giraf'] words=['zoo', 'crocodile', 'elephant', 'giraf']
giraf
zoo=['zoo', 'crocodile', 'elephant'] words=['zoo', 'crocodile', 'elephant']
elephant
zoo=['zoo', 'crocodile'] words=['zoo', 'crocodile']
crocodile
zoo=['zoo'] words=['zoo']
zoo
zoo=[]


In [48]:
# Leave the list unchanged

def print_words(words):
    """Print each word of ``words`` on its own line, in list order.

    Only reads ``words``; the caller's list is left untouched.
    """
    for entry in words:
        print(entry)

words = ['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']

print(f'{words=}')  # before the call
print_words(words)
print(f'{words=}')  # after the call: identical, nothing was removed

words=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']
zoo
crocodile
elephant
giraf
hippopotamus
cobra
python
words=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']


In [46]:
# Use * and keyword argument to print

words = ['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']

def print_words(words):
    """Print each word of ``words`` on its own line with a single ``print`` call.

    ``*words`` unpacks the list into separate positional arguments of
    ``print`` and the ``sep`` keyword argument puts a newline between them.
    Note that an empty list still produces one blank line, because
    ``print`` always writes its line ending.
    """
    print(*words, sep='\n')
        
print_words(words)

zoo
crocodile
elephant
giraf
hippopotamus
cobra
python


## Exercise

Create a function ``print_words(words, indent=4)`` that given a list of words ``words``, prints each word on a separate line (in an arbitrary order) with ``indent`` leading spaces. Indent should be an optional keyword argument.

In [60]:
# Leave the list unchanged

# Character repeated to build the indentation; presumably '.' is used instead
# of ' ' so that the indentation is visible in the printed output.
space = '.'

# Alternative signatures to experiment with:
#   def print_words(words, indent):         # indent not optional
#   def print_words(words, *, indent=0):    # force indent to be given as a keyword argument
def print_words(words, indent=0):  # the standard way
    """Print each word of ``words`` on its own line, prefixed by ``indent`` copies of ``space``.

    ``indent`` is optional (default 0) and may be passed positionally or by keyword.
    """
    for entry in words:
        line = space * indent + entry
        print(line)

words = ['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']

print_words(words)
print_words(words, 2)  # is it obvious from the code that 2 is the indent?
print_words(words, indent=3)
print_words(words, indent=7)

zoo
crocodile
elephant
giraf
hippopotamus
cobra
python
..zoo
..crocodile
..elephant
..giraf
..hippopotamus
..cobra
..python
...zoo
...crocodile
...elephant
...giraf
...hippopotamus
...cobra
...python
.......zoo
.......crocodile
.......elephant
.......giraf
.......hippopotamus
.......cobra
.......python


## Exercise

Create a function ``longest(words)`` that, given a list of words, returns a longest word (the first one if several words have the maximum length).

In [33]:
def longest(words):
    """Return a longest word of the list ``words``.

    If several words share the maximum length, the one that appears first
    wins, because a later word only replaces the current best when it is
    strictly longer. Raises ``IndexError`` when ``words`` is empty.
    """
    best = words[0]
    for candidate in words[1:]:
        if len(best) < len(candidate):
            best = candidate
    return best

print(longest(['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']))

hippopotamus


In [2]:
# slightly cheating for now...

def longest(words):
    """Return a longest word of ``words`` by delegating to the built-in ``max``.

    ``key=len`` makes ``max`` compare the words by length instead of
    alphabetically. Per the documentation of ``max``, the first maximal item
    is returned on ties and an empty argument raises ``ValueError``.
    """
    return max(words, key=len)  # max takes a keyword argument that is a function

print(longest(['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']))

hippopotamus


## Exercise

Create a function ``longest(word1, word2, ...)`` that returns the longest word among the *one or more* arguments provided.

In [3]:
# Use * argument for zero or more arguments; fails on zero arguments

def longest(*words):
    """Return the longest of the words passed as positional arguments.

    ``*words`` collects all arguments into a tuple. The first of several
    equally long words wins. Calling the function without arguments fails
    with ``IndexError`` because the tuple is then empty.
    """
    winner = words[0]
    for contender in words[1:]:
        winner = contender if len(contender) > len(winner) else winner
    return winner

print(longest('zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python'))
# print(longest())  # IndexError: tuple index out of range

hippopotamus


In [4]:
# Better error message, by forcing at least one argument

def longest(word, *words):
    """Return the longest of one or more words given as positional arguments.

    The mandatory first parameter ``word`` guarantees at least one argument,
    so a call without arguments is rejected by Python itself with a
    ``TypeError`` naming the missing parameter. The first of several equally
    long words wins.
    """
    best = word
    for other in words:  # separate loop name, so the parameter ``word`` is not shadowed
        if len(other) > len(best):
            best = other
    return best

print(longest('zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python'))
# print(longest())  # TypeError: missing 1 required positional argument: 'word'

hippopotamus


## Exercise

Create a function ``eliminate(text, words)`` that, given a space-separated text of words ``text`` and a list of words ``words``, returns the text with all occurrences of words in ``words`` replaced by a corresponding number of ``*``. E.g.

```
eliminate('Python Java and C are popular languages but Java and C '
          'are statically typed whereas Python is dynamically typed', 
          ['Java', 'C', 'statically'])
```

should return the string

```
'Python **** and * are popular languages but **** and * are ********** typed whereas Python is dynamically typed'
```

In [5]:
# Bad version, since many string concatenations

def eliminate(text, words):
    """Return ``text`` with every word found in ``words`` replaced by ``*`` characters.

    ``text`` is split on whitespace and re-joined with single spaces; each
    eliminated word becomes as many ``*`` as it has characters. Matching is
    case-sensitive. The result is built by repeated string concatenation,
    which is exactly what makes this the "bad" version.
    """
    banned = set(words)  # 'in' queries are much faster on sets than on lists
    result = ''
    for token in text.split():
        replacement = '*' * len(token) if token in banned else token
        result += ' ' + replacement
    return result[1:]  # drop the leading space added in front of the first word

eliminate(
    'Python Java and C are popular languages but Java and C '
    'are statically typed whereas Python is dynamically typed', 
    ['Java', 'C', 'statically']
)

'Python **** and * are popular languages but **** and * are ********** typed whereas Python is dynamically typed'

In [6]:
def eliminate(text, words):
    """Return ``text`` with every word found in ``words`` replaced by ``*`` characters.

    The pieces are collected in a list and joined once at the end, which
    avoids the repeated string concatenation of the previous version.
    Matching is case-sensitive; words are separated by single spaces in the result.
    """
    banned = set(words)  # 'in' queries are much faster on sets than on lists
    pieces = []
    for token in text.split():
        pieces.append('*' * len(token) if token in banned else token)
    return ' '.join(pieces)

eliminate('Python Java and C are popular languages but Java and C '
          'are statically typed whereas Python is dynamically typed', 
          ['Java', 'C', 'statically'])

'Python **** and * are popular languages but **** and * are ********** typed whereas Python is dynamically typed'

## Exercise

Modify the definition of ``eliminate`` so that it can be called as ``eliminate(text, word1, word2, ...)``

In [7]:
def eliminate(text, *words):  # * notation
    """Return ``text`` with each of the given ``words`` replaced by ``*`` characters.

    The words to eliminate are passed as separate positional arguments after
    ``text`` and arrive in the function as a tuple.
    """
    hidden = set(words)
    pieces = []
    for token in text.split():
        if token in hidden:
            pieces.append('*' * len(token))
            continue
        pieces.append(token)
    return ' '.join(pieces)

eliminate('Python Java and C are popular languages but Java and C '
          'are statically typed whereas Python is dynamically typed', 
          'Java', 'C', 'statically')

'Python **** and * are popular languages but **** and * are ********** typed whereas Python is dynamically typed'

In [8]:
# A list comprehension solution

def eliminate(text, *words):
    """Return ``text`` with each of the given ``words`` replaced by ``*`` characters.

    Same contract as the loop version, expressed as one list comprehension
    with a conditional expression choosing between the mask and the word.
    """
    banned = set(words)
    return ' '.join(['*' * len(token) if token in banned else token
                     for token in text.split()])

eliminate('Python Java and C are popular languages but Java and C '
          'are statically typed whereas Python is dynamically typed', 
          'Java', 'C', 'statically')

'Python **** and * are popular languages but **** and * are ********** typed whereas Python is dynamically typed'

## Exercise

Create a function ``rewrite(text, initial=[word,...], hide=[word,...], upper=[word,...])`` where ``initial``, ``hide``, and ``upper`` are optional keyword arguments, each containing a list of words: words in ``initial`` should be replaced by their first letter followed by one ``*`` for each remaining letter, words in ``hide`` should have all letters replaced by ``*``, and words in ``upper`` should be converted to all upper case. E.g.

```
rewrite('Python rocks but Java sucks', initial=['sucks'], hide=['Java'], upper=['Python'])
```

should return 

```
'PYTHON rocks but **** s****'
```

In [None]:
# Let us use frozensets, since the sets are not going to be updated

def rewrite(text, initial=None, hide=None, upper=None):
    """Return ``text`` with selected words masked or converted to upper case.

    Parameters:
        text:    whitespace-separated words; the result is joined with single spaces.
        initial: optional iterable of words to reduce to their first letter
                 followed by one ``*`` per remaining letter.
        hide:    optional iterable of words to replace entirely by ``*``.
        upper:   optional iterable of words to convert to upper case.

    A word listed in several arguments is handled by the first matching rule
    in the order hide, initial, upper. Matching is case-sensitive.
    """
    # Test for the None default with ``is not None``: identity cannot be
    # overridden, whereas ``!= None`` calls the argument's ``__ne__`` and can
    # give a wrong (or non-boolean) answer for objects that customise it.
    initial = frozenset(initial) if initial is not None else frozenset()
    hide = frozenset(hide) if hide is not None else frozenset()
    upper = frozenset(upper) if upper is not None else frozenset()

    new_content = []
    for word in text.split():
        if word in hide:
            new_content.append('*' * len(word))
        elif word in initial:
            # split() never yields an empty string, so word[0] is safe here
            new_content.append(word[0] + '*' * (len(word) - 1))
        elif word in upper:
            new_content.append(word.upper())
        else:
            new_content.append(word)
    return ' '.join(new_content)

rewrite('Python rocks but Java sucks', initial=['sucks'], hide=['Java'], upper=['Python'])

In [None]:
# Let us use frozensets as default values, since the sets are not going to be updated

def rewrite(text, 
            initial=frozenset(), 
            hide=frozenset(), 
            upper=frozenset()):
    """Return ``text`` with selected words masked or converted to upper case.

    ``initial``, ``hide`` and ``upper`` are optional iterables of words; the
    immutable empty ``frozenset`` serves as a safe default value. Words in
    ``hide`` become all ``*``, words in ``initial`` keep only their first
    letter, words in ``upper`` are upper-cased. If a word is listed more than
    once, hide takes priority over initial, which takes priority over upper.
    """
    initial, hide, upper = frozenset(initial), frozenset(hide), frozenset(upper)

    def convert(token):
        # First matching rule wins: hide, then initial, then upper.
        if token in hide:
            return '*' * len(token)
        if token in initial:
            return token[0] + '*' * (len(token) - 1)
        if token in upper:
            return token.upper()
        return token

    return ' '.join(convert(token) for token in text.split())

rewrite('Python rocks but Java sucks', initial=['sucks'], hide=['Java'], upper=['Python'])

In [12]:
# ... could also use an empty tuple for default value

def rewrite(text, initial=(), hide=(), upper=()):  #  <===
    """Return ``text`` with selected words masked or converted to upper case.

    Same contract as the previous versions; the only change is that the empty
    tuple ``()`` is used as the (immutable) default value. Priority when a
    word is listed more than once: hide, then initial, then upper.
    """
    to_initial = frozenset(initial)
    to_hide = frozenset(hide)
    to_upper = frozenset(upper)

    # Chained conditional expression: the first condition that holds decides.
    pieces = [
        '*' * len(token) if token in to_hide
        else token[0] + '*' * (len(token) - 1) if token in to_initial
        else token.upper() if token in to_upper
        else token
        for token in text.split()
    ]
    return ' '.join(pieces)

rewrite('Python rocks but Java sucks', initial=['sucks'], hide=['Java'], upper=['Python'])

'PYTHON rocks but **** s****'

## Exercise

Write a function ``substitute(text, replace)`` where ``replace`` is a dictionary of (key, value) items, where the keys are words that should be replaced by the corresponding value.

In [14]:
def substitute(text, replace):
    """Return ``text`` with every word that is a key of ``replace`` swapped for its value.

    ``text`` is split on whitespace and the result is joined with single
    spaces; words that are not keys of the dictionary are kept unchanged.
    """
    pieces = []
    for token in text.split():
        pieces.append(replace[token] if token in replace else token)
    return ' '.join(pieces)

substitute('Python rocks but Java sucks', {'Python': 'PYTHON', 'Java':'****'})

'PYTHON rocks but **** sucks'

In [18]:
def substitute(text, replace):
    """Return ``text`` with every word that is a key of ``replace`` swapped for its value.

    ``replace.get(token, token)`` looks the word up and falls back to the
    word itself when it is not a key, so no explicit ``if`` is needed.
    """
    tokens = text.split()
    return ' '.join([replace.get(token, token) for token in tokens])

substitute('Python rocks but Java sucks', {'Python': 'PYTHON', 'Java':'****'})

'PYTHON rocks but **** sucks'

## Exercise

Implement ``rewrite(text, initial=[word,...], hide=[word,...], upper=[word,...])`` using ``substitute(text, replace)``.

In [26]:
# |= operator, new in Python 3.9

def rewrite(text, initial=(), hide=(), upper=()):  #  <===
    """Return ``text`` with selected words masked or upper-cased, via ``substitute``.

    Builds one word -> replacement dictionary from the three optional
    iterables and hands it to ``substitute(text, replace)``:
        hide:    word -> one ``*`` per letter
        initial: word -> first letter followed by one ``*`` per remaining letter
        upper:   word -> word in upper case

    If a word is listed in several arguments, hide takes priority over
    initial, which takes priority over upper (the same order as the
    if/elif versions of ``rewrite``).
    """
    # ``|=`` lets the right-hand dictionary overwrite existing keys, so the
    # dictionaries are merged from lowest to highest priority.
    replace = {word: word.upper() for word in upper}
    # word[:1] instead of word[0]: an empty string in ``initial`` must not raise IndexError
    replace |= {word: word[:1] + '*' * (len(word) - 1) for word in initial}
    replace |= {word: '*' * len(word) for word in hide}
    # print(f'{replace=}')
    return substitute(text, replace)

rewrite('Python rocks but Java sucks', initial=['sucks'], hide=['Java'], upper=['Python'])

'PYTHON rocks but **** s****'

In [7]:
# Use the | operator on dictionaries, new in Python 3.9

def rewrite(text, initial=(), hide=(), upper=()):
    """Return ``text`` with selected words masked or upper-cased, via ``substitute``.

    The three optional iterables are turned into word -> replacement
    dictionaries and combined with the ``|`` operator into the single
    dictionary passed to ``substitute``:
        hide:    word -> one ``*`` per letter
        initial: word -> first letter followed by one ``*`` per remaining letter
        upper:   word -> word in upper case

    If a word is listed in several arguments, hide takes priority over
    initial, which takes priority over upper (the same order as the
    if/elif versions of ``rewrite``).
    """
    return substitute(text,
        # With ``a | b`` the values of ``b`` win on duplicate keys, so the
        # operands are ordered from lowest to highest priority.
        {word: word.upper() for word in upper}
        # word[:1] instead of word[0]: an empty string in ``initial`` must not raise IndexError
        | {word: word[:1] + '*' * (len(word) - 1) for word in initial}
        | {word: '*' * len(word) for word in hide}
    )

print(rewrite('Python rocks but Java sucks', 
              initial=['sucks'], hide=['Java'], upper=['Python']))

# Can create a simple eliminate function using rewrite

def eliminate(text, words):
    """Return ``text`` with every word in the list ``words`` replaced by ``*`` characters.

    Thin wrapper: the words are passed to ``rewrite`` as its ``hide``
    argument, while ``initial`` and ``upper`` keep their defaults.
    """
    return rewrite(text, hide=words)

print(eliminate(
    'Python Java and C are popular languages but Java and C '
    'are statically typed whereas Python is dynamically typed', 
    ['Java', 'C', 'statically']
))

NameError: name 'substitute' is not defined

In [9]:
# The dictionary operators | and |= used in the cells above are new in
# Python 3.9; display the version of the interpreter running this notebook.
import sys
sys.version

'3.9.1 (tags/v3.9.1:1e5d33e, Dec  7 2020, 17:08:21) [MSC v.1927 64 bit (AMD64)]'