Kalanand's November 2014 Log

   October 2014   
November 2014
SuMoTuWeThFrSa
      1
2345678
9101112131415
16171819202122
23242526272829
30      
   December 2014   

November 1st

Scala week-7: finding prime numbers

package week7

object primes {

  /** Return the divisors of n. */
  def divisors(n: Int): List[Int] =
    for (i <- List.range(1, n + 1) if n % i == 0) yield i
                                                  //> divisors: (n: Int)List[Int]

  /** Is 'n' a prime number? */
  def isPrime(n: Int) = divisors(n).length == 2   //> isPrime: (n: Int)Boolean

  /** find the second prime number between 1000 and 10000 */
  ((1000 to 10000).toStream filter isPrime)(1)    //> res0: Int = 1013

  /** Define a function to print stream range */
  def streamRange(lo: Int, hi: Int): Stream[Int] = {
    println(lo + " ")
    if (lo >= hi) Stream.Empty
    else Stream.cons(lo, streamRange(lo + 1, hi))
  }                                               //> streamRange: (lo: Int, hi: Int)Stream[Int]

  streamRange(1, 10).take(3).toList               //> 1 
                                                  //| 2 
                                                  //| 3 
                                                  //| res1: List[Int] = List(1, 2, 3)

  /* define an infinite series */
  def from(n: Int): Stream[Int] = n #:: from(n + 1)
                                                  //> from: (n: Int)Stream[Int]
  /* infinite series of natural numbers */
  val nats = from(0)                              //> nats  : Stream[Int] = Stream(0, ?)
  val m4s = nats map (_ * 4)                      //> m4s  : scala.collection.immutable.Stream[Int] = Stream(0, ?)
  (m4s take 10).toList                            //> res2: List[Int] = List(0, 4, 8, 12, 16, 20, 24, 28, 32, 36)

  /** Sieve of Eratosthenes to calculate primes */
  /* Start with all integers from 2, the first prime number. */
  /* The first element of resulting list is 3, a prime number. */
  /* Eliminate all multiples of 3. Iterate for ever. */
  /* At each step the first number in the list is a prime number and we eliminate all its multiples.*/
  def sieve(s: Stream[Int]): Stream[Int] =
    s.head #:: sieve(s.tail filter (_ % s.head != 0))
                                                  //> sieve: (s: Stream[Int])Stream[Int]
  val primes = sieve(from(2))                     //> primes  : Stream[Int] = Stream(2, ?)

  /* Print the first hundred prime numbers */
  primes.take(100).toList                         //> res3: List[Int] = List(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 
                                                  //| 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 1
                                                  //| 31, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 2
                                                  //| 11, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 2
                                                  //| 93, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 3
                                                  //| 89, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 4
                                                  //| 79, 487, 491, 499, 503, 509, 521, 523, 541)

  /** Back to square roots */
  def sqrtStream(x: Double): Stream[Double] = {
    def improve(guess: Double) = (guess + x / guess) / 2
    lazy val guesses: Stream[Double] = 1 #:: (guesses map improve)
    guesses
  }                                               //> sqrtStream: (x: Double)Stream[Double]
  
  sqrtStream(2).take(10).toList                   //> res4: List[Double] = List(1.0, 1.5, 1.4166666666666665, 1.4142156862745097,
                                                  //|  1.4142135623746899, 1.414213562373095, 1.414213562373095, 1.41421356237309
                                                  //| 5, 1.414213562373095, 1.414213562373095)

  /* we can add the termination criteria as before */
  def isGoodEnough(guess: Double, x: Double) =
    math.abs((guess * guess - x) / x) < 0.0001    //> isGoodEnough: (guess: Double, x: Double)Boolean
    
   sqrtStream(4).filter(isGoodEnough(_, 4)).take(10).toList
                                                  //> res5: List[Double] = List(2.0000000929222947, 2.000000000000002, 2.0, 2.0, 
                                                  //| 2.0, 2.0, 2.0, 2.0, 2.0, 2.0)
}

November 7th

Scala week-7 continued: pouring x galons of water using N containers of capacity m1, m2, .., mN

package week7

class Pouring(capacity: Vector[Int]) {

  // States
  type State = Vector[Int]
  val initialState = capacity map (x => 0)
  
  // Moves
  trait Move {
    def change(state: State): State
  }
  case class Empty(glass: Int) extends Move {
    def change(state: State) = state updated (glass, 0)
  }
  case class Fill(glass: Int) extends Move {
    def change(state: State) = state updated (glass, capacity(glass))
  }
  case class Pour(from: Int, to: Int) extends Move {
    def change(state: State) = {
      val amount = state(from) - math.min (capacity(to), state(to))
      state updated (from, state(from) - amount) updated( to, state(to) + amount)
    }
  }
  
  
  val glasses = 0 until capacity.length
  val moves = 
    (for (g <- glasses) yield Empty(g)) ++
    (for (g <- glasses) yield Fill(g)) ++
    (for (from <- glasses; to <- glasses if from != to) yield Pour(from, to))

// Paths
  class Path(history: List[Move], val endState: State) {
    def extend(move: Move) = new Path(move :: history, move change endState)
    override def toString = (history.reverse mkString " ") + " --> " + endState
  }
  
  val initialPath = new Path(Nil, initialState)
  
  
  def from(paths: Set[Path], explored: Set[State]): Stream[Set[Path]] = 
    if(paths.isEmpty) Stream.empty
    else {
      val more = for {
        path <- paths 
        next <- moves map path.extend
        if !(explored contains next.endState)
      } yield next
      paths #:: from(more, explored ++ (more map(_.endState)))
    }
  
  
  val pathSets = from(Set(initialPath), Set(initialState))
  
  def solutions(target: Int): Stream[Path] = 
    for {
      pathSet <- pathSets
      path <- pathSet
      if path.endState contains target
    } yield path
}


object test {
  val problem = new Pouring(Vector(4, 7, 19))     //> problem  : week7.Pouring = week7.Pouring@54a097cc
  problem.moves                                   //> res0: scala.collection.immutable.IndexedSeq[Product with Serializable with we
                                                  //| ek7.test.problem.Move] = Vector(Empty(0), Empty(1), Empty(2), Fill(0), Fill(1
                                                  //| ), Fill(2), Pour(0,1), Pour(0,2), Pour(1,0), Pour(1,2), Pour(2,0), Pour(2,1))
                                                  //| 
  problem.solutions(17)                           //> res1: Stream[week7.test.problem.Path] = Stream(Fill(2) Pour(2,1) Pour(2,1) F
                                                  //| ill(2) Pour(2,1) Pour(0,1) --> Vector(7, 17, 7), ?)
}

November 10th

Python defaultdict

Usually, a Python dictionary throws a KeyError if you try to get an item with a key that is not currently in the dictionary. The defaultdict in contrast will simply create any items that you try to access (provided of course they do not exist yet). To create such a "default" item, it calls the function object that you pass in the constructor (more precisely, it's an arbitrary "callable" object, which includes function and type objects).

Here is how to use defaultdict (from pythondoc page)

>>> from collections import defaultdict
>>> s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
>>> d = defaultdict(list)
>>> for k, v in s: d[k].append(v)
>>> d.items()
[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
>>> d["green"]
[]
Here is an example of item frequency count using defaultdict:
from collections import defaultdict

words = "apple banana apple strawberry banana lemon"

d = defaultdict(int)
for word in words.split(): d[word] += 1
This runs in O(n).
>>> print d
defaultdict(, {'strawberry': 1, 'lemon': 1, 'apple': 2, 'banana': 2})

enumerate(list) along with index

>>> item_list = ["It's", 'only', 'a', 'model']
>>> for index, item in enumerate(item_list, 1):
...     print index, item
... 
1 It's
2 only
3 a
4 model

Try-catch in python

try:
    open('junk.foo')
except Exception as exc:
    traceback.print_exc()

Duck typing

In python we often hear the phrase 'Duck Typing'. This refer to the very laissez faire python attitude about type checking. If it acts like a duck and quacks like a duck... it's a duck.

http://en.wikipedia.org/wiki/Duck_typing

Here is an example usage

>>> import StringIO 
>>> my_file_like_thing = StringIO.StringIO()
>>> try: my_file_like_thing.write('I am writing to a file') 
... except IOError as exc:
...     print 'my file like thing did not work, try something else' % exc
... 
>>> my_file_like_thing.seek(0)
>>> my_file_like_thing.read()
'I am writing to a fileI am writing to a file'

November 14th

item frequency count in python

Assume I have a list of words, and I want to find the number of times each word appears in that list. Here is an example to do this using python itertools built-in functionality.
>>> from collections import Counter
>>> words = "apple banana apple strawberry banana lemon"
>>> freqs = Counter(words.split())
>>> print(freqs)
Counter({'apple': 2, 'banana': 2, 'strawberry': 1, 'lemon': 1})
If I want to save the above frequency distribution as a dictionary instead
>>> from itertools import groupby
>>> words = words.split()
>>> result = dict((key, len(list(group))) for key, group in groupby(sorted(words)))
>>> print result
{'strawberry': 1, 'lemon': 1, 'apple': 2, 'banana': 2}

python map = lazy, python list-comprehension = not-lazy !

Here is an example to illustrate this. The following code
input = [sc.textFile(y).map(lambda x: y) for y in options.input] 
will result in *all* "y", whereas since map is lazy-eval, after the following modification
input = map(lambda y: sc.textFile(y).map(lambda x: y), options.input)
y will be set with each value in options.input properly.

Reversing dictionary items and orderings

>>> my_phrase = ["No", "one", "expects", "the", "Spanish", "Inquisition"]
>>> my_dict = {key: value for value, key in enumerate(my_phrase)}
>>> print(my_dict)
{'Inquisition': 5, 'No': 0, 'expects': 2, 'one': 1, 'Spanish': 4, 'the': 3}
>>> reversed_dict = {value: key for key, value in my_dict.items()}
>>> print(reversed_dict)
{0: 'No', 1: 'one', 2: 'expects', 3: 'the', 4: 'Spanish', 5: 'Inquisition'}

November 30th

Top 10 Python idioms I wish I'd learned earlier!

Someone posted a top 10 that seems reasonably good

http://prooffreaderplus.blogspot.ca/2014/11/top-10-python-idioms-i-wished-id.html

On a similar note, the following blog has lot of useful stuff

http://jvns.ca/blog/2014/11/27/pydata-nyc-i-gave-a-machine-learning-talk-yay/

Go to October's log


Last modified: Wed Dec 9 15:57:25 PST 2014