language semantics
variables and pass-by-reference
when passing objects as arguments to a function, you are only passing reference; no copying
a function can mutate the internals of its arguments
#1
a=[1,2,3]
b=a #pass reference
c=a[:] #pass value
a.append(4)
function arguments
the arguments can be changed
#2, function
def app_ele(inputlist,element):
    """Append ``element`` to ``inputlist`` in place.

    The list is received by reference, so the caller's list is mutated.
    Returns None.
    """
    inputlist.append(element)
app_ele(a,4)
#a will be changed
dynamic references, strong types
'5'+5
#cause error
cannot concatenate ‘str’ and ‘int’ objects
implicit conversions will occur while
- math operation between ‘int’ and ‘float’
estimate type
type(a)
isinstance(a,(int,float))
attributes and methods
find attribute of one object
a='foo'
a.<Tab> #use Tab to have a list of attributes
dir(a)
#attributes and methods can be accessed using getattr()
#hasattr()
#setattr()
getattr(a,'split')
‘Duck’ typing
an object is iterable if it implements the iterator protocol, i.e. it has an __iter__
“magic method”
def isiterable(obj):
    """Return True if iter(obj) succeeds, False if it raises TypeError."""
    try:
        iter(obj)
        return True
    except TypeError: # not iterable
        return False
isiterable('a string')
#convert a iterable object to list
# isiterable() is the helper defined in these notes; plain iterable() does not exist
if not isinstance(x,list) and isiterable(x):
    x=list(x)
imports
import modules(.py file) and the functions
import some_module
#------------------
from some_module import func1,func2,func3
func1(x,y)
#------------------
import some_module as sm
sm.func1(x,y)
binary operators and comparisons
a//b #floor division
a**b #a raised to the bth power
a & b #and
a | b #or
a ^ b #exclusive or: true when exactly one of a, b is true
a is not b #True when a and b reference different objects
mutable and immutable objects
- mutable: lists, dicts, NumPy arrays or most user-defined types
- immutable: strings and tuples
scalar types
type | description | format |
---|---|---|
None | null value | N/A |
str | string: ASCII in py2; unicode in py3 | N/A |
unicode | unicode string | u’string’ |
float | double-precision | 4.5e-5 |
bool | logical value | True |
int | integer | 3 |
long | long integer | N/A |
numerical types
large integers are automatically converted to type ‘long’ (py2)
avoid integer division in py2.7 by
from __future__ import division
complex number:
cval = 1 + 2j
convert: int(), float()
strings
immutable
a='use single quote'
b="double quote"
c="""
a
long
string
"""
# methods
b=a.replace("quote","quotation")
a=5.6
# convert num to string
a2 = str(5.6)
#sequence of characters
s='python'
list(s)
s[:3]
#escape character: \
s = '12\\34'
#out for s is 12\34
ss = r'a\b\c\d'
#out for ss is 'a\\b\\c\\d'
concatenate
a + b
template or format using %
# three placeholders -> exactly three values in the tuple
template = '%.2f %s are worth $%d'
template % (4.5560, 'apple', 1)
Booleans
logical operator: and, or, not
convert: bool()
- bool([]) is False
- bool([1,2,3]) is True
None
null value type
common default value for optional function arguments
dates and times
the built-in datetime module provides the datetime, date, and time types.
from datetime import datetime, date, time
dt = datetime(2011,10,29,20,30,11)
dt.day
dt.minute
dt.date()
dt.time()
dt.strftime('%m/%d/%Y %H:%M')
#convert a string to datetime type --------
dt1 = datetime.strptime('20091031','%Y%m%d')
#replace some fields; returns a new datetime, dt itself is unchanged -------
dt.replace(minute=0,second=0)
#the difference of two datetimes is a timedelta
delta = dt1 - dt
dt+delta #gives dt1 back
control flow
if, elif, else
# print(...) with one argument works in both py2 and py3
if x < 0:
    print('negative')
elif x == 0:
    print('zero')
else:
    print('positive')
for loops
the collection can be iterator
seq = [1,2,None,3]
total = 0
for value in seq:
    if value is None:
        continue #skip missing values
    if value == 5:
        break #stop summing at the first 5
    total += value
if the values in the collection are sequences (tuples or lists), they can be conveniently unpacked
for a, b, c in iterator:
pass
while loops
x = 256
total = 0
while x > 0:
    if total > 500:
        break #stop once the running total passes 500
    total += x
    x = x // 2
exception handling
try:
return float(x)
#only suppress TypeError and ValueError
except (TypeError, ValueError):
return x
#file handle, use finally to execute codes regardless
f = open(path,'w')
try:
    write_to_file(f)
except Exception: #a bare except would also swallow KeyboardInterrupt/SystemExit
    print('Failed')
else:
    print('Succeeded') #runs only when the try block raised nothing
finally:
    f.close() #always runs, so the handle is never leaked
range and xrange
xrange returns a lazy iterable instead of building a list; in py3 range is lazy too (and xrange no longer exists).
range(10)
seq = [1,2,3,4]
for i in range(len(seq)):
val = seq[i]
ternary expressions
value = true-expr if condition else false-expr
data structures and sequences
tuple ( )
immutable
tup = 4, 5, 6
nested_tup = (4,5,6), (7,8)
convert to tuple
tuple([2,3,4])
tuple('string')
concatenate
(4,5,None)+('bar',)
(4,5,None) * 4
unpacking tuples
a,b,c = tup
tup1 = 4, 5, (6, 7)
a,b,(c,d) = tup1
#------
seq = [(1,2),(4,5)]
for a, b in seq:
pass
tuple methods
a = (1,2,2,2,3,4)
a.count(2)
list [ ]
convert to list
list()
add and remove elements
list1.append('a')
#insert to a specific location
list1.insert(1,'red')
#remove an element at a particular index
list1.pop(2)
#remove the first occurrence value
list1.remove('foo')
#check whether containing a value
'foo' in list1
concatenate lists
[4,None, 'foo'] + [7,8,(2,3)]
append lists
x.extend([3,4,(2,3)])
sorting
a.sort()
#sort by the length
b.sort(key=len)
binary search and maintaining a sorted list
- .bisect finds the location where an element should be inserted to keep it sorted
- .insort inserts the element into that location
import bisect
c = [1,2,2,2,3,4,7] #must already be sorted
bisect.bisect(c,4) #index where 4 would be inserted
bisect.insort(c,6) #insert 6 at its sorted position
slicing
the number of elements is (stop - start)
seq[1:5]
seq[:5]
seq[3:]
seq[-4:]
seq[-6:-2]
#step
seq[::2]
#reverse
seq[::-1]
built-in sequence functions
enumerate
to iterate over a sequence to keep track of the index and value
for i, value in enumerate(collection):
#do something
#create a dict with index and value
mapping = dict((v,i) for i, v in enumerate(some_list))
sorted
return a new sorted list from the elements of any sequence
sorted([1,3,2,4])
sorted('ab cd')
#sort unique elements
sorted(set(somelist))
zip
pair up elements of a number of lists,tuples or other sequences to create a list of tuples
zip(seq1,seq2,..seqn)
unzip
pitchers = [(1,2),(3,4)]
first, last = zip(*pitchers)
reversed
list(reversed(range(10)))
dict { }
key-value
keys should be unique
d1 = {'a': 'some value', 'b': [1,2,3]}
d1['b']='one'
delete element
del d1[5]
ret = d1.pop('a')
find keys and values
d1.keys()
d1.values()
merge with update method
d1.update({'c':1,'e': [1,3]})
create dicts from sequences
mapping = dict(zip(range(5),reversed(range(5))))
default values
if key in some_dict:
    value = some_dict[key]
else:
    value = default_value
#equivalent to
value = some_dict.get(key,default_value)
# .pop() will raise an exception
# setdefault function
by_letter = {}
for word in words:
    letter = word[0] # letter is the key
    by_letter.setdefault(letter,[]).append(word)
#equivalent to
from collections import defaultdict
by_letter = defaultdict(list)
for word in words:
    by_letter[word[0]].append(word)
valid dict key types
key has to be immutable objects, hashability
check with hash()
set
unordered collection of unique elements
create
set([1,2,3,2,3,4])
#or
{1,2,2,3,4}
operations
a = {1,2,3}
b={3,4}
#union
a | b; a.union(b)
#intersection
a & b; a.intersection(b)
#difference
a - b; a.difference(b)
#symmetric difference(xor)
a ^ b; a.symmetric_difference(b)
#issubset
a.issubset(b)
a.issuperset(b)
a.isdisjoint(b)
a.add(x)
a.remove(x)
list, set and dict comprehensions
[expr for val in collection if condition]
dict_comp = {key-expr: value-expr for value in collection if condition}
set_comp = {expr for value in collection if condition}
loc_mapping = {val:index for index, val in enumerate(strings)}
#equivalent to
loc_mapping = dict((val,index) for index, val in enumerate(strings))
nested list comprehensions
result = [name for names in all_data for name in names if name.count('e') >= 2]
[[x for x in tup] for tup in tups]
functions
def my_func(x,y,z=1.5):
    """Return x scaled by z (z defaults to 1.5); y is accepted but not used."""
    return x*z
namespaces, scope and local functions
def global_func():
    """Show the ``global`` statement; the body does nothing else."""
    #'global a' makes the name a inside this function refer to the module-level a
    global a
    pass
can have a local function inside another function
return multiple values
def f():
#something
return a, b, c
#or
return {'a':a,'b':b}
#call function
a,b,c = f()
functions are objects
for function in function_list:
value = function(value)
#apply function to each element of a collection
map(function, collection)
anonymous(lambda) functions
strings.sort(key = lambda x: len(set(list(x))))
closures, functions that return functions
def make_closure(a):
    """Return a zero-argument function that prints 'something' when called."""
    def closure():
        print('something')
    return closure
#---
def format_and_pad(template,space):
    """Build a formatter from a %-template and a field width.

    template: %-style format string applied to the formatter's argument.
    space: width the formatted text is right-justified to.
    Returns a one-argument function producing the padded string.
    """
    def formatter(x):
        return (template % x).rjust(space)
    return formatter
#call
fmt = format_and_pad('%.4f',15)
fmt(1.756)
call syntax with *args
, **kwargs
*args
: receives a tuple; the number of arguments can be changed
**kwargs
: receives a dict; the keyword arguments are automatically assembled into a dict
currying: partial argument application
derive new functions from existing ones by partial argument applications
def add_numbers(x,y):
    """Return x + y."""
    return x+y
add_five = lambda y: add_numbers(5,y)
#equivalent to
from functools import partial
add_five = partial(add_numbers,5)
#pandas, calculate 60d moving average of a time series
ma60 = lambda x: pandas.rolling_mean(x,60)
data.apply(ma60)
generators
construct new iterable objects
return a sequence of values lazily, pausing after each one until the next one is requested, use yield
instead of return
def squares(n=10):
    """Lazily yield the squares 1**2, 2**2, ..., n**2."""
    for i in range(1,n+1):
        yield i ** 2
#call
gen = squares()
for x in gen:
print(x)
#find unique combination of coins
def make_changes(amount,coins=(10,25,50),hand=None):
    """Yield every combination of coins that adds up to ``amount``.

    amount: remaining amount to reach.
    coins: available denominations (only iterated, never modified).
    hand: coins picked so far; None means start with an empty hand.
    Each yielded list is in non-increasing order, so every combination
    appears exactly once.
    """
    hand = [] if hand is None else hand
    if amount == 0:
        yield hand
        return #nothing more can be added once the amount is reached
    for coin in coins:
        #skip coins that overshoot, or that are larger than the last coin
        #taken (this keeps each hand in non-increasing order)
        if amount<coin or (len(hand)>0 and hand[-1]<coin):
            continue
        for result in make_changes(amount-coin, coins=coins, hand=hand+[coin]):
            yield result
#call
for way in make_changes(100):
print(way)
#using a normal function; fails to solve arbitrary values
def makechange(inamount,coins=(25,30)):
    """Greedily break ``inamount`` into coins, largest denomination first.

    If a greedy pass leaves a remainder, the largest denomination is
    dropped and the pass is retried with the remaining ones.  This is a
    heuristic and can miss valid combinations for arbitrary coin sets.

    inamount: amount to change.
    coins: iterable of denominations; it is not modified.
    Returns a dict mapping denomination -> number of coins used.
    Raises ValueError when no pass ends with a remainder of 0.
    """
    ordered = sorted(coins, reverse=True) #sorted copy; the caller's list is untouched
    for start in range(len(ordered)):
        amount = inamount
        changedict = {}
        for value in ordered[start:]:
            nocoin, amount = divmod(amount, value)
            if nocoin > 0:
                changedict[value] = nocoin
            if amount == 0:
                return changedict
    raise ValueError('cannot make change for %r with coins %r' % (inamount, ordered))
generator expressions
gen = (x ** 2 for x in xrange(100))
#equivalent to
def _make_gen():
for x in xrange(100):
yield x ** 2
gen = _make_gen()
#---
sum(x ** 2 for x in xrange(100))
itertools module
import itertools as itls
groupby
takes any sequence and a function, returns (key, sub-iterator) grouped by each value of key. use the returned value of the function to group; sort with key before grouping
for key, group in itls.groupby(things, lambda x: x[0]):
    pass #group is an iterator over the consecutive items sharing this key
other funcs
map: imap(func,*iterables)
print("Doubles:")
for i in itls.imap(lambda x: 2*x, xrange(5)):
    print (i)
#---
print("Multiples:")
for i in itls.imap(lambda x,y:(x, y, x*y), xrange(5),xrange(5,10)):
    print ('%d * %d = %d' % i)
#starmap() unpacks each tuple into the function's arguments
values = [(0, 5), (1, 6), (2, 7), (3, 8), (4, 9)]
for i in itls.starmap(lambda x,y:(x,y,x*y), values):
    print ('%d * %d = %d' % i)
ifilter(func,iterable)
: yields elements x for which func(x) is True
combinations(iterable,k)
: generates a sequence of all possible k-tuples of elements in the iterable, ignoring order
permutations(iterable,k)
: generates a sequence of all possible k-tuples of elements in the iterable, respecting order
chain()
receives n iterable objects and returns one iterator that yields their elements one after another
izip()
receives n iterable objects and return tuples, like zip()
islice()
for i in itls.islice(itls.count(),start,stop,step):
pass
tee()
receives an iterator and returns n independent copies of it; the original iterator should not be used afterwards
r = itls.islice(itls.count(),4)
i1, i2, i3 = itls.tee(r,3) # i1 and i2, like a copy
for i, j, k in itls.izip(i1,i2,i3):
print i, j, k
generate new iterators
#count, generator consecutive integers
for i in itls.izip(itls.count(1),['a','b','c']):
print(i)
#cycle() repeat something
for item in itls.cycle(['a','b','c']):
#repeat() repeat something for n times
for i in itls.repeat('over-and-over',3):
file and os
open file
f = open(path)
f.close()
read over lines
for lines in f:
pass
#or
lines = [x.strip() for x in open(path)]
#or
open('tmp.txt').readlines()
write file
f = open(path,'w')
#copy every line longer than one character from path into tmp.txt;
#both files are closed when the with block ends
with open('tmp.txt','w') as handle:
    with open(path) as source:
        handle.writelines(x for x in source if len(x)>1)
r: read-only
w: write-only, create a new file
a: append to an existing file
r+: read and write
b: add to mode for binary files, ‘rb’, ‘wb’
U: use universal newline mode. ‘U’ or ‘rU’
methods
read([size]) #return data from file as a string
readlines([size]) #return list of lines in the file
write(str) #passed string to file
writelines(strings)
close() #close handle
flush() #flush the internal I/O buffer to disk
seek(pos) #move to indicated file position(integer)
tell() #return current file position as integer
closed #True while the file is closed