# Introduction to Python

In [None]:
# Calculator: Python evaluates arithmetic expressions directly
a = (50 + 1.45) / 12.5 # (CTRL + ENTER) - presumably the shortcut that runs this cell
b = "hello" + " " + "world" # `+` between strings concatenates them

# print() writes each value to the cell output
print(a)
print(b)

In [None]:
# Lists: a single list may mix element types, including nested lists
x = [3, "hello", [3, 4]]
y = [1, 2, 3]
print(x + y) # `+` concatenates the two lists into a new list

In [None]:
# Flow control
# A for loop runs its indented body once per value; range(10) yields 0 through 9.
for i in range(10):
    print(i)

In [None]:
# Iterating over a sequence hands out its elements one at a time.
for i in x:
    print(i)

In [None]:
# A while loop repeats for as long as its condition holds,
# so the counter has to be advanced by hand inside the body.
i = 0
while i < 10:
    print(i)
    i += 1

In [None]:
# Functions
def plus(a, b):
    """Return the result of ``a + b``."""
    return a + b

plus(1, 2)  # evaluates to 3

## Numpy

In [None]:
# Install library (the %pip magic requires IPython 7.3 or later)
# %pip install numpy 

In [None]:
# Older IPython versions: call pip through the Python interpreter that runs this kernel
# import sys
# !{sys.executable} -m pip install numpy

In [None]:
# Import library
import numpy as np

In [None]:
# Numpy arrays: np.array builds an array from a Python list
x = np.array([3, 4, 5])
y = np.array([4, 9, 7])

print(x)
print(y)

In [None]:
# Numpy allows mathematical operations between arrays
print(x+y) # when x and y are numpy arrays of the same shape, `+` adds them element by element

In [None]:
# Different from normal lists: `+` on lists concatenates instead of adding
# (note that x and y are rebound to plain lists here)
x = [3,4,5]
y = [4,9,7]
print(x+y) # prints [3, 4, 5, 4, 9, 7]

In [None]:
# 2-dimensional arrays (built from nested lists, one inner list per row)

a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])

print(a*b) # `*` multiplies element by element; it is not the matrix product (that would be a @ b)

a.dtype # element type of the array; as the last expression it is shown as the cell output

In [None]:
# Documentation
# A trailing `?` is IPython/Jupyter help syntax (not plain Python); it displays the object's documentation
np.array?

In [None]:
# Array dimensions
print(a.ndim) # Number of dimensions (axes)
print(a.shape) # Shape of the array: a tuple with the length of each dimension

In [None]:
# Reshaping arrays
a = np.array([[1, 2], [3, 4]])
print(a.reshape([1,4])) # [rows, columns]: the same 4 values laid out as 1 row x 4 columns

In [None]:
c = np.array([1, 2, 3, 4, 5, 6])
print(c.reshape(2, 3)) # Convert a 1d array to a 2x3 array (2 * 3 must equal the 6 elements of c)

## Indexing Arrays

In [None]:
# np.arange(16) already returns an array holding 0..15,
# so it can be reshaped directly into 4 rows x 4 columns.
A = np.arange(16).reshape(4, 4)
A

In [None]:
# Select single element (indices start at 0)

print(A[1,2]) #[row, column]
# or
print(A[1][2]) # A[1] selects row 1 first, then [2] picks the element at index 2 of that row

In [None]:
# Select entire row/column
print(A[0,:]) # : selects everything, so this is the whole first row

print(A[0]) # shorthand for the same first row

print(A[:,0]) # every row, column 0: the first column

In [None]:
# Select multiple rows/columns by passing a list of indices
print(A[[1, 2],:]) # 2nd and 3rd row
print(A[:, [2,3]]) # 3rd and 4th column

In [None]:
# Select a submatrix
# Two index lists are matched up pairwise, so this is not a rows x columns cross product
print(A[[0,1], [0,1]]) # Doesn't work, selects individual elements [0,0] and [1,1]

In [None]:
print(A[[0,1]][:,[0,1]]) # Select the rows first, then the columns of that intermediate result

In [None]:
print(A[0:2, 0:2]) # Or with slices (the end index 2 is excluded, so rows and columns 0 and 1)

## Boolean indexing

In [None]:
# A plain Python list of values and a same-length list of booleans to use as a mask
A = [0,1,2,3,4,5,6,7,8,9]
B = [True, False, True, False, True, False, True, False, True, False]

In [None]:
A[B] # Error (TypeError) - a plain Python list cannot be indexed with a list of booleans

In [None]:
# With numpy arrays, we select every element whose entry in B is True
A = np.array(A) # convert the list to a numpy array so that mask indexing is supported
print(A[B])

In [None]:
# We can use boolean indexing to select elements matching specific conditions
# (a comparison on an array yields a boolean array, which then acts as the mask)
print(A[A > 5])
print(A[A % 2 == 0]) # even values
print(A[(A > 2) & (A < 7)]) # combine conditions with &; each condition needs its own parentheses

In [None]:
# Also works with n-dimensional arrays
A = np.arange(16).reshape(4, 4)
print(A)
# One boolean per row / per column: True keeps it, False drops it.
rows = [True, False, True, False]
cols = [True, True, False, False]
A[rows][:, cols]

In [None]:
# Select rows that sum up to more than 25
high_rows = np.sum(A, axis=1) > 25 # axis=1 sums across the columns, giving one total per row
print(high_rows)
A[high_rows] # keep only the rows whose mask entry is True

# Pandas and Datasets

In [None]:
# In addition to Numpy arrays, we can use additional libraries to better process larger datasets
import pandas as pd

In [None]:
# Download the mpg CSV from the seaborn-data repository into a dataframe (needs network access)
auto = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/mpg.csv')
auto

## Pandas dataframes

In [None]:
# How many examples (rows) are in a dataframe
len(auto)

In [None]:
# Show the column names (as the last expression, the value is displayed as the cell output)
auto.columns

In [None]:
# Are there missing values in the dataset
# Iterating over a dataframe yields its column labels, so the built-in any()
# would only test whether some label is truthy and never look at the data.
# Reduce the boolean mask instead: the first .any() collapses each column,
# the second collapses the per-column results into a single True/False.
bool(auto.isna().any().any())

In [None]:
# Count the missing values per column
# isna() marks each missing cell as True; sum() then counts the True values column by column
auto.isna().sum()

In [None]:
# Drop missing values: dropna() returns a new dataframe without the rows that contain a missing value
auto = auto.dropna()
print(len(auto)) # number of rows left
print(auto.isna().sum()) # every per-column count is now 0

In [None]:
# Drop columns
# drop() returns a new dataframe; the result is not assigned here, so `auto` itself keeps the column
auto.drop(columns="horsepower")

### Indexing Pandas datasets

In [None]:
auto[1] # Error (KeyError): square brackets look up a column label, and no column is named 1

In [None]:
print(auto['mpg']) # Index by column names instead (returns that single column)

In [None]:
print(auto[:3]) # Or use slices to select rows (here the first 3 rows)

In [None]:
print(auto.loc[0,"mpg"]) # Or with .loc[row_label, column_name]; the row label is the index value, not the position

In [None]:
print(auto.iloc[0,0]) # Or with .iloc[row_number, column_number] (positions, counted from 0)

In [None]:
print(auto.loc[0:5, ["mpg", "cylinders"]]) # Can also select multiple rows and columns; a .loc slice includes its end label

We can use logical (boolean) indexing to select the rows that satisfy a condition.

In [None]:
idx_80 = auto['model_year'] > 80 # boolean mask: True for the rows whose model_year is greater than 80
print(auto.loc[idx_80, ['name', 'model_year']]) # keep those rows and show only the two listed columns

In [None]:
# Or in one line, with the condition written directly inside .loc
print(auto.loc[auto['model_year'] > 80, ['name', 'model_year']])

## Adding values

In [None]:
# Create new column: assigning to a label that does not exist yet adds it to the dataframe
auto['Horsepower / Weight'] = auto['horsepower'] / auto['weight'] # the division is done row by row
auto

In [None]:
print(auto.columns)
# Create new (empty) row
# DataFrame.append was removed in pandas 2.0, so extend the index by one label instead.
# reset_index(drop=True) renumbers the rows 0..n-1 (what ignore_index=True did before),
# and reindex() then adds label n, whose cells are filled with missing values.
# No column names are hard-coded; `auto` itself is left unchanged.
auto.reset_index(drop=True).reindex(index=range(len(auto) + 1))

## Additional functions

In [None]:
# Merge datasets: concat stacks the dataframes on top of each other
# (the original row labels are kept, so each label appears twice here)
pd.concat([auto, auto])

# This can also be used to add new rows - simply create a new dataframe and concat it to the original

In [None]:
# Save dataset as comma-separated text; index=False leaves the row labels out of the file
auto.to_csv('./is_lab_1_dataset.txt', index=False)