Description: This is my first Rmd that I fully coded myself. It is based on a data set collected by a radar system in Canada. The data set contains 34 independent variables plus a class label, which I used to build this KNN model.

The data and more information about it can be found at data.world by following this link. And the repo can be found on Github here.

# Load packages quietly: tidyverse for data wrangling, pander for table
# rendering, class for the knn() classifier.
for (pkg in c("tidyverse", "pander", "class")) {
  suppressMessages(library(pkg, character.only = TRUE))
}

# Pull the ionosphere radar data directly from data.world.
# NOTE(review): the CSV's first row is treated as a header, which is why
# the column names look like data values (X1, X0.99539, ...) -- confirm
# that the source file really ships without a proper header row.
df <- read.csv(
  "https://query.data.world/s/a7daatpbbqbht722grkkm3ws522yhg?dws=00000",
  header = TRUE, stringsAsFactors = FALSE
)

# Display the structure of the underlying data
str(df)
## 'data.frame':	350 obs. of  35 variables:
##  $ X1        : int  1 1 1 1 1 1 0 1 1 1 ...
##  $ X0        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ X0.99539  : num  1 1 1 1 0.0234 ...
##  $ X.0.05889 : num  -0.18829 -0.03365 -0.45161 -0.02401 -0.00592 ...
##  $ X0.85243  : num  0.9304 1 1 0.9414 -0.0992 ...
##  $ X0.02306  : num  -0.36156 0.00485 1 0.06531 -0.11949 ...
##  $ X0.83398  : num  -0.10868 1 0.71216 0.92106 -0.00763 ...
##  $ X.0.37708 : num  -0.936 -0.121 -1 -0.233 -0.118 ...
##  $ X1.1      : num  1 0.89 0 0.772 0.147 ...
##  $ X0.03760  : num  -0.0455 0.012 0 -0.164 0.0664 ...
##  $ X0.85243.1: num  0.5087 0.7308 0 0.528 0.0379 ...
##  $ X.0.17755 : num  -0.6774 0.0535 0 -0.2028 -0.063 ...
##  $ X0.59755  : num  0.344 0.854 0 0.564 0 ...
##  $ X.0.44945 : num  -0.69707 0.00827 0 -0.00712 0 ...
##  $ X0.60536  : num  -0.5169 0.5459 -1 0.3439 -0.0457 ...
##  $ X.0.38223 : num  -0.97515 0.00299 0.14516 -0.27457 -0.1554 ...
##  $ X0.84356  : num  0.05499 0.83775 0.54094 0.5294 -0.00343 ...
##  $ X.0.38542 : num  -0.622 -0.136 -0.393 -0.218 -0.102 ...
##  $ X0.58212  : num  0.331 0.755 -1 0.451 -0.116 ...
##  $ X.0.32192 : num  -1 -0.0854 -0.5447 -0.1781 -0.0541 ...
##  $ X0.56971  : num  -0.1315 0.7089 -0.6997 0.0598 0.0184 ...
##  $ X.0.29674 : num  -0.453 -0.275 1 -0.3558 0.0367 ...
##  $ X0.36946  : num  -0.1806 0.4339 0 0.0231 0.0152 ...
##  $ X.0.47357 : num  -0.35734 -0.12062 0 -0.52879 0.00888 ...
##  $ X0.56811  : num  -0.2033 0.5753 1 0.0329 0.0351 ...
##  $ X.0.51171 : num  -0.2657 -0.4022 0.907 -0.6516 -0.0154 ...
##  $ X0.41078  : num  -0.2047 0.5898 0.5161 0.1329 -0.0324 ...
##  $ X.0.46168 : num  -0.184 -0.2215 1 -0.5321 0.0922 ...
##  $ X0.21266  : num  -0.1904 0.431 1 0.0243 -0.0786 ...
##  $ X.0.34090 : num  -0.11593 -0.17365 -0.20099 -0.62197 0.00732 ...
##  $ X0.42267  : num  -0.1663 0.6044 0.2568 -0.0571 0 ...
##  $ X.0.54487 : num  -0.0629 -0.2418 1 -0.5957 0 ...
##  $ X0.18641  : num  -0.13738 0.56045 -0.32382 -0.04608 -0.00039 ...
##  $ X.0.45300 : num  -0.0245 -0.3824 1 -0.657 0.1201 ...
##  $ g         : chr  "b" "g" "b" "g" ...
# Display the first six lines
# (head() defaults to n = 6; quick look at the raw predictor values)
head(df)
##   X1 X0 X0.99539 X.0.05889 X0.85243 X0.02306 X0.83398 X.0.37708    X1.1
## 1  1  0  1.00000  -0.18829  0.93035 -0.36156 -0.10868  -0.93597 1.00000
## 2  1  0  1.00000  -0.03365  1.00000  0.00485  1.00000  -0.12062 0.88965
## 3  1  0  1.00000  -0.45161  1.00000  1.00000  0.71216  -1.00000 0.00000
## 4  1  0  1.00000  -0.02401  0.94140  0.06531  0.92106  -0.23255 0.77152
## 5  1  0  0.02337  -0.00592 -0.09924 -0.11949 -0.00763  -0.11824 0.14706
## 6  1  0  0.97588  -0.10602  0.94601 -0.20800  0.92806  -0.28350 0.85996
##   X0.03760 X0.85243.1 X.0.17755 X0.59755 X.0.44945 X0.60536 X.0.38223 X0.84356
## 1 -0.04549    0.50874  -0.67743  0.34432  -0.69707 -0.51685  -0.97515  0.05499
## 2  0.01198    0.73082   0.05346  0.85443   0.00827  0.54591   0.00299  0.83775
## 3  0.00000    0.00000   0.00000  0.00000   0.00000 -1.00000   0.14516  0.54094
## 4 -0.16399    0.52798  -0.20275  0.56409  -0.00712  0.34395  -0.27457  0.52940
## 5  0.06637    0.03786  -0.06302  0.00000   0.00000 -0.04572  -0.15540 -0.00343
## 6 -0.27342    0.79766  -0.47929  0.78225  -0.50764  0.74628  -0.61436  0.57945
##   X.0.38542 X0.58212 X.0.32192 X0.56971 X.0.29674 X0.36946 X.0.47357 X0.56811
## 1  -0.62237  0.33109  -1.00000 -0.13151  -0.45300 -0.18056  -0.35734 -0.20332
## 2  -0.13644  0.75535  -0.08540  0.70887  -0.27502  0.43385  -0.12062  0.57528
## 3  -0.39330 -1.00000  -0.54467 -0.69975   1.00000  0.00000   0.00000  1.00000
## 4  -0.21780  0.45107  -0.17813  0.05982  -0.35575  0.02309  -0.52879  0.03286
## 5  -0.10196 -0.11575  -0.05414  0.01838   0.03669  0.01519   0.00888  0.03513
## 6  -0.68086  0.37852  -0.73641  0.36324  -0.76562  0.31898  -0.79753  0.22792
##   X.0.51171 X0.41078 X.0.46168 X0.21266 X.0.34090 X0.42267 X.0.54487 X0.18641
## 1  -0.26569 -0.20468  -0.18401 -0.19040  -0.11593 -0.16626  -0.06288 -0.13738
## 2  -0.40220  0.58984  -0.22145  0.43100  -0.17365  0.60436  -0.24180  0.56045
## 3   0.90695  0.51613   1.00000  1.00000  -0.20099  0.25682   1.00000 -0.32382
## 4  -0.65158  0.13290  -0.53206  0.02431  -0.62197 -0.05707  -0.59573 -0.04608
## 5  -0.01535 -0.03240   0.09223 -0.07859   0.00732  0.00000   0.00000 -0.00039
## 6  -0.81634  0.13659  -0.82510  0.04606  -0.82395 -0.04262  -0.81318 -0.13832
##   X.0.45300 g
## 1  -0.02447 b
## 2  -0.38238 g
## 3   1.00000 b
## 4  -0.65697 g
## 5   0.12011 b
## 6  -0.80975 g
# Display the field headers
# (names were auto-generated by read.csv from the first data row)
names(df)
##  [1] "X1"         "X0"         "X0.99539"   "X.0.05889"  "X0.85243"  
##  [6] "X0.02306"   "X0.83398"   "X.0.37708"  "X1.1"       "X0.03760"  
## [11] "X0.85243.1" "X.0.17755"  "X0.59755"   "X.0.44945"  "X0.60536"  
## [16] "X.0.38223"  "X0.84356"   "X.0.38542"  "X0.58212"   "X.0.32192" 
## [21] "X0.56971"   "X.0.29674"  "X0.36946"   "X.0.47357"  "X0.56811"  
## [26] "X.0.51171"  "X0.41078"   "X.0.46168"  "X0.21266"   "X.0.34090" 
## [31] "X0.42267"   "X.0.54487"  "X0.18641"   "X.0.45300"  "g"
# Display a summary of various results for each field
# (min/quartiles/mean/max per numeric column; `g` is still character here)
summary(df)
##        X1               X0       X0.99539         X.0.05889       
##  Min.   :0.0000   Min.   :0   Min.   :-1.0000   Min.   :-1.00000  
##  1st Qu.:1.0000   1st Qu.:0   1st Qu.: 0.4715   1st Qu.:-0.06539  
##  Median :1.0000   Median :0   Median : 0.8708   Median : 0.01670  
##  Mean   :0.8914   Mean   :0   Mean   : 0.6403   Mean   : 0.04467  
##  3rd Qu.:1.0000   3rd Qu.:0   3rd Qu.: 1.0000   3rd Qu.: 0.19473  
##  Max.   :1.0000   Max.   :0   Max.   : 1.0000   Max.   : 1.00000  
##     X0.85243          X0.02306           X0.83398         X.0.37708       
##  Min.   :-1.0000   Min.   :-1.00000   Min.   :-1.0000   Min.   :-1.00000  
##  1st Qu.: 0.4126   1st Qu.:-0.02487   1st Qu.: 0.2091   1st Qu.:-0.05348  
##  Median : 0.8086   Median : 0.02117   Median : 0.7280   Median : 0.01508  
##  Mean   : 0.6003   Mean   : 0.11615   Mean   : 0.5493   Mean   : 0.12078  
##  3rd Qu.: 1.0000   3rd Qu.: 0.33532   3rd Qu.: 0.9704   3rd Qu.: 0.45157  
##  Max.   : 1.0000   Max.   : 1.00000   Max.   : 1.0000   Max.   : 1.00000  
##       X1.1             X0.03760          X0.85243.1         X.0.17755       
##  Min.   :-1.00000   Min.   :-1.00000   Min.   :-1.00000   Min.   :-1.00000  
##  1st Qu.: 0.08679   1st Qu.:-0.04900   1st Qu.: 0.01918   1st Qu.:-0.06409  
##  Median : 0.68243   Median : 0.01755   Median : 0.66770   Median : 0.02975  
##  Mean   : 0.51045   Mean   : 0.18176   Mean   : 0.47511   Mean   : 0.15599  
##  3rd Qu.: 0.95056   3rd Qu.: 0.53619   3rd Qu.: 0.95816   3rd Qu.: 0.48361  
##  Max.   : 1.00000   Max.   : 1.00000   Max.   : 1.00000   Max.   : 1.00000  
##     X0.59755         X.0.44945           X0.60536         X.0.38223       
##  Min.   :-1.0000   Min.   :-1.00000   Min.   :-1.0000   Min.   :-1.00000  
##  1st Qu.: 0.0000   1st Qu.:-0.07180   1st Qu.: 0.0000   1st Qu.:-0.07787  
##  Median : 0.6457   Median : 0.03050   Median : 0.5978   Median : 0.00000  
##  Mean   : 0.4002   Mean   : 0.09496   Mean   : 0.3434   Mean   : 0.07243  
##  3rd Qu.: 0.9561   3rd Qu.: 0.37562   3rd Qu.: 0.9195   3rd Qu.: 0.31168  
##  Max.   : 1.0000   Max.   : 1.00000   Max.   : 1.0000   Max.   : 1.00000  
##     X0.84356         X.0.38542            X0.58212         X.0.32192       
##  Min.   :-1.0000   Min.   :-1.000000   Min.   :-1.0000   Min.   :-1.00000  
##  1st Qu.: 0.0000   1st Qu.:-0.222532   1st Qu.: 0.0000   1st Qu.:-0.22580  
##  Median : 0.5882   Median : 0.000000   Median : 0.5728   Median : 0.00000  
##  Mean   : 0.3806   Mean   :-0.002526   Mean   : 0.3588   Mean   :-0.02317  
##  3rd Qu.: 0.9362   3rd Qu.: 0.195467   3rd Qu.: 0.9003   3rd Qu.: 0.13672  
##  Max.   : 1.0000   Max.   : 1.000000   Max.   : 1.0000   Max.   : 1.00000  
##     X0.56971         X.0.29674            X0.36946         X.0.47357       
##  Min.   :-1.0000   Min.   :-1.000000   Min.   :-1.0000   Min.   :-1.00000  
##  1st Qu.: 0.0000   1st Qu.:-0.237063   1st Qu.: 0.0000   1st Qu.:-0.35608  
##  Median : 0.4982   Median : 0.000000   Median : 0.5321   Median : 0.00000  
##  Mean   : 0.3360   Mean   : 0.009167   Mean   : 0.3625   Mean   :-0.05622  
##  3rd Qu.: 0.8968   3rd Qu.: 0.188820   3rd Qu.: 0.9129   3rd Qu.: 0.16490  
##  Max.   : 1.0000   Max.   : 1.000000   Max.   : 1.0000   Max.   : 1.00000  
##     X0.56811         X.0.51171           X0.41078         X.0.46168       
##  Min.   :-1.0000   Min.   :-1.00000   Min.   :-1.0000   Min.   :-1.00000  
##  1st Qu.: 0.0000   1st Qu.:-0.32375   1st Qu.: 0.2836   1st Qu.:-0.42899  
##  Median : 0.5492   Median :-0.01491   Median : 0.7085   Median :-0.01768  
##  Mean   : 0.3956   Mean   :-0.06993   Mean   : 0.5420   Mean   :-0.06842  
##  3rd Qu.: 0.9072   3rd Qu.: 0.15792   3rd Qu.: 1.0000   3rd Qu.: 0.15486  
##  Max.   : 1.0000   Max.   : 1.00000   Max.   : 1.0000   Max.   : 1.00000  
##     X0.21266         X.0.34090           X0.42267         X.0.54487        
##  Min.   :-1.0000   Min.   :-1.00000   Min.   :-1.0000   Min.   :-1.000000  
##  1st Qu.: 0.0000   1st Qu.:-0.23494   1st Qu.: 0.0000   1st Qu.:-0.239347  
##  Median : 0.4992   Median : 0.00000   Median : 0.4469   Median : 0.000000  
##  Mean   : 0.3789   Mean   :-0.02701   Mean   : 0.3523   Mean   :-0.002248  
##  3rd Qu.: 0.8846   3rd Qu.: 0.15422   3rd Qu.: 0.8595   3rd Qu.: 0.200935  
##  Max.   : 1.0000   Max.   : 1.00000   Max.   : 1.0000   Max.   : 1.000000  
##     X0.18641         X.0.45300             g            
##  Min.   :-1.0000   Min.   :-1.00000   Length:350        
##  1st Qu.: 0.0000   1st Qu.:-0.16101   Class :character  
##  Median : 0.4131   Median : 0.00000   Mode  :character  
##  Mean   : 0.3498   Mean   : 0.01582                     
##  3rd Qu.: 0.8168   3rd Qu.: 0.17211                     
##  Max.   : 1.0000   Max.   : 1.00000
# Assigning the values "g" and "b" in field AI to "good" and "bad" respectively
# Converts the character label to a factor; the explicit level order makes
# "good" the first level in every downstream table. Any value other than
# "g"/"b" would become NA.

df$g <- factor(df$g, levels = c("g", "b"), labels = c("good", "bad"))

# Creating a method to normalize data between 0 and 1

#' Min-max normalization: linearly rescale a numeric vector onto [0, 1].
#'
#' @param x numeric vector to rescale.
#' @param na.rm drop NAs when computing the range? Defaults to FALSE,
#'   matching the original behavior (any NA makes the whole result NA).
#' @return numeric vector the same length as x. If all values of x are
#'   equal the denominator is 0 and the result is NaN -- callers must drop
#'   constant columns (e.g. X0) before normalizing.
max_min_normalize <- function(x, na.rm = FALSE) {
  rng <- range(x, na.rm = na.rm)
  (x - rng[1]) / (rng[2] - rng[1])
}

# Sanity-check the normalizer on two vectors that differ only in scale;
# both must map onto the same 0/0.25/0.5/0.75/1 sequence.

v1 <- c(1, 2, 3, 4, 5)
max_min_normalize(v1)
## [1] 0.00 0.25 0.50 0.75 1.00
# Second check: ten times the first vector, same normalized result.

v2 <- v1 * 10
max_min_normalize(v2)
## [1] 0.00 0.25 0.50 0.75 1.00
# Drop column X0: every value is 0 (see summary above), so it carries no
# information and would normalize to NaN (zero range).
df_n <- df[, names(df) != "X0"]

# Normalize every predictor onto [0, 1]; column 34 of df_n is the class
# label `g`, so it is excluded before applying the function column-wise.
df_n2 <- as.data.frame(lapply(df_n[-34], max_min_normalize))

# Checking to see if the normalization is working correctly
summary(df_n2)
##        X1            X0.99539        X.0.05889         X0.85243     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:1.0000   1st Qu.:0.7358   1st Qu.:0.4673   1st Qu.:0.7063  
##  Median :1.0000   Median :0.9354   Median :0.5083   Median :0.9043  
##  Mean   :0.8914   Mean   :0.8202   Mean   :0.5223   Mean   :0.8002  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.5974   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##     X0.02306         X0.83398        X.0.37708           X1.1       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.4876   1st Qu.:0.6046   1st Qu.:0.4733   1st Qu.:0.5434  
##  Median :0.5106   Median :0.8640   Median :0.5075   Median :0.8412  
##  Mean   :0.5581   Mean   :0.7746   Mean   :0.5604   Mean   :0.7552  
##  3rd Qu.:0.6677   3rd Qu.:0.9852   3rd Qu.:0.7258   3rd Qu.:0.9753  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##     X0.03760        X0.85243.1       X.0.17755         X0.59755     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.4755   1st Qu.:0.5096   1st Qu.:0.4680   1st Qu.:0.5000  
##  Median :0.5088   Median :0.8338   Median :0.5149   Median :0.8228  
##  Mean   :0.5909   Mean   :0.7376   Mean   :0.5780   Mean   :0.7001  
##  3rd Qu.:0.7681   3rd Qu.:0.9791   3rd Qu.:0.7418   3rd Qu.:0.9780  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##    X.0.44945         X0.60536        X.0.38223         X0.84356     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.4641   1st Qu.:0.5000   1st Qu.:0.4611   1st Qu.:0.5000  
##  Median :0.5152   Median :0.7989   Median :0.5000   Median :0.7941  
##  Mean   :0.5475   Mean   :0.6717   Mean   :0.5362   Mean   :0.6903  
##  3rd Qu.:0.6878   3rd Qu.:0.9597   3rd Qu.:0.6558   3rd Qu.:0.9681  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##    X.0.38542         X0.58212        X.0.32192         X0.56971     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3887   1st Qu.:0.5000   1st Qu.:0.3871   1st Qu.:0.5000  
##  Median :0.5000   Median :0.7864   Median :0.5000   Median :0.7491  
##  Mean   :0.4987   Mean   :0.6794   Mean   :0.4884   Mean   :0.6680  
##  3rd Qu.:0.5977   3rd Qu.:0.9502   3rd Qu.:0.5684   3rd Qu.:0.9484  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##    X.0.29674         X0.36946        X.0.47357         X0.56811     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3815   1st Qu.:0.5000   1st Qu.:0.3220   1st Qu.:0.5000  
##  Median :0.5000   Median :0.7661   Median :0.5000   Median :0.7746  
##  Mean   :0.5046   Mean   :0.6812   Mean   :0.4719   Mean   :0.6978  
##  3rd Qu.:0.5944   3rd Qu.:0.9565   3rd Qu.:0.5824   3rd Qu.:0.9536  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##    X.0.51171         X0.41078        X.0.46168         X0.21266     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3381   1st Qu.:0.6418   1st Qu.:0.2855   1st Qu.:0.5000  
##  Median :0.4925   Median :0.8543   Median :0.4912   Median :0.7496  
##  Mean   :0.4650   Mean   :0.7710   Mean   :0.4658   Mean   :0.6895  
##  3rd Qu.:0.5790   3rd Qu.:1.0000   3rd Qu.:0.5774   3rd Qu.:0.9423  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##    X.0.34090         X0.42267        X.0.54487         X0.18641     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3825   1st Qu.:0.5000   1st Qu.:0.3803   1st Qu.:0.5000  
##  Median :0.5000   Median :0.7234   Median :0.5000   Median :0.7066  
##  Mean   :0.4865   Mean   :0.6762   Mean   :0.4989   Mean   :0.6749  
##  3rd Qu.:0.5771   3rd Qu.:0.9297   3rd Qu.:0.6005   3rd Qu.:0.9084  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##    X.0.45300     
##  Min.   :0.0000  
##  1st Qu.:0.4195  
##  Median :0.5000  
##  Mean   :0.5079  
##  3rd Qu.:0.5861  
##  Max.   :1.0000
# Pull the class label (column 35, `g` -- a factor after the earlier
# recode) back out of the original data frame...
g <- df[["g"]]

# ...and bind it in front of the normalized predictors, so column 1 of
# `dat` is the label and the rest are features.
dat <- cbind(g, df_n2)

# Hold out roughly 10% of the rows for testing; train on the other 90%.
train_ratio <- 0.90

# Number of training rows.
# NOTE(review): the +1 bumps the count one past the rounded 90% -- kept
# as-is since later sections repeat the same formula.
train_N <- round(train_ratio * nrow(dat)) + 1

# Fix the RNG so the split is reproducible.
set.seed(813)

# Draw the training row indices without replacement (sample(n, ...) draws
# from 1:n, identical to the explicit 1:nrow(dat) form).
train_IDs_dat <- sample(nrow(dat), size = train_N, replace = FALSE)

# Sampled rows train the model...
train_dat <- dat[train_IDs_dat, ]

# ...and every row not sampled becomes test data.
test_dat <- dat[-train_IDs_dat, ]

# Creating a function that will let us create a frequency table

#' Relative frequency table of a vector, in percent.
#'
#' @param x vector to tabulate (here: the factor of class labels).
#' @param caption unused; given a default so one-argument calls are
#'   explicit -- table captions are supplied to pander() at render time.
#' @return a table of percentages rounded to 2 decimal places.
frqtab <- function(x, caption = NULL) {
  round(100 * prop.table(table(x)), 2)
}

# Class frequencies (in %) for the whole data set and for each split; if
# the random split preserved the class balance, all three should be close.
ft_whole <- frqtab(dat$g)
ft_train <- frqtab(train_dat$g)
ft_test <- frqtab(test_dat$g)

# Bind the three tables side by side -- row names ("good"/"bad") come from
# the factor levels -- then give the columns readable headers.
ftcmp_df <- as.data.frame(cbind(ft_whole, ft_train, ft_test))
names(ftcmp_df) <- c("Whole", "Training Set", "Test Set")

#Using the pander package to display the frequency of the g field from all three tables
# (rendered as an rmarkdown table: rows = class levels, columns = splits)

pander(ftcmp_df, style="rmarkdown", caption = "Comparison of good and bad frequency in %")
  Whole Training Set Test Set
good 64 63.92 64.71
bad 36 36.08 35.29

Table: Comparison of good and bad frequency in %

# Rule-of-thumb starting k: square root of the number of observations,
# rounded, plus one (sqrt(350) ~ 18.7 -> 19 -> 20).
k0 <- round(sqrt(nrow(dat))) + 1

# Show the chosen k value
k0
## [1] 20
# Classify the test rows with class::knn -- column 1 of each split holds
# the label, the remaining columns the normalized predictors.
knn.k0 <- knn(train = train_dat[, -1], test = test_dat[, -1], cl = train_dat[, 1], k = k0)

# Overall accuracy: fraction of test rows whose prediction matches the
# true label, expressed as a percentage rounded to 2 decimals.
accuracy <- round(100 * mean(knn.k0 == test_dat$g), digits = 2)
cat('\n', 'The overall accuracy is', accuracy, '%')
## 
##  The overall accuracy is 79.41 %
# Repeat the experiment with z-score normalization: scale() centers each
# predictor to mean 0 and unit sd.
df_z <- as.data.frame(lapply(df[-35], scale))
# Drop the constant X0 column (sd = 0, so scale() produced NaNs there).
df_z2 <- select(df_z, -X0)

# Reattach the class label (column 35 of the original data frame).
g <- df[, 35]
dat <- cbind(g, df_z2)

# Same 90/10 split with the same seed as the max-min run, so both
# normalization methods are evaluated on identical train/test rows.
train_ratio <- .9
train_N <- round(train_ratio * nrow(dat)) + 1
set.seed(813)
train_ID_dat <- sample(1:nrow(dat), size = train_N, replace = FALSE)
train_dat <- dat[train_ID_dat, ]
test_dat <- dat[-train_ID_dat, ]

# Same rule-of-thumb k and the same accuracy computation as before.
k0 <- round(sqrt(nrow(dat))) + 1
knn.k0 <- knn(train = train_dat[, -1], test = test_dat[, -1], cl = train_dat[, 1], k = k0)

accuracy_z <- round(100 * mean(test_dat$g == knn.k0), digits = 2)

# Side-by-side comparison of the two normalization runs.
# FIX: the caption previously said "Kappa Coefficient" (and misspelled
# "normalization"); the reported numbers are overall accuracy, not kappa.
accur_cmp.df <- as.data.frame(cbind(accuracy, accuracy_z))
colnames(accur_cmp.df) <- c("Max-Min Normalization", "Z-Score Normalization")
pander(accur_cmp.df, style = "rmarkdown", caption = "Comparison of normalization methods: Overall accuracy (in %)")
Max-Min Normalization Z-Score Normalization
79.41 79.41

Table: Comparison of normalization methods: Kappa Coefficient (in %)

# Sweep the training-set size from 5% to 95% and record the overall
# accuracy at each ratio (fixed seed, fixed k).
df_n2 <- as.data.frame(lapply(df[-35], max_min_normalize))
# Drop the constant X0 column: min-max scaling maps it to NaN (0/0),
# which would make knn() fail.
df_n2 <- select(df_n2, -X0)
type <- df[, 35]
# FIX: df_n2 was recomputed above but the z-scored df_z2 was bound here,
# leaving df_n2 dead. Bind df_n2 so the sweep actually runs on the
# min-max-normalized data. TODO(review): rerun the document -- the printed
# results below were generated from df_z2.
dat <- cbind(type, df_n2)

ratios <- seq(from = 0.05, to = 0.95, by = 0.05)
# Preallocate the result vector instead of seeding it with a magic number.
Accuracy.list <- numeric(length(ratios))
for (i in seq_along(ratios)) {
  train_ratio <- ratios[i]

  train_N <- round(train_ratio * nrow(dat)) + 1
  set.seed(813)
  train_ID_dat <- sample(1:nrow(dat), size = train_N, replace = FALSE)
  train_dat <- dat[train_ID_dat, ]
  test_dat <- dat[-train_ID_dat, ]

  # At the smallest ratios train_N < k0, so knn() warns that k exceeds
  # the number of training patterns (see the warning in the output).
  k0 <- round(sqrt(nrow(dat))) + 1
  knn.k0 <- knn(train = train_dat[, -1], test = test_dat[, -1], cl = train_dat[, 1], k = k0)

  Accuracy.list[i] <- round(100 * mean(test_dat$type == knn.k0), digits = 2)
  cat('The overall accuracy is ', Accuracy.list[i], '% when the percentage of training samples is', train_ratio*100, '%', '\n')       # to print % accuracy
}
## Warning in knn(train = train_dat[, -1], test = test_dat[, -1], cl = train_dat[,
## : k = 20 exceeds number 19 of patterns
## The overall accuracy is  64.35 % when the percentage of training samples is 5 % 
## The overall accuracy is  66.56 % when the percentage of training samples is 10 % 
## The overall accuracy is  72.3 % when the percentage of training samples is 15 % 
## The overall accuracy is  73.12 % when the percentage of training samples is 20 % 
## The overall accuracy is  71.65 % when the percentage of training samples is 25 % 
## The overall accuracy is  74.59 % when the percentage of training samples is 30 % 
## The overall accuracy is  76.11 % when the percentage of training samples is 35 % 
## The overall accuracy is  77.03 % when the percentage of training samples is 40 % 
## The overall accuracy is  75.39 % when the percentage of training samples is 45 % 
## The overall accuracy is  74.71 % when the percentage of training samples is 50 % 
## The overall accuracy is  73.72 % when the percentage of training samples is 55 % 
## The overall accuracy is  76.26 % when the percentage of training samples is 60 % 
## The overall accuracy is  77.69 % when the percentage of training samples is 65 % 
## The overall accuracy is  79.81 % when the percentage of training samples is 70 % 
## The overall accuracy is  82.56 % when the percentage of training samples is 75 % 
## The overall accuracy is  85.51 % when the percentage of training samples is 80 % 
## The overall accuracy is  82.35 % when the percentage of training samples is 85 % 
## The overall accuracy is  79.41 % when the percentage of training samples is 90 % 
## The overall accuracy is  76.47 % when the percentage of training samples is 95 %
# Plot accuracy vs training-set size; per the printed sweep, accuracy
# generally rises with more training data, peaking near the 80% split.
plot(x = seq(from=0.05, to=0.95, by=0.05)*100, y = Accuracy.list, type="b", xlab="Percentage of training samples (%)",ylab="Overall accuracy (%)")

IonosphereAccuracy

# Fix the split at 85% training data and prepare for a sweep over k.
df_n <- as.data.frame(lapply(df[-35], max_min_normalize))
# Drop the constant X0 column (min-max scaling maps it to NaN).
df_n <- select(df_n, -X0)
# FIX: the class label is column 35 (`g`), not column 10 (a numeric
# predictor). Using column 10 as the class is why the accuracies printed
# below sit near 5-10%. Also bind the freshly normalized df_n rather than
# the z-scored df_z2, which previously left df_n unused.
# TODO(review): rerun the document so the printed k-sweep reflects this.
type <- df[, 35]
dat <- cbind(type, df_n)

train_ratio <- 0.85
train_N <- round(train_ratio * nrow(dat)) + 1
set.seed(813)
train_ID_dat <- sample(1:nrow(dat), size = train_N, replace = FALSE)
train_dat <- dat[train_ID_dat, ]
test_dat <- dat[-train_ID_dat, ]

# Try every k from 1 to 2*k0 and record the overall accuracy for each.
# Preallocate the result vector (the old `k_try <- 813` / `Accuracy.list
# <- 813` seeds were dead values, immediately overwritten by the loop).
Accuracy.list <- numeric(k0 * 2)
for (k_try in seq_len(k0 * 2)) {
    knn.mod <- knn(train = train_dat[, -1], test = test_dat[, -1], cl = train_dat[, 1], k = k_try)
    Accuracy.list[k_try] <- round(100*mean(test_dat$type==knn.mod), digits = 2)
    cat('When k =', k_try, 'the overall accuracy =', Accuracy.list[k_try], '%\n')       # to print % accuracy
}
## When k = 1 the overall accuracy = 7.84 %
## When k = 2 the overall accuracy = 5.88 %
## When k = 3 the overall accuracy = 7.84 %
## When k = 4 the overall accuracy = 5.88 %
## When k = 5 the overall accuracy = 5.88 %
## When k = 6 the overall accuracy = 7.84 %
## When k = 7 the overall accuracy = 5.88 %
## When k = 8 the overall accuracy = 5.88 %
## When k = 9 the overall accuracy = 5.88 %
## When k = 10 the overall accuracy = 5.88 %
## When k = 11 the overall accuracy = 7.84 %
## When k = 12 the overall accuracy = 5.88 %
## When k = 13 the overall accuracy = 5.88 %
## When k = 14 the overall accuracy = 7.84 %
## When k = 15 the overall accuracy = 5.88 %
## When k = 16 the overall accuracy = 5.88 %
## When k = 17 the overall accuracy = 5.88 %
## When k = 18 the overall accuracy = 5.88 %
## When k = 19 the overall accuracy = 5.88 %
## When k = 20 the overall accuracy = 5.88 %
## When k = 21 the overall accuracy = 9.8 %
## When k = 22 the overall accuracy = 7.84 %
## When k = 23 the overall accuracy = 9.8 %
## When k = 24 the overall accuracy = 9.8 %
## When k = 25 the overall accuracy = 9.8 %
## When k = 26 the overall accuracy = 9.8 %
## When k = 27 the overall accuracy = 9.8 %
## When k = 28 the overall accuracy = 9.8 %
## When k = 29 the overall accuracy = 9.8 %
## When k = 30 the overall accuracy = 9.8 %
## When k = 31 the overall accuracy = 9.8 %
## When k = 32 the overall accuracy = 9.8 %
## When k = 33 the overall accuracy = 9.8 %
## When k = 34 the overall accuracy = 11.76 %
## When k = 35 the overall accuracy = 9.8 %
## When k = 36 the overall accuracy = 9.8 %
## When k = 37 the overall accuracy = 9.8 %
## When k = 38 the overall accuracy = 9.8 %
## When k = 39 the overall accuracy = 9.8 %
## When k = 40 the overall accuracy = 9.8 %
# Plot overall accuracy as a function of k for the fixed 85% split.
plot(x = 1 : (k0*2), y = Accuracy.list, type="b", xlab="k-Value",ylab="Overall accuracy (%)") 

IonosphereKValue