Compare commits
No commits in common. "aea7510ace5c5b266c04c82f3b3456e560b762a4" and "1508d413d868a9b448d34e0f33094e3b58226a27" have entirely different histories.
aea7510ace ... 1508d413d8
6 .gitignore vendored Normal file
@@ -0,0 +1,6 @@
# Python
.venv/*

# IDEs
.idea/*
9 LICENSE
@@ -1,9 +0,0 @@
MIT License

Copyright (c) 2025 hoo2

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
BIN PR_Assignment_2025_2026.pdf Normal file
Binary file not shown.
@@ -1,4 +0,0 @@
# PR-Assignment2025_26

About
Assignment for "Pattern recognition" course of THMMY in AUTh
300 datasets/dataset1.csv Normal file
@@ -0,0 +1,300 @@
|
||||
24.97467969839018,14.504545498621724,0.0
|
||||
27.090094575573442,12.30597776569358,0.0
|
||||
19.153229570537036,1.1224428018325945,0.0
|
||||
32.980912742625264,28.787312352356068,0.0
|
||||
31.816648495376327,18.80472071109226,0.0
|
||||
38.16481456323595,13.3152538073791,0.0
|
||||
39.16350360498408,18.32761254728531,0.0
|
||||
31.33807364278855,15.250304424591867,0.0
|
||||
39.25679746753066,38.179988690236726,0.0
|
||||
25.59541128586236,17.066636312522682,0.0
|
||||
38.58734644465331,13.564496680602005,0.0
|
||||
31.526307256338228,11.255274327209692,0.0
|
||||
21.64213696740311,16.229022075508126,0.0
|
||||
24.230483518400128,12.263772942140733,0.0
|
||||
29.265584223855853,16.925395788699138,0.0
|
||||
28.252292511785612,20.334244991609307,0.0
|
||||
27.12297858277194,14.338873886920101,0.0
|
||||
30.285511383959097,24.171349190094425,0.0
|
||||
31.005977614466396,23.762995469525492,0.0
|
||||
25.542543700304883,5.91802125245739,0.0
|
||||
26.47348640364448,19.638455353357298,0.0
|
||||
40.18940281207708,18.668778140584514,0.0
|
||||
33.037669482569044,15.15570097243874,0.0
|
||||
35.64867502721652,23.369787466839227,0.0
|
||||
33.394130204970146,25.268421146848222,0.0
|
||||
24.182524821280143,16.36309843064755,0.0
|
||||
24.076464557153482,15.314950036427277,0.0
|
||||
33.087808854171776,22.46070117802791,0.0
|
||||
18.339232140010182,6.4201110075923395,0.0
|
||||
17.030339132501254,2.8012977137840878,0.0
|
||||
19.16367551614868,17.768202278714337,0.0
|
||||
30.399834767507414,25.82402007292977,0.0
|
||||
13.149766949310695,7.122818872315756,0.0
|
||||
46.97246650739394,24.577642421748948,0.0
|
||||
28.807005536350538,5.671512591627327,0.0
|
||||
19.86166087178237,22.016537360768496,0.0
|
||||
33.68111752798275,15.707926477328854,0.0
|
||||
20.36762113637726,8.03480681093552,0.0
|
||||
25.220988271404728,20.783718533159757,0.0
|
||||
34.33944504897553,7.034648673753116,0.0
|
||||
36.46932060220665,19.490815627790962,0.0
|
||||
19.103398936294397,14.528544743090194,0.0
|
||||
37.07834128848617,9.861678090022558,0.0
|
||||
18.127452153677275,16.423119109710647,0.0
|
||||
31.428853589559587,7.868613516331019,0.0
|
||||
27.905341092490623,14.811020759039923,0.0
|
||||
30.57208627848106,16.47851657080185,0.0
|
||||
28.150143537431568,10.103067430983062,0.0
|
||||
24.8718878220957,25.200702803981464,0.0
|
||||
28.332768061917648,10.40332737218593,0.0
|
||||
34.79105563145385,25.851031514753203,0.0
|
||||
25.773964051783633,15.994138686287268,0.0
|
||||
17.850209616305342,12.225538730432731,0.0
|
||||
23.579708925939595,16.52717221207142,0.0
|
||||
39.67835728910902,21.95200378288049,0.0
|
||||
20.685657941316286,13.543158446921126,0.0
|
||||
25.982321923611533,13.34788674306283,0.0
|
||||
33.074224184070935,21.105728582216855,0.0
|
||||
41.01954983229733,27.480464558655118,0.0
|
||||
45.1672627267445,26.70473965037681,0.0
|
||||
37.79788546372592,17.67738408015966,0.0
|
||||
27.27503850636979,12.505828149801104,0.0
|
||||
42.27695106723079,24.688364330607655,0.0
|
||||
29.858302722587403,22.50839515843808,0.0
|
||||
29.01449729155842,14.569375086622134,0.0
|
||||
39.118376298028494,26.812498618194542,0.0
|
||||
26.772225673348355,16.534264495642486,0.0
|
||||
31.230651200983047,14.982395564357496,0.0
|
||||
30.981830732453325,24.2959824230451,0.0
|
||||
28.03812959884472,7.161144455036007,0.0
|
||||
32.27411850598569,2.7366327301359874,0.0
|
||||
27.107044532034994,8.656122084563044,0.0
|
||||
23.589463613172803,15.237271479876568,0.0
|
||||
20.111468802868465,5.825271535991957,0.0
|
||||
35.06056028505704,10.995705258940143,0.0
|
||||
30.631204430741064,17.893731839911286,0.0
|
||||
27.131965003077084,10.0915509408347,0.0
|
||||
25.39877098901797,18.09918318478722,0.0
|
||||
23.62204359917191,15.137924069125997,0.0
|
||||
35.95736680287625,20.865093534740456,0.0
|
||||
19.25339300067111,10.42509394918272,0.0
|
||||
35.41081507834921,34.654146201986585,0.0
|
||||
26.92147343880633,18.646391234497198,0.0
|
||||
28.186788078836543,18.703621351025223,0.0
|
||||
34.47655645449341,21.367794680759722,0.0
|
||||
24.94942860668023,20.01099766854866,0.0
|
||||
20.619589431411057,19.85744626456458,0.0
|
||||
33.44425708920528,17.426920763800318,0.0
|
||||
22.065739513586603,12.198172788136922,0.0
|
||||
26.870704294735837,9.949383283089391,0.0
|
||||
41.45771640922904,18.1964602237986,0.0
|
||||
33.25489239603091,19.16207176556233,0.0
|
||||
32.693637981717615,21.0423014040025,0.0
|
||||
27.354283080087523,14.271375447720086,0.0
|
||||
24.762754572488454,6.182155562616208,0.0
|
||||
20.25463532888877,12.820729887689836,0.0
|
||||
27.86063344341371,21.63107230913077,0.0
|
||||
41.48322608005099,25.51129738260698,0.0
|
||||
22.118569850436504,26.968621655860453,0.0
|
||||
30.156263898374853,32.35603665507355,0.0
|
||||
43.938911315907625,36.87146728611616,1.0
|
||||
43.98164010973954,38.59948179963487,1.0
|
||||
38.3642097224507,32.74945443265497,1.0
|
||||
40.06174541898381,36.564944916573516,1.0
|
||||
38.211428151980904,31.71464769416076,1.0
|
||||
41.18796751419362,39.17608260331072,1.0
|
||||
40.32956978516903,32.12132608437938,1.0
|
||||
38.74219104712392,34.60213315459584,1.0
|
||||
37.72210494625572,30.616041110504007,1.0
|
||||
36.33622356097608,29.9762748547628,1.0
|
||||
45.24942796996638,38.143886379640016,1.0
|
||||
43.69029811861015,40.954644167280954,1.0
|
||||
39.18300072915758,32.59312293613843,1.0
|
||||
42.8882056607554,38.96134862455709,1.0
|
||||
41.8874277267945,38.24951566248669,1.0
|
||||
41.7970394210136,38.44382313299293,1.0
|
||||
45.29560769784872,37.69351493189242,1.0
|
||||
38.948851207318945,35.21808257267893,1.0
|
||||
43.109204390305955,38.506524301031305,1.0
|
||||
41.81384930570261,34.384294111304285,1.0
|
||||
39.49498307925154,34.70969754258862,1.0
|
||||
38.843253001848026,27.944245026333164,1.0
|
||||
36.17788399823665,33.96297344080785,1.0
|
||||
37.38759245589562,28.20016061735646,1.0
|
||||
41.34889181755119,32.327587111792774,1.0
|
||||
47.56975940487664,44.650600643622425,1.0
|
||||
39.39280237825057,34.923309457471376,1.0
|
||||
40.665904607452696,36.72923616356956,1.0
|
||||
42.2965051755828,38.971623707650345,1.0
|
||||
35.45684398004309,28.99070062255243,1.0
|
||||
40.2080355526233,28.815102219902336,1.0
|
||||
37.17141126074457,28.851873721556117,1.0
|
||||
40.45122310214349,34.50461922413571,1.0
|
||||
38.03336925394242,30.654388674877765,1.0
|
||||
42.485283452215896,41.787148066868966,1.0
|
||||
41.05566729774741,33.22731881178824,1.0
|
||||
41.87508716281467,41.092900950729074,1.0
|
||||
35.90304603170612,28.95153356168646,1.0
|
||||
43.56743511016792,34.9873669314657,1.0
|
||||
41.41261208405239,37.24725804517698,1.0
|
||||
36.99157129412091,30.98226840941961,1.0
|
||||
38.42551317235955,32.145574263089536,1.0
|
||||
39.30338217466628,30.20583555034952,1.0
|
||||
37.50111589643913,29.5636132420259,1.0
|
||||
43.6739092799813,34.82489150603591,1.0
|
||||
37.942121816764505,29.101853047029003,1.0
|
||||
42.61452636415815,34.14269079004941,1.0
|
||||
47.444200300915895,39.53989852862226,1.0
|
||||
43.19722096614947,37.934454619057696,1.0
|
||||
41.98808347839853,39.13003547056611,1.0
|
||||
44.249330804803165,35.535744516551816,1.0
|
||||
38.56586869441201,32.75539709202968,1.0
|
||||
43.46871662610693,38.13976912028241,1.0
|
||||
33.92914495601694,32.31742577768868,1.0
|
||||
44.228183467954665,38.301268268490716,1.0
|
||||
37.43529766805671,31.510135784769723,1.0
|
||||
38.13349615272657,33.895461490694956,1.0
|
||||
41.62424291897898,37.34779117424227,1.0
|
||||
35.76878884895523,29.168122116538292,1.0
|
||||
43.07737301365197,37.0338614474408,1.0
|
||||
42.9069464656236,38.59584620980404,1.0
|
||||
42.586980689890154,35.525423072539176,1.0
|
||||
39.02682997132764,31.457595613689392,1.0
|
||||
37.25348279301722,34.84462740058045,1.0
|
||||
39.07582905771642,35.19686426168397,1.0
|
||||
41.09685185187746,35.67356007698102,1.0
|
||||
36.62700959140655,26.820720834644867,1.0
|
||||
45.264249633904704,41.655224227471386,1.0
|
||||
38.82059010749962,30.384999024889282,1.0
|
||||
39.18777491622001,30.18056776272377,1.0
|
||||
36.673073477435956,32.1916059144522,1.0
|
||||
42.323988522572186,38.03154067804528,1.0
|
||||
42.57802314481109,40.248715356441394,1.0
|
||||
38.694266783972154,31.60709389171209,1.0
|
||||
39.720357054216585,31.508103832630738,1.0
|
||||
39.79553912772707,33.938020627418524,1.0
|
||||
39.014699167816985,36.63670065689946,1.0
|
||||
42.198589335802666,34.03434490037531,1.0
|
||||
37.80893745285458,27.839004075789155,1.0
|
||||
38.255464230788945,31.440912831309063,1.0
|
||||
39.81702308015201,34.51783827049311,1.0
|
||||
39.75354689381546,35.86267251265675,1.0
|
||||
39.83112697425367,32.06459203998274,1.0
|
||||
37.87783992466487,32.08166073161177,1.0
|
||||
35.85529149476697,25.139892466967556,1.0
|
||||
38.92126209953337,34.14901944732243,1.0
|
||||
33.114082903576985,27.061174809876867,1.0
|
||||
41.38113629179428,38.263398628678026,1.0
|
||||
38.31687020943566,31.456863749150855,1.0
|
||||
41.760220707423805,36.27628659112911,1.0
|
||||
31.79385757783205,20.842340539935208,1.0
|
||||
42.97774908759978,41.60311507665078,1.0
|
||||
41.326139646287174,34.34639303286358,1.0
|
||||
39.84463097704167,32.46025246240992,1.0
|
||||
44.458824480470355,42.66180638553922,1.0
|
||||
39.96874427122386,36.28198175032862,1.0
|
||||
34.29741701352914,20.443261063797017,1.0
|
||||
36.614827546376404,33.88049036892335,1.0
|
||||
42.51644572248322,35.97583836070076,1.0
|
||||
47.33181426026008,43.68240150675391,1.0
|
||||
26.118558392760157,36.98554399878432,2.0
|
||||
28.656552707957296,38.182605427286646,2.0
|
||||
31.607416672474812,38.165430743949905,2.0
|
||||
27.139287564629807,34.243964514799636,2.0
|
||||
27.70407631469015,36.44495682297099,2.0
|
||||
25.47380189078156,29.88047184222296,2.0
|
||||
25.646086182979026,30.463882697085808,2.0
|
||||
27.18310372746103,31.268012851248805,2.0
|
||||
38.58484612987645,45.2809352128634,2.0
|
||||
24.905121211373455,32.33664627197827,2.0
|
||||
25.270745011397693,32.619470736667495,2.0
|
||||
31.574218719104692,41.11957578453198,2.0
|
||||
24.888378226350962,27.20125925561441,2.0
|
||||
33.47088663340133,42.98269547582912,2.0
|
||||
21.998350581760455,26.790731223991006,2.0
|
||||
19.658655383896416,26.1240249514721,2.0
|
||||
24.855783443092147,37.29907449545912,2.0
|
||||
27.747306284136023,31.596820658905408,2.0
|
||||
25.848560485966363,30.385559864748785,2.0
|
||||
29.413032879384645,33.40167165909398,2.0
|
||||
25.092724456881594,25.611155328691588,2.0
|
||||
30.401081419613305,44.126271170329716,2.0
|
||||
30.985463265384368,35.39274616887036,2.0
|
||||
29.32820759028531,32.57528753666598,2.0
|
||||
29.448777932195036,35.09234759508228,2.0
|
||||
26.807762997189478,33.69472751099397,2.0
|
||||
27.940259639597627,28.122913687033332,2.0
|
||||
26.80559429955721,33.10367151564501,2.0
|
||||
30.100980365760513,38.73113977205426,2.0
|
||||
29.166855491224027,33.45756533292005,2.0
|
||||
29.63440549122643,36.08650566750187,2.0
|
||||
20.18074577122451,30.26418855446613,2.0
|
||||
25.249211019156245,27.41651882667285,2.0
|
||||
22.662973156118795,35.36592638944971,2.0
|
||||
27.767886735119184,30.568961102043133,2.0
|
||||
24.941238457626028,40.802032844206934,2.0
|
||||
27.558347367808977,39.737180563603935,2.0
|
||||
25.69772474526501,37.36797598050147,2.0
|
||||
24.090099793756046,37.147577158523504,2.0
|
||||
21.72548608477651,27.81988973352093,2.0
|
||||
20.01906178043488,31.852204367513252,2.0
|
||||
26.97475322022816,33.40461408502355,2.0
|
||||
33.02474637336182,35.98967149081336,2.0
|
||||
26.119800475748647,37.78289600546074,2.0
|
||||
24.301917252138683,42.62043741118756,2.0
|
||||
25.383219880345973,38.80316007875246,2.0
|
||||
27.62249549894068,41.65355546864391,2.0
|
||||
30.49248411423475,37.950489881728515,2.0
|
||||
27.582220387152844,35.89317328452276,2.0
|
||||
24.136313572884514,31.8004257398429,2.0
|
||||
24.19386591754126,37.28569044358602,2.0
|
||||
23.205137086498254,22.770801042055837,2.0
|
||||
26.72442304834887,31.113610733898497,2.0
|
||||
26.887898142913492,31.270492769661644,2.0
|
||||
34.9774054793011,37.93808623490448,2.0
|
||||
30.954181527374466,37.56671314854478,2.0
|
||||
29.24549588848712,34.71883883682324,2.0
|
||||
21.88712533695842,30.160243107292985,2.0
|
||||
30.141289376124085,35.577373967512656,2.0
|
||||
32.770765322247904,37.74146255933535,2.0
|
||||
26.6244041460391,38.49036688687653,2.0
|
||||
24.888382160176636,25.19004661768992,2.0
|
||||
35.63305566668339,45.16195489007926,2.0
|
||||
28.505961553156162,41.036928169151096,2.0
|
||||
28.64041294381908,35.82237562634678,2.0
|
||||
27.741247519836115,36.065474230525325,2.0
|
||||
25.569314393055397,33.413507025031144,2.0
|
||||
17.66660074386368,28.228061582432417,2.0
|
||||
23.316511548353866,36.475986749959354,2.0
|
||||
34.30901172406985,38.18578592151953,2.0
|
||||
20.963119652369976,31.17673156133823,2.0
|
||||
25.010399420548517,35.08692236181734,2.0
|
||||
29.17998619838086,37.946245134705904,2.0
|
||||
33.923233067628004,46.062217016922155,2.0
|
||||
31.51784268309958,39.31181728528641,2.0
|
||||
28.892159376913362,31.779865868947994,2.0
|
||||
27.43812268680146,39.883413327756415,2.0
|
||||
28.194615431964202,36.1403774213305,2.0
|
||||
31.843125495684333,35.58329710013349,2.0
|
||||
30.469105880205213,37.981316488119425,2.0
|
||||
27.01960370551909,39.44588785035491,2.0
|
||||
27.64225353606146,29.23796270650378,2.0
|
||||
28.65746138462449,39.826173810969614,2.0
|
||||
26.643689070861065,31.013722612392414,2.0
|
||||
31.32278174859892,39.221102916933795,2.0
|
||||
28.68167604045206,31.962844597428607,2.0
|
||||
26.31395481579364,31.135686435737213,2.0
|
||||
31.882782676063314,40.295363556429244,2.0
|
||||
37.13870462120642,41.31160681549878,2.0
|
||||
22.681954309005807,27.950772833843857,2.0
|
||||
27.960767073613454,35.295891579965286,2.0
|
||||
27.96825457781948,30.68953264555008,2.0
|
||||
26.099130697309086,28.262183204285634,2.0
|
||||
23.77638094048231,32.01156280130771,2.0
|
||||
24.62056638277588,22.42544711044635,2.0
|
||||
30.671307017386482,38.605381275055734,2.0
|
||||
31.355304433228575,34.27459396903779,2.0
|
||||
30.45873051781789,42.07716284811732,2.0
|
||||
26.73018411974695,27.980131766953477,2.0
|
||||
27.305209228319153,38.12047635780232,2.0
|
||||
|
200 datasets/dataset2.csv Normal file
@@ -0,0 +1,200 @@
|
||||
0.44433663406480284
|
||||
0.7925339518025013
|
||||
2.545025517190781
|
||||
-2.2405281354919766
|
||||
-3.4146207810571516
|
||||
0.4147250757128812
|
||||
2.4716875609250395
|
||||
3.2508533686500227
|
||||
-2.2124157772514237
|
||||
0.8103311540401823
|
||||
-2.2378456480862434
|
||||
-1.0343725151083125
|
||||
-0.2866393999205472
|
||||
0.35050031082846966
|
||||
-0.23257661887644265
|
||||
0.37104992044371976
|
||||
-2.2036325076446386
|
||||
-3.8777500488272874
|
||||
2.8262247027233713
|
||||
-1.2536054414966977
|
||||
1.8300949136302889
|
||||
-3.7924412874169358
|
||||
2.251865254208296
|
||||
0.2570457409578632
|
||||
-0.9236636676383496
|
||||
-2.5861327547112447
|
||||
0.4627724385445401
|
||||
-3.3917515364060162
|
||||
2.519237568118281
|
||||
2.0708437291833133
|
||||
-1.4465788871798817
|
||||
-0.9251829940016905
|
||||
-0.22611204881145416
|
||||
1.6993910469750857
|
||||
2.1227353961983857
|
||||
4.273196319071724
|
||||
2.88777219096907
|
||||
0.5666353785594004
|
||||
-0.09509216549859753
|
||||
2.3687417275274667
|
||||
-1.0175404729039093
|
||||
-0.2971809458117672
|
||||
4.288425629676956
|
||||
0.3889065821114318
|
||||
0.6413221145161495
|
||||
2.90622537474183
|
||||
1.1598707423952994
|
||||
-1.8672017031845551
|
||||
-2.6808648382581612
|
||||
-2.257356351084625
|
||||
1.8438025659565977
|
||||
0.5055260158237881
|
||||
1.1443200941441618
|
||||
-3.2732590783318884
|
||||
-2.1425852058928094
|
||||
3.4010894977139543
|
||||
0.13564095268595905
|
||||
-2.360071760287551
|
||||
-1.9507138809874112
|
||||
-1.4804736324707597
|
||||
2.09839536202093
|
||||
-3.6058980845275115
|
||||
-0.13321524701596898
|
||||
0.1612607752725668
|
||||
-2.4182662843731277
|
||||
1.6519289113802673
|
||||
3.2352459229290136
|
||||
1.2835510723130203
|
||||
-0.46171103501133226
|
||||
1.4385286848963696
|
||||
-2.7104394029532695
|
||||
0.1461618156937113
|
||||
0.3650118579425937
|
||||
-1.7731422225304998
|
||||
0.23173488336069376
|
||||
0.7323732091548601
|
||||
0.04884165050449067
|
||||
-3.7831982537550064
|
||||
2.520545453350551
|
||||
1.2960390611726214
|
||||
0.8339240259850527
|
||||
-4.429830654116397
|
||||
1.1981034932976846
|
||||
0.5006987432514954
|
||||
5.395078990496765
|
||||
1.3167697690142401
|
||||
-2.5331872946088674
|
||||
2.9159732047191897
|
||||
2.4548714696480647
|
||||
3.6881975618600977
|
||||
0.9163162111901739
|
||||
1.0017372046393986
|
||||
1.4652349279619639
|
||||
-1.4434023943912062
|
||||
3.1661602486387816
|
||||
-0.7979551803363324
|
||||
-0.42923284647309307
|
||||
-0.37025630912940716
|
||||
1.2005800031405027
|
||||
1.626227534483592
|
||||
3.0651139149056705
|
||||
-0.4924396263158049
|
||||
-4.358030699913084
|
||||
3.4052772765491293
|
||||
2.3183280304007834
|
||||
-0.6208535446990165
|
||||
-2.5468587269752767
|
||||
-5.3970454752899615
|
||||
-0.44739038112030033
|
||||
0.7256301675006839
|
||||
-2.297885631497822
|
||||
1.4929656905020119
|
||||
-1.0897006799007265
|
||||
3.213167412062052
|
||||
0.6599013344441134
|
||||
1.6467484446728242
|
||||
-2.3931026127261035
|
||||
-0.008947120161736707
|
||||
-1.5106627545151445
|
||||
-1.9592951310249997
|
||||
1.1340353002814458
|
||||
0.17644262775908723
|
||||
0.26663946500363167
|
||||
0.9495519770990677
|
||||
-0.42880481650448793
|
||||
-1.4362936202587804
|
||||
-1.7704897396852362
|
||||
-1.9562569550788091
|
||||
-1.8934527203356284
|
||||
2.007489329814657
|
||||
-0.5114881072116803
|
||||
0.3349944909947704
|
||||
-2.7244809827541787
|
||||
-0.19462583201729958
|
||||
-1.3773585421913392
|
||||
1.177437380860664
|
||||
-0.45583985678132727
|
||||
1.63299579931792
|
||||
0.569911710457383
|
||||
-1.9418166085656803
|
||||
1.863578507920539
|
||||
-2.9576421443692613
|
||||
-0.054116890972942776
|
||||
-1.2173866553470154
|
||||
1.9017759515164159
|
||||
-3.577564857790573
|
||||
1.3199771900928572
|
||||
-2.5122932618630074
|
||||
1.8383582424344094
|
||||
2.566827343012099
|
||||
0.2232496102726367
|
||||
0.9856767979383574
|
||||
0.11183545650512718
|
||||
1.9654406770174764
|
||||
-2.4290367499160195
|
||||
-0.5201061272139587
|
||||
-1.3522941683405443
|
||||
-1.3196277667116627
|
||||
-2.722735365405322
|
||||
1.4301439731748897
|
||||
2.6335175120959033
|
||||
-0.6485068656636509
|
||||
0.6329775589967779
|
||||
0.8058363451833187
|
||||
-2.201124060313228
|
||||
0.7503041300010522
|
||||
-3.648347215764568
|
||||
-0.045432316321939725
|
||||
1.8912505783313855
|
||||
1.672089718120626
|
||||
-0.4368671022795008
|
||||
2.648227616662775
|
||||
-0.40655629209167976
|
||||
-0.3733476741948883
|
||||
1.385800452515873
|
||||
-0.05262248037932767
|
||||
-0.5101437254240047
|
||||
-1.1232098196014242
|
||||
-1.7075374848450346
|
||||
-0.7977078996683262
|
||||
2.4176000702161677
|
||||
1.4997926151709475
|
||||
-1.8879981536601977
|
||||
0.1927394993856518
|
||||
0.027252837253441416
|
||||
0.22656976556972794
|
||||
1.5689272476741303
|
||||
0.5976860924148262
|
||||
-0.3165250228807075
|
||||
0.7586422139098628
|
||||
1.70751568778911
|
||||
4.458124612377124
|
||||
-0.9436474482972758
|
||||
-0.06218589742735808
|
||||
3.059155125823772
|
||||
-1.063000033831682
|
||||
-0.40976090900282586
|
||||
0.33916674225468374
|
||||
-1.4933565235405044
|
||||
0.09004349701031275
|
||||
|
50 datasets/dataset3.csv Normal file
@@ -0,0 +1,50 @@
|
||||
12.007316379366022,35.453343840864136,0.0
|
||||
15.85802747727258,32.73153891115562,0.0
|
||||
14.544735894683827,34.72789914689461,0.0
|
||||
14.336450700339272,27.53028012319661,0.0
|
||||
17.153297933215566,28.657291783700945,0.0
|
||||
11.772915687495166,32.729446114647416,0.0
|
||||
13.351633281379616,37.01936442107696,0.0
|
||||
12.785400760087164,42.4057361644188,0.0
|
||||
21.964703199313593,34.9466593663226,0.0
|
||||
11.82095541705878,27.957215285045507,0.0
|
||||
13.025462089861636,38.25202709652252,0.0
|
||||
11.736607836940316,34.79279709628895,0.0
|
||||
20.629726346506207,31.0042536372229,0.0
|
||||
11.871327025645483,40.168471778391364,0.0
|
||||
19.12235095848515,43.35466886533116,0.0
|
||||
16.555226106059198,36.809862846985574,0.0
|
||||
8.599100836500378,37.53774434509547,0.0
|
||||
13.856927802255358,35.760145339031205,0.0
|
||||
11.619667858193406,34.98560094428625,0.0
|
||||
13.364727517109467,37.47009597163829,0.0
|
||||
14.296782484866847,35.100387562158026,0.0
|
||||
14.980512580163303,34.33587327809435,0.0
|
||||
15.106368994580997,30.09163334353261,0.0
|
||||
14.845299928708526,34.3185411862187,0.0
|
||||
14.291074728202979,40.790037836387256,0.0
|
||||
15.677712519866775,37.51392719715231,1.0
|
||||
12.227883149744216,32.6739737305769,1.0
|
||||
22.971673086159704,29.272784635350718,1.0
|
||||
20.25715137609918,29.44234943276302,1.0
|
||||
14.085876451994762,28.041964931997125,1.0
|
||||
9.566722347265202,28.707666612187055,1.0
|
||||
19.683864852272148,34.14385442219437,1.0
|
||||
6.369476958885775,30.02818259717851,1.0
|
||||
18.641748269630547,36.817848477933424,1.0
|
||||
4.1548488901098155,17.598528798403017,1.0
|
||||
12.256412985285513,36.84822554900505,1.0
|
||||
12.31542320106424,36.261909047689635,1.0
|
||||
7.042987719817875,28.639123879619923,1.0
|
||||
12.609757020930658,31.77303691678669,1.0
|
||||
15.666612931298188,30.92155046175968,1.0
|
||||
8.26269474290834,29.181300941228834,1.0
|
||||
8.35969922969667,33.47369871487308,1.0
|
||||
18.478894400715713,34.247849032128116,1.0
|
||||
16.575618176630922,38.38638489720046,1.0
|
||||
9.446566949560763,19.753726204750123,1.0
|
||||
7.865683779839895,27.323064659860023,1.0
|
||||
5.069436681388034,28.15728044206776,1.0
|
||||
11.853658941742761,28.307662937421608,1.0
|
||||
14.031465547058097,32.85320122715258,1.0
|
||||
8.447916715728624,24.874126576928127,1.0
|
||||
|
8743 datasets/datasetTV.csv Normal file
File diff suppressed because it is too large
6955 datasets/datasetTest.csv Normal file
File diff suppressed because it is too large
50 datasets/testset.csv Normal file
@@ -0,0 +1,50 @@
|
||||
5.592195311512395,35.6999193871657,1.0
|
||||
13.333621510895533,40.28918046627939,0.0
|
||||
10.733249778789123,28.25624275461612,1.0
|
||||
6.2525288045799865,25.736509310336512,1.0
|
||||
12.308845318440717,29.03770600539197,1.0
|
||||
16.27613878224661,31.790576530123573,1.0
|
||||
24.099039440890856,34.99135933375871,1.0
|
||||
20.383747333476023,30.28545611171144,0.0
|
||||
14.717472811101047,28.86467005609947,1.0
|
||||
10.373630582669104,22.877105646978112,0.0
|
||||
12.495403261838794,35.91517518712359,0.0
|
||||
12.897445295789295,28.479192063037416,0.0
|
||||
6.914464650498677,22.466156316652278,1.0
|
||||
10.399006622653925,28.8867124672721,1.0
|
||||
15.70353006618908,22.143610580354622,0.0
|
||||
19.277795325421383,35.09564741982842,0.0
|
||||
8.001675670052505,22.482157661042464,1.0
|
||||
4.383440945547652,23.408693294759146,1.0
|
||||
10.728211763103795,32.77745637715107,1.0
|
||||
15.185676953132571,33.73921780012754,0.0
|
||||
11.917235467445526,27.630950709034465,1.0
|
||||
4.3421658305223065,27.940407898100837,1.0
|
||||
17.6144060671268,28.911024424376226,1.0
|
||||
7.853059854725827,33.2303280708454,1.0
|
||||
8.285701089133454,32.990700554220936,0.0
|
||||
9.320416922787706,26.640988074914205,0.0
|
||||
16.455626818321953,34.70855859677077,0.0
|
||||
17.754010834264285,34.30775363866857,0.0
|
||||
14.8063863693831,31.67296199309468,0.0
|
||||
14.757783602975078,29.786493635043133,1.0
|
||||
9.930570422090852,32.22831269703413,1.0
|
||||
15.054489283850817,35.82871581795497,0.0
|
||||
9.750029107309508,31.4280027220271,1.0
|
||||
13.586680286331283,28.59915183535125,0.0
|
||||
20.09615738757791,32.67866327391467,0.0
|
||||
14.569090796483817,38.70662650856465,0.0
|
||||
17.02068254232686,31.916200863626422,1.0
|
||||
7.690790172770082,25.62442765033304,1.0
|
||||
17.224225194035046,37.202039597105575,0.0
|
||||
15.380576088003362,34.30144068207968,1.0
|
||||
11.97872908515288,28.575869789477196,1.0
|
||||
14.618648855516811,32.29578524163844,0.0
|
||||
19.590265746032763,34.29006253034293,1.0
|
||||
12.008256626541108,27.29153667084818,0.0
|
||||
10.596957826106175,32.00766271626509,0.0
|
||||
11.555817375253879,45.577624578231564,0.0
|
||||
8.09321620784735,25.381014734705055,1.0
|
||||
12.08327393479309,29.286112774782286,1.0
|
||||
14.900150330392192,26.876456147977215,1.0
|
||||
10.440241676625666,29.585393991193335,1.0
|
||||
|
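All labelled datasets above share the same layout: feature columns first, class label in the last column (dataset2.csv is the exception, carrying one unlabelled value per row). A minimal loading sketch with pandas, assuming a local copy under datasets/ rather than the GitHub raw URLs the code uses:

import pandas as pd

# Hypothetical local path; the repository code fetches the raw files from GitHub instead.
df = pd.read_csv("datasets/dataset1.csv", header=None)

X = df.iloc[:, :-1].values   # feature columns (two floats per row)
y = df.iloc[:, -1].values    # last column: class label (0.0, 1.0 or 2.0)
print(X.shape, y.shape)      # expected: (300, 2) (300,)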
6 presentation/.gitignore vendored Normal file
@@ -0,0 +1,6 @@
# TeX auxiliary files
*.aux
*.log
*.out
*.synctex.gz
4 requirements.txt Normal file
@@ -0,0 +1,4 @@
numpy
pandas
matplotlib
3 src/.gitignore vendored Normal file
@@ -0,0 +1,3 @@
# Python
__pycache__/*
226 src/partA.py Normal file
@@ -0,0 +1,226 @@
|
||||
# ------------------------------------------------------------
|
||||
# Part A - Gaussian Parameter Estimation (MLE) & Visualization
|
||||
# Pattern Recognition – Semester Assignment
|
||||
#
|
||||
# Author:
|
||||
# Christos Choutouridis (ΑΕΜ 8997)
|
||||
# cchoutou@ece.auth.gr
|
||||
#
|
||||
# Description:
|
||||
# This module implements Part A of the assignment:
|
||||
# - Loading and splitting the dataset into classes
|
||||
# - MLE estimation of mean vectors and covariance matrices
|
||||
# - Construction of Gaussian pdf surfaces
|
||||
# - 3D visualization of class-conditional densities
|
||||
#
|
||||
# Notes:
|
||||
# The implementation follows the theoretical formulation of
|
||||
# multivariate Gaussian distributions and MLE parameter
|
||||
# estimation as taught in class.
|
||||
# ------------------------------------------------------------
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from toolbox import load_csv, split_dataset_by_class, dataset1
|
||||
|
||||
from typing import Tuple, Dict
|
||||
|
||||
def mle_mean(X: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
MLE estimate of the mean vector.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : ndarray, shape (N, d)
|
||||
Data samples.
|
||||
|
||||
Returns
|
||||
-------
|
||||
mu : ndarray, shape (d,)
|
||||
Estimated mean vector.
|
||||
"""
|
||||
return np.sum(X, axis=0) / X.shape[0]
|
||||
|
||||
|
||||
def mle_covariance(X: np.ndarray, mu: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
MLE estimate of the covariance matrix.
|
||||
(Divide by N, not N-1)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : ndarray, shape (N, d)
|
||||
Data samples.
|
||||
mu : ndarray, shape (d,)
|
||||
Mean vector.
|
||||
|
||||
Returns
|
||||
-------
|
||||
cov : ndarray, shape (d, d)
|
||||
Covariance matrix.
|
||||
"""
|
||||
N = X.shape[0]
|
||||
diff = X - mu
|
||||
cov = (diff.T @ diff) / N
|
||||
return cov
|
||||
|
||||
|
||||
def estimate_gaussians_mle(classes: Dict[int, np.ndarray]) -> Dict[int, Tuple[np.ndarray, np.ndarray]]:
|
||||
"""
|
||||
Estimates mean and covariance (MLE) for each class.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
classes : dict
|
||||
Dictionary mapping class label -> samples of that class.
|
||||
|
||||
Returns
|
||||
-------
|
||||
params : dict
|
||||
Dictionary mapping class label -> (mu, cov),
|
||||
where mu has shape (d,) and cov has shape (d,d).
|
||||
"""
|
||||
params: Dict[int, Tuple[np.ndarray, np.ndarray]] = {}
|
||||
|
||||
for c, Xc in classes.items():
|
||||
mu_c = mle_mean(Xc)
|
||||
cov_c = mle_covariance(Xc, mu_c)
|
||||
params[c] = (mu_c, cov_c)
|
||||
|
||||
return params
|
||||
|
||||
|
||||
# --------------------------------------------------
|
||||
# Part A: Gaussian pdf and grid computation
|
||||
# --------------------------------------------------
|
||||
def gaussian_pdf(point: np.ndarray, mu: np.ndarray, cov: np.ndarray) -> float:
|
||||
"""
|
||||
Multivariate Gaussian pdf at a single point (general dimension).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
point : ndarray, shape (d,)
|
||||
feature data of the point
|
||||
mu : ndarray, shape (d,)
|
||||
mean vector
|
||||
cov : ndarray, shape (d,d)
|
||||
covariance array
|
||||
|
||||
Returns
|
||||
-------
|
||||
value : float
|
||||
pdf value at `point`.
|
||||
"""
|
||||
d = mu.shape[0] # dimension
|
||||
diff = point - mu
|
||||
det = np.linalg.det(cov)
|
||||
inv = np.linalg.inv(cov)
|
||||
|
||||
# norm_const = 1 / ( (2π)^(d/2) * sqrt(det Σ) )
|
||||
norm_const = 1.0 / np.sqrt(((2 * np.pi) ** d) * det)
|
||||
exponent = -0.5 * diff.T @ inv @ diff
|
||||
|
||||
return float(norm_const * np.exp(exponent))
|
||||
|
||||
|
||||
def compute_gaussian_grid(
|
||||
X: np.ndarray, mu: np.ndarray, cov: np.ndarray, grid_size: int = 50
|
||||
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Creates a 2D grid over the range of the first two dimensions of X
|
||||
and computes pdf values using the multivariate Gaussian pdf.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : ndarray, shape (N, d)
|
||||
Data samples (only used to define plotting range for dims 0 and 1).
|
||||
mu : ndarray, shape (d,)
|
||||
mean vector value
|
||||
cov : ndarray, shape (d,d)
|
||||
covariance
|
||||
grid_size : int
|
||||
Number of points per axis.
|
||||
|
||||
Returns
|
||||
-------
|
||||
tuple:
|
||||
Xgrid: ndarray, shape (grid_size, grid_size)
|
||||
X Meshgrid coordinates for dimensions 0 and 1
|
||||
Ygrid: ndarray, shape (grid_size, grid_size)
|
||||
Y Meshgrid coordinates for dimensions 0 and 1,
|
||||
Z: ndarray, shape (grid_size, grid_size)
|
||||
pdf values at each grid point.
|
||||
"""
|
||||
# Range only on the first two dimensions
|
||||
x_vals = np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), grid_size)
|
||||
y_vals = np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), grid_size)
|
||||
|
||||
Xgrid, Ygrid = np.meshgrid(x_vals, y_vals)
|
||||
Z = np.zeros_like(Xgrid, dtype=float)
|
||||
|
||||
for i in range(Xgrid.shape[0]):
|
||||
for j in range(Xgrid.shape[1]):
|
||||
point = np.array([Xgrid[i, j], Ygrid[i, j]])
|
||||
Z[i, j] = gaussian_pdf(point, mu, cov)
|
||||
|
||||
return Xgrid, Ygrid, Z
|
||||
|
||||
|
||||
# --------------------------------------------------
|
||||
# Part A: 3D plotting for multiple classes
|
||||
# --------------------------------------------------
|
||||
def plot_gaussians_3d(
|
||||
X: np.ndarray, params: Dict[int, Tuple[np.ndarray, np.ndarray]], grid_size: int = 50
|
||||
) -> None:
|
||||
"""
|
||||
Plots the Gaussian pdfs (MLE estimates) for all classes on a single 3D figure.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : ndarray, shape (N, 2)
|
||||
All data samples (used to define the plotting range).
|
||||
params : dict
|
||||
Dictionary mapping class label -> (mu, cov).
|
||||
grid_size : int
|
||||
Resolution of the grid for pdf evaluation.
|
||||
"""
|
||||
fig = plt.figure(figsize=(12, 8))
|
||||
ax = fig.add_subplot(111, projection='3d')
|
||||
|
||||
for idx, (c, (mu_c, cov_c)) in enumerate(params.items()):
|
||||
Xgrid, Ygrid, Z = compute_gaussian_grid(X, mu_c, cov_c, grid_size=grid_size)
|
||||
ax.plot_surface(Xgrid, Ygrid, Z, alpha=0.6, label=f"Class {c}")
|
||||
|
||||
ax.set_title("MLE Estimated 2D Gaussians (all classes)")
|
||||
ax.set_xlabel("X1")
|
||||
ax.set_ylabel("X2")
|
||||
ax.set_zlabel("pdf")
|
||||
plt.show()
|
||||
|
||||
|
||||
|
||||
# --------------------------------------------------
|
||||
# Part A: convenience runner (optional)
|
||||
# --------------------------------------------------
|
||||
if __name__ == "__main__":
|
||||
"""
|
||||
Convenience function to run the whole Part A pipeline:
|
||||
- load dataset
|
||||
- split by class
|
||||
- estimate Gaussian parameters (MLE) per class
|
||||
- plot 3D pdf surfaces
|
||||
"""
|
||||
df1 = load_csv(dataset1, header=None)
|
||||
|
||||
X, y, classes = split_dataset_by_class(df1)
|
||||
params = estimate_gaussians_mle(classes)
|
||||
|
||||
# Optional parameters printing
|
||||
for c, (mu_c, cov_c) in params.items():
|
||||
print(f"Class {c}:")
|
||||
print(" mu =", mu_c)
|
||||
print(" cov =\n", cov_c)
|
||||
print()
|
||||
|
||||
# Plot 3D surfaces
|
||||
plot_gaussians_3d(X, params, grid_size=50)
|
||||
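A quick sanity check for the estimators above (not part of the repository): the closed-form MLE mean and covariance, with the 1/N normalisation noted in mle_covariance, agree with NumPy's built-ins when np.cov is called with bias=True.

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 2))           # toy 2-D samples

mu = X.sum(axis=0) / X.shape[0]         # same formula as mle_mean
diff = X - mu
cov = (diff.T @ diff) / X.shape[0]      # divide by N, as in mle_covariance

assert np.allclose(mu, X.mean(axis=0))
assert np.allclose(cov, np.cov(X, rowvar=False, bias=True))   # bias=True uses 1/N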
340 src/partB.py Normal file
@@ -0,0 +1,340 @@
|
||||
# ------------------------------------------------------------
|
||||
# Part B - Parzen Window Density Estimation (1D)
|
||||
# Pattern Recognition – Semester Assignment
|
||||
#
|
||||
# Author:
|
||||
# Christos Choutouridis (ΑΕΜ 8997)
|
||||
# cchoutou@ece.auth.gr
|
||||
#
|
||||
# Description:
|
||||
# This module implements Part B of the assignment:
|
||||
# - 1D Parzen window density estimation using uniform and
|
||||
# Gaussian kernels
|
||||
# - Computation of predicted likelihood for varying bandwidth h
|
||||
# - Comparison with the true N(1, 4) distribution
|
||||
# - MSE analysis and optimal bandwidth selection
|
||||
#
|
||||
# ------------------------------------------------------------
|
||||
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from typing import Sequence
|
||||
from toolbox import load_csv, dataset2
|
||||
|
||||
|
||||
# --------------------------------------------------
|
||||
# Optional: D-dimensional Bishop-style kernels (not used in Part B)
|
||||
# --------------------------------------------------
|
||||
def kernel_hypercube(u: np.ndarray) -> float:
|
||||
"""
|
||||
D-dimensional uniform kernel (hypercube).
|
||||
|
||||
Bishop eq. (2.247):
|
||||
k(u) = 1 if |u_i| <= 1/2 for all i, else 0
|
||||
|
||||
In 1D this reduces to:
|
||||
k(u) = 1 if |u| <= 1/2 else 0
|
||||
|
||||
This kernel integrates to 1:
|
||||
∫_{-1/2}^{1/2} 1 du = 1
|
||||
"""
|
||||
return float(np.all(np.abs(u) <= 0.5))
|
||||
|
||||
|
||||
def kernel_gaussian(u: np.ndarray) -> float:
|
||||
"""
|
||||
D-dimensional Gaussian kernel.
|
||||
|
||||
k(u) = (2π)^(-D/2) * exp(-||u||^2 / 2)
|
||||
|
||||
Integral over R^D is 1.
|
||||
"""
|
||||
d = u.shape[0]
|
||||
norm_const = 1.0 / ((2.0 * np.pi) ** (d / 2.0))
|
||||
return float(norm_const * np.exp(-0.5 * np.dot(u, u)))
|
||||
|
||||
|
||||
# --------------------------------------------------
|
||||
# 1D Parzen kernels (used in this Part)
|
||||
# --------------------------------------------------
|
||||
def parzen_kernel_uniform(u: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
1D uniform Parzen kernel (box).
|
||||
|
||||
Bishop-style in 1D:
|
||||
k(u) = 1 if |u| <= 1/2
|
||||
= 0 otherwise
|
||||
|
||||
Integral:
|
||||
∫_{-1/2}^{1/2} 1 du = 1
|
||||
|
||||
Parameters
|
||||
----------
|
||||
u : ndarray
|
||||
Array of values where the kernel is evaluated.
|
||||
|
||||
Returns
|
||||
-------
|
||||
values : ndarray
|
||||
Kernel values at u.
|
||||
"""
|
||||
return (np.abs(u) <= 0.5).astype(float)
|
||||
|
||||
|
||||
def parzen_kernel_gaussian(u: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
1D Gaussian kernel with mean 0, variance 1.
|
||||
|
||||
k(u) = (1 / sqrt(2π)) * exp(-u^2 / 2)
|
||||
|
||||
Integral over R is 1.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
u : ndarray
|
||||
|
||||
Returns
|
||||
-------
|
||||
values : ndarray
|
||||
"""
|
||||
return (1.0 / np.sqrt(2.0 * np.pi)) * np.exp(-0.5 * (u ** 2))
|
||||
|
||||
|
||||
# --------------------------------------------------
|
||||
# Parzen estimator (1D, point-wise)
|
||||
# --------------------------------------------------
|
||||
def parzen_estimate_1d(x_eval: float, data: np.ndarray, h: float, kernel_fn) -> float:
|
||||
"""
|
||||
Parzen window density estimate in 1D, for a single point x_eval.
|
||||
|
||||
Implements:
|
||||
p_hat(x_eval) = (1 / (N h)) * sum_n k((x_eval - x_n) / h)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x_eval : float
|
||||
Point where the density is estimated.
|
||||
data : ndarray, shape (N,)
|
||||
1D data samples.
|
||||
h : float
|
||||
Bandwidth (window width).
|
||||
kernel_fn : callable
|
||||
Kernel function K(u), applied elementwise on u.
|
||||
|
||||
Returns
|
||||
-------
|
||||
f_hat : float
|
||||
Estimated pdf value at x_eval.
|
||||
"""
|
||||
N = data.shape[0]
|
||||
u = (x_eval - data) / h
|
||||
return float(np.sum(kernel_fn(u)) / (N * h))
|
||||
|
||||
|
||||
def evaluate_parzen(data: np.ndarray, h: float, kernel_fn) -> np.ndarray:
|
||||
"""
|
||||
Evaluates the Parzen estimate at each sample in 'data' itself.
|
||||
|
||||
For each x_i in data:
|
||||
p_hat(x_i) = (1 / (N h)) * sum_n k((x_i - x_n) / h)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : ndarray, shape (N,)
|
||||
1D data samples.
|
||||
h : float
|
||||
Bandwidth.
|
||||
kernel_fn : callable
|
||||
Kernel function K(u).
|
||||
|
||||
Returns
|
||||
-------
|
||||
estimates : ndarray, shape (N,)
|
||||
Estimated pdf values at each data point.
|
||||
"""
|
||||
N = data.shape[0]
|
||||
estimates = np.zeros(N, dtype=float)
|
||||
|
||||
for i in range(N):
|
||||
estimates[i] = parzen_estimate_1d(data[i], data, h, kernel_fn)
|
||||
|
||||
return estimates
|
||||
|
||||
|
||||
# --------------------------------------------------
|
||||
# True pdf and error
|
||||
# --------------------------------------------------
|
||||
def true_normal_pdf_1d(x: np.ndarray, mu: float, var: float) -> np.ndarray:
|
||||
"""
|
||||
True normal pdf N(mu, var) at points x (array).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : ndarray
|
||||
Points where the pdf is evaluated.
|
||||
mu : float
|
||||
Mean
|
||||
var : float
|
||||
Variance
|
||||
|
||||
Returns
|
||||
-------
|
||||
pdf : ndarray
|
||||
The normal pdf N(mu, var)
|
||||
"""
|
||||
sigma = np.sqrt(var)
|
||||
coef = 1.0 / (np.sqrt(2.0 * np.pi) * sigma)
|
||||
z = (x - mu) / sigma
|
||||
return coef * np.exp(-0.5 * z * z)
|
||||
|
||||
|
||||
def mse(y_true: np.ndarray, y_pred: np.ndarray) -> float:
|
||||
"""
|
||||
Mean squared error between two arrays.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true : ndarray
|
||||
actual labels array
|
||||
y_pred : ndarray
|
||||
predicted labels array
|
||||
|
||||
Returns
|
||||
-------
|
||||
err : float
|
||||
Mean squared error.
|
||||
"""
|
||||
return float(np.mean((y_true - y_pred) ** 2))
|
||||
|
||||
|
||||
def scan_bandwidths_parzen(
|
||||
data: np.ndarray, h_values: Sequence[float], kernel_fn, mu_true: float = 1.0, var_true: float = 4.0
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
For each h in h_values, computes:
|
||||
|
||||
- estimated pdf via Parzen (predicted likelihood)
|
||||
- true pdf via N(mu_true, var_true) (true likelihood)
|
||||
- MSE between them
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : ndarray, shape (N,)
|
||||
1D data samples.
|
||||
h_values : sequence of float
|
||||
Bandwidth values to test.
|
||||
kernel_fn : callable
|
||||
Kernel function K(u).
|
||||
mu_true : float
|
||||
True mean, default to 1.0.
|
||||
var_true : float
|
||||
True variance, default to 4.0.
|
||||
|
||||
Returns
|
||||
-------
|
||||
errors : ndarray, shape (len(h_values),)
|
||||
MSE between estimated and true pdf as array of len(h_values)
|
||||
"""
|
||||
true_values = true_normal_pdf_1d(data, mu=mu_true, var=var_true)
|
||||
errors_list = []
|
||||
|
||||
for h in h_values:
|
||||
est_values = evaluate_parzen(data, h, kernel_fn)
|
||||
err = mse(true_values, est_values)
|
||||
errors_list.append(err)
|
||||
|
||||
return np.array(errors_list, dtype=float)
|
||||
|
||||
|
||||
# --------------------------------------------------
|
||||
# Plotting helpers
|
||||
# --------------------------------------------------
|
||||
def plot_h_vs_error(h_values: np.ndarray, errors: np.ndarray, title: str) -> None:
|
||||
"""
|
||||
Simple plot of bandwidth vs error.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
h_values : ndarray
|
||||
bandwidth values
|
||||
errors : ndarray
|
||||
error values
|
||||
title : str
|
||||
title
|
||||
"""
|
||||
plt.figure(figsize=(8, 5))
|
||||
plt.plot(h_values, errors, marker='o')
|
||||
plt.xlabel("h")
|
||||
plt.ylabel("MSE")
|
||||
plt.title(title)
|
||||
plt.grid(True)
|
||||
plt.show()
|
||||
|
||||
|
||||
def plot_histogram_with_pdf(
|
||||
data: np.ndarray, mu_true: float = 1.0, var_true: float = 4.0, bins: int = 30
|
||||
) -> None:
|
||||
"""
|
||||
Plots a histogram of the data and overlays the true N(mu_true, var_true) pdf.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : ndarray
|
||||
1D data samples.
|
||||
mu_true : float
|
||||
True mean, default to 1.0.
|
||||
var_true : float
|
||||
True variance, default to 4.0.
|
||||
bins : int
|
||||
number of bins, default to 30.
|
||||
"""
|
||||
plt.figure(figsize=(8, 5))
|
||||
|
||||
plt.hist(data, bins=bins, density=True, alpha=0.5, label="Data histogram")
|
||||
|
||||
x_min, x_max = np.min(data), np.max(data)
|
||||
x_plot = np.linspace(x_min, x_max, 200)
|
||||
pdf_true = true_normal_pdf_1d(x_plot, mu=mu_true, var=var_true)
|
||||
|
||||
plt.plot(x_plot, pdf_true, label=f"True N({mu_true}, {var_true}) pdf")
|
||||
plt.xlabel("x")
|
||||
plt.ylabel("Density")
|
||||
plt.title("Dataset2 histogram vs true N({mu_true}, {var_true}) pdf")
|
||||
plt.legend()
|
||||
plt.grid(True)
|
||||
plt.show()
|
||||
|
||||
|
||||
# --------------------------------------------------
|
||||
# Part B: main runner
|
||||
# --------------------------------------------------
|
||||
if __name__ == "__main__":
|
||||
# Load dataset2 (from GitHub via toolbox)
|
||||
df2 = load_csv(dataset2, header=None)
|
||||
data2 = df2.iloc[:, 0].values
|
||||
|
||||
mu = float(sys.argv[1]) if len(sys.argv) > 1 else 1.0
|
||||
var = float(sys.argv[2]) if len(sys.argv) > 2 else 4.0
|
||||
|
||||
# Optional: histogram + true pdf
|
||||
plot_histogram_with_pdf(data2, mu_true=mu, var_true=var, bins=30)
|
||||
|
||||
# Range of h: [0.1, 10] with step 0.1
|
||||
h_values = np.arange(0.1, 10.1, 0.1)
|
||||
|
||||
# Uniform kernel (parzen)
|
||||
errors_uniform = scan_bandwidths_parzen(data2, h_values, parzen_kernel_uniform, mu_true=mu, var_true=var)
|
||||
best_h_uniform = h_values[np.argmin(errors_uniform)]
|
||||
|
||||
# Gaussian kernel
|
||||
errors_gaussian = scan_bandwidths_parzen(data2, h_values, parzen_kernel_gaussian, mu_true=mu, var_true=var)
|
||||
best_h_gaussian = h_values[np.argmin(errors_gaussian)]
|
||||
|
||||
print("Best h (uniform):", best_h_uniform, " with error: ", errors_uniform[np.argmin(errors_uniform)])
|
||||
print("Best h (gaussian):", best_h_gaussian, " with error: ", errors_gaussian[np.argmin(errors_gaussian)])
|
||||
|
||||
plot_h_vs_error(h_values, errors_uniform, "Uniform kernel: h vs MSE")
|
||||
plot_h_vs_error(h_values, errors_gaussian, "Gaussian kernel: h vs MSE")
|
||||
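evaluate_parzen computes p_hat(x_i) = (1 / (N h)) * sum_n k((x_i - x_n) / h) with an explicit loop, which is fine for 200 samples. The same estimate can be obtained for all evaluation points at once via broadcasting; a vectorised sketch (an alternative, not the repository's implementation):

import numpy as np

def parzen_estimate_all(x_eval: np.ndarray, data: np.ndarray, h: float, kernel_fn) -> np.ndarray:
    """Parzen estimates at every point of x_eval, via an (M, N) matrix of scaled differences."""
    u = (x_eval[:, None] - data[None, :]) / h             # shape (M, N)
    return kernel_fn(u).sum(axis=1) / (data.shape[0] * h)

# Works with the elementwise kernels defined above, e.g. a 1D Gaussian kernel
gaussian = lambda u: np.exp(-0.5 * u ** 2) / np.sqrt(2.0 * np.pi)
data = np.random.default_rng(1).normal(loc=1.0, scale=2.0, size=200)   # mimics N(1, 4)
print(parzen_estimate_all(np.array([0.0, 1.0, 2.0]), data, 0.5, gaussian))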
325 src/partC.py Normal file
@@ -0,0 +1,325 @@
|
||||
# ------------------------------------------------------------
|
||||
# Part C - k-Nearest Neighbors Classifier (k-NN)
|
||||
# Pattern Recognition – Semester Assignment
|
||||
#
|
||||
# Author:
|
||||
# Christos Choutouridis (ΑΕΜ 8997)
|
||||
# cchoutou@ece.auth.gr
|
||||
#
|
||||
# Description:
|
||||
# This module implements Part C of the assignment:
|
||||
# - Implementation of a simple k-NN classifier in 2D
|
||||
# - Manual computation of Euclidean distances (no ML libraries)
|
||||
# - Probability estimation for any number of classes
|
||||
# - Accuracy evaluation for k ∈ [1, 30]
|
||||
# - Decision boundary visualization for the best k
|
||||
# ------------------------------------------------------------
|
||||
|
||||
from typing import Sequence, Tuple
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.lines import Line2D
|
||||
from matplotlib.patches import Patch
|
||||
from pandas import DataFrame
|
||||
|
||||
from toolbox import load_csv, split_dataset_by_class, dataset3, testset
|
||||
|
||||
|
||||
# --------------------------------------------------
|
||||
# Dataset loading
|
||||
# --------------------------------------------------
|
||||
def load_data(dataset: DataFrame) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Loads dataset and splits it into features and labels.
|
||||
|
||||
Returns
|
||||
-------
|
||||
tuple:
|
||||
X (ndarray, shape (N, d)):
|
||||
Feature vectors.
|
||||
y (ndarray, shape (N,)):
|
||||
Corresponding class labels.
|
||||
"""
|
||||
df = load_csv(dataset, header=None)
|
||||
X, y, _ = split_dataset_by_class(df)
|
||||
return X, y
|
||||
|
||||
|
||||
# --------------------------------------------------
|
||||
# k-NN core functions
|
||||
# --------------------------------------------------
|
||||
def eucl(x: np.ndarray, trainData: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Computes Euclidean distance of x from all training samples.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : ndarray, shape (d,)
|
||||
Query point.
|
||||
trainData : ndarray, shape (N, d)
|
||||
Training feature vectors.
|
||||
|
||||
Returns
|
||||
-------
|
||||
distances : ndarray, shape (N,)
|
||||
Euclidean distance from x to each training point.
|
||||
"""
|
||||
diff = trainData - x # shape (N, d)
|
||||
sq_dist = np.sum(diff * diff, axis=1)
|
||||
distances = np.sqrt(sq_dist)
|
||||
return distances
|
||||
|
||||
|
||||
def neighbors(x: np.ndarray, data: np.ndarray, k: int) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Returns the indices and distances of the k nearest neighbors of x.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : ndarray, shape (d,)
|
||||
data point
|
||||
data : ndarray, shape (N, d)
|
||||
dataset to search neighbors
|
||||
k : int
|
||||
Number of neighbors to consider
|
||||
|
||||
Returns
|
||||
-------
|
||||
tuple:
|
||||
neighbor_indices : ndarray, shape (k,)
|
||||
Indices of the k nearest neighbors.
|
||||
neighbor_distances : ndarray, shape (k,)
|
||||
Distances of the k nearest neighbors (ascending order).
|
||||
"""
|
||||
distances = eucl(x, data)
|
||||
sorted_indices = np.argsort(distances)
|
||||
neighbor_indices = sorted_indices[:k]
|
||||
neighbor_distances = distances[neighbor_indices]
|
||||
return neighbor_indices, neighbor_distances
|
||||
|
||||
|
||||
def predict(
|
||||
X_test: np.ndarray, X_train: np.ndarray, y_train: np.ndarray, k: int
|
||||
):
|
||||
"""
|
||||
Predicts class probabilities and labels for each test sample using k-NN.
|
||||
Supports an arbitrary number of classes.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X_test : ndarray, shape (N_test, d)
|
||||
test features
|
||||
X_train : ndarray, shape (N_train, d)
|
||||
train features
|
||||
y_train : ndarray, shape (N_train,)
|
||||
Class labels (may be any discrete integers).
|
||||
k : int
|
||||
number of neighbors to consider
|
||||
|
||||
Returns
|
||||
-------
|
||||
tuple:
|
||||
probs (ndarray, shape (N_test, C)):
|
||||
probs[i, j] = estimated probability of class classes[j] for sample i.
|
||||
y_pred (ndarray, shape (N_test,)):
|
||||
Predicted label for each test sample.
|
||||
"""
|
||||
classes = np.unique(y_train)
|
||||
C = len(classes)
|
||||
N_test = X_test.shape[0]
|
||||
|
||||
probs = np.zeros((N_test, C))
|
||||
y_pred = np.zeros(N_test, dtype=classes.dtype)
|
||||
|
||||
for i in range(N_test):
|
||||
x = X_test[i]
|
||||
neighbor_indices, _ = neighbors(x, X_train, k)
|
||||
neighbor_labels = y_train[neighbor_indices]
|
||||
|
||||
# Probabilities per class
|
||||
for j, c in enumerate(classes):
|
||||
probs[i, j] = np.sum(neighbor_labels == c) / k
|
||||
|
||||
# Winner class
|
||||
y_pred[i] = classes[np.argmax(probs[i])]
|
||||
|
||||
return probs, y_pred
|
||||
|
||||
|
||||
# --------------------------------------------------
|
||||
# Accuracy & model evaluation
|
||||
# --------------------------------------------------
|
||||
def accuracy(y_true: np.ndarray, y_pred: np.ndarray) -> float:
|
||||
"""
|
||||
Classification accuracy.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true : ndarray
|
||||
actual labels
|
||||
y_pred : ndarray
|
||||
predicted labels
|
||||
|
||||
Returns
|
||||
-------
|
||||
acc : float
|
||||
Fraction of correctly classified samples.
|
||||
"""
|
||||
return float(np.mean(y_true == y_pred))
|
||||
|
||||
|
||||
def evaluate_over_k(
|
||||
X_train: np.ndarray, y_train: np.ndarray,
|
||||
X_test: np.ndarray, y_test: np.ndarray,
|
||||
k_values: Sequence[int],
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Evaluates k-NN accuracy for multiple values of k.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X_train, y_train:
|
||||
training set
|
||||
X_test, y_test:
|
||||
test set
|
||||
k_values :
|
||||
sequence of int
|
||||
|
||||
Returns
|
||||
-------
|
||||
accuracies : ndarray, shape (len(k_values),)
|
||||
Accuracy for each value of k.
|
||||
"""
|
||||
accuracies = np.zeros(len(k_values))
|
||||
|
||||
for i, k in enumerate(k_values):
|
||||
_, y_pred = predict(X_test, X_train, y_train, k)
|
||||
accuracies[i] = accuracy(y_test, y_pred)
|
||||
|
||||
return accuracies
|
||||
|
||||
|
||||
def plot_accuracy_vs_k(k_values: np.ndarray, accuracies: np.ndarray) -> None:
|
||||
"""
|
||||
Plots k on the x-axis and accuracy on the y-axis.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
k_values: np.ndarray
|
||||
sequence of int
|
||||
accuracies: np.ndarray
|
||||
accuracies array
|
||||
"""
|
||||
plt.figure(figsize=(10, 6))
|
||||
plt.plot(k_values, accuracies, marker="o")
|
||||
plt.xlabel("k")
|
||||
plt.ylabel("Accuracy")
|
||||
plt.title("k-NN accuracy over k")
|
||||
plt.grid(True)
|
||||
plt.show()
|
||||
|
||||
|
||||
# --------------------------------------------------
|
||||
# Decision boundary visualization
|
||||
# --------------------------------------------------
|
||||
def plot_decision_boundaries_2d(
|
||||
X_train: np.ndarray, y_train: np.ndarray, k: int, grid_size: int = 200
|
||||
) -> None:
|
||||
"""
|
||||
Plots the decision boundaries of the k-NN classifier in 2D using contourf.
|
||||
Supports any number of classes, but requires **exactly 2 features**.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X_train : ndarray, shape (N_train, 2)
|
||||
training features
|
||||
y_train : ndarray, shape (N_train,)
|
||||
training labels
|
||||
k : int
|
||||
Number of neighbors.
|
||||
grid_size : int
|
||||
Grid resolution for the contour.
|
||||
"""
|
||||
# --- Check for 2D features ---
|
||||
if X_train.shape[1] != 2:
|
||||
raise ValueError(
|
||||
f"plot_decision_boundaries_2d supports only 2D features, "
|
||||
f"but got X_train with shape {X_train.shape}"
|
||||
)
|
||||
|
||||
classes = np.unique(y_train)
|
||||
C = len(classes)
|
||||
class_to_idx = {c: idx for idx, c in enumerate(classes)}
|
||||
|
||||
# Grid limits
|
||||
x_min, x_max = X_train[:, 0].min() - 0.5, X_train[:, 0].max() + 0.5
|
||||
y_min, y_max = X_train[:, 1].min() - 0.5, X_train[:, 1].max() + 0.5
|
||||
|
||||
xx, yy = np.meshgrid(
|
||||
np.linspace(x_min, x_max, grid_size),
|
||||
np.linspace(y_min, y_max, grid_size),
|
||||
)
|
||||
|
||||
grid_points = np.column_stack([xx.ravel(), yy.ravel()])
|
||||
_, y_pred_grid = predict(grid_points, X_train, y_train, k)
|
||||
|
||||
Z_idx = np.vectorize(class_to_idx.get)(y_pred_grid).reshape(xx.shape)
|
||||
|
||||
# Discrete colormap
|
||||
cmap = plt.cm.get_cmap("Set2", C)
|
||||
levels = np.arange(C + 1) - 0.5
|
||||
|
||||
plt.figure(figsize=(12, 8))
|
||||
|
||||
# Filled boundaries
|
||||
plt.contourf(xx, yy, Z_idx, levels=levels, cmap=cmap, alpha=0.3)
|
||||
|
||||
# Plot samples
|
||||
for c, idx in class_to_idx.items():
|
||||
mask = (y_train == c)
|
||||
plt.scatter(
|
||||
X_train[mask, 0], X_train[mask, 1],
|
||||
c=[cmap(idx)], edgecolors="k", s=30
|
||||
)
|
||||
|
||||
# --- Custom legend: Region + Samples per class ---
|
||||
legend_elements = []
|
||||
for c, idx in class_to_idx.items():
|
||||
color = cmap(idx)
|
||||
legend_elements.append(Patch(facecolor=color, edgecolor="none",
|
||||
alpha=0.3, label=f"Region: class {c}"))
|
||||
legend_elements.append(Line2D([], [], marker="o", linestyle="",
|
||||
markerfacecolor=color,
|
||||
markeredgecolor="k",
|
||||
label=f"Samples: class {c}"))
|
||||
|
||||
plt.legend(handles=legend_elements, loc="upper right", framealpha=0.9)
|
||||
plt.xlabel("x1")
|
||||
plt.ylabel("x2")
|
||||
plt.title(f"k-NN decision boundaries (k = {k})")
|
||||
plt.grid(True)
|
||||
plt.show()
|
||||
|
||||
|
||||
# --------------------------------------------------
|
||||
# Main runner
|
||||
# --------------------------------------------------
|
||||
if __name__ == "__main__":
|
||||
# Load training and test sets
|
||||
X_train, y_train = load_data(dataset=dataset3)
|
||||
X_test, y_test = load_data(dataset=testset)
|
||||
|
||||
# Evaluate over k
|
||||
k_values = np.arange(1, 31, 1)
|
||||
accuracies = evaluate_over_k(X_train, y_train, X_test, y_test, k_values)
|
||||
|
||||
# Best k
|
||||
best_idx = np.argmax(accuracies)
|
||||
best_k = int(k_values[best_idx])
|
||||
best_acc = accuracies[best_idx]
|
||||
|
||||
print(f"Best k: {best_k} with accuracy: {best_acc:.4f}")
|
||||
|
||||
# Plots
|
||||
plot_accuracy_vs_k(k_values, accuracies)
|
||||
plot_decision_boundaries_2d(X_train, y_train, best_k, grid_size=200)
|
||||
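For a query point, predict estimates the class probabilities as the fraction of the k nearest neighbours carrying each label and returns the majority label. A tiny hand-checkable example (illustrative only; the import assumes src/ is on the Python path):

import numpy as np
from partC import predict   # assumption: running from within src/

# Two class-0 points on the left, two class-1 points on the right
X_train = np.array([[0.0, 0.0], [1.0, 0.0], [4.0, 0.0], [5.0, 0.0]])
y_train = np.array([0.0, 0.0, 1.0, 1.0])

# Query at (1.5, 0): its 3 nearest neighbours are the two class-0 points and one class-1 point
probs, y_pred = predict(np.array([[1.5, 0.0]]), X_train, y_train, k=3)
print(probs)    # approximately [[0.667 0.333]]: 2/3 class 0, 1/3 class 1
print(y_pred)   # [0.]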
65 src/toolbox.py Normal file
@@ -0,0 +1,65 @@
|
||||
# ------------------------------------------------------------
|
||||
# Common tools for the entire assignment
|
||||
#
|
||||
# Author:
|
||||
# Christos Choutouridis (ΑΕΜ 8997)
|
||||
# cchoutou@ece.auth.gr
|
||||
# ------------------------------------------------------------
|
||||
|
||||
from typing import Tuple, Dict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
|
||||
|
||||
def github_raw(user, repo, branch, path):
|
||||
return f"https://raw.githubusercontent.com/{user}/{repo}/{branch}/{path}"
|
||||
|
||||
|
||||
dataset1 = github_raw("hoo2", "PR-Assignment2025_26", "master", "datasets/dataset1.csv")
|
||||
dataset2 = github_raw("hoo2", "PR-Assignment2025_26", "master", "datasets/dataset2.csv")
|
||||
dataset3 = github_raw("hoo2", "PR-Assignment2025_26", "master", "datasets/dataset3.csv")
|
||||
testset = github_raw("hoo2", "PR-Assignment2025_26", "master", "datasets/testset.csv")
|
||||
|
||||
|
||||
def load_csv(path, header=None):
|
||||
"""
|
||||
Loads a CSV file and returns a pandas DataFrame.
|
||||
"""
|
||||
return pd.read_csv(path, header=header)
|
||||
|
||||
|
||||
def split_dataset_by_class(df: DataFrame) -> Tuple[np.ndarray, np.ndarray, Dict[int, np.ndarray]]:
|
||||
"""
|
||||
Splits a dataset into features, labels and per-class subsets with the assumptions that:
|
||||
|
||||
- All columns except the last are feature columns.
|
||||
- The last column is the class label.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
df: DataFrame
|
||||
Data samples as DataFrame.
|
||||
|
||||
Returns
|
||||
-------
|
||||
tuple:
|
||||
X : ndarray, shape (N, d)
|
||||
Feature matrix.
|
||||
y : ndarray, shape (N,)
|
||||
Labels.
|
||||
classes : dict
|
||||
Dictionary mapping each class label to the subset of X that belongs to that class.
|
||||
|
||||
Example
|
||||
-------
|
||||
X, y, classes = split_dataset_by_class(df)
|
||||
"""
|
||||
n_cols = df.shape[1] # Number of columns
|
||||
X = df.iloc[:, :n_cols - 1].values # Features = all columns except last
|
||||
y = df.iloc[:, n_cols - 1].values # Labels = last column
|
||||
|
||||
classes = {c: X[y == c] for c in np.unique(y)}
|
||||
|
||||
return X, y, classes
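load_csv is a thin wrapper around pandas.read_csv, so the helpers above work equally with the GitHub raw URLs defined here or with a local checkout. A short usage sketch (the local path is an assumption, not something the repository defines):

from toolbox import load_csv, split_dataset_by_class, dataset1

df = load_csv(dataset1, header=None)                    # remote, as partA/partB/partC do
# df = load_csv("datasets/dataset1.csv", header=None)   # or a local copy (hypothetical path)

X, y, classes = split_dataset_by_class(df)
for c, Xc in classes.items():
    print(f"class {c}: {Xc.shape[0]} samples, {Xc.shape[1]} features")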