diff --git a/homeworks/hw04/data/large_srs_rating.csv b/homeworks/hw04/data/large_srs_rating.csv
new file mode 100644
index 0000000..c792261
--- /dev/null
+++ b/homeworks/hw04/data/large_srs_rating.csv
@@ -0,0 +1,181 @@
+,Title,Genre,Premiere,Runtime,IMDb Score,Language
+0,Win It All,Comedy,"April 7, 2017",88,6.2,English
+1,When We First Met,Romantic comedy,"February 9, 2018",97,6.4,English
+2,I Am the Pretty Thing That Lives in the House,Horror,"October 28, 2016",89,4.6,English
+3,Sergio,Biopic,"April 17, 2020",118,6.1,English
+4,7 años,Drama,"October 28, 2016",76,6.8,Spanish
+5,Intuition,Thriller,"May 28, 2020",116,5.3,Spanish
+6,Road to Roma,Making-of,"February 11, 2020",72,7.7,Spanish
+7,Rebecca,Romantic thriller,"October 21, 2020",123,6.0,English
+8,Crazy Awesome Teachers,Comedy-drama,"August 17, 2020",101,6.2,Indonesian
+9,Baggio: The Divine Ponytail,Biopic,"May 26, 2021",92,6.2,Italian
+10,Seaspiracy,Documentary,"March 24, 2021",89,8.2,English
+11,The Day of the Lord,Drama,"October 30, 2020",93,4.9,Spanish
+12,The Old Guard,Superhero/Action,"July 10, 2020",124,6.7,English
+13,Dolly Kitty and Those Twinkling Stars,Drama,"September 18, 2020",120,5.4,Hindi
+14,Homecoming: A Film by Beyonce ,Documentary,"April 17, 2019",137,7.5,English
+15,The Week Of,Comedy,"April 27, 2018",116,5.2,English
+16,Soni,Crime drama,"January 18, 2019",97,7.2,Hindi
+17,Long Live Brij Mohan,Comedy,"August 3, 2018",105,6.8,Hindi
+18,Dolly Parton: A MusiCares Tribute,Documentary,"April 7, 2021",55,6.5,English
+19,Da 5 Bloods,War drama,"June 12, 2020",155,6.5,English
+20,Tony Parker: The Final Shot,Documentary,"January 6, 2021",98,6.8,French
+21,Icarus,Documentary,"August 4, 2017",120,7.9,English
+22,Sol Levante,Anime / Short,"April 2, 2020",4,4.7,English
+23,The Other Side of the Wind,Drama,"November 2, 2018",122,6.8,English
+24,First They Killed My Father,Drama,"September 15, 2017",136,7.2,Khmer/English/French
+25,Porta dos Fundos: The First Temptation of Christ,Comedy,"December 3, 2019",46,4.6,Portuguese
+26,Murder to Mercy: The Cyntoia Brown Story,Documentary,"April 29, 2020",97,6.4,English
+27,The Irishman: In Conversation,Aftershow / Interview,"November 27, 2019",23,7.4,English
+28,Spenser Confidential,Action comedy,"March 6, 2020",111,6.2,English
+29,Yes Day,Comedy,"March 12, 2021",86,5.7,English
+30,Prime Time,Thriller,"April 14, 2021",91,5.7,Polish
+31,The Last Laugh,Comedy-drama,"January 11, 2019",98,5.6,English
+32,Triple Frontier,Action-thriller,"March 13, 2019",125,6.4,English
+33,The Princess Switch,Romantic comedy,"November 16, 2018",101,6.0,English
+34,House Arrest,Comedy,"November 15, 2019",104,5.5,Hindi
+35,High Flying Bird,Sports-drama,"February 8, 2019",90,6.2,English
+36,Tony Robbins: I Am Not Your Guru,Documentary,July 15. 2016,116,6.7,English
+37,The Do-Over,Action comedy,"May 27, 2016",108,5.7,English
+38,The Legacy of a Whitetail Deer Hunter,Adventure/Comedy,"July 6, 2018",83,5.5,English
+39,The Speed Cubers,Documentary,"July 29, 2020",40,7.4,English
+40,If Anything Happens I Love You,Animation / Short,"November 20, 2020",12,7.8,English
+41,Justin Timberlake + The Tennessee Kids,Concert Film,"October 12, 2016",90,7.7,English
+42,Clinical,Thriller,"January 13, 2017",104,5.1,English
+43,GIMS: On the Record,Documentary,"September 17, 2020",96,6.8,French
+44,Sometimes,Drama,"May 1, 2018",101,7.2,Tamil
+45,CounterPunch ,Documentary,"June 16, 2017",91,6.7,English
+46,Dark Forces,Thriller,"August 21, 2020",81,2.6,Spanish
+47,El Camino Christmas,Dark comedy,"December 8, 2017",89,5.7,English
+48,Horse Girl,Drama,"February 7, 2020",104,5.9,English
+49,Life Overtakes Me,Documentary,"June 14, 2019",40,6.5,English/Swedish
+50,See You Yesterday,Science fiction,"May 17, 2019",87,5.2,English
+51,I'll Sleep When I'm Dead,Documentary,"August 19, 2016",79,6.6,English
+52,Someone Great,Romantic comedy,"April 19, 2019",92,6.2,English
+53,The Sleepover,Comedy,"August 21, 2020",103,5.6,English
+54,Ibiza,Comedy,"May 25, 2018",94,5.2,English
+55,El Camino: A Breaking Bad Movie,Crime drama,"October 11, 2019",121,7.3,English
+56,I Am All Girls,Thriller,"May 14, 2021",107,5.8,English
+57,The Perfection,Horror-thriller,"May 24, 2019",90,6.1,English
+58,The Incredible Jessica James,Comedy,"July 28, 2017",83,6.5,English
+59,Pagglait,Comedy-drama,"March 26, 2021",114,6.9,Hindi
+60,Team Foxcatcher,Documentary,"April 29, 2016",90,7.3,English/Russian
+61,Tig,Documentary,"July 17, 2015",80,7.4,English
+62,One-Way to Tomorrow,Romance,"June 19, 2020",90,5.6,Turkish
+63,Earth and Blood,Action,"April 17, 2020",80,4.9,French
+64,Outside the Wire,Action/Science fiction,"January 15, 2021",114,5.4,English
+65,The Trader,Documentary,"February 9, 2018",23,6.8,Georgian
+66,Fyre: The Greatest Party That Never Happened,Documentary,"January 18, 2019",97,7.2,English
+67,Nobody Sleeps in the Woods Tonight,Horror,"October 28, 2020",103,4.8,Polish
+68,"Crack: Cocaine, Corruption & Conspiracy",Documentary,"January 11, 2021",89,6.7,English
+69,Disclosure: Trans Lives on Screen,Documentary,"June 19, 2020",107,8.2,English
+70,Mercury 13,Documentary,"April 20, 2018",79,6.8,English
+71,Hope Frozen: A Quest to Live Twice,Documentary,"September 15, 2020",80,6.7,Thia/English
+72,The Players,Comedy,"July 15, 2020",88,4.6,Italian
+73,The Great Hack,Documentary,"July 24, 2019",114,7.1,English
+74,On My Skin,Crime drama,"September 12, 2018",100,7.3,Italian
+75,Imperial Dreams,Drama,"February 3, 2017",87,6.7,English
+76,Brene Brown: The Call to Courage,Documentary,"April 19, 2019",76,7.7,English
+77,Sturgill Simpson Presents: Sound & Fury,Animation / Musicial,"September 27, 2019",41,6.4,English
+78,ReMastered: Who Shot the Sheriff?,Documentary,"October 12, 2018",57,6.9,English
+79,Little Evil,Comedy horror,"September 1, 2017",94,5.7,English
+80,XOXO,Drama,"August 26, 2016",92,5.3,English
+81,Los Tigres del Norte at Folsom Prison,Documentary,"September 15, 2019",64,7.0,Spanish
+82,Night in Paradise,Drama,"April 9, 2021",132,6.7,Korean
+83,The Claudia Kishi Club,Documentary,"July 10, 2020",17,6.9,English
+84,Shawn Mendes: Live in Concert,Concert Film,"November 25, 2020",87,7.4,English
+85,The Death and Life of Marsha P. Johnson,Documentary,"October 6, 2017",105,7.3,English
+86,Feel the Beat,Family/Comedy-drama,"June 19, 2020",107,6.3,English
+87,Double Dad,Comedy-drama,"January 15, 2021",103,5.6,Portuguese
+88,The Willoughbys,Animation/Comedy/Adventure,"April 22, 2020",90,6.4,English
+89,Guilty,Thriller,"March 6, 2020",119,5.4,Hindi
+90,"Strip Down, Rise Up",Documentary,"February 5, 2021",112,5.2,English
+91,The Call,Drama,"November 27, 2020",112,4.1,Korean
+92,Leyla Everlasting,Comedy,"December 4, 2020",112,3.7,Turkish
+93,The Last Days of American Crime,Heist film/Thriller,"June 5, 2020",149,3.7,English
+94,Mank,Biopic,"December 4, 2020",132,6.9,English
+95,15 August,Comedy-drama,"March 29, 2019",124,5.8,Marathi
+96,The Perfect Date,Romantic comedy,"April 12, 2019",89,5.8,English
+97,To All the Boys: Always and Forever,Romantic comedy,"February 12, 2021",109,6.3,English
+98,A Christmas Prince: The Royal Baby,Romantic comedy,"December 5, 2019",85,5.4,English
+99,Project Power,Superhero,"August 14, 2020",113,6.0,English
+100,The Forest of Love,Drama,"October 11, 2019",151,6.3,Japanese
+101,We Are One,Documentary,"July 14, 2020",86,4.6,French
+102,Eli,Horror,"October 18, 2019",98,5.7,English
+103,Get Me Roger Stone,Documentary,"May 12, 2017",101,7.3,English
+104,Dude,Teen comedy-drama,"April 20, 2018",97,5.1,English
+105,Rattlesnake,Horror,"October 25, 2019",85,4.6,English
+106,American Factory,Documentary,"August 21, 2019",110,7.4,English
+107,Okja,Action-adventure,"June 28, 2017",121,7.3,English/Korean
+108,The Wrong Missy,Comedy,"May 13, 2020",90,5.7,English
+109,iBoy,Science fiction/Thriller,"January 27, 2017",90,6.0,English
+110,Angela's Christmas,Animation,"November 30, 2018",30,7.1,English
+111,Death Note,Horror thriller,"August 25, 2017",100,4.4,English
+112,Take Your Pills,Documentary,"March 16, 2018",87,6.4,English
+113,Blue Miracle,Drama,"May 27, 2021",95,6.7,English
+114,Airplane Mode,Comedy,"January 23, 2020",96,5.0,Portuguese
+115,"Ram Dass, Going Home",Documentary,"April 6, 2018",31,7.1,English
+116,Chasing Coral ,Documentary,"July 14, 2017",89,8.1,English
+117,"Bikram: Yogi, Guru, Predator",Documentary,"November 20, 2019",86,6.7,English
+118,They'll Love Me When I'm Dead,Documentary,"November 2, 2018",98,7.4,English
+119,The Road to El Camino: A Breaking Bad Movie,Making-of,"October 29, 2019",13,7.2,English
+120,ARQ,Science fiction/Thriller,"September 16, 2016",88,6.4,English
+121,Stuck Apart,Drama,"January 8, 2021",96,6.1,Turkish
+122,Miss Americana,Documentary,"January 31, 2020",85,7.4,English
+123,Anelka: Misunderstood,Documentary,"August 5, 2020",94,6.4,French
+124,Gaga: Five Foot Two,Documentary,"September 22, 2017",100,7.0,English
+125,The Babysitter: Killer Queen,Comedy/Horror,"September 10, 2020",102,5.8,English
+126,I'm No Longer Here: A Discussion with Guillermo del Toro and Alfonso Cuaron,Aftershow / Interview,"November 3, 2020",14,7.0,English
+127,Joan Didion: The Center Will Not Hold,Documentary,October 27. 2017,98,7.5,English
+128,Bomb Scared,Black comedy,"October 12, 2017",89,5.6,Spanish/Basque
+129,To the Bone,Drama,"July 14, 2017",107,6.8,English
+130,Taylor Swift: Reputation Stadium Tour,Concert Film,"December 31, 2018",125,8.4,English
+131,Biggie: I Got a Story to Tell,Documentary,"March 1, 2021",97,6.9,English
+132,The Other One: The Long Strange Trip of Bob Weir,Documentary,"May 22, 2015",83,7.3,English
+133,How It Ends,Action-thriller,"July 13, 2018",113,5.0,English
+134,Operation Christmas Drop,Romantic comedy,"November 5, 2020",96,5.8,English
+135,Lionheart,Comedy,"January 4, 2019",94,5.7,English
+136,Saving Capitalism,Documentary,"November 21, 2017",73,6.8,English
+137,The Fundamentals of Caring,Comedy-drama,"June 24, 2016",97,7.3,English
+138,Forgive Us Our Debts,Drama,"May 4, 2018",104,6.0,Italian
+139,Citation,Drama,"November 6, 2020",151,6.2,English
+140,American Son,Drama,"November 1, 2019",90,5.8,English
+141,Arlo the Alligator Boy,Animated musical comedy,"April 16, 2021",92,6.7,English
+142,Monster,Drama,"May 7, 2021",98,6.5,English
+143,Have a Good Trip: Adventures in Psychedelics,Documentary,"May 11, 2020",85,6.8,English
+144,Lust Stories,Drama,"June 15, 2018",120,6.5,Hindi
+145,Como Caído del Cielo,Musical comedy,"December 24, 2019",112,6.4,Spanish
+146,Ghost Stories,Horror anthology,"January 1, 2020",144,4.3,Hindi
+147,Paradox,Musical/Western/Fantasy,"March 23, 2018",73,3.9,English
+148,Rising High,Satire,"April 17, 2020",94,5.8,German
+149,The Lovebirds,Romantic comedy,"May 22, 2020",87,6.1,English
+150,Seventeen,Coming-of-age comedy-drama,"October 18, 2019",99,7.2,Spanish
+151,The Angel,Spy thriller,"September 14, 2018",114,6.6,English
+152,Grass Is Greener,Documentary,"April 20, 2019",97,7.1,English
+153,Wheelman,Action thriller,"October 20, 2017",82,6.4,English
+154,End Game,Documentary,"May 4, 2018",40,7.1,English
+155,Raat Akeli Hai,Thriller,"July 31, 2020",149,7.3,Hindi
+156,Paddleton,Drama-Comedy,"February 22, 2019",89,7.2,English
+157,First Match,Sports-drama,"March 30, 2018",102,6.4,English
+158,Mute,Science fiction/Mystery,"February 23, 2018",126,5.5,English
+159,The After Party,Comedy,"August 24, 2018",89,5.8,English
+160,Tribhanga – Tedhi Medhi Crazy,Drama,"January 15, 2021",95,6.1,Hindi
+161,Derren Brown: Sacrifice,Mentalism special,"October 19, 2018",49,7.1,English
+162,Candy Jar,Comedy,"April 27, 2018",92,5.8,English
+163,The Rachel Divide,Documentary,"April 27, 2018",104,6.2,English
+164,Alex Strangelove,Romantic comedy,"June 8, 2018",99,6.3,English
+165,The Edge of Democracy,Documentary,"June 19, 2019",121,7.2,Portuguese
+166,The Girl on the Train,Thriller,"February 26, 2021",120,4.4,Hindi
+167,The Ballad of Buster Scruggs,Western,"November 16, 2018",132,7.3,English
+168,Chadwick Boseman: Portrait of an Artist,Documentary,"April 17, 2021",21,6.5,English
+169,The Decline,Thriller,"March 27, 2020",83,5.9,French
+170,"Out of Many, One",Documentary,"December 12, 2018",34,5.7,English
+171,IO,Science fiction/Drama,"January 18, 2019",95,4.7,English
+172,Vampires vs. the Bronx,Horror comedy,"October 2, 2020",86,5.6,English
+173,To All the Boys I've Loved Before,Romantic comedy,"August 17, 2018",99,7.1,English
+174,Nail Bomber: Manhunt,Documentary,"May 26, 2021",72,6.3,English
+175,Upstarts,Drama,"October 18, 2019",112,6.7,Hindi
+176,Casting JonBenet,Documentary,"April 28, 2017",80,6.1,English
+177,Uncorked,Drama,"March 27, 2020",103,6.3,English
+178,Army of the Dead,Zombie/Heist,"May 21, 2021",148,5.9,English
+179,My Beautiful Broken Brain,Documentary,"March 18, 2016",91,7.1,English
diff --git a/homeworks/hw04/data/netflix_originals.csv b/homeworks/hw04/data/netflix_originals.csv
new file mode 100644
index 0000000..788f929
--- /dev/null
+++ b/homeworks/hw04/data/netflix_originals.csv
@@ -0,0 +1,585 @@
+Title,Genre,Premiere,Runtime,IMDb Score,Language
+Enter the Anime,Documentary,"August 5, 2019",58,2.5,English/Japanese
+Dark Forces,Thriller,"August 21, 2020",81,2.6,Spanish
+The App,Science fiction/Drama,"December 26, 2019",79,2.6,Italian
+The Open House,Horror thriller,"January 19, 2018",94,3.2,English
+Kaali Khuhi,Mystery,"October 30, 2020",90,3.4,Hindi
+Drive,Action,"November 1, 2019",147,3.5,Hindi
+Leyla Everlasting,Comedy,"December 4, 2020",112,3.7,Turkish
+The Last Days of American Crime,Heist film/Thriller,"June 5, 2020",149,3.7,English
+Paradox,Musical/Western/Fantasy,"March 23, 2018",73,3.9,English
+Sardar Ka Grandson,Comedy,"May 18, 2021",139,4.1,Hindi
+Searching for Sheela,Documentary,"April 22, 2021",58,4.1,English
+The Call,Drama,"November 27, 2020",112,4.1,Korean
+Whipped,Romantic comedy,"September 18, 2020",97,4.1,Indonesian
+All Because of You,Action comedy,"October 1, 2020",101,4.2,Malay
+Mercy,Thriller,"November 22, 2016",90,4.2,English
+After the Raid,Documentary,"December 19, 2019",25,4.3,Spanish
+Ghost Stories,Horror anthology,"January 1, 2020",144,4.3,Hindi
+The Last Thing He Wanted,Political thriller,"February 21, 2020",115,4.3,English
+What Happened to Mr. Cha?,Comedy,"January 1, 2021",102,4.3,Korean
+Death Note,Horror thriller,"August 25, 2017",100,4.4,English
+"Hello Privilege. It's Me, Chelsea",Documentary,"September 13, 2019",64,4.4,English
+Secret Obsession,Thriller,"July 18, 2019",97,4.4,English
+Sextuplets,Comedy,"August 16, 2019",99,4.4,English
+The Girl on the Train,Thriller,"February 26, 2021",120,4.4,Hindi
+Thunder Force,Superhero-Comedy,"April 9, 2021",105,4.4,English
+Fatal Affair,Thriller,"July 16, 2020",89,4.5,English
+Just Say Yes,Romantic comedy,"April 2, 2021",97,4.5,Dutch
+Seriously Single,Comedy,"July 31, 2020",107,4.5,English
+The Misadventures of Hedi and Cokeman,Comedy,"February 10, 2021",99,4.5,French
+5 Star Christmas,Comedy,"December 7, 2018",95,4.6,Italian
+After Maria,Documentary,"May 24, 2019",37,4.6,English/Spanish
+I Am the Pretty Thing That Lives in the House,Horror,"October 28, 2016",89,4.6,English
+Paris Is Us,Romance drama,"February 22, 2019",83,4.6,French
+Porta dos Fundos: The First Temptation of Christ,Comedy,"December 3, 2019",46,4.6,Portuguese
+Rattlesnake,Horror,"October 25, 2019",85,4.6,English
+The Players,Comedy,"July 15, 2020",88,4.6,Italian
+We Are One,Documentary,"July 14, 2020",86,4.6,French
+Finding Agnes,Drama,"November 30, 2020",105,4.7,Filipino
+IO,Science fiction/Drama,"January 18, 2019",95,4.7,English
+Sentinelle,Action,"March 5, 2021",80,4.7,French
+Sol Levante,Anime / Short,"April 2, 2020",4,4.7,English
+The Binding,Drama,"October 2, 2020",93,4.7,Italian
+We Can Be Heroes,Superhero,"December 25, 2020",100,4.7,English
+Christmas Crossfire,Thriller,"December 4, 2020",106,4.8,German
+Coin Heist,Heist,"January 6, 2017",97,4.8,English
+Mrs. Serial Killer,Thriller,"May 1, 2020",106,4.8,Hindi
+Nobody Sleeps in the Woods Tonight,Horror,"October 28, 2020",103,4.8,Polish
+Take the 10,Comedy,"January 20, 2017",80,4.8,English
+The Main Event,Comedy,"April 10, 2020",101,4.8,English
+The Ridiculous 6,Western,"December 11, 2015",119,4.8,English
+Earth and Blood,Action,"April 17, 2020",80,4.9,French
+Fearless,Animation/Superhero,"August 14, 2020",89,4.9,English
+Holiday Rush,Family film,"November 28, 2019",94,4.9,English
+The Day of the Lord,Drama,"October 30, 2020",93,4.9,Spanish
+Airplane Mode,Comedy,"January 23, 2020",96,5,Portuguese
+How It Ends,Action-thriller,"July 13, 2018",113,5,English
+Love Like the Falling Rain,Drama,"October 15, 2020",86,5,Indonesian
+Rebirth,Thriller,"July 15, 2016",100,5,English
+Squared Love,Romantic comedy,"February 11, 2021",102,5,Polish
+Cadaver,Horror,"October 22, 2020",86,5.1,Norwegian
+Clinical,Thriller,"January 13, 2017",104,5.1,English
+Coffee & Kareem,Action comedy,"April 3, 2020",88,5.1,English
+Dude,Teen comedy-drama,"April 20, 2018",97,5.1,English
+Geez & Ann,Romantic drama,"February 25, 2021",105,5.1,Indonesian
+The Larva Island Movie,Animation,"July 23, 2020",90,5.1,English
+#REALITYHIGH,Comedy,"September 8, 2017",99,5.2,English
+American Factory: A Conversation with the Obamas ,Aftershow / Interview,"August 21, 2019",10,5.2,English
+Desperados,Romantic comedy,"July 3, 2020",106,5.2,English
+Dolly Parton's Christmas on the Square,Christmas musical,"November 22, 2020",98,5.2,English
+Father of the Year,Comedy,"July 20, 2018",94,5.2,English
+Firebrand,Drama,"February 22, 2019",112,5.2,Marathi
+Ghost Lab,Horror,"May 26, 2021",117,5.2,Thai
+Girlfriend's Day,Comedy,"February 14, 2017",70,5.2,English
+Handsome: A Netflix Mystery Movie,Comedy,"May 5, 2017",81,5.2,English
+Hubie Halloween,Comedy,"October 7, 2020",103,5.2,English
+Ibiza,Comedy,"May 25, 2018",94,5.2,English
+Rim of the World,Science fiction adventure,"May 24, 2019",98,5.2,English
+Sandy Wexler,Comedy,"April 14, 2017",131,5.2,English
+See You Yesterday,Science fiction,"May 17, 2019",87,5.2,English
+Still Laugh-In: The Stars Celebrate,Variety show,"May 14, 2019",60,5.2,English
+"Strip Down, Rise Up",Documentary,"February 5, 2021",112,5.2,English
+Tall Girl,Comedy-drama,"September 13, 2019",102,5.2,English
+The Beast,Drama,"November 27, 2020",99,5.2,Italian
+The Week Of,Comedy,"April 27, 2018",116,5.2,English
+A Christmas Prince: The Royal Wedding,Romantic comedy,"November 30, 2018",92,5.3,English
+Back to School,Comedy,"August 30, 2019",83,5.3,French
+Dangerous Lies,Thriller,"April 30, 2020",97,5.3,English
+Gunjan Saxena: The Kargil Girl,Drama,"August 12, 2020",112,5.3,Hindi
+Intuition,Thriller,"May 28, 2020",116,5.3,Spanish
+The Most Assassinated Woman in the World,Thriller,"September 7, 2018",102,5.3,French
+Things Heard & Seen,Horror,"April 29, 2021",121,5.3,English
+"To Each, Her Own",Romantic comedy,"June 24, 2018",95,5.3,French
+Who Would You Take to a Deserted Island?,Drama,"April 12, 2019",93,5.3,Spanish
+XOXO,Drama,"August 26, 2016",92,5.3,English
+A Babysitter's Guide to Monster Hunting,Comedy/Fantasy/Family,"October 15, 2020",98,5.4,English
+A Christmas Prince: The Royal Baby,Romantic comedy,"December 5, 2019",85,5.4,English
+Despite Everything,Comedy,"May 3, 2019",78,5.4,Spanish
+Dolly Kitty and Those Twinkling Stars,Drama,"September 18, 2020",120,5.4,Hindi
+Freaks: You're One of Us,Supernatural drama,"September 2, 2020",92,5.4,German
+"Game Over, Man!",Action/Comedy,"March 23, 2018",101,5.4,English
+Guilty,Thriller,"March 6, 2020",119,5.4,Hindi
+In the Tall Grass,Horror,"October 4, 2019",101,5.4,English
+Madame Claude,Drama,"April 2, 2021",112,5.4,French
+Naked,Comedy,"August 11, 2017",96,5.4,English
+Outside the Wire,Action/Science fiction,"January 15, 2021",114,5.4,English
+The Princess Switch: Switched Again,Romantic comedy,"November 19, 2020",97,5.4,English
+Under the Riccione Sun,Romantic teenage drama,"July 1, 2020",101,5.4,Italian
+A Very Murray Christmas,Comedy / Musical,"December 4, 2015",56,5.5,English
+Been So Long,Musical,"October 26, 2018",100,5.5,English
+Dead Kids,Thriller,"December 1, 2019",94,5.5,Filipino
+Get the Grift,Comedy,"April 28, 2021",94,5.5,Portuguese
+Ghosts of Sugar Land,Documentary,October 16. 2019,21,5.5,English
+House Arrest,Comedy,"November 15, 2019",104,5.5,Hindi
+Kevin Hart's Guide to Black History,Variety show,"February 8, 2019",63,5.5,English
+Love Wedding Repeat,Romantic comedy,"April 10, 2020",100,5.5,English
+Mute,Science fiction/Mystery,"February 23, 2018",126,5.5,English
+�l�t?r�,Crime drama,"October 2, 2020",106,5.5,English
+Red Dot,Thriller,"February 11, 2021",86,5.5,Swedish
+Ride or Die,Psychological thriller drama,"April 15, 2021",142,5.5,Japanese
+Step Sisters,Comedy,"January 19, 2018",108,5.5,English
+The Cloverfield Paradox,Science fiction,"February 4, 2018",102,5.5,English
+The Knight Before Christmas,Romantic comedy,"November 21, 2019",92,5.5,English
+The Legacy of a Whitetail Deer Hunter,Adventure/Comedy,"July 6, 2018",83,5.5,English
+The Package,Black comedy,"August 10, 2018",94,5.5,English
+Unicorn Store,Comedy,"April 5, 2019",92,5.5,English
+Wine Country,Comedy,"May 10, 2019",103,5.5,English
+Bomb Scared,Black comedy,"October 12, 2017",89,5.6,Spanish/Basque
+Brahman Naman,Comedy,"July 7, 2016",95,5.6,English
+Double Dad,Comedy-drama,"January 15, 2021",103,5.6,Portuguese
+Falling Inn Love,Romantic comedy,"August 29, 2019",97,5.6,English
+Hold the Dark,Thriller,"September 28, 2018",125,5.6,English
+"Love, Guaranteed",Romantic comedy,"September 3, 2020",91,5.6,English
+One-Way to Tomorrow,Romance,"June 19, 2020",90,5.6,Turkish
+Sarah Cooper: Everything's Fine,Variety show,"October 27, 2020",49,5.6,English
+The Last Laugh,Comedy-drama,"January 11, 2019",98,5.6,English
+The Last Paradiso,Romantic drama,"February 5, 2021",107,5.6,Italian
+The Midnight Sky,Science fiction,"December 23, 2020",118,5.6,English
+The Paramedic,Thriller,"September 16, 2020",94,5.6,Spanish
+The Sleepover,Comedy,"August 21, 2020",103,5.6,English
+Vampires vs. the Bronx,Horror comedy,"October 2, 2020",86,5.6,English
+Why Did You Kill Me?,Documentary,"April 14, 2021",83,5.6,English
+A Week Away,Christian musical,"March 26, 2021",97,5.7,English
+Caught by a Wave,Romantic teen drama,"March 25, 2021",99,5.7,Italian
+Christmas Inheritance,Romantic drama,"December 15, 2017",104,5.7,English
+Dad Wanted,Family,"September 11, 2020",102,5.7,Spanish
+El Camino Christmas,Dark comedy,"December 8, 2017",89,5.7,English
+Eli,Horror,"October 18, 2019",98,5.7,English
+Ginny Weds Sunny,Romantic comedy,"October 9, 2020",125,5.7,Hindi
+Good Sam,Drama,"May 16, 2019",89,5.7,English
+Lionheart,Comedy,"January 4, 2019",94,5.7,English
+Little Evil,Comedy horror,"September 1, 2017",94,5.7,English
+One Take,Documentary,"June 18, 2020",85,5.7,Thai
+"Out of Many, One",Documentary,"December 12, 2018",34,5.7,English
+Point Blank,Action,"July 12, 2019",86,5.7,English
+Prime Time,Thriller,"April 14, 2021",91,5.7,Polish
+The Do-Over,Action comedy,"May 27, 2016",108,5.7,English
+The Holiday Calendar,Romantic comedy,"November 2, 2018",95,5.7,English
+The Woman in the Window,Psychological thriller,"May 14, 2021",100,5.7,English
+The Wrong Missy,Comedy,"May 13, 2020",90,5.7,English
+Velvet Buzzsaw,Thriller,"February 1, 2019",112,5.7,English
+Yes Day,Comedy,"March 12, 2021",86,5.7,English
+15 August,Comedy-drama,"March 29, 2019",124,5.8,Marathi
+A California Christmas,Romantic comedy,"December 14, 2020",107,5.8,English
+A Christmas Prince,Romantic comedy,"November 17, 2017",92,5.8,English
+All Day and a Night,Drama,"May 1, 2020",121,5.8,English
+American Son,Drama,"November 1, 2019",90,5.8,English
+Barry,Biopic,"December 16, 2016",104,5.8,English
+Candy Jar,Comedy,"April 27, 2018",92,5.8,English
+Choked: Paisa Bolta Hai,Drama,"June 5, 2020",114,5.8,Hindi
+Class of '83,Drama,"August 21, 2020",98,5.8,Hindi
+Extinction,Science fiction/Thriller,"July 27, 2018",95,5.8,English
+Happy Anniversary,Romantic comedy,"March 30, 2018",78,5.8,English
+I Am All Girls,Thriller,"May 14, 2021",107,5.8,English
+Let It Snow,Romantic comedy,"November 8, 2019",92,5.8,English
+Mascots,Mockumentary,"October 13, 2016",95,5.8,English
+Operation Christmas Drop,Romantic comedy,"November 5, 2020",96,5.8,English
+Rajma Chawal,Comedy-drama,"November 30, 2018",118,5.8,Hindi
+Rich in Love,Romantic comedy,"April 30, 2020",105,5.8,Portuguese
+Rising High,Satire,"April 17, 2020",94,5.8,German
+Rodney King,One-man show,"April 28, 2017",52,5.8,English
+Sierra Burgess Is a Loser,Romantic comedy-drama,"September 7, 2018",105,5.8,English
+Small Crimes,Dark comedy,"April 28, 2017",95,5.8,English
+Special Correspondents,Satire,"April 29, 2016",100,5.8,English
+TAU,Science fiction/Thriller,"June 29, 2018",97,5.8,English
+The After Party,Comedy,"August 24, 2018",89,5.8,English
+The Babysitter: Killer Queen,Comedy/Horror,"September 10, 2020",102,5.8,English
+The Claus Family,Fantasy,"December 7, 2020",96,5.8,Dutch
+The Kissing Booth 2,Romantic comedy,"July 24, 2020",131,5.8,English
+The Perfect Date,Romantic comedy,"April 12, 2019",89,5.8,English
+What We Wanted,Drama,"November 11, 2020",93,5.8,German
+You've Got This,Romantic comedy,"October 2, 2020",111,5.8,Spanish
+6 Balloons,Drama,"April 6, 2018",75,5.9,English
+A Fall from Grace,Thriller,"January 17, 2020",120,5.9,English
+Amateur,Sports-drama,"April 6, 2018",96,5.9,English
+Army of the Dead,Zombie/Heist,"May 21, 2021",148,5.9,English
+Cam,Psychological horror,"November 16, 2018",94,5.9,English
+Earthquake Bird,Mystery,"November 15, 2019",107,5.9,English
+"Frankenstein's Monster's Monster, Frankenstein",Mockumentary,"July 16, 2019",32,5.9,English
+Horse Girl,Drama,"February 7, 2020",104,5.9,English
+Notes from Dunblane: Lesson from a School Shooting,Documentary,"September 28, 2018",23,5.9,English
+Maska,Romantic comedy,"March 27, 2020",111,5.9,Hindi
+The Decline,Thriller,"March 27, 2020",83,5.9,French
+The Minimalists: Less Is Now,Documentary,"January 1, 2021",53,5.9,English
+The Polka King,Comedy-drama,"January 12, 2018",95,5.9,English
+The Prom,Musical,"December 11, 2020",132,5.9,English
+True Memoirs of an International Assassin,Action comedy,"November 11, 2016",98,5.9,English
+Ultras,Sports film,"March 20, 2020",108,5.9,Italian
+Come Sunday,Biopic,"April 13, 2018",106,6,English
+Forgive Us Our Debts,Drama,"May 4, 2018",104,6,Italian
+iBoy,Science fiction/Thriller,"January 27, 2017",90,6,English
+Lovefucked,Drama,"August 9, 2019",106,6,Hindi
+Juanita,Drama,"March 8, 2019",90,6,English
+Murder Mystery,Comedy mystery,"June 14, 2019",97,6,English
+Project Power,Superhero,"August 14, 2020",113,6,English
+Rebecca,Romantic thriller,"October 21, 2020",123,6,English
+The Christmas Chronicles: Part Two,Christmas comedy,"November 25, 2020",115,6,English
+The Kissing Booth,Romantic comedy,"May 11, 2018",105,6,English
+The Princess Switch,Romantic comedy,"November 16, 2018",101,6,English
+To All the Boys: P.S. I Still Love You,Romantic comedy,"February 12, 2020",102,6,English
+War Machine,War-Comedy,"May 26, 2017",122,6,English
+6 Underground,Action,"December 13, 2019",128,6.1,English
+Between Two Ferns: The Movie,Comedy,"September 20, 2019",82,6.1,English
+Burning Sands,Drama,"March 10, 2017",102,6.1,English
+Casting JonBenet,Documentary,"April 28, 2017",80,6.1,English
+Deidra & Laney Rob a Train,Drama,"March 17, 2017",94,6.1,English
+Finding 'Ohana,Family,"January 29, 2021",123,6.1,English
+Holidate,Romantic comedy/Holiday,"October 28, 2020",104,6.1,English
+Holiday in the Wild,Adventure-romance,"November 1, 2019",85,6.1,English
+Hot Girls Wanted,Documentary,"May 29, 2015",84,6.1,English
+Like Father,Comedy,"August 3, 2018",103,6.1,English
+Lost Girls,Crime drama,"March 13, 2020",95,6.1,English
+Otherhood,Comedy,"August 2, 2019",100,6.1,English
+Pee-wee's Big Holiday,Adventure,"March 18, 2016",89,6.1,English
+Rogue City,Crime drama,"October 30, 2020",116,6.1,French
+Sergio,Biopic,"April 17, 2020",118,6.1,English
+Stuck Apart,Drama,"January 8, 2021",96,6.1,Turkish
+Tersanjung the Movie,Drama,"April 1, 2021",114,6.1,Indonesian
+The Killer,Western,"November 10, 2017",99,6.1,Portuguese
+The Lovebirds,Romantic comedy,"May 22, 2020",87,6.1,English
+The Most Hated Woman in America,Biopic,"March 24, 2017",92,6.1,English
+The Perfection,Horror-thriller,"May 24, 2019",90,6.1,English
+Tribhanga � Tedhi Medhi Crazy,Drama,"January 15, 2021",95,6.1,Hindi
+Unknown Origins,Thriller,"August 28, 2020",96,6.1,Spanish
+Work It,Dance comedy,"August 7, 2020",93,6.1,English
+Alien Xmas,Stop Motion,"November 20, 2020",42,6.2,English
+Baggio: The Divine Ponytail,Biopic,"May 26, 2021",92,6.2,Italian
+Below Zero,Drama,"January 29, 2021",106,6.2,Spanish
+Citation,Drama,"November 6, 2020",151,6.2,English
+Crazy Awesome Teachers,Comedy-drama,"August 17, 2020",101,6.2,Indonesian
+Have You Ever Seen Fireflies?,Comedy,"April 9, 2021",114,6.2,Turkish
+High Flying Bird,Sports-drama,"February 8, 2019",90,6.2,English
+In the Shadow of the Moon,Thriller,"September 27, 2019",115,6.2,English
+Lost Bullet,Thriller,"June 19, 2020",92,6.2,French
+Octonauts & the Caves of Sac Actun,Animation,"August 14, 2020",72,6.2,English
+Offering to the Storm,Thriller,"July 24, 2020",139,6.2,Spanish
+Roxanne Roxanne,Biopic,"March 23, 2018",98,6.2,English
+Someone Great,Romantic comedy,"April 19, 2019",92,6.2,English
+Spenser Confidential,Action comedy,"March 6, 2020",111,6.2,English
+The Land of Steady Habits,Drama,"September 14, 2018",98,6.2,English
+The Rachel Divide,Documentary,"April 27, 2018",104,6.2,English
+Voyuer ,Documentary,"December 1, 2017",95,6.2,English
+Win It All,Comedy,"April 7, 2017",88,6.2,English
+1922,Horror/Crime drama,"October 20, 2017",102,6.3,English
+A Tale of Two Kitchens,Documentary,"May 22, 2019",30,6.3,English/Spanish
+Alex Strangelove,Romantic comedy,"June 8, 2018",99,6.3,English
+Apostle,Horror-thriller,"October 12, 2018",129,6.3,English
+Benji,Family film,"March 16, 2018",87,6.3,English
+Bright,Urban fantasy,"December 22, 2017",117,6.3,English
+Cargo,Drama/Horror,"May 18, 2018",104,6.3,English
+Concrete Cowboy,Drama,"April 2, 2021",111,6.3,English
+Feel the Beat,Family/Comedy-drama,"June 19, 2020",107,6.3,English
+Get the Goat,Comedy,"March 18, 2021",97,6.3,Portuguese
+I Am Not an Easy Man,Romantic comedy,"April 13, 2018",98,6.3,French
+June & Kopi,Drama,"January 28, 2021",90,6.3,Indonesian
+Music Teacher,Drama,"April 19, 2019",101,6.3,Hindi
+Nail Bomber: Manhunt,Documentary,"May 26, 2021",72,6.3,English
+Notes for My Son,Drama,"November 24, 2020",83,6.3,Spanish
+Polar,Action,"January 25, 2019",118,6.3,English
+Porta dos Fundos: The Last Hangover,Comedy,"December 21, 2018",44,6.3,Portuguese
+Sand Castle,War,"April 21, 2017",113,6.3,English
+Shimmer Lake,Crime thriller,"June 9, 2017",86,6.3,English
+Spectral,Science fiction/Action,"December 9, 2016",108,6.3,English
+The Babysitter,Teen comedy horror,"October 13, 2017",85,6.3,English
+The Discovery,Science fiction/Drama,"March 31, 2017",102,6.3,English
+The Forest of Love,Drama,"October 11, 2019",151,6.3,Japanese
+The Laundromat,Comedy-drama,"October 18, 2019",98,6.3,English
+The Legend of Cocaine Island,Documentary,"March 29, 2019",87,6.3,English
+The Outsider,Crime drama,"March 9, 2018",120,6.3,English/Japanese
+Time to Hunt,Thriller,"April 23, 2020",134,6.3,Korean
+To All the Boys: Always and Forever,Romantic comedy,"February 12, 2021",109,6.3,English
+Travis Scott: Look Mom I Can Fly,Documentary,"August 28, 2019",85,6.3,English
+Uncorked,Drama,"March 27, 2020",103,6.3,English
+Anelka: Misunderstood,Documentary,"August 5, 2020",94,6.4,French
+"Ariana Grande: Excuse Me, I Love You",Concert Film,"December 21, 2020",97,6.4,English
+ARQ,Science fiction/Thriller,"September 16, 2016",88,6.4,English
+Birders,Documentary,"September 25, 2019",37,6.4,English/Spanish
+Como Ca�do del Cielo,Musical comedy,"December 24, 2019",112,6.4,Spanish
+First Match,Sports-drama,"March 30, 2018",102,6.4,English
+Fractured,Thriller,"October 11, 2019",100,6.4,English
+Irreplaceable You,Drama,"February 16, 2018",96,6.4,English
+Isi & Ossi,Romantic comedy,"February 14, 2020",113,6.4,German
+John Was Trying to Contact Aliens,Documentary,"August 20, 2020",16,6.4,English
+Layla Majnun,Romantic drama,"February 11, 2021",119,6.4,Indonesian
+Murder to Mercy: The Cyntoia Brown Story,Documentary,"April 29, 2020",97,6.4,English
+My Own Man,Documentary,"December 13, 2014",81,6.4,English
+Nappily Ever After,Comedy-drama,"September 21, 2018",98,6.4,English
+Over the Moon,Animation/Musical/Adventure,"October 23, 2020",95,6.4,English
+Street Flow,Drama,"October 12, 2019",96,6.4,French
+Strong Island,Documentary,September 15. 2017,107,6.4,English
+Sturgill Simpson Presents: Sound & Fury,Animation / Musicial,"September 27, 2019",41,6.4,English
+Take Your Pills,Documentary,"March 16, 2018",87,6.4,English
+The Heartbreak Club,Comedy-drama,"January 14, 2021",101,6.4,Indonesian
+The Mars Generation,Documentary,"May 5, 2017",97,6.4,English
+The Occupant,Thriller,"March 25, 2020",103,6.4,Spanish
+The Willoughbys,Animation/Comedy/Adventure,"April 22, 2020",90,6.4,English
+Triple Frontier,Action-thriller,"March 13, 2019",125,6.4,English
+Two Catalonias,Documentary,"September 28, 2018",116,6.4,Spanish/Catalan
+Walk. Ride. Rodeo.,Drama,"March 8, 2019",99,6.4,English
+Wheelman,Action thriller,"October 20, 2017",82,6.4,English
+When We First Met,Romantic comedy,"February 9, 2018",97,6.4,English
+A 3 Minute Hug,Documentary,"October 28, 2019",28,6.5,English/Spanish
+All the Bright Places,Romance,"February 28, 2020",108,6.5,English
+All Together Now,Drama,"August 28, 2020",93,6.5,English
+Altered Carbon: Resleeved,Anime/Science fiction,"March 19, 2020",74,6.5,Japanese
+Antoine Griezmann: The Making of a Legend,Documentary,"March 21, 2019",60,6.5,French
+Canvas ,Animation / Short,"December 11, 2020",9,6.5,English
+Chadwick Boseman: Portrait of an Artist,Documentary,"April 17, 2021",21,6.5,English
+Chopsticks,Comedy,"May 31, 2019",100,6.5,Hindi
+Da 5 Bloods,War drama,"June 12, 2020",155,6.5,English
+Dolly Parton: A MusiCares Tribute,Documentary,"April 7, 2021",55,6.5,English
+Eurovision Song Contest: The Story of Fire Saga,Musical comedy,"June 26, 2020",123,6.5,English
+Gerald's Game,Horror thriller,"September 29, 2017",103,6.5,English
+His House,Thriller,"October 30, 2020",93,6.5,English
+Jingle Jangle: A Christmas Journey,Family/Christmas musical,"November 13, 2020",119,6.5,English
+Life Overtakes Me,Documentary,"June 14, 2019",40,6.5,English/Swedish
+Lust Stories,Drama,"June 15, 2018",120,6.5,Hindi
+Monster,Drama,"May 7, 2021",98,6.5,English
+Mowgli: Legend of the Jungle,Adventure,"December 7, 2018",104,6.5,English
+Nobody Knows I'm Here,Drama,"June 24, 2020",91,6.5,Spanish
+Nobody Speak: Trials of the Free Press,Documentary,"June 23, 2017",95,6.5,English
+Oxygen,Science fiction thriller,"May 12, 2021",101,6.5,French
+Set It Up,Romantic comedy,"June 15, 2018",105,6.5,English
+The Incredible Jessica James,Comedy,"July 28, 2017",83,6.5,English
+Tigertail,Drama,"April 10, 2020",91,6.5,English/Taiwanese/Mandarin
+Tramps,Romance,"April 21, 2017",83,6.5,English
+What Did Jack Do?,Drama / Short,"January 20, 2020",17,6.5,English
+Bad Trip,Hidden-camera prank comedy,"March 26, 2021",86,6.6,English
+Bird Box,Psychological thriller,"December 21, 2018",124,6.6,English
+Bulbbul,Horror,"June 24, 2020",94,6.6,Hindi
+Crazy About Her,Romantic comedy,"February 26, 2021",102,6.6,Spanish
+Elisa & Marcela,Romance,"June 7, 2019",118,6.6,Spanish
+I'll Sleep When I'm Dead,Documentary,"August 19, 2016",79,6.6,English
+I'm Thinking of Ending Things,Psychological thriller,"September 4, 2020",134,6.6,English
+It Takes a Lunatic,Documentary,"October 25, 2019",126,6.6,English
+Milestone,Drama,"May 7, 2021",98,6.6,Hindi
+Recovery Boys,Documentary,"June 29, 2018",89,6.6,English
+ReMastered: Who Killed Jam Master Jay?,Documentary,"December 7, 2018",58,6.6,English
+Shawn Mendes: In Wonder,Documentary,"November 23, 2020",83,6.6,English
+Space Sweepers,Science fiction,"February 5, 2021",136,6.6,Korean
+The American Meme,Documentary,"December 7, 2018",98,6.6,English
+The Angel,Spy thriller,"September 14, 2018",114,6.6,English
+The Crimes That Bind,Crime drama,"August 20, 2020",99,6.6,Spanish
+The Red Sea Diving Resort,Spy thriller,"July 31, 2019",130,6.6,English
+What Would Sophia Loren Do?,Documentary,"January 15, 2021",32,6.6,English
+A Whisker Away,Anime/Fantasy,"June 18, 2020",104,6.7,Japanese
+Ajeeb Daastaans,Drama,"April 16, 2021",142,6.7,Hindi
+Arlo the Alligator Boy,Animated musical comedy,"April 16, 2021",92,6.7,English
+"Bikram: Yogi, Guru, Predator",Documentary,"November 20, 2019",86,6.7,English
+Blame!,Anime/Science fiction,"May 20, 2017",106,6.7,Japanese
+Blue Miracle,Drama,"May 27, 2021",95,6.7,English
+CounterPunch ,Documentary,"June 16, 2017",91,6.7,English
+"Crack: Cocaine, Corruption & Conspiracy",Documentary,"January 11, 2021",89,6.7,English
+Extraction,Action,"April 24, 2020",117,6.7,English
+Giving Voice,Documentary,"December 11, 2020",90,6.7,English
+Hillbilly Elegy,Drama,"November 24, 2020",117,6.7,English
+Hope Frozen: A Quest to Live Twice,Documentary,"September 15, 2020",80,6.7,Thia/English
+Imperial Dreams,Drama,"February 3, 2017",87,6.7,English
+Just Another Christmas,Comedy,"December 3, 2020",101,6.7,Portuguese
+Little Miss Sumo,Documentary,"October 28, 2019",19,6.7,Japanese
+Malcolm & Marie,Romantic drama,"February 5, 2021",106,6.7,English
+"Michael Bolton's Big, Sexy, Valentine's Day Special",Variety Show,"February 7, 2017",54,6.7,English
+Moxie,Drama,"March 3, 2021",111,6.7,English
+Night in Paradise,Drama,"April 9, 2021",132,6.7,Korean
+Paper Lives,Drama,"March 12, 2021",97,6.7,Turkish
+Parchis: The Documentary,Documentary,"July 10, 2019",106,6.7,Spanish
+Tallulah,Comedy-drama,"July 29, 2016",111,6.7,English
+The Old Guard,Superhero/Action,"July 10, 2020",124,6.7,English
+Tony Robbins: I Am Not Your Guru,Documentary,July 15. 2016,116,6.7,English
+Upstarts,Drama,"October 18, 2019",112,6.7,Hindi
+22 July,Drama,"October 10, 2018",144,6.8,English
+7 años,Drama,"October 28, 2016",76,6.8,Spanish
+A Futile and Stupid Gesture,Biographical/Comedy,"January 26, 2018",101,6.8,English
+A Life of Speed: The Juan Manuel Fangio Story,Documentary,"March 20, 2020",92,6.8,Spanish
+A Love Song for Latasha,Documentary,"September 21, 2020",19,6.8,English
+All in My Family,Documentary,"May 3, 2019",39,6.8,English/Mandarin
+Always Be My Maybe,Romantic comedy,"May 31, 2019",102,6.8,English
+Becoming,Documentary,"May 6, 2020",89,6.8,English
+Long Live Brij Mohan,Comedy,"August 3, 2018",105,6.8,Hindi
+Calibre,Thriller,"June 29, 2018",101,6.8,English
+Death to 2020,Comedy,"December 27, 2020",70,6.8,English
+GIMS: On the Record,Documentary,"September 17, 2020",96,6.8,French
+Have a Good Trip: Adventures in Psychedelics,Documentary,"May 11, 2020",85,6.8,English
+Heroin(e) ,Documentary,"September 12, 2017",39,6.8,English
+Mercury 13,Documentary,"April 20, 2018",79,6.8,English
+Saving Capitalism,Documentary,"November 21, 2017",73,6.8,English
+Serious Men,Drama,"October 2, 2020",114,6.8,Hindi
+The Boys in the Band,Drama,"September 30, 2020",121,6.8,English
+The Boys in the Band: Something Personal,Aftershow / Interview,"September 30, 2020",28,6.8,English
+The Life Ahead,Drama,"November 13, 2020",95,6.8,Italian
+The Other Side of the Wind,Drama,"November 2, 2018",122,6.8,English
+The Trader,Documentary,"February 9, 2018",23,6.8,Georgian
+To the Bone,Drama,"July 14, 2017",107,6.8,English
+Tony Parker: The Final Shot,Documentary,"January 6, 2021",98,6.8,French
+AK vs AK,Thriller,"December 24, 2020",108,6.9,Hindi
+Amanda Knox,Documentary,"September 30, 2016",92,6.9,English
+Bigflo & Oil: Hip Hop Frenzy,Documentary,"October 8, 2020",100,6.9,French
+Biggie: I Got a Story to Tell,Documentary,"March 1, 2021",97,6.9,English
+Cops and Robbers,Animation / Short,"December 28, 2020",7,6.9,English
+I Don't Feel at Home in This World Anymore,Drama,"February 24, 2017",96,6.9,English
+Laerte-se,Documentary,"May 19, 2017",100,6.9,Portuguese
+Mank,Biopic,"December 4, 2020",132,6.9,English
+Our Souls at Night,Romance,"September 29, 2017",103,6.9,English
+Outlaw King,Historical-epic,"November 9, 2018",121,6.9,English
+Pagglait,Comedy-drama,"March 26, 2021",114,6.9,Hindi
+ReMastered: Who Shot the Sheriff?,Documentary,"October 12, 2018",57,6.9,English
+Seeing Allred,Documentary,"February 9, 2018",95,6.9,English
+Spelling the Dream,Documentary,"June 3, 2020",83,6.9,English
+The Claudia Kishi Club,Documentary,"July 10, 2020",17,6.9,English
+The Half of It,Romance,"May 1, 2020",105,6.9,English
+The Highwaymen,Crime drama,"March 29, 2019",131,6.9,English
+The Lonely Island Presents: The Unauthorized Bash Brothers Experience,Comedy / Musical,"May 23, 2019",30,6.9,English
+The Meyerowitz Stories (New and Selected),Comedy-drama,"October 13, 2017",112,6.9,English
+Feminists: What Were They Thinking?,Documentary,"October 12, 2018",86,7,English
+Gaga: Five Foot Two,Documentary,"September 22, 2017",100,7,English
+I'm No Longer Here: A Discussion with Guillermo del Toro and Alfonso Cuaron,Aftershow / Interview,"November 3, 2020",14,7,English
+Kingdom of Us,Documentary,"October 13, 2017",109,7,English
+"Lorena, Light-Footed Woman",Documentary,"November 20, 2019",28,7,Spanish
+Los Tigres del Norte at Folsom Prison,Documentary,"September 15, 2019",64,7,Spanish
+Ma Rainey's Black Bottom,Drama,"December 18, 2020",94,7,English
+Ma Rainey's Black Bottom: A Legacy Brought to Screen,Aftershow / Interview,"December 18, 2020",31,7,English
+Operation Varsity Blues: The College Admissions Scandal,Documentary,"March 17, 2021",99,7,English
+Pele,Documentary,"February 23, 2021",108,7,English
+ReMastered: Devil at the Crossroads,Documentary,"April 26, 2019",48,7,English
+ReMastered: The Lion's Share,Documentary,"May 17, 2019",84,7,English
+ReMastered: The Miami Showband Massacre,Documentary,"March 22, 2019",70,7,English
+Resurface,Documentary,"September 1, 2017",27,7,English
+Rocko's Modern Life: Static Cling,Animation / Comedy,"August 9, 2019",45,7,English
+Rose Island,Comedy,"December 9, 2020",117,7,Italian
+The Christmas Chronicles,Christmas/Fantasy/Adventure/Comedy,"November 22, 2018",104,7,English
+The Dirt,Biopic,"March 22, 2019",108,7,English
+The Night Comes for Us,Action-thriller,"October 19, 2018",121,7,Indonesian
+13th: A Conversation with Oprah Winfrey & Ava DuVernay,Aftershow / Interview,"January 26, 2017",36,7.1,English
+Angela's Christmas,Animation,"November 30, 2018",30,7.1,English
+Angela's Christmas Wish,Animation,"December 1, 2020",47,7.1,English
+Beats,Drama,"June 19, 2019",110,7.1,English
+Circus of Books,Documentary,"April 22, 2020",92,7.1,English
+Dance Dreams: Hot Chocolate Nutcracker,Documentary,"November 27, 2020",80,7.1,English
+Derren Brown: Sacrifice,Mentalism special,"October 19, 2018",49,7.1,English
+El Pepe: A Supreme Life,Documentary,"December 27, 2019",73,7.1,Spanish
+End Game,Documentary,"May 4, 2018",40,7.1,English
+Evelyn,Documentary,"September 10, 2019",96,7.1,English
+Ferry,Crime drama,"May 14, 2021",106,7.1,Dutch
+Grass Is Greener,Documentary,"April 20, 2019",97,7.1,English
+Guillermo Vilas: Settling the Score,Documentary,"October 27, 2020",94,7.1,Spanish
+Joshua: Teenager vs. Superpower,Documentary,"May 26, 2017",78,7.1,English
+Keith Richards: Under the Influence,Documentary,"September 18, 2015",81,7.1,English
+Knock Down the House,Documentary,"May 1, 2019",87,7.1,English
+Loudon Wainwright III: Surviving Twin,One-man show,"November 13, 2018",91,7.1,English
+My Beautiful Broken Brain,Documentary,"March 18, 2016",91,7.1,English
+One of Us,Documentary,"October 20, 2017",95,7.1,English
+Pieces of a Woman,Drama,"January 7, 2021",126,7.1,English
+"Ram Dass, Going Home",Documentary,"April 6, 2018",31,7.1,English
+ReMastered: Tricky Dick & the Man in Black,Documentary,"November 2, 2018",58,7.1,English
+Rooting for Roona,Documentary,"October 15, 2020",41,7.1,Bengali
+The Devil All the Time,Psychological thriller,"September 16, 2020",138,7.1,English
+The Dig,Drama,"January 29, 2021",112,7.1,English
+The Great Hack,Documentary,"July 24, 2019",114,7.1,English
+The White Tiger,Drama,"January 22, 2021",125,7.1,English
+To All the Boys I've Loved Before,Romantic comedy,"August 17, 2018",99,7.1,English
+American Murder: The Family Next Door,Documentary,"September 30, 2020",82,7.2,English
+Audrie & Daisy,Documentary,"September 23, 2016",98,7.2,English
+First They Killed My Father,Drama,"September 15, 2017",136,7.2,Khmer/English/French
+Fyre: The Greatest Party That Never Happened,Documentary,"January 18, 2019",97,7.2,English
+Into the Inferno,Documentary,"October 28, 2016",107,7.2,English
+LA Originals,Documentary,"April 10, 2020",92,7.2,English
+Ladies First,Documentary,"March 8, 2018",39,7.2,English/Hindi
+Love per Square Foot,Romantic comedy,"February 14, 2018",133,7.2,Hindi
+Paddleton,Drama-Comedy,"February 22, 2019",89,7.2,English
+Private Life,Drama,"October 5, 2018",124,7.2,English
+Seventeen,Coming-of-age comedy-drama,"October 18, 2019",99,7.2,Spanish
+Sometimes,Drama,"May 1, 2018",101,7.2,Tamil
+Soni,Crime drama,"January 18, 2019",97,7.2,Hindi
+The 40-Year-Old Version,Comedy,"October 9, 2020",124,7.2,English
+The Disciple,Drama,"April 30, 2021",129,7.2,Marathi
+The Edge of Democracy,Documentary,"June 19, 2019",121,7.2,Portuguese
+The King,Historical drama,"November 1, 2019",140,7.2,English
+The Road to El Camino: A Breaking Bad Movie,Making-of,"October 29, 2019",13,7.2,English
+The Siege of Jadotville,War,"October 7, 2016",108,7.2,English
+Zion ,Documentary,"August 10, 2018",11,7.2,English
+Dolemite Is My Name,Biopic,"October 25, 2019",118,7.3,English
+El Camino: A Breaking Bad Movie,Crime drama,"October 11, 2019",121,7.3,English
+Extremis,Documentary,"September 13, 2016",24,7.3,English
+Father Soldier Son,Documentary,"July 17, 2020",100,7.3,English
+Get Me Roger Stone,Documentary,"May 12, 2017",101,7.3,English
+I'm No Longer Here,Drama,"May 27, 2020",105,7.3,Spanish
+Mucho Mucho Amor: The Legend of Walter Mercado ,Documentary,"July 8, 2020",96,7.3,Spanish/English
+Octonauts & the Great Barrier Reef,Animation,"October 13, 2020",47,7.3,English
+Okja,Action-adventure,"June 28, 2017",121,7.3,English/Korean
+On My Skin,Crime drama,"September 12, 2018",100,7.3,Italian
+Raat Akeli Hai,Thriller,"July 31, 2020",149,7.3,Hindi
+ReMastered: Massacre at the Stadium,Documentary,"January 11, 2019",64,7.3,English/Spanish
+ReMastered: The Two Killings of Sam Cooke,Documentary,"February 8, 2019",64,7.3,English
+Secrets of the Saqqara Tomb,Documentary,"October 28, 2020",114,7.3,English/Arabic
+Sitara: Let Girls Dream,Animation / Short,"March 8, 2020",15,7.3,English
+Sky Ladder: The Art of Cai Guo-Qiang,Documentary,"October 14, 2016",79,7.3,English/Mandarin
+Team Foxcatcher,Documentary,"April 29, 2016",90,7.3,English/Russian
+The Ballad of Buster Scruggs,Western,"November 16, 2018",132,7.3,English
+The Death and Life of Marsha P. Johnson,Documentary,"October 6, 2017",105,7.3,English
+The Fundamentals of Caring,Comedy-drama,"June 24, 2016",97,7.3,English
+The Other One: The Long Strange Trip of Bob Weir,Documentary,"May 22, 2015",83,7.3,English
+American Factory,Documentary,"August 21, 2019",110,7.4,English
+Fire in Paradise,Documentary,"November 1, 2019",39,7.4,English
+Long Shot,Documentary,"September 29, 2017",40,7.4,English
+Miss Americana,Documentary,"January 31, 2020",85,7.4,English
+Period. End of Sentence.,Documentary,"February 12, 2019",26,7.4,English/Hindi
+Shawn Mendes: Live in Concert,Concert Film,"November 25, 2020",87,7.4,English
+Shirkers,Documentary,"October 26, 2018",97,7.4,English
+The Black Godfather,Documentary,"June 7, 2019",118,7.4,English
+The Irishman: In Conversation,Aftershow / Interview,"November 27, 2019",23,7.4,English
+The Speed Cubers,Documentary,"July 29, 2020",40,7.4,English
+They'll Love Me When I'm Dead,Documentary,"November 2, 2018",98,7.4,English
+Tig,Documentary,"July 17, 2015",80,7.4,English
+"Barbra: The Music, The Mem'ries, The Magic!",Concert Film,"November 22, 2017",108,7.5,English
+Blackpink: Light Up the Sky,Documentary,"October 14, 2020",79,7.5,Korean
+City of Joy,Documentary,"September 7, 2018",74,7.5,English
+Dick Johnson Is Dead,Documentary,"October 2, 2020",90,7.5,English
+Homecoming: A Film by Beyonce ,Documentary,"April 17, 2019",137,7.5,English
+Invader Zim: Enter the Florpus,Animation / Science Fiction,"August 16, 2019",71,7.5,English
+Joan Didion: The Center Will Not Hold,Documentary,October 27. 2017,98,7.5,English
+John Mulaney & the Sack Lunch Bunch,Variety show,"December 24, 2019",70,7.5,English
+Reversing Roe,Documentary,"September 13, 2018",99,7.5,English
+The White Helmets,Documentary,September 16. 2016,40,7.5,English
+Athlete A,Documentary,"June 24, 2020",104,7.6,English
+Ludo,Anthology/Dark comedy,"November 12, 2020",149,7.6,Hindi
+Quincy,Documentary,"September 21, 2018",124,7.6,English
+Rolling Thunder Revue: A bob Dylan Story by Martin Scorsere,Documentary,"June 12, 2019",144,7.6,English
+Tell Me Who I Am,Documentary,"October 18, 2019",85,7.6,English
+The Bleeding Edge,Documentary,"July 27, 2018",100,7.6,English
+The Social Dilemma,Documentary,"September 9, 2020",94,7.6,English
+The Two Popes,Drama,"December 20, 2019",125,7.6,English
+"What Happened, Miss Simone?",Documentary,"June 26, 2015",84,7.6,English
+Yeh Ballet,Drama,"February 21, 2020",117,7.6,Hindi
+Anima,Musical / Short,"June 27, 2019",15,7.7,English
+Beasts of No Nation,War drama,"October 16, 2015",136,7.7,English/Akan
+Brene Brown: The Call to Courage,Documentary,"April 19, 2019",76,7.7,English
+Crip Camp: A Disability Revolution,Documentary,"March 25, 2020",108,7.7,English
+"Jim & Andy: The Great Beyond - Featuring a Very Special, Contractually Obligated Mention of Tony Cliffton ",Documentary,"November 17, 2017",94,7.7,English
+Justin Timberlake + The Tennessee Kids,Concert Film,"October 12, 2016",90,7.7,English
+Road to Roma,Making-of,"February 11, 2020",72,7.7,Spanish
+Roma,Drama,"December 14, 2018",135,7.7,Spanish
+If Anything Happens I Love You,Animation / Short,"November 20, 2020",12,7.8,English
+The Irishman,Crime drama,"November 27, 2019",209,7.8,English
+The Trial of the Chicago 7,Drama,"October 16, 2020",130,7.8,English
+A Secret Love,Documentary,"April 29, 2020",82,7.9,English
+Icarus,Documentary,"August 4, 2017",120,7.9,English
+Marriage Story,Drama,"December 6, 2019",136,7.9,English
+The Ivory Game,Documentary,"November 4, 2016",112,7.9,English
+Struggle: The Life and Lost Art of Szukaiski,Documentary,"December 21, 2018",105,8,English
+Chasing Coral ,Documentary,"July 14, 2017",89,8.1,English
+My Octopus Teacher,Documentary,"September 7, 2020",85,8.1,English
+Rising Phoenix,Documentary,"August 26, 2020",106,8.1,English
+13th,Documentary,"October 7, 2016",100,8.2,English
+Disclosure: Trans Lives on Screen,Documentary,"June 19, 2020",107,8.2,English
+Klaus,Animation/Christmas/Comedy/Adventure,"November 15, 2019",97,8.2,English
+Seaspiracy,Documentary,"March 24, 2021",89,8.2,English
+The Three Deaths of Marisela Escobedo,Documentary,"October 14, 2020",109,8.2,Spanish
+Cuba and the Cameraman ,Documentary,"November 24, 2017",114,8.3,English
+Dancing with the Birds,Documentary,"October 23, 2019",51,8.3,English
+Ben Platt: Live from Radio City Music Hall,Concert Film,"May 20, 2020",85,8.4,English
+Taylor Swift: Reputation Stadium Tour,Concert Film,"December 31, 2018",125,8.4,English
+Winter on Fire: Ukraine's Fight for Freedom,Documentary,"October 9, 2015",91,8.4,English/Ukranian/Russian
+Springsteen on Broadway,One-man show,"December 16, 2018",153,8.5,English
+Emicida: AmarElo - It's All For Yesterday,Documentary,"December 8, 2020",89,8.6,Portuguese
+David Attenborough: A Life on Our Planet,Documentary,"October 4, 2020",83,9,English
\ No newline at end of file
diff --git a/homeworks/hw04/data/small_srs_rating.csv b/homeworks/hw04/data/small_srs_rating.csv
new file mode 100644
index 0000000..dbc8d41
--- /dev/null
+++ b/homeworks/hw04/data/small_srs_rating.csv
@@ -0,0 +1,71 @@
+,Title,Genre,Premiere,Runtime,IMDb Score,Language
+0,Heroin(e) ,Documentary,"September 12, 2017",39,6.8,English
+1,Shawn Mendes: In Wonder,Documentary,"November 23, 2020",83,6.6,English
+2,Kaali Khuhi,Mystery,"October 30, 2020",90,3.4,Hindi
+3,Tell Me Who I Am,Documentary,"October 18, 2019",85,7.6,English
+4,I'll Sleep When I'm Dead,Documentary,"August 19, 2016",79,6.6,English
+5,Mrs. Serial Killer,Thriller,"May 1, 2020",106,4.8,Hindi
+6,Bulbbul,Horror,"June 24, 2020",94,6.6,Hindi
+7,The Disciple,Drama,"April 30, 2021",129,7.2,Marathi
+8,In the Tall Grass,Horror,"October 4, 2019",101,5.4,English
+9,13th: A Conversation with Oprah Winfrey & Ava DuVernay,Aftershow / Interview,"January 26, 2017",36,7.1,English
+10,The Holiday Calendar,Romantic comedy,"November 2, 2018",95,5.7,English
+11,Caught by a Wave,Romantic teen drama,"March 25, 2021",99,5.7,Italian
+12,The Do-Over,Action comedy,"May 27, 2016",108,5.7,English
+13,Reversing Roe,Documentary,"September 13, 2018",99,7.5,English
+14,I Am Not an Easy Man,Romantic comedy,"April 13, 2018",98,6.3,French
+15,Drive,Action,"November 1, 2019",147,3.5,Hindi
+16,Ghost Stories,Horror anthology,"January 1, 2020",144,4.3,Hindi
+17,Kingdom of Us,Documentary,"October 13, 2017",109,7.0,English
+18,One Take,Documentary,"June 18, 2020",85,5.7,Thai
+19,Eurovision Song Contest: The Story of Fire Saga,Musical comedy,"June 26, 2020",123,6.5,English
+20,The Boys in the Band,Drama,"September 30, 2020",121,6.8,English
+21,Nail Bomber: Manhunt,Documentary,"May 26, 2021",72,6.3,English
+22,The Siege of Jadotville,War,"October 7, 2016",108,7.2,English
+23,American Son,Drama,"November 1, 2019",90,5.8,English
+24,Pagglait,Comedy-drama,"March 26, 2021",114,6.9,Hindi
+25,Lovefucked,Drama,"August 9, 2019",106,6.0,Hindi
+26,Da 5 Bloods,War drama,"June 12, 2020",155,6.5,English
+27,Always Be My Maybe,Romantic comedy,"May 31, 2019",102,6.8,English
+28,They'll Love Me When I'm Dead,Documentary,"November 2, 2018",98,7.4,English
+29,Fractured,Thriller,"October 11, 2019",100,6.4,English
+30,The Old Guard,Superhero/Action,"July 10, 2020",124,6.7,English
+31,Who Would You Take to a Deserted Island?,Drama,"April 12, 2019",93,5.3,Spanish
+32,7 años,Drama,"October 28, 2016",76,6.8,Spanish
+33,Barry,Biopic,"December 16, 2016",104,5.8,English
+34,Holiday Rush,Family film,"November 28, 2019",94,4.9,English
+35,Invader Zim: Enter the Florpus,Animation / Science Fiction,"August 16, 2019",71,7.5,English
+36,The App,Science fiction/Drama,"December 26, 2019",79,2.6,Italian
+37,El Camino: A Breaking Bad Movie,Crime drama,"October 11, 2019",121,7.3,English
+38,GIMS: On the Record,Documentary,"September 17, 2020",96,6.8,French
+39,Baggio: The Divine Ponytail,Biopic,"May 26, 2021",92,6.2,Italian
+40,Como Caído del Cielo,Musical comedy,"December 24, 2019",112,6.4,Spanish
+41,A Secret Love,Documentary,"April 29, 2020",82,7.9,English
+42,Unicorn Store,Comedy,"April 5, 2019",92,5.5,English
+43,Athlete A,Documentary,"June 24, 2020",104,7.6,English
+44,Tall Girl,Comedy-drama,"September 13, 2019",102,5.2,English
+45,The American Meme,Documentary,"December 7, 2018",98,6.6,English
+46,Double Dad,Comedy-drama,"January 15, 2021",103,5.6,Portuguese
+47,The Sleepover,Comedy,"August 21, 2020",103,5.6,English
+48,Amanda Knox,Documentary,"September 30, 2016",92,6.9,English
+49,The Open House,Horror thriller,"January 19, 2018",94,3.2,English
+50,The Players,Comedy,"July 15, 2020",88,4.6,Italian
+51,Lost Girls,Crime drama,"March 13, 2020",95,6.1,English
+52,What Did Jack Do?,Drama / Short,"January 20, 2020",17,6.5,English
+53,"Love, Guaranteed",Romantic comedy,"September 3, 2020",91,5.6,English
+54,Finding 'Ohana,Family,"January 29, 2021",123,6.1,English
+55,Ginny Weds Sunny,Romantic comedy,"October 9, 2020",125,5.7,Hindi
+56,To All the Boys I've Loved Before,Romantic comedy,"August 17, 2018",99,7.1,English
+57,Mowgli: Legend of the Jungle,Adventure,"December 7, 2018",104,6.5,English
+58,Altered Carbon: Resleeved,Anime/Science fiction,"March 19, 2020",74,6.5,Japanese
+59,All in My Family,Documentary,"May 3, 2019",39,6.8,English/Mandarin
+60,If Anything Happens I Love You,Animation / Short,"November 20, 2020",12,7.8,English
+61,Cargo,Drama/Horror,"May 18, 2018",104,6.3,English
+62,The Beast,Drama,"November 27, 2020",99,5.2,Italian
+63,"Bikram: Yogi, Guru, Predator",Documentary,"November 20, 2019",86,6.7,English
+64,Unknown Origins,Thriller,"August 28, 2020",96,6.1,Spanish
+65,"Game Over, Man!",Action/Comedy,"March 23, 2018",101,5.4,English
+66,Sitara: Let Girls Dream,Animation / Short,"March 8, 2020",15,7.3,English
+67,Fyre: The Greatest Party That Never Happened,Documentary,"January 18, 2019",97,7.2,English
+68,Things Heard & Seen,Horror,"April 29, 2021",121,5.3,English
+69,Alex Strangelove,Romantic comedy,"June 8, 2018",99,6.3,English
diff --git a/homeworks/hw04/hw04.ipynb b/homeworks/hw04/hw04.ipynb
new file mode 100644
index 0000000..748b68e
--- /dev/null
+++ b/homeworks/hw04/hw04.ipynb
@@ -0,0 +1,1178 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Homework 4: Simulation, Sampling, and Hypothesis Testing\n",
+ "\n",
+ "## Due Tuesday, February 21st at 11:59PM\n",
+ "\n",
+ "Welcome to Homework 4! This homework will cover:\n",
+ "- Simulations (see [CIT 9.3-9.4](https://inferentialthinking.com/chapters/09/3/Simulation.html))\n",
+ "- Sampling and Empirical Distributions (see [CIT 10-10.4](https://inferentialthinking.com/chapters/10/Sampling_and_Empirical_Distributions.html))\n",
+ "- Models and Hypothesis Testing (see [CIT 11.2](https://inferentialthinking.com/chapters/11/2/Multiple_Categories.html))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Instructions\n",
+ "\n",
+ "Remember to start early and submit often. You are given six slip days throughout the quarter to extend deadlines. See the syllabus for more details. With the exception of using slip days, late work will not be accepted unless you have made special arrangements with your instructor.\n",
+ "\n",
+ "**Important**: For homeworks, the `otter` tests don't usually tell you that your answer is correct. More often, they help catch careless mistakes. It's up to you to ensure that your answer is correct. If you're not sure, ask someone (not for the answer, but for some guidance about your approach). These are great questions for office hours (the schedule can be found [here](https://dsc10.com/calendar)) or EdStem. Directly sharing answers is not okay, but discussing problems with the course staff or with other students is encouraged. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Please don't change this cell, but do make sure to run it.\n",
+ "import babypandas as bpd\n",
+ "import numpy as np\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "plt.style.use('ggplot')\n",
+ "\n",
+ "import otter\n",
+ "grader = otter.Notebook()\n",
+ "\n",
+ "import warnings\n",
+ "warnings.simplefilter('ignore')\n",
+ "\n",
+ "%reload_ext pandas_tutor"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1. Lucky Triton Lotto, Continued 🔱 🎱 🧜\n",
+ "\n",
+ "In the last homework, we calculated the probability of winning the grand prize (free housing) on a Lucky Triton Lotto lottery ticket, and found that it was quite low 😭."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Just run this cell, do not change it!\n",
+ "free_housing_chance = (1 / 31) * (1 / 30) * (1 / 29) * (1 / 28) * (1 / 27) * (1 / 8)\n",
+ "free_housing_chance"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this question, we'll approach the same problem using simulation instead of math. \n",
+ "\n",
+ "It's important to remember how this lottery works:\n",
+ "- When you buy a Lucky Triton Lotto ticket, you first pick five different numbers, one at a time, from 1 to 31. Then you separately pick a number from 1 to 8, which may or may not be the same as one of the first five. These are **your numbers**. For example, you may select (15, 1, 13, 3, 31, 8). This is a sequence of six numbers - **order matters**!\n",
+ "- The **winning numbers** are chosen by King Triton drawing five balls, one at a time, **without replacement**, from a pot of white balls numbered 1 to 31. Then, he draws a gold ball, the Tritonball, from a pot of gold balls numbered 1 to 8. Both pots are completely separate, hence the different ball colors. For example, maybe the winning numbers are (13, 15, 25, 3, 5, 8).\n",
+ "\n",
+ "We’ll assume for this problem that in order to win the grand prize (free housing), all six of your numbers need to match the winning numbers and be in the **exact same positions**. In other words, your entire sequence of numbers must be exactly the same. However, if some numbers in your sequence match up with the corresponding number in the winning sequence, you will still win some Triton Cash. \n",
+ "\n",
+ "Suppose again that your numbers are (15, 1, 13, 3, 31, 8) and the winning numbers are (13, 15, 25, 3, 5, 8). In this case, two of your numbers are considered to match two of the winning numbers. Notice that although both sequences include the number 15 within the first five numbers (representing a white ball), since they are in different positions, that's not considered a match.\n",
+ "\n",
+ "- Your numbers: (15, 1, 13, **3**, 31, **8**)\n",
+ "- Winning numbers: (13, 15, 25, **3**, 5, **8**)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 1.1.** Write a function called `simulate_one_ticket`. It should take no arguments, and it should return an array with 6 random numbers, simulating how the numbers are selected for a single Lucky Triton Lotto ticket. The first five numbers should all be randomly chosen without replacement, from 1 to 31 (inclusive). The last number should be randomly chosen from 1 to 8 (inclusive)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def simulate_one_ticket():\n",
+ " \"\"\"Simulate one Lucky Triton Lotto ticket.\"\"\"\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q1_1\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 1.2.** It's draw day. You checked the winning numbers King Triton drew, which happened to be **(24, 12, 3, 18, 23, 5)**. You didn't win free housing, and you are quite sad.\n",
+ "\n",
+ "Suppose you want to remind yourself how unlikely it is to win the grand prize. Call the function `simulate_one_ticket` 100,000 times. In your 100,000 tickets, **how many times did you win the grand prize (free housing)?** Assign your answer to `count_free_housing`. (It would cost a fortune if you were to buy 100,000 tickets – it's pretty nice to be able to simulate this experiment instead of doing it in real life!) \n",
+ "\n",
+ "*Hints*:\n",
+ "\n",
+ "- First, implement a simulation where you only buy 10 tickets. Once you are sure you have that figured out, change it to 100,000 tickets. It may take a little while (up to a minute) for Python to perform the calculations when you are buying 100,000 tickets.\n",
+ "\n",
+ "- You'll have to count how many of the numbers you chose match the numbers that were drawn. One way to do this involves [`np.count_nonzero`](https://numpy.org/doc/stable/reference/generated/numpy.count_nonzero.html). Remember you need **all** the numbers to match to win the grand prize, and a number is only considered a match if it appears at the same position in your numbers and the winning numbers."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "count_free_housing = ...\n",
+ "...\n",
+ "count_free_housing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q1_2\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Remember, the mathematical probability of winning free housing is quite low, on the order of $10^{-9}$. That's a lot lower than 1 in 100,000, which is $10^{-5}$.\n",
+ "\n",
+ "**Question 1.3.** As we've seen, you would need to be extremely lucky to win the grand prize. To encourage more students to buy Lucky Triton Lotto tickets, students can win Triton Cash if some of their numbers match the corresponding winning numbers, as described in the introduction. Again, simulate the act of buying 100,000 tickets, but this time find **the greatest number of matches achieved by any of your tickets**, and assign this number to `most_matches`. \n",
+ "\n",
+ "The winning numbers are the same as in the previous part: **(24, 12, 3, 18, 23, 5)**.\n",
+ "\n",
+ "For example, if 90,000 of your tickets matched 1 winning number and 10,000 of your tickets matched 2 winning numbers, then you would set `most_matches` to 2. If 99,999 of your tickets matched 1 winning number and one of your tickets matched 4 winning numbers, you would set `most_matches` to 4. If you happened to win the grand prize on one of your tickets, you would set `most_matches` to 6. Remember, order matters.\n",
+ "\n",
+ "*Hint*: There are several ways to approach this; one way involves storing the number of matches per ticket in an array and finding the largest number in that array. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "most_matches = ...\n",
+ "...\n",
+ "most_matches"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q1_3\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 1.4.** Suppose one Lucky Triton Lotto ticket costs $5.\n",
+ "\n",
+ "The Lucky Triton Lotto advertisement on Instagram promises you will never lose money because of the following generous prizes:\n",
+ "\n",
+ "- Win $10 with a 1-number match\n",
+ "\n",
+ "- Win $25 with a 2-number match\n",
+ "\n",
+ "- Win $100 with a 3-number match\n",
+ "\n",
+ "- Win $1,000 with a 4-number match\n",
+ "\n",
+ "- Win $5,000 with a 5-number match\n",
+ "\n",
+ "- Win $20,000 with a 6-number match (free housing!)\n",
+ "\n",
+ "If you had the money to buy 100,000 tickets, what would be your net winnings from buying these tickets? Since this is net winnings, this should account for the prizes you win and the cost of buying the tickets. Assign the amount to `net_winnings`. Note that a positive value means you won money overall, and a negative value means you lost money overall. Do you believe the advertisement's claims?\n",
+ "\n",
+ "The winning numbers are the same as in the previous part: **(24, 12, 3, 18, 23, 5)**.\n",
+ "\n",
+ "*Hint*: Again, there are a few ways you could approach this problem. One way involves generating another 100,000 random tickets and counting the amount earned per ticket, adding to a running total. Alternatively, if you created an array of the number of matches per ticket in Question 1.3, you could loop through that array."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "net_winnings = ...\n",
+ "...\n",
+ "net_winnings"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q1_4\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. Sampling with Netflix 🍿\n",
+ "\n",
+ "In this question, we will use a dataset consisting of information about all Netflix Original movies to get some practice with sampling. Run the cell below to load the data into a DataFrame, indexed by title."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Just run this cell, do not change it!\n",
+ "movie_data = bpd.read_csv('data/netflix_originals.csv').set_index('Title')\n",
+ "movie_data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We've provided a function called `compute_statistics` that takes as input a DataFrame with two columns, `'Runtime'` and `'IMDb Score'`, and then:\n",
+ "- draws a histogram of `'Runtime'`,\n",
+ "- draws a histogram of `'IMDb Score'`, and\n",
+ "- returns a two-element array containing the mean `'Runtime'` and mean `'IMDb Score'`.\n",
+ "\n",
+ "Run the cell below to define the `compute_statistics` function, and a helper function called `histograms`. Don't worry about how this code works, and please don't change anything."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Don't change this cell, just run it.\n",
+ "def histograms(df):\n",
+ " runtimes = df.get('Runtime').values\n",
+ " ratings = df.get('IMDb Score').values\n",
+ " \n",
+ " plt.subplots(1, 2, figsize=(15, 4), dpi=100)\n",
+ "\n",
+ " plt.subplot(1, 2, 1)\n",
+ " plt.hist(runtimes, density=True, alpha=0.5, color='blue', ec='w', bins=np.arange(0, 250, 10))\n",
+ " plt.title('Distribution of Runtimes')\n",
+ "\n",
+ " plt.subplot(1, 2, 2)\n",
+ " plt.hist(ratings, density=True, alpha=0.5, color='blue', ec='w', bins=np.arange(0, 10, 0.4))\n",
+ " plt.title('Distribution of IMDb Scores')\n",
+ " \n",
+ "def compute_statistics(runtimes_and_ratings_data, draw=True):\n",
+ " if draw:\n",
+ " histograms(runtimes_and_ratings_data)\n",
+ " avg_runtime = np.average(runtimes_and_ratings_data.get('Runtime').values)\n",
+ " avg_rating = np.average(runtimes_and_ratings_data.get('IMDb Score').values)\n",
+ " avg_array = np.array([avg_runtime, avg_rating]) \n",
+ " return avg_array"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can use this `compute_statistics` function to show the distribution of `'Runtime'` and `'IMDb Score'` and compute their means, for any collection of movies. \n",
+ "\n",
+ "Run the next cell to show these distributions and compute the means for all Netflix Original movies. Notice that an array containing the mean `'Runtime'` and mean `'IMDb Score'` values is displayed before the histograms."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "movie_stats = compute_statistics(movie_data)\n",
+ "movie_stats"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now, imagine that instead of having access to the full *population* of movies, we only have access to data on a smaller subset of movies, or a *sample*. For 584 movies, it's not so unreasonable to expect to see all the data, but usually we aren't so lucky. Instead, we often make *statistical inferences* about a large underlying population using a smaller sample.\n",
+ "\n",
+ "**Statistical inference** is the process of using data in a sample to _infer_ some characteristic about the population from which the sample was drawn. A common strategy for statistical inference is to estimate parameters of the population by computing the same statistics on a sample. This strategy sometimes works well and sometimes doesn't. The degree to which it gives us useful answers depends on several factors.\n",
+ "\n",
+ "One very important factor in the utility of samples is how they were gathered. Let's look at some different sampling strategies."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Convenience sampling\n",
+ "One sampling methodology, which is **generally a bad idea**, is to choose movies which are somehow convenient to sample. For example, you might choose movies that you have personally watched, since it's easier to collect information about them. This is called, somewhat pejoratively, *convenience sampling*.\n",
+ "\n",
+ "**Question 2.1.** Suppose you love scary movies 👻 and you decide to manually look up information on all Netflix Original movies in the following genres:\n",
+ "- `'Horror'`\n",
+ "- `'Thriller'`\n",
+ "- `'Horror thriller'`\n",
+ "\n",
+ "Assign `convenience_sample` to a subset of `movie_data` that contains only the rows for movies that are in one of these genres."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "convenience_sample = ...\n",
+ "convenience_sample"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q2_1\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 2.2.** Assign `convenience_stats` to an array of the mean `'Runtime'` and mean `'IMDb Score'` of your convenience sample. Since they're computed on a sample, these are called *sample means*. \n",
+ "\n",
+ "**_Hint_**: Use the function `compute_statistics`; it's okay if histograms are displayed as well."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "convenience_stats = ...\n",
+ "convenience_stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q2_2\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Next, we'll compare the distribution of `'Runtime'` in our convenience sample to the distribution of `'Runtime'` for all the movies in our dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Just run this cell, do not change it!\n",
+ "def compare_runtimes(first, second, first_title, second_title):\n",
+ " \"\"\"Compare the runtimes in two DataFrames.\"\"\"\n",
+ " bins = np.arange(0, 250, 10)\n",
+ " \n",
+ " plt.subplots(1, 2, figsize=(15, 4), dpi=85)\n",
+ "\n",
+ " plt.subplot(1, 2, 1)\n",
+ " plt.hist(first.get('Runtime'), bins=bins, density=True, ec='w', color='blue', alpha=0.5)\n",
+ " plt.title(f'Runtimes ({first_title})')\n",
+ " \n",
+ " plt.subplot(1, 2, 2)\n",
+ " plt.hist(second.get('Runtime'), bins=bins, density=True, ec='w', color='blue', alpha=0.5)\n",
+ " plt.title(f'Runtimes ({second_title})')\n",
+ "\n",
+ "compare_runtimes(movie_data, convenience_sample, 'All Movies', 'Convenience Sample')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 2.3.** From what you see in the histograms above, did the convenience sample give us an accurate picture of the runtimes for the full population of movies? Why or why not?\n",
+ "\n",
+ "Assign either 1, 2, 3, or 4 to the variable `sampling_q3` below. \n",
+ "1. Yes. The sample is large enough, so it is an accurate representation of the population.\n",
+ "2. No. Normally convenience samples give us an accurate representation of the population, but only if the sample size is large enough. Our convenience sample here was too small.\n",
+ "3. No. Normally convenience samples give us an accurate representation of the population, but we just got unlucky.\n",
+ "4. No. Convenience samples generally don't give us an accurate representation of the population."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sampling_q3 = ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q2_3\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Simple random sampling\n",
+ "A more principled approach is to sample uniformly at random from the movies. If we ensure that each movie is selected at most once, this is a **random sample without replacement**, sometimes abbreviated to \"**simple random sample**\" or \"**SRS**\". Imagine writing down each movie's title on a card, putting the cards in a hat, and shuffling the hat. To sample, pull out cards one by one and set them aside, stopping when the specified *sample size* is reached.\n",
+ "\n",
+ "We've produced two simple random samples of `movie_data`: the variable `small_srs_data` contains an SRS of size 70, and the variable `large_srs_data` contains an SRS of size 180."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we'll run the same analyses on the small simple random sample, the large simple random sample, and the convenience sample. The subsequent code draws the histograms and computes the means for `'Runtime'` and `'IMDb Score'`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Don't change this cell, but do run it.\n",
+ "small_srs_data = bpd.read_csv('data/small_srs_rating.csv').set_index('Title')\n",
+ "large_srs_data = bpd.read_csv('data/large_srs_rating.csv').set_index('Title')\n",
+ "\n",
+ "small_stats = compute_statistics(small_srs_data, draw=False);\n",
+ "large_stats = compute_statistics(large_srs_data, draw=False);\n",
+ "convenience_stats = compute_statistics(convenience_sample, draw=False);\n",
+ "\n",
+ "print('Full data stats: ', movie_stats)\n",
+ "print('Small SRS stats:', small_stats)\n",
+ "print('Large SRS stats:', large_stats)\n",
+ "print('Convenience sample stats: ', convenience_stats)\n",
+ "\n",
+ "color_dict = {\n",
+ " 'small SRS': 'blue',\n",
+ " 'large SRS': 'green',\n",
+ " 'convenience sample': 'orange'\n",
+ "}\n",
+ "\n",
+ "plt.subplots(3, 2, figsize=(15, 15), dpi=100)\n",
+ "i = 1\n",
+ "\n",
+ "for df, name in zip([small_srs_data, large_srs_data, convenience_sample], color_dict.keys()):\n",
+ " plt.subplot(3, 2, i)\n",
+ " i += 2\n",
+ " plt.hist(df.get('Runtime'), density=True, alpha=0.5, color=color_dict[name], ec='w', \n",
+ " bins=np.arange(0, 250, 10))\n",
+ " plt.title(f'Runtimes ({name})');\n",
+ "\n",
+ "i = 2\n",
+ "for df, name in zip([small_srs_data, large_srs_data, convenience_sample], color_dict.keys()):\n",
+ " plt.subplot(3, 2, i)\n",
+ " i += 2\n",
+ " plt.hist(df.get('IMDb Score'), density=True, alpha=0.5, color=color_dict[name], ec='w', \n",
+ " bins=np.arange(0, 10, 0.4))\n",
+ " plt.title(f'IMDb Ratings ({name})');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Producing simple random samples\n",
+ "Often it's useful to take random samples even when we have a larger dataset available. One reason is that doing so can help us understand how inaccurate other samples are.\n",
+ "\n",
+ "As we saw in Lecture 13, DataFrames have a `.sample` method for producing simple random samples. Note that its default is to sample **without** replacement, which aligns with how simple random samples are drawn."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 2.4.** Produce a simple random sample *without replacement* of size 70 from `movie_data`. Store an array containing the mean `'Runtime'` and mean `'IMDb Score'` of your SRS in `my_small_stats`. Again, it's fine if histograms are displayed."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "my_small_stats = ...\n",
+ "my_small_stats"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Run the cell in which `my_small_stats` is defined many times, to collect new samples and compute their sample means.\n",
+ "\n",
+ "\n",
+ "\n",
+ "Now, recall, `small_stats` is an array containing the mean `'Runtime'` and mean `'IMDb Score'` for the one small SRS that we provided you with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "small_stats"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "source": [
+ "Answer the following two-fold question:\n",
+ "- Are the values in `my_small_stats` (the mean `'Runtime'` and `'IMDb Score'` for **your** small SRS) similar to the values in `small_stats` (the mean `'Runtime'` and `'IMDb Score'` for the small SRS **we provided you with**)? \n",
+ "- Each time you collect a new sample – i.e. each time you re-run the cell where `my_small_stats` is defined – do the values in `my_small_stats` change a lot?\n",
+ "\n",
+ "Assign either 1, 2, 3, or 4 to the variable `sampling_q4` below.\n",
+ "1. The values in `my_small_stats` are identical to the values in `small_stats`, and change a bit each time a new sample is collected.\n",
+ "2. The values in `my_small_stats` are identical to the values in `small_stats`, and don't change at all each time a new sample is collected.\n",
+ "3. The values in `my_small_stats` are very different from the values in `small_stats`, and don't change at all each time a new sample is collected.\n",
+ "4. The values in `my_small_stats` are slightly different from the values in `small_stats`, and change a bit each time a new sample is collected.\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sampling_q4 = ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q2_4\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 2.5.** Similarly, create a simple random sample of size 180 from `movie_data` and store an array of the sample's mean `'Runtime'` and mean `'IMDb Score'` in `my_large_stats`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "my_large_stats = ...\n",
+ "my_large_stats"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Run the cell in which `my_large_stats` is defined many times. Do the histograms and mean statistics (mean `'Runtime'` and mean `'IMDb Score'`) seem to change more or less across samples of size 180 than across samples of size 70?\n",
+ "\n",
+ "Assign either 1, 2, or 3 to the variable `sampling_q5` below. \n",
+ "\n",
+ "1. The statistics change *less* across samples of size 180 than across samples of size 70.\n",
+ "2. The statistics change an *equal amount* across samples of size 180 and across samples of size 70.\n",
+ "3. The statistics change *more* across samples of size 180 than across samples of size 70."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sampling_q5 = ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q2_5\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3. Was it by Random Chansey? 🎲\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You recently decided to buy the video game *Pokémon Yellow* from someone on Ebay. The seller tells you that they've modified the game so that the probabilities of encountering certain Pokémon in certain locations have been altered. However, the seller doesn't tell you which specific locations have had their probability models changed and what they've been changed to.\n",
+ "\n",
+ "As you are playing *Pokémon Yellow*, you arrive at the Safari Zone, one of the most iconic locations in the game. You're curious as to your chances of encountering your favorite Pokémon, Chansey, in this location. You go onto [Bulbapedia](https://bulbapedia.bulbagarden.net/wiki/Kanto_Safari_Zone#Area_1) to find the probability model for this location, and you discover that for each Pokémon encounter in the Safari Zone, there is a 4% chance of encountering Chansey. \n",
+ "\n",
+ "After a few hours of gameplay in the Safari Zone, you have encountered Chansey only 23 times out of 784 total Pokémon encounters (around 2.9%). You start to suspect that the Safari Zone may have been one of the locations in which the previous owner of the game changed the probability model.\n",
+ "\n",
+ "To test this, you decide to run a hypothesis test with the following hypotheses:\n",
+ "\n",
+ "**Null Hypothesis**: In your copy of *Pokémon Yellow*, the probability of encountering Chansey at each Pokémon encounter in the Safari Zone is 4%. \n",
+ "\n",
+ "**Alternative Hypothesis**: In your copy of *Pokémon Yellow*, the probability of encountering Chansey at each Pokémon encounter in the Safari Zone is less than 4%."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 3.1.** Complete the implementation of the function `one_simulation`, which has no arguments. It should randomly generate 784 Pokémon encounters in the Safari Zone and return the **proportion** of encountered Pokémon that were Chansey. \n",
+ "\n",
+ "*Hint*: Use `np.random.multinomial`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def one_simulation():\n",
+ " ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q3_1\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 3.2.** The test statistic for our hypothesis test will be the difference between the proportion of Chansey encounters in a given sample of 784 Safari Zone encounters and the expected proportion of Chansey encounters, i.e.\n",
+ "\n",
+ "$$\\text{test statistic} = \\text{proportion of Chansey encounters in sample} - 0.04$$\n",
+ "\n",
+ "\n",
+ "Let's conduct 10,000 simulations. Create an array named `proportion_diffs` containing 10,000 simulated values of the test statistic described above. Utilize the function created in the previous question to perform this task."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "proportion_diffs = ...\n",
+ "\n",
+ "# Visualize with a histogram. Don't change anything below.\n",
+ "bpd.DataFrame().assign(proportion_differences=proportion_diffs).plot(kind='hist', bins=20, density=True, ec='w', figsize=(10, 5));\n",
+ "plt.axvline(x=(23 / 784 - 0.04), color='black', linewidth=4, label='observed statistic')\n",
+ "plt.legend();"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q3_2\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 3.3.** Calculate the p-value for this hypothesis test, and assign the result to `safari_zone_p`.\n",
+ "\n",
+ "*Hint*: Do large values of our test statistic favor the alternative hypothesis, or do small values of our test statistic favor the alternative hypothesis?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "safari_zone_p = ...\n",
+ "safari_zone_p"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q3_3\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 3.4.** Using the standard p-value cutoff of 0.05, what can we conclude from our hypothesis test? Assign either 1, 2, 3, or 4 to the variable `safari_zone_conclusion`, corresponding to the best conclusion.\n",
+ " \n",
+ " 1. We reject the null hypothesis. There is not enough evidence to say if the observed data is consistent with the model.\n",
+ " 2. We reject the null hypothesis. The observed data is inconsistent with the model.\n",
+ " 3. We accept the null hypothesis. The observed data is consistent with the model.\n",
+ " 4. We fail to reject the null hypothesis. There is not enough evidence to say that the observed data is inconsistent with the model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "safari_zone_conclusion = ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q3_4\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 3.5.** In this question, we chose as our test statistic the proportion of Chansey encounters in the Safari Zone minus 0.04. But this is not the only statistic we could have chosen; there are many that could have worked here. \n",
+ "\n",
+ "From the options below, choose the test statistic that would **not** have worked for this hypothesis test, and assign 1, 2, 3, or 4 to the variable `bad_choice`.\n",
+ "\n",
+ "1. The number of Chansey encounters out of 784 encounters in the Safari Zone.\n",
+ "2. The proportion of Chansey encounters in the Safari Zone.\n",
+ "3. 0.04 minus the proportion of Chansey encounters in the Safari Zone.\n",
+ "4. The absolute difference between 0.04 and the proportion of Chansey encounters in the Safari Zone.\n",
+ "\n",
+ "*Hint*: Our goal is to find a test statistic that will help us determine whether we encounter Chansey **less** often than expected."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bad_choice = ...\n",
+ "bad_choice"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q3_5\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4. Mystery Box 🎁 ❓"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Subway Surfers is an \"endless runner\" mobile game where the player controls a character that continually runs forward along a subway track. The player tries to dodge obstacles and collect rewards along the way, all while avoiding train collisions and being caught by the subway inspector. One of the rewards is a Mystery Box which contains one of several possible prizes.\n",
+ "\n",
+ "\n",
+ "\n",
+ "There are four types of prizes in a Mystery Box: `'Jackpot'`, `'Rare'`, `'Special'`, and `'Common'`. The most valuable is the `'Jackpot'` 🤩 but it's also the most rare. Unfortunately, there is no publicly available information on the exact probabilities of getting any of the four types of prizes in a Mystery Box. However, Yutian plays Subway Surfers a lot, and based on her experience with the game, she proposes the following probability distribution."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "| Type | Yutian's Estimated Probability|\n",
+ "| --- | --- |\n",
+ "| Jackpot | $0.03$ |\n",
+ "| Rare | $0.10$ |\n",
+ "| Special | $0.30$ |\n",
+ "| Common | $0.57$ |\n",
+ "\n",
+ "We'll store this distribution in an array, in the order `'Jackpot'`, `'Rare'`, `'Special'`, and `'Common'`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Just run this cell, do not change it!\n",
+ "yutian_dist = np.array([0.03, 0.1, 0.3, 0.57])\n",
+ "yutian_dist"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To assess the validity of Yutian's model, you collect data from many Subway Surfers players. You learn that in total, out of 1,628 Mystery Box prizes:\n",
+ "- 33 were `'Jackpot'`,\n",
+ "- 179 were `'Rare'`,\n",
+ "- 586 were `'Special'`, and\n",
+ "- the rest were `'Common'`.\n",
+ "\n",
+ "You then calculate the **empirical** type distribution using the data you collected and store it in an array as well (in the same order as before):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Just run this cell, do not change it!\n",
+ "empirical_dist = np.array([33, 179, 586, 1628 - (33 + 179 + 586)]) / 1628\n",
+ "empirical_dist"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "While `empirical_dist` is not identical to `yutian_dist`, it's still possible that Yutian's model is plausible, and that the observed differences are due to random chance. Let's run a hypothesis test to investigate further, using the following hypotheses: \n",
+ "\n",
+ "**Null Hypothesis**: The types of Mystery Box prizes are drawn randomly from the distribution `yutian_dist`.\n",
+ "\n",
+ "**Alternative Hypothesis**: The types of Mystery Box prizes are _not_ drawn randomly from the distribution `yutian_dist`.\n",
+ "\n",
+ "Note that this hypothesis test involves four proportions – one for each of `'Jackpot'`, `'Rare'`, `'Special'`, and `'Common'`.\n",
+ "\n",
+ "**Question 4.1.** Which of the following is **not** a reasonable choice of test statistic for this hypothesis test? Assign 1, 2, or 3 to the variable `unreasonable_test_statistic`. \n",
+ "1. The absolute difference between the sum of the proposed distribution (Yutian's expected proportion of types) and the sum of the empirical distribution (actual proportion of types).\n",
+ "2. Among all four prize types, the largest absolute difference between Yutian's expected proportion and the actual proportion of prizes of that type.\n",
+ "3. The sum of the absolute differences between the proposed distribution (Yutian's expected proportion of types) and the empirical distribution (actual proportion of types).\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "unreasonable_test_statistic = ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q4_1\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 4.2.** We'll use the TVD, i.e. **total variation distance**, as our test statistic. Below, complete the implementation of the function `total_variation_distance`, which takes in two distributions (stored as arrays) as arguments and returns the total variation distance between the two arrays.\n",
+ "\n",
+ "Then, use the function `total_variation_distance` to determine the TVD between the type distribution proposed by Yutian and the empirical type distribution you observed. Assign this TVD to `observed_tvd`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def total_variation_distance(first_distrib, second_distrib):\n",
+ " '''Computes the total variation distance between two distributions.'''\n",
+ " ...\n",
+ "\n",
+ "observed_tvd = ...\n",
+ "observed_tvd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q4_2\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 4.3.** Now, we'll calculate 5,000 simulated TVDs to see what a typical TVD between the proposed distribution and an empirical distribution would look like if Yutian's model were accurate. Since our real-life data includes 1628 Mystery Box prizes, in each trial of the simulation, we'll:\n",
+ "- draw 1628 Mystery Boxes at random from Yutian's proposed distribution, then \n",
+ "- calculate the TVD between **Yutian's proposed type distribution** and the **empirical type distribution from the simulated sample**. \n",
+ "\n",
+ "Store these 5,000 simulated TVDs in an array called `simulated_tvds`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "simulated_tvds = ...\n",
+ "\n",
+ "# Visualize the distribution of TVDs with a histogram\n",
+ "bpd.DataFrame().assign(simulated_tvds=simulated_tvds).plot(kind='hist', density=True, ec='w', figsize=(10, 5));\n",
+ "plt.axvline(x=observed_tvd, color='black', linewidth=4, label='observed TVD')\n",
+ "plt.legend();"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q4_3\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 4.4.** Now, compute the p-value of our test: the proportion of simulated TVDs that are greater than or equal to our observed TVD. Assign your result to `type_p_value`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "type_p_value = ...\n",
+ "type_p_value"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q4_4\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Question 4.5.** Using the standard p-value cutoff of 0.05, what can we conclude from our hypothesis test? Assign either 1, 2, 3, or 4 to the variable `type_conclusion`, corresponding to the best conclusion.\n",
+ " \n",
+ " 1. We accept the null hypothesis. The observed data is consistent with the model.\n",
+ " 2. We reject the null hypothesis. There is not enough evidence to say if the observed data is consistent with the model.\n",
+ " 3. We reject the null hypothesis. The observed data is inconsistent with the model.\n",
+ " 4. We fail to reject the null hypothesis. There is not enough evidence to say that the observed data is inconsistent with the model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "type_conclusion = ...\n",
+ "type_conclusion"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "deletable": false,
+ "editable": false
+ },
+ "outputs": [],
+ "source": [
+ "grader.check(\"q4_5\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Finish Line 🏁"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To submit your assignment:\n",
+ "\n",
+ "1. Select `Kernel -> Restart & Run All` to ensure that you have executed all cells, including the test cells.\n",
+ "2. Read through the notebook to make sure everything is fine and all tests passed.\n",
+ "3. Run the cell below to run all tests, and make sure that they all pass.\n",
+ "4. Download your notebook using `File -> Download as -> Notebook (.ipynb)`, then upload your notebook to Gradescope.\n",
+ "5. Stick around while the Gradescope autograder grades your work. Make sure you see that all tests have passed on Gradescope.\n",
+ "6. Check that you have a confirmation email from Gradescope and save it as proof of your submission. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "grader.check_all()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/homeworks/hw04/images/chansey.png b/homeworks/hw04/images/chansey.png
new file mode 100644
index 0000000..ac8d37b
Binary files /dev/null and b/homeworks/hw04/images/chansey.png differ
diff --git a/homeworks/hw04/images/mystery_box.png b/homeworks/hw04/images/mystery_box.png
new file mode 100644
index 0000000..ab3b86d
Binary files /dev/null and b/homeworks/hw04/images/mystery_box.png differ
diff --git a/homeworks/hw04/images/subway_surfers.png b/homeworks/hw04/images/subway_surfers.png
new file mode 100644
index 0000000..bcc3046
Binary files /dev/null and b/homeworks/hw04/images/subway_surfers.png differ
diff --git a/homeworks/hw04/tests/q1_1.py b/homeworks/hw04/tests/q1_1.py
new file mode 100644
index 0000000..2e17016
--- /dev/null
+++ b/homeworks/hw04/tests/q1_1.py
@@ -0,0 +1,12 @@
+test = { 'name': 'q1_1',
+ 'points': None,
+ 'suites': [ { 'cases': [ { 'code': '>>> callable(simulate_one_ticket) and isinstance(simulate_one_ticket(), np.ndarray) # Make sure simulate_one_ticket is a function that returns an '
+ 'array\n'
+ 'True',
+ 'hidden': False,
+ 'locked': False},
+ {'code': '>>> len(simulate_one_ticket()) == 6 # Make sure there are 6 numbers returned\nTrue', 'hidden': False, 'locked': False}],
+ 'scored': True,
+ 'setup': '',
+ 'teardown': '',
+ 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q1_2.py b/homeworks/hw04/tests/q1_2.py
new file mode 100644
index 0000000..25dece4
--- /dev/null
+++ b/homeworks/hw04/tests/q1_2.py
@@ -0,0 +1,9 @@
+test = { 'name': 'q1_2',
+ 'points': None,
+ 'suites': [ { 'cases': [ { 'code': '>>> import numbers\n>>> isinstance(count_free_housing, numbers.Integral) # Make sure count_free_housing is an integer\nTrue',
+ 'hidden': False,
+ 'locked': False}],
+ 'scored': True,
+ 'setup': '',
+ 'teardown': '',
+ 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q1_3.py b/homeworks/hw04/tests/q1_3.py
new file mode 100644
index 0000000..a4ffd04
--- /dev/null
+++ b/homeworks/hw04/tests/q1_3.py
@@ -0,0 +1,3 @@
+test = { 'name': 'q1_3',
+ 'points': None,
+ 'suites': [{'cases': [{'code': '>>> 0 <= most_matches <= 6 \nTrue', 'hidden': False, 'locked': False}], 'scored': True, 'setup': '', 'teardown': '', 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q1_4.py b/homeworks/hw04/tests/q1_4.py
new file mode 100644
index 0000000..62568ef
--- /dev/null
+++ b/homeworks/hw04/tests/q1_4.py
@@ -0,0 +1,7 @@
+test = { 'name': 'q1_4',
+ 'points': None,
+ 'suites': [ { 'cases': [{'code': '>>> net_winnings < 0 # If you fail this test, check your work again!\nTrue', 'hidden': False, 'locked': False}],
+ 'scored': True,
+ 'setup': '',
+ 'teardown': '',
+ 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q2_1.py b/homeworks/hw04/tests/q2_1.py
new file mode 100644
index 0000000..9ad04c9
--- /dev/null
+++ b/homeworks/hw04/tests/q2_1.py
@@ -0,0 +1,7 @@
+test = { 'name': 'q2_1',
+ 'points': None,
+ 'suites': [ { 'cases': [{'code': '>>> isinstance(convenience_sample, bpd.DataFrame) and convenience_sample.shape == (45, 5)\nTrue', 'hidden': False, 'locked': False}],
+ 'scored': True,
+ 'setup': '',
+ 'teardown': '',
+ 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q2_2.py b/homeworks/hw04/tests/q2_2.py
new file mode 100644
index 0000000..7737129
--- /dev/null
+++ b/homeworks/hw04/tests/q2_2.py
@@ -0,0 +1,7 @@
+test = { 'name': 'q2_2',
+ 'points': None,
+ 'suites': [ { 'cases': [{'code': '>>> isinstance(convenience_stats, np.ndarray) and len(convenience_stats) == 2\nTrue', 'hidden': False, 'locked': False}],
+ 'scored': True,
+ 'setup': '',
+ 'teardown': '',
+ 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q2_3.py b/homeworks/hw04/tests/q2_3.py
new file mode 100644
index 0000000..cddd6db
--- /dev/null
+++ b/homeworks/hw04/tests/q2_3.py
@@ -0,0 +1,3 @@
+test = { 'name': 'q2_3',
+ 'points': None,
+ 'suites': [{'cases': [{'code': '>>> sampling_q3 in {1, 2, 3, 4}\nTrue', 'hidden': False, 'locked': False}], 'scored': True, 'setup': '', 'teardown': '', 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q2_4.py b/homeworks/hw04/tests/q2_4.py
new file mode 100644
index 0000000..074bb75
--- /dev/null
+++ b/homeworks/hw04/tests/q2_4.py
@@ -0,0 +1,3 @@
+test = { 'name': 'q2_4',
+ 'points': None,
+ 'suites': [{'cases': [{'code': '>>> sampling_q4 in {1, 2, 3, 4}\nTrue', 'hidden': False, 'locked': False}], 'scored': True, 'setup': '', 'teardown': '', 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q2_5.py b/homeworks/hw04/tests/q2_5.py
new file mode 100644
index 0000000..bd1d6a3
--- /dev/null
+++ b/homeworks/hw04/tests/q2_5.py
@@ -0,0 +1,3 @@
+test = { 'name': 'q2_5',
+ 'points': None,
+ 'suites': [{'cases': [{'code': '>>> sampling_q5 in {1, 2, 3}\nTrue', 'hidden': False, 'locked': False}], 'scored': True, 'setup': '', 'teardown': '', 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q3_1.py b/homeworks/hw04/tests/q3_1.py
new file mode 100644
index 0000000..2a2a39a
--- /dev/null
+++ b/homeworks/hw04/tests/q3_1.py
@@ -0,0 +1,7 @@
+test = { 'name': 'q3_1',
+ 'points': None,
+ 'suites': [ { 'cases': [{'code': '>>> callable(one_simulation) and 0.015 < one_simulation() < 0.075\nTrue', 'hidden': False, 'locked': False}],
+ 'scored': True,
+ 'setup': '',
+ 'teardown': '',
+ 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q3_2.py b/homeworks/hw04/tests/q3_2.py
new file mode 100644
index 0000000..bd32306
--- /dev/null
+++ b/homeworks/hw04/tests/q3_2.py
@@ -0,0 +1,7 @@
+test = { 'name': 'q3_2',
+ 'points': None,
+ 'suites': [ { 'cases': [{'code': '>>> isinstance(proportion_diffs, np.ndarray) and len(proportion_diffs) == 10000\nTrue', 'hidden': False, 'locked': False}],
+ 'scored': True,
+ 'setup': '',
+ 'teardown': '',
+ 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q3_3.py b/homeworks/hw04/tests/q3_3.py
new file mode 100644
index 0000000..3e335ab
--- /dev/null
+++ b/homeworks/hw04/tests/q3_3.py
@@ -0,0 +1,3 @@
+test = { 'name': 'q3_3',
+ 'points': None,
+ 'suites': [{'cases': [{'code': '>>> 0 <= safari_zone_p <= 1\nTrue', 'hidden': False, 'locked': False}], 'scored': True, 'setup': '', 'teardown': '', 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q3_4.py b/homeworks/hw04/tests/q3_4.py
new file mode 100644
index 0000000..02b5b6d
--- /dev/null
+++ b/homeworks/hw04/tests/q3_4.py
@@ -0,0 +1,3 @@
+test = { 'name': 'q3_4',
+ 'points': None,
+ 'suites': [{'cases': [{'code': '>>> safari_zone_conclusion in [1, 2, 3, 4]\nTrue', 'hidden': False, 'locked': False}], 'scored': True, 'setup': '', 'teardown': '', 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q3_5.py b/homeworks/hw04/tests/q3_5.py
new file mode 100644
index 0000000..dfda692
--- /dev/null
+++ b/homeworks/hw04/tests/q3_5.py
@@ -0,0 +1,3 @@
+test = { 'name': 'q3_5',
+ 'points': None,
+ 'suites': [{'cases': [{'code': '>>> bad_choice in [1, 2, 3, 4]\nTrue', 'hidden': False, 'locked': False}], 'scored': True, 'setup': '', 'teardown': '', 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q4_1.py b/homeworks/hw04/tests/q4_1.py
new file mode 100644
index 0000000..ea1cadb
--- /dev/null
+++ b/homeworks/hw04/tests/q4_1.py
@@ -0,0 +1,3 @@
+test = { 'name': 'q4_1',
+ 'points': None,
+ 'suites': [{'cases': [{'code': '>>> unreasonable_test_statistic in [1, 2, 3]\nTrue', 'hidden': False, 'locked': False}], 'scored': True, 'setup': '', 'teardown': '', 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q4_2.py b/homeworks/hw04/tests/q4_2.py
new file mode 100644
index 0000000..cbf3452
--- /dev/null
+++ b/homeworks/hw04/tests/q4_2.py
@@ -0,0 +1,8 @@
+test = { 'name': 'q4_2',
+ 'points': None,
+ 'suites': [ { 'cases': [ {'code': '>>> callable(total_variation_distance) and (0 <= observed_tvd <= 1)\nTrue', 'hidden': False, 'locked': False},
+ {'code': '>>> np.isclose(total_variation_distance(np.array([0.5, 0.5]), np.array([1, 0])), 0.5)\nTrue', 'hidden': False, 'locked': False}],
+ 'scored': True,
+ 'setup': '',
+ 'teardown': '',
+ 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q4_3.py b/homeworks/hw04/tests/q4_3.py
new file mode 100644
index 0000000..cd6f2c6
--- /dev/null
+++ b/homeworks/hw04/tests/q4_3.py
@@ -0,0 +1,7 @@
+test = { 'name': 'q4_3',
+ 'points': None,
+ 'suites': [ { 'cases': [{'code': '>>> isinstance(simulated_tvds, np.ndarray) and (len(simulated_tvds) == 5000)\nTrue', 'hidden': False, 'locked': False}],
+ 'scored': True,
+ 'setup': '',
+ 'teardown': '',
+ 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q4_4.py b/homeworks/hw04/tests/q4_4.py
new file mode 100644
index 0000000..3d272e0
--- /dev/null
+++ b/homeworks/hw04/tests/q4_4.py
@@ -0,0 +1,3 @@
+test = { 'name': 'q4_4',
+ 'points': None,
+ 'suites': [{'cases': [{'code': '>>> (0 <= type_p_value <=1)\nTrue', 'hidden': False, 'locked': False}], 'scored': True, 'setup': '', 'teardown': '', 'type': 'doctest'}]}
diff --git a/homeworks/hw04/tests/q4_5.py b/homeworks/hw04/tests/q4_5.py
new file mode 100644
index 0000000..bf8a5ac
--- /dev/null
+++ b/homeworks/hw04/tests/q4_5.py
@@ -0,0 +1,3 @@
+test = { 'name': 'q4_5',
+ 'points': None,
+ 'suites': [{'cases': [{'code': '>>> type_conclusion in [1, 2, 3, 4]\nTrue', 'hidden': False, 'locked': False}], 'scored': True, 'setup': '', 'teardown': '', 'type': 'doctest'}]}