From 0717f724fb1bae3a5859357a4750889dbc39b1e8 Mon Sep 17 00:00:00 2001
From: tahirpukhta <152134239+tahirpukhta@users.noreply.github.com>
Date: Sun, 17 Dec 2023 21:46:07 +0530
Subject: [PATCH] Update exercise_1_poem.py

Add an alternative solution that counts words case-insensitively and ignores surrounding punctuation. I also confirmed manually that the word 'and' occurs 9 times in the whole poem.
---
 .../13_read_write_files/exercise_1_poem.py    | 22 ++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/Basics/Exercise/13_read_write_files/exercise_1_poem.py b/Basics/Exercise/13_read_write_files/exercise_1_poem.py
index b4470301..48038337 100644
--- a/Basics/Exercise/13_read_write_files/exercise_1_poem.py
+++ b/Basics/Exercise/13_read_write_files/exercise_1_poem.py
@@ -18,4 +18,24 @@
 print("Words with max occurances are: ")
 for word, count in word_stats.items():
     if count==max_count:
-        print(word)
\ No newline at end of file
+        print(word)
+#I tried the above code and found the answer to be as follows:
+'''Max occurances of any word is: 8
+Words with max occurances are: 
+I
+the'''
+#However, my own solution below, which lowercases every word and strips surrounding punctuation before counting, prints the following at the end:
+'''The word with maximum occurrence is 'and' with count 9'''
+#Count how often each word occurs in the poem, ignoring case and surrounding punctuation,
+#then print the full tally followed by the single most frequent word.
+word_counts={}
+with open("C://Users//Tahir//Desktop//poem.txt","r") as p: #the file is only read, so plain "r" is enough ("r+" also asks for write access).
+    for line in p:
+        tokens=line.lower().split() #lowercase so 'The' and 'the' are counted as the same word; split() without an argument splits on any whitespace, so the trailing newline never sticks to the last word of a line and blank lines give no tokens.
+        for word in tokens:
+            word=word.strip(',.!?;:') #remove surrounding punctuation so that 'word,' and 'word' are counted as the same word.
+            if word: #skip tokens that consisted only of punctuation.
+                word_counts[word]=word_counts.get(word,0)+1
+print(word_counts)
+max_word=max(word_counts, key=word_counts.get) #key makes max() compare the counts rather than the words themselves; on a tie the word seen first wins.
+print(f"The word with maximum occurrence is '{max_word}' with count {word_counts[max_word]}")