From 0717f724fb1bae3a5859357a4750889dbc39b1e8 Mon Sep 17 00:00:00 2001
From: tahirpukhta <152134239+tahirpukhta@users.noreply.github.com>
Date: Sun, 17 Dec 2023 21:46:07 +0530
Subject: [PATCH] Update exercise_1_poem.py

I also confirmed manually that the word 'and' occurs 9 times in the whole
corpus!!
---
NOTE(review): line structure restored from a whitespace-collapsed copy.
Indentation of the context lines is assumed to be 4 spaces - confirm against
the target file before applying. The added script was revised in review:
split() instead of split(' ') so line-final words do not keep their newline,
read-only open mode, punctuation-only tokens skipped, and max() guarded
against an empty word table. The post-image blob id on the index line below
predates these revisions.

 .../13_read_write_files/exercise_1_poem.py    | 22 ++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/Basics/Exercise/13_read_write_files/exercise_1_poem.py b/Basics/Exercise/13_read_write_files/exercise_1_poem.py
index b4470301..48038337 100644
--- a/Basics/Exercise/13_read_write_files/exercise_1_poem.py
+++ b/Basics/Exercise/13_read_write_files/exercise_1_poem.py
@@ -18,4 +18,24 @@
 print("Words with max occurances are: ")
 for word, count in word_stats.items():
     if count==max_count:
-        print(word)
\ No newline at end of file
+        print(word)
+#I tried the above code and found the answer to be as follows:
+'''Max occurances of any word is: 8
+Words with max occurances are:
+I
+the'''
+#However, when I tried solving the problem on my own, it resulted in the following output at the end:
+'''The word with maximum occurrence is 'and' with count 9'''
+word_counts={}
+with open("C://Users//Tahir//Desktop//poem.txt","r") as p: #read-only mode is enough; nothing is written back to the poem.
+    for line in p:
+        tokens=line.lower().split() #lowercase so 'The' and 'the' are counted as the same word; split() on any whitespace also drops the trailing newline and empty tokens.
+        for word in tokens:
+            word= word.strip(',.!?;:') #remove punctuation so that 'word,' and 'word' are counted the same.
+            if not word: #the token was only punctuation, so there is nothing to count.
+                continue
+            word_counts[word]=word_counts.get(word,0)+1 #get() returns 0 the first time a word is seen.
+print(word_counts)
+if word_counts: #max() raises ValueError on an empty dict, e.g. when the file has no words.
+    max_word=max(word_counts, key=word_counts.get) #key argument tells max() to compare the counts instead of the words themselves.
+    print(f"The word with maximum occurrence is '{max_word}' with count {word_counts[max_word]}")