This repository has been archived by the owner on Jul 1, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathuniqword.py
814 lines (645 loc) · 30 KB
/
uniqword.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
"""uniQword is a program to read and count words from one or multiple files and perform some statistical operations."""
import cmd  # Used for the command-line interface.
import codecs  # Used to avoid codec problems when reading files.
import collections  # Used for frequency counts.
import os  # Used for directory-wide operations.
import re  # Used for text parsing.
import time  # Used by the command-line interface for sleep() when bidding farewell to the user.
import zipfile  # Used to read odt files.
from typing import Iterator, Optional  # Used for type hinting.

import docx  # Used to read docx files.
import PyPDF2  # Used to read PDF files.
from lxml import etree  # Used to read odt files.
# File extensions of every format that can currently be examined.
SUPPORTED_FORMATS = (".txt", ".docx", ".odt", ".pdf")
# How many entries a frequency list holds when the user does not ask for a specific amount.
FREQUENCY_TOP = 20
# Non-alphanumeric symbols that are kept when they appear inside a word.
ACCEPT = ("-", "_")
# Subset of ACCEPT that is stripped from the start and the end of a word.
REMOVE = ("-",)
# Contents of the regex character class used to split text into words: any whitespace and the apostrophe.
SEPARATORS = r"\s'"
class DecryptionError(Exception):
    """Signal that an encrypted file was opened with a wrong password or with no password at all."""
class WordsFile:
    """
    Manage one file: read it, then collect and enumerate the words it contains.

    Words are stored lower-cased and stripped of punctuation, in the order in which they appear in the file.
    """
    # Containers filled when the file is read. They are only declared here and are created per instance:
    # assigning a list/set/dict at class level would make every WordsFile share (and pollute) the same object.
    file_words: list
    file_unique_words: set
    specific_count: dict  # Cache for count_word(). Key: word. Value: occurrences in this file.
    file_path = ""
    password = ""
    # Attributes to optimise performance in case of repeated calls.
    words_count = None
    uniques_count = None
    frequency_list = None

    def __init__(self, file_path: str, password: str):
        """
        Initialise the file instance by storing a list of all words.

        :param file_path: the file path and name.
        :param password: the password provided for the file, if given.
        :raise DecryptionError: if the file is encrypted and the password is wrong or missing.
        :raise ValueError: if the file is of an unsupported format (or cannot be decoded).
        """
        self.file_path = file_path
        if password:
            self.password = password
        self.file_words = []
        self.file_unique_words = set()
        self.specific_count = {}
        self.store_all_words()

    def __repr__(self):
        """Represent the class as its own name plus the path of the contained file."""
        return f"{self.__class__.__name__}: {self.file_path}"

    def __bool__(self):
        """
        Test whether the WordsFile is not empty.

        :return: False if the file contains no words, True otherwise.
        """
        return bool(self.file_words)

    def __eq__(self, other):
        """Compare two instances on the base of the file path they point to."""
        if not isinstance(other, WordsFile):
            return NotImplemented  # Let Python fall back to its default comparison for foreign types.
        return self.file_path == other.file_path

    def __hash__(self):
        """Hash on the file path, consistently with __eq__ (defining __eq__ alone makes a class unhashable)."""
        return hash(self.file_path)

    def store_all_words(self):
        """
        Store an instance list with each word in the chosen file, eliminating every punctuation sign.

        Every cached count is reset, since it would refer to the previous contents.

        :raise DecryptionError: if a wrong password was provided.
        :raise NotImplementedError: if the file is encrypted with an unsupported algorithm.
        :raise ValueError: if the provided file is of an unsupported format.
        """
        path = self.file_path
        is_pdf = path.endswith(".pdf")
        if is_pdf:
            contents = self._read_pdf()
        elif path.endswith(".docx"):
            contents = self._read_docx()
        elif path.endswith(".odt"):
            contents = self._read_odt()
        elif path.endswith(".txt"):
            contents = self._read_txt()
        else:
            raise ValueError(f"Unsupported file format: {path}")
        # Get all to lowercase. This is useful to count unique words, and we don't need case sensitiveness anyway.
        lines = contents.lower().splitlines()
        # PDFs contain fake line breaks, and real ones automatically get some whitespace: glue their lines back
        # together as they are. For every other format, whitespace keeps the words of adjacent lines apart.
        all_words = self.purify_words(("" if is_pdf else " ").join(lines))
        # Assign fresh objects (instead of updating in place) so nothing is ever shared with another instance.
        self.file_words = all_words
        self.file_unique_words = set(all_words)
        self.specific_count = {}
        self.words_count = None
        self.uniques_count = None
        self.frequency_list = None

    def _read_pdf(self) -> str:
        """Extract the raw text of every page of a PDF file, decrypting it first when needed."""
        with open(self.file_path, "rb") as pdf:
            reader = PyPDF2.PdfFileReader(pdf)  # Create a PDF handler.
            if reader.isEncrypted:
                # A missing password, or a decrypt() result of 0, is treated as a failed decryption.
                # Per the original author's note, decrypt() raises NotImplementedError if the algorithm
                # is not supported by PyPDF2.
                if not self.password or reader.decrypt(self.password) == 0:
                    raise DecryptionError
            # Read one page at a time, within the "with" block because the reader needs the open file.
            return "".join(reader.getPage(page).extractText() for page in range(reader.numPages))

    def _read_docx(self) -> str:
        """Extract the text of every paragraph of a docx file, one paragraph per line."""
        document = docx.Document(self.file_path)
        # The line break ensures the last word of a paragraph doesn't end up joined to the first of the next.
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    def _read_odt(self) -> str:
        """Extract the raw text of an odt file, which is a zip archive holding a content.xml document."""
        pieces = []
        # Both the archive and the member are closed on exit, even if parsing fails.
        with zipfile.ZipFile(self.file_path) as odt, odt.open("content.xml") as content:
            for child in etree.parse(content).iter():
                # Comments and processing instructions have a non-string tag: skip them.
                if isinstance(child.tag, str) and "text" in child.tag and child.text is not None:
                    pieces.append(child.text)
        # One line per tag, to ensure words don't end up joined.
        return "\n".join(pieces)

    def _read_txt(self) -> str:
        """Read a plain text file as UTF-8 (UnicodeDecodeError, a ValueError, is raised for other encodings)."""
        # The encoding is explicit: the platform default would silently misread UTF-8 files on some systems.
        with open(self.file_path, encoding="utf-8") as file:
            return file.read()

    @staticmethod
    def purify_words(contents: str) -> list:
        """
        Clean up words by removing empty words, whitespace and symbols.

        :param contents: the string to purify.
        :return: a list of purified words.
        """
        strip_symbols = "".join(REMOVE)
        all_words = []
        # Separate words.
        for chunk in re.split("[" + SEPARATORS + "]", contents):
            # Keep alphanumeric characters plus the accepted symbols, then drop the symbols that may not
            # appear at the start or at the end of a word.
            word = "".join(char for char in chunk if char.isalnum() or char in ACCEPT).strip(strip_symbols)
            if word:  # Whatever is left empty was not a real(istic) word.
                all_words.append(word)
        return all_words

    def get_words(self) -> Optional[list]:
        """
        Get the list of the file's words.

        :return: the list of words or None if the file contains no words.
        """
        return self.file_words if self.file_words else None

    def get_unique_words(self) -> Optional[set]:
        """:return: a set of the unique words in the chosen file or None if no words are present."""
        return self.file_unique_words if self.file_unique_words else None

    def count_all_words(self) -> int:
        """:return: the count of all words in the chosen file."""
        if self.words_count is None:
            self.words_count = len(self.file_words)
        return self.words_count

    def count_unique_words(self) -> int:
        """:return: the count of all unique words in the chosen file (0 if the file contains no words)."""
        if self.uniques_count is None:
            # Measure the set itself: get_unique_words() returns None for an empty file.
            self.uniques_count = len(self.file_unique_words)
        return self.uniques_count

    def count_word(self, word: str) -> int:
        """:return: the count of the occurrences of the specified word in the chosen file."""
        # Scan the list only on a cache miss (dict.setdefault would evaluate the count on every call).
        if word not in self.specific_count:
            self.specific_count[word] = self.file_words.count(word)
        return self.specific_count[word]

    def get_frequency(self) -> list:
        """
        Get the frequency list of all words in the file.

        :return: a list of (word, occurrences) tuples in descending order of occurrences.
        """
        if self.frequency_list is None:
            self.frequency_list = collections.Counter(self.file_words).most_common()
        return self.frequency_list
class FilesCollection:
    """
    Collect and manage all files to operate on.

    All functions are built to be compatible with the output of individual files (WordsFile).
    """
    # Containers created per instance in __init__: assigning them at class level would make every collection
    # share the very same objects.
    files: dict  # Key: file path. Value: WordsFile instance.
    collective_words: list
    collective_unique_words: set
    directories: dict  # Key: directory path. Value: list of the file paths added through that directory.
    collective_specific_count: dict  # Key: word. Value: occurrences in the whole collection.
    # Attributes to act as a cache to optimise performance in case of repeated calls.
    collective_words_count = None
    collective_uniques_count = None
    collective_frequency_list = None

    def __init__(self, *files: "WordsFile"):
        """
        Store all provided files.

        :param files: zero or more files to store.
        :raise TypeError: if the provided files are not valid WordsFile instances.
        """
        self.files = {}
        self.collective_words = []
        self.collective_unique_words = set()
        self.directories = {}
        self.reset_values()
        if files:
            self.add_files(*files)

    def __repr__(self):
        """Represent the collection as its name plus a list of all files it contains."""
        if self.files:
            files = "\n".join(repr(entry) for entry in self.files.values())
            return f"{self.__class__.__name__}:\n{files}"
        return f"{self.__class__.__name__}"

    def __str__(self):
        """Print the list of the files in the collection."""
        if self.files:
            return "\n".join(repr(entry) for entry in self.files.values())
        return "The collection is empty."

    def __bool__(self):
        """:return: False if the collection is empty, True otherwise."""
        return len(self.files) > 0

    def __len__(self):
        """Return how many files the collection contains."""
        return len(self.files)

    def reset_values(self):
        """Reset all instance cache variables to force recounting all values."""
        self.collective_words_count = None
        self.collective_uniques_count = None
        self.collective_specific_count = {}
        self.collective_frequency_list = None

    def _rebuild_words(self):
        """Recompute the collective words and unique words from the files currently in the collection."""
        words = []
        for file in self.files.values():
            words += file.get_words() or []  # get_words() returns None for a file without words.
        self.collective_words = words
        self.collective_unique_words = set(words)
        self.reset_values()

    def get_files(self) -> Iterator[str]:
        """Provide the file paths of each file in the collection."""
        # Iterate over a snapshot, so that callers may add or remove files while looping.
        yield from tuple(self.files)

    def add_files(self, *files: "WordsFile"):
        """
        Add the provided file(s) to the collection and their words to the collective words.

        A file whose path is already in the collection replaces the stored one instead of being counted twice.

        :param files: one or more WordsFile to add to the collection.
        :raise TypeError: if the provided files are not valid WordsFile instances.
        """
        for file in files:
            if not isinstance(file, WordsFile):
                raise TypeError(f"Expected a WordsFile, got {type(file).__name__}.")
            replacing = file.file_path in self.files
            # Add the file to the collection using its file_path as index for optimal lookup.
            self.files[file.file_path] = file
            if replacing:
                self._rebuild_words()  # The words of the stale copy must go.
                continue
            words = file.get_words() or []  # get_words() returns None for a file without words.
            self.collective_words += words
            self.collective_unique_words.update(words)
            self.reset_values()

    def remove_files(self, *file_paths: str) -> int:
        """
        Remove the provided files from the collection. File paths that are not found are ignored.

        :param file_paths: the file paths to remove from the collection.
        :raise ValueError: if no (valid) file is provided.
        :return: the number of files successfully deleted.
        """
        if not file_paths:
            raise ValueError("No file path to remove was provided.")
        removed = 0
        for file_path in file_paths:
            if file_path in self.files:
                del self.files[file_path]
                removed += 1
        if not removed:
            raise ValueError("None of the provided file paths is part of the collection.")
        # Recount from the remaining files: a word of a removed file may still occur in another file, so it
        # cannot simply be discarded from the unique words.
        self._rebuild_words()
        return removed

    def add_directories(self, *directories: str) -> list:
        """
        Add the provided directory or directories to the collection by instantiating all files contained therein.

        Will not add files beginning in "." nor files that are already in the collection. Files that need a
        password are skipped.

        :param directories: the path(s) of each directory to add.
        :raise ValueError: if no directory is provided.
        :return: the list of all files added successfully.
        """
        if not directories:
            raise ValueError("No directory to add was provided.")
        added = []
        for directory in directories:
            directory_files = []
            for file_name in os.listdir(directory):
                if file_name.startswith(".") or not file_name.endswith(SUPPORTED_FORMATS):
                    continue
                if not os.path.isfile(os.path.join(directory, file_name)):
                    continue  # E.g. a sub-directory whose name happens to end like a supported format.
                # Files of the current directory are indexed by their bare name.
                file_path = file_name if directory == "." else os.path.join(directory, file_name)
                if file_path in self.files:
                    continue  # Ignore files that are already in the collection.
                try:
                    self.add_files(WordsFile(file_path, ""))
                except DecryptionError:
                    continue  # Suppress cases where passworded files are found, ignore them and move on.
                directory_files.append(file_path)
                added.append(file_path)
            # Extend the record instead of replacing it, so adding a directory twice doesn't lose track of the
            # files added the first time.
            self.directories.setdefault(directory, []).extend(directory_files)
        return added

    def remove_directories(self, *directories: str) -> list:
        """
        Remove the provided directory or directories from the collection by removing each file contained therein.

        Files that had been added individually will not be removed even if they are present in the directory to
        remove. Directories that are not part of the collection are ignored.

        :param directories: the path(s) of each directory to remove.
        :raise ValueError: if no directory is provided.
        :return: the list of all files successfully removed.
        """
        if not directories:
            raise ValueError("No directory to remove was provided.")
        removed = []
        for directory in directories:
            # Pop the record: once its files are gone the directory is no longer part of the collection.
            for file_path in self.directories.pop(directory, []):
                try:
                    self.remove_files(file_path)
                except ValueError:
                    continue  # The file had already been removed individually.
                removed.append(file_path)
        return removed

    def get_collective_words(self) -> Optional[list]:
        """:return: the list of all the files' words or None."""
        return self.collective_words if self.collective_words else None

    def get_collective_unique_words(self) -> Optional[set]:
        """:return: a set of the unique words in the collection or None if no words are present."""
        return self.collective_unique_words if self.collective_unique_words else None

    def count_collective_words(self) -> int:
        """:return: the count of all words in the collection."""
        if self.collective_words_count is None:
            self.collective_words_count = len(self.collective_words)
        return self.collective_words_count

    def count_collective_unique_words(self) -> int:
        """:return: the count of all unique words in the collection."""
        if self.collective_uniques_count is None:
            self.collective_uniques_count = len(self.collective_unique_words)
        return self.collective_uniques_count

    def count_collective_word(self, word: str) -> int:
        """:return: the count of the occurrences of the word in the collection."""
        # Scan the list only on a cache miss (dict.setdefault would evaluate the count on every call).
        if word not in self.collective_specific_count:
            self.collective_specific_count[word] = self.collective_words.count(word)
        return self.collective_specific_count[word]

    def get_frequency(self, top: Optional[int] = None, reverse: bool = False) -> list:
        """
        Get the frequency list of all words in the collection.

        :param top: the amount of words to return at most. Defaults to FREQUENCY_TOP when None.
            0 outputs the whole list.
        :param reverse: whether the frequency list should show the least common items. Defaults to False.
        :return: a new list of (word, occurrences) tuples, most common first (least common first if reversed).
            The list is empty if the collection contains no words.
        """
        if top is None:
            top = FREQUENCY_TOP
        if self.collective_frequency_list is None:
            self.collective_frequency_list = collections.Counter(self.collective_words).most_common()
        output = self.collective_frequency_list
        if reverse:
            # Reverse a copy: list.reverse() would flip the cached list and corrupt every later call.
            output = output[::-1]
        if top == 0:
            return list(output)
        return output[:top]

    def print_stats(self, *, frequency_top: int = 0, frequency_reverse: bool = False) -> str:
        """
        Print all useful stats to a file in the current folder, overwriting it if already present.

        :param frequency_top: the amount of words to list at most. 0 (the default) lists all words.
        :param frequency_reverse: whether to list the least common words first. Defaults to False.
        :return: the name of the file that was written.
        """
        if len(self) == 1:
            analysed = next(iter(self.files))
            # Name the stats file after the only file. Only its base name is used, so the stats end up in the
            # current folder even when the analysed file lives elsewhere.
            file_name = "stats_" + os.path.basename(analysed) + ".txt"
            stats = "Stats for file: {}".format(analysed)
        else:
            # Use uniQword as name for the stats file.
            file_name = "uniQword.txt"
            stats = "Stats for files:\n{}".format("\n".join(self.files))
        stats += "\n\n"
        # Add stats for word count.
        stats += "The {plural} {unique} unique words out of {total} total words.\n\n".format(
            plural="file contains" if len(self) == 1 else "files contain",
            unique=self.count_collective_unique_words(),
            total=self.count_collective_words()
        )
        # Add stats for frequency.
        words = self.get_frequency(top=frequency_top, reverse=frequency_reverse)
        # Pad every word to the length of the longest one plus 4, but never beyond 60 columns.
        # default=0 keeps max() from failing when there are no words at all.
        padding = min(max((len(word) for word, _ in words), default=0) + 4, 60)
        output = [f"{word:{padding}}{occurrences}" for word, occurrences in words]
        stats += "{rev} frequent {amount} {plural}:\n{freq}".format(
            rev="Least" if frequency_reverse else "Most",
            amount=len(output),
            plural="word" if len(output) == 1 else "words",
            freq="\n".join(output)
        )
        # Write to exactly the name that is returned, so the caller can tell the user where to look.
        with open(file_name, "w", encoding="UTF-8") as file:
            file.write(stats)
        return file_name
class CommandLineInterface(cmd.Cmd):
    """Manage the command-line interface."""
    intro = "Welcome. I am uniQword, I can count all the words in your files and more.\n" \
            "To begin, select a file with the \"add\" command or type ? to read a list of commands."
    prompt = "uniQword, "
    # Options asking for a frequency list that starts from the least common words.
    REVERSE_KEYWORDS = ("r", "reverse", "reversed")

    def __init__(self, *args, **kwargs):
        """Set up the prompt with an empty collection of files. Arguments are forwarded to cmd.Cmd."""
        super().__init__(*args, **kwargs)
        # Created per interface rather than once in the class body, so that merely defining the class
        # doesn't instantiate anything.
        self.file = FilesCollection()

    def check_file(self) -> bool:
        """:return: True if there is at least one valid file selected, False otherwise."""
        if not self.file:
            print("I can't operate without a file!")
            self.onecmd("help add")
            return False
        if self.file.get_collective_words() is None:
            print(f"The selected file{'s are' if len(self.file) > 1 else ' is'} empty.")
            return False
        return True

    @staticmethod
    def emptyline(**kwargs):
        """Scold the user for entering an empty command."""
        print("No idea what to do? Type help or ? to see a list of commands.")

    @staticmethod
    def default(line, **kwargs):
        """Scold the user for writing an unrecognised command."""
        # Take only the first word if more are given (split always returns at least one element).
        command = line.split(' ')[0]
        print(f"I don't know of a command called \"{command}\".")
        CommandLineInterface.emptyline()

    @staticmethod
    def _parse_frequency_options(options: str, default_top: Optional[int]) -> tuple:
        """
        Read the options of a frequency list, in whatever order they were typed.

        :param options: the raw text typed after the command, e.g. "50 reversed".
        :param default_top: the amount of results to use when the options don't specify any.
        :return: a (top, is_reversed) tuple. top is 0 when all results were asked for with *.
        """
        top = default_top
        is_reversed = False
        for option in options.split():
            if option.isdecimal():
                top = int(option)
            elif "*" in option:
                top = 0
            elif option in CommandLineInterface.REVERSE_KEYWORDS:
                is_reversed = True
            # Anything else is not a known option and is ignored.
        return top, is_reversed

    @staticmethod
    def _format_frequency(frequency) -> str:
        """Lay out (word, occurrences) pairs one per line, with the occurrences aligned in a column."""
        # Pad every word to the length of the longest one plus 4, but never beyond 60 columns.
        padding = min(max((len(word) for word, _ in frequency), default=0) + 4, 60)
        return "".join(f"{word:{padding}}{occurrences}\n" for word, occurrences in frequency)

    def do_add(self, user_entry: str):
        r"""
        Select a file or directory to operate on. You can select multiple items one at a time.
        Please provide a password if needed. Passworded files will be ignored when adding an entire directory.
        To add all compatible files in the current directory, type .
        Examples:
        uniQword, add .
        uniQword, add mydir\folder
        uniQword, add myfile.pdf
        uniQword, add passwordedfile.pdf myp@ssw0rd
        """
        # The docstring above is raw: "\f" would otherwise be read as a form feed in the help text.
        user_entry = user_entry.strip()  # Remove leading and trailing spaces.
        # Identify if the user provided any input at all.
        if not user_entry:
            print("Please specify something to add! Type \"uniQword, help add\" to receive help.")
            return
        # Identify if the user asked for a directory or a file. Entries without a dot are taken for
        # directories even when they don't exist, so the user is told that the directory was not found.
        if user_entry == "." or os.path.isdir(user_entry) or "." not in user_entry:
            self._add_directory(user_entry)
        else:
            self._add_file(user_entry)

    def _add_directory(self, directory: str):
        """Add all compatible files of a directory to the collection and report the outcome to the user."""
        try:
            added = self.file.add_directories(directory)
        except (FileNotFoundError, NotADirectoryError):
            print("I couldn't find the specified directory.")
            return
        if added:
            print(f"I successfully added the following file{'' if len(added) == 1 else 's'}:\n" +
                  "\n".join(added))
        else:
            print("I couldn't find any compatible file. Please remember to add passworded files individually.")

    def _add_file(self, user_entry: str):
        """Add a single file, optionally followed by its password, and report the outcome to the user."""
        parts = user_entry.split(" ")
        if len(parts) == 2 and not os.path.isfile(user_entry):
            file, password = parts
        else:
            # Either no password was given, or the entry as a whole is a file whose name contains spaces.
            file, password = user_entry, ""
        try:
            self.file.add_files(WordsFile(file, password))
            print(f"I selected the file: {file}.")  # Only the file name: never echo the password.
        except FileNotFoundError:
            if len(file):
                print("I couldn't find the file you asked for. Please try again.")
            else:
                print("I need a file name in order to add it!")
                self.onecmd("help add")
        except UnicodeDecodeError:
            # Must come before ValueError, which it is a subclass of.
            print("I couldn't decode the file. Please save it in UTF-8 before retrying.")
        except (ValueError, TypeError):
            print(f"I cannot use this file. Please convert it to one of the supported formats: "
                  f"{', '.join(SUPPORTED_FORMATS)}.")
        except DecryptionError:
            print("I need the correct password for this file!\n"
                  "Leave an empty space after the file name and type the password, example:\n"
                  "uniQword, add myfile.txt myp@ssw0rd")
        except NotImplementedError:
            print("I couldn't decrypt the file. Please retry with a non-passworded copy.")

    def do_remove(self, user_entry: str):
        r"""
        Remove a file or directory from use.
        Use . to remove the current directory only, and * to remove all files and directory.
        Type "uniQword, files" to read a list of the files currently in use.
        Examples:
        uniQword, remove .
        uniQword, remove myfolder
        uniQword, remove mydir\myfolder
        uniQword, remove myfile.txt
        uniQword, remove *
        """
        if not self.file:
            print("There are no files to remove.")
            return
        user_entry = user_entry.strip()
        if not user_entry:
            print("Please select something to remove.")
            self.onecmd("help remove")
            return
        # Check if the user wants to clear the list.
        if user_entry == "*":
            # Take a snapshot of the paths first: the collection changes while files are being removed.
            removed = self.file.remove_files(*list(self.file.get_files()))
            print(f"I removed {'the only file' if removed == 1 else 'all '+str(removed)+' files'} from the list.")
            return
        # Try to remove a file.
        try:
            self.file.remove_files(user_entry)
        except (KeyError, ValueError):
            # If it doesn't work, it may be a directory.
            self._remove_directory(user_entry)
        else:
            print(f"I removed the file \"{user_entry}\" from the list.")

    def _remove_directory(self, directory: str):
        """Remove all files added through a directory and report the outcome to the user."""
        try:
            removed = self.file.remove_directories(directory)
        except ValueError:
            print("Please specify a valid file or folder to remove!\n"
                  "Do \"uniQword, files\" for a list of currently selected files and directories.")
            return
        if removed:
            print(f"I successfully removed the following file{'' if len(removed) == 1 else 's'}:\n" +
                  "\n".join(removed))
        else:
            print("I couldn't remove any file. Make sure you type the whole path correctly.\n"
                  "Do \"uniQword, files\" to check which file paths I'm currently using.")

    def do_files(self, arg):
        """List all files currently being processed."""
        del arg
        print("Here are all the files we're working on:\n" + "\n".join(self.file.get_files()))

    def do_count(self, user_entry):
        """
        Count the number of words and unique words in the currently selected files.
        Or count how many times a specific word occurs in the currently selected files.
        Example:
        uniQword, count
        uniQword, count banana
        """
        if self.check_file() is False:
            return
        subject = "The file contains" if len(self.file) == 1 else "The files contain"
        # Words are stored in lowercase, so the word to look for must be lowercase as well.
        word = user_entry.strip().lower()
        # Check if the user wants to count a specific word.
        if word:
            occurrences = self.file.count_collective_word(word)
            print(f"{subject} {occurrences} occurrence{'' if occurrences == 1 else 's'} "
                  f"of the word \"{word}\".")
            return
        total_words = self.file.count_collective_words()
        total_uniques = self.file.count_collective_unique_words()
        # check_file() guarantees at least one word, so the division is safe.
        print(f"{subject} {total_words} total words, {total_uniques} of which unique "
              f"({round((total_uniques / total_words) * 100, 2)}%).")

    def do_frequency(self, options: str):
        """
        Print the frequency list of the current file. It can be printed in reverse and the maximum amount of
        results can be trimmed. By default, the first 20 results will be printed. Input * to print all results.
        Examples:
        uniQword, frequency
        uniQword, frequency *
        uniQword, frequency * reversed
        uniQword, frequency reversed
        uniQword, frequency 50
        uniQword, frequency 50 reversed
        """
        # NOTE: the help text above is a plain docstring (an f-string is not a docstring and would leave the
        # command without help), so the default it mentions must be kept in sync with FREQUENCY_TOP by hand.
        if self.check_file() is False:
            return
        # A top of None lets the collection apply its own default amount of results.
        top, is_reversed = self._parse_frequency_options(options, None)
        frequency = self.file.get_frequency(top=top, reverse=is_reversed)
        # Announce the amount of elements actually listed, which may be less than what was asked for.
        print(f"Here are the {'least' if is_reversed else 'most'} common {len(frequency)} "
              f"elements for the selected document{'' if len(self.file) == 1 else 's'}:\n"
              f"{self._format_frequency(frequency)}")

    def do_print(self, options):
        """
        Print all available stats on the currently selected document(s) to a file.
        The file will appear in the current folder and will be overwritten if already present.
        Options for the frequency list (number of items, reversed list) may be entered.
        Examples:
        uniQword, print
        uniQword, print reversed
        uniQword, print 15
        uniQword, print 15 reversed
        uniQword, print reversed 15
        """
        if self.check_file() is False:
            return
        # Unless told otherwise, the whole frequency list (0) goes into the file.
        top, is_reversed = self._parse_frequency_options(options, 0)
        print(f"I printed data on {len(self.file)} file{'' if len(self.file) == 1 else 's'} on a file named "
              f"{self.file.print_stats(frequency_top=top, frequency_reverse=is_reversed)}.")

    @staticmethod
    def do_bye(arg):
        """
        Exit the program.
        Example: uniQword, bye
        """
        del arg  # Apparently cmd requires me to accept an argument for every command, so I delete it. >:)
        print("See you, space cowboy!")
        time.sleep(2)
        # Same effect as exit(), without relying on a helper that only exists when the site module is loaded.
        raise SystemExit

    def do_test(self, arg):
        """Test something."""
if __name__ == "__main__":
    # Start the interactive prompt only when the file is run as a script, not when it is imported.
    interface = CommandLineInterface()
    interface.cmdloop()