# Sample corpus: four lines of text used as test input for the functions below.
# strip() removes the newlines that directly follow/precede the triple quotes.
ss = '''
In the beginning God created the heavens and the earth. Now the earth was formless and empty, darkness was over the surface of the deep, and the Spirit of God was hovering over the waters.
And God said, "Let there be light," and there was light. God saw that the light was good, and he separated the light from the darkness. God called the light "day," and the darkness he called "night." And there was evening, and there was morning-the first day.
And God said, "Let there be a vault between the waters to separate water from water." So God made the vault and separated the water under the vault from the water above it. And it was so. God called the vault "sky." And there was evening, and there was morning-the second day.
And God said, "Let the water under the sky be gathered to one place, and let dry ground appear." And it was so. God called the dry ground "land," and the gathered waters he called "seas." And God saw that it was good.
'''.strip()

# Write the sample text to disk so the rest of the script has a file to read.
with open("bibleSentences.15.txt", 'w') as fh:
    fh.write(ss)
########################################
import re
# Path of the text file that the processing functions are run on.
filename = "bibleSentences.15.txt"
def getData(filename):
    """Read a text file and return its lines as a list of strings.

    Args:
        filename: Path of the text file to read.

    Returns:
        One string per line of the file, in file order, with trailing
        whitespace (including the line terminator) removed. Blank lines
        are kept as empty strings.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(filename, 'r') as f:
        # rstrip() drops the newline and any other trailing whitespace.
        return [line.rstrip() for line in f]
def normalize(filename):
    """Return the lines of a text file lowercased and stripped of punctuation.

    Each line read by ``getData`` is converted to lowercase, then every
    character that is not a regex word character, a space or a tab is
    deleted.

    NOTE: removed characters are deleted, not replaced by a space, so words
    joined only by punctuation are fused together (e.g. "morning-the"
    becomes "morningthe").

    Args:
        filename: Path of the text file to normalize.

    Returns:
        A list with one normalized string per line of the file.
    """
    cleaned_lines = []
    for line in getData(filename):
        # Lowercase first, then strip everything except word chars, spaces, tabs.
        cleaned_lines.append(re.sub(r"[^\w \t]+", "", line.lower()))
    return cleaned_lines
import nltk
# Make sure the NLTK stop-word corpus is available before it is read below.
nltk.download('stopwords')
from nltk.corpus import stopwords
# NOTE: this deliberately rebinds the imported name `stopwords` to a plain set
# of English stop words; the functions below rely on that global for
# membership tests. After this line the corpus reader is no longer reachable
# through this name.
stopwords = set(stopwords.words('english'))
def countwords(filename):
    """Return the normalized lines of a file with stop words removed.

    Despite its name, this function does not count anything: it takes each
    line produced by ``normalize``, splits it on whitespace, drops every
    word found in the module-level ``stopwords`` set and joins the
    remaining words back together with single spaces.

    Args:
        filename: Path of the text file to process.

    Returns:
        A list with one filtered string per line of the file. A line made
        up only of stop words yields an empty string.
    """
    filtered_lines = []
    for sentence in normalize(filename):
        kept_words = [
            word for word in sentence.split()
            if word.lower() not in stopwords
        ]
        filtered_lines.append(' '.join(kept_words))
    return filtered_lines
# Run the stop-word filter on the sample file and show the resulting list.
output = countwords(filename)
print(output)
# Output printed by the script above:
# ['beginning god created heavens earth earth formless empty darkness surface deep spirit god hovering waters',
# 'god said let light light god saw light good separated light darkness god called light day darkness called night evening morningthe first day',
# 'god said let vault waters separate water water god made vault separated water vault water god called vault sky evening morningthe second day',
# 'god said let water sky gathered one place let dry ground appear god called dry ground land gathered waters called seas god saw good']