In this program, I use Selenium automation to find the English meaning of each Sanskrit word from a dictionary website.
The function eng_meaning accepts a word and, using Selenium and BeautifulSoup, scrapes the website to find its English meaning.
def _parse_meanings(html_page, word):
    """Extract the dictionary entries for ``word`` from a result page.

    The result table is located by its ``table0 bgcolor0`` class. Within each
    row, column 0 holds the Sanskrit word, column 2 the English
    transliteration and column 3 one English meaning. Rows are read from the
    top until a row belonging to a different word is reached.

    Args:
        html_page: HTML source of the result page.
        word: The word that was looked up.

    Returns:
        ``[sanskrit_word, transliteration, meaning_1, meaning_2, ...]``, or an
        empty list when the table is missing or its first entry is for
        another word.
    """
    soup = BeautifulSoup(html_page, 'html.parser')
    table = soup.find('table', attrs={'class': 'table0 bgcolor0'})
    table_body = table.find('tbody') if table is not None else None
    if table_body is None:
        return []

    meanings = []
    for row in table_body.find_all('tr'):
        # Collapse runs of whitespace inside every cell of the row.
        cells = [" ".join(td.get_text().split()) for td in row.find_all('td')]
        if len(cells) < 3:
            # Not a data row (too few cells to identify the word); skip it.
            continue
        sanskrit_word, english_transliteration = cells[0], cells[2]
        if word not in (sanskrit_word, english_transliteration):
            # This row and everything after it belongs to another entry.
            break
        if len(cells) < 4:
            continue
        if not meanings:
            # Emit the heading pair once, taken from the first matching row
            # that actually carries a meaning.
            meanings.extend((sanskrit_word, english_transliteration))
        meanings.append(cells[3])
    return meanings


def eng_meaning(word, driver=None):
    """Return the English meanings of a Sanskrit word.

    Opens the dictionary page in a browser, submits ``word`` through the
    search form and parses the resulting table.

    Args:
        word: The Sanskrit word to look up.
        driver: Optional, already running Selenium WebDriver to reuse. When
            given it is left open so the caller can reuse one browser for
            many words; when omitted a Chrome instance is started for this
            call and closed again before returning.

    Returns:
        ``[sanskrit_word, transliteration, meaning_1, ...]``, or an empty
        list when nothing was found or the lookup failed.
    """
    url = 'https://www.learnsanskrit.cc/index.php?mode=3&direct=au&script=hk&tran_input='
    owns_driver = driver is None
    meanings = []
    try:
        if owns_driver:
            driver = webdriver.Chrome("/usr/bin/chromedriver")
        driver.get(url)
        # "id" / "tag name" are the locator strategy strings behind By.ID and
        # By.TAG_NAME; the generic find_element(s) calls also work on Selenium
        # releases that no longer provide the find_element_by_* helpers.
        text_box = driver.find_element("id", "tran_input")
        text_box.send_keys(word)
        # The translate button is the last <button> on the page.
        buttons = driver.find_elements("tag name", "button")
        buttons[-1].click()
        meanings = _parse_meanings(driver.page_source, word)
    except Exception:
        # Best effort: a failed lookup must not abort a batch run, so any
        # browser or parsing error is reported as "no meaning".
        meanings = []
    finally:
        # Only close a browser this call started, and only if it started.
        if owns_driver and driver is not None:
            driver.quit()

    if meanings:
        print("meanings found")
    else:
        print("No meaning found for", word)
    return meanings
The program reading_writing_to_file.py reads a Sanskrit text file, removes duplicate words, and stores the meaning of each word in another text file.
import sanskrit_to_english as se
def remove_duplicates(file_name):
    """Return the distinct whitespace-separated words found in a text file.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A set containing every word of the file exactly once. Blank lines
        contribute nothing; an empty file yields an empty set.
    """
    unique_words = set()
    # Explicit encoding so the result does not depend on the platform default.
    # The ``with`` block closes the file; no separate close() is needed.
    with open(file_name, 'r', encoding='utf-8') as file:
        for line in file:
            unique_words.update(line.split())
    return unique_words
def eng_translation(file_name_r, file_name_w):
    """Write the English meaning of every distinct word of a Sanskrit file.

    Each word of ``file_name_r`` is looked up once with ``se.eng_meaning``.
    For every word that has a result, one line is written to
    ``file_name_w`` in the form
    ``<sanskrit word> - <transliteration> - <meaning> \\<meaning> \\...``.
    Words without a result are skipped.

    Args:
        file_name_r: Path of the Sanskrit text file to read.
        file_name_w: Path of the output file; it is overwritten.
    """
    # Sorted so the output file is reproducible; set iteration order is
    # arbitrary.
    words = sorted(remove_duplicates(file_name_r))
    # Explicit encoding so the text can be written regardless of the
    # platform default. The ``with`` block closes the file.
    with open(file_name_w, 'w', encoding='utf-8') as file:
        for word in words:
            meanings = se.eng_meaning(word)
            if not meanings:
                continue
            # Entries 0 and 1 are the heading pair; the rest are meanings.
            meanings_str = ' \\'.join(meanings[2:])
            file.write(" - ".join((meanings[0], meanings[1], meanings_str)) + "\n")
# Script entry: look up every distinct word of the input file.
file_name_r = "data.txt"  # Sanskrit text to read
file_name_w = "Sanskrit_english.txt"  # file that receives the meanings
eng_translation(file_name_r=file_name_r, file_name_w=file_name_w)
I am a beginner in Selenium. How can I improve the run-time performance of this code? It takes approx. 5 minutes to find the meaning of a word.
Sanskrit dictionary website - learnsanskrit.cc