Initial commit - wr - Translate a term via WordReference.com
(HTM) hg clone https://bitbucket.org/iamleot/wr
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
---
(DIR) changeset b39ff5cc15f86bfd16630cfd18db3b5df8f18d2a
(HTM) Author: Leonardo Taccari <iamleot@gmail.com>
Date: Wed, 2 Oct 2019 15:35:18
Initial commit
Diffstat:
README | 7 ++
wr.py | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 162 insertions(+), 0 deletions(-)
---
diff -r 000000000000 -r b39ff5cc15f8 README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README Wed Oct 02 15:35:18 2019 +0200
@@ -0,0 +1,7 @@
+wr is a Python 3 script/module to translate terms using
+WordReference.com, e.g.:
+
+ % wr enit example
+ example [n] (typical instance): esempio
+ example [n] (ideal, model): esemplare
+ example [sth]⇒ [vtr] rare, often passive (give example): fare un esempio di
diff -r 000000000000 -r b39ff5cc15f8 wr.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/wr.py Wed Oct 02 15:35:18 2019 +0200
@@ -0,0 +1,155 @@
+#!/usr/pkg/bin/python3.7
+
+#
+# Copyright (c) 2019 Leonardo Taccari
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+"""
+Translate a term via WordReference.com
+
+wr is a script/module to translate terms via WordReference.com.
+"""
+
+
+from bs4 import BeautifulSoup, SoupStrainer
+from typing import List
+from urllib import parse, request
+import collections
+import textwrap
+
+
+WORDREFERENCE_URL = 'https://www.wordreference.com/{dictionary}/{term}'
+WORDREFERENCE_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0'
+
+
+# One translation entry as returned by translate(): the term, its type
+# (e.g. `n'), the description of the term and the translation.
+Term = collections.namedtuple(
+    'Term', ['term', 'type', 'term_description', 'translation'])
+# Assign (rather than append to) the field docstrings: namedtuple
+# pre-populates them with "Alias for field number N".
+Term.term.__doc__ = 'Term'
+Term.type.__doc__ = 'Type'
+Term.term_description.__doc__ = 'Description of the term'
+Term.translation.__doc__ = 'Translation'
+
+
+def translate(dictionary: str, term: str) -> List[Term]:
+    """Translate a term scraping WordReference.com
+
+    Given a pair of dictionaries (e.g. `enit') via `dictionary',
+    translate the term `term' and return all translations as a list of
+    Term-s.  The list is empty if the page has no translation table; rows
+    without exactly three cells or without a headword are skipped.
+
+    Supported dictionaries are: `ar' (Arabic), `cz' (Czech), `de' (German),
+    `en' (English), `es' (Spanish), `fr' (French), `gr' (Greek),
+    `it' (Italian), `ja' (Japanese), `ko' (Korean), `nl' (Dutch),
+    `pl' (Polish), `pt' (Portuguese), `ro' (Romanian), `ru' (Russian),
+    `sv' (Swedish), `tr' (Turkish), `zh' (Chinese).
+
+    >>> ts = translate('enit', 'example')
+    >>> type(ts)
+    <class 'list'>
+    >>> len(ts)
+    3
+    >>> type(ts[0])
+    <class 'wr.Term'>
+    >>> ts[0].term
+    'example'
+    >>> ts[0].type
+    'n'
+    >>> ts[0].term_description
+    '(typical instance)'
+    >>> ts[0].translation
+    'esempio'
+    """
+    req = request.Request(WORDREFERENCE_URL.format(
+        dictionary=parse.quote(dictionary),
+        term=parse.quote(term)))
+    req.add_header('User-Agent', WORDREFERENCE_USER_AGENT)
+    with request.urlopen(req) as r:
+        content = BeautifulSoup(r, 'html.parser',
+                                parse_only=SoupStrainer(id='articleWRD'))
+
+    def first_text(tag):
+        # Stripped first child of `tag'; '' if missing or not a string.
+        child = next(iter(tag.children), None) if tag is not None else None
+        return child.strip() if isinstance(child, str) else ''
+
+    ts = []
+    if not (content and content.table):
+        return ts
+    for tr in content.table.find_all('tr', id=True):
+        cells = tr.find_all('td')
+        if len(cells) != 3 or cells[0].strong is None:
+            continue
+        frwrd, fr2, towrd = cells
+        # Empty the `dsense' span itself so only the description is left.
+        dsense = fr2.find('span', class_='dsense')
+        if dsense is not None:
+            dsense.clear()
+        ts.append(Term(
+            term=frwrd.strong.text.strip(),
+            type=first_text(frwrd.find('em', class_='tooltip POS2')),
+            term_description=fr2.text.strip(),
+            translation=first_text(towrd)))
+    return ts
+
+
+def print_term(term: Term) -> None:
+    """Pretty print a Term, wrapping the output at 80 columns"""
+    if not term.type:
+        template = '{term} {description}:\n{translation}'
+    else:
+        template = '{term} [{type}] {description}:\n{translation}'
+    line = template.format(
+        term=term.term,
+        type=term.type,
+        description=term.term_description,
+        translation=term.translation)
+    # fill() treats the '\n' in the template like any other whitespace.
+    wrapped = textwrap.fill(line, width=80,
+                            break_long_words=False, break_on_hyphens=False)
+    print(wrapped)
+
+
+if __name__ == '__main__':
+    import sys
+
+    def usage():
+        """Print the usage message and exit with status 1"""
+        print('usage: {} dict term'.format(sys.argv[0]))
+        sys.exit(1)
+
+    # Exactly two arguments are expected: the dictionary pair and the term.
+    if len(sys.argv) != 3:
+        usage()
+
+    dictionary, term = sys.argv[1:3]
+    for t in translate(dictionary, term):
+        print_term(t)