Initial commit - wr - Translate a term via WordReference.com
 (HTM) hg clone https://bitbucket.org/iamleot/wr
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
       ---
 (DIR) changeset b39ff5cc15f86bfd16630cfd18db3b5df8f18d2a
 (HTM) Author: Leonardo Taccari <iamleot@gmail.com>
       Date:   Wed,  2 Oct 2019 15:35:18 
       
       Initial commit
       
       Diffstat:
        README |    7 ++
        wr.py  |  155 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        2 files changed, 162 insertions(+), 0 deletions(-)
       ---
       diff -r 000000000000 -r b39ff5cc15f8 README
       --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
       +++ b/README    Wed Oct 02 15:35:18 2019 +0200
       @@ -0,0 +1,7 @@
       +wr is a Python 3 script/module to translate terms using
       +WordReference.com, e.g.:
       +
       +    % wr enit example
       +    example [n] (typical instance): esempio
       +    example [n] (ideal, model): esemplare
       +    example [sth]⇒ [vtr] rare, often passive (give example): fare un esempio di
       diff -r 000000000000 -r b39ff5cc15f8 wr.py
       --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
       +++ b/wr.py     Wed Oct 02 15:35:18 2019 +0200
       @@ -0,0 +1,155 @@
       +#!/usr/pkg/bin/python3.7
       +
       +#
       +# Copyright (c) 2019 Leonardo Taccari
       +# All rights reserved.
       +#
       +# Redistribution and use in source and binary forms, with or without
       +# modification, are permitted provided that the following conditions
       +# are met:
       +#
       +# 1. Redistributions of source code must retain the above copyright
       +#    notice, this list of conditions and the following disclaimer.
       +# 2. Redistributions in binary form must reproduce the above copyright
       +#    notice, this list of conditions and the following disclaimer in the
       +#    documentation and/or other materials provided with the distribution.
       +#
       +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
       +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
       +# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       +# POSSIBILITY OF SUCH DAMAGE.
       +#
       +
       +
       +"""
       +Translate a term via WordReference.com
       +
       +wr is a script/module to translate terms via WordReference.com.
       +"""
       +
       +
       +from bs4 import BeautifulSoup, SoupStrainer
       +from typing import List
       +from urllib import parse, request
       +import collections
       +import textwrap
       +
       +
       +WORDREFERENCE_URL = 'https://www.wordreference.com/{dictionary}/{term}'
       +WORDREFERENCE_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0'
       +
       +
       +Term = collections.namedtuple('Term', [
       +    'term',
       +    'type',
       +    'term_description',
       +    'translation',
       +])
       +Term.term.__doc__ += 'Term'
       +Term.term_description.__doc__ += 'Description of the term'
       +Term.type.__doc__ += 'Type'
       +Term.translation.__doc__ += 'Translation'
       +
       +
       +def translate(dictionary: str, term: str) -> List[Term]:
       +    """Translate a term by scraping WordReference.com
       +
       +    Given a dictionary pair (e.g. `enit') via `dictionary',
       +    translate the term `term' and return all translations as a list of Term-s.
       +
       +    Supported dictionaries are: `ar' (Arabic), `cz' (Czech), `de' (German),
       +    `en' (English), `es' (Spanish), `fr' (French), `gr' (Greek),
       +    `it' (Italian), `ja' (Japanese), `ko' (Korean), `nl' (Dutch),
       +    `pl' (Polish), `pt' (Portuguese), `ro' (Romanian), `ru' (Russian),
       +    `sv' (Swedish), `tr' (Turkish), `zh' (Chinese).
       +
       +    >>> ts = translate('enit', 'example')
       +    >>> type(ts)
       +    <class 'list'>
       +    >>> len(ts)
       +    3
       +    >>> type(ts[0])
       +    <class 'wr.Term'>
       +    >>> ts[0].term
       +    'example'
       +    >>> ts[0].type
       +    'n'
       +    >>> ts[0].term_description
       +    '(typical instance)'
       +    >>> ts[0].translation
       +    'esempio'
       +    """
       +    req = request.Request(WORDREFERENCE_URL.format(
       +                              dictionary=parse.quote(dictionary),
       +                              term=parse.quote(term)))
       +    req.add_header('User-Agent', WORDREFERENCE_USER_AGENT)
       +    with request.urlopen(req) as r:
       +        content = BeautifulSoup(r, 'html.parser',
       +                                parse_only=SoupStrainer(id='articleWRD'))
       +
       +    ts = []
       +
       +    if content and content.table:
       +        for tr in content.table.find_all('tr', id=True):
       +            frwrd, fr2, towrd = tr.find_all('td')
       +            pos2 = frwrd.find('em', class_='tooltip POS2')
       +            term = frwrd.strong.text.strip()
       +            if fr2.find('span', class_='dsense'):
       +                fr2.span.clear()
       +            term_description = fr2.text.strip()
       +            if pos2 and pos2.children and len(list(pos2.children)) > 0:
       +                type = list(pos2.children)[0].strip()
       +            else:
       +                type = ''
       +            translation = list(towrd.children)[0].strip()
       +            ts.append(
       +                Term(
       +                    term=term,
       +                    type=type,
       +                    term_description=term_description,
       +                    translation=translation,
       +                )
       +            )
       +
       +    return ts
       +
       +
       +def print_term(term: Term):
       +    """Pretty print a Term"""
       +    if term.type:
       +        sfmt = '{term} [{type}] {description}:\n{translation}'
       +    else:
       +        sfmt = '{term} {description}:\n{translation}'
       +
       +    print(textwrap.fill(sfmt.format(
       +                            term=term.term,
       +                            type=term.type,
       +                            description=term.term_description,
       +                            translation=term.translation),
       +                        width=80,
       +                        break_long_words=False,
       +                        break_on_hyphens=False))
       +
       +
       +if __name__ == '__main__':
       +    import sys
       +
       +    def usage():
       +        print('usage: {} dict term'.format(sys.argv[0]))
       +        sys.exit(1)
       +
       +    if len(sys.argv) != 3:
       +        usage()
       +
       +    dictionary = sys.argv[1]
       +    term = sys.argv[2]
       +
       +    for t in translate(dictionary, term):
       +        print_term(t)