tccr.it

       Rewrite the parsing in translate() and adjust Term and print_term() accordingly - wr - Translate a term via WordReference.com
 (HTM) hg clone https://bitbucket.org/iamleot/wr
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
       ---
 (DIR) changeset b7eb697e8b8c4c3096ab4c0edd6cadac2a682859
 (DIR) parent a9ddda00f212ad36066e7963bf29e84b8bc661f1
 (HTM) Author: Leonardo Taccari <iamleot@gmail.com>
       Date:   Fri, 18 Oct 2019 12:36:05 
       
       Rewrite the parsing in translate() and adjust Term and print_term() accordingly
       
       Adjust the parsing to catch all rows instead of only the ones with an `id'
       attribute.  Add initial logic to parse examples.
       Multiple rows are still not properly honored (e.g. there could be multiple
       translations that span several rows).
       
       Diffstat:
        wr.py |  87 +++++++++++++++++++++++++++++++++++++++++++-----------------------
        1 files changed, 56 insertions(+), 31 deletions(-)
       ---
       diff -r a9ddda00f212 -r b7eb697e8b8c wr.py
       --- a/wr.py     Sat Oct 05 13:06:43 2019 +0200
       +++ b/wr.py     Fri Oct 18 12:36:05 2019 +0200
       @@ -48,14 +48,16 @@
        
        Term = collections.namedtuple('Term', [
            'term',
       -    'type',
       +    'term_type',
            'term_description',
            'translation',
       +    'translation_type',
        ])
        Term.term.__doc__ += 'Term'
        Term.term_description.__doc__ += 'Description of the term'
       -Term.type.__doc__ += 'Type'
       +Term.term_type.__doc__ += 'Type of the term'
        Term.translation.__doc__ += 'Translation'
       +Term.translation_type.__doc__ += 'Type of the translation'
        
        
        def translate(dictionary: str, term: str) -> List[Term]:
       @@ -79,12 +81,14 @@
            <class 'wr.Term'>
            >>> ts[0].term
            'example'
       -    >>> ts[0].type
       +    >>> ts[0].term_type
            'n'
            >>> ts[0].term_description
            '(typical instance)'
            >>> ts[0].translation
            'esempio'
       +    >>> ts[0].translation_type
       +    'nm'
            """
            req = request.Request(WORDREFERENCE_URL.format(
                                      dictionary=parse.quote(dictionary),
       @@ -97,44 +101,65 @@
            ts = []
        
            if content and content.table:
       -        for tr in content.table.find_all('tr', id=True):
       -            frwrd, fr2, towrd = tr.find_all('td')
       -            pos2 = frwrd.find('em', class_='tooltip POS2')
       -            term = frwrd.strong.text.strip()
       -            if fr2.find('span', class_='dsense'):
       -                fr2.span.clear()
       -            term_description = fr2.text.strip()
       -            if pos2 and pos2.children and len(list(pos2.children)) > 0:
       -                type = list(pos2.children)[0].strip()
       -            else:
       -                type = ''
       -            if towrd.find('em', class_='POS2'):
       -                towrd.em.clear()
       -            translation = towrd.text.strip()
       -            ts.append(
       -                Term(
       -                    term=term,
       -                    type=type,
       -                    term_description=term_description,
       -                    translation=translation,
       -                )
       -            )
       +        t = {}
       +        for tr in content.table.find_all('tr'):
       +            if 'wrtopsection' in tr.get('class', []) or 'langHeader' in tr.get('class', []):
       +                continue
       +
       +            if tr.get('id'):
       +                if t:
       +                    ts.append(Term(**t))
       +                t = {}
       +
       +            if tr.find('td', class_='FrWrd'):
       +                frwrd = tr.find('td', class_='FrWrd')
       +                t['term'] = frwrd.strong.text.strip()
       +                pos2 = frwrd.find('em', class_='tooltip POS2')
       +                if pos2 and pos2.children and len(list(pos2.children)) > 0:
       +                    t['term_type'] = list(pos2.children)[0].strip()
       +                else:
       +                    t['term_type'] = ''
       +                _, fr2, _ = tr.find_all('td')
       +                t['term_description'] = fr2.text.strip()
       +
       +            if tr.find('td', class_='ToWrd'):
       +                towrd = tr.find('td', class_='ToWrd')
       +                pos2 = towrd.find('em', class_='POS2')
       +                if pos2 and pos2.children and len(list(pos2.children)) > 0:
       +                    t['translation_type'] = list(pos2.children)[0].strip()
       +                    towrd.find('em', class_='POS2').clear()
       +                else:
       +                    t['translation_type'] = ''
       +                t['translation'] = towrd.text.strip()
       +
       +            if tr.find('td', class_='FrEx'):
       +                frex = tr.find('td', class_='FrEx')
       +
       +            if tr.find('td', class_='ToEx'):
       +                toex = tr.find('td', class_='ToEx')
       +
       +        else:
       +            if t:
       +                ts.append(Term(**t))
        
            return ts
        
        
        def print_term(term: Term):
            """Pretty print a Term"""
       -    if term.type:
       -        sfmt = '{term} [{type}] {description}:\n{translation}'
       -    else:
       -        sfmt = '{term} {description}:\n{translation}'
       +    sfmt = '{term} '
       +    if term.term_type:
       +        sfmt += '[{term_type}] '
       +    sfmt += '{description}:\n{translation} '
       +    if term.translation_type:
       +        sfmt += '[{translation_type}]'
        
            print(textwrap.fill(sfmt.format(
                                    term=term.term,
       -                            type=term.type,
       +                            term_type=term.term_type,
                                    description=term.term_description,
       -                            translation=term.translation),
       +                            translation=term.translation,
       +                            translation_type=term.translation_type).strip(),
                                width=80,
                                break_long_words=False,
                                break_on_hyphens=False))