man page documentation for all tools: copied from sfeed and changed - tscrape - twitter scraper
 (HTM) git clone git://git.codemadness.org/tscrape
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit f05f3eb6c90f7b1baf7369498609dc5d5d212b63
 (DIR) parent 797f715398aeac08febc8067ed6423da727e4f45
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sat, 17 Aug 2019 12:10:00 +0200
       
       man page documentation for all tools: copied from sfeed and changed
       
       initial version, needs some more work.
       
       Diffstat:
         M Makefile                            |      15 ++++++++++++---
         A tscrape.5                           |      47 +++++++++++++++++++++++++++++++
         A tscrape_html.1                      |      34 +++++++++++++++++++++++++++++++
         A tscrape_plain.1                     |      46 +++++++++++++++++++++++++++++++
         A tscrape_update.1                    |      89 +++++++++++++++++++++++++++++++
         A tscraperc.5                         |      98 +++++++++++++++++++++++++++++++
       
       6 files changed, 326 insertions(+), 3 deletions(-)
       ---
 (DIR) diff --git a/Makefile b/Makefile
       @@ -36,7 +36,11 @@ COMPATOBJ =\
        
        LIB = ${LIBUTIL} ${LIBXML} ${COMPATOBJ}
        
       -MAN1 = tscrape.1
       +MAN1 = ${BIN:=.1}\
       +        ${SCRIPTS:=.1}
       +MAN5 = \
       +        tscrape.5\
       +        tscraperc.5
        DOC = \
                LICENSE\
                README
       @@ -66,7 +70,7 @@ ${LIBXML}: ${LIBXMLOBJ}
        dist:
                rm -rf "${NAME}-${VERSION}"
                mkdir -p "${NAME}-${VERSION}"
       -        cp -f ${MAN1} ${DOC} ${HDR} \
       +        cp -f ${MAN1} ${MAN5} ${DOC} ${HDR} \
                        ${SRC} ${LIBXMLSRC} ${LIBUTILSRC} ${COMPATSRC} ${SCRIPTS} \
                        Makefile config.mk \
                        tscraperc.example style.css \
       @@ -90,10 +94,14 @@ install: all
                        style.css\
                        README\
                        "${DESTDIR}${DOCPREFIX}"
       -        # installing manual pages for tools.
       +        # installing manual pages for general commands: section 1.
                mkdir -p "${DESTDIR}${MANPREFIX}/man1"
                cp -f ${MAN1} "${DESTDIR}${MANPREFIX}/man1"
                for m in $(MAN1); do chmod 644 "${DESTDIR}${MANPREFIX}/man1/$$m"; done
       +        # installing manual pages for file formats: section 5.
       +        mkdir -p "${DESTDIR}${MANPREFIX}/man5"
       +        cp -f ${MAN5} "${DESTDIR}${MANPREFIX}/man5"
       +        for m in ${MAN5}; do chmod 644 "${DESTDIR}${MANPREFIX}/man5/$$m"; done
        
        uninstall:
                # removing executable files and scripts.
       @@ -106,5 +114,6 @@ uninstall:
                -rmdir "${DESTDIR}${DOCPREFIX}"
                # removing manual pages.
                for m in $(MAN1); do rm -f "${DESTDIR}${MANPREFIX}/man1/$$m"; done
       +        for m in ${MAN5}; do rm -f "${DESTDIR}${MANPREFIX}/man5/$$m"; done
        
        .PHONY: all clean dist install uninstall
 (DIR) diff --git a/tscrape.5 b/tscrape.5
       @@ -0,0 +1,47 @@
       +.Dd July 20, 2019
       +.Dt TSCRAPE 5
       +.Os
       +.Sh NAME
       +.Nm tscrape
       +.Nd output format
       +.Sh SYNOPSIS
       +.Nm
       +.Sh DESCRIPTION
       +.Xr tscrape 1
       +writes the feed data in a TAB-separated format to stdout.
       +.Sh TAB-SEPARATED FORMAT FIELDS
       +The items are output per line in a TSV-like format.
       +.Pp
       +The fields are not allowed to have newlines and TABs, all whitespace characters
       +are replaced by a single space character.
       +Control characters are removed.
       +.Sh TAB-SEPARATED FORMAT FIELDS
       +The items are saved in a TSV-like format. Control characters are replaced
       +by a single space.
       +.Pp
       +The order and format of the fields are:
       +.Bl -tag -width 17n
       +.It UNIX timestamp
       +UNIX timestamp in UTC+0.
       +.It username
       +Twitter username (can be a retweet).
       +.It fullname
       +Twitter fullname (can be a retweet).
       +.It tweet text
       +Tweet text.
       +.It item id
       +Item id.
       +.It item username
       +Item username.
       +.It item fullname
       +Item fullname.
       +.It item retweetid
       +Item Retweet ID.
       +.It item is pinned
       +Whether the item is pinned: 0 or 1.
       +.El
       +.Sh SEE ALSO
       +.Xr tscrape 1 ,
       +.Xr tscrape_plain 1
       +.Sh AUTHORS
       +.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org
 (DIR) diff --git a/tscrape_html.1 b/tscrape_html.1
       @@ -0,0 +1,34 @@
       +.Dd July 20, 2019
       +.Dt TSCRAPE_HTML 1
       +.Os
       +.Sh NAME
       +.Nm tscrape_html
       +.Nd format TSV data to HTML
       +.Sh SYNOPSIS
       +.Nm
       +.Op Ar file...
       +.Sh DESCRIPTION
       +.Nm
       +formats tscrape data (TSV) from
       +.Xr tscrape 1
       +from stdin or
       +.Ar file
       +to stdout in HTML.
       +If one or more
       +.Ar file
       +are specified, the basename of the
       +.Ar file
       +is used as the feed name in the output.
       +If no
       +.Ar file
       +parameters are specified, and so the data is read from stdin, the feed name
       +is empty.
       +.Pp
       +Items with a timestamp from the last day compared to the system time at the
       +time of formatting are counted and marked as new.
       +.Sh SEE ALSO
       +.Xr tscrape 1 ,
       +.Xr tscrape_plain 1 ,
       +.Xr tscrape 5
       +.Sh AUTHORS
       +.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org
 (DIR) diff --git a/tscrape_plain.1 b/tscrape_plain.1
       @@ -0,0 +1,46 @@
       +.Dd July 20, 2019
       +.Dt TSCRAPE_PLAIN 1
       +.Os
       +.Sh NAME
       +.Nm tscrape_plain
       +.Nd format tscrape data to a plain-text list
       +.Sh SYNOPSIS
       +.Nm
       +.Op Ar file...
       +.Sh DESCRIPTION
       +.Nm
       +formats tscrape data (TSV) from
       +.Xr tscrape 1
       +from stdin or
       +.Ar file
       +to stdout as a plain-text list.
       +If one or more
       +.Ar file
       +are specified, the basename of the
       +.Ar file
       +is used as the feed name in the output.
       +If no
       +.Ar file
       +parameters are specified, and so the data is read from stdin, the feed name
       +is empty.
       +.Pp
       +Items with a timestamp from the last day compared to the system time at the
       +time of formatting are marked as new.
       +.Pp
       +.Nm
       +aligns the output.
       +It shows a maximum of 70 column-wide characters for the title and outputs
       +an ellipsis symbol if the title is longer and truncated.
       +Make sure the environment variable
       +.Ev LC_CTYPE
       +is set to a UTF-8 locale, so it can determine the proper column-width
       +per rune, using
       +.Xr mbtowc 3
       +and
       +.Xr wcwidth 3 .
       +.Sh SEE ALSO
       +.Xr tscrape 1 ,
       +.Xr tscrape_html 1 ,
       +.Xr tscrape 5
       +.Sh AUTHORS
       +.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org
 (DIR) diff --git a/tscrape_update.1 b/tscrape_update.1
       @@ -0,0 +1,89 @@
       +.Dd August 17, 2019
       +.Dt TSCRAPE_UPDATE 1
       +.Os
       +.Sh NAME
       +.Nm tscrape_update
       +.Nd update feeds and merge with old feeds
       +.Sh SYNOPSIS
       +.Nm
       +.Op Ar tscraperc
       +.Sh DESCRIPTION
       +.Nm
       +updates feed files and merges the new data with the previous files.
       +These are the files in the directory
       +.Pa $HOME/.tscrape/feeds
       +by default.
       +.Sh OPTIONS
       +.Bl -tag -width 17n
       +.It Ar tscraperc
       +Config file, if not specified uses the path
       +.Pa $HOME/.tscrape/tscraperc
       +by default.
       +See the
       +.Sx FILES READ
       +section for more information.
       +.El
       +.Sh FILES READ
       +.Bl -tag -width 17n
       +.It Ar tscraperc
       +Config file, see the tscraperc.example file for an example.
       +This file is evaluated as a shellscript in
       +.Nm .
       +.Pp
       +At least the following functions can be overridden per feed:
       +.Bl -tag -width 17n
       +.It Fn fetch
       +to use
       +.Xr wget 1 ,
       +OpenBSD
       +.Xr ftp 1
       +or another download program.
       +.It Fn merge
       +to change the merge logic.
       +.It Fn filter
       +to filter on fields.
       +.It Fn order
       +to change the sort order.
       +.El
       +.Pp
       +The
       +.Fn feeds
       +function is called to process the feeds.
       +The default
       +.Fn feed
       +function is executed concurrently as a background job in your
       +.Xr tscraperc 5
       +config file to make updating faster.
       +The variable
       +.Va maxjobs
       +can be changed to limit or increase the number of concurrent jobs (8 by
       +default).
       +.El
       +.Sh FILES WRITTEN
       +.Bl -tag -width 17n
       +.It feedname
       +TAB-separated format containing all items per feed.
       +The
       +.Nm
       +script merges new items with this file.
       +The filename cannot contain '/' characters, they will be replaced with '_'.
       +.El
       +.Sh EXAMPLES
       +To update your feeds and format them in various formats:
       +.Bd -literal
       +# Update
       +tscrape_update "configfile"
       +# Plain-text list
       +tscrape_plain $HOME/.tscrape/feeds/* > $HOME/.tscrape/feeds.txt
       +# HTML
       +tscrape_html $HOME/.tscrape/feeds/* > $HOME/.tscrape/feeds.html
       +.Ed
       +.Sh SEE ALSO
       +.Xr tscrape 1 ,
       +.Xr tscrape_html 1 ,
       +.Xr tscrape_plain 1 ,
       +.Xr sh 1 ,
       +.Xr tscrape 5 ,
       +.Xr tscraperc 5
       +.Sh AUTHORS
       +.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org
 (DIR) diff --git a/tscraperc.5 b/tscraperc.5
       @@ -0,0 +1,98 @@
       +.Dd July 14, 2019
       +.Dt TSCRAPERC 5
       +.Os
       +.Sh NAME
       +.Nm tscraperc
       +.Nd tscrape_update(1) configuration file
       +.Sh DESCRIPTION
       +.Nm
       +is the configuration file for
       +.Xr tscrape_update 1 .
       +.Pp
       +The variable
       +.Va tscrapepath
       +can be set for the directory to store the TAB-separated feed files,
       +by default this is
       +.Pa $HOME/.tscrape/feeds .
       +.
       +.Sh FUNCTIONS
       +The following functions must be defined in a
       +.Nm
       +file:
       +.Bl -tag -width Ds
       +.It Fn feeds
       +This function is like a "main" function called from
       +.Xr tscrape_update 1 .
       +.It Fn feed "name" "feedurl"
       +Function to process the feed, its arguments are in the order:
       +.Bl -tag -width Ds
       +.It Fa name
       +Name of the feed, this is also used as the filename for the TAB-separated
       +feed file.
       +The filename cannot contain '/' characters, they will be replaced with '_'.
       +.It Fa feedurl
       +Uri to fetch the Twitter data from, usually an HTTP or HTTPS uri.
       +.El
       +.El
       +.Sh OVERRIDE FUNCTIONS
       +Because
       +.Xr tscrape_update 1
       +is a shellscript each function can be overridden to change its behaviour,
       +notable functions are:
       +.Bl -tag -width Ds
       +.It Fn fetch "name" "uri" "feedfile"
       +Fetches the feed from the uri and writes the data to stdout, its arguments are:
       +.Bl -tag -width Ds
       +.It Fa name
       +Specified name in configuration file (useful for logging).
       +.It Fa uri
       +Uri to fetch.
       +.It Fa feedfile
       +Used feedfile (useful for comparing modification times).
       +.El
       +.It Fn merge "name" "oldfile" "newfile"
       +Merges the data of oldfile with newfile and writes it to stdout, its arguments are:
       +.Bl -tag -width Ds
       +.It Fa name
       +Feed name.
       +.It Fa oldfile
       +Old file.
       +.It Fa newfile
       +New file.
       +.El
       +.It Fn filter "name"
       +Filter
       +.Xr tscrape 5
       +data from stdin, write to stdout, its arguments are:
       +.Bl -tag -width Ds
       +.It Fa name
       +Feed name.
       +.El
       +.It Fn order "name"
       +Sort
       +.Xr tscrape 5
       +data from stdin, write to stdout, its arguments are:
       +.Bl -tag -width Ds
       +.It Fa name
       +Feed name.
       +.El
       +.El
       +.Sh EXAMPLES
       +An example configuration file is included named tscraperc.example and also
       +shown below:
       +.Bd -literal
       +#tscrapepath="$HOME/.tscrape/feeds"
       +
       +# list of feeds to fetch:
       +feeds() {
       +        # feed <name> <feedurl>
       +        feed "Rich Felker" "https://twitter.com/richfelker"
       +        feed "Internet of shit" "https://twitter.com/internetofshit"
       +        feed "Donald Trump" "https://twitter.com/realdonaldtrump"
       +}
       +.Ed
       +.Sh SEE ALSO
       +.Xr tscrape_update 1 ,
       +.Xr sh 1
       +.Sh AUTHORS
       +.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org