From nobody@FreeBSD.org  Sat Jun  2 00:01:02 2007
Return-Path: <nobody@FreeBSD.org>
Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52])
	by hub.freebsd.org (Postfix) with ESMTP id C6E6D16A400
	for <freebsd-gnats-submit@FreeBSD.org>; Sat,  2 Jun 2007 00:01:02 +0000 (UTC)
	(envelope-from nobody@FreeBSD.org)
Received: from www.freebsd.org (www.freebsd.org [69.147.83.33])
	by mx1.freebsd.org (Postfix) with ESMTP id 9B95313C468
	for <freebsd-gnats-submit@FreeBSD.org>; Sat,  2 Jun 2007 00:01:02 +0000 (UTC)
	(envelope-from nobody@FreeBSD.org)
Received: from www.freebsd.org (localhost [127.0.0.1])
	by www.freebsd.org (8.13.1/8.13.1) with ESMTP id l52010ol019675
	for <freebsd-gnats-submit@FreeBSD.org>; Sat, 2 Jun 2007 00:01:00 GMT
	(envelope-from nobody@www.freebsd.org)
Received: (from nobody@localhost)
	by www.freebsd.org (8.13.1/8.13.1/Submit) id l52010tn019674;
	Sat, 2 Jun 2007 00:01:00 GMT
	(envelope-from nobody)
Message-Id: <200706020001.l52010tn019674@www.freebsd.org>
Date: Sat, 2 Jun 2007 00:01:00 GMT
From: Dieter<freebsd@sopwith.solgatos.com>
To: freebsd-gnats-submit@FreeBSD.org
Subject: atrun(8) loses jobs due to race condition
X-Send-Pr-Version: www-3.0

>Number:         113239
>Category:       bin
>Synopsis:       [patch] atrun(8) loses jobs due to race condition
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          closed
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Sat Jun 02 00:10:03 GMT 2007
>Closed-Date:    Tue Jun 11 13:47:32 CDT 2013
>Last-Modified:  Tue Jun 11 18:50:00 UTC 2013
>Originator:     Dieter
>Release:        6.2
>Organization:
>Environment:
6.2-RELEASE amd64
>Description:
Due to a race condition, atrun(8) can unlink a job before it is executed.
This can result in lost data.
>How-To-Repeat:
Insert a sleep() call into atrun.c, just before the fork(), to emulate
something (fork() perhaps) taking a long time.  Set up an at job and
execute atrun.  Execute atrun a second time before the sleep returns.
Observe that your at job did not get executed, and see the error
message in syslog.

The patch file has code to demo the problem.
>Fix:
I have a workaround: only unlink the file if it is more than
6 hours past its scheduled run time.  Strictly speaking, this is
not a true fix; the race condition is still present, but if fork
is taking 6 hours you have other problems.

The patch file implements this workaround.


Patch attached with submission follows:

===================================================================
RCS file: RCS/atrun.c,v
retrieving revision 1.1
diff -r1.1 atrun.c
83a84,88
> /* Workaround for race condition: only unlink file if it is
>  * older than 6 hours.
>  */
> #define MIN_UNLINK_TIME 60*60*6   /* Number of seconds in 6 hours */
> 
143a149,161
> #if 0
>       /* If something takes too long and another instance of
>        * atrun starts up, it will unlink our file out from
>        * under us.  To demonstrate this race condition,
>        * enable the sleep, set MIN_UNLINK_TIME to 0, create
>        * an at job ("echo hello" is sufficient) and have atrun
>        * run more frequently than the sleep time.  The 70 second
>        * sleep assumes atrun is run from cron once a minute.
>        */
>       syslog(LOG_DEBUG, "Sleeping to trigger race condition, file=%s\n", filename);
>       sleep(70);
> #endif
> 
179c197
< 	perr("cannot open input file");
---
> 	syslog(LOG_ERR, "Cannot open input file %s : %m\n", filename);
479a498,500
> 	 *
> 	 *  Workaround for race condition: only unlink file if it is
> 	 *  older than MIN_UNLINK_TIME seconds.
481c502,504
< 	if ((run_time < now) && !(S_IXUSR & buf.st_mode) && (S_IRUSR & buf.st_mode))
---
> 	if (( (run_time + MIN_UNLINK_TIME) < now) && !(S_IXUSR & buf.st_mode) && (S_IRUSR & buf.st_mode))
> 	  {
> 	    syslog(LOG_DEBUG, "Unlinking %s run_time=%ld now=%ld\n", dirent->d_name, run_time, now);
482a506
> 	  }


>Release-Note:
>Audit-Trail:

From: Guy Helmer <guy.helmer@gmail.com>
To: bug-followup@FreeBSD.org,
 freebsd@sopwith.solgatos.com
Cc:  
Subject: Re: bin/113239: [patch] atrun(8) loses jobs due to race condition
Date: Wed, 5 Jun 2013 16:13:09 -0500

 I'm not comfortable with working around the race condition by
 determining whether a file seems "too old". Would serializing access to
 the queue work, using the following patch?
 
 Index: atrun.c
 ===================================================================
 --- atrun.c	(revision 251433)
 +++ atrun.c	(working copy)
 @@ -31,6 +31,7 @@
  /* System Headers */
  
  #include <sys/fcntl.h>
 +#include <sys/file.h>
  #include <sys/types.h>
  #include <sys/stat.h>
  #ifdef __FreeBSD__
 @@ -521,6 +522,9 @@
      if ((spool = opendir(".")) == NULL)
  	perr("cannot read %s", ATJOB_DIR);
  
 +    if (flock(dirfd(spool), LOCK_EX) == -1)
 +	perr("cannot lock %s", ATJOB_DIR);
 +
      now = time(NULL);
      run_batch = 0;
      batch_uid = (uid_t) -1;
 

From: Dieter BSD <dieterbsd@gmail.com>
To: bug-followup@FreeBSD.org, guy.helmer@gmail.com
Cc:  
Subject: Re: bin/113239: [patch] atrun(8) loses jobs due to race condition
Date: Fri, 7 Jun 2013 10:07:05 -0700

 The flock fix seems to work (after getting it to compile).  Thank you.
 
 The error message complaining about "Cannot open input file" really
 needs to provide the filename. (There are a few other error messages
 that could benefit from a similar improvement.)
 
 ===================================================================
 RCS file: RCS/atrun.c,v
 retrieving revision 1.1
 diff -u -r1.1 atrun.c
 --- atrun.c     2013/06/06 20:41:09     1.1
 +++ atrun.c     2013/06/07 16:39:59
 @@ -31,6 +31,7 @@
  /* System Headers */
 
  #include <sys/fcntl.h>
 +#include <sys/file.h>
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <sys/wait.h>
 @@ -70,11 +71,11 @@
  /* Macros */
 
  #ifndef ATJOB_DIR
 -#define ATJOB_DIR "/usr/spool/atjobs/"
 +#define ATJOB_DIR "/var/at/jobs/"
  #endif
 
  #ifndef ATSPOOL_DIR
 -#define ATSPOOL_DIR "/usr/spool/atspool/"
 +#define ATSPOOL_DIR "/var/at/spool/"
  #endif
 
  #ifndef LOADAVG_MX
 @@ -141,6 +142,12 @@
 
      PRIV_END
 
 +#define SLEEP_TEST 0     /* Test fix for PR bin/113239 */
 +#ifdef SLEEP_TEST
 +    syslog(LOG_DEBUG, "Sleeping to trigger race condition, file=%s\n", filename);
 +    sleep(70);
 +#endif
 +
      pid = fork();
      if (pid == -1)
         perr("cannot fork");
 @@ -176,7 +183,11 @@
  #endif
 
      if (stream == NULL)
 -       perr("cannot open input file");
 +    {
 +       char error_string[1024];
 +       sprintf(error_string, "Cannot open input file %s\n", filename);
 +       perr(error_string);
 +    }
 
      if ((fd_in = dup(fileno(stream))) <0)
         perr("error duplicating input file descriptor");
 @@ -444,6 +455,16 @@
      if ((spool = opendir(".")) == NULL)
         perr("cannot read " ATJOB_DIR);
 
 +#define WITH_FIX 1
 +#ifdef WITH_FIX
 +    if (flock(dirfd(spool), LOCK_EX) == -1)  /* Fix for PR bin/113239 */
 +      {
 +       char error_string[1024];
 +       sprintf(error_string,"cannot lock %s", ATJOB_DIR);
 +       perr(error_string);
 +      }
 +#endif
 +
      now = time(NULL);
      run_batch = 0;
      batch_uid = (uid_t) -1;
State-Changed-From-To: open->closed 
State-Changed-By: ghelmer 
State-Changed-When: Tue Jun 11 13:46:50 CDT 2013 
State-Changed-Why:  
Thanks for testing -- committed in rev 251625. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=113239 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: bin/113239: commit references a PR
Date: Tue, 11 Jun 2013 18:43:54 +0000 (UTC)

 Author: ghelmer
 Date: Tue Jun 11 18:43:27 2013
 New Revision: 251625
 URL: http://svnweb.freebsd.org/changeset/base/251625
 
 Log:
   Prevent races running the queue by serializing access to the
   queue directory.
   
   PR:		bin/113239
 
 Modified:
   head/libexec/atrun/atrun.c
 
 Modified: head/libexec/atrun/atrun.c
 ==============================================================================
 --- head/libexec/atrun/atrun.c	Tue Jun 11 18:43:25 2013	(r251624)
 +++ head/libexec/atrun/atrun.c	Tue Jun 11 18:43:27 2013	(r251625)
 @@ -31,6 +31,7 @@ static const char rcsid[] =
  /* System Headers */
  
  #include <sys/fcntl.h>
 +#include <sys/file.h>
  #include <sys/types.h>
  #include <sys/stat.h>
  #ifdef __FreeBSD__
 @@ -521,6 +522,9 @@ main(int argc, char *argv[])
      if ((spool = opendir(".")) == NULL)
  	perr("cannot read %s", ATJOB_DIR);
  
 +    if (flock(dirfd(spool), LOCK_EX) == -1)
 +	perr("cannot lock %s", ATJOB_DIR);
 +
      now = time(NULL);
      run_batch = 0;
      batch_uid = (uid_t) -1;
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 
>Unformatted:
