From nobody@FreeBSD.org  Mon May 28 20:11:15 2007
Return-Path: <nobody@FreeBSD.org>
Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52])
	by hub.freebsd.org (Postfix) with ESMTP id 9B95116A4A5
	for <freebsd-gnats-submit@FreeBSD.org>; Mon, 28 May 2007 20:11:15 +0000 (UTC)
	(envelope-from nobody@FreeBSD.org)
Received: from www.freebsd.org (www.freebsd.org [69.147.83.33])
	by mx1.freebsd.org (Postfix) with ESMTP id 8CD8013C45B
	for <freebsd-gnats-submit@FreeBSD.org>; Mon, 28 May 2007 20:11:15 +0000 (UTC)
	(envelope-from nobody@FreeBSD.org)
Received: from www.freebsd.org (localhost [127.0.0.1])
	by www.freebsd.org (8.13.1/8.13.1) with ESMTP id l4SKBF1I063688
	for <freebsd-gnats-submit@FreeBSD.org>; Mon, 28 May 2007 20:11:15 GMT
	(envelope-from nobody@www.freebsd.org)
Received: (from nobody@localhost)
	by www.freebsd.org (8.13.1/8.13.1/Submit) id l4SKBFtZ063687;
	Mon, 28 May 2007 20:11:15 GMT
	(envelope-from nobody)
Message-Id: <200705282011.l4SKBFtZ063687@www.freebsd.org>
Date: Mon, 28 May 2007 20:11:15 GMT
From: Marcel Moolenaar<marcel@FreeBSD.org>
To: freebsd-gnats-submit@FreeBSD.org
Subject: [MCA] Multiple records can have the same sequence number
X-Send-Pr-Version: www-3.0

>Number:         113102
>Category:       ia64
>Synopsis:       [MCA] Multiple records can have the same sequence number
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-ia64
>State:          closed
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Mon May 28 20:20:03 GMT 2007
>Closed-Date:    Tue Apr 13 22:29:57 UTC 2010
>Last-Modified:  Tue Apr 13 22:30:06 UTC 2010
>Originator:     Marcel Moolenaar
>Release:        -CURRENT
>Organization:
>Environment:
FreeBSD pluto1.freebsd.org 7.0-CURRENT FreeBSD 7.0-CURRENT #1: Mon May 28 03:36:46 UTC 2007     marcel@pluto1.freebsd.org:/p/obj/p/7.x/src/sys/PLUTO1  ia64
>Description:
Certain classes of errors can result in multiple MCA records with the same sequence number.
It is currently assumed that each record has its own (unique) sequence number.
The sequence number is used to create nodes in the sysctl tree under hw.mca and when multiple
records have the same sequence number, we can't save the record.
We get the following error: can't re-use a leaf (57)!
It seems that the class of errors for which this happens consists of those errors for which
we need to get CPU-specific errors from each CPU, as the error seems to happen when we get
the MCA record from the AP after we got the error from the BSP.

>How-To-Repeat:

>Fix:


>Release-Note:
>Audit-Trail:
State-Changed-From-To: open->closed 
State-Changed-By: marcel 
State-Changed-When: Tue Apr 13 22:29:32 UTC 2010 
State-Changed-Why:  
Resolved in 9-CURRENT. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=113102 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: ia64/113102: commit references a PR
Date: Tue, 13 Apr 2010 22:20:22 +0000 (UTC)

 Author: marcel
 Date: Tue Apr 13 22:20:12 2010
 New Revision: 206570
 URL: http://svn.freebsd.org/changeset/base/206570
 
 Log:
   Populate the sysctl tree with any MCA records we collected.
   The sequence number is used as the name of a sysctl node,
   under which we add the MCA records using the CPU id as the
   leaf  name.
   
   Add the hw.mca.inject sysctl to provide a way to inject
   MC errors and trigger machine checks.
   
   PR:		ia64/113102
 
 Modified:
   head/sys/ia64/ia64/mca.c
   head/sys/ia64/include/mca.h
 
 Modified: head/sys/ia64/ia64/mca.c
 ==============================================================================
 --- head/sys/ia64/ia64/mca.c	Tue Apr 13 21:32:06 2010	(r206569)
 +++ head/sys/ia64/ia64/mca.c	Tue Apr 13 22:20:12 2010	(r206570)
 @@ -1,5 +1,5 @@
  /*-
 - * Copyright (c) 2002 Marcel Moolenaar
 + * Copyright (c) 2002-2010 Marcel Moolenaar
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
 @@ -37,6 +37,7 @@
  #include <vm/vm.h>
  #include <vm/vm_kern.h>
  #include <machine/mca.h>
 +#include <machine/pal.h>
  #include <machine/sal.h>
  #include <machine/smp.h>
  
 @@ -44,19 +45,19 @@ MALLOC_DEFINE(M_MCA, "MCA", "Machine Che
  
  struct mca_info {
  	STAILQ_ENTRY(mca_info) mi_link;
 -	char	mi_name[32];
 +	u_long	mi_seqnr;
 +	u_int	mi_cpuid;
  	size_t	mi_recsz;
  	char	mi_record[0];
  };
  
 -static STAILQ_HEAD(, mca_info) mca_records =
 -    STAILQ_HEAD_INITIALIZER(mca_records);
 +STAILQ_HEAD(mca_info_list, mca_info);
  
 -int64_t		mca_info_size[SAL_INFO_TYPES];
 -vm_offset_t	mca_info_block;
 -struct mtx	mca_info_block_lock;
 +static int64_t		mca_info_size[SAL_INFO_TYPES];
 +static vm_offset_t	mca_info_block;
 +static struct mtx	mca_info_block_lock;
  
 -SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RW, 0, "MCA container");
 +SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RW, NULL, "MCA container");
  
  static int mca_count;		/* Number of records stored. */
  static int mca_first;		/* First (lowest) record ID. */
 @@ -69,6 +70,32 @@ SYSCTL_INT(_hw_mca, OID_AUTO, first, CTL
  SYSCTL_INT(_hw_mca, OID_AUTO, last, CTLFLAG_RD, &mca_last, 0,
      "Last record id");
  
 +static struct mtx mca_sysctl_lock;
 +
 +static int
 +mca_sysctl_inject(SYSCTL_HANDLER_ARGS)
 +{
 +	struct ia64_pal_result res;
 +	u_int val;
 +	int error;
 +
 +	val = 0;
 +	error = sysctl_wire_old_buffer(req, sizeof(u_int));
 +	if (!error)
 +		error = sysctl_handle_int(oidp, &val, 0, req);
 +
 +	if (error != 0 || req->newptr == NULL)
 +		return (error);
 +
 +	/* For example: val=137 causes a fatal CPU error. */
 +	res = ia64_call_pal_stacked(PAL_MC_ERROR_INJECT, val, 0, 0);
 +	printf("%s: %#lx, %#lx, %#lx, %#lx\n", __func__, res.pal_status,
 +	    res.pal_result[0], res.pal_result[1], res.pal_result[2]);
 +	return (0);
 +}
 +SYSCTL_PROC(_hw_mca, OID_AUTO, inject, CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
 +    mca_sysctl_inject, "I", "set to trigger a MCA");
 +
  static int
  mca_sysctl_handler(SYSCTL_HANDLER_ARGS)
  {
 @@ -85,27 +112,8 @@ mca_sysctl_handler(SYSCTL_HANDLER_ARGS)
  	return (error);
  }
  
 -void
 -ia64_mca_populate(void)
 -{
 -	struct mca_info *rec;
 -
 -	mtx_lock_spin(&mca_info_block_lock);
 -	while (!STAILQ_EMPTY(&mca_records)) {
 -		rec = STAILQ_FIRST(&mca_records);
 -		STAILQ_REMOVE_HEAD(&mca_records, mi_link);
 -		mtx_unlock_spin(&mca_info_block_lock);
 -		(void)SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca),
 -		    OID_AUTO, rec->mi_name, CTLTYPE_OPAQUE | CTLFLAG_RD,
 -		    rec->mi_record, rec->mi_recsz, mca_sysctl_handler, "S,MCA",
 -		    "Error record");
 -		mtx_lock_spin(&mca_info_block_lock);
 -	}
 -	mtx_unlock_spin(&mca_info_block_lock);
 -}
 -
 -void
 -ia64_mca_save_state(int type)
 +static void
 +ia64_mca_collect_state(int type, struct mca_info_list *reclst)
  {
  	struct ia64_sal_result result;
  	struct mca_record_header *hdr;
 @@ -123,13 +131,13 @@ ia64_mca_save_state(int type)
  	if (mca_info_block == 0)
  		return;
  
 -	mtx_lock_spin(&mca_info_block_lock);
  	while (1) {
 +		mtx_lock_spin(&mca_info_block_lock);
  		result = ia64_sal_entry(SAL_GET_STATE_INFO, type, 0,
  		    mca_info_block, 0, 0, 0, 0);
  		if (result.sal_status < 0) {
  			mtx_unlock_spin(&mca_info_block_lock);
 -			return;
 +			break;
  		}
  
  		hdr = (struct mca_record_header *)mca_info_block;
 @@ -142,9 +150,10 @@ ia64_mca_save_state(int type)
  		    M_NOWAIT | M_ZERO);
  		if (rec == NULL)
  			/* XXX: Not sure what to do. */
 -			return;
 +			break;
  
 -		sprintf(rec->mi_name, "%lld", (long long)seqnr);
 +		rec->mi_seqnr = seqnr;
 +		rec->mi_cpuid = PCPU_GET(cpuid);
  
  		mtx_lock_spin(&mca_info_block_lock);
  
 @@ -163,7 +172,6 @@ ia64_mca_save_state(int type)
  			if (seqnr != hdr->rh_seqnr) {
  				mtx_unlock_spin(&mca_info_block_lock);
  				free(rec, M_MCA);
 -				mtx_lock_spin(&mca_info_block_lock);
  				continue;
  			}
  		}
 @@ -171,23 +179,51 @@ ia64_mca_save_state(int type)
  		rec->mi_recsz = recsz;
  		bcopy((char*)mca_info_block, rec->mi_record, recsz);
  
 -		if (mca_count > 0) {
 -			if (seqnr < mca_first)
 -				mca_first = seqnr;
 -			else if (seqnr > mca_last)
 -				mca_last = seqnr;
 -		} else
 -			mca_first = mca_last = seqnr;
 -
 -		mca_count++;
 -		STAILQ_INSERT_TAIL(&mca_records, rec, mi_link);
 -
  		/*
  		 * Clear the state so that we get any other records when
  		 * they exist.
  		 */
  		result = ia64_sal_entry(SAL_CLEAR_STATE_INFO, type, 0, 0, 0,
  		    0, 0, 0);
 +
 +		mtx_unlock_spin(&mca_info_block_lock);
 +
 +		STAILQ_INSERT_TAIL(reclst, rec, mi_link);
 +	}
 +}
 +
 +void
 +ia64_mca_save_state(int type)
 +{
 +	char name[64];
 +	struct mca_info_list reclst = STAILQ_HEAD_INITIALIZER(reclst);
 +	struct mca_info *rec;
 +	struct sysctl_oid *oid;
 +
 +	ia64_mca_collect_state(type, &reclst);
 +
 +	STAILQ_FOREACH(rec, &reclst, mi_link) {
 +		sprintf(name, "%lu", rec->mi_seqnr);
 +		oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca),
 +		    OID_AUTO, name, CTLFLAG_RW, NULL, name);
 +		if (oid == NULL)
 +			continue;
 +
 +		mtx_lock(&mca_sysctl_lock);
 +		if (mca_count > 0) {
 +			if (rec->mi_seqnr < mca_first)
 +				mca_first = rec->mi_seqnr;
 +			else if (rec->mi_seqnr > mca_last)
 +				mca_last = rec->mi_seqnr;
 +		} else
 +			mca_first = mca_last = rec->mi_seqnr;
 +		mca_count++;
 +		mtx_unlock(&mca_sysctl_lock);
 +
 +		sprintf(name, "%u", rec->mi_cpuid);
 +		SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), rec->mi_cpuid,
 +		    name, CTLTYPE_OPAQUE | CTLFLAG_RD, rec->mi_record,
 +		    rec->mi_recsz, mca_sysctl_handler, "S,MCA", "MCA record");
  	}
  }
  
 @@ -237,7 +273,14 @@ ia64_mca_init(void)
  	 * should be rare. On top of that, performance is not an issue when
  	 * dealing with machine checks...
  	 */
 -	mtx_init(&mca_info_block_lock, "MCA spin lock", NULL, MTX_SPIN);
 +	mtx_init(&mca_info_block_lock, "MCA info lock", NULL, MTX_SPIN);
 +
 +	/*
 +	 * Serialize sysctl operations with a sleep lock. Note that this
 +	 * implies that we update the sysctl tree in a context that allows
 +	 * sleeping.
 +	 */
 +	mtx_init(&mca_sysctl_lock, "MCA sysctl lock", NULL, MTX_DEF);
  
  	/*
  	 * Get and save any processor and platfom error records. Note that in
 
 Modified: head/sys/ia64/include/mca.h
 ==============================================================================
 --- head/sys/ia64/include/mca.h	Tue Apr 13 21:32:06 2010	(r206569)
 +++ head/sys/ia64/include/mca.h	Tue Apr 13 22:20:12 2010	(r206570)
 @@ -1,5 +1,5 @@
  /*-
 - * Copyright (c) 2002 Marcel Moolenaar
 + * Copyright (c) 2002-2010 Marcel Moolenaar
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
 @@ -240,7 +240,6 @@ struct mca_pcidev_reg {
  #ifdef _KERNEL
  
  void ia64_mca_init(void);
 -void ia64_mca_populate(void);
  void ia64_mca_save_state(int);
  
  #endif /* _KERNEL */
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: ia64/113102: commit references a PR
Date: Tue, 13 Apr 2010 22:27:52 +0000 (UTC)

 Author: marcel
 Date: Tue Apr 13 22:27:39 2010
 New Revision: 206571
 URL: http://svn.freebsd.org/changeset/base/206571
 
 Log:
   The sequence number is now a node under which the MCA records are
   hung by CPU id.
   When showing the MCA record, print the MIB as a comment.
   
   PR:		ia64/113102
 
 Modified:
   head/sbin/mca/mca.c
 
 Modified: head/sbin/mca/mca.c
 ==============================================================================
 --- head/sbin/mca/mca.c	Tue Apr 13 22:20:12 2010	(r206570)
 +++ head/sbin/mca/mca.c	Tue Apr 13 22:27:39 2010	(r206571)
 @@ -53,10 +53,12 @@ __FBSDID("$FreeBSD$");
  
  #define	BCD(x)	((x >> 4) * 10 + (x & 15))
  
 +#define	HW_MCA_MAX_CPUID	255
 +
  static char hw_mca_count[] = "hw.mca.count";
  static char hw_mca_first[] = "hw.mca.first";
  static char hw_mca_last[] = "hw.mca.last";
 -static char hw_mca_recid[] = "hw.mca.%d";
 +static char hw_mca_recid[] = "hw.mca.%lu.%u";
  
  static char default_dumpfile[] = "/var/log/mca.log";
  
 @@ -372,10 +374,13 @@ show_section(struct mca_section_header *
  }
  
  static void
 -show(char *data)
 +show(char *data, const char *mib)
  {
  	size_t reclen, seclen;
  
 +	if (mib != NULL)
 +		printf("<!-- MIB: %s -->\n", mib);
 +
  	printf("<record>\n");
  	reclen = show_header((void*)data) - sizeof(struct mca_record_header);
  	data += sizeof(struct mca_record_header);
 @@ -402,7 +407,7 @@ showall(char *buf, size_t buflen)
  		if (buflen < reclen)
  			return;
  
 -		show(buf);
 +		show(buf, NULL);
  
  		buf += reclen;
  		buflen -= reclen;
 @@ -442,7 +447,7 @@ main(int argc, char **argv)
  	char *buf;
  	size_t len;
  	int ch, error, fd;
 -	int count, first, last;
 +	int count, first, last, cpuid;
  
  	while ((ch = getopt(argc, argv, "df:")) != -1) {
  		switch(ch) {
 @@ -481,12 +486,19 @@ main(int argc, char **argv)
  		if (error)
  			err(1, hw_mca_last);
  
 +		cpuid = 0;
  		while (count && first <= last) {
 -			sprintf(mib, hw_mca_recid, first);
 -			len = 0;
 -			error = sysctlbyname(mib, NULL, &len, NULL, 0);
 -			if (error == ENOENT) {
 +			do {
 +				sprintf(mib, hw_mca_recid, first, cpuid);
 +				len = 0;
 +				error = sysctlbyname(mib, NULL, &len, NULL, 0);
 +				if (error != ENOENT)
 +					break;
 +				cpuid++;
 +			} while (cpuid <= HW_MCA_MAX_CPUID);
 +			if (error == ENOENT && cpuid > HW_MCA_MAX_CPUID) {
  				first++;
 +				cpuid = 0;
  				continue;
  			}
  			if (error)
 @@ -503,11 +515,15 @@ main(int argc, char **argv)
  			if (fl_dump)
  				dump(buf);
  			else
 -				show(buf);
 +				show(buf, mib);
  
  			free(buf);
 -			first++;
  			count--;
 +			if (cpuid == HW_MCA_MAX_CPUID) {
 +				first++;
 +				cpuid = 0;
 +			} else
 +				cpuid++;
  		}
  	} else {
  		fd = open(file, O_RDONLY);
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 
>Unformatted:
