Jan Wieck wieck at lists.slony.info
Mon Apr 13 10:41:35 PDT 2009
Update of /home/cvsd/slony1/slony1-engine/src/slonik
In directory main.slony.info:/tmp/cvs-serv30306

Modified Files:
      Tag: REL_1_2_STABLE
	slonik.c 
Log Message:
Changed the failover logic to consider all subscribers of a set, not only
the direct subscribers of the failed node.
Failover is still not flawless, but it is a lot better this way.

Jan


Index: slonik.c
===================================================================
RCS file: /home/cvsd/slony1/slony1-engine/src/slonik/slonik.c,v
retrieving revision 1.67.2.18
retrieving revision 1.67.2.19
diff -C2 -d -r1.67.2.18 -r1.67.2.19
*** slonik.c	5 Mar 2009 22:37:23 -0000	1.67.2.18
--- slonik.c	13 Apr 2009 17:41:33 -0000	1.67.2.19
***************
*** 2692,2696 ****
  
  	/*
! 	 * For every set we're interested in lookup the direct subscriber nodes.
  	 */
  	for (i = 0; i < num_sets; i++)
--- 2692,2696 ----
  
  	/*
! 	 * For every set we're interested in lookup the subscriber nodes.
  	 */
  	for (i = 0; i < num_sets; i++)
***************
*** 2706,2714 ****
  					 "    from \"_%s\".sl_subscribe "
  					 "    where sub_set = %d "
- 					 "    and sub_provider = %d "
  					 "    and sub_active and sub_forward; ",
  					 stmt->hdr.script->clustername,
! 					 setinfo[i].set_id,
! 					 stmt->no_id);
  
  		res3 = db_exec_select((SlonikStmt *) stmt, adminfo1, &query);
--- 2706,2712 ----
  					 "    from \"_%s\".sl_subscribe "
  					 "    where sub_set = %d "
  					 "    and sub_active and sub_forward; ",
  					 stmt->hdr.script->clustername,
! 					 setinfo[i].set_id);
  
  		res3 = db_exec_select((SlonikStmt *) stmt, adminfo1, &query);
***************
*** 2754,2757 ****
--- 2752,2757 ----
  	 * all other nodes.
  	 */
+ 	printf("INFO: calling failedNode(%d,%d) on node %d\n",
+ 			stmt->no_id, stmt->backup_node, stmt->no_id);
  	slon_mkquery(&query,
  				 "select \"_%s\".failedNode(%d, %d); ",
***************
*** 2772,2775 ****
--- 2772,2777 ----
  			continue;
  
+ 		printf("INFO: calling failedNode(%d,%d) on node %d\n",
+ 				stmt->no_id, stmt->backup_node, nodeinfo[i].no_id);
  		if (db_exec_command((SlonikStmt *) stmt, nodeinfo[i].adminfo, &query) < 0)
  		{
***************
*** 2800,2803 ****
--- 2802,2806 ----
  	 * restarted.
  	 */
+ 	printf("INFO: Waiting for slon engines to restart\n");
  	n = 0;
  	while (n < num_nodes)
***************
*** 2878,2882 ****
  
  	/*
! 	 * For every set determine the direct subscriber with the highest applied
  	 * sync, preferring the backup node.
  	 */
--- 2881,2885 ----
  
  	/*
! 	 * For every set determine the subscriber with the highest applied
  	 * sync, preferring the backup node.
  	 */
***************
*** 3005,3010 ****
--- 3008,3018 ----
  		}
  
+ 		printf("INFO: Node with highest sync for set %d is %d\n",
+ 				setinfo[i].set_id, use_node);
+ 
  		if (use_node != stmt->backup_node)
  		{
+ 			printf("INFO: switching node %d to temporarily receive set %d from node %d\n",
+ 					stmt->backup_node, setinfo[i].set_id, use_node);
  			slon_mkquery(&query,
  						 "select \"_%s\".storeListen(%d,%d,%d); "
***************
*** 3026,3030 ****
  
  	/*
! 	 * Now execute all FAILED_NODE events on the node that had the highest of
  	 * all events alltogether.
  	 */
--- 3034,3038 ----
  
  	/*
! 	 * Now execute all FAILED_NODE2 events on the node that had the highest of
  	 * all events alltogether.
  	 */



More information about the Slony1-commit mailing list