CVS User Account cvsuser
Mon Jan 10 23:45:38 PST 2005
Log Message:
-----------
Added a dynamic SYNC group "windowing" scheme, along with docs and
configuration option code.

This allows defining a target "desired_sync_time", defaulting to one
minute: if replication is behind, slon will gradually increase sync
group sizes, aiming for each group to take roughly that period of
time to process.

It backs off IMMEDIATELY if there is a large or long-running SYNC.
Group sizes start at 1 and grow by at most 10% of the last group
size, plus one, per round.  Once the node gets up to date, that also
causes it to back off to sync sizes of 1 or 2.
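
To make the arithmetic concrete, here is a standalone sketch of the
sizing heuristic (mirroring the logic added to remote_worker.c below;
the sample values are hypothetical):

    #include <stdio.h>

    int
    main(void)
    {
        int desired_sync_time = 60000;  /* target 60s per group, in ms */
        int last_sync_group_size = 10;  /* last group held 10 SYNCs... */
        int last_sync_length = 20000;   /* ...and took 20s to process  */
        int ideal_sync, max_sync, next_sync_group_size;

        /* scale the last group size by how far under target it ran */
        ideal_sync = (last_sync_group_size * desired_sync_time)
            / last_sync_length;                            /* = 30 */

        /* but never grow beyond 10% of the last size, plus one */
        max_sync = ((last_sync_group_size * 110) / 100) + 1;  /* = 12 */

        next_sync_group_size = ideal_sync;
        if (next_sync_group_size > max_sync)
            next_sync_group_size = max_sync;
        if (next_sync_group_size < 1)
            next_sync_group_size = 1;

        printf("next group size: %d\n", next_sync_group_size); /* 12 */
        return 0;
    }

So a group of 10 SYNCs that finished in a third of the desired time
suggests an ideal of 30, but the 10%-plus-one cap holds the next
group to 12.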

This makes it reasonable to set the maximum sync grouping rather
high, as the groups will only grow gradually, in keeping with how
the system is performing.
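
As an illustration (the value is in milliseconds; the new -o option
in the slon.c diff below sets desired_sync_time directly), starting
the daemon as

    slon -o 10000 ...

would target roughly ten-second SYNC groups instead of the one-minute
default; the remaining slon arguments are elided here.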

Modified Files:
--------------
    slony1-engine/src/slon:
        confoptions.h (r1.10 -> r1.11)
        slon.c (r1.39 -> r1.40)
        remote_worker.c (r1.68 -> r1.69)
    slony1-engine/doc/adminguide:
        slon.sgml (r1.4 -> r1.5)

-------------- next part --------------
Index: confoptions.h
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/src/slon/confoptions.h,v
retrieving revision 1.10
retrieving revision 1.11
diff -Lsrc/slon/confoptions.h -Lsrc/slon/confoptions.h -u -w -r1.10 -r1.11
--- src/slon/confoptions.h
+++ src/slon/confoptions.h
@@ -23,6 +23,7 @@
 extern int sync_interval_timeout;
 
 extern int sync_group_maxsize;
+extern int desired_sync_time;
 
 char *Syslog_ident;
 char *Syslog_facility;
@@ -154,6 +155,20 @@
 		0,
 		500
 	},
+	{
+		{
+			(const char *)"desired_sync_time",
+			gettext_noop("maximum time planned for grouped SYNCs"),
+			gettext_noop("If replication is behind, slon will try to increase the number of "
+						 "syncs grouped together, targeting that they should take this "
+						 "amount of time to process"),
+			SLON_C_INT
+		},
+		&desired_sync_time,
+		60000,
+		10000,
+		600000
+	},
 #ifdef HAVE_SYSLOG
 	{
 		{
Index: remote_worker.c
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/src/slon/remote_worker.c,v
retrieving revision 1.68
retrieving revision 1.69
diff -Lsrc/slon/remote_worker.c -Lsrc/slon/remote_worker.c -u -w -r1.68 -r1.69
--- src/slon/remote_worker.c
+++ src/slon/remote_worker.c
@@ -210,7 +210,16 @@
 
 int             sync_group_maxsize;
 
+int last_sync_group_size;
+int next_sync_group_size;
 
+int desired_sync_time = 60000;
+int ideal_sync;
+struct timeval  sync_start;
+struct timeval  sync_end;
+int last_sync_length;
+int max_sync;
+int min_sync;
 /*
  * ---------- Local functions ----------
  */
@@ -431,8 +440,37 @@
 
 			if (true)
 			{
+				/* Force last_sync_group_size to a reasonable range */
+				if (last_sync_group_size < 1) 
+					last_sync_group_size = 1;
+				if (last_sync_group_size > 100)
+					last_sync_group_size = 1;
+
+				gettimeofday(&sync_end, NULL);
+				last_sync_length = 
+					(sync_end.tv_sec - sync_start.tv_sec) * 1000 +
+					(sync_end.tv_usec - sync_start.tv_usec) / 1000;
+
+				/* Force last_sync_length to a reasonable range */
+				if ((last_sync_length < 10) || (last_sync_length > 1000000)) {
+					/* sync_length seems to be trash - force group size to 1 */
+					next_sync_group_size = 1;
+				} else {
+					/* Estimate an "ideal" number of syncs based on how long they took last time */
+					ideal_sync = (last_sync_group_size * desired_sync_time) / last_sync_length;
+					max_sync = ((last_sync_group_size * 110) / 100 ) + 1;
+					next_sync_group_size = ideal_sync;
+					if (next_sync_group_size > max_sync)
+						next_sync_group_size = max_sync;
+					if (next_sync_group_size < 1) 
+						next_sync_group_size = 1;
+					slon_log(SLON_DEBUG2, "calc sync size - last size: %d last length: %d ideal: %d proposed size: %d\n",
+							 last_sync_group_size, last_sync_length, ideal_sync, next_sync_group_size);
+				}
+		
+				gettimeofday(&sync_start, NULL);
 				pthread_mutex_lock(&(node->message_lock));
-				while (sync_group_size < sync_group_maxsize && node->message_head != NULL)
+				while (sync_group_size < next_sync_group_size && node->message_head != NULL)
 				{
 					if (node->message_head->msg_type != WMSG_EVENT)
 						break;
@@ -488,11 +526,13 @@
 			 * further down).
 			 */
 			dstring_reset(&query1);
+			last_sync_group_size = 0;
 			for (i = 0; i < sync_group_size; i++)
 			{
 				query_append_event(&query1, sync_group[i]);
 				if (i < (sync_group_size - 1))
 					free(sync_group[i]);
+				last_sync_group_size++;
 			}
 			slon_appendquery(&query1, "commit transaction;");
 
Index: slon.c
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/src/slon/slon.c,v
retrieving revision 1.39
retrieving revision 1.40
diff -Lsrc/slon/slon.c -Lsrc/slon/slon.c -u -w -r1.39 -r1.40
--- src/slon/slon.c
+++ src/slon/slon.c
@@ -74,7 +74,7 @@
 	InitializeConfOptions();
 
 
-	while ((c = getopt(argc, argv, "f:d:s:t:g:c:p:hv")) != EOF)
+	while ((c = getopt(argc, argv, "f:d:s:t:g:c:p:o:hv")) != EOF)
 	{
 		switch (c)
 		{
@@ -106,6 +106,10 @@
 			set_config_option("pid_file", optarg);
 			break;
 
+		case 'o':
+			set_config_option("desired_sync_time", optarg);
+			break;
+
 		case 'h':
 			errors++;
 			break;
Index: slon.sgml
===================================================================
RCS file: /usr/local/cvsroot/slony1/slony1-engine/doc/adminguide/slon.sgml,v
retrieving revision 1.4
retrieving revision 1.5
diff -Ldoc/adminguide/slon.sgml -Ldoc/adminguide/slon.sgml -u -w -r1.4 -r1.5
--- doc/adminguide/slon.sgml
+++ doc/adminguide/slon.sgml
@@ -118,6 +118,39 @@
 			</varlistentry>
 
 			<varlistentry>
+    <term><option>-o</option><replaceable class="parameter">desired sync time</replaceable></term>
+    <listitem><para> A <quote>maximum</quote> time planned for grouped SYNCs.</para>
+
+     <para> If replication is running behind, slon will gradually
+      increase the number of SYNCs grouped together, targeting that
+      (based on the time taken for the <emphasis>last</emphasis> group
+      of SYNCs) they shouldn't take more than the specified
+      <quote>desired sync time</quote>.</para>
+
+     <para> The default value is 60000 ms, equal to one minute. </para>
+
+     <para> That way, you can expect (or at least hope!) that you'll
+      get a <command>COMMIT</command> roughly once per minute. </para>
+
+     <para> It isn't <emphasis>totally</emphasis> predictable, as it
+     is entirely possible for someone to submit a <emphasis>very
+     large update</emphasis>, all in one transaction, which can
+     <quote>blow up</quote> the length of the resulting SYNC to be
+     nearly arbitrarily long.  In such a case, the heuristic will back
+     off for the <emphasis>next</emphasis> group.</para>
+
+     <para> The overall effect is to improve
+     <productname>Slony-I</productname>'s ability to cope with
+     variations in traffic.  It starts with 1 SYNC and grows the
+     groups gradually; if variations turn out to be large enough to
+     cause <productname>PostgreSQL</productname> backends to crash,
+     it backs off to doing one sync at a time, if need be, so that if
+     it is at all possible for replication to progress, progress it
+     will.</para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
 				<term><option>-c</option><replaceable class="parameter">cleanup cycles</replaceable></term>
 				<listitem>
 					<para>
@@ -133,6 +166,7 @@
 				</listitem>
 			</varlistentry>
 
+   
 			<varlistentry>
 				<term><option>-p</option><replaceable class="parameter">PID filename</replaceable></term>
 				<listitem>

