Christopher Browne cbbrowne at ca.afilias.info
Mon Jul 20 09:03:30 PDT 2009
Gordon Shannon <gordo169 at gmail.com> writes:
> I think I found the duplicate key problem.  The "log_actionseq" column in
> the sl_log_1/2 tables is a bigint.  But the compress_actionseq() function in
> remote_worker.c is working only with signed ints not longs.  So if a value
> greater than 2,147,483,647 comes along, the value in curr_number will
> overflow.
>
> Here's the relevant debug info from the log:
>
> 14 0719 18:33:27 DEBUG4 compress_actionseq(list,subquery) Action list:
> '4832430056','4832430057','4832430058','4832430059','4832430060','4832430061','4832430062','4832430063','4832430064','4832430065','4832430066','4832430067','4832430068','4832430069','4832430070','4832430071','4832430072','4832430073','4832430074','4832430075','4832430076','4832430077','4832430078','4832430079','4832430080','4832430081','4832430082','4832430083','4832430084','4832430085','4832430086','4832430087','4832430088','4832430089','4832430090','4832430091','4832430092','4832430093','4832430094','4832430095','4832430096','4832430097','4832430098','4832430099','4832430100','4832430101','4832430102','4832430103','4832430104','4832430105','4832430106','4832430107','4832430108','4832430109','4832430110','4832430111','4832430112','4832430113','4832430114','4832430115','4832430116','4832430117','4832430118','4832430119','4832430120','4832430121','4832430122','4832430123','4832430124','4832430125','4832430126','4832430127','4832430128','4832430129','4832430130','4832430131','4832430132','4832430133','4832430134','4832430135','4832430136','4832430137','4832430138','4832430139','4832430140','4832430141','4832430142','4832430143','4832430144','4832430145','4832430146','4832430147','4832430148','4832430149','4832430150','4832430151','4832430152','4832430153','4832430154','4832430155','4832430156','4832430157','4832430158','4832430159'
> 14 0719 18:33:27 DEBUG4 Finished number: 537462760
> 14 0719 18:33:27 DEBUG4 Finished number: 537462761
> 14 0719 18:33:27 DEBUG4 Finished number: 537462762
> (...)
> 14 0719 18:33:27 DEBUG4 Finished number: 537462860
> 14 0719 18:33:27 DEBUG4 Finished number: 537462861
> 14 0719 18:33:27 DEBUG4 Finished number: 537462862
> 14 0719 18:33:27 DEBUG4 Finished number: 537462863
> 14 0719 18:33:27 DEBUG4 between entry - 537462760 537462863
> 14 0719 18:33:27 DEBUG4  compressed actionseq subquery...   log_actionseq
> not between '537462760' and '537462863'
>
> Note that 537462760 is what you get when you truncate the 8-byte 4832430056 to a
> 4-byte integer. So essentially it is trying to sync rows that already came over
> in the subscription event.
>
> Let me know if you need more details.

Good work, man!

I don't believe "long" is an acceptable data type here; on a 32-bit
platform, that's still just a 32-bit value.  We've got to go with
"long long" to guarantee a 64-bit type.

The following patch should resolve this, and I'll observe that this is
an issue for both 1.2 and 2.0 :-(.

Index: src/slon/remote_worker.c
===================================================================
RCS file: /home/cvsd/slony1/slony1-engine/src/slon/remote_worker.c,v
retrieving revision 1.176.2.1
diff -c -u -r1.176.2.1 remote_worker.c
--- src/slon/remote_worker.c	17 Jun 2009 21:37:38 -0000	1.176.2.1
+++ src/slon/remote_worker.c	20 Jul 2009 16:01:01 -0000
@@ -3806,7 +3806,7 @@
 		res1 = PQexec(local_dbconn, dstring_data(&query));
 		monitor_subscriber_query(&pm);
 
-		slon_log(SLON_INFO, "about to monitor_subscriber_query - pulling big actionid list %d\n", provider);
+		slon_log(SLON_INFO, "about to monitor_subscriber_query - pulling big actionid list for %d\n", provider->no_id);
 
 		if (PQresultStatus(res1) != PGRES_TUPLES_OK)
 		{
@@ -5537,7 +5537,7 @@
 compress_actionseq(const char *ssy_actionlist, SlonDString *action_subquery)
 {
 	CompressState			state;
-	int			curr_number,
+	long long			curr_number,
 				curr_min,
 				curr_max;
 	int			curr_digit;
@@ -5685,7 +5685,7 @@
 				if (state == COLLECTING_DIGITS)
 				{
 					/* Finished another number... Fold it into the ranges... */
-					slon_log(SLON_DEBUG4, "Finished number: %d\n", curr_number);
+					slon_log(SLON_DEBUG4, "Finished number: %lld\n", curr_number);
 
 					/*
 					 * If we haven't a range, then the range is the current
@@ -5736,16 +5736,16 @@
 						}
 						if (curr_max == curr_min)
 						{
-							slon_log(SLON_DEBUG4, "simple entry - %d\n", curr_max);
+							slon_log(SLON_DEBUG4, "simple entry - %lld\n", curr_max);
 							slon_appendquery(action_subquery,
-										" log_actionseq <> '%d' ", curr_max);
+										" log_actionseq <> '%lld' ", curr_max);
 						}
 						else
 						{
-							slon_log(SLON_DEBUG4, "between entry - %d %d\n",
+							slon_log(SLON_DEBUG4, "between entry - %lld %lld\n",
 									 curr_min, curr_max);
 							slon_appendquery(action_subquery,
-								 " log_actionseq not between '%d' and '%d' ",
+								 " log_actionseq not between '%lld' and '%lld' ",
 											 curr_min, curr_max);
 						}
 						curr_min = curr_number;
@@ -5771,16 +5771,16 @@
 		}
 		if (curr_max == curr_min)
 		{
-			slon_log(SLON_DEBUG4, "simple entry - %d\n", curr_max);
+			slon_log(SLON_DEBUG4, "simple entry - %lld\n", curr_max);
 			slon_appendquery(action_subquery,
-							 " log_actionseq <> '%d' ", curr_max);
+							 " log_actionseq <> '%lld' ", curr_max);
 		}
 		else
 		{
-			slon_log(SLON_DEBUG4, "between entry - %d %d\n",
+			slon_log(SLON_DEBUG4, "between entry - %lld %lld\n",
 					 curr_min, curr_max);
 			slon_appendquery(action_subquery,
-							 " log_actionseq not between '%d' and '%d' ",
+							 " log_actionseq not between '%lld' and '%lld' ",
 							 curr_min, curr_max);
 		}
 

-- 
let name="cbbrowne" and tld="ca.afilias.info" in String.concat "@" [name;tld];;
<http://dba2.int.libertyrms.com/>
Christopher Browne
(416) 673-4124 (land)
"Bother,"  said Pooh,  "Eeyore, ready  two photon  torpedoes  and lock
phasers on the Heffalump, Piglet, meet me in transporter room three"


More information about the Slony1-general mailing list