dfilter, ftypes: get rid of FT_PCRE.

It's not a valid field type, it's only a hack to support regular expression matching in packet-matching expressions. Instead, in the packet-matching code, have a separate syntax tree type for Perl-compatible regular expressions, and a separate instruction to load one into a register, and have the "matching" operator for field types take a GRegex * as the second argument.
author: Guy Harris <gharris@sonic.net> 2021-03-21 03:06:17 -0700
committer: Guy Harris <gharris@sonic.net> 2021-03-21 03:27:44 -0700
commit: b61fd6d76a2ce97f2bae40d8a8eacebc91e617ea (patch)
tree: d229e30e3f969bb916d1dd50f1d98a60b33252d0
parent: ce611792c37f5c80692bc927e07d3d4f36cd5b65 (diff)
21 files changed, 265 insertions, 303 deletions
diff --git a/epan/dfilter/dfvm.c b/epan/dfilter/dfvm.c
index 5bbcbaccb6..e944478d3a 100644
--- a/epan/dfilter/dfvm.c
+++ b/epan/dfilter/dfvm.c
@@ -36,6 +36,9 @@ dfvm_value_free(dfvm_value_t *v)
 		case DRANGE:
 			drange_free(v->value.drange);
 			break;
+		case PCRE:
+			g_regex_unref(v->value.pcre);
+			break;
 		default:
 			/* nothing */
 			;
@@ -105,6 +108,12 @@ dfvm_dump(FILE *f, dfilter_t *df)
 					arg2->value.numeric);
 				wmem_free(NULL, value_str);
 				break;
+			case PUT_PCRE:
+				fprintf(f, "%05d PUT_PCRE\t%s -> reg#%u\n",
+					id,
+					g_regex_get_pattern(arg1->value.pcre),
+					arg2->value.numeric);
+				break;
 			case CHECK_EXISTS:
 			case READ_TREE:
 			case CALL_FUNCTION:
@@ -169,6 +178,11 @@ dfvm_dump(FILE *f, dfilter_t *df)
 				g_assert_not_reached();
 				break;
 
+			case PUT_PCRE:
+				/* We already dumped these */
+				g_assert_not_reached();
+				break;
+
 			case MK_RANGE:
 				arg3 = insn->arg3;
 				fprintf(f, "%05d MK_RANGE\t\treg#%u[",
@@ -347,6 +361,16 @@ put_fvalue(dfilter_t *df, fvalue_t *fv, int reg)
 	return TRUE;
 }
 
+/* Put a constant PCRE in a register. These will not be cleared by
+ * free_register_overhead. */
+static gboolean
+put_pcre(dfilter_t *df, GRegex *pcre, int reg)
+{
+	df->registers[reg] = g_list_append(NULL, pcre);
+	df->owns_memory[reg] = FALSE;
+	return TRUE;
+}
+
 typedef gboolean (*FvalueCmpFunc)(const fvalue_t*, const fvalue_t*);
 
 static gboolean
@@ -370,6 +394,26 @@ any_test(dfilter_t *df, FvalueCmpFunc cmp, int reg1, int reg2)
 }
 
 static gboolean
+any_matches(dfilter_t *df, int reg1, int reg2)
+{
+	GList	*list_a, *list_b;
+
+	list_a = df->registers[reg1];
+
+	while (list_a) {
+		list_b = df->registers[reg2];
+		while (list_b) {
+			if (fvalue_matches((fvalue_t *)list_a->data, (GRegex *)list_b->data)) {
+				return TRUE;
+			}
+			list_b = g_list_next(list_b);
+		}
+		list_a = g_list_next(list_a);
+	}
+	return FALSE;
+}
+
+static gboolean
 any_in_range(dfilter_t *df, int reg1, int reg2, int reg3)
 {
 	GList	*list1, *list_low, *list_high;
@@ -568,7 +612,7 @@ dfvm_apply(dfilter_t *df, proto_tree *tree)
 				break;
 
 			case ANY_MATCHES:
-				accum = any_test(df, fvalue_matches,
+				accum = any_matches(df,
 						arg1->value.numeric, arg2->value.numeric);
 				break;
 
@@ -609,6 +653,14 @@ dfvm_apply(dfilter_t *df, proto_tree *tree)
 				break;
 #endif
 
+			case PUT_PCRE:
+#if 0
+				/* These were handled in the constants initialization */
+				accum = put_pcre(df,
+						arg1->value.pcre, arg2->value.numeric);
+				break;
+#endif
+
 			default:
 				g_assert_not_reached();
 				break;
@@ -640,6 +692,10 @@ dfvm_init_const(dfilter_t *df)
 				put_fvalue(df,
 						arg1->value.fvalue, arg2->value.numeric);
 				break;
+			case PUT_PCRE:
+				put_pcre(df,
+						arg1->value.pcre, arg2->value.numeric);
+				break;
 			case CHECK_EXISTS:
 			case READ_TREE:
 			case CALL_FUNCTION:
diff --git a/epan/dfilter/dfvm.h b/epan/dfilter/dfvm.h
index cfc5ce51f0..71e6cd4449 100644
--- a/epan/dfilter/dfvm.h
+++ b/epan/dfilter/dfvm.h
@@ -23,7 +23,8 @@ typedef enum {
 	REGISTER,
 	INTEGER,
 	DRANGE,
-	FUNCTION_DEF
+	FUNCTION_DEF,
+	PCRE
 } dfvm_value_type_t;
 
 typedef struct {
@@ -35,6 +36,7 @@ typedef struct {
 		drange_t		*drange;
 		header_field_info	*hfinfo;
 		df_func_def_t		*funcdef;
+		GRegex			*pcre;
 	} value;
 
 } dfvm_value_t;
@@ -49,6 +51,7 @@ typedef enum {
 	RETURN,
 	READ_TREE,
 	PUT_FVALUE,
+	PUT_PCRE,
 	ANY_EQ,
 	ANY_NE,
 	ANY_GT,
diff --git a/epan/dfilter/gencode.c b/epan/dfilter/gencode.c
index ecc929a738..cfd22c920a 100644
--- a/epan/dfilter/gencode.c
+++ b/epan/dfilter/gencode.c
@@ -231,6 +231,27 @@ dfw_append_function(dfwork_t *dfw, stnode_t *node, dfvm_value_t **p_jmp)
 	return val2->value.numeric;
 }
 
+/* returns register number */
+static int
+dfw_append_put_pcre(dfwork_t *dfw, GRegex *pcre)
+{
+	dfvm_insn_t	*insn;
+	dfvm_value_t	*val1, *val2;
+	int		reg;
+
+	insn = dfvm_insn_new(PUT_PCRE);
+	val1 = dfvm_value_new(PCRE);
+	val1->value.pcre = pcre;
+	val2 = dfvm_value_new(REGISTER);
+	reg = dfw->first_constant--;
+	val2->value.numeric = reg;
+	insn->arg1 = val1;
+	insn->arg2 = val2;
+	dfw_append_const(dfw, insn);
+
+	return reg;
+}
+
 
 /**
  * Adds an instruction for a relation operator where the values are already
@@ -404,6 +425,9 @@ gen_entity(dfwork_t *dfw, stnode_t *st_arg, dfvm_value_t **p_jmp)
 	else if (e_type == STTYPE_FUNCTION) {
 		reg = dfw_append_function(dfw, st_arg, p_jmp);
 	}
+	else if (e_type == STTYPE_PCRE) {
+		reg = dfw_append_put_pcre(dfw, (GRegex *)stnode_steal_data(st_arg));
+	}
 	else {
 		/* printf("sttype_id is %u\n", (unsigned)e_type); */
 		g_assert_not_reached();
diff --git a/epan/dfilter/grammar.lemon b/epan/dfilter/grammar.lemon
index c29b91a592..4ea3a2b673 100644
--- a/epan/dfilter/grammar.lemon
+++ b/epan/dfilter/grammar.lemon
@@ -118,6 +118,7 @@ any "error" symbols are shifted, if possible. */
 		case STTYPE_NUM_TYPES:
 		case STTYPE_RANGE:
 		case STTYPE_FVALUE:
+		case STTYPE_PCRE:
 			g_assert_not_reached();
 			break;
 	}
diff --git a/epan/dfilter/semcheck.c b/epan/dfilter/semcheck.c
index 7837c3be28..c1be5f3a56 100644
--- a/epan/dfilter/semcheck.c
+++ b/epan/dfilter/semcheck.c
@@ -133,7 +133,6 @@ compatible_ftypes(ftenum_t a, ftenum_t b)
 					return FALSE;
 			}
 
-		case FT_PCRE:
 		case FT_NUM_TYPES:
 			g_assert_not_reached();
 	}
@@ -219,7 +218,6 @@ mk_fvalue_from_val_string(dfwork_t *dfw, header_field_info *hfinfo, char *s)
 		case FT_STRINGZPAD:
 		case FT_STRINGZTRUNC:
 		case FT_EUI64:
-		case FT_PCRE:
 		case FT_GUID:
 		case FT_OID:
 		case FT_REL_OID:
@@ -383,7 +381,6 @@ is_bytes_type(enum ftenum type)
 		case FT_INT48:
 		case FT_INT56:
 		case FT_INT64:
-		case FT_PCRE:
 		case FT_EUI64:
 			return FALSE;
 
@@ -395,6 +392,46 @@ is_bytes_type(enum ftenum type)
 	return FALSE;
 }
 
+/* Gets a GRegex from a string, and sets the error message on failure. */
+static GRegex*
+dfilter_g_regex_from_string(dfwork_t *dfw, const char *s)
+{
+	GError *regex_error = NULL;
+	GRegexCompileFlags cflags = (GRegexCompileFlags)(G_REGEX_CASELESS | G_REGEX_OPTIMIZE);
+	GRegex *pcre;
+
+	/*
+	 * As FT_BYTES and FT_PROTOCOL contain arbitrary binary data
+	 * and FT_STRING is not guaranteed to contain valid UTF-8,
+	 * we have to disable support for UTF-8 patterns and treat
+	 * every pattern and subject as raw bytes.
+	 *
+	 * Should support for UTF-8 patterns be necessary, then we
+	 * should compile a pattern without G_REGEX_RAW. Additionally,
+	 * we MUST use g_utf8_validate() before calling g_regex_match_full()
+	 * or risk crashes.
+	 */
+	cflags = (GRegexCompileFlags)(cflags | G_REGEX_RAW);
+
+	pcre = g_regex_new(
+			s,			/* pattern */
+			cflags,			/* Compile options */
+			(GRegexMatchFlags)0,	/* Match options */
+			&regex_error		/* Compile / study errors */
+			);
+
+	if (regex_error) {
+		if (dfw->error_message == NULL)
+			dfw->error_message = g_strdup(regex_error->message);
+		g_error_free(regex_error);
+		if (pcre) {
+			g_regex_unref(pcre);
+		}
+		return NULL;
+	}
+	return pcre;
+}
+
 /* Check the semantics of an existence test. */
 static void
 check_exists(dfwork_t *dfw, stnode_t *st_arg1)
@@ -439,6 +476,7 @@ check_exists(dfwork_t *dfw, stnode_t *st_arg1)
 		case STTYPE_INTEGER:
 		case STTYPE_FVALUE:
 		case STTYPE_SET:
+		case STTYPE_PCRE:
 		case STTYPE_NUM_TYPES:
 			g_assert_not_reached();
 	}
@@ -636,6 +674,7 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string,
 	df_func_def_t		*funcdef;
 	ftenum_t		ftype1, ftype2;
 	fvalue_t		*fvalue;
+	GRegex			*pcre;
 	char			*s;
 
 	type2 = stnode_type_id(st_arg2);
@@ -677,11 +716,12 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string,
 	         type2 == STTYPE_CHARCONST) {
 		s = (char *)stnode_data(st_arg2);
 		if (strcmp(relation_string, "matches") == 0) {
-			/* Convert to a FT_PCRE */
-			if (type2 == STTYPE_STRING)
-				fvalue = dfilter_fvalue_from_string(dfw, FT_PCRE, s);
-			else
-				fvalue = dfilter_fvalue_from_unparsed(dfw, FT_PCRE, s, FALSE);
+			/* Convert to a GRegex */
+			pcre = dfilter_g_regex_from_string(dfw, s);
+			if (!pcre) {
+				THROW(TypeError);
+			}
+			new_st = stnode_new(STTYPE_PCRE, pcre);
 		} else {
 			/* Skip incompatible fields */
 			while (hfinfo1->same_name_prev_id != -1 &&
@@ -720,13 +760,11 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string,
 					dfw->error_message = NULL;
 				}
 			}
+			if (!fvalue) {
+				THROW(TypeError);
+			}
+			new_st = stnode_new(STTYPE_FVALUE, fvalue);
 		}
-
-		if (!fvalue) {
-			THROW(TypeError);
-		}
-
-		new_st = stnode_new(STTYPE_FVALUE, fvalue);
 		if (stnode_type_id(st_node) == STTYPE_TEST) {
 			sttype_test_set2_args(st_node, st_arg1, new_st);
 		} else {
@@ -1018,6 +1056,7 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string,
 	header_field_info	*hfinfo1, *hfinfo2;
 	ftenum_t		ftype1, ftype2;
 	fvalue_t		*fvalue;
+	GRegex			*pcre;
 	char			*s;
 	int                     len_range;
 
@@ -1081,16 +1120,20 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string,
 		DebugLog(("    5 check_relation_LHS_RANGE(type2 = STTYPE_STRING)\n"));
 		s = (char*)stnode_data(st_arg2);
 		if (strcmp(relation_string, "matches") == 0) {
-			/* Convert to a FT_PCRE */
-			fvalue = dfilter_fvalue_from_string(dfw, FT_PCRE, s);
+			/* Convert to a GRegex * */
+			pcre = dfilter_g_regex_from_string(dfw, s);
+			if (!pcre) {
+				THROW(TypeError);
+			}
+			new_st = stnode_new(STTYPE_PCRE, pcre);
 		} else {
 			fvalue = dfilter_fvalue_from_string(dfw, FT_BYTES, s);
+			if (!fvalue) {
+				DebugLog(("    5 check_relation_LHS_RANGE(type2 = STTYPE_STRING): Could not convert from string!\n"));
+				THROW(TypeError);
+			}
+			new_st = stnode_new(STTYPE_FVALUE, fvalue);
 		}
-		if (!fvalue) {
-			DebugLog(("    5 check_relation_LHS_RANGE(type2 = STTYPE_STRING): Could not convert from string!\n"));
-			THROW(TypeError);
-		}
-		new_st = stnode_new(STTYPE_FVALUE, fvalue);
 		sttype_test_set2_args(st_node, st_arg1, new_st);
 		stnode_free(st_arg2);
 	}
@@ -1099,36 +1142,51 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string,
 		s = (char*)stnode_data(st_arg2);
 		len_range = drange_get_total_length(sttype_range_drange(st_arg1));
 		if (strcmp(relation_string, "matches") == 0) {
-			/* Convert to a FT_PCRE */
-			fvalue = dfilter_fvalue_from_unparsed(dfw, FT_PCRE, s, FALSE);
-		}
-
-		/* The RHS should be FT_BYTES. However, there is a special case where
-		 * the range slice on the LHS is one byte long. In that case, it is natural
-		 * for the user to specify a normal hex integer on the RHS, with the "0x"
-		 * notation, as in "slice[0] == 0x10". We can't allow this for any
-		 * slices that are longer than one byte, because then we'd have to know
-		 * which endianness the byte string should be in. */
-		else if (len_range == 1 && strlen(s) == 4 && strncmp(s, "0x", 2) == 0) {
-		    /* Even if the RHS string starts with "0x", it still could fail to
-		     * be an integer.  Try converting it here. */
-		    fvalue = dfilter_fvalue_from_unparsed(dfw, FT_UINT8, s, allow_partial_value);
-		    if (fvalue) {
-			FVALUE_FREE(fvalue);
-			/* The value doees indeed fit into 8 bits. Create a BYTE_STRING
-			 * from it. Since we know that the last 2 characters are a valid
-			 * hex string, just use those directly. */
-			fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, s+2, allow_partial_value);
-		    }
-		}
-		else {
-		    fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, s, allow_partial_value);
-		}
-		if (!fvalue) {
-			DebugLog(("    5 check_relation_LHS_RANGE(type2 = STTYPE_UNPARSED): Could not convert from string!\n"));
-			THROW(TypeError);
+			/* Convert to a GRegex */
+			pcre = dfilter_g_regex_from_string(dfw, s);
+			if (!pcre) {
+				THROW(TypeError);
+			}
+			new_st = stnode_new(STTYPE_PCRE, pcre);
+		} else {
+			/*
+			 * The RHS should be FT_BYTES. However, there is a
+			 * special case where the range slice on the LHS is
+			 * one byte long. In that case, it is natural
+			 * for the user to specify a normal hex integer
+			 * on the RHS, with the "0x" notation, as in
+			 * "slice[0] == 0x10". We can't allow this for any
+			 * slices that are longer than one byte, because
+			 * then we'd have to know which endianness the
+			 * byte string should be in.
+			 */
+			if (len_range == 1 && strlen(s) == 4 && strncmp(s, "0x", 2) == 0) {
+				/*
+				 * Even if the RHS string starts with "0x",
+				 * it still could fail to be an integer.
+				 * Try converting it here.
+				 */
+				fvalue = dfilter_fvalue_from_unparsed(dfw, FT_UINT8, s, allow_partial_value);
+				if (fvalue) {
+					FVALUE_FREE(fvalue);
+					/*
+					 * The value does indeed fit into
+					 * 8 bits. Create a BYTE_STRING
+					 * from it. Since we know that
+					 * the last 2 characters are a valid
+					 * hex string, just use those directly.
+					 */
+					fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, s+2, allow_partial_value);
+				}
+			} else {
+				fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, s, allow_partial_value);
+			}
+			if (!fvalue) {
+				DebugLog(("    5 check_relation_LHS_RANGE(type2 = STTYPE_UNPARSED): Could not convert from string!\n"));
+				THROW(TypeError);
+			}
+			new_st = stnode_new(STTYPE_FVALUE, fvalue);
 		}
-		new_st = stnode_new(STTYPE_FVALUE, fvalue);
 		sttype_test_set2_args(st_node, st_arg1, new_st);
 		stnode_free(st_arg2);
 	}
@@ -1136,18 +1194,22 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string,
 		DebugLog(("    5 check_relation_LHS_RANGE(type2 = STTYPE_CHARCONST)\n"));
 		s = (char*)stnode_data(st_arg2);
 		if (strcmp(relation_string, "matches") == 0) {
-			/* Convert to a FT_PCRE */
-			fvalue = dfilter_fvalue_from_unparsed(dfw, FT_PCRE, s, FALSE);
+			/* Convert to a GRegex */
+			pcre = dfilter_g_regex_from_string(dfw, s);
+			if (!pcre) {
+				THROW(TypeError);
+			}
+			new_st = stnode_new(STTYPE_PCRE, pcre);
 		} else {
 			/* The RHS should be FT_BYTES, but a character is just a
 			 * one-byte byte string. */
 			fvalue = dfilter_fvalue_from_charconst_string(dfw, FT_BYTES, s, allow_partial_value);
+			if (!fvalue) {
+				DebugLog(("    5 check_relation_LHS_RANGE(type2 = STTYPE_UNPARSED): Could not convert from string!\n"));
+				THROW(TypeError);
+			}
+			new_st = stnode_new(STTYPE_FVALUE, fvalue);
 		}
-		if (!fvalue) {
-			DebugLog(("    5 check_relation_LHS_RANGE(type2 = STTYPE_UNPARSED): Could not convert from string!\n"));
-			THROW(TypeError);
-		}
-		new_st = stnode_new(STTYPE_FVALUE, fvalue);
 		sttype_test_set2_args(st_node, st_arg1, new_st);
 		stnode_free(st_arg2);
 	}
@@ -1222,6 +1284,7 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, const char *relation_string,
 	header_field_info	*hfinfo2;
 	ftenum_t		ftype1, ftype2;
 	fvalue_t		*fvalue;
+	GRegex			*pcre;
 	char			*s;
 	df_func_def_t		*funcdef;
 	df_func_def_t		*funcdef2;
@@ -1264,32 +1327,38 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, const char *relation_string,
 	else if (type2 == STTYPE_STRING) {
 		s = (char*)stnode_data(st_arg2);
 		if (strcmp(relation_string, "matches") == 0) {
-			/* Convert to a FT_PCRE */
-			fvalue = dfilter_fvalue_from_string(dfw, FT_PCRE, s);
+			/* Convert to a GRegex */
+			pcre = dfilter_g_regex_from_string(dfw, s);
+			if (!pcre) {
+				THROW(TypeError);
+			}
+			new_st = stnode_new(STTYPE_PCRE, pcre);
 		} else {
 			fvalue = dfilter_fvalue_from_string(dfw, ftype1, s);
+			if (!fvalue) {
+				THROW(TypeError);
+			}
+			new_st = stnode_new(STTYPE_FVALUE, fvalue);
 		}
-		if (!fvalue) {
-			THROW(TypeError);
-		}
-
-		new_st = stnode_new(STTYPE_FVALUE, fvalue);
 		sttype_test_set2_args(st_node, st_arg1, new_st);
 		stnode_free(st_arg2);
 	}
 	else if (type2 == STTYPE_UNPARSED || type2 == STTYPE_CHARCONST) {
 		s = (char*)stnode_data(st_arg2);
 		if (strcmp(relation_string, "matches") == 0) {
-			/* Convert to a FT_PCRE */
-			fvalue = dfilter_fvalue_from_unparsed(dfw, FT_PCRE, s, FALSE);
+			/* Convert to a GRegex */
+			pcre = dfilter_g_regex_from_string(dfw, s);
+			if (!pcre) {
+				THROW(TypeError);
+			}
+			new_st = stnode_new(STTYPE_PCRE, pcre);
 		} else {
 			fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, s, allow_partial_value);
+			if (!fvalue) {
+				THROW(TypeError);
+			}
+			new_st = stnode_new(STTYPE_FVALUE, fvalue);
 		}
-		if (!fvalue) {
-			THROW(TypeError);
-		}
-
-		new_st = stnode_new(STTYPE_FVALUE, fvalue);
 		sttype_test_set2_args(st_node, st_arg1, new_st);
 		stnode_free(st_arg2);
 	}
diff --git a/epan/dfilter/sttype-pointer.c b/epan/dfilter/sttype-pointer.c
index 0ee0d4b0cc..489db05872 100644
--- a/epan/dfilter/sttype-pointer.c
+++ b/epan/dfilter/sttype-pointer.c
@@ -24,6 +24,22 @@ fvalue_free(gpointer value)
 	}
 }
 
+static void
+pcre_free(gpointer value)
+{
+	GRegex	*pcre = (GRegex*)value;
+
+	/* If the data was not claimed with stnode_steal_data(), free it. */
+	if (pcre) {
+		/*
+		 * They're reference-counted, so just drop the reference
+		 * count; it'll get freed when the reference count drops
+		 * to 0.
+		 */
+		g_regex_unref(pcre);
+	}
+}
+
 void
 sttype_register_pointer(void)
 {
@@ -41,9 +57,17 @@ sttype_register_pointer(void)
 		fvalue_free,
 		NULL
 	};
+	static sttype_t pcre_type = {
+		STTYPE_PCRE,
+		"PCRE",
+		NULL,
+		pcre_free,
+		NULL
+	};
 
 	sttype_register(&field_type);
 	sttype_register(&fvalue_type);
+	sttype_register(&pcre_type);
 }
 
 /*
diff --git a/epan/dfilter/syntax-tree.h b/epan/dfilter/syntax-tree.h
index 555915664d..3123b49589 100644
--- a/epan/dfilter/syntax-tree.h
+++ b/epan/dfilter/syntax-tree.h
@@ -27,6 +27,7 @@ typedef enum {
 	STTYPE_RANGE,
 	STTYPE_FUNCTION,
 	STTYPE_SET,
+	STTYPE_PCRE,
 	STTYPE_NUM_TYPES
 } sttype_id_t;
 
diff --git a/epan/ftypes/CMakeLists.txt b/epan/ftypes/CMakeLists.txt
index 31a4607889..0346c1ccf2 100644
--- a/epan/ftypes/CMakeLists.txt
+++ b/epan/ftypes/CMakeLists.txt
@@ -26,7 +26,6 @@ set(FTYPE_FILES
 	ftype-ipv6.c
 	ftype-guid.c
 	ftype-none.c
-	ftype-pcre.c
 	ftype-protocol.c
 	ftype-string.c
 	ftype-time.c
diff --git a/epan/ftypes/ftype-bytes.c b/epan/ftypes/ftype-bytes.c
index ee15aa508e..dc414e3e62 100644
--- a/epan/ftypes/ftype-bytes.c
+++ b/epan/ftypes/ftype-bytes.c
@@ -650,21 +650,10 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
 }
 
 static gboolean
-cmp_matches(const fvalue_t *fv_a, const fvalue_t *fv_b)
+cmp_matches(const fvalue_t *fv, const GRegex *regex)
 {
-	GByteArray *a = fv_a->value.bytes;
-	GRegex *regex = fv_b->value.re;
+	GByteArray *a = fv->value.bytes;
 
-	/* fv_b is always a FT_PCRE, otherwise the dfilter semcheck() would have
-	 * warned us. For the same reason (and because we're using g_malloc()),
-	 * fv_b->value.re is not NULL.
-	 */
-	if (strcmp(fv_b->ftype->name, "FT_PCRE") != 0) {
-		return FALSE;
-	}
-	if (! regex) {
-		return FALSE;
-	}
 	return g_regex_match_full(
 		regex,			/* Compiled PCRE */
 		(char *)a->data,	/* The data to check for the pattern... */
diff --git a/epan/ftypes/ftype-pcre.c b/epan/ftypes/ftype-pcre.c
deleted file mode 100644
index 854d957ee4..0000000000
--- a/epan/ftypes/ftype-pcre.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Wireshark - Network traffic analyzer
- * By Gerald Combs <gerald@wireshark.org>
- * Copyright 2001 Gerald Combs
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-/* Perl-Compatible Regular Expression (PCRE) internal field type.
- * Used with the "matches" dfilter operator, allowing efficient
- * compilation and studying of a PCRE pattern in dfilters.
- */
-
-#include "config.h"
-
-#include <ftypes-int.h>
-
-#include <glib.h>
-#include <string.h>
-
-static void
-gregex_fvalue_new(fvalue_t *fv)
-{
-    fv->value.re = NULL;
-}
-
-static void
-gregex_fvalue_free(fvalue_t *fv)
-{
-    if (fv->value.re) {
-        g_regex_unref(fv->value.re);
-        fv->value.re = NULL;
-    }
-}
-
-/* Generate a FT_PCRE from a parsed string pattern.
- * On failure, if err_msg is non-null, set *err_msg to point to a
- * g_malloc()ed error message. */
-static gboolean
-val_from_string(fvalue_t *fv, const char *pattern, gchar **err_msg)
-{
-    GError *regex_error = NULL;
-    GRegexCompileFlags cflags = (GRegexCompileFlags)(G_REGEX_CASELESS | G_REGEX_OPTIMIZE);
-
-    /*
-     * As FT_BYTES and FT_PROTOCOL contain arbitrary binary data and FT_STRING
-     * is not guaranteed to contain valid UTF-8, we have to disable support for
-     * UTF-8 patterns and treat every pattern and subject as raw bytes.
-     *
-     * Should support for UTF-8 patterns be necessary, then we should compile a
-     * pattern without G_REGEX_RAW. Additionally, we MUST use g_utf8_validate()
-     * before calling g_regex_match_full() or risk crashes.
-     */
-    cflags = (GRegexCompileFlags)(cflags | G_REGEX_RAW);
-
-    /* Free up the old value, if we have one */
-    gregex_fvalue_free(fv);
-
-    fv->value.re = g_regex_new(
-            pattern,            /* pattern */
-            cflags,             /* Compile options */
-            (GRegexMatchFlags)0,                  /* Match options */
-            &regex_error        /* Compile / study errors */
-            );
-
-    if (regex_error) {
-        if (err_msg) {
-            *err_msg = g_strdup(regex_error->message);
-        }
-        g_error_free(regex_error);
-        if (fv->value.re) {
-            g_regex_unref(fv->value.re);
-        }
-        return FALSE;
-    }
-    return TRUE;
-}
-
-/* Generate a FT_PCRE from an unparsed string pattern.
- * On failure, if err_msg is non-null, set *err_msg to point to a
- * g_malloc()ed error message. */
-static gboolean
-val_from_unparsed(fvalue_t *fv, const char *pattern, gboolean allow_partial_value, gchar **err_msg)
-{
-    g_assert(! allow_partial_value);
-
-    return val_from_string(fv, pattern, err_msg);
-}
-
-static int
-gregex_repr_len(fvalue_t *fv, ftrepr_t rtype _U_, int field_display _U_)
-{
-    return (int)strlen(g_regex_get_pattern(fv->value.re));
-}
-
-static void
-gregex_to_repr(fvalue_t *fv, ftrepr_t rtype _U_, int field_display _U_, char *buf, unsigned int size)
-{
-    g_strlcpy(buf, g_regex_get_pattern(fv->value.re), size);
-}
-
-/* BEHOLD - value contains the string representation of the regular expression,
- * and we want to store the compiled PCRE RE object into the value. */
-static void
-gregex_fvalue_set(fvalue_t *fv, const char *value)
-{
-    g_assert(value != NULL);
-    /* Free up the old value, if we have one */
-    gregex_fvalue_free(fv);
-    val_from_unparsed(fv, value, FALSE, NULL);
-}
-
-static gpointer
-gregex_fvalue_get(fvalue_t *fv)
-{
-    return fv->value.re;
-}
-
-void
-ftype_register_pcre(void)
-{
-    static ftype_t pcre_type = {
-        FT_PCRE,            /* ftype */
-        "FT_PCRE",          /* name */
-        "Compiled Perl-Compatible Regular Expression (GRegex) object", /* pretty_name */
-        0,                  /* wire_size */
-        gregex_fvalue_new,  /* new_value */
-        gregex_fvalue_free, /* free_value */
-        val_from_unparsed,  /* val_from_unparsed */
-        val_from_string,    /* val_from_string */
-        gregex_to_repr,     /* val_to_string_repr */
-        gregex_repr_len,    /* len_string_repr */
-
-        { .set_value_string = gregex_fvalue_set }, /* union set_value */
-        { .get_value_ptr = gregex_fvalue_get },    /* union get_value */
-
-        NULL,               /* cmp_eq */
-        NULL,               /* cmp_ne */
-        NULL,               /* cmp_gt */
-        NULL,               /* cmp_ge */
-        NULL,               /* cmp_lt */
-        NULL,               /* cmp_le */
-        NULL,               /* cmp_bitwise_and */
-        NULL,               /* cmp_contains */
-        NULL,               /* cmp_matches */
-
-        NULL,               /* len */
-        NULL,               /* slice */
-    };
-    ftype_register(FT_PCRE, &pcre_type);
-}
-
-/*
- * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
- *
- * Local variables:
- * c-basic-offset: 4
- * tab-width: 8
- * indent-tabs-mode: nil
- * End:
- *
- * vi: set shiftwidth=4 tabstop=8 expandtab:
- * :indentSize=4:tabSize=8:noTabs=true:
- */
diff --git a/epan/ftypes/ftype-protocol.c b/epan/ftypes/ftype-protocol.c
index 71ffae00c0..b3dc5328a5 100644
--- a/epan/ftypes/ftype-protocol.c
+++ b/epan/ftypes/ftype-protocol.c
@@ -383,21 +383,13 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
 }
 
 static gboolean
-cmp_matches(const fvalue_t *fv_a, const fvalue_t *fv_b)
+cmp_matches(const fvalue_t *fv, const GRegex *regex)
 {
-	const protocol_value_t *a = (const protocol_value_t *)&fv_a->value.protocol;
-	GRegex *regex = fv_b->value.re;
+	const protocol_value_t *a = (const protocol_value_t *)&fv->value.protocol;
 	volatile gboolean rc = FALSE;
 	const char *data = NULL; /* tvb data */
 	guint32 tvb_len; /* tvb length */
 
-	/* fv_b is always a FT_PCRE, otherwise the dfilter semcheck() would have
-	 * warned us. For the same reason (and because we're using g_malloc()),
-	 * fv_b->value.re is not NULL.
-	 */
-	if (strcmp(fv_b->ftype->name, "FT_PCRE") != 0) {
-		return FALSE;
-	}
 	if (! regex) {
 		return FALSE;
 	}
diff --git a/epan/ftypes/ftype-string.c b/epan/ftypes/ftype-string.c
index f6813d3b30..c42f08d928 100644
--- a/epan/ftypes/ftype-string.c
+++ b/epan/ftypes/ftype-string.c
@@ -186,18 +186,10 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
 }
 
 static gboolean
-cmp_matches(const fvalue_t *fv_a, const fvalue_t *fv_b)
+cmp_matches(const fvalue_t *fv, const GRegex *regex)
 {
-	char *str = fv_a->value.string;
-	GRegex *regex = fv_b->value.re;
-
-	/* fv_b is always a FT_PCRE, otherwise the dfilter semcheck() would have
-	 * warned us. For the same reason (and because we're using g_malloc()),
-	 * fv_b->value.re is not NULL.
-	 */
-	if (strcmp(fv_b->ftype->name, "FT_PCRE") != 0) {
-		return FALSE;
-	}
+	char *str = fv->value.string;
+
 	if (! regex) {
 		return FALSE;
 	}
diff --git a/epan/ftypes/ftypes-int.h b/epan/ftypes/ftypes-int.h
index 30051acd41..98af5997c2 100644
--- a/epan/ftypes/ftypes-int.h
+++ b/epan/ftypes/ftypes-int.h
@@ -62,6 +62,7 @@ typedef gint64 (*FvalueGetSignedInteger64Func)(fvalue_t*);
 typedef double (*FvalueGetFloatingFunc)(fvalue_t*);
 
 typedef gboolean (*FvalueCmp)(const fvalue_t*, const fvalue_t*);
+typedef gboolean (*FvalueMatches)(const fvalue_t*, const GRegex*);
 
 typedef guint (*FvalueLen)(fvalue_t*);
 typedef void (*FvalueSlice)(fvalue_t*, GByteArray *, guint offset, guint length);
@@ -109,7 +110,7 @@ struct _ftype_t {
 	FvalueCmp		cmp_le;
 	FvalueCmp		cmp_bitwise_and;
 	FvalueCmp		cmp_contains;
-	FvalueCmp		cmp_matches;
+	FvalueMatches		cmp_matches;
 
 	FvalueLen		len;
 	FvalueSlice		slice;
diff --git a/epan/ftypes/ftypes.c b/epan/ftypes/ftypes.c
index e62582ab79..a8a4838ffa 100644
--- a/epan/ftypes/ftypes.c
+++ b/epan/ftypes/ftypes.c
@@ -31,7 +31,6 @@ ftypes_initialize(void)
 	ftype_register_string();
 	ftype_register_time();
 	ftype_register_tvbuff();
-	ftype_register_pcre();
 }
 
 /* Each ftype_t is registered via this function */
@@ -528,8 +527,7 @@ void
 fvalue_set_string(fvalue_t *fv, const gchar *value)
 {
 	g_assert(IS_FT_STRING(fv->ftype->ftype) ||
-			fv->ftype->ftype == FT_UINT_STRING ||
-			fv->ftype->ftype == FT_PCRE);
+			fv->ftype->ftype == FT_UINT_STRING);
 	g_assert(fv->ftype->set_value.set_value_string);
 	fv->ftype->set_value.set_value_string(fv, value);
 }
@@ -618,7 +616,6 @@ fvalue_get(fvalue_t *fv)
 			fv->ftype->ftype == FT_FCWWN ||
 			fv->ftype->ftype == FT_GUID ||
 			fv->ftype->ftype == FT_IPv6 ||
-			fv->ftype->ftype == FT_PCRE ||
 			fv->ftype->ftype == FT_PROTOCOL ||
 			IS_FT_STRING(fv->ftype->ftype) ||
 			fv->ftype->ftype == FT_UINT_STRING ||
@@ -753,7 +750,7 @@ fvalue_contains(const fvalue_t *a, const fvalue_t *b)
 }
 
 gboolean
-fvalue_matches(const fvalue_t *a, const fvalue_t *b)
+fvalue_matches(const fvalue_t *a, const GRegex *b)
 {
 	/* XXX - check compatibility of a and b */
 	g_assert(a->ftype->cmp_matches);
diff --git a/epan/ftypes/ftypes.h b/epan/ftypes/ftypes.h
index 197c1d58a2..740b0e0280 100644
--- a/epan/ftypes/ftypes.h
+++ b/epan/ftypes/ftypes.h
@@ -58,7 +58,6 @@ enum ftenum {
 	FT_IPv6,
 	FT_IPXNET,
 	FT_FRAMENUM,	/* a UINT32, but if selected lets you go to frame with that number */
-	FT_PCRE,	/* a compiled Perl-Compatible Regular Expression object */
 	FT_GUID,	/* GUID, UUID */
 	FT_OID,		/* OBJECT IDENTIFIER */
 	FT_EUI64,
@@ -370,7 +369,7 @@ gboolean
 fvalue_contains(const fvalue_t *a, const fvalue_t *b);
 
 gboolean
-fvalue_matches(const fvalue_t *a, const fvalue_t *b);
+fvalue_matches(const fvalue_t *a, const GRegex *b);
 
 guint
 fvalue_length(fvalue_t *fv);
diff --git a/epan/proto.c b/epan/proto.c
index 08994bae86..3e013e9192 100644
--- a/epan/proto.c
+++ b/epan/proto.c
@@ -7101,7 +7101,6 @@ proto_item_get_display_repr(wmem_allocator_t *scope, proto_item *pi)
 		return "";
 	fi = PITEM_FINFO(pi);
 	DISSECTOR_ASSERT(fi->hfinfo != NULL);
-	DISSECTOR_ASSERT(fi->hfinfo->type != FT_PCRE);
 	return fvalue_to_string_repr(scope, &fi->value, FTREPR_DISPLAY, fi->hfinfo->display);
 }
 
@@ -8236,16 +8235,6 @@ tmp_fld_check_assert(header_field_info *hfinfo)
 	if (!hfinfo->abbrev || !hfinfo->abbrev[0])
 		g_error("Field '%s' does not have an abbreviation\n", hfinfo->name);
 
-	/*
-	 * FT_PCRE is a special "field" type; it's not for use by
-	 * fields, just by field values in filter expressions.
-	 */
-	if (hfinfo->type == FT_PCRE) {
-		g_error("Field '%s' (%s) is of type FT_PCRE,"
-			" which is not allowed\n",
-			hfinfo->name, hfinfo->abbrev);
-	}
-
 	/*  These types of fields are allowed to have value_strings,
 	 *  true_false_strings or a protocol_t struct
 	 */
@@ -11533,12 +11522,6 @@ construct_match_selected_string(field_info *finfo, epan_dissect_t *edt,
 			}
 			break;
 
-		case FT_PCRE:
-			/* FT_PCRE never appears as a type for a registered field. It is
-			 * only used internally. */
-			DISSECTOR_ASSERT_NOT_REACHED();
-			break;
-
 		/* By default, use the fvalue's "to_string_repr" method. */
 		default:
 			/* Figure out the string length needed.
diff --git a/epan/wslua/wslua_field.c b/epan/wslua/wslua_field.c
index c683ebacbe..8e1ab66445 100644
--- a/epan/wslua/wslua_field.c
+++ b/epan/wslua/wslua_field.c
@@ -207,7 +207,7 @@ WSLUA_METAMETHOD FieldInfo__tostring(lua_State* L) {
     if (fi->ws_fi->value.ftype->val_to_string_repr) {
         gchar* repr = NULL;
 
-        if (fi->ws_fi->hfinfo->type == FT_PROTOCOL || fi->ws_fi->hfinfo->type == FT_PCRE) {
+        if (fi->ws_fi->hfinfo->type == FT_PROTOCOL) {
             repr = fvalue_to_string_repr(NULL, &fi->ws_fi->value,FTREPR_DFILTER,BASE_NONE);
         }
         else {
diff --git a/epan/wslua/wslua_proto_field.c b/epan/wslua/wslua_proto_field.c
index e3f63273df..2d4b0d06cd 100644
--- a/epan/wslua/wslua_proto_field.c
+++ b/epan/wslua/wslua_proto_field.c
@@ -746,8 +746,6 @@ WSLUA_CONSTRUCTOR ProtoField_new(lua_State* L) {
     case FT_STRINGZTRUNC:
         WSLUA_ARG_ERROR(ProtoField_new,TYPE,"Unsupported ProtoField field type");
         break;
-    /* FT_PCRE isn't a valid field type. */
-    case FT_PCRE:
     default:
         WSLUA_ARG_ERROR(ProtoField_new,TYPE,"Invalid ProtoField field type");
         break;
diff --git a/test/lua/globals_2.2.txt b/test/lua/globals_2.2.txt
index e891184fbf..67442b8112 100644
--- a/test/lua/globals_2.2.txt
+++ b/test/lua/globals_2.2.txt
@@ -767,7 +767,6 @@
         ["IPv6"] = 24,
         ["NONE"] = 0,
         ["OID"] = 29,
-        ["PCRE"] = 27,
         ["PROTOCOL"] = 1,
         ["RELATIVE_TIME"] = 16,
         ["REL_OID"] = 33,
diff --git a/tools/convert_proto_tree_add_text.pl b/tools/convert_proto_tree_add_text.pl
index 055c547398..c54964d6d4 100755
--- a/tools/convert_proto_tree_add_text.pl
+++ b/tools/convert_proto_tree_add_text.pl
@@ -76,7 +76,7 @@ my %FIELD_TYPE = ('FT_NONE' => "FT_NONE", 'FT_PROTOCOL' => "FT_PROTOCOL", 'FT_BO
 				   'FT_STRING' => "FT_STRING", 'FT_STRINGZ' => "FT_STRINGZ", 'FT_UINT_STRING' => "FT_UINT_STRING",
 				   'FT_ETHER' => "FT_ETHER", 'FT_BYTES' => "FT_BYTES", 'FT_UINT_BYTES' => "FT_UINT_BYTES",
 				   'FT_IPv4' => "FT_IPv4", 'FT_IPv6' => "FT_IPv6", 'FT_IPXNET' => "FT_IPXNET", 'FT_AX25' => "FT_AX25", 'FT_VINES' => "FT_VINES",
-				   'FT_FRAMENUM' => "FT_FRAMENUM", 'FT_PCRE' => "FT_PCRE", 'FT_GUID' => "FT_GUID", 'FT_OID' => "FT_OID", 'FT_REL_OID' => "FT_REL_OID", 'FT_EUI64' => "FT_EUI64");
+				   'FT_FRAMENUM' => "FT_FRAMENUM", 'FT_GUID' => "FT_GUID", 'FT_OID' => "FT_OID", 'FT_REL_OID' => "FT_REL_OID", 'FT_EUI64' => "FT_EUI64");
 
 my %EXPERT_SEVERITY = ('PI_COMMENT' => "PI_COMMENT",
 					   'PI_CHAT' => "PI_CHAT",
diff --git a/tools/fix-encoding-args.pl b/tools/fix-encoding-args.pl
index e9f5186660..fc98c75bd5 100755
--- a/tools/fix-encoding-args.pl
+++ b/tools/fix-encoding-args.pl
@@ -167,7 +167,6 @@ my @types_ALL =
            FT_IPv6
            FT_IPXNET
            FT_FRAMENUM
-           FT_PCRE
            FT_GUID
            FT_OID
 	   FT_REL_OID
author	Guy Harris <gharris@sonic.net>	2021-03-21 03:06:17 -0700
committer	Guy Harris <gharris@sonic.net>	2021-03-21 03:27:44 -0700
commit	b61fd6d76a2ce97f2bae40d8a8eacebc91e617ea (patch)
tree	d229e30e3f969bb916d1dd50f1d98a60b33252d0
parent	ce611792c37f5c80692bc927e07d3d4f36cd5b65 (diff)