philterd · jzonthemtn · Dec 19, 2024 · Dec 17, 2024 · Dec 19, 2024 · jzonthemtn
@@ -208,9 +208,15 @@ An example policy using the `STATIC_REPLACE` filter strategy:
 
 ### The `TRUNCATE` Filter Strategy {id="truncate"}
 
-Available only to zip codes, this strategy allows for truncating zip codes to only a select number of digits. Specify `truncateDigits` to set the desired number of leading digits to leave. For example, if `truncateDigits` is 2, the zip code 90210 will be truncated to `90***`.&#x20;
+This strategy truncates a token so that only a select number of its characters remain visible. Specify
+`truncateLeaveCharacters` to set the number of characters to leave. For example, if `truncateLeaveCharacters`
+is 4, the string `4111111111111111` will be truncated to `4111************`. `truncateDirection` can be set to
+`LEADING` (the default), which leaves the N leading characters, or `TRAILING`, which leaves the N trailing
+characters. `truncateCharacter` (defaults to `*`) can be overridden to change the character that is used for
+the replacement.
 
-The TRUNCATE filter strategy is available only to the zip code filter. An example policy using the `TRUNCATE` filter strategy:
+The `TRUNCATE` filter strategy has special behavior for the zip code filter: the truncated zip code is always
+5 characters long. For example, `truncateLeaveCharacters=2` and a token of `90210-0110` will result in `90***`.
 
 ```
 {
@@ -220,7 +226,7 @@ The TRUNCATE filter strategy is available only to the zip code filter. An exampl
          "zipCodeFilterStrategies": [
             {
                "strategy": "TRUNCATE",
-               "truncateDigits": 3
+               "truncateLeaveCharacters": 3
             }
          ]
       }

@@ -21,15 +21,15 @@ This filter has no required parameters.
 
 The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of `REDACT` is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See [Filter Strategies](#filter-strategies) for details.
 
-| Strategy              | Description                                                                                                                                         |
-| --------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `REDACT`              | Replace the sensitive text with a placeholder.                                                                                                      |
-| `RANDOM_REPLACE`      | Replace the sensitive text with a similar, random value.                                                                                            |
-| `STATIC_REPLACE`      | Replace the sensitive text with a given value.                                                                                                      |
-| `CRYPTO_REPLACE`      | Replace the sensitive text with its encrypted value.                                                                                                |
-| `HASH_SHA256_REPLACE` | Replace the sensitive text with its SHA256 hash value.                                                                                              |
-| `TRUNCATE`            | Replace the sensitive text by removing the last `x` digits. (Set the number of digits using the `truncateDigits` parameter of the filter strategy.) |
-| `ZERO_LEADING`        | Replace the sensitive text by zeroing the first 3 digits.                                                                                           |
+| Strategy              | Description                                                                                                                                                                            |
+| --------------------- |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `REDACT`              | Replace the sensitive text with a placeholder.                                                                                                                                         |
+| `RANDOM_REPLACE`      | Replace the sensitive text with a similar, random value.                                                                                                                               |
+| `STATIC_REPLACE`      | Replace the sensitive text with a given value.                                                                                                                                         |
+| `CRYPTO_REPLACE`      | Replace the sensitive text with its encrypted value.                                                                                                                                   |
+| `HASH_SHA256_REPLACE` | Replace the sensitive text with its SHA256 hash value.                                                                                                                                 |
+| `TRUNCATE`            | Replace the sensitive text by removing everything except `x` characters. (Set the number of characters to leave using the `truncateLeaveCharacters` parameter of the filter strategy.) |
+| `ZERO_LEADING`        | Replace the sensitive text by zeroing the first 3 digits.                                                                                                                              |
 
 ### Conditions
 

@@ -129,7 +129,7 @@ This policy finds ZIP codes starting with `90` and truncates the zip code to jus
         {
           "condition": "token startswith \"90\"",
           "strategy": "TRUNCATE",
-          "truncateDigits": 2
+          "truncateLeaveCharacters": 2
         }
       ]
     }

@@ -44,6 +44,9 @@ public abstract class AbstractFilterStrategy {
     public static final String LAST_4 = "LAST_4";
     public static final String MASK = "MASK";
     public static final String SAME = AbstractFilterStrategy.SAME;
+    public static final String TRUNCATE = "TRUNCATE";
+    public static final String LEADING = "LEADING";
+    public static final String TRAILING = "TRAILING";
 
     // NER Person's name strategies
     public static final String ABBREVIATE = "ABBREVIATE";
@@ -107,6 +110,23 @@ public abstract class AbstractFilterStrategy {
     @Expose
     protected String maskLength = SAME;
 
+    @SerializedName("truncateDigits")
+    @Expose
+    @Deprecated
+    protected Integer truncateDigits;
+
+    @SerializedName("truncateLeaveCharacters")
+    @Expose
+    protected Integer truncateLeaveCharacters;
+
+    @SerializedName("truncateCharacter")
+    @Expose
+    protected String truncateCharacter = "*";
+
+    @SerializedName("truncateDirection")
+    @Expose
+    protected String truncateDirection = LEADING;
+
     @SerializedName("condition")
     @Expose
     protected String condition = "";
@@ -365,6 +385,42 @@ public String getMaskLength() {
         return maskLength;
     }
 
+    /**
+     * Sets the number of characters to leave in place when truncating. This is the
+     * legacy name of the setting; the call is forwarded unchanged to
+     * {@link #setTruncateLeaveCharacters(Integer)}, so that method's validation applies.
+     *
+     * @param truncateDigits The number of characters to leave.
+     * @throws IllegalArgumentException If {@link #setTruncateLeaveCharacters(Integer)} rejects the value.
+     * @deprecated Use {@link #setTruncateLeaveCharacters(Integer)} instead.
+     */
+    @Deprecated
+    public void setTruncateDigits(Integer truncateDigits) {
+        setTruncateLeaveCharacters(truncateDigits);
+    }
+
+    /**
+     * Sets the number of characters of the token to leave in place when the
+     * <code>TRUNCATE</code> strategy is applied.
+     *
+     * @param truncateLeaveCharacters The number of characters to leave; must be non-null and at least 1.
+     * @throws IllegalArgumentException If the value is <code>null</code> or less than 1.
+     */
+    public void setTruncateLeaveCharacters(Integer truncateLeaveCharacters) {
+
+        // Reject null explicitly: unboxing it in the range comparison would otherwise
+        // surface as a NullPointerException instead of a validation error.
+        if(truncateLeaveCharacters == null || truncateLeaveCharacters < 1) {
+            throw new IllegalArgumentException("Truncate leave characters must be greater than or equal to 1");
+        }
+
+        this.truncateLeaveCharacters = truncateLeaveCharacters;
+
+    }
+
+    /**
+     * Gets the number of characters to leave in place when truncating.
+     *
+     * @return The configured value, or <code>null</code> if it has not been set.
+     */
+    public Integer getTruncateLeaveCharacters() {
+        return truncateLeaveCharacters;
+    }
+
+    /**
+     * Gets the character used to replace the truncated portion of a token.
+     *
+     * @return The replacement character; <code>*</code> unless it has been changed.
+     */
+    public String getTruncateCharacter() {
+        return truncateCharacter;
+    }
+
+    /**
+     * Sets the character used to replace the truncated portion of a token.
+     * The value is stored as given; no validation is performed here.
+     *
+     * @param truncateCharacter The replacement character.
+     */
+    public void setTruncateCharacter(String truncateCharacter) {
+        this.truncateCharacter = truncateCharacter;
+    }
+
+    /**
+     * Gets the direction that controls which end of the token is left in place.
+     *
+     * @return The configured direction; {@link #LEADING} unless it has been changed.
+     */
+    public String getTruncateDirection() {
+        return truncateDirection;
+    }
+
+    /**
+     * Sets the direction that controls which end of the token is left in place.
+     * The value is stored as given; no validation is performed here.
+     *
+     * @param truncateDirection The direction, e.g. {@link #LEADING} or {@link #TRAILING}.
+     */
+    public void setTruncateDirection(String truncateDirection) {
+        this.truncateDirection = truncateDirection;
+    }
+
     public void setConditions(String condition) {
         this.condition = condition;
     }

@@ -54,6 +54,20 @@ public Replacement getStandardReplacement(String label, String context, String d
 
             replacement = maskCharacter.repeat(characters);
 
+        } else if(StringUtils.equalsIgnoreCase(strategy, TRUNCATE)) {
+
+            int leaveCharacters = getValueOrDefault(getValueOrDefault(truncateLeaveCharacters, truncateDigits), 4);
+
+            if (leaveCharacters < 1) {
+                leaveCharacters = 1;
+            }
+
+            if(StringUtils.equalsIgnoreCase(truncateDirection, LEADING)) {
+                replacement = token.substring(0, leaveCharacters) + StringUtils.repeat(truncateCharacter, token.length() - leaveCharacters);
+            } else {
+                replacement = StringUtils.repeat(truncateCharacter, token.length() - leaveCharacters) + token.substring(token.length() - leaveCharacters);
+            }
+
         } else if(StringUtils.equalsIgnoreCase(strategy, RANDOM_REPLACE)) {
 
             // Default to document scope.

@@ -156,6 +156,20 @@ public Replacement getReplacement(String label, String context, String documentI
 
             replacement = maskCharacter.repeat(characters);
 
+        } else if(StringUtils.equalsIgnoreCase(strategy, TRUNCATE)) {
+
+            int leaveCharacters = getValueOrDefault(getValueOrDefault(truncateDigits, truncateLeaveCharacters), 4);
+
+            if (leaveCharacters < 1) {
+                leaveCharacters = 1;
+            }
+
+            if(StringUtils.equalsIgnoreCase(truncateDirection, LEADING)) {
+                replacement = token.substring(0, leaveCharacters) + StringUtils.repeat(truncateCharacter, token.length() - leaveCharacters);
+            } else {
+                replacement = StringUtils.repeat(truncateCharacter, token.length() - leaveCharacters) + token.substring(token.length() - leaveCharacters);
+            }
+
         } else if(StringUtils.equalsIgnoreCase(strategy, RANDOM_REPLACE)) {
 
             // Default to document scope.

@@ -174,6 +174,20 @@ public Replacement getReplacement(String label, String context, String documentI
 
             replacement = maskCharacter.repeat(characters);
 
+        } else if(StringUtils.equalsIgnoreCase(strategy, TRUNCATE)) {
+
+            int leaveCharacters = getValueOrDefault(getValueOrDefault(truncateDigits, truncateLeaveCharacters), 4);
+
+            if (leaveCharacters < 1) {
+                leaveCharacters = 1;
+            }
+
+            if(StringUtils.equalsIgnoreCase(truncateDirection, LEADING)) {
+                replacement = token.substring(0, leaveCharacters) + StringUtils.repeat(truncateCharacter, token.length() - leaveCharacters);
+            } else {
+                replacement = StringUtils.repeat(truncateCharacter, token.length() - leaveCharacters) + token.substring(token.length() - leaveCharacters);
+            }
+
         } else if(StringUtils.equalsIgnoreCase(strategy, RANDOM_REPLACE)) {
 
             // Default to document scope.

@@ -15,9 +15,6 @@
  */
 package ai.philterd.phileas.model.policy.filters.strategies.rules;
 
-import ai.philterd.phileas.model.policy.Policy;
-import com.google.gson.annotations.Expose;
-import com.google.gson.annotations.SerializedName;
 import ai.philterd.phileas.model.conditions.ParsedCondition;
 import ai.philterd.phileas.model.conditions.ParserListener;
 import ai.philterd.phileas.model.enums.FilterType;
@@ -28,6 +25,7 @@
 import ai.philterd.phileas.model.objects.Replacement;
 import ai.philterd.phileas.model.policy.Crypto;
 import ai.philterd.phileas.model.policy.FPE;
+import ai.philterd.phileas.model.policy.Policy;
 import ai.philterd.phileas.model.policy.filters.strategies.AbstractFilterStrategy;
 import ai.philterd.phileas.model.services.AnonymizationService;
 import ai.philterd.phileas.model.utils.Encryption;
@@ -62,10 +60,6 @@ public FilterType getFilterType() {
         return filterType;
     }
 
-    @SerializedName("truncateDigits")
-    @Expose
-    private Integer truncateDigits;
-
     @Override
     public boolean evaluateCondition(Policy policy, String context, String documentId, String token, String[] window, String condition, double confidence, Map<String, String> attributes) {
 
@@ -177,7 +171,7 @@ public Replacement getReplacement(String label, String context, String documentI
                 characters = Integer.parseInt(maskLength);
             }
 
-            if(characters < 1) {
+            if (characters < 1) {
                 characters = 5;
             }
 
@@ -200,8 +194,18 @@ public Replacement getReplacement(String label, String context, String documentI
 
         } else if(StringUtils.equalsIgnoreCase(strategy, TRUNCATE)) {
 
-            final int truncateLength = getValueOrDefault(truncateDigits, 2);
-            replacement = token.substring(0, truncateDigits) + StringUtils.repeat("*", Math.min(token.length() - truncateLength, 5 - truncateDigits));
+            int leaveCharacters = getValueOrDefault(getValueOrDefault(truncateDigits, truncateLeaveCharacters), 4);
+
+            if (leaveCharacters < 1) {
+                leaveCharacters = 1;
+            }
+
+            if(StringUtils.equalsIgnoreCase(truncateDirection, LEADING)) {
+                replacement = token.substring(0, leaveCharacters) + StringUtils.repeat(truncateCharacter, Math.min(token.length() - leaveCharacters, 5 - leaveCharacters));
+            } else {
+                replacement = StringUtils.repeat(truncateCharacter, Math.min(token.length() - leaveCharacters, 5 - leaveCharacters)) + token.substring(Math.min(token.length() - leaveCharacters, 5 - leaveCharacters), 5);
+            }
+
 
         } else if(StringUtils.equalsIgnoreCase(strategy, ZERO_LEADING)) {
 
@@ -230,15 +234,15 @@ public Replacement getReplacement(String label, String context, String documentI
 
     }
 
-    public Integer getTruncateDigits() {
-        return truncateDigits;
+    public void setTruncateDigits(Integer truncateDigits) {
+        setTruncateLeaveCharacters(truncateDigits);
     }
 
-    public void setTruncateDigits(Integer truncateDigits) {
+    public void setTruncateLeaveCharacters(Integer truncateLeaveCharacters) {
 
         // Make sure it is a valid value.
-        if(truncateDigits >= 1 && truncateDigits <= 4) {
-            this.truncateDigits = truncateDigits;
+        if(truncateLeaveCharacters >= 1 && truncateLeaveCharacters <= 4) {
+            this.truncateLeaveCharacters = truncateLeaveCharacters;
         } else {
             throw new IllegalArgumentException("Truncate length must be between 1 and 4, inclusive.");
         }

@@ -292,6 +292,88 @@ public void replacementWithMaskCharacterForSetLengthWithNegativeLength() throws
 
     }
 
+    /**
+     * Verifies that the deprecated <code>setTruncateDigits</code> setter still drives the
+     * <code>TRUNCATE</code> strategy: leaving one character of <code>12345</code> yields <code>1****</code>.
+     */
+    @Test
+    public void truncate1() throws Exception {
+
+        final AnonymizationService anonymizationService = Mockito.mock(AnonymizationService.class);
+        final AnonymizationCacheService anonymizationCacheService = Mockito.mock(AnonymizationCacheService.class);
+
+        when(anonymizationService.getAnonymizationCacheService()).thenReturn(anonymizationCacheService);
+
+        final AbstractFilterStrategy strategy = getFilterStrategy();
+        strategy.setStrategy(AbstractFilterStrategy.TRUNCATE);
+        strategy.setTruncateDigits(1);
+
+        final String token = "12345";
+        final Replacement replacement = strategy.getReplacement("name", "context", "docId", token, WINDOW, null, null, anonymizationService, null);
+
+        // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
+        Assertions.assertEquals("1****", replacement.getReplacement());
+        Assertions.assertEquals(5, replacement.getReplacement().length());
+
+    }
+
+    /**
+     * Verifies that leaving four characters of <code>12345</code> with the
+     * <code>TRUNCATE</code> strategy yields <code>1234*</code>.
+     */
+    @Test
+    public void truncate2() throws Exception {
+
+        final AnonymizationService anonymizationService = Mockito.mock(AnonymizationService.class);
+        final AnonymizationCacheService anonymizationCacheService = Mockito.mock(AnonymizationCacheService.class);
+
+        when(anonymizationService.getAnonymizationCacheService()).thenReturn(anonymizationCacheService);
+
+        final AbstractFilterStrategy strategy = getFilterStrategy();
+        strategy.setStrategy(AbstractFilterStrategy.TRUNCATE);
+        strategy.setTruncateLeaveCharacters(4);
+
+        final String token = "12345";
+        final Replacement replacement = strategy.getReplacement("name", "context", "docId", token, WINDOW, null, null, anonymizationService, null);
+
+        // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
+        Assertions.assertEquals("1234*", replacement.getReplacement());
+        Assertions.assertEquals(5, replacement.getReplacement().length());
+
+    }
+
+    /**
+     * Verifies that an explicit <code>LEADING</code> direction leaving two characters of
+     * <code>12345</code> yields <code>12***</code>.
+     */
+    @Test
+    public void truncate3() throws Exception {
+
+        final AnonymizationService anonymizationService = Mockito.mock(AnonymizationService.class);
+        final AnonymizationCacheService anonymizationCacheService = Mockito.mock(AnonymizationCacheService.class);
+
+        when(anonymizationService.getAnonymizationCacheService()).thenReturn(anonymizationCacheService);
+
+        final AbstractFilterStrategy strategy = getFilterStrategy();
+        strategy.setStrategy(AbstractFilterStrategy.TRUNCATE);
+        strategy.setTruncateDirection(AbstractFilterStrategy.LEADING);
+        strategy.setTruncateLeaveCharacters(2);
+
+        final String token = "12345";
+        final Replacement replacement = strategy.getReplacement("name", "context", "docId", token, WINDOW, null, null, anonymizationService, null);
+
+        // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
+        Assertions.assertEquals("12***", replacement.getReplacement());
+        Assertions.assertEquals(5, replacement.getReplacement().length());
+
+    }
+
+    /**
+     * Verifies that the <code>TRAILING</code> direction leaving four characters of a
+     * sixteen-character token yields <code>************1111</code>.
+     */
+    @Test
+    public void truncate4() throws Exception {
+
+        final AnonymizationService anonymizationService = Mockito.mock(AnonymizationService.class);
+        final AnonymizationCacheService anonymizationCacheService = Mockito.mock(AnonymizationCacheService.class);
+
+        when(anonymizationService.getAnonymizationCacheService()).thenReturn(anonymizationCacheService);
+
+        final AbstractFilterStrategy strategy = getFilterStrategy();
+        strategy.setStrategy(AbstractFilterStrategy.TRUNCATE);
+        strategy.setTruncateDirection(AbstractFilterStrategy.TRAILING);
+        strategy.setTruncateLeaveCharacters(4);
+
+        final String token = "4111111111111111";
+        final Replacement replacement = strategy.getReplacement("name", "context", "docId", token, WINDOW, null, null, anonymizationService, null);
+
+        // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
+        Assertions.assertEquals("************1111", replacement.getReplacement());
+        Assertions.assertEquals(16, replacement.getReplacement().length());
+
+    }
+
     @Test
     public void evaluateCondition1() throws IOException {