Skip to content

Commit

Permalink
Add truncation filter strategy generally
Browse files Browse the repository at this point in the history
  • Loading branch information
JessieAMorris committed Dec 18, 2024
1 parent ab0c806 commit 4492011
Show file tree
Hide file tree
Showing 8 changed files with 186 additions and 24 deletions.
12 changes: 9 additions & 3 deletions docs/docs/filter_policies/filter_strategies.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,15 @@ An example policy using the `STATIC_REPLACE` filter strategy:

### The `TRUNCATE` Filter Strategy {id="truncate"}

Available only to zip codes, this strategy allows for truncating zip codes to only a select number of digits. Specify `truncateDigits` to set the desired number of leading digits to leave. For example, if `truncateDigits` is 2, the zip code 90210 will be truncated to `90***`. 

The TRUNCATE filter strategy is available only to the zip code filter. An example policy using the `TRUNCATE` filter strategy:
This strategy truncates a token so that only a select number of its digits remain visible. Specify
`truncateDigits` to set the number of digits to keep. For example, if `truncateDigits` is 4, the
string `4111111111111111` will be truncated to `4111************`. `truncateDirection` can be set to
`LEADING` (the default), which keeps the N leading digits, or `TRAILING`, which keeps the N trailing digits.
`truncateCharacter` (defaults to `*`) can be overridden to change the character that is used for the
replacement.

The `TRUNCATE` filter strategy has special behavior for the zip code filter. For zip codes, the result is always
limited to 5 characters. For example, `truncateDigits=2` and a token of `90210-0110` will result in `90***`.

```
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ public abstract class AbstractFilterStrategy {
public static final String LAST_4 = "LAST_4";
public static final String MASK = "MASK";
public static final String SAME = AbstractFilterStrategy.SAME;
public static final String TRUNCATE = "TRUNCATE";
public static final String LEADING = "LEADING";
public static final String TRAILING = "TRAILING";

// NER Person's name strategies
public static final String ABBREVIATE = "ABBREVIATE";
Expand Down Expand Up @@ -107,6 +110,18 @@ public abstract class AbstractFilterStrategy {
@Expose
protected String maskLength = SAME;

@SerializedName("truncateDigits")
@Expose
protected Integer truncateDigits;

@SerializedName("truncateCharacter")
@Expose
protected String truncateCharacter = "*";

@SerializedName("truncateDirection")
@Expose
protected String truncateDirection = LEADING;

@SerializedName("condition")
@Expose
protected String condition = "";
Expand Down Expand Up @@ -365,6 +380,37 @@ public String getMaskLength() {
return maskLength;
}

/**
 * Returns the configured number of digits that the {@code TRUNCATE} strategy keeps.
 *
 * @return the value of the {@code truncateDigits} field; may be {@code null} because the
 *         field is declared without an initial value
 */
public Integer getTruncateDigits() {
return truncateDigits;
}

/**
 * Sets how many digits of a token the {@code TRUNCATE} strategy leaves visible.
 *
 * @param truncateDigits the number of digits to keep; must be at least 1, or {@code null}
 *        to clear the setting (the field is nullable and is read through
 *        {@code getValueOrDefault} by the strategies that use it)
 * @throws IllegalArgumentException if {@code truncateDigits} is non-null and less than 1
 */
public void setTruncateDigits(Integer truncateDigits) {

    // Check for null before comparing: unboxing a null Integer would otherwise
    // surface as an unexplained NullPointerException.
    if (truncateDigits != null && truncateDigits < 1) {
        throw new IllegalArgumentException("Truncate length must be at least 1 but was " + truncateDigits);
    }

    this.truncateDigits = truncateDigits;

}

/**
 * Returns the string used to replace the truncated portion of a token.
 *
 * @return the value of the {@code truncateCharacter} field, which is initialized to {@code "*"}
 */
public String getTruncateCharacter() {
return truncateCharacter;
}

/**
 * Sets the string used to replace the truncated portion of a token.
 * The value is stored as given; no validation is performed here.
 *
 * @param truncateCharacter the replacement string to store
 */
public void setTruncateCharacter(String truncateCharacter) {
this.truncateCharacter = truncateCharacter;
}

/**
 * Returns which end of the token the {@code TRUNCATE} strategy keeps.
 *
 * @return the value of the {@code truncateDirection} field, which is initialized to {@code LEADING}
 */
public String getTruncateDirection() {
return truncateDirection;
}

/**
 * Sets which end of the token the {@code TRUNCATE} strategy keeps.
 * The value is stored as given; no validation is performed here.
 *
 * @param truncateDirection the direction to store, e.g. {@code LEADING} or {@code TRAILING}
 */
public void setTruncateDirection(String truncateDirection) {
this.truncateDirection = truncateDirection;
}

/**
 * Sets the condition expression for this strategy.
 * Note that despite the plural method name, this assigns the single {@code condition} field.
 *
 * @param condition the condition string to store
 */
public void setConditions(String condition) {
this.condition = condition;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,20 @@ public Replacement getStandardReplacement(String label, String context, String d

replacement = maskCharacter.repeat(characters);

} else if(StringUtils.equalsIgnoreCase(strategy, TRUNCATE)) {

int truncateLength = getValueOrDefault(truncateDigits, 4);

if (truncateLength < 1) {
truncateLength = 1;
}

if(StringUtils.equalsIgnoreCase(truncateDirection, LEADING)) {
replacement = token.substring(0, truncateLength) + StringUtils.repeat(truncateCharacter, token.length() - truncateLength);
} else {
replacement = StringUtils.repeat(truncateCharacter, token.length() - truncateLength) + token.substring(token.length() - truncateLength);
}

} else if(StringUtils.equalsIgnoreCase(strategy, RANDOM_REPLACE)) {

// Default to document scope.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,20 @@ public Replacement getReplacement(String label, String context, String documentI

replacement = maskCharacter.repeat(characters);

} else if(StringUtils.equalsIgnoreCase(strategy, TRUNCATE)) {

int truncateLength = getValueOrDefault(truncateDigits, 4);

if (truncateLength < 1) {
truncateLength = 1;
}

if(StringUtils.equalsIgnoreCase(truncateDirection, LEADING)) {
replacement = token.substring(0, truncateLength) + StringUtils.repeat(truncateCharacter, token.length() - truncateLength);
} else {
replacement = StringUtils.repeat(truncateCharacter, token.length() - truncateLength) + token.substring(token.length() - truncateLength);
}

} else if(StringUtils.equalsIgnoreCase(strategy, RANDOM_REPLACE)) {

// Default to document scope.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,20 @@ public Replacement getReplacement(String label, String context, String documentI

replacement = maskCharacter.repeat(characters);

} else if(StringUtils.equalsIgnoreCase(strategy, TRUNCATE)) {

int truncateLength = getValueOrDefault(truncateDigits, 4);

if (truncateLength < 1) {
truncateLength = 1;
}

if(StringUtils.equalsIgnoreCase(truncateDirection, LEADING)) {
replacement = token.substring(0, truncateLength) + StringUtils.repeat(truncateCharacter, token.length() - truncateLength);
} else {
replacement = StringUtils.repeat(truncateCharacter, token.length() - truncateLength) + token.substring(token.length() - truncateLength);
}

} else if(StringUtils.equalsIgnoreCase(strategy, RANDOM_REPLACE)) {

// Default to document scope.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
*/
package ai.philterd.phileas.model.policy.filters.strategies.rules;

import ai.philterd.phileas.model.policy.Policy;
import com.google.gson.annotations.Expose;
import com.google.gson.annotations.SerializedName;
import ai.philterd.phileas.model.conditions.ParsedCondition;
import ai.philterd.phileas.model.conditions.ParserListener;
import ai.philterd.phileas.model.enums.FilterType;
Expand All @@ -28,6 +25,7 @@
import ai.philterd.phileas.model.objects.Replacement;
import ai.philterd.phileas.model.policy.Crypto;
import ai.philterd.phileas.model.policy.FPE;
import ai.philterd.phileas.model.policy.Policy;
import ai.philterd.phileas.model.policy.filters.strategies.AbstractFilterStrategy;
import ai.philterd.phileas.model.services.AnonymizationService;
import ai.philterd.phileas.model.utils.Encryption;
Expand Down Expand Up @@ -62,10 +60,6 @@ public FilterType getFilterType() {
return filterType;
}

@SerializedName("truncateDigits")
@Expose
private Integer truncateDigits;

@Override
public boolean evaluateCondition(Policy policy, String context, String documentId, String token, String[] window, String condition, double confidence, Map<String, String> attributes) {

Expand Down Expand Up @@ -177,7 +171,7 @@ public Replacement getReplacement(String label, String context, String documentI
characters = Integer.parseInt(maskLength);
}

if(characters < 1) {
if (characters < 1) {
characters = 5;
}

Expand All @@ -200,8 +194,14 @@ public Replacement getReplacement(String label, String context, String documentI

} else if(StringUtils.equalsIgnoreCase(strategy, TRUNCATE)) {

final int truncateLength = getValueOrDefault(truncateDigits, 2);
replacement = token.substring(0, truncateDigits) + StringUtils.repeat("*", Math.min(token.length() - truncateLength, 5 - truncateDigits));
if(StringUtils.equalsIgnoreCase(truncateDirection, LEADING)) {
final int truncateLength = getValueOrDefault(truncateDigits, 2);
replacement = token.substring(0, truncateDigits) + StringUtils.repeat(truncateCharacter, Math.min(token.length() - truncateLength, 5 - truncateDigits));
} else {
final int truncateLength = getValueOrDefault(truncateDigits, 2);
replacement = StringUtils.repeat(truncateCharacter, Math.min(token.length() - truncateLength, 5 - truncateDigits)) + token.substring(Math.min(token.length() - truncateLength, 5 - truncateDigits), 5);
}


} else if(StringUtils.equalsIgnoreCase(strategy, ZERO_LEADING)) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ public void replacementWithMaskCharacterForSameLength() throws Exception {
}

@Test
public void replacementWithMaskCharacterForSetLength() throws Exception {
public void replacementWithMaskCharacterSetLength() throws Exception {

final AnonymizationService anonymizationService = Mockito.mock(AnonymizationService.class);
final AnonymizationCacheService anonymizationCacheService = Mockito.mock(AnonymizationCacheService.class);
Expand All @@ -260,38 +260,78 @@ public void replacementWithMaskCharacterForSetLength() throws Exception {

final AbstractFilterStrategy strategy = getFilterStrategy();
strategy.setStrategy(AbstractFilterStrategy.MASK);
strategy.setMaskCharacter("#");
strategy.setMaskLength("10");
strategy.setMaskLength(AbstractFilterStrategy.SAME);

final String token = "token";
final Replacement replacement = strategy.getReplacement("name", "context", "docId", token, WINDOW, null, null, anonymizationService, null);

Assertions.assertEquals(replacement.getReplacement(), "##########");
Assertions.assertEquals(replacement.getReplacement().length(), 10);
Assertions.assertEquals(replacement.getReplacement(), "*****");
Assertions.assertEquals(replacement.getReplacement().length(), token.length());

}

@Test
public void replacementWithMaskCharacterForSetLengthWithNegativeLength() throws Exception {
public void truncate1() throws Exception {

final AnonymizationService anonymizationService = Mockito.mock(AnonymizationService.class);
final AnonymizationCacheService anonymizationCacheService = Mockito.mock(AnonymizationCacheService.class);

when(anonymizationService.getAnonymizationCacheService()).thenReturn(anonymizationCacheService);

final AbstractFilterStrategy strategy = getFilterStrategy();
strategy.setStrategy(AbstractFilterStrategy.MASK);
strategy.setMaskCharacter("#");
strategy.setMaskLength("0");
strategy.setStrategy(AbstractFilterStrategy.TRUNCATE);
strategy.setTruncateDigits(2);

final String token = "token";
final String token = "12345";
final Replacement replacement = strategy.getReplacement("name", "context", "docId", token, WINDOW, null, null, anonymizationService, null);

Assertions.assertEquals(replacement.getReplacement(), "12***");
Assertions.assertEquals(replacement.getReplacement().length(), 5);

}

@Test
public void truncate2() throws Exception {

final AnonymizationService anonymizationService = Mockito.mock(AnonymizationService.class);
final AnonymizationCacheService anonymizationCacheService = Mockito.mock(AnonymizationCacheService.class);

when(anonymizationService.getAnonymizationCacheService()).thenReturn(anonymizationCacheService);

final AbstractFilterStrategy strategy = getFilterStrategy();
strategy.setStrategy(AbstractFilterStrategy.TRUNCATE);
strategy.setTruncateDirection(AbstractFilterStrategy.LEADING);
strategy.setTruncateDigits(2);

final String token = "12345";
final Replacement replacement = strategy.getReplacement("name", "context", "docId", token, WINDOW, null, null, anonymizationService, null);

Assertions.assertEquals(replacement.getReplacement(), "#####");
Assertions.assertEquals(replacement.getReplacement(), "12***");
Assertions.assertEquals(replacement.getReplacement().length(), 5);

}

/**
 * Verifies that {@code TRUNCATE} with the {@code TRAILING} direction keeps only the
 * last four digits of a 16-digit token and masks the rest without changing its length.
 */
@Test
public void truncate3() throws Exception {

    final AnonymizationService anonymizationService = Mockito.mock(AnonymizationService.class);
    final AnonymizationCacheService anonymizationCacheService = Mockito.mock(AnonymizationCacheService.class);

    when(anonymizationService.getAnonymizationCacheService()).thenReturn(anonymizationCacheService);

    final AbstractFilterStrategy strategy = getFilterStrategy();
    strategy.setStrategy(AbstractFilterStrategy.TRUNCATE);
    strategy.setTruncateDirection(AbstractFilterStrategy.TRAILING);
    strategy.setTruncateDigits(4);

    final String token = "4111111111111111";
    final Replacement replacement = strategy.getReplacement("name", "context", "docId", token, WINDOW, null, null, anonymizationService, null);

    // Expected value goes first so a failure message reports expected/actual correctly.
    Assertions.assertEquals("************1111", replacement.getReplacement());
    Assertions.assertEquals(token.length(), replacement.getReplacement().length());

}

@Test
public void evaluateCondition1() throws IOException {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,24 @@ public void truncateTo1() throws Exception {

}

/**
 * Verifies that truncating the zip code {@code 90210-0110} to one trailing digit
 * yields {@code ****0}.
 */
@Test
public void truncateTo1Trailing() throws Exception {

    final AnonymizationService mockedAnonymizer = Mockito.mock(AnonymizationService.class);

    final ZipCodeFilterStrategy zipStrategy = new ZipCodeFilterStrategy();
    zipStrategy.setStrategy(ZipCodeFilterStrategy.TRUNCATE);
    zipStrategy.setTruncateDirection(AbstractFilterStrategy.TRAILING);
    zipStrategy.setTruncateDigits(1);

    final Replacement result = zipStrategy.getReplacement("name", "context", "documentid", "90210-0110", WINDOW, new Crypto(), new FPE(), mockedAnonymizer, null);

    LOGGER.info(result);

    Assertions.assertEquals("****0", result.getReplacement());

}

@Test
public void zeroLeading1() throws Exception {

Expand All @@ -193,4 +211,14 @@ public void zeroLeading1() throws Exception {

}

// Override the standard truncate tests since zip has a different truncate behavior
@Override
@Test
public void truncate1() throws Exception {
    // Intentionally empty: replaces the inherited generic truncate test, whose
    // expectations do not apply to the zip code truncate behavior.
}

@Override
@Test
public void truncate2() throws Exception {
    // Intentionally empty: replaces the inherited generic truncate test, whose
    // expectations do not apply to the zip code truncate behavior.
}

@Override
@Test
public void truncate3() throws Exception {
    // Intentionally empty: replaces the inherited generic truncate test, whose
    // expectations do not apply to the zip code truncate behavior.
}

}

0 comments on commit 4492011

Please sign in to comment.