from langsmith import Client


# Remove sensitive keys.
def hide_sensitive_metadata(metadata: dict) -> dict:
    """Return a copy of *metadata* without keys that start with ``_private``.

    Args:
        metadata: Metadata mapping; keys are expected to be strings because
            ``str.startswith`` is called on each of them.

    Returns:
        A new dict holding only the entries whose key does not start with
        ``"_private"``. The input mapping is not modified.
    """
    return {
        key: value
        for key, value in metadata.items()
        if not key.startswith("_private")
    }


client = Client(hide_metadata=hide_sensitive_metadata)


# Redact specific values.
def redact_emails(metadata: dict) -> dict:
    """Return a copy of *metadata* with email-like string values replaced.

    A value is treated as email-like when it is a ``str`` containing ``"@"``;
    the whole value is replaced by ``"[REDACTED_EMAIL]"``. All other values
    are copied through unchanged.

    Args:
        metadata: Metadata mapping to sanitize.

    Returns:
        A new dict with the same keys. The input mapping is not modified.
    """
    return {
        key: "[REDACTED_EMAIL]" if isinstance(value, str) and "@" in value else value
        for key, value in metadata.items()
    }


# NOTE: each ``client = Client(...)`` assignment in this snippet is an
# independent alternative; the name is simply rebound.
client = Client(hide_metadata=redact_emails)


# Add a transformation marker.
def add_marker(metadata: dict) -> dict:
    """Return a copy of *metadata* with ``"transformed"`` set to ``True``.

    Args:
        metadata: Metadata mapping to annotate.

    Returns:
        A new dict with every original entry plus ``"transformed": True``
        (an existing ``"transformed"`` key is overwritten).
    """
    return {**metadata, "transformed": True}


client = Client(hide_metadata=add_marker)
import re

from langsmith import Client, traceable

# Regular-expression patterns for email addresses and UUIDs.
# The dot in front of the top-level domain is escaped so that it matches a
# literal "." only; an unescaped "." would match any character and make the
# pattern swallow unrelated text such as "me@home is".
EMAIL_REGEX = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
UUID_REGEX = r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"


def replace_sensitive_data(data, depth=10):
    """Recursively replace email addresses and UUIDs inside *data*.

    Dicts and lists are rebuilt with their values/items processed; strings
    have every ``EMAIL_REGEX`` match replaced by ``"<email-address>"`` and
    every ``UUID_REGEX`` match replaced by ``"<UUID>"``. Any other type is
    returned unchanged.

    Args:
        data: The value to sanitize (dict, list, str, or anything else).
        depth: Remaining recursion budget. When it reaches 0 the value is
            returned as-is, so content nested deeper than the budget is
            NOT sanitized.

    Returns:
        The sanitized value. Dicts and lists are new objects; the input is
        not modified.
    """
    if depth == 0:
        return data
    if isinstance(data, dict):
        return {
            key: replace_sensitive_data(value, depth - 1)
            for key, value in data.items()
        }
    if isinstance(data, list):
        return [replace_sensitive_data(item, depth - 1) for item in data]
    if isinstance(data, str):
        without_emails = re.sub(EMAIL_REGEX, "<email-address>", data)
        return re.sub(UUID_REGEX, "<UUID>", without_emails)
    return data


# The function is passed directly: it is called with the single payload
# argument, so ``depth`` keeps its default.
client = Client(
    hide_inputs=replace_sensitive_data,
    hide_outputs=replace_sensitive_data,
)

inputs = {
    "role": "user",
    "content": "Hello! My email is user@example.com and my ID is 123e4567-e89b-12d3-a456-426614174000.",
}
outputs = {
    "role": "assistant",
    "content": "Hi! I've noted your email as user@example.com and your ID as 123e4567-e89b-12d3-a456-426614174000.",
}


@traceable(client=client)
def child(inputs: dict) -> dict:
    """Return the module-level ``outputs`` example, ignoring *inputs*."""
    return outputs


@traceable(client=client)
def parent(inputs: dict) -> dict:
    """Call ``child`` with *inputs* and return its result."""
    child_outputs = child(inputs)
    return child_outputs


parent(inputs)
import re

import openai
from langsmith import Client
from langsmith.wrappers import wrap_openai

# Regular-expression patterns for several kinds of personally identifiable
# information.
SSN_PATTERN = re.compile(r'\b\d{3}-\d{2}-\d{4}\b')
CREDIT_CARD_PATTERN = re.compile(r'\b(?:\d[ -]*?){13,16}\b')
# The TLD class is [A-Za-z]; a "|" inside a character class is a literal pipe,
# not an alternation.
EMAIL_PATTERN = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,7}\b')
# A negative lookbehind is used instead of a leading \b: \b never matches
# between a space and "(" or "+", which left the opening parenthesis of
# "(123) 456-7890" outside the match.
PHONE_PATTERN = re.compile(r'(?<!\w)(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b')
FULL_NAME_PATTERN = re.compile(r'\b([A-Z][a-z]*\s[A-Z][a-z]*)\b')


def regex_anonymize(text):
    """Anonymize sensitive information in *text* using the regex patterns.

    The patterns are applied in a fixed order (SSN, credit card, email,
    phone, full name), each pass working on the output of the previous one.

    Args:
        text (str): The input text to anonymize.

    Returns:
        str: The text with every match replaced by its placeholder.
    """
    # Replace sensitive information with placeholders.
    text = SSN_PATTERN.sub('[REDACTED SSN]', text)
    text = CREDIT_CARD_PATTERN.sub('[REDACTED CREDIT CARD]', text)
    text = EMAIL_PATTERN.sub('[REDACTED EMAIL]', text)
    text = PHONE_PATTERN.sub('[REDACTED PHONE]', text)
    text = FULL_NAME_PATTERN.sub('[REDACTED NAME]', text)
    return text


def recursive_anonymize(data, depth=10):
    """Recursively walk a data structure and anonymize the strings in it.

    Dicts and lists are rebuilt with their values/items processed, strings
    go through ``regex_anonymize``, and any other type is returned unchanged.

    Args:
        data (any): The input data to anonymize.
        depth (int): Remaining recursion budget, to prevent excessive
            recursion. When it reaches 0 the value is returned as-is, so
            content nested deeper than the budget is NOT anonymized.

    Returns:
        any: The anonymized data. Dicts and lists are new objects; the input
        is not modified.
    """
    if depth == 0:
        return data
    if isinstance(data, dict):
        return {
            key: recursive_anonymize(value, depth - 1)
            for key, value in data.items()
        }
    if isinstance(data, list):
        return [recursive_anonymize(item, depth - 1) for item in data]
    if isinstance(data, str):
        return regex_anonymize(data)
    return data


openai_client = wrap_openai(openai.Client())

# Initialize the LangSmith Client with the anonymization function.
langsmith_client = Client(
    hide_inputs=recursive_anonymize,
    hide_outputs=recursive_anonymize,
)

# The resulting trace keeps its metadata, but inputs and outputs are anonymized.
response_with_anonymization = openai_client.chat.completions.create(
    model="gpt-5.4-mini",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "My name is John Doe, my SSN is 123-45-6789, my credit card number is 4111 1111 1111 1111, my email is john.doe@example.com, and my phone number is (123) 456-7890."},
    ],
    langsmith_extra={"client": langsmith_client},
)

# The resulting trace does NOT anonymize inputs and outputs.
response_without_anonymization = openai_client.chat.completions.create(
    model="gpt-5.4-mini",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "My name is John Doe, my SSN is 123-45-6789, my credit card number is 4111 1111 1111 1111, my email is john.doe@example.com, and my phone number is (123) 456-7890."},
    ],
)
import openai
from langsmith import Client
from langsmith.wrappers import wrap_openai
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine

anonymizer = AnonymizerEngine()
analyzer = AnalyzerEngine()


def presidio_anonymize(data):
    """Anonymize sensitive information sent by the user or returned by the model.

    The messages to process are taken from ``data['messages']`` when that is
    non-empty, otherwise from ``data['choices'][0]['message']``. Each message's
    string ``content`` is analyzed for PERSON, PHONE_NUMBER, EMAIL_ADDRESS and
    US_SSN entities (language ``'en'``) and replaced by the anonymized text.

    NOTE: messages are updated in place, so the dicts inside *data* are
    mutated and the same *data* object is returned.

    Args:
        data (any): The data to anonymize; accessed with ``dict.get``.

    Returns:
        any: *data*, with message contents anonymized where possible. It is
        returned unchanged when no usable message list is found.
    """
    message_list = data.get('messages')
    if not message_list:
        # A missing, None or empty 'choices' falls back to a single empty
        # choice instead of raising on the [0] lookup.
        choices = data.get('choices') or [{}]
        message_list = [choices[0].get('message')]

    if not message_list or not all(isinstance(msg, dict) and msg for msg in message_list):
        return data

    for message in message_list:
        content = message.get('content', '')
        if not isinstance(content, str):
            # 'content' may be None or a non-string value; there is no text
            # to analyze, so leave the message untouched.
            continue
        if not content.strip():
            print("检测到空内容。跳过匿名化。")
            continue
        results = analyzer.analyze(
            text=content,
            entities=["PERSON", "PHONE_NUMBER", "EMAIL_ADDRESS", "US_SSN"],
            language='en',
        )
        anonymized_result = anonymizer.anonymize(
            text=content,
            analyzer_results=results,
        )
        message['content'] = anonymized_result.text
    return data


openai_client = wrap_openai(openai.Client())

# Initialize the LangSmith Client with the anonymization function.
langsmith_client = Client(
    hide_inputs=presidio_anonymize,
    hide_outputs=presidio_anonymize,
)

# The resulting trace keeps its metadata, but inputs and outputs are anonymized.
response_with_anonymization = openai_client.chat.completions.create(
    model="gpt-5.4-mini",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "My name is Slim Shady, call me at 313-666-7440 or email me at real.slim.shady@gmail.com"},
    ],
    langsmith_extra={"client": langsmith_client},
)

# The resulting trace does NOT anonymize inputs and outputs.
response_without_anonymization = openai_client.chat.completions.create(
    model="gpt-5.4-mini",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "My name is Slim Shady, call me at 313-666-7440 or email me at real.slim.shady@gmail.com"},
    ],
)