never executed always true always false
1 module PureClaw.Transcript.Provider
2 ( -- * Transcript-logging provider wrapper
3 mkTranscriptProvider
4 -- * Header redaction
5 , redactHeaders
6 ) where
7
8 import Data.Aeson qualified as Aeson
9 import Data.ByteString qualified as BS
10 import Data.ByteString.Lazy qualified as LBS
11 import Data.IORef
12 import Data.Map.Strict qualified as Map
13 import Data.Text (Text)
14 import Data.Text qualified as T
15 import Data.Text.Encoding qualified as TE
16 import Data.Time
17
18 import PureClaw.Core.Types
19 import PureClaw.Handles.Transcript
20 import PureClaw.Providers.Class
21 import PureClaw.Transcript.Types
22
23 -- | Internal newtype wrapping a provider with transcript logging.
24 newtype TranscriptProvider = TranscriptProvider
25 { _tp_inner :: (TranscriptHandle, Text, SomeProvider) }
26
27 instance Provider TranscriptProvider where
28 complete (TranscriptProvider (th, source, inner)) req = do
29 corrId <- generateId
30 reqId <- generateId
31 now <- getCurrentTime
32 -- Serialize and redact the request
33 let reqBytes = LBS.toStrict (Aeson.encode req)
34 redacted = redactHeaders (decodeUtf8Lenient reqBytes)
35 reqEntry = TranscriptEntry
36 { _te_id = reqId
37 , _te_timestamp = now
38 , _te_harness = Nothing
39 , _te_model = Just source
40 , _te_direction = Request
41 , _te_payload = encodePayload (encodeUtf8Strict redacted)
42 , _te_durationMs = Nothing
43 , _te_correlationId = corrId
44 , _te_metadata = Map.empty
45 }
46 _th_record th reqEntry
47 -- Call the inner provider
48 resp <- complete inner req
49 -- Log the response (not redacted)
50 respId <- generateId
51 respNow <- getCurrentTime
52 let respBytes = LBS.toStrict (Aeson.encode resp)
53 respMeta = buildResponseMetadata resp
54 respEntry = TranscriptEntry
55 { _te_id = respId
56 , _te_timestamp = respNow
57 , _te_harness = Nothing
58 , _te_model = Just source
59 , _te_direction = Response
60 , _te_payload = encodePayload respBytes
61 , _te_durationMs = Nothing
62 , _te_correlationId = corrId
63 , _te_metadata = respMeta
64 }
65 _th_record th respEntry
66 pure resp
67
68 completeStream (TranscriptProvider (th, source, inner)) req callback = do
69 corrId <- generateId
70 reqId <- generateId
71 now <- getCurrentTime
72 -- Serialize and redact the request
73 let reqBytes = LBS.toStrict (Aeson.encode req)
74 redacted = redactHeaders (decodeUtf8Lenient reqBytes)
75 reqEntry = TranscriptEntry
76 { _te_id = reqId
77 , _te_timestamp = now
78 , _te_harness = Nothing
79 , _te_model = Just source
80 , _te_direction = Request
81 , _te_payload = encodePayload (encodeUtf8Strict redacted)
82 , _te_durationMs = Nothing
83 , _te_correlationId = corrId
84 , _te_metadata = Map.empty
85 }
86 _th_record th reqEntry
87 -- Wrap the callback to capture StreamDone
88 doneRef <- newIORef False
89 completeStream inner req $ \ev -> do
90 callback ev
91 case ev of
92 StreamDone resp -> do
93 alreadyDone <- readIORef doneRef
94 if alreadyDone then pure ()
95 else do
96 writeIORef doneRef True
97 respId <- generateId
98 respNow <- getCurrentTime
99 let respBytes = LBS.toStrict (Aeson.encode resp)
100 respMeta = buildResponseMetadata resp
101 respEntry = TranscriptEntry
102 { _te_id = respId
103 , _te_timestamp = respNow
104 , _te_harness = Nothing
105 , _te_model = Just source
106 , _te_direction = Response
107 , _te_payload = encodePayload respBytes
108 , _te_durationMs = Nothing
109 , _te_correlationId = corrId
110 , _te_metadata = respMeta
111 }
112 _th_record th respEntry
113 _ -> pure ()
114
115 listModels (TranscriptProvider (_th, _source, inner)) = listModels inner
116
117 -- | Wrap a 'SomeProvider' with transcript logging. Every @complete@ and
118 -- @completeStream@ call records a Request entry (with redacted headers)
119 -- and a Response entry (with token usage metadata).
120 mkTranscriptProvider :: TranscriptHandle -> Text -> SomeProvider -> SomeProvider
121 mkTranscriptProvider th source inner =
122 MkProvider (TranscriptProvider (th, source, inner))
123
124 -- | Build metadata map from a 'CompletionResponse'.
125 -- Includes model name and token usage when available.
126 buildResponseMetadata :: CompletionResponse -> Map.Map Text Aeson.Value
127 buildResponseMetadata resp =
128 let modelMeta = Map.singleton "model" (Aeson.String (unModelId (_crsp_model resp)))
129 usageMeta = case _crsp_usage resp of
130 Nothing -> Map.empty
131 Just (Usage inp outp) -> Map.fromList
132 [ ("input_tokens", Aeson.Number (fromIntegral inp))
133 , ("output_tokens", Aeson.Number (fromIntegral outp))
134 ]
135 in Map.union modelMeta usageMeta
136
137 -- | Redact sensitive header values from JSON-serialized text.
138 -- Matches patterns like @"Authorization": "Bearer sk-..."@ and replaces
139 -- the value portion.
140 redactHeaders :: Text -> Text
141 redactHeaders = redactBearer . redactApiKey . redactAnthropicKey
142
143 redactBearer :: Text -> Text
144 redactBearer t =
145 let parts = T.breakOnAll "\"Authorization\": \"Bearer " t
146 in case parts of
147 [] -> t
148 _ -> foldParts "\"Authorization\": \"Bearer " "\"Authorization\": \"Bearer <REDACTED>\"" t
149
150 redactApiKey :: Text -> Text
151 redactApiKey t =
152 let parts = T.breakOnAll "\"x-api-key\": \"" t
153 in case parts of
154 [] -> t
155 _ -> foldParts "\"x-api-key\": \"" "\"x-api-key\": \"<REDACTED>\"" t
156
157 redactAnthropicKey :: Text -> Text
158 redactAnthropicKey t =
159 let parts = T.breakOnAll "\"anthropic-api-key\": \"" t
160 in case parts of
161 [] -> t
162 _ -> foldParts "\"anthropic-api-key\": \"" "\"anthropic-api-key\": \"<REDACTED>\"" t
163
164 -- | Replace @prefix<value>"@ with @replacement@ for each occurrence.
165 -- The value is everything from after the prefix to the next unescaped @"@.
166 foldParts :: Text -> Text -> Text -> Text
167 foldParts prefix replacement = go
168 where
169 go t = case T.breakOn prefix t of
170 (before, rest)
171 | T.null rest -> before
172 | otherwise ->
173 let afterPrefix = T.drop (T.length prefix) rest
174 -- Find the closing quote (skip the prefix, find next ")
175 afterValue = dropToClosingQuote afterPrefix
176 in before <> replacement <> go afterValue
177
178 -- | Drop characters until we find the closing unescaped double-quote.
179 dropToClosingQuote :: Text -> Text
180 dropToClosingQuote t = case T.uncons t of
181 Nothing -> T.empty
182 Just ('\\', rest) -> case T.uncons rest of
183 Nothing -> T.empty
184 Just (_, rest') -> dropToClosingQuote rest' -- skip escaped char
185 Just ('"', rest) -> rest -- found closing quote
186 Just (_, rest) -> dropToClosingQuote rest
187
188 -- | Generate a simple unique identifier using the current time in picoseconds.
189 -- Not a true UUID but sufficient for correlation within a single process.
190 generateId :: IO Text
191 generateId = do
192 now <- getCurrentTime
193 let picos = diffTimeToPicoseconds (utctDayTime now)
194 dayNum = toModifiedJulianDay (utctDay now)
195 pure (T.pack (show dayNum) <> "-" <> T.pack (show picos))
196
197 -- | Decode UTF-8 bytes to Text, replacing invalid sequences.
198 decodeUtf8Lenient :: BS.ByteString -> Text
199 decodeUtf8Lenient = TE.decodeUtf8Lenient
200
201 -- | Encode Text to UTF-8 bytes.
202 encodeUtf8Strict :: Text -> BS.ByteString
203 encodeUtf8Strict = TE.encodeUtf8