never executed always true always false
1 module PureClaw.Agent.AgentDef
2 ( -- * Agent name (smart constructor)
3 AgentName
4 , unAgentName
5 , mkAgentName
6 , AgentNameError (..)
7 -- * TOML frontmatter extraction
8 , extractFrontmatter
9 -- * Agent configuration (TOML frontmatter)
10 , AgentConfig (..)
11 , defaultAgentConfig
12 , parseAgentsMd
13 , AgentsMdParseError (..)
14 -- * Agent definition
15 , AgentDef (..)
16 -- * Prompt composition
17 , composeAgentPrompt
18 , composeAgentPromptWithBootstrap
19 -- * Discovery and loading
20 , discoverAgents
21 , loadAgent
22 -- * Workspace validation and setup
23 , WorkspaceError (..)
24 , validateWorkspace
25 , ensureDefaultWorkspace
26 -- * Override precedence
27 , resolveOverride
28 ) where
29
30 import Control.Applicative ((<|>))
31 import Control.Exception qualified as Exc
32 import Data.Aeson qualified as Aeson
33 import Data.Char qualified as Char
34 import Data.Either (rights)
35 import Data.List (isPrefixOf)
36 import Data.Text (Text)
37 import Data.Text qualified as T
38 import Data.Text.IO qualified as TIO
39 import System.Directory qualified as Dir
40 import System.FilePath (isAbsolute, (</>))
41 import System.Posix.Files qualified as PF
42 import Toml (TomlCodec, (.=))
43 import Toml qualified
44
45 import PureClaw.Handles.Log (LogHandle (..))
46
47 -- | Validated agent name. The data constructor is intentionally NOT exported —
48 -- the only way to obtain an 'AgentName' is through 'mkAgentName'.
49 newtype AgentName = AgentName { unAgentName :: Text }
50 deriving stock (Eq, Ord, Show)
51
52 -- | Reasons a raw 'Text' cannot be promoted to an 'AgentName'.
53 data AgentNameError
54 = AgentNameEmpty
55 | AgentNameTooLong
56 | AgentNameInvalidChars Text
57 | AgentNameLeadingDot
58 deriving stock (Eq, Show)
59
60 -- | Maximum allowed length for an agent name.
61 agentNameMaxLength :: Int
62 agentNameMaxLength = 64
63
64 -- | Valid character predicate: ASCII letters, digits, underscore, hyphen.
65 isValidAgentNameChar :: Char -> Bool
66 isValidAgentNameChar c =
67 Char.isAsciiUpper c
68 || Char.isAsciiLower c
69 || Char.isDigit c
70 || c == '_'
71 || c == '-'
72
73 -- | Smart constructor. Rejects empty names, names longer than 64 characters,
74 -- names with a leading dot (hidden), and names containing any character
75 -- outside @[a-zA-Z0-9_-]@ (which in particular rejects @.@, @/@, and null).
76 mkAgentName :: Text -> Either AgentNameError AgentName
77 mkAgentName raw
78 | T.null raw = Left AgentNameEmpty
79 | T.length raw > agentNameMaxLength = Left AgentNameTooLong
80 | T.head raw == '.' && T.all isValidAgentNameChar (T.tail raw) = Left AgentNameLeadingDot
81 | not (T.all isValidAgentNameChar raw) = Left (AgentNameInvalidChars raw)
82 | otherwise = Right (AgentName raw)
83
84 -- | Custom 'Aeson.FromJSON' routes through 'mkAgentName' so that invalid
85 -- names on disk cannot bypass the smart constructor.
86 -- | Split a TOML frontmatter fence off the front of a document. Recognizes a
87 -- leading @---\\n@, a terminating @\\n---\\n@, and returns the inner block as
88 -- the first component and the body after the closer as the second. If the
89 -- input does not start with a fence or the fence is not closed, returns
90 -- @(Nothing, originalInput)@ unchanged.
91 extractFrontmatter :: Text -> (Maybe Text, Text)
92 extractFrontmatter input =
93 case T.stripPrefix "---\n" input of
94 Nothing -> (Nothing, input)
95 Just rest ->
96 case T.breakOn "\n---\n" rest of
97 (_, "") -> (Nothing, input) -- no closing fence
98 (inner, afterBreak) ->
99 let body = T.drop (T.length ("\n---\n" :: Text)) afterBreak
100 in (Just inner, body)
101
102 instance Aeson.FromJSON AgentName where
103 parseJSON = Aeson.withText "AgentName" $ \t ->
104 case mkAgentName t of
105 Right n -> pure n
106 Left err -> fail ("invalid AgentName: " ++ show err)
107
108 -- | Configuration loaded from the TOML frontmatter of an @AGENTS.md@ file.
109 -- All fields are optional; unknown fields are ignored by the codec.
110 data AgentConfig = AgentConfig
111 { _ac_model :: Maybe Text
112 , _ac_toolProfile :: Maybe Text
113 , _ac_workspace :: Maybe Text
114 } deriving stock (Eq, Show)
115
116 -- | An 'AgentConfig' with every field unset.
117 defaultAgentConfig :: AgentConfig
118 defaultAgentConfig = AgentConfig Nothing Nothing Nothing
119
120 agentConfigCodec :: TomlCodec AgentConfig
121 agentConfigCodec = AgentConfig
122 <$> Toml.dioptional (Toml.text "model") .= _ac_model
123 <*> Toml.dioptional (Toml.text "tool_profile") .= _ac_toolProfile
124 <*> Toml.dioptional (Toml.text "workspace") .= _ac_workspace
125
126 -- | Errors that may occur while parsing an @AGENTS.md@ document.
127 newtype AgentsMdParseError
128 = AgentsMdTomlError Text
129 deriving stock (Eq, Show)
130
131 -- | Parse an @AGENTS.md@ document: extract the optional TOML frontmatter,
132 -- decode it as an 'AgentConfig', and return the remaining body. A document
133 -- with no frontmatter yields 'defaultAgentConfig' and the whole input as the
134 -- body.
135 parseAgentsMd :: Text -> Either AgentsMdParseError (AgentConfig, Text)
136 parseAgentsMd input =
137 case extractFrontmatter input of
138 (Nothing, body) -> Right (defaultAgentConfig, body)
139 (Just "", body) -> Right (defaultAgentConfig, body)
140 (Just toml, body) ->
141 case Toml.decode agentConfigCodec toml of
142 Left errs -> Left (AgentsMdTomlError (Toml.prettyTomlDecodeErrors errs))
143 Right cfg -> Right (cfg, body)
144
145 -- | A discovered and loaded agent: its validated name, the on-disk directory
146 -- that holds its bootstrap files, and its parsed @AGENTS.md@ frontmatter
147 -- (falling back to 'defaultAgentConfig' when no frontmatter is present).
148 data AgentDef = AgentDef
149 { _ad_name :: AgentName
150 , _ad_dir :: FilePath
151 , _ad_config :: AgentConfig
152 } deriving stock (Eq, Show)
153
154 -- | Bootstrap file types, in the order they should be injected into the
155 -- system prompt.
156 data SectionKind = SoulK | UserK | AgentsK | MemoryK | IdentityK | ToolsK | BootstrapK
157 deriving stock (Eq, Show)
158
159 sectionFileName :: SectionKind -> FilePath
160 sectionFileName SoulK = "SOUL.md"
161 sectionFileName UserK = "USER.md"
162 sectionFileName AgentsK = "AGENTS.md"
163 sectionFileName MemoryK = "MEMORY.md"
164 sectionFileName IdentityK = "IDENTITY.md"
165 sectionFileName ToolsK = "TOOLS.md"
166 sectionFileName BootstrapK = "BOOTSTRAP.md"
167
168 sectionMarker :: SectionKind -> Text
169 sectionMarker SoulK = "--- SOUL ---"
170 sectionMarker UserK = "--- USER ---"
171 sectionMarker AgentsK = "--- AGENTS ---"
172 sectionMarker MemoryK = "--- MEMORY ---"
173 sectionMarker IdentityK = "--- IDENTITY ---"
174 sectionMarker ToolsK = "--- TOOLS ---"
175 sectionMarker BootstrapK = "--- BOOTSTRAP ---"
176
177 -- | Maximum raw file size we will read. Anything larger is rejected with a
178 -- log warning and skipped.
179 maxBootstrapFileBytes :: Integer
180 maxBootstrapFileBytes = 1024 * 1024
181
182 -- | Truncate a section body to @limit@ characters, appending the exact
183 -- truncation marker. Strings at or under the limit are returned as-is.
184 truncateSection :: Int -> Text -> Text
185 truncateSection limit txt
186 | T.length txt <= limit = txt
187 | otherwise =
188 T.take limit txt
189 <> "\n[...truncated at " <> T.pack (show limit) <> " chars...]"
190
191 -- | Read a single bootstrap section file, applying size/empty/truncation
192 -- rules. Returns 'Nothing' when the file is missing, empty (including
193 -- whitespace-only), or rejected as oversized.
194 readSection :: LogHandle -> FilePath -> SectionKind -> Int -> IO (Maybe Text)
195 readSection lg dir kind limit = do
196 let path = dir </> sectionFileName kind
197 exists <- Dir.doesFileExist path
198 if not exists
199 then pure Nothing
200 else do
201 size <- Dir.getFileSize path
202 if size > maxBootstrapFileBytes
203 then do
204 _lh_logWarn lg $
205 "Skipping oversized bootstrap file (>" <> T.pack (show maxBootstrapFileBytes) <>
206 " bytes): " <> T.pack path
207 pure Nothing
208 else do
209 raw <- Exc.try (TIO.readFile path) :: IO (Either Exc.IOException Text)
210 case raw of
211 Left e -> do
212 _lh_logWarn lg $
213 "Failed to read bootstrap file " <> T.pack path <> ": " <> T.pack (show e)
214 pure Nothing
215 Right txt ->
216 let contents = case kind of
217 AgentsK -> case parseAgentsMd txt of
218 Right (_, body) -> body
219 Left _ -> txt
220 _ -> txt
221 trimmed = T.dropWhileEnd Char.isSpace contents
222 in if T.null (T.strip trimmed)
223 then pure Nothing
224 else pure (Just (truncateSection limit trimmed))
225
226 -- | Compose a system prompt from an agent's bootstrap files. Files are read
227 -- in the fixed injection order (SOUL, USER, AGENTS, MEMORY, IDENTITY, TOOLS,
228 -- BOOTSTRAP), missing or empty (including whitespace-only) files are
229 -- skipped, oversized files (>1MB) are rejected with a log warning, and any
230 -- section exceeding @limit@ characters is truncated with the exact marker
231 -- @"\\n[...truncated at \<limit\> chars...]"@.
232 --
233 -- For @AGENTS.md@, only the body after the TOML frontmatter fence is
234 -- injected (the frontmatter itself lives in '_ad_config').
235 composeAgentPrompt :: LogHandle -> AgentDef -> Int -> IO Text
236 composeAgentPrompt lg def limit =
237 composeAgentPromptWithBootstrap lg def limit False
238
239 -- | Like 'composeAgentPrompt', but when @bootstrapConsumed@ is 'True' the
240 -- @BOOTSTRAP.md@ section is skipped entirely (used after the first
241 -- @StreamDone@ in a session).
242 composeAgentPromptWithBootstrap :: LogHandle -> AgentDef -> Int -> Bool -> IO Text
243 composeAgentPromptWithBootstrap lg def limit bootstrapConsumed = do
244 let kinds =
245 [ SoulK, UserK, AgentsK, MemoryK, IdentityK, ToolsK ]
246 <> [ BootstrapK | not bootstrapConsumed ]
247 sections <- mapM (\k -> readSection lg (_ad_dir def) k limit) kinds
248 let rendered = [ sectionMarker k <> "\n" <> body
249 | (k, Just body) <- zip kinds sections
250 ]
251 pure (T.intercalate "\n\n" rendered)
252
253 -- | Enumerate agent directories under @parent@. Subdirectories whose name
254 -- is not a valid 'AgentName' are skipped with a log warning. A missing
255 -- parent directory returns the empty list (no error).
256 discoverAgents :: LogHandle -> FilePath -> IO [AgentDef]
257 discoverAgents lg parent = do
258 exists <- Dir.doesDirectoryExist parent
259 if not exists
260 then pure []
261 else do
262 entries <- Dir.listDirectory parent
263 results <- mapM tryOne entries
264 pure (rights results)
265 where
266 tryOne entry = do
267 let full = parent </> entry
268 isDir <- Dir.doesDirectoryExist full
269 if not isDir
270 then pure (Left ())
271 else case mkAgentName (T.pack entry) of
272 Left err -> do
273 _lh_logWarn lg $
274 "Skipping invalid agent directory name " <> T.pack (show entry) <>
275 ": " <> T.pack (show err)
276 pure (Left ())
277 Right name -> do
278 cfg <- loadAgentConfig full
279 pure (Right AgentDef { _ad_name = name, _ad_dir = full, _ad_config = cfg })
280
281 -- | Load an agent by validated name from @parent@. Returns 'Nothing' if the
282 -- corresponding directory does not exist.
283 loadAgent :: FilePath -> AgentName -> IO (Maybe AgentDef)
284 loadAgent parent name = do
285 let dir = parent </> T.unpack (unAgentName name)
286 exists <- Dir.doesDirectoryExist dir
287 if not exists
288 then pure Nothing
289 else do
290 cfg <- loadAgentConfig dir
291 pure (Just AgentDef { _ad_name = name, _ad_dir = dir, _ad_config = cfg })
292
293 -- | Read and parse the @AGENTS.md@ frontmatter inside an agent directory.
294 -- If the file is missing or fails to parse, returns 'defaultAgentConfig'.
295 loadAgentConfig :: FilePath -> IO AgentConfig
296 loadAgentConfig dir = do
297 let path = dir </> "AGENTS.md"
298 exists <- Dir.doesFileExist path
299 if not exists
300 then pure defaultAgentConfig
301 else do
302 raw <- Exc.try (TIO.readFile path) :: IO (Either Exc.IOException Text)
303 case raw of
304 Left _ -> pure defaultAgentConfig
305 Right txt -> case parseAgentsMd txt of
306 Right (cfg, _) -> pure cfg
307 Left _ -> pure defaultAgentConfig
308
309 -- | Validation error for an agent workspace path.
310 data WorkspaceError
311 = WorkspaceNotAbsolute Text
312 | WorkspaceDoesNotExist FilePath
313 | WorkspaceDenied FilePath Text -- ^ denied canonical path + reason
314 deriving stock (Show, Eq)
315
316 -- | Absolute system directories that must never be used as a workspace.
317 -- Checked as equality or prefix (@base <> "/"@ prefix) against the
318 -- canonicalized input.
319 deniedAbsoluteRoots :: [FilePath]
320 deniedAbsoluteRoots =
321 [ "/", "/etc", "/usr", "/bin", "/sbin", "/var", "/sys", "/proc", "/dev" ]
322
323 -- | Home-relative segments that must never be used as a workspace. The
324 -- effective denied path is @homeDir </> segment@.
325 deniedHomeSegments :: [FilePath]
326 deniedHomeSegments =
327 [ ".ssh", ".gnupg", ".aws", ".config", ".pureclaw" ]
328
329 -- | @isUnderPath base target@ is 'True' iff @target@ equals @base@ or is
330 -- inside @base@. Both arguments are expected to already be canonicalized.
331 isUnderPath :: FilePath -> FilePath -> Bool
332 isUnderPath base target =
333 target == base
334 || (base /= "/" && (base <> "/") `isPrefixOf` target)
335
336 -- | Expand a leading tilde in a raw workspace path against a supplied home dir.
337 expandTilde :: FilePath -> Text -> FilePath
338 expandTilde home raw
339 | raw == "~" = home
340 | Just rest <- T.stripPrefix "~/" raw = home </> T.unpack rest
341 | otherwise = T.unpack raw
342
343 -- | Validate a user-supplied workspace path. Tilde-expands with the supplied
344 -- home dir, requires the path to be absolute, requires the directory to
345 -- exist, canonicalizes (resolving symlinks), then checks it against the
346 -- denylist of system and home-relative paths.
347 validateWorkspace :: FilePath -> Text -> IO (Either WorkspaceError FilePath)
348 validateWorkspace homeDir raw = do
349 let expanded = expandTilde homeDir raw
350 if not (isAbsolute expanded)
351 then pure (Left (WorkspaceNotAbsolute raw))
352 else do
353 exists <- Dir.doesDirectoryExist expanded
354 if not exists
355 then pure (Left (WorkspaceDoesNotExist expanded))
356 else do
357 canonical <- Dir.canonicalizePath expanded
358 reason <- checkDenylist homeDir canonical
359 case reason of
360 Just r -> pure (Left (WorkspaceDenied canonical r))
361 Nothing -> pure (Right canonical)
362
363 -- | Check a canonicalized path against the denylist. Returns 'Just' a
364 -- human-readable reason if denied, 'Nothing' otherwise. Denylist entries
365 -- are themselves canonicalized (when they exist) so that e.g. macOS's
366 -- @/etc -> /private/etc@ symlink is handled correctly.
367 checkDenylist :: FilePath -> FilePath -> IO (Maybe Text)
368 checkDenylist homeDir canonical = do
369 absHits <- mapM canonicalizeIfExists deniedAbsoluteRoots
370 let absMatch =
371 [ orig
372 | (orig, canon) <- zip deniedAbsoluteRoots absHits
373 , isUnderPath canon canonical
374 ]
375 case absMatch of
376 (hit : _) -> pure (Just ("is inside system directory " <> T.pack hit))
377 [] -> do
378 homeCanons <- mapM (canonicalizeIfExists . (homeDir </>)) deniedHomeSegments
379 let homeMatch =
380 [ seg
381 | (seg, canon) <- zip deniedHomeSegments homeCanons
382 , isUnderPath canon canonical
383 ]
384 case homeMatch of
385 (seg : _) -> pure (Just ("is inside sensitive home directory ~/" <> T.pack seg))
386 [] -> pure Nothing
387
388 -- | Canonicalize a path if it exists; otherwise return it unchanged. Used
389 -- to normalize denylist entries so symlinks like @/etc -> /private/etc@
390 -- resolve to the same canonical form as user-supplied paths.
391 canonicalizeIfExists :: FilePath -> IO FilePath
392 canonicalizeIfExists p = do
393 exists <- Dir.doesDirectoryExist p
394 if exists then Dir.canonicalizePath p else pure p
395
396 -- | Ensure the default workspace directory for an agent exists with
397 -- @0o700@ permissions. Creates parent directories as needed. Idempotent.
398 -- The workspace path is @\<pureclawDir\>/agents/\<name\>/workspace/@.
399 ensureDefaultWorkspace :: FilePath -> AgentName -> IO FilePath
400 ensureDefaultWorkspace pureclawDir name = do
401 let workspaceDir =
402 pureclawDir </> "agents" </> T.unpack (unAgentName name) </> "workspace"
403 Dir.createDirectoryIfMissing True workspaceDir
404 PF.setFileMode workspaceDir PF.ownerModes
405 pure workspaceDir
406
407 -- | Resolve a config value using precedence: CLI > frontmatter > config >
408 -- default. Returns the first non-'Nothing' argument.
409 resolveOverride :: Maybe a -> Maybe a -> Maybe a -> Maybe a -> Maybe a
410 resolveOverride cli fm cfg def = cli <|> fm <|> cfg <|> def