never executed always true always false
    1 module PureClaw.Agent.AgentDef
    2   ( -- * Agent name (smart constructor)
    3     AgentName
    4   , unAgentName
    5   , mkAgentName
    6   , AgentNameError (..)
    7     -- * TOML frontmatter extraction
    8   , extractFrontmatter
    9     -- * Agent configuration (TOML frontmatter)
   10   , AgentConfig (..)
   11   , defaultAgentConfig
   12   , parseAgentsMd
   13   , AgentsMdParseError (..)
   14     -- * Agent definition
   15   , AgentDef (..)
   16     -- * Prompt composition
   17   , composeAgentPrompt
   18   , composeAgentPromptWithBootstrap
   19     -- * Discovery and loading
   20   , discoverAgents
   21   , loadAgent
   22     -- * Workspace validation and setup
   23   , WorkspaceError (..)
   24   , validateWorkspace
   25   , ensureDefaultWorkspace
   26     -- * Override precedence
   27   , resolveOverride
   28   ) where
   29 
   30 import Control.Applicative ((<|>))
   31 import Control.Exception qualified as Exc
   32 import Data.Aeson qualified as Aeson
   33 import Data.Char qualified as Char
   34 import Data.Either (rights)
   35 import Data.List (isPrefixOf)
   36 import Data.Text (Text)
   37 import Data.Text qualified as T
   38 import Data.Text.IO qualified as TIO
   39 import System.Directory qualified as Dir
   40 import System.FilePath (isAbsolute, (</>))
   41 import System.Posix.Files qualified as PF
   42 import Toml (TomlCodec, (.=))
   43 import Toml qualified
   44 
   45 import PureClaw.Handles.Log (LogHandle (..))
   46 
   47 -- | Validated agent name. The data constructor is intentionally NOT exported —
   48 -- the only way to obtain an 'AgentName' is through 'mkAgentName'.
   49 newtype AgentName = AgentName { unAgentName :: Text }
   50   deriving stock (Eq, Ord, Show)
   51 
   52 -- | Reasons a raw 'Text' cannot be promoted to an 'AgentName'.
   53 data AgentNameError
   54   = AgentNameEmpty
   55   | AgentNameTooLong
   56   | AgentNameInvalidChars Text
   57   | AgentNameLeadingDot
   58   deriving stock (Eq, Show)
   59 
   60 -- | Maximum allowed length for an agent name.
   61 agentNameMaxLength :: Int
   62 agentNameMaxLength = 64
   63 
   64 -- | Valid character predicate: ASCII letters, digits, underscore, hyphen.
   65 isValidAgentNameChar :: Char -> Bool
   66 isValidAgentNameChar c =
   67   Char.isAsciiUpper c
   68     || Char.isAsciiLower c
   69     || Char.isDigit c
   70     || c == '_'
   71     || c == '-'
   72 
   73 -- | Smart constructor. Rejects empty names, names longer than 64 characters,
   74 -- names with a leading dot (hidden), and names containing any character
   75 -- outside @[a-zA-Z0-9_-]@ (which in particular rejects @.@, @/@, and null).
   76 mkAgentName :: Text -> Either AgentNameError AgentName
   77 mkAgentName raw
   78   | T.null raw = Left AgentNameEmpty
   79   | T.length raw > agentNameMaxLength = Left AgentNameTooLong
   80   | T.head raw == '.' && T.all isValidAgentNameChar (T.tail raw) = Left AgentNameLeadingDot
   81   | not (T.all isValidAgentNameChar raw) = Left (AgentNameInvalidChars raw)
   82   | otherwise = Right (AgentName raw)
   83 
   84 -- | Custom 'Aeson.FromJSON' routes through 'mkAgentName' so that invalid
   85 -- names on disk cannot bypass the smart constructor.
   86 -- | Split a TOML frontmatter fence off the front of a document. Recognizes a
   87 -- leading @---\\n@, a terminating @\\n---\\n@, and returns the inner block as
   88 -- the first component and the body after the closer as the second. If the
   89 -- input does not start with a fence or the fence is not closed, returns
   90 -- @(Nothing, originalInput)@ unchanged.
   91 extractFrontmatter :: Text -> (Maybe Text, Text)
   92 extractFrontmatter input =
   93   case T.stripPrefix "---\n" input of
   94     Nothing -> (Nothing, input)
   95     Just rest ->
   96       case T.breakOn "\n---\n" rest of
   97         (_, "") -> (Nothing, input)  -- no closing fence
   98         (inner, afterBreak) ->
   99           let body = T.drop (T.length ("\n---\n" :: Text)) afterBreak
  100           in (Just inner, body)
  101 
  102 instance Aeson.FromJSON AgentName where
  103   parseJSON = Aeson.withText "AgentName" $ \t ->
  104     case mkAgentName t of
  105       Right n -> pure n
  106       Left err -> fail ("invalid AgentName: " ++ show err)
  107 
  108 -- | Configuration loaded from the TOML frontmatter of an @AGENTS.md@ file.
  109 -- All fields are optional; unknown fields are ignored by the codec.
  110 data AgentConfig = AgentConfig
  111   { _ac_model       :: Maybe Text
  112   , _ac_toolProfile :: Maybe Text
  113   , _ac_workspace   :: Maybe Text
  114   } deriving stock (Eq, Show)
  115 
  116 -- | An 'AgentConfig' with every field unset.
  117 defaultAgentConfig :: AgentConfig
  118 defaultAgentConfig = AgentConfig Nothing Nothing Nothing
  119 
  120 agentConfigCodec :: TomlCodec AgentConfig
  121 agentConfigCodec = AgentConfig
  122   <$> Toml.dioptional (Toml.text "model")        .= _ac_model
  123   <*> Toml.dioptional (Toml.text "tool_profile") .= _ac_toolProfile
  124   <*> Toml.dioptional (Toml.text "workspace")    .= _ac_workspace
  125 
  126 -- | Errors that may occur while parsing an @AGENTS.md@ document.
  127 newtype AgentsMdParseError
  128   = AgentsMdTomlError Text
  129   deriving stock (Eq, Show)
  130 
  131 -- | Parse an @AGENTS.md@ document: extract the optional TOML frontmatter,
  132 -- decode it as an 'AgentConfig', and return the remaining body. A document
  133 -- with no frontmatter yields 'defaultAgentConfig' and the whole input as the
  134 -- body.
  135 parseAgentsMd :: Text -> Either AgentsMdParseError (AgentConfig, Text)
  136 parseAgentsMd input =
  137   case extractFrontmatter input of
  138     (Nothing, body) -> Right (defaultAgentConfig, body)
  139     (Just "", body) -> Right (defaultAgentConfig, body)
  140     (Just toml, body) ->
  141       case Toml.decode agentConfigCodec toml of
  142         Left errs -> Left (AgentsMdTomlError (Toml.prettyTomlDecodeErrors errs))
  143         Right cfg -> Right (cfg, body)
  144 
  145 -- | A discovered and loaded agent: its validated name, the on-disk directory
  146 -- that holds its bootstrap files, and its parsed @AGENTS.md@ frontmatter
  147 -- (falling back to 'defaultAgentConfig' when no frontmatter is present).
  148 data AgentDef = AgentDef
  149   { _ad_name   :: AgentName
  150   , _ad_dir    :: FilePath
  151   , _ad_config :: AgentConfig
  152   } deriving stock (Eq, Show)
  153 
  154 -- | Bootstrap file types, in the order they should be injected into the
  155 -- system prompt.
  156 data SectionKind = SoulK | UserK | AgentsK | MemoryK | IdentityK | ToolsK | BootstrapK
  157   deriving stock (Eq, Show)
  158 
  159 sectionFileName :: SectionKind -> FilePath
  160 sectionFileName SoulK      = "SOUL.md"
  161 sectionFileName UserK      = "USER.md"
  162 sectionFileName AgentsK    = "AGENTS.md"
  163 sectionFileName MemoryK    = "MEMORY.md"
  164 sectionFileName IdentityK  = "IDENTITY.md"
  165 sectionFileName ToolsK     = "TOOLS.md"
  166 sectionFileName BootstrapK = "BOOTSTRAP.md"
  167 
  168 sectionMarker :: SectionKind -> Text
  169 sectionMarker SoulK      = "--- SOUL ---"
  170 sectionMarker UserK      = "--- USER ---"
  171 sectionMarker AgentsK    = "--- AGENTS ---"
  172 sectionMarker MemoryK    = "--- MEMORY ---"
  173 sectionMarker IdentityK  = "--- IDENTITY ---"
  174 sectionMarker ToolsK     = "--- TOOLS ---"
  175 sectionMarker BootstrapK = "--- BOOTSTRAP ---"
  176 
  177 -- | Maximum raw file size we will read. Anything larger is rejected with a
  178 -- log warning and skipped.
  179 maxBootstrapFileBytes :: Integer
  180 maxBootstrapFileBytes = 1024 * 1024
  181 
  182 -- | Truncate a section body to @limit@ characters, appending the exact
  183 -- truncation marker. Strings at or under the limit are returned as-is.
  184 truncateSection :: Int -> Text -> Text
  185 truncateSection limit txt
  186   | T.length txt <= limit = txt
  187   | otherwise =
  188       T.take limit txt
  189         <> "\n[...truncated at " <> T.pack (show limit) <> " chars...]"
  190 
  191 -- | Read a single bootstrap section file, applying size/empty/truncation
  192 -- rules. Returns 'Nothing' when the file is missing, empty (including
  193 -- whitespace-only), or rejected as oversized.
  194 readSection :: LogHandle -> FilePath -> SectionKind -> Int -> IO (Maybe Text)
  195 readSection lg dir kind limit = do
  196   let path = dir </> sectionFileName kind
  197   exists <- Dir.doesFileExist path
  198   if not exists
  199     then pure Nothing
  200     else do
  201       size <- Dir.getFileSize path
  202       if size > maxBootstrapFileBytes
  203         then do
  204           _lh_logWarn lg $
  205             "Skipping oversized bootstrap file (>" <> T.pack (show maxBootstrapFileBytes) <>
  206             " bytes): " <> T.pack path
  207           pure Nothing
  208         else do
  209           raw <- Exc.try (TIO.readFile path) :: IO (Either Exc.IOException Text)
  210           case raw of
  211             Left e -> do
  212               _lh_logWarn lg $
  213                 "Failed to read bootstrap file " <> T.pack path <> ": " <> T.pack (show e)
  214               pure Nothing
  215             Right txt ->
  216               let contents = case kind of
  217                     AgentsK -> case parseAgentsMd txt of
  218                       Right (_, body) -> body
  219                       Left _ -> txt
  220                     _ -> txt
  221                   trimmed = T.dropWhileEnd Char.isSpace contents
  222               in if T.null (T.strip trimmed)
  223                    then pure Nothing
  224                    else pure (Just (truncateSection limit trimmed))
  225 
  226 -- | Compose a system prompt from an agent's bootstrap files. Files are read
  227 -- in the fixed injection order (SOUL, USER, AGENTS, MEMORY, IDENTITY, TOOLS,
  228 -- BOOTSTRAP), missing or empty (including whitespace-only) files are
  229 -- skipped, oversized files (>1MB) are rejected with a log warning, and any
  230 -- section exceeding @limit@ characters is truncated with the exact marker
  231 -- @"\\n[...truncated at \<limit\> chars...]"@.
  232 --
  233 -- For @AGENTS.md@, only the body after the TOML frontmatter fence is
  234 -- injected (the frontmatter itself lives in '_ad_config').
  235 composeAgentPrompt :: LogHandle -> AgentDef -> Int -> IO Text
  236 composeAgentPrompt lg def limit =
  237   composeAgentPromptWithBootstrap lg def limit False
  238 
  239 -- | Like 'composeAgentPrompt', but when @bootstrapConsumed@ is 'True' the
  240 -- @BOOTSTRAP.md@ section is skipped entirely (used after the first
  241 -- @StreamDone@ in a session).
  242 composeAgentPromptWithBootstrap :: LogHandle -> AgentDef -> Int -> Bool -> IO Text
  243 composeAgentPromptWithBootstrap lg def limit bootstrapConsumed = do
  244   let kinds =
  245         [ SoulK, UserK, AgentsK, MemoryK, IdentityK, ToolsK ]
  246         <> [ BootstrapK | not bootstrapConsumed ]
  247   sections <- mapM (\k -> readSection lg (_ad_dir def) k limit) kinds
  248   let rendered = [ sectionMarker k <> "\n" <> body
  249                  | (k, Just body) <- zip kinds sections
  250                  ]
  251   pure (T.intercalate "\n\n" rendered)
  252 
  253 -- | Enumerate agent directories under @parent@. Subdirectories whose name
  254 -- is not a valid 'AgentName' are skipped with a log warning. A missing
  255 -- parent directory returns the empty list (no error).
  256 discoverAgents :: LogHandle -> FilePath -> IO [AgentDef]
  257 discoverAgents lg parent = do
  258   exists <- Dir.doesDirectoryExist parent
  259   if not exists
  260     then pure []
  261     else do
  262       entries <- Dir.listDirectory parent
  263       results <- mapM tryOne entries
  264       pure (rights results)
  265   where
  266     tryOne entry = do
  267       let full = parent </> entry
  268       isDir <- Dir.doesDirectoryExist full
  269       if not isDir
  270         then pure (Left ())
  271         else case mkAgentName (T.pack entry) of
  272           Left err -> do
  273             _lh_logWarn lg $
  274               "Skipping invalid agent directory name " <> T.pack (show entry) <>
  275               ": " <> T.pack (show err)
  276             pure (Left ())
  277           Right name -> do
  278             cfg <- loadAgentConfig full
  279             pure (Right AgentDef { _ad_name = name, _ad_dir = full, _ad_config = cfg })
  280 
  281 -- | Load an agent by validated name from @parent@. Returns 'Nothing' if the
  282 -- corresponding directory does not exist.
  283 loadAgent :: FilePath -> AgentName -> IO (Maybe AgentDef)
  284 loadAgent parent name = do
  285   let dir = parent </> T.unpack (unAgentName name)
  286   exists <- Dir.doesDirectoryExist dir
  287   if not exists
  288     then pure Nothing
  289     else do
  290       cfg <- loadAgentConfig dir
  291       pure (Just AgentDef { _ad_name = name, _ad_dir = dir, _ad_config = cfg })
  292 
  293 -- | Read and parse the @AGENTS.md@ frontmatter inside an agent directory.
  294 -- If the file is missing or fails to parse, returns 'defaultAgentConfig'.
  295 loadAgentConfig :: FilePath -> IO AgentConfig
  296 loadAgentConfig dir = do
  297   let path = dir </> "AGENTS.md"
  298   exists <- Dir.doesFileExist path
  299   if not exists
  300     then pure defaultAgentConfig
  301     else do
  302       raw <- Exc.try (TIO.readFile path) :: IO (Either Exc.IOException Text)
  303       case raw of
  304         Left _ -> pure defaultAgentConfig
  305         Right txt -> case parseAgentsMd txt of
  306           Right (cfg, _) -> pure cfg
  307           Left _ -> pure defaultAgentConfig
  308 
  309 -- | Validation error for an agent workspace path.
  310 data WorkspaceError
  311   = WorkspaceNotAbsolute Text
  312   | WorkspaceDoesNotExist FilePath
  313   | WorkspaceDenied FilePath Text  -- ^ denied canonical path + reason
  314   deriving stock (Show, Eq)
  315 
  316 -- | Absolute system directories that must never be used as a workspace.
  317 -- Checked as equality or prefix (@base <> "/"@ prefix) against the
  318 -- canonicalized input.
  319 deniedAbsoluteRoots :: [FilePath]
  320 deniedAbsoluteRoots =
  321   [ "/", "/etc", "/usr", "/bin", "/sbin", "/var", "/sys", "/proc", "/dev" ]
  322 
  323 -- | Home-relative segments that must never be used as a workspace. The
  324 -- effective denied path is @homeDir </> segment@.
  325 deniedHomeSegments :: [FilePath]
  326 deniedHomeSegments =
  327   [ ".ssh", ".gnupg", ".aws", ".config", ".pureclaw" ]
  328 
  329 -- | @isUnderPath base target@ is 'True' iff @target@ equals @base@ or is
  330 -- inside @base@. Both arguments are expected to already be canonicalized.
  331 isUnderPath :: FilePath -> FilePath -> Bool
  332 isUnderPath base target =
  333   target == base
  334     || (base /= "/" && (base <> "/") `isPrefixOf` target)
  335 
  336 -- | Expand a leading tilde in a raw workspace path against a supplied home dir.
  337 expandTilde :: FilePath -> Text -> FilePath
  338 expandTilde home raw
  339   | raw == "~" = home
  340   | Just rest <- T.stripPrefix "~/" raw = home </> T.unpack rest
  341   | otherwise = T.unpack raw
  342 
  343 -- | Validate a user-supplied workspace path. Tilde-expands with the supplied
  344 -- home dir, requires the path to be absolute, requires the directory to
  345 -- exist, canonicalizes (resolving symlinks), then checks it against the
  346 -- denylist of system and home-relative paths.
  347 validateWorkspace :: FilePath -> Text -> IO (Either WorkspaceError FilePath)
  348 validateWorkspace homeDir raw = do
  349   let expanded = expandTilde homeDir raw
  350   if not (isAbsolute expanded)
  351     then pure (Left (WorkspaceNotAbsolute raw))
  352     else do
  353       exists <- Dir.doesDirectoryExist expanded
  354       if not exists
  355         then pure (Left (WorkspaceDoesNotExist expanded))
  356         else do
  357           canonical <- Dir.canonicalizePath expanded
  358           reason <- checkDenylist homeDir canonical
  359           case reason of
  360             Just r -> pure (Left (WorkspaceDenied canonical r))
  361             Nothing -> pure (Right canonical)
  362 
  363 -- | Check a canonicalized path against the denylist. Returns 'Just' a
  364 -- human-readable reason if denied, 'Nothing' otherwise. Denylist entries
  365 -- are themselves canonicalized (when they exist) so that e.g. macOS's
  366 -- @/etc -> /private/etc@ symlink is handled correctly.
  367 checkDenylist :: FilePath -> FilePath -> IO (Maybe Text)
  368 checkDenylist homeDir canonical = do
  369   absHits <- mapM canonicalizeIfExists deniedAbsoluteRoots
  370   let absMatch =
  371         [ orig
  372         | (orig, canon) <- zip deniedAbsoluteRoots absHits
  373         , isUnderPath canon canonical
  374         ]
  375   case absMatch of
  376     (hit : _) -> pure (Just ("is inside system directory " <> T.pack hit))
  377     [] -> do
  378       homeCanons <- mapM (canonicalizeIfExists . (homeDir </>)) deniedHomeSegments
  379       let homeMatch =
  380             [ seg
  381             | (seg, canon) <- zip deniedHomeSegments homeCanons
  382             , isUnderPath canon canonical
  383             ]
  384       case homeMatch of
  385         (seg : _) -> pure (Just ("is inside sensitive home directory ~/" <> T.pack seg))
  386         [] -> pure Nothing
  387 
  388 -- | Canonicalize a path if it exists; otherwise return it unchanged. Used
  389 -- to normalize denylist entries so symlinks like @/etc -> /private/etc@
  390 -- resolve to the same canonical form as user-supplied paths.
  391 canonicalizeIfExists :: FilePath -> IO FilePath
  392 canonicalizeIfExists p = do
  393   exists <- Dir.doesDirectoryExist p
  394   if exists then Dir.canonicalizePath p else pure p
  395 
  396 -- | Ensure the default workspace directory for an agent exists with
  397 -- @0o700@ permissions. Creates parent directories as needed. Idempotent.
  398 -- The workspace path is @\<pureclawDir\>/agents/\<name\>/workspace/@.
  399 ensureDefaultWorkspace :: FilePath -> AgentName -> IO FilePath
  400 ensureDefaultWorkspace pureclawDir name = do
  401   let workspaceDir =
  402         pureclawDir </> "agents" </> T.unpack (unAgentName name) </> "workspace"
  403   Dir.createDirectoryIfMissing True workspaceDir
  404   PF.setFileMode workspaceDir PF.ownerModes
  405   pure workspaceDir
  406 
  407 -- | Resolve a config value using precedence: CLI > frontmatter > config >
  408 -- default. Returns the first non-'Nothing' argument.
  409 resolveOverride :: Maybe a -> Maybe a -> Maybe a -> Maybe a -> Maybe a
  410 resolveOverride cli fm cfg def = cli <|> fm <|> cfg <|> def