-
Notifications
You must be signed in to change notification settings - Fork 197
Expand file tree
/
Copy pathAnalyze.hs
More file actions
449 lines (421 loc) · 19.3 KB
/
Analyze.hs
File metadata and controls
449 lines (421 loc) · 19.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
{-# LANGUAGE RecordWildCards #-}
module App.Fossa.Ficus.Analyze (
analyzeWithFicus,
-- Exported for use in hubble
analyzeWithFicusMain,
-- Exported for testing
singletonFicusMessage,
vendoredDepsToSourceUnit,
)
where
import App.Fossa.EmbeddedBinary (BinaryPaths, toPath, withFicusBinary)
import App.Fossa.Ficus.Types (
FicusAllFlag (..),
FicusAnalysisFlag (..),
FicusAnalysisResults (..),
FicusConfig (..),
FicusDebug (..),
FicusError (..),
FicusFinding (..),
FicusMessage (..),
FicusMessageData (..),
FicusMessages (..),
FicusPerStrategyFlag (..),
FicusScanStats (..),
FicusSnippetScanResults (..),
FicusStrategy (FicusStrategySnippetScan, FicusStrategyVendetta),
FicusVendoredDependency (..),
FicusVendoredDependencyScanResults (..),
)
import App.Types (ProjectRevision (..))
import Control.Applicative ((<|>))
import Control.Carrier.Diagnostics (Diagnostics)
import Control.Concurrent.Async (async, wait)
import Control.Effect.Lift (Has, Lift, sendIO)
import Control.Monad (when)
import Data.Aeson (decode, decodeStrictText)
import Data.Aeson qualified as Aeson
import Data.ByteString.Lazy qualified as BL
import Data.Conduit ((.|))
import Data.Conduit qualified as Conduit
import Data.Conduit.Combinators qualified as CC
import Data.Conduit.List qualified as CCL
import Data.Foldable (traverse_)
import Data.Hashable (Hashable)
import Data.Map.Strict qualified as Map
import Data.Maybe (isJust)
import Data.String.Conversion (ToText (toText), toString)
import Data.Text (Text)
import Data.Text qualified as Text
import Data.Text.Encoding qualified as Text.Encoding
import Data.Time (getCurrentTime)
import Data.Time.Format (defaultTimeLocale, formatTime)
import Effect.Exec (AllowErr (Never), Command (..), ExitCode (ExitSuccess), renderCommand)
import Effect.Logger (Logger, logDebug, logInfo)
import Fossa.API.Types (ApiKey (..), ApiOpts (..))
import Path (Abs, Dir, Path, toFilePath)
import Prettyprinter (pretty)
import Srclib.Types (Locator (..), SourceUnit (..), SourceUnitBuild (..), SourceUnitDependency (..), renderLocator, textToOriginPath)
import System.Directory qualified as Directory
import System.FilePath ((</>))
import System.IO (Handle, IOMode (WriteMode), hClose, hPutStrLn, hSetEncoding, openFile, stderr, utf8)
import System.Process.Typed (
createPipe,
getStderr,
getStdout,
proc,
setStderr,
setStdout,
setWorkingDir,
waitExitCode,
withProcessWait,
)
import Text.Printf (printf)
import Text.URI (render)
import Text.URI.Builder (PathComponent (PathComponent), TrailingSlash (TrailingSlash), setPath)
import Types (GlobFilter (..), GraphBreadth (..), LicenseScanPathFilters (..))
import Prelude
newtype CustomLicensePath = CustomLicensePath {unCustomLicensePath :: Text}
deriving (Eq, Ord, Show, Hashable)
newtype CustomLicenseTitle = CustomLicenseTitle {unCustomLicenseTitle :: Text}
deriving (Eq, Ord, Show, Hashable)
-- Helper function to log with timestamp
logDebugWithTime :: (Has Logger sig m, Has (Lift IO) sig m) => Text -> m ()
logDebugWithTime msg = do
now <- sendIO getCurrentTime
let timestamp = formatTime defaultTimeLocale "%H:%M:%S.%3q" now
logDebug $ "[" <> pretty timestamp <> "] " <> pretty msg
-- | scan rootDir with Ficus, using the given GrepOptions. This is the main entry point to this module
analyzeWithFicus ::
( Has Diagnostics sig m
, Has (Lift IO) sig m
, Has Logger sig m
) =>
Path Abs Dir ->
Maybe ApiOpts ->
ProjectRevision ->
[FicusStrategy] ->
Maybe LicenseScanPathFilters ->
Maybe Int ->
Maybe FilePath -> -- Debug directory (if enabled)
m (Maybe FicusAnalysisResults)
analyzeWithFicus rootDir apiOpts revision strategies filters snippetScanRetentionDays maybeDebugDir = do
Just <$> analyzeWithFicusMain rootDir apiOpts revision strategies filters snippetScanRetentionDays maybeDebugDir
analyzeWithFicusMain ::
( Has Diagnostics sig m
, Has (Lift IO) sig m
, Has Logger sig m
) =>
Path Abs Dir ->
Maybe ApiOpts ->
ProjectRevision ->
[FicusStrategy] ->
Maybe LicenseScanPathFilters ->
Maybe Int ->
Maybe FilePath -> -- Debug directory (if enabled)
m FicusAnalysisResults
analyzeWithFicusMain rootDir apiOpts revision strategies filters snippetScanRetentionDays maybeDebugDir = do
logDebugWithTime "Preparing Ficus analysis configuration..."
ficusResults <- runFicus maybeDebugDir ficusConfig
logDebugWithTime "runFicus completed, processing results..."
when (FicusStrategySnippetScan `elem` strategies) $
case snippetScanResults ficusResults of
Just results ->
logInfo $ pretty (formatFicusScanSummary results)
Nothing -> logInfo "Ficus analysis completed but no fingerprint findings were found"
pure ficusResults
where
ficusConfig =
FicusConfig
{ ficusConfigRootDir = rootDir
, ficusConfigExclude = maybe [] licenseScanPathFiltersExclude filters
, ficusConfigEndpoint = apiOptsUri =<< apiOpts
, ficusConfigSecret = apiOptsApiKey <$> apiOpts
, ficusConfigRevision = revision
, ficusConfigFlags = [All $ FicusAllFlag SkipHiddenFiles, All $ FicusAllFlag Gitignore]
, ficusConfigSnippetScanRetentionDays = snippetScanRetentionDays
, ficusConfigStrategies = strategies
}
findingToSnippetScanResult :: FicusFinding -> Maybe FicusSnippetScanResults
findingToSnippetScanResult (FicusFinding (FicusMessageData strategy payload))
| Text.toLower strategy == "fingerprint" =
decode (BL.fromStrict $ Text.Encoding.encodeUtf8 payload)
findingToSnippetScanResult _ = Nothing
formatFicusScanSummary :: FicusSnippetScanResults -> Text
formatFicusScanSummary results =
let stats = ficusSnippetScanResultsStats results
aid = ficusSnippetScanResultsAnalysisId results
in Text.unlines
[ "Ficus snippet scan completed successfully!"
, ""
, "============================================================"
, "Snippet scan summary:"
, " Analysis ID: " <> toText (show aid)
, " Bucket ID: " <> toText (show $ ficusSnippetScanResultsBucketId results)
, " Files skipped: " <> toText (show $ ficusStatsSkippedFiles stats)
, " Total Files processed: " <> toText (show $ ficusStatsProcessedFiles stats)
, " Unique Files processed: " <> toText (show $ ficusStatsUniqueProcessedFiles stats)
, " Unique Files with matches found: " <> toText (show $ ficusStatsUniqueMatchedFiles stats)
, " Unique Files with no matches found: " <> toText (show $ ficusStatsUniqueUnmatchedFiles stats)
, " Unique Files already in our knowledge base: " <> toText (show $ ficusStatsUniqueExistingFiles stats)
, " Unique Files new to our knowledge base: " <> toText (show $ ficusStatsUniqueNewFiles stats)
, " Processing time: " <> formatProcessingTime (ficusStatsProcessingTimeSeconds stats) <> "s"
, "============================================================"
, "See the docs for an explanation of this summary: https://github.com/fossas/fossa-cli/blob/master/docs/features/snippet-scanning.md#the-snippet-scan-summary"
]
where
-- Format the processing time as a string with 3 decimal places
formatProcessingTime :: Double -> Text
formatProcessingTime seconds = toText (printf "%.3f" seconds :: String)
findingToVendoredDependency :: FicusFinding -> Maybe FicusVendoredDependency
findingToVendoredDependency (FicusFinding (FicusMessageData strategy payload))
| Text.toLower strategy == "vendetta" =
decode (BL.fromStrict $ Text.Encoding.encodeUtf8 payload)
findingToVendoredDependency _ = Nothing
vendoredDepsToSourceUnit :: FilePath -> [FicusVendoredDependency] -> IO SourceUnit
vendoredDepsToSourceUnit rootDir deps = do
dependencies <- traverse vendoredDepToSourceUnitDependency deps
pure
SourceUnit
{ sourceUnitName = "ficus-vendored-dependencies"
, sourceUnitType = "ficus-vendored"
, sourceUnitManifest = "ficus-vendored-dependencies"
, sourceUnitBuild =
Just $
SourceUnitBuild
{ buildArtifact = "default"
, buildSucceeded = True
, buildImports = locators
, buildDependencies = dependencies
}
, sourceUnitGraphBreadth = Complete
, sourceUnitNoticeFiles = []
, sourceUnitOriginPaths = concatMap (map textToOriginPath . ficusVendoredDependencyLocations) deps
, sourceUnitLabels = Nothing
, additionalData = Nothing
}
where
locators :: [Locator]
locators = map vendoredDepToLocator deps
vendoredDepToLocator :: FicusVendoredDependency -> Locator
vendoredDepToLocator dep =
Locator
{ locatorFetcher = ficusVendoredDependencyEcosystem dep
, locatorProject = ficusVendoredDependencyName dep
, locatorRevision = ficusVendoredDependencyVersion dep
}
vendoredDepToSourceUnitDependency :: FicusVendoredDependency -> IO SourceUnitDependency
vendoredDepToSourceUnitDependency dep = do
vendoredEntries <- traverse classifyLocation (ficusVendoredDependencyLocations dep)
pure
SourceUnitDependency
{ sourceDepLocator = vendoredDepToLocator dep
, sourceDepImports = []
, sourceDepData =
Aeson.object
[ "vendored" Aeson..= vendoredEntries
]
}
classifyLocation :: Text -> IO Aeson.Value
classifyLocation loc = do
let fullPath = rootDir </> toString loc
isFile <- Directory.doesFileExist fullPath
let locType = if isFile then "file" :: Text else "directory"
pure $
Aeson.object
[ "type" Aeson..= locType
, "path" Aeson..= loc
]
runFicus ::
( Has Diagnostics sig m
, Has (Lift IO) sig m
, Has Logger sig m
) =>
Maybe FilePath ->
FicusConfig ->
m FicusAnalysisResults
runFicus maybeDebugDir ficusConfig = do
logDebugWithTime "About to extract Ficus binary..."
withFicusBinary $ \bin -> do
logDebugWithTime "Ficus binary extracted, building command..."
cmd <- ficusCommand ficusConfig bin
logDebugWithTime "Executing ficus (streaming)"
logDebug $ "Working directory: " <> pretty (toFilePath $ ficusConfigRootDir ficusConfig)
logDebugWithTime "Creating process configuration..."
let processConfig =
setWorkingDir (toFilePath $ ficusConfigRootDir ficusConfig) $
setStdout createPipe $
setStderr createPipe $
proc (toString $ cmdName cmd) (map toString $ cmdArgs cmd)
logInfo $ "Running Ficus analysis on " <> pretty (toFilePath $ ficusConfigRootDir ficusConfig)
logDebugWithTime "Starting Ficus process..."
-- Create files for teeing output if debug mode is enabled
(stdoutFile, stderrFile) <- case maybeDebugDir of
Just debugDir -> do
sendIO $ do
let stdoutPath = debugDir </> "fossa.ficus-stdout.log"
let stderrPath = debugDir </> "fossa.ficus-stderr.log"
stdoutH <- openFile stdoutPath WriteMode
hSetEncoding stdoutH utf8
stderrH <- openFile stderrPath WriteMode
hSetEncoding stderrH utf8
pure (Just stdoutH, Just stderrH)
Nothing ->
-- No debug mode, don't tee to files
pure (Nothing, Nothing)
(result, exitCode, stdErrLines) <- sendIO $ withProcessWait processConfig $ \p -> do
getCurrentTime >>= \now -> hPutStrLn stderr $ "[TIMING " ++ formatTime defaultTimeLocale "%H:%M:%S.%3q" now ++ "] Ficus process started, beginning stream processing..."
let stdoutHandle = getStdout p
let stderrHandle = getStderr p
-- Start async reading of stderr to prevent blocking
stderrAsync <- async $ consumeStderr stderrHandle stderrFile
-- Read stdout in the main thread
result <- streamFicusOutput stdoutHandle stdoutFile
-- Wait for stderr to finish
stdErrLines <- wait stderrAsync
exitCode <- waitExitCode p
pure (result, exitCode, stdErrLines)
sendIO $ do
traverse_ hClose stdoutFile
traverse_ hClose stderrFile
if exitCode /= ExitSuccess
then do
logInfo $
"[Ficus] Ficus process returned non-zero exit code. Printing last 50 lines of stderr: " <> pretty (show exitCode)
logInfo "\n==== BEGIN Ficus STDERR ====\n"
logInfo $ pretty (Text.unlines stdErrLines)
logInfo "\n==== END Ficus STDERR ====\n"
else logInfo "[Ficus] Ficus exited successfully"
pure result
where
currentTimeStamp :: IO String
currentTimeStamp = do
now <- getCurrentTime
pure . formatTime defaultTimeLocale "%H:%M:%S.%3q" $ now
streamFicusOutput :: Handle -> Maybe Handle -> IO FicusAnalysisResults
streamFicusOutput handle maybeFile = do
accumulator <-
Conduit.runConduit $
CC.sourceHandle handle
.| CC.decodeUtf8Lenient
.| CC.linesUnbounded
.| CC.mapM
( \line -> do
-- Tee raw line to file if debug mode
traverse_ (\fileH -> hPutStrLn fileH (toString line)) maybeFile
pure line
)
.| CCL.mapMaybe decodeStrictText
.| CC.foldM
( \(currentSnippetResults, currentVendoredDeps) message -> do
-- Log messages as they come, with timestamps
timestamp <- currentTimeStamp
case message of
FicusMessageError err -> do
hPutStrLn stderr $ "[" ++ timestamp ++ "] ERROR " <> toString (displayFicusError err)
pure (currentSnippetResults, currentVendoredDeps)
FicusMessageDebug dbg -> do
hPutStrLn stderr $ "[" ++ timestamp ++ "] DEBUG " <> toString (displayFicusDebug dbg)
pure (currentSnippetResults, currentVendoredDeps)
FicusMessageFinding finding -> do
hPutStrLn stderr $ "[" ++ timestamp ++ "] FINDING " <> toString (displayFicusFinding finding)
let analysisFinding = findingToSnippetScanResult finding
let vendoredDep = findingToVendoredDependency finding
when (isJust currentSnippetResults && isJust analysisFinding) $
hPutStrLn stderr $
"[" ++ timestamp ++ "] ERROR " <> "Unexpected mutliple snippet scan results"
let newSnippetResults = currentSnippetResults <|> analysisFinding
let newVendoredDeps = case vendoredDep of
Just dep -> dep : currentVendoredDeps
Nothing -> currentVendoredDeps
pure (newSnippetResults, newVendoredDeps)
)
(Nothing, [])
let (snippetResults, vendoredDeps) = accumulator
let rootDir = toFilePath $ ficusConfigRootDir ficusConfig
vendoredResults <- case vendoredDeps of
[] -> pure Nothing
deps -> do
srcUnit <- vendoredDepsToSourceUnit rootDir deps
pure . Just $ FicusVendoredDependencyScanResults (Just srcUnit)
pure $
FicusAnalysisResults
{ snippetScanResults = snippetResults
, vendoredDependencyScanResults = vendoredResults
}
-- Use Conduit with decodeUtf8Lenient to safely handle UTF-8 output from ficus.
-- This matches the approach used for stdout and prevents crashes on Windows
-- where the default system encoding (CP1252) cannot decode UTF-8 characters
-- like box-drawing characters (U+2501) used in ficus progress output.
consumeStderr :: Handle -> Maybe Handle -> IO [Text]
consumeStderr handle maybeFile = do
acc <-
Conduit.runConduit $
CC.sourceHandle handle
.| CC.decodeUtf8Lenient
.| CC.linesUnbounded
.| CC.foldM
( \acc line -> do
-- Tee raw line to file if debug mode
traverse_ (\fileH -> hPutStrLn fileH (toString line)) maybeFile
now <- getCurrentTime
-- Keep at most the last 50 lines of stderr (newest first during accumulation)
let ts = toText $ formatTime defaultTimeLocale "%H:%M:%S.%3q" now
let msg = "[" <> ts <> "] STDERR " <> line
pure (take 50 (msg : acc))
)
[]
pure (reverse acc)
displayFicusDebug :: FicusDebug -> Text
displayFicusDebug (FicusDebug FicusMessageData{..}) = ficusMessageDataStrategy <> ": " <> ficusMessageDataPayload
displayFicusError :: FicusError -> Text
displayFicusError (FicusError FicusMessageData{..}) = ficusMessageDataStrategy <> ": " <> ficusMessageDataPayload
displayFicusFinding :: FicusFinding -> Text
displayFicusFinding (FicusFinding FicusMessageData{..}) = ficusMessageDataStrategy <> ": " <> ficusMessageDataPayload
-- Run Ficus, passing config-based args as configuration.
-- Caveat! This hard-codes some flags currently which may later need to be set on a strategy-by-strategy basis.
ficusCommand :: (Has Diagnostics sig m, Has Logger sig m) => FicusConfig -> BinaryPaths -> m Command
ficusCommand ficusConfig bin = do
endpoint <- case ficusConfigEndpoint ficusConfig of
Just baseUri -> do
proxyUri <- setPath [PathComponent "api", PathComponent "proxy", PathComponent "analysis"] (TrailingSlash False) baseUri
pure $ render proxyUri
Nothing -> pure "https://app.fossa.com/api/proxy/analysis"
let cmd =
Command
{ cmdName = toText $ toPath bin
, cmdArgs = configArgs endpoint
, cmdAllowErr = Never
, cmdEnvVars = Map.empty
}
logDebug $ "Ficus command: " <> pretty (maskApiKeyInCommand $ renderCommand cmd)
pure cmd
where
snippetScanRetentionDays = ficusConfigSnippetScanRetentionDays ficusConfig
configArgs endpoint = ["analyze", "--secret", secret, "--endpoint", endpoint, "--locator", locator, "--set", "all:skip-hidden-files", "--set", "all:gitignore", "--exclude", ".git", "--exclude", ".git/**"] ++ configExcludes ++ configStrategies ++ maybe [] (\days -> ["--snippet-scan-retention-days", toText days]) snippetScanRetentionDays ++ [targetDir]
targetDir = toText $ toFilePath $ ficusConfigRootDir ficusConfig
secret = maybe "" (toText . unApiKey) $ ficusConfigSecret ficusConfig
locator = renderLocator $ Locator "custom" (projectName $ ficusConfigRevision ficusConfig) (Just $ projectRevision $ ficusConfigRevision ficusConfig)
configExcludes = concatMap (\path -> ["--exclude", unGlobFilter path]) $ ficusConfigExclude ficusConfig
configStrategies = concatMap (\strategy -> ["--strategy", strategyToArg strategy]) $ ficusConfigStrategies ficusConfig
strategyToArg = \case
FicusStrategySnippetScan -> "snippet-scanning"
FicusStrategyVendetta -> "vendetta"
maskApiKeyInCommand :: Text -> Text
maskApiKeyInCommand cmdText =
case Text.splitOn " --secret " cmdText of
[before, after] ->
case Text.words after of
(_ : rest) ->
before
<> " --secret "
<> "******"
<> if null rest then "" else " " <> Text.unwords rest
[] -> cmdText
_ -> cmdText
-- add a FicusMessage to the corresponding entry of an empty FicusMessages
singletonFicusMessage :: FicusMessage -> FicusMessages
singletonFicusMessage message = case message of
FicusMessageFinding msg -> mempty{ficusMessageFindings = [msg]}
FicusMessageDebug msg -> mempty{ficusMessageDebugs = [msg]}
FicusMessageError msg -> mempty{ficusMessageErrors = [msg]}