Pre-Built Parsers

Scalyr has several Built-In Parsers in addition to this list of pre-built parsers. If any of your logs are in the following formats, you can use the pre-built parser, at least as a guide, to configure your Parsers. For more information on parsing log files, see Log Parsers.

The list below covers some of the more commonly used parsers. More are available for you to consult at our GitHub parsers repository.

Please note that double escaping regex elements is required almost everywhere at Scalyr, including when specifying a parser. See Regex for more information.

Click on the links below, or scroll down the page to view the configuration files:

AWS Cloudfront Log Files

// Parser for AWS Cloudfront log files.
// For more info see https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html#LogFileFormat
// The first 23 standard logfile fields are parsed below:

{
  patterns: {
    // Two tokens separated by spaces or tabs: digits/hyphens followed by digits/colons.
    // Used to capture the date and time together as a single value.
    timestampPattern: "[0-9-]+[ \t]+[0-9:]+"
  },

  formats: [
    {
      // cf_timestamp spans two whitespace-separated tokens (date and time), so the
      // 22 named fields below account for the 23 logfile fields mentioned above.
      // The format string is split across lines with "+" purely for readability.
      format: "$cf_timestamp=timestampPattern$ $x_edge_location$ $sc_bytes$ $c_ip$ $cs_method$ " +
              "$cs_host$ $cs_uri_stem$ $sc_status$ $cs_referrer$ $cs_user_agent$ $cs_uri_query$ " +
              "$cs_cookie$ $x_edge_result_type$ $x_edge_request_id$ $x_host_header$ $cs_protocol$ " +
              "$cs_bytes$ $time_taken$ $x_forwarded_for$ $ssl_protocol$ $ssl_cipher$ " +
              "$x_edge_response_result_type$"
    }
  ]
}

ELB Access Logs

// Parser for ELB access logs.
//
// See http://docs.aws.amazon.com/ElasticLoadBalancing/latest/DeveloperGuide/access-log-collection.html.
//
// Two formats are listed. They are identical except that the first expects an
// extra leading token ($protocolType$) before the timestamp.

{
  formats: [
    // Contrary to spec, some logs observed in the wild have "http " prefixed before the timestamp.
    // This variant is listed first and marked halt: true so that, once it matches,
    // the unprefixed variant below is not also applied to the same line.
    {
      format: "$protocolType=identifier$ $time$ $elb$ $clientIp$:$clientPort$ $backendIp$:$backendPort$ " +
              "$requestProcessingTimeSecs$ $backendTimeSecs$ $responseProcessingTimeSecs$ " +
              "$elbStatus$ $backendStatus$ $receivedBytes$ $sentBytes$ \"$method$ " +
              "$uri{parse=uri}$ $protocol$\" $agent$ $sslCipher$ $sslProtocol$",
      halt: true
    },

    // Spec-conformant layout: the line starts directly with the timestamp.
    // The quoted request section is split into method, uri and protocol.
    {
      format: "$time$ $elb$ $clientIp$:$clientPort$ $backendIp$:$backendPort$ " +
              "$requestProcessingTimeSecs$ $backendTimeSecs$ $responseProcessingTimeSecs$ " +
              "$elbStatus$ $backendStatus$ $receivedBytes$ $sentBytes$ \"$method$ " +
              "$uri{parse=uri}$ $protocol$\" $agent$ $sslCipher$ $sslProtocol$"
    }
  ]
}

Redshift Logs

// Parser for Redshift logs. Handles both connection and user activity logs.
//
// See http://docs.aws.amazon.com/redshift/latest/mgmt/db-auditing.html.



{
  lineGroupers: [
    {
      // Activity log messages can contain multiple lines. They will start with a line like this:
      //
      // '2016-01-25T23:00:13Z UTC ...
      //
      // and continue until the next such line. We also exclude connection log messages, which begin
      // with something like this:
      //
      // initiating session |Mon, 25 Jan 2016
      //
      // NOTE(review): the second haltBefore alternative uses lowercase-only classes ([a-z]),
      // while the sample above contains "Mon" and "Jan". Confirm that matching is
      // case-insensitive; otherwise connection lines will not terminate a group.
      start: "^'20",
      haltBefore: "^'20|^[a-z ]+\\|[a-z]+, [0-9]+ [a-z]+ [0-9]+ "
    }
  ],

  formats: [
    // Parse connection log messages.
    // This parses 9 of 19 fields, through the duration field ("Duration of connection in microseconds.")
    // Fields are pipe-delimited; "[ ]*" absorbs any space padding before each pipe.
    {
      id: "connectionLog",
      format: "$event$[ ]*\\|$time$[ ]*\\|$remotehost$[ ]*\\|$remoteport$[ ]*\\|$pid$[ ]*"
            + "\\|$database$[ ]*\\|$username$[ ]*\\|$authmethod$[ ]*\\|$microseconds$"
    },

    // Parse user activity log messages.
    // Parses 7 of 7 fields
    // The header is wrapped in single quotes and square brackets; everything after
    // the closing quote is captured as the query text.
    {
      id: "activityLog",
      format: "'$time$ \\[ db=$database$ user=$user$ pid=$pid$ userid=$userid$ xid=$xid$ \\]' $query$"
    }
  ]
}

AWS S3 Bucket Access Logs

// Parser for AWS S3 bucket access log files.
//
// See http://docs.aws.amazon.com/AmazonS3/latest/dev/LogFormat.html.
// This parses the first 18 of 24 fields.
{
  formats: [
    {
      // The timestamp is enclosed in square brackets. The quoted request line is
      // split into method, uri and protocol, and counts as one log field.
      // Referrer and agent are each enclosed in double quotes.
      format: "$bucketOwner$ $bucket$ \\[$time$\\] $remoteIp$ $requester$ " +
              "$requestId$ $operation$ $key$ \"$method$ $uri$ $protocol$\" " +
              "$status$ $errorCode$ $bytesSent$ $objectSize$ $totalTimeMs$ " +
              "$turnaroundTimeMs$ \"$referrer$\" \"$agent$\" $versionId$"
    }
  ]
}

pfSense

// Parser for pfSense "filterlog" firewall entries.
// Note: Depending on your pfSense configuration, additional support for various protocols (ICMP) may be necessary.
//
// The rules below are applied in order. The first two have no "halt", so a single
// line can pick up the common header fields, then the IPv4 fields, and finally the
// TCP- or UDP-specific fields.

{
  patterns: {
    // ISO-8601 timestamp with a "+HH:MM" offset (negative offsets are not matched).
    tsPattern: "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\+\\d{2}:\\d{2}",
    ipv4: "\\d+\\.\\d+\\.\\d+\\.\\d+",
    ipv6: "[a-fA-F0-9:]+",
    // Dotted IPv4, or any run of letters, digits and colons.
    ipv46: "(\\d+\\.\\d+\\.\\d+\\.\\d+|[a-zA-Z0-9:]+)",
    ipv: "(4|6)",
    // Optional value of the form <hex>x<hex>, e.g. 0x0; may be empty.
    hex: "([a-f0-9]+x[a-f0-9]+){0,1}",
    tcpflags: "[SA\\.FRPUEW]+",
    // The "...OrNone" patterns also match an empty field.
    numberOrNone: "[0-9]{0,}",
    textOrNone: "[a-zA-Z-\\.0-9]{0,}",
  },
  formats: [
    // Common header: timestamp, host, and the first eight comma-separated filterlog fields.
    {
      format: "$timestamp=tsPattern$ $hostname=ipv46$ filterlog: $pfRule=numberOrNone$,$pfSubRule=numberOrNone$,$pfAnchor=textOrNone$,$pfTracker=numberOrNone$,$pfInterface$,$pfReason=identifier$,$pfAction=identifier$,$pfDirection=identifier$,.*"
    },
    // IPv4 section: skip the eight header fields, require IP version "4", then read
    // the IPv4 header fields. Everything after the protocol id is kept in $msg$.
    {
      attributes: { pfIpv: 4 },
      format: "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\+\\d{2}:\\d{2} (\\d+\\.\\d+\\.\\d+\\.\\d+|[a-zA-Z0-9:]+) filterlog: ([a-zA-Z0-9-\\.]+,|,){8}4,$pfTos=hex$,$pfEcn$,$pfTtl=numberOrNone$,$pfPacketId=numberOrNone$,$pfOffset=numberOrNone$,$pfIPFlags=identifier$,$pfProtocolID$,$msg$"
    },
    // TCP: skip the first sixteen fields, require the protocol name "tcp", then read
    // addresses, ports and TCP-specific fields.
    {
      attributes: { pfProtocol: "tcp" },
      format: "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\+\\d{2}:\\d{2} (\\d+\\.\\d+\\.\\d+\\.\\d+|[a-zA-Z0-9:]+) filterlog: ([a-zA-Z0-9-\\.]+,|,){16}tcp,$pfPacketLen=number$,$pfSourceIP=ipv4$,$pfDestIP=ipv4$,$pfSourcePort=number$,$pfDestPort=number$,$pfDataLen=number$,$pfTCPFlags=tcpflags$,$pfSeq=numberOrNone$,$pfAck=numberOrNone$,$pfWindow=numberOrNone$,$pfUrg=textOrNone$,$pfTcpOptions$",
      halt: true
    },
    // UDP: same sixteen-field skip, then addresses, ports and data length.
    {
      attributes: { pfProtocol: "udp" },
      format: "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\+\\d{2}:\\d{2} (\\d+\\.\\d+\\.\\d+\\.\\d+|[a-zA-Z0-9:]+) filterlog: ([a-zA-Z0-9-\\.]+,|,){16}udp,$pfPacketLen=number$,$pfSourceIP=ipv4$,$pfDestIP=ipv4$,$pfSourcePort=number$,$pfDestPort=number$,$pfDataLen=number$",
      halt: true
    },
  ]
}

PingFederate

// Parser for PingFederate log lines with pipe-delimited fields.
{
  formats: [
    // Written as a bare string rather than a { format: ... } object:
    // captures the text before the first "| " as the timestamp.
    "$timestamp$\\| ",
    {
      // Skips ahead to "tid:" and then reads the remaining "| "-separated fields.
      // Note the literal " \\| \\| " after $ip$: the pattern expects an empty
      // column between the ip and app fields.
      format: ".*tid:$Tracking_ID$\\| $Event$\\| $subject$\\| $ip$ \\| \\| $app$\\| $protocol$\\| $host$\\| $role$\\| $status$\\| $adapterid$\\| $description$\\| $response_time$"
    }
  ]
}

PostFix

// Parser for PostFix mail log lines.
//
// The first rule extracts the common prefix (timestamp, server, service). The
// remaining rules are tried from most to least specific; each of the first three
// halts on a match so that a less specific rule does not re-parse the same line.
{
  patterns: {
    // Word/space/digit run, whitespace, then a run of digits, "+" and ":".
    tsPattern: "[\\w\\s\\d]+\\s+[\\d+:]+"
  },
  formats: [
    // Prefix: the service name ends at the first "/" or "[".
    "$timestamp=tsPattern$ $server$ $service$[\\/\\[]",
    // "service/process[pid]: messageId: description"
    {
      format: ".*\\/$process$\\[$pid$\\]: $messageId$: $description$",
      halt: true
    },
    // "service[pid]: messageId: description"
    {
      format: ".*\\[$pid$\\]: $messageId$: $description$",
      halt: true
    },
    // "service/process[pid]: description" (no message id)
    {
      format: ".*\\/$process$\\[$pid$\\]: $description$",
      halt: true
    },
    // "service[pid]: description" (fallback)
    {
      format: ".*\\[$pid$\\]: $description$"
    },
  ]
}

Heroku

// Parser for Heroku log lines.
// Note: Format may vary
{
  timezone: "GMT",
  attributes: {
    // Tag all events parsed with this parser so we can easily select them in queries.
    dataset: "herokulog"
  },
  patterns: {
    // Matches a single space or the end of the line.
    spaceOrEOL: " |$"
  },
  formats: [
    // Parse all key=value entries in each log message. If a value ends with "ms", parse it without the ms,
    // so that it can be interpreted as a number.
    {format: ".*$_=identifier$=$_=number$ms$=spaceOrEOL$", repeat: true},
    {format: ".*$_=identifier$=$_=quoteOrSpace$", repeat: true},

    // Extract the event timestamp from the line header ("<n>n <ts> host ...").
    // The rewrite turns "<date>T<time>.<fraction>+<offset>" into "<date> <time>".
    // NOTE(review): the "." before the fractional digits is unescaped in the match
    // regex, so it matches any single character, and only "+" offsets are matched.
    {
      id: "time",
      format: "\\<$=number$\\>$=number$ $ts$ host .*",
      rewrites: [
        {
          input:   "ts",
          output:  "timestamp",
          match:   "(\\d+-\\d+-\\d+)T(\\d+:\\d+:\\d+).\\d+\\+\\d+:\\d+",
          replace: "$1 $2",
          replaceAll: true
        }
      ]
    },
    {
      //id: "param",
      //get contents of Parameters field
      // Matches entries of the form "key"=>"value".
      format: ".*\"$_{regex=[a-zA-Z0-9._-]+}$\"=>\"$_$\".*",
      repeat: true
    },
    {
      //id: "paramList",
      //get lists included in Parameters field
      // Matches "key"=>{...} with optional surrounding "[" "]" and trailing comma.
      format: ".*\"$_{regex=[a-zA-Z0-9._-]+}$\"=>\\[?\\{$_$\\}\\]?,?.*",
      repeat: true
    },
    {
      //id: "numParam",
      //get parameters with numerical values, do not include comma (if present)
      // The value class is [0-9a-z]+, so unquoted lowercase words are accepted as well as digits.
      format: ".*\"$_{regex=[a-zA-Z0-9._-]+}$\"=>$_{regex=[0-9a-z]+}$,?.*",
      repeat: true
    },
    // Parse app messages which contain a web access record.
    {
      id: "app1",
      format: ".* host app $process$\\.$instance$ \\- $ip$ $user$ $authUser$  \\[$wtimestamp$\\] \"$method$ $uri{parse=uri}$ $protocol$\" $status$ $bytes$ $time$",
      halt:true
    },

    // Parse other app messages.
    {
      id: "app2",
      format: ".* host app $process$\\.$instance$ \\-\\s+\\w,\\s+\\[$wtimestamp$ $_$\\]\\s+$severity$ \\-\\-\\s+:\\s+$details$",
      halt:true
    },

    // Parse non-app messages.
    {
      id: "router",
      format: ".* host heroku $component$ \\- $details$",
      halt:true
    }
  ]
}

Fastly (Logplex)

// Parser for Fastly logs delivered in Logplex (syslog-style) framing.
//
// The first rule reads the header and derives the timestamp. The remaining rules
// are alternatives for the message body; each halts on a match.
{
  formats: [
    // Header: "<n>n <ts> ..." followed by four space-separated values and " - ".
    // The rewrite turns "<date>T<time>+<offset>" into "<date> <time>".
    {
      format: "<\\d+>\\d+ $ts$ $value_0$ $value_1$ $value_3$ $value_4$ - .*",
      rewrites: [
        {
          input: "ts",
          output: "timestamp",
          match: "(\\d+-\\d+-\\d+)T(\\d+:\\d+:\\d+)\\+\\d+:\\d+",
          replace: "$1 $2",
          replaceAll: true
        }
      ]
    },
    // Error line whose tail has the full request layout (url, numeric response code, ...).
    {
      id: "errorFormat1",
      format: ".* [a-zA-Z0-9_-]+Errors[a-zA-Z0-9_-]* - [a-zA-Z0-9_-]+www[a-zA-Z0-9_-]* $fhost$ \"$fvalue_0$\" \"$fvalue_1$\" \"$ftimestamp$\" $method$ $url$ $responsecode{regex=\\d+}$ $serverdc$ $fclientIP$ $region$ $fssl$",
      halt: true
    },
    // Error line with any other tail: everything after the method is kept as $errordetails$.
    {
      id: "errorFormat2",
      format: ".* [a-zA-Z0-9_-]+Errors[a-zA-Z0-9_-]* - [a-zA-Z0-9_-]+www[a-zA-Z0-9_-]* $fhost$ \"$fvalue_0$\" \"$fvalue_1$\" \"$ftimestamp$\" $method$ $errordetails$",
      halt: true
    },
    // Non-error line: same request layout, with the user agent as the last field.
    {
      id: "nonError",
      format: ".* - [a-zA-Z0-9_-]+www[a-zA-Z0-9_-]* $fhost$ \"$fvalue_0$\" \"$fvalue_1$\" \"$ftimestamp$\" $method$ $url$ $responsecode{regex=\\d+}$ $serverdc$ $fclientIP$ $region$ $fssl$ $useragent$$$",
      halt: true
    }
  ]
}

Laravel

// Parser for Laravel application logs.
{
 lineGroupers: [
   {
     // A group begins at a line starting with "[<date> <time>]" and is closed by
     // a line starting with "} preceded by a double quote (i.e. the two characters "}).
     start: "^\\[\\d+-\\d+-\\d+ \\d+:\\d+:\\d+\\]",
     haltWith: "^\"\\}"
   }
 ],
 patterns: {
   // "<date> <time>" made of digit runs, as it appears inside the leading brackets.
   ts: "\\d+-\\d+-\\d+ \\d+:\\d+:\\d+"
 },
 formats: [
   {
     // Reads the bracketed timestamp, then "<env>.<severity>:" that follows it.
     format: ".*\\[$timestamp=ts$\\] $env$\\.$severity$:.*"
   }
 ]
}

NGINX Ingress Using Helm

// Parser for NGINX ingress controller access logs (Helm deployment).
// Note: Includes rudimentary support for IPV4 and IPV6 addresses
//
// The first four rules have no "halt", so each one that matches adds its fields
// to the event. The last three are terminal alternatives for the end of the line.

{
  patterns: {
    ipv4: "\\d+\\.\\d+\\.\\d+\\.\\d+",
    ipv6: "[a-fA-F0-9:]+",
    // Dotted IPv4, or any run of letters, digits and colons.
    ipv46: "(\\d+\\.\\d+\\.\\d+\\.\\d+|[a-zA-Z0-9:]+)"
  },
  formats: [
    // Leading IPv4 address at the start of the line.
    {
      format: "$serverIP=ipv4$\\s+.*"
    },
    // Address enclosed in square brackets after " - ".
    {
      format: ".*\\s+-\\s+\\[$IP_3=ipv46$\\].*"
    },
    // Bracketed timestamp after " - - ", followed by the quoted request method.
    {
      format: ".*\\s+-\\s+-\\s+\\[$timestamp$\\]\\s+\"$method$ .*"
    },
    // Request line and response details. Only POST and GET requests are matched here.
    {
      format: ".*\"(POST|GET){1} $uri$ $protocol$\" $responseCode=number$ $responseBytes=number$ \"$referrer$\" \"$userAgent$\" .*"
    },
    // Line ends with a quoted, comma-separated pair of addresses.
    {
      format: ".* \"$IP_1=ipv46$, $IP_2=ipv46$\"",
      halt: true
    },
    // Line ends with upstream details: bracketed origin, address:port, and trailing values.
    {
      format: ".* \"-\" \".*\" $value_1=number$ $value_2$ \\[$origin$\\] $IP_4=ipv46$:$port=number$ $value_3=number$ $value_4$ $responseCode_2=number$ $identifier$",
      halt: true
    },
    // Health-check requests whose user agent starts with "ELB-HealthChecker".
    {
      id: "HealthChecker",
      format: ".*\"GET $uri$\" $responseCode=number$ $responseBytes=number$ \"-\" \"ELB-HealthChecker.*\" \"-\"",
      halt: true
    }
  ]
}

Extract and Apply Timezones From Timestamps

{
  patterns: {
    // Timestamp anchored at the start of the line: date, "T", time with a
    // comma-separated fraction, and an optional "+HH:MM" / "-HH:MM" offset.
    tsPattern: "^(\\d+-\\d+-\\d+)T(\\d+:\\d+:\\d+,\\d+)((\\-|\\+)(\\d+):(\\d+))?"
  },
  formats: [
    {
      id: "getTimestamp", // this format block is used to interpret timestamps from all Analyzer_stats log lines
      format: "$ts=tsPattern$.*", // prevents matching lines that are not prefixed by a timestamp
      rewrites: [
        // First rewrite: derive "timestamp" from the date and time parts of ts.
        {
          input: "ts",
          output: "timestamp",
          match:   "(\\d+-\\d+-\\d+)T(\\d+:\\d+:\\d+,\\d+).*", // match the leading part of ts (hence the trailing wildcard)
          replace: "$1 $2", // joins date and time with a space, dropping the "T"
          replaceAll: true
        },
        // Second rewrite: derive "timezone" from the offset at the end of ts.
        {
          input: "ts",
          output: "timezone",
          match: ".*(\\-|\\+)(\\d+):(\\d+)", // match the trailing offset of ts
          replace: "GMT$1$2$3", // e.g. "GMT+0530"; valid timezone format for parser. If no match, GMT is used by default, which is the most common scenario anyway
          replaceAll: true
        }
      ],
    },
    // ... (the remaining formats are omitted from this excerpt)

Syslog

// Parser for syslog lines carrying key=value pairs.
{
  // Specify timezone here, since timestamp does not include timezone value
  timezone: "GMT",
  patterns: {
    // Characters allowed in an unquoted value.
    valuePattern: "[a-zA-Z0-9\/\\-,_.:;@\\(\\)<>]+",
    // Same set plus the space character, for values enclosed in quotes.
    valuePattern_s: "[a-zA-Z0-9\/\\-,_.:;@\\(\\)<> ]+"
  },
  formats: [
    // Leading "<seq>" token followed by whitespace.
    {
      format: "<$seq$>\\s+.*"
    },
    // Timestamp taken from a time="..." pair anywhere in the line.
    {
      format: ".*time=\"$timestamp$\".*"
    },
    // Unquoted key=value pairs; repeated to collect every pair on the line.
    {
      format: ".*$_=identifier$=$_=valuePattern$.*",
      repeat: true
    },
    // Quoted key="value" / key='value' pairs. The delimiter class is [\"'];
    // a "|" inside a character class is a literal pipe, not alternation, so it
    // must not appear here.
    {
      format: ".*$_=identifier$=[\"']$_=valuePattern_s$[\"'].*",
      repeat: true
    },
  ]
}

AWS Spot Data Feeds

// Parser for AWS spot data feeds.
//
// See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-data-feeds.html.

{
  patterns: {
    // "<date> <time> UTC": digits/hyphens, a space, digits/colons, then the literal " UTC".
    timestampPattern: "[0-9-]+ [0-9:]+ UTC"
  },

  formats: [
    {
      // The timestamp spans three space-separated tokens, so it needs the explicit
      // pattern above; the remaining fields are read one token at a time.
      format: "$time=timestampPattern$ $usageType$ $operation$ $instanceId$ " +
              "$bidId$ $myMaxPrice$ $myMaxPriceCurrency$ $marketPrice$ " +
              "$marketPriceCurrency$ $charge$ $chargeCurrency$ $version$"
    }
  ]
}