# ------------------------------------------------------------------------------
# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
# ------------------------------------------------------------------------------
# ndmpscan.awk scans an NDMP log file and produces a Phone Home statistics
# report. It may also be used as a stand-alone program to report the same
# statistics directly, and can produce additional data while doing so.
#
# Usage:
#     nawk -f ndmpscan <logfile> {optional tag=value pairs}
#
# Optional arguments:
#     start=<time>  beginning of a time range (default: open-ended)
#     end=<time>    ending of a time range    (default: open-ended)
#                   start alone means "all after"; end alone "all before".
#     full=1        "full listing": include per-session transfer data
#                   (useful for confirming the summary statistics).
#
# Time values may be given either as integer unix time (e.g. 132806599) or
# in ISO8601 form (e.g. 2012-02-01T03:13:17-0700), which can be pasted
# directly from a current NDMP log file. Omitted timezone defaults to UTC.
#
# NOTE(review): this copy of the file has had its line breaks collapsed and
# everything resembling an XML/HTML tag stripped ("<"..">" spans, which also
# ate comparisons such as "i<ARGC ... >"), and entities were HTML-decoded.
# Regions damaged by that stripping are flagged below; surviving code tokens
# are preserved verbatim.
# ------------------------------------------------------------------------------
# BEGIN sets up constants and data structures needed later.
# ------------------------------------------------------------------------------
BEGIN {
    # some constant definitions
    MIN_UNIXTIME = 0            # minimum legal unix time
    MAX_UNIXTIME = 2147483647   # maximum legal unix time (2^31 - 1)

    # global flags and counters
    full_listing = 0            # set by a full=1 argument
    errors = 0                  # count of "***"-flagged log lines
    simultaneous = 0            # currently-active transfer sessions

    # If no other timezone is specified, assume GMT ("+0000")
    ts_zone_plus = 1
    ts_zone_mag = "0000"
    range_start = 0             # first in-range timestamp actually seen
    range_end = 0               # last in-range timestamp actually seen

    # unixtime conversion assistance tables. day_numb[m] holds, for a
    # non-leap year, the number of days in the year elapsed before the
    # beginning of month m, where m runs 1 (January) .. 12 (December).
    day_numb[1] = 0
    day_numb[2] = day_numb[1]+31        # add the days in January
    day_numb[3] = day_numb[2]+28        # add the days in non-leap February
    day_numb[4] = day_numb[3]+31        # etc.
    day_numb[5] = day_numb[4]+30
    day_numb[6] = day_numb[5]+31
    day_numb[7] = day_numb[6]+30
    day_numb[8] = day_numb[7]+31
    day_numb[9] = day_numb[8]+31
    day_numb[10] = day_numb[9]+30
    day_numb[11] = day_numb[10]+31
    day_numb[12] = day_numb[11]+30
    # leap_day_numb holds the same data as day_numb, but for a leap year
    leap_day_numb[1] = 0
    leap_day_numb[2] = leap_day_numb[1]+31      # add the days in January
    leap_day_numb[3] = leap_day_numb[2]+29      # add days in leap-year Feb
    leap_day_numb[4] = leap_day_numb[3]+31      # etc.
    leap_day_numb[5] = leap_day_numb[4]+30
    leap_day_numb[6] = leap_day_numb[5]+31
    leap_day_numb[7] = leap_day_numb[6]+30
    leap_day_numb[8] = leap_day_numb[7]+31
    leap_day_numb[9] = leap_day_numb[8]+31
    leap_day_numb[10] = leap_day_numb[9]+30
    leap_day_numb[11] = leap_day_numb[10]+31
    leap_day_numb[12] = leap_day_numb[11]+30

    # Default processing range is wide open; start=/end= arguments narrow it.
    begin_proc_time = MIN_UNIXTIME
    end_proc_time = MAX_UNIXTIME
    if (ARGC == 1) {
        # No log file given: print usage text.
        # NOTE(review): the argument placeholders inside these strings (and
        # an apparent CDATA wrapper, judging by the trailing "]]>") were
        # stripped from this copy; string literals kept byte-identical.
        print \
            "\n {, ...}\n"\
            "where:\n"\
            " start= (default is beginning of time)\n"\
            " end= (default is end of time)\n"\
            " full=1 (for full session listings)\n"\
            " full=0 (default) (for no full session listings)\n"\
            "\n"\
            "Use a start=unixtime without an end= argument for all times\n"\
            "after unixtime. Unix time can be either integer seconds or\n"\
            "ISO8601 as in log files\n" \
            "]]>\n"
    }
    # NOTE(review): text lost here. The original almost certainly read
    # "for (i=1; i<ARGC; i++) { ...parse start=/end=/full= args... }",
    # closed the BEGIN block, and began the per-line rules (capturing
    # logfilename, computing unixtime from the line timestamp, and
    # skipping lines outside begin_proc_time..end_proc_time); everything
    # between the "<" of the loop test and the ">" of the range test was
    # eaten by the tag stripper. Surviving fragment kept verbatim:
    for (i=1; i end_proc_time) { next } if (range_start == 0) { range_start = $1 } range_end = $1 }

# Detect the beginning of a data transfer operation. $2 is the session id
# (as on every NDMP log line); $4 is the direction (backup/restore).
$3 == "--------" && $5 == "initiated" && $6 == "--------" {
    start_transfer_session($2)
    session_direction[$2] = $4
}

# Detect the name of the target filesystem
$3 == "env\(FILESYSTEM\):" { session_filesystem[$2] = $4 }

# Detect the type of a backup or restore (ZFS, tar, etc.)
$3 == "env\(TYPE\):" {
    split($4,tempstr,/"/)       # split out name from surrounding quotes
    session_type[$2] = tempstr[2]
}
$3 ~ /env\(ZFS.*/ { session_type[$2] = "zfs" }
$3 == "TYPE:" { session_type[$2] = $4 }

# Detect whether token-based or level-based, and which level
$3 == "env(BASE_DATE):" { session_token[$2] = $4 }
$3 == "env(LEVEL):" { session_level[$2] = $4 }

# Detect the number of bytes transferred during a session. The different
# session types (zfs, tar, etc.) and directions (backup or restore) are not
# consistent in reporting this, so accept several spellings and keep the
# largest value seen for the session.
$3 == "BYTES_TOTAL:" || $3 == "ZFS_BACKUP_SIZE:" || $3 == "bksize" {
    value = $4 + 0.0
    if (!($2 in session_bytes) || (value > session_bytes[$2])) {
        session_bytes[$2] = value
    }
}

# Detect the end of a backup/restore operation. The different session types
# are not consistent in reporting this either, so several alternatives are
# treated as end-of-session markers.
$3 == "BYTES_TOTAL:" { end_transfer_session($2) }

# End of operation with detection of evidence of an incomplete operation
$3 == "Connection" && $4 == "closed" && $5 == "remotely" {
    session_incomplete[$2] = 1
    end_transfer_session($2)
}
$3 == "INCOMPLETE" {
    session_incomplete[$2] = 1
    end_transfer_session($2)
}

# Detect any logged lines that are flagged as errors. Finding one of these
# in an active session sometimes means the session is terminated, so treat
# this as an end-of-session indicator as well.
# (continued) In cases where the session was not terminated by the error we
# risk missing some simultaneous-session counts, but the ambiguity makes this
# a necessary evil, and data transfer times are generally unaffected because
# a later true end-of-session detection overwrites session_end anyway.
$3 ~ /\*\*\*/ {
    errors++
    end_transfer_session($2)
    error_stat[error_tag()]++
}

# ------------------------------------------------------------------------------
# END processing takes place after the log file has been completely scanned,
# and emits statistics from the collected values (as XML output;
# NOTE(review): the literal tags inside the printf format strings below were
# stripped from this copy of the file, leaving only the indentation and
# %-conversions -- format strings kept byte-identical). If no log entries
# fell within the requested time range there is nothing to print, and END
# exits silently.
END {
    # Only produce printed output if any statistics were gathered; this
    # screens out log files with no data inside the stated time range.
    data_exists = 0
    for (x in session_start) {
        data_exists = 1
        break
    }
    error_exists = 0
    for (x in error_stat) {
        error_exists = 1
        break
    }
    if (data_exists || error_exists) {
        printf("\t\n", logfilename)
        printf("\t\t%s\n", range_start)
        printf("\t\t%s\n", range_end)
    }
    if (data_exists) {
        max_simultaneous = 0
        sort_count = 0
        session_count = 0
        for (x in session_start) {
            # Defaults for sessions whose type/direction never appeared.
            if (!(x in session_type)) { session_type[x] = "dump" }
            if (!(x in session_direction)) { session_direction[x] = "unknown" }
            duration = int(session_end[x]) - int(session_start[x])
            if (session_bytes[x] != 0 && duration > 0) {
                sortable_rate[sort_count] = session_bytes[x] / duration
                sort_count++
            }
            if (session_simult[x] > max_simultaneous) {
                max_simultaneous = session_simult[x]
                max_simultaneous_time = session_end[x]
            }
            # Build a tally key such as "zfs-backup-level-0-incomplete".
            tally_name = (session_type[x] "-" session_direction[x])
            if (x in session_token) {
                split(session_token[x],tmp_array,"\"")
                if (int(tmp_array[2]) == 0) {
                    tally_name = (tally_name "-token-0")
                } else {
                    tally_name = (tally_name "-token-non0")
                }
            } else {
                if (x in session_level) {
                    split(session_level[x],tmp_array,"\"")
                    tally_name = \
                        (tally_name "-level-" tmp_array[2])
                }
            }
            if (x in session_incomplete) {
                tally_name = (tally_name "-incomplete")
            }
            tally[tally_name]++
            byte_tally[tally_name] += session_bytes[x]
            byte_total += session_bytes[x]
            session_count++
        }
        if (full_listing != 0) {
            # Full listing: re-emit each session in numeric id order.
            session_count = 0
            for (x in session_start) {
                split(x, data, /[\[\]]/)    # "[123]" -> 123
                value = data[2] + 0
                session_index[session_count] = value
                session_count++
            }
            qsort(session_index, 0, session_count-1)
            # NOTE(review): text lost here -- originally something like
            # "for (i=0; i<session_count; i++) { x = sprintf(\"[%d]\",
            # session_index[i]); printf(\"...<session ...>\n\", x)".
            # Surviving fragment kept verbatim:
            for (i=0; i\n", x) printf("\t\t\t%s\n", get_iso8601(session_start[x]))
            printf("\t\t\t%s\n", get_iso8601(session_end[x]))
            printf("\t\t\t%s\n", session_type[x])
            printf("\t\t\t%s\n", session_direction[x])
            if (x in session_token) {
                printf("\t\t\t%s\n", session_token[x])
            }
            printf("\t\t\t%s\n", xml_clean(session_filesystem[x]))
            printf("\t\t\t%f\n", session_bytes[x]/1000000000.0)
            duration = int(session_end[x]) - int(session_start[x])
            printf("\t\t\t%d\n", \
                duration)
            if (session_bytes[x] != 0 && duration > 0) {
                data_rate = session_bytes[x] / duration
            } else {
                data_rate = 0
            }
            printf("\t\t\t%f\n", data_rate/1000000.0)
            printf("\t\t\t%d\n", session_simult[x])
            printf("\t\t\n")
            # NOTE(review): the opening "{" of the lost for-loop above
            # matched the first of these braces.
        }
    }
        printf("\t\t%d\n", session_count)
        printf("\t\t%f\n", byte_total/1000000000.0)
        tally_count=0
        for (x in tally) {
            sorted_tally_name[tally_count] = x
            tally_count = tally_count+1
        }
        qsort(sorted_tally_name, 0, tally_count-1)
        # NOTE(review): text lost here -- originally something like
        # "for (i=0; i<tally_count; i++) { x = sorted_tally_name[i]
        # printf(\"...<tally>...\", ...)". Surviving fragment verbatim:
        for (i=0; i\n\t\t\t%s\n", xml_clean(x)) printf("\t\t\t%d\n", tally[x])
            printf("\t\t\t%f\n", byte_tally[x]/1000000000.0)
            printf("\t\t\n")
        }
        # NOTE(review): "count" is never assigned anywhere in the surviving
        # text; the rate loop above fills sortable_rate[0..sort_count-1],
        # so the original presumably used sort_count here (or assigned
        # count in a region that was lost). Preserved exactly as found:
        qsort(sortable_rate, 0, count-1)
        printf("\t\t%f\n", sortable_rate[0]/1000000.0)
        if (count%2 == 0) {
            # Even count: median is the mean of the two middle rates.
            printf("\t\t%f\n", \
                (sortable_rate[count/2]+sortable_rate[count/2 - 1])\
                /2/1000000.0)
        } else {
            printf("\t\t%f\n", \
                sortable_rate[int(count/2)]/1000000.0)
        }
        printf("\t\t%f\n", \
            sortable_rate[count-1]/1000000.0)
        printf("\t\t%d\n", \
            max_simultaneous)
        printf("\t\t%s\n", \
            get_iso8601(max_simultaneous_time))
    }
    if (error_exists) {
        for (x in error_stat) {
            printf("\t\t\n\t\t\t%s\n", xml_clean(x))
            printf("\t\t\t%d\n\t\t\n", error_stat[x])
        }
    }
    if (data_exists || error_exists) {
        printf("\t\n")
    }
}

# ------------------------------------------------------------------------------
# Internal functions used by various other routines within this script
# ------------------------------------------------------------------------------
# get_unixtime(timestamp): compute unix time from an ISO8601 timestamp string
# (as in NDMP logs). save_timestamp and save_unixtime are globals that cache
# the previous conversion from call to call. The extra args after "timestamp"
# act as locals and are never passed by callers (idiomatic awk). An input
# that yields a single field when split on "T", ":", "+" and "-" is assumed
# to be an integer unix time already and is passed back unchanged.
# Otherwise, we have a compound ISO8601 timestamp. The field count after
# splitting determines which variation we have:
#   yyyy-mm-ddThh:mm:ss[+-]xxxx  (7 numeric fields, explicit zone)
#   yyyy-mm-ddThh:mm:ssZ         (6 fields; trailing "Z" means UTC, +0000)
#   yyyy-mm-ddThh:mm:ss          (6 fields, no "Z": local time, -0000)
# Any other field count is an unrecognized format and returns -1.
# Side effects: leaves ts_zone_plus / ts_zone_mag holding the sign and
# magnitude of the zone last parsed; get_iso8601() reads these globals
# later, so they must NOT be made local here.
function get_unixtime(timestamp, \
    tmp_epoch, tmp_time, tmp_fields, ts_year, ts_month, ts_day, ts_hour, \
    ts_min, ts_sec)
{       # all parameters after "timestamp" are used as local variables
    # Cache hit: same timestamp as the previous call, reuse its result.
    if (timestamp == save_timestamp) {
        return save_unixtime
    }
    # Otherwise, update saved timestamp and proceed.
    save_timestamp = timestamp
    # match() returns 0 when no "+" is present, so this doubles as the
    # "zone is positive" flag (non-zero position) vs. negative/absent (0).
    ts_zone_plus = match(save_timestamp,/\+/)
    tmp_fields = split(save_timestamp, tmp_time, "[T:+-]")
    if (tmp_fields == 1) {
        # Single field: already an integer unix time; pass it back.
        save_unixtime = int(tmp_time[1])
        return save_unixtime
    }
    if (tmp_fields == 6) {
        if (tmp_time[6] ~ /[Zz]/) {
            # Final field contains "Z": treat the zone as "+0000".
            tmp_time[6] += 0.0  # numeric coercion drops the "Z"
            ts_zone_plus = 1    # "Z", same as "+0000"
        } else {
            # No "Z" and no zone field: assume local time, i.e. "-0000".
            ts_zone_plus = 0
            local_time = 1;     # NOTE(review): global flag, apparently
                                # unused elsewhere in this copy -- confirm
        }
        tmp_time[7] = "0000"
        tmp_fields++            # now normalized to 7 fields
    }
    if (tmp_fields != 7) {
        # None of the recognized formats: signal an error.
        save_unixtime = -1
        return save_unixtime
    }
    ts_year = tmp_time[1]
    ts_month = tmp_time[2]
    ts_day = tmp_time[3]
    ts_hour = tmp_time[4]
    ts_min = tmp_time[5]
    ts_sec = tmp_time[6]
    ts_zone_mag = tmp_time[7]
    # The unix epoch starts 1970-01-01, but an epoch of 1969-01-01 makes
    # leap years a simple modulo-4 problem: int((year-1969) * 365.25) is
    # the day count from 1969-01-01 to the start of ts_year, correct for
    # every year from 1969 through 2099.
    tmp_epoch = int((ts_year - 1969) * 365.25)
    # Add days elapsed within ts_year up to ts_day of ts_month; subtract
    # one from ts_day to make it 0-based instead of 1-based. Year 3 of
    # each modulo-4 block (1972, 1976, ...) is the leap year.
    if ((ts_year-1969) % 4 == 3) {
        tmp_epoch += leap_day_numb[int(ts_month)] + ts_day - 1
    } else {
        tmp_epoch += day_numb[int(ts_month)] + ts_day - 1
    }
    # Leap-year handling done: rebase from epoch 1969 to the unix epoch
    # 1970 by dropping the 365 days of 1969. (An earlier comment claimed
    # "731 days ... from epoch 1968"; the code subtracts 365, which is
    # what the 1969 epoch established above requires.)
    tmp_epoch -= 365    # 365 is the number of days in 1969
    # Fold the time zone into hour/minute. This may leave them outside
    # their normal calendar ranges (possibly negative), which is fine:
    # for the final conversion, 2 days -6 hours equals 1 day +18 hours.
    if (ts_zone_plus) {
        ts_hour -= int(ts_zone_mag/100)
        ts_min -= ts_zone_mag%100
    } else {
        ts_hour += int(ts_zone_mag/100)
        ts_min += ts_zone_mag%100
    }
    # Convert epoch days into epoch seconds, folding in hours, minutes,
    # and seconds (unnormalized values are handled correctly here).
    save_unixtime = ((tmp_epoch * 24 + ts_hour) * 60 + ts_min) * 60 + ts_sec
    return save_unixtime
}

# ------------------------------------------------------------------------------
# get_iso8601 (below) converts the other direction: unix time to an ISO8601
# timestamp string (as in NDMP logs), using the zone most recently captured
# in the globals ts_zone_plus / ts_zone_mag by get_unixtime().
# Note: save_timestamp and save_unixtime are globals that survive calls.
# NOTE(review): the original header here was largely copied from
# get_unixtime() and described parsing, which this function does not do.
# get_iso8601(unixtime): format a unix time as an ISO8601 timestamp string
# "yyyy-mm-ddThh:mm:ss[+-]zzzz", expressed in the time zone held in the
# globals ts_zone_plus / ts_zone_mag (set by get_unixtime()). All parameters
# after "unixtime" act as local variables (idiomatic awk).
function get_iso8601(unixtime, \
    tmp_epoch, tmp_local_time, tmp_fields, ts_year, ts_month, ts_day, \
    ts_hour, ts_min, ts_sec, local_i)
{
    # NOTE(review): computed but never used afterwards in this copy.
    tmp_local_time = (ts_zone_mag == "0000" && ts_zone_plus == 0)
    # Shift the GMT-based epoch seconds into the local zone so the
    # formatted string shows local time.
    tmp_epoch = unixtime
    if (ts_zone_plus) {
        tmp_epoch += int(ts_zone_mag/100) * 3600
        tmp_epoch += ts_zone_mag%100 * 60
    } else {
        tmp_epoch -= int(ts_zone_mag/100) * 3600
        tmp_epoch -= ts_zone_mag%100 * 60
    }
    # Rebase from epoch 1970 to epoch 1969, so that leap years fall on a
    # modulo-4 boundary (valid over the full unix time range 1969..2099).
    tmp_epoch += 365*24*60*60   # 365 is the number of days in 1969
    # Peel off seconds, minutes, and hours; tmp_epoch becomes whole days
    # since the start of 1969.
    ts_sec = tmp_epoch%60
    tmp_epoch = int(tmp_epoch/60)
    ts_min = tmp_epoch%60
    tmp_epoch = int(tmp_epoch/60)
    ts_hour = tmp_epoch%24
    tmp_epoch = int(tmp_epoch/24)
    # Count whole 4-year blocks (three 365-day years + one 366-day year)
    # since 1969, then the remaining years within the current block.
    ts_leaps = int(tmp_epoch / (365+365+365+366))
    ts_year = ts_leaps*4
    tmp_epoch -= ts_leaps * (365+365+365+366)
    if (tmp_epoch >= 365*3) {
        ts_year += 3
        tmp_epoch -= 365*3
    } else if (tmp_epoch >= 365*2) {
        ts_year += 2
        tmp_epoch -= 365*2
    } else if (tmp_epoch >= 365) {
        ts_year += 1
        tmp_epoch -= 365
    }
    day_in_year = tmp_epoch     # NOTE(review): global, not in locals list
    # Find the month by scanning the cumulative-days table. The loop
    # overshoots day_in_year by one month, hence ts_month = local_i - 1.
    if (ts_year%4 == 3) {       # year 3 of each block is the leap year
        for (local_i=1; local_i<=12; local_i++) {
            if (day_in_year < leap_day_numb[local_i]) {
                break
            }
        }
        ts_month = local_i-1
        # Day in month; add 1 to make it 1-based from 0-based.
        ts_day = day_in_year - leap_day_numb[ts_month] + 1
    } else {
        for (local_i=1; local_i<=12; local_i++) {
            if (day_in_year < day_numb[local_i]) {
                break
            }
        }
        ts_month = local_i-1
        # Day in month; add 1 to make it 1-based from 0-based.
        ts_day = day_in_year - day_numb[ts_month] + 1
    }
    # Convert ts_year back to a Gregorian year.
    ts_year += 1969
    return sprintf("%04d-%02d-%02dT%02d:%02d:%02d%1s%4s", \
        ts_year, ts_month, ts_day, ts_hour, ts_min, ts_sec, \
        (ts_zone_plus?"+":"-"), ts_zone_mag)
}

# ------------------------------------------------------------------------------
# qsort(A, left, right): in-place quicksort of A[left..right] with a random
# pivot (nawk has no built-in sort to draw upon). local_i and last act as
# local variables.
function qsort(A, left, right, local_i, last)
{
    if (left >= right)
        return
    # Move a randomly chosen pivot to A[left] to avoid pathological input.
    swap(A, left, left+int((right-left+1)*rand()))
    last = left
    for (local_i = left+1; local_i <= right; local_i++) {
        if (A[local_i] < A[left]) {
            swap(A, ++last, local_i)
        }
    }
    swap(A, left, last)         # pivot into its final position
    qsort(A, left, last-1)
    qsort(A, last+1, right)
}

# Exchange A[i] and A[j]; t acts as a local variable.
function swap(A, i, j, t)
{
    t = A[i]; A[i] = A[j]; A[j] = t
}

# ------------------------------------------------------------------------------
# Data-transfer session tracking:
#   1) records the start and end times of each data transfer session;
#   2) monitors the number of simultaneous data transfer sessions.
# start_transfer_session(id) must be called whenever the start of a new data
# transfer session is encountered; id is the session id recorded in field 2
# of each NDMP log line. Not every NDMP connection results in a data
# transfer, so calling this function is what marks a session as tracked.
# Calling it more than once for a given session is harmless.
function start_transfer_session(id)
{
    if (session_start[id] == 0) {
        # First sighting of this session id.
        # NOTE(review): "unixtime" is a global presumed set by the per-line
        # timestamp rule (lost from this copy of the file) -- confirm.
        session_start[id] = unixtime
        simultaneous++
    }
}

# ------------------------------------------------------------------------------
# end_transfer_session(id) must be called whenever the end of a data
# transfer session is encountered. End-of-session can be ambiguous to detect
# with simple line-parsing rules, so redundant calls are made harmless, as
# are calls for a session for which start_transfer_session was never called.
function end_transfer_session(id)
{
    if (id in session_start) {
        # start_transfer_session() registered this id, so record its end.
        # "unixtime" is the global per-line timestamp set by the main rules.
        session_end[id] = unixtime
        if (session_simult[id] == 0) {
            # First end event for this id: capture the simultaneous-session
            # count and decrement the active count (never below zero).
            # Redundant end calls for the same id are harmless no-ops.
            session_simult[id] = simultaneous
            if (simultaneous > 0) {
                simultaneous--
            }
        }
    }
}

# ------------------------------------------------------------------------------
# xml_clean(string): return a copy of string with the five XML-special
# characters replaced by their predefined entities, so the value can be
# embedded safely in the XML report. "&" must be translated first so that
# the entities introduced by the later substitutions are not themselves
# re-escaped. In a gsub() replacement string, "&" means "the matched text",
# so a literal ampersand is written "\\&".
# (Fix: the escape strings in this copy had been corrupted by entity
# decoding/tag stripping -- the entity names were missing and the "<"
# substitution had lost its pattern entirely, leaving gsub(//, ...).
# Restored to the proper &amp; &quot; &apos; &lt; &gt; sequences.)
function xml_clean(string)
{
    gsub(/&/, "\\&amp;", string)
    gsub(/"/, "\\&quot;", string)
    gsub(/'/, "\\&apos;", string)
    gsub(/</, "\\&lt;", string)
    gsub(/>/, "\\&gt;", string)
    return string
}

# ------------------------------------------------------------------------------
# error_tag(): reduce the current error line ($0) to a canonical tag so that
# identical errors can be counted together. The leading timestamp and
# session-number fields are irrelevant (they differ between identical
# errors), and the invariant "*** E R R O R ***" banner is discarded;
# everything after the banner is significant.
# To comply with customer privacy policies, IP addresses and authorization
# targets are masked for Phone Home audits (full_listing == 0); full
# listings are only used for local analysis and keep them intact.
function error_tag( tmp_i, tmp_count, tmp_array, ret_tag)
{
    ret_tag = $0
    tmp_count = split(ret_tag, tmp_array, / \*\*\* E R R O R \*\*\* /)
    if (tmp_count >= 2) {
        ret_tag = tmp_array[2]  # keep only the text after the banner
    }
    if (full_listing == 0) {
        # Substitute xx.xx.xx.xx for literal IP addresses, and mask
        # authorization-denied targets, when not doing a full listing.
        gsub(/[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/, "xx.xx.xx.xx", ret_tag)
        gsub(/Authorization denied for \(.*\)/, \
            "Authorization denied for (******)", ret_tag)
    }
    return ret_tag
}