Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision | ||
public:documents:raw_olap_data_formats [2010-10-22 08:23] – Jan David Mol | public:documents:raw_olap_data_formats [2017-03-08 15:27] (current) – external edit 127.0.0.1 | ||
---|---|---|---|
Line 1: | Line 1: | ||
- | ==== Raw OLAP data formats ==== | + | ===== Raw OLAP data formats |
- | OLAP produces several data formats, which are intended to be replaced by their final format, such as HDF5. The formats below are not officially supported and subject | + | OLAP produces several data formats, which are intended to be replaced by their final format, such as HDF5. |
+ | |||
+ | ===== After 2011-10-24 ===== | ||
+ | |||
+ | Files adhere to the following naming scheme: '' | ||
+ | |||
+ | - '' | ||
+ | - '' | ||
+ | - '' | ||
+ | - '' | ||
+ | |||
+ | The stokes numbers | ||
+ | |||
+ | - Complex Voltages: | ||
+ | - z = 0 -> Xr (X polarisation, | ||
+ | - z = 1 -> Xi (X polarisation, | ||
+ | - z = 2 -> Yr (Y polarisation, | ||
+ | - z = 3 -> Yi (Y polarisation, | ||
+ | - Coherent/ | ||
+ | - z = 0 -> I | ||
+ | - z = 1 -> Q | ||
+ | - z = 2 -> U | ||
+ | - z = 3 -> V | ||
+ | |||
+ | The data is encoded as follows. Each .raw file is a multiple of the following structure. All data is written as big-endian 32-bit IEEE floats. | ||
+ | |||
+ | < | ||
+ | struct block { | ||
+ | float sample[SUBBANDS][CHANNELS]; | ||
+ | }; | ||
+ | </ | ||
+ | |||
+ | The constants used can be derived from the parset: | ||
+ | |||
+ | < | ||
+ | SUBBANDS = len(parset[" | ||
+ | |||
+ | if (complex voltages || coherent stokes) { | ||
+ | |||
+ | CHANNELS = parset[" | ||
+ | if (CHANNELS == 0) CHANNELS = parset[" | ||
+ | |||
+ | } elif (incoherent stokes) { | ||
+ | |||
+ | CHANNELS = parset[" | ||
+ | if (CHANNELS == 0) CHANNELS = parset[" | ||
+ | |||
+ | } | ||
+ | </ | ||
+ | |||
+ | The sampling rate can be derived as follows: | ||
+ | |||
+ | < | ||
+ | # clock frequency (e.g. 200 MHz) | ||
+ | clock_hz = parset[" | ||
+ | |||
+ | # subband frequency (e.g. 195 kHz) | ||
+ | base_subband_hz = clock_hz / 1024 | ||
+ | |||
+ | # channel frequency (e.g. 763 Hz) | ||
+ | base_nrchannels = parset[" | ||
+ | base_channel_hz = base_subband_hz / base_nrchannels | ||
+ | |||
+ | if(complex voltages || coherent stokes) { | ||
+ | cs_temporalintegration = parset[" | ||
+ | |||
+ | sample_hz = base_channel_hz / cs_temporalintegration | ||
+ | |||
+ | } elif(incoherent stokes) { | ||
+ | |||
+ | is_temporalintegration = parset[" | ||
+ | |||
+ | sample_hz = base_channel_hz / is_temporalintegration | ||
+ | } | ||
+ | |||
+ | </ | ||
+ | |||
+ | ===== Before 2011-10-24 ===== | ||
Data can be recorded as either complex voltages (yielding X and Y polarisations) or one or more stokes. In either case, a sequence of blocks will be stored, each of which consists of a header and data. The header is defined as: | Data can be recorded as either complex voltages (yielding X and Y polarisations) or one or more stokes. In either case, a sequence of blocks will be stored, each of which consists of a header and data. The header is defined as: | ||
Line 12: | Line 89: | ||
in which sequence_number starts at 0, and is increased by 1 for every block. Missing sequence numbers imply missing data. The padding can have any value and is to be ignored. | in which sequence_number starts at 0, and is increased by 1 for every block. Missing sequence numbers imply missing data. The padding can have any value and is to be ignored. | ||
- | == Complex Voltages == | + | ==== Complex Voltages |
Each (pencil) beam produces two files: one containing the X polarisation, | Each (pencil) beam produces two files: one containing the X polarisation, | ||
- | |Lxxxxx_Byyy_S0-bf.raw|X polarisations of beam yyy of observation xxxxx| | + | |Lxxxxx_Byyy_S0_bf.raw|X polarisations of beam yyy of observation xxxxx| |
- | |Lxxxxx_Byyy_S1-bf.raw|Y polarisations of beam yyy of observation xxxxx| | + | |Lxxxxx_Byyy_S1_bf.raw|Y polarisations |
+ | |||
+ | Proposed is the following scheme: | ||
+ | |||
+ | |Lxxxxx_Byyy_S0_bf.raw|X polarisation (real part) of beam yyy of observation xxxxx| | ||
+ | |Lxxxxx_Byyy_S1_bf.raw|X polarisation (imaginary part) of beam yyy of observation xxxxx| | ||
+ | |Lxxxxx_Byyy_S2_bf.raw|Y polarisation (real part) of beam yyy of observation xxxxx| | ||
+ | |Lxxxxx_Byyy_S3_bf.raw|Y polarisation (imaginary part) of beam yyy of observation xxxxx| | ||
Each file is a sequence of blocks of the following structure: | Each file is a sequence of blocks of the following structure: | ||
Line 24: | Line 108: | ||
struct block { | struct block { | ||
struct header header; | struct header header; | ||
+ | |||
+ | /* each block contains SAMPLES samples. The data structure is two samples larger (|2) for | ||
+ | | ||
+ | and immediately discarded. Time should just be incremented SAMPLES samples per block. */ | ||
/* big endian */ | /* big endian */ | ||
+ | // 2010-09-20 release and later: | ||
fcomplex voltages[SAMPLES|2][SUBBANDS][CHANNELS]; | fcomplex voltages[SAMPLES|2][SUBBANDS][CHANNELS]; | ||
+ | |||
+ | /* | ||
+ | // 2010-06-29 release and earlier stored data per subband instead of per beam: | ||
+ | fcomplex voltages[BEAMS][CHANNELS][SAMPLES|2][POLARIZATIONS]; | ||
+ | */ | ||
}; | }; | ||
</ | </ | ||
- | == Coherent Stokes == | + | Older releases: |
+ | 2010-09-20: | ||
+ | - filenames ended in -bf.raw instead of _bf.raw | ||
+ | |||
+ | ==== Coherent Stokes | ||
Each (pencil) beam produces one or four files: one containing the Stokes I (power) values, and optionally three files for Stokes Q, U, and V, respectively. The names of these files adhere to the following scheme: | Each (pencil) beam produces one or four files: one containing the Stokes I (power) values, and optionally three files for Stokes Q, U, and V, respectively. The names of these files adhere to the following scheme: | ||
- | |Lxxxxx_Byyy_S0-bf.raw|Stokes I of beam yyy of observation xxxxx| | + | |Lxxxxx_Byyy_S0_bf.raw|Stokes I of beam yyy of observation xxxxx| |
- | |Lxxxxx_Byyy_S1-bf.raw|Stokes Q of beam yyy of observation xxxxx| | + | |Lxxxxx_Byyy_S1_bf.raw|Stokes Q of beam yyy of observation xxxxx| |
- | |Lxxxxx_Byyy_S2-bf.raw|Stokes U of beam yyy of observation xxxxx| | + | |Lxxxxx_Byyy_S2_bf.raw|Stokes U of beam yyy of observation xxxxx| |
- | |Lxxxxx_Byyy_S3-bf.raw|Stokes V of beam yyy of observation xxxxx| | + | |Lxxxxx_Byyy_S3_bf.raw|Stokes V of beam yyy of observation xxxxx| |
- | + | ||
- | Currently (release 2010-09-20), | + | |
Each file is a sequence of blocks of the following structure: | Each file is a sequence of blocks of the following structure: | ||
<code C> | <code C> | ||
+ | // Since 2011-10-24, Stokes are just a continuous stream of samples: | ||
+ | struct block { | ||
+ | float stokes[SAMPLES][SUBBANDS][CHANNELS]; | ||
+ | }; | ||
+ | |||
+ | // Before 2011-10-24: | ||
struct block { | struct block { | ||
struct header header; | struct header header; | ||
+ | |||
+ | /* each block contains SAMPLES samples. The data structure is two samples larger (|2) for | ||
+ | | ||
+ | and immediately discarded. Time should just be incremented SAMPLES samples per block. */ | ||
/* big endian */ | /* big endian */ | ||
+ | // 2010-09-20 release and later: | ||
float stokes[SAMPLES|2][SUBBANDS][CHANNELS]; | float stokes[SAMPLES|2][SUBBANDS][CHANNELS]; | ||
+ | |||
+ | /* | ||
+ | // 2010-06-29 release and earlier stored data per subband instead of per beam: | ||
+ | fcomplex voltages[BEAMS][CHANNELS][SAMPLES|2][STOKES]; | ||
+ | */ | ||
}; | }; | ||
</ | </ | ||
- | == Incoherent Stokes == | + | Older releases: |
+ | 2010-09-20: | ||
+ | - Values of Stokes U and V are multiplied by 1/2 | ||
+ | - filenames ended in -bf.raw instead of _bf.raw | ||
+ | |||
+ | ==== Incoherent Stokes | ||
Incoherent stokes are stored per subband, with one or four stokes per file, using the following naming convention: | Incoherent stokes are stored per subband, with one or four stokes per file, using the following naming convention: | ||
- | |Lxxxxx_SByyy-bf.incoherentstokes|Stokes of subband yyy of observation xxxxx| | + | |Lxxxxx_SByyy_bf.incoherentstokes|Stokes of subband yyy of observation xxxxx| |
- | + | ||
- | Currently (release 2010-09-20), | + | |
Each file is a sequence of blocks of the following structure: | Each file is a sequence of blocks of the following structure: | ||
Line 65: | Line 180: | ||
struct block { | struct block { | ||
struct header header; | struct header header; | ||
+ | |||
+ | /* each block contains SAMPLES samples. The data structure is two samples larger (|2) for | ||
+ | | ||
+ | and immediately discarded. Time should just be incremented SAMPLES samples per block. */ | ||
/* big endian */ | /* big endian */ | ||
+ | // 2010-10-25 release and later: | ||
+ | float stokes[STOKES][CHANNELS][SAMPLES|2]; | ||
+ | |||
+ | /* | ||
+ | // 2010-09-20 release: | ||
float stokes[STOKES][SAMPLES|2][CHANNELS]; | float stokes[STOKES][SAMPLES|2][CHANNELS]; | ||
- | /* format next release: | + | // 2010-06-29 |
- | float stokes[STOKES][CHANNELS][SAMPLES|2]; | + | float stokes[CHANNELS][SAMPLES|2][STOKES]; |
*/ | */ | ||
}; | }; | ||
Line 76: | Line 200: | ||
The order in which the Stokes values are stored is: I, Q, U, V. | The order in which the Stokes values are stored is: I, Q, U, V. | ||
+ | |||
+ | Older releases: | ||
+ | 2010-09-20: | ||
+ | - Values of Stokes U and V are multiplied by 1/2 | ||
+ | - filenames ended in -bf.raw instead of _bf.raw | ||
+ | - data order changed | ||
+ | |||
+ | ==== BFRaw format ==== | ||
+ | |||
+ | Raw station data can be stored in a format called BFRaw. This format is used for debugging purposes and is not a regular observation mode; recording it takes more manpower. The BFRaw format is documented below for those who need to access it. | ||
+ | |||
+ | A BFRaw file starts with a file header containing the configuration: | ||
+ | |||
+ | <code C> | ||
+ | struct file_header | ||
+ | { | ||
+ | // 0x3F8304EC, also determines endianness | ||
+ | uint32_t | ||
+ | // The number of bits per sample (16) | ||
+ | uint8_t | ||
+ | // The number of polarizations (2) | ||
+ | uint8_t | ||
+ | // Number of subbands, maximum of 62 | ||
+ | uint16_t | ||
+ | // 155648 (160 MHz) or 196608 (200 MHz) | ||
+ | uint32_t | ||
+ | // Name of the station | ||
+ | char station[20]; | ||
+ | // The sample rate as a double (number of samples per second for each subband): 156250.0 or 195312.5 | ||
+ | double | ||
+ | // The frequencies within a subband | ||
+ | double | ||
+ | // The beam pointing directions (RA, DEC in J2000) | ||
+ | double | ||
+ | // mapping from subbands to beams (SAPs) | ||
+ | int16_t | ||
+ | // Padding to circumvent 8-byte alignment | ||
+ | uint32_t | ||
+ | }; | ||
+ | </ | ||
+ | |||
+ | After the file header, there is a series of blocks until the end of file, configured using values from the file header: | ||
+ | |||
+ | <code C> | ||
+ | struct block | ||
+ | { | ||
+ | // 0x2913D852 | ||
+ | uint32_t | ||
+ | |||
+ | // per-SAP information (up to 8 SAPs can be defined, but typically only 1 is used) | ||
+ | |||
+ | // number of samples the signal is shifted to align the station beam to the reference | ||
+ | // phase center (=Observation.referencePhaseCenter in the parset) | ||
+ | int32_t | ||
+ | // Padding to circumvent 8-byte alignment | ||
+ | uint8_t | ||
+ | |||
+ | // the sub-sample delay which still has to be compensated for (in seconds), | ||
+ | // at the beginning and at the end of the block | ||
+ | double | ||
+ | double | ||
+ | // Compatible with TimeStamp class (see below) | ||
+ | int64_t | ||
+ | |||
+ | struct marshalledFlags | ||
+ | { | ||
+ | // up to 16 ranges of flagged samples within this block | ||
+ | uint32_t | ||
+ | struct range | ||
+ | { | ||
+ | uint32_t | ||
+ | uint32_t | ||
+ | } flagsRanges[16]; | ||
+ | } flags[8]; | ||
+ | |||
+ | std:: | ||
+ | samples[fileHeader.nrSubbands][fileHeader.nrSamplesPerSubband][fileHeader.nrPolarizations]; | ||
+ | }; | ||
+ | </ | ||
+ | |||
+ | To convert a TimeStamp-compatible int64_t to a C-readable timestamp, use | ||
+ | <code C> | ||
+ | /* clockspeed is in Hz */ | ||
+ | int64 nanoseconds = (int64) (timestamp * 1024 * 1e9 / clockspeed); | ||
+ | |||
+ | struct timespec ts; | ||
+ | ts.tv_sec | ||
+ | ts.tv_nsec = nanoseconds % 1000000000ULL; | ||
+ | </ | ||
==== Types and constants ==== | ==== Types and constants ==== | ||
Line 276: | Line 488: | ||
unsigned CHANNELS = 16; // Observation.channelsPerSubband | unsigned CHANNELS = 16; // Observation.channelsPerSubband | ||
unsigned SAMPLES | unsigned SAMPLES | ||
- | unsigned FLOATSPERSAMPLE = 2; // 1 for Stokes, 2 for Complex Voltages (real and imaginary parts) | + | unsigned FLOATSPERSAMPLE = 1; // 1 for Stokes, 2 for Complex Voltages (real and imaginary parts) |
struct header header; | struct header header; | ||
Line 326: | Line 538: | ||
} | } | ||
</ | </ | ||
+ | |||
+ | ==== Changelog for each release ==== | ||
+ | |||
+ | |2010-10-25|Incoherent Stokes data order changed| | ||
+ | | |File naming scheme changed (-bf -> _bf)| | ||
+ | | |Stokes U and V are no longer multiplied by 1/2| | ||
+ | |2010-09-20|First release documented| |