Differences
This shows you the differences between two versions of the page.
| Both sides previous revision Previous revision Next revision | Previous revision | ||
| public:documents:raw_olap_data_formats [2010-10-21 15:11] – Jan David Mol | public:documents:raw_olap_data_formats [2017-03-08 15:27] (current) – external edit 127.0.0.1 | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| - | ==== Raw OLAP data formats ==== | + | ===== Raw OLAP data formats |
| - | OLAP produces several data formats, which are intended to be replaced by their final format, such as HDF5. The formats below are not officially supported and subject | + | OLAP produces several data formats, which are intended to be replaced by their final format, such as HDF5. |
| + | |||
| + | ===== After 2011-10-24 ===== | ||
| + | |||
| + | Files adhere to the following naming scheme: '' | ||
| + | |||
| + | - '' | ||
| + | - '' | ||
| + | - '' | ||
| + | - '' | ||
| + | |||
| + | The stokes numbers | ||
| + | |||
| + | - Complex Voltages: | ||
| + | - z = 0 -> Xr (X polarisation, | ||
| + | - z = 1 -> Xi (X polarisation, | ||
| + | - z = 2 -> Yr (Y polarisation, | ||
| + | - z = 3 -> Yi (Y polarisation, | ||
| + | - Coherent/ | ||
| + | - z = 0 -> I | ||
| + | - z = 1 -> Q | ||
| + | - z = 2 -> U | ||
| + | - z = 3 -> V | ||
| + | |||
| + | The data is encoded as follows. Each .raw file consists of a whole number of repetitions of the following structure. All data is written as big-endian 32-bit IEEE floats. | ||
| + | |||
| + | < | ||
| + | struct block { | ||
| + | float sample[SUBBANDS][CHANNELS]; | ||
| + | }; | ||
| + | </ | ||
| + | |||
| + | The constants used can be derived from the parset: | ||
| + | |||
| + | < | ||
| + | SUBBANDS = len(parset[" | ||
| + | |||
| + | if (complex voltages || coherent stokes) { | ||
| + | |||
| + | CHANNELS = parset[" | ||
| + | if (CHANNELS == 0) CHANNELS = parset[" | ||
| + | |||
| + | } elif (incoherent stokes) { | ||
| + | |||
| + | CHANNELS = parset[" | ||
| + | if (CHANNELS == 0) CHANNELS = parset[" | ||
| + | |||
| + | } | ||
| + | </ | ||
| + | |||
| + | The sampling rate can be derived as follows: | ||
| + | |||
| + | < | ||
| + | # clock frequency (e.g. 200 MHz) | ||
| + | clock_hz = parset[" | ||
| + | |||
| + | # subband frequency (e.g. 195 kHz) | ||
| + | base_subband_hz = clock_hz / 1024 | ||
| + | |||
| + | # channel frequency (e.g. 763 Hz) | ||
| + | base_nrchannels = parset[" | ||
| + | base_channel_hz = base_subband_hz / base_nrchannels | ||
| + | |||
| + | if(complex voltages || coherent stokes) { | ||
| + | cs_temporalintegration = parset[" | ||
| + | |||
| + | sample_hz = base_channel_hz / cs_temporalintegration | ||
| + | |||
| + | } elif(incoherent stokes) { | ||
| + | |||
| + | is_temporalintegration = parset[" | ||
| + | |||
| + | sample_hz = base_channel_hz / is_temporalintegration | ||
| + | } | ||
| + | |||
| + | </ | ||
| + | |||
| + | ===== Before 2011-10-24 ===== | ||
| Data can be recorded as either complex voltages (yielding X and Y polarisations) or one or more stokes. In either case, a sequence of blocks will be stored, each of which consists of a header and data. The header is defined as: | Data can be recorded as either complex voltages (yielding X and Y polarisations) or one or more stokes. In either case, a sequence of blocks will be stored, each of which consists of a header and data. The header is defined as: | ||
| Line 12: | Line 89: | ||
| in which sequence_number starts at 0, and is increased by 1 for every block. Missing sequence numbers imply missing data. The padding can have any value and is to be ignored. | in which sequence_number starts at 0, and is increased by 1 for every block. Missing sequence numbers imply missing data. The padding can have any value and is to be ignored. | ||
| - | == Complex Voltages == | + | ==== Complex Voltages |
| Each (pencil) beam produces two files: one containing the X polarisation, | Each (pencil) beam produces two files: one containing the X polarisation, | ||
| - | |Lxxxxx_Byyy_S0-bf.raw|X polarisations of beam yyy of observation xxxxx| | + | |Lxxxxx_Byyy_S0_bf.raw|X polarisations of beam yyy of observation xxxxx| |
| - | |Lxxxxx_Byyy_S1-bf.raw|Y polarisations of beam yyy of observation xxxxx| | + | |Lxxxxx_Byyy_S1_bf.raw|Y polarisations |
| + | |||
| + | Proposed is the following scheme: | ||
| + | |||
| + | |Lxxxxx_Byyy_S0_bf.raw|X polarisation (real part) of beam yyy of observation xxxxx| | ||
| + | |Lxxxxx_Byyy_S1_bf.raw|X polarisation (imaginary part) of beam yyy of observation xxxxx| | ||
| + | |Lxxxxx_Byyy_S2_bf.raw|Y polarisation (real part) of beam yyy of observation xxxxx| | ||
| + | |Lxxxxx_Byyy_S3_bf.raw|Y polarisation (imaginary part) of beam yyy of observation xxxxx| | ||
| Each file is a sequence of blocks of the following structure: | Each file is a sequence of blocks of the following structure: | ||
| Line 24: | Line 108: | ||
| struct block { | struct block { | ||
| struct header header; | struct header header; | ||
| + | |||
| + | /* each block contains SAMPLES samples. The data structure is two samples larger (|2) for | ||
| + | | ||
| + | and immediately discarded. Time should just be incremented SAMPLES samples per block. */ | ||
| /* big endian */ | /* big endian */ | ||
| + | // 2010-09-20 release and later: | ||
| fcomplex voltages[SAMPLES|2][SUBBANDS][CHANNELS]; | fcomplex voltages[SAMPLES|2][SUBBANDS][CHANNELS]; | ||
| - | } | + | |
| + | /* | ||
| + | // 2010-06-29 release and earlier stored data per subband instead of per beam: | ||
| + | fcomplex voltages[BEAMS][CHANNELS][SAMPLES|2][POLARIZATIONS]; | ||
| + | */ | ||
| + | }; | ||
| </ | </ | ||
| - | == Coherent Stokes == | + | Older releases: |
| + | 2010-09-20: | ||
| + | - filenames ended in -bf.raw instead of _bf.raw | ||
| + | |||
| + | ==== Coherent Stokes | ||
| Each (pencil) beam produces one or four files: one containing the Stokes I (power) values, and optionally three files for Stokes Q, U, and V, respectively. The names of these files adhere to the following scheme: | Each (pencil) beam produces one or four files: one containing the Stokes I (power) values, and optionally three files for Stokes Q, U, and V, respectively. The names of these files adhere to the following scheme: | ||
| - | |Lxxxxx_Byyy_S0-bf.raw|Stokes I of beam yyy of observation xxxxx| | + | |Lxxxxx_Byyy_S0_bf.raw|Stokes I of beam yyy of observation xxxxx| |
| - | |Lxxxxx_Byyy_S1-bf.raw|Stokes Q of beam yyy of observation xxxxx| | + | |Lxxxxx_Byyy_S1_bf.raw|Stokes Q of beam yyy of observation xxxxx| |
| - | |Lxxxxx_Byyy_S2-bf.raw|Stokes U of beam yyy of observation xxxxx| | + | |Lxxxxx_Byyy_S2_bf.raw|Stokes U of beam yyy of observation xxxxx| |
| - | |Lxxxxx_Byyy_S3-bf.raw|Stokes V of beam yyy of observation xxxxx| | + | |Lxxxxx_Byyy_S3_bf.raw|Stokes V of beam yyy of observation xxxxx| |
| - | + | ||
| - | Currently (release 2010-09-20), | + | |
| Each file is a sequence of blocks of the following structure: | Each file is a sequence of blocks of the following structure: | ||
| <code C> | <code C> | ||
| + | // Since 2011-10-24, Stokes are just a continuous stream of samples: | ||
| + | struct block { | ||
| + | float stokes[SAMPLES][SUBBANDS][CHANNELS]; | ||
| + | }; | ||
| + | |||
| + | // Before 2011-10-24: | ||
| struct block { | struct block { | ||
| struct header header; | struct header header; | ||
| + | |||
| + | /* each block contains SAMPLES samples. The data structure is two samples larger (|2) for | ||
| + | | ||
| + | and immediately discarded. Time should just be incremented SAMPLES samples per block. */ | ||
| /* big endian */ | /* big endian */ | ||
| + | // 2010-09-20 release and later: | ||
| float stokes[SAMPLES|2][SUBBANDS][CHANNELS]; | float stokes[SAMPLES|2][SUBBANDS][CHANNELS]; | ||
| - | } | + | |
| + | /* | ||
| + | // 2010-06-29 release and earlier stored data per subband instead of per beam: | ||
| + | fcomplex voltages[BEAMS][CHANNELS][SAMPLES|2][STOKES]; | ||
| + | */ | ||
| + | }; | ||
| </ | </ | ||
| - | == Incoherent Stokes == | + | Older releases: |
| + | 2010-09-20: | ||
| + | - Values of Stokes U and V are multiplied by 1/2 | ||
| + | - filenames ended in -bf.raw instead of _bf.raw | ||
| + | |||
| + | ==== Incoherent Stokes | ||
| Incoherent stokes are stored per subband, with one or four stokes per file, using the following naming convention: | Incoherent stokes are stored per subband, with one or four stokes per file, using the following naming convention: | ||
| - | |Lxxxxx_SByyy-bf.incoherentstokes|Stokes of subband yyy of observation xxxxx| | + | |Lxxxxx_SByyy_bf.incoherentstokes|Stokes of subband yyy of observation xxxxx| |
| - | + | ||
| - | Currently (release 2010-09-20), | + | |
| Each file is a sequence of blocks of the following structure: | Each file is a sequence of blocks of the following structure: | ||
| Line 65: | Line 180: | ||
| struct block { | struct block { | ||
| struct header header; | struct header header; | ||
| + | |||
| + | /* each block contains SAMPLES samples. The data structure is two samples larger (|2) for | ||
| + | | ||
| + | and immediately discarded. Time should just be incremented SAMPLES samples per block. */ | ||
| /* big endian */ | /* big endian */ | ||
| + | // 2010-10-25 release and later: | ||
| + | float stokes[STOKES][CHANNELS][SAMPLES|2]; | ||
| + | |||
| + | /* | ||
| + | // 2010-09-20 release: | ||
| float stokes[STOKES][SAMPLES|2][CHANNELS]; | float stokes[STOKES][SAMPLES|2][CHANNELS]; | ||
| - | /* format next release: | + | // 2010-06-29 |
| - | float stokes[STOKES][CHANNELS][SAMPLES|2]; | + | float stokes[CHANNELS][SAMPLES|2][STOKES]; |
| */ | */ | ||
| - | } | + | }; |
| </ | </ | ||
| The order in which the Stokes values are stored is: I, Q, U, V. | The order in which the Stokes values are stored is: I, Q, U, V. | ||
| + | |||
| + | Older releases: | ||
| + | 2010-09-20: | ||
| + | - Values of Stokes U and V are multiplied by 1/2 | ||
| + | - filenames ended in -bf.raw instead of _bf.raw | ||
| + | - data order changed | ||
| + | |||
| + | ==== BFRaw format ==== | ||
| + | |||
| + | Raw station data can be stored in a format called BFRaw. This format is used for debugging purposes and is not a regular observation mode; recording it takes more manpower. The BFRaw format is documented below for those who need to access it. | ||
| + | |||
| + | A BFRaw file starts with a file header containing the configuration: | ||
| + | |||
| + | <code C> | ||
| + | struct file_header | ||
| + | { | ||
| + | // 0x3F8304EC, also determines endianness | ||
| + | uint32_t | ||
| + | // The number of bits per sample (16) | ||
| + | uint8_t | ||
| + | // The number of polarizations (2) | ||
| + | uint8_t | ||
| + | // Number of subbands, maximum of 62 | ||
| + | uint16_t | ||
| + | // 155648 (160 MHz) or 196608 (200 MHz) | ||
| + | uint32_t | ||
| + | // Name of the station | ||
| + | char station[20]; | ||
| + | // The sample rate as a double: 156250.0 or 195312.5 (number of samples per second for each subband) | ||
| + | double | ||
| + | // The frequencies within a subband | ||
| + | double | ||
| + | // The beam pointing directions (RA, DEC in J2000) | ||
| + | double | ||
| + | // mapping from subbands to beams (SAPs) | ||
| + | int16_t | ||
| + | // Padding to circumvent 8-byte alignment | ||
| + | uint32_t | ||
| + | }; | ||
| + | </ | ||
| + | |||
| + | After the file header, there is a series of blocks until the end of file, configured using values from the file header: | ||
| + | |||
| + | <code C> | ||
| + | struct block | ||
| + | // 0x2913D852 | ||
| + | uint32_t | ||
| + | |||
| + | // per-SAP information (up to 8 SAPs can be defined, but typically only 1 is used) | ||
| + | |||
| + | // number of samples the signal is shifted to align the station beam to the reference | ||
| + | // phase center (=Observation.referencePhaseCenter in the parset) | ||
| + | int32_t | ||
| + | // Padding to circumvent 8-byte alignment | ||
| + | uint8_t | ||
| + | |||
| + | // the sub-sample delay which still has to be compensated for (in seconds), | ||
| + | // at the beginning and at the end of the block | ||
| + | double | ||
| + | double | ||
| + | // Compatible with TimeStamp class (see below) | ||
| + | int64_t | ||
| + | |||
| + | struct marshalledFlags | ||
| + | { | ||
| + | // up to 16 ranges of flagged samples within this block | ||
| + | uint32_t | ||
| + | struct range | ||
| + | { | ||
| + | uint32_t | ||
| + | uint32_t | ||
| + | } flagsRanges[16]; | ||
| + | } flags[8]; | ||
| + | |||
| + | std:: | ||
| + | samples[fileHeader.nrSubbands][fileHeader.nrSamplesPerSubband][fileHeader.nrPolarizations]; | ||
| + | }; | ||
| + | </ | ||
| + | |||
| + | To convert a TimeStamp-compatible int64_t to a C-readable timestamp, use | ||
| + | <code C> | ||
| + | /* clockspeed is in Hz */ | ||
| + | int64 nanoseconds = (int64) (timestamp * 1024 * 1e9 / clockspeed); | ||
| + | |||
| + | struct timespec ts; | ||
| + | ts.tv_sec | ||
| + | ts.tv_nsec = nanoseconds % 1000000000ULL; | ||
| + | </ | ||
| ==== Types and constants ==== | ==== Types and constants ==== | ||
| Line 94: | Line 306: | ||
| Constants can be computed using the parset file. Below is a translation between the C constants used above and their respective parset keys: | Constants can be computed using the parset file. Below is a translation between the C constants used above and their respective parset keys: | ||
| - | |SAMPLES |The number of time samples in a block | + | |SAMPLES |The number of time samples in a block |
| |SUBBANDS|The number of subbands (beamlets) specified | |SUBBANDS|The number of subbands (beamlets) specified | ||
| |CHANNELS|The number of channels per subband | |CHANNELS|The number of channels per subband | ||
| Line 108: | Line 320: | ||
| <code C> | <code C> | ||
| + | #include < | ||
| + | |||
| uint32_t swap_uint32( uint32_t x ) | uint32_t swap_uint32( uint32_t x ) | ||
| { | { | ||
| Line 132: | Line 346: | ||
| char c[4]; | char c[4]; | ||
| float f; | float f; | ||
| - | } dst;return | + | } dst; |
| dst.c[0] = x[3]; | dst.c[0] = x[3]; | ||
| Line 216: | Line 430: | ||
| <code C> | <code C> | ||
| - | // TODO: Test this code | ||
| - | |||
| #include " | #include " | ||
| #include < | #include < | ||
| Line 258: | Line 470: | ||
| char c[4]; | char c[4]; | ||
| float f; | float f; | ||
| - | } dst;return | + | } dst; |
| dst.c[0] = x[3]; | dst.c[0] = x[3]; | ||
| Line 270: | Line 482: | ||
| int main() | int main() | ||
| { | { | ||
| - | unsigned SUBBANDS = 248; | + | |
| - | unsigned CHANNELS = 256; | + | // http:// |
| - | unsigned SAMPLES | + | |
| + | | ||
| + | unsigned CHANNELS = 16; // Observation.channelsPerSubband | ||
| + | unsigned SAMPLES | ||
| + | unsigned FLOATSPERSAMPLE = 1; // 1 for Stokes, 2 for Complex Voltages (real and imaginary parts) | ||
| struct header header; | struct header header; | ||
| Line 278: | Line 494: | ||
| // the raw_array is read from disk and converted to the float_array | // the raw_array is read from disk and converted to the float_array | ||
| - | // the extra dimension [2] covers the real and imaginary parts of each fcomplex | ||
| // the extra dimension [4] covers the size of a float in chars in the raw_array | // the extra dimension [4] covers the size of a float in chars in the raw_array | ||
| - | boost:: | + | boost:: |
| - | boost:: | + | boost:: |
| - | FILE *f = fopen( "L12345_B000_S0-bf.raw", " | + | FILE *f = fopen( "L09330_B000_S0-example-stokes-I-248-subbands-16-channels-763-samples.raw", " |
| if (!f) { | if (!f) { | ||
| puts( "Could not open input file." ); | puts( "Could not open input file." ); | ||
| Line 323: | Line 538: | ||
| } | } | ||
| </ | </ | ||
| + | |||
| + | ==== Changelog for each release ==== | ||
| + | |||
| + | |2010-10-25|Incoherent Stokes data order changed| | ||
| + | | |File naming scheme changed (-bf -> _bf)| | ||
| + | | |Stokes U and V are no longer multiplied by 1/2| | ||
| + | |2010-09-20|First release documented| | ||