Differences

This shows you the differences between two versions of the page.

--- public:documents:raw_olap_data_formats [2011-01-25 18:58] – Jan David Mol
+++ public:documents:raw_olap_data_formats [2017-03-08 15:27] (current) – external edit 127.0.0.1
@@ Line 1: / Line 1: @@
-===== Raw OLAP data formats ====
+===== Raw OLAP data formats (obsolete) =====
-OLAP produces several data formats, which are intended to be replaced by their final format, such as HDF5. The formats below are not officially supported and subject to change without notice.
+OLAP produces several data formats, which are intended to be replaced by their final format, such as HDF5.
+===== After 2011-10-24 =====
+Files adhere to the following naming scheme: ''Liiiii_SAPsssss_Bbbb_Sz_bf.raw'', with:
+  - ''iiiii'' = SAS observation ID
+  - ''sssss'' = Station beam number (SAP)
+  - ''bbb'' = Tied-array beam number (TAB)
+  - ''z'' = Stokes number
+The Stokes numbers are to be interpreted as follows:
+  - Complex Voltages:
+     - z = 0 -> Xr (X polarisation, real part)
+     - z = 1 -> Xi (X polarisation, imaginary part)
+     - z = 2 -> Yr (Y polarisation, real part)
+     - z = 3 -> Yi (Y polarisation, imaginary part)
+  - Coherent/incoherent Stokes:
+     - z = 0 -> I
+     - z = 1 -> Q
+     - z = 2 -> U
+     - z = 3 -> V
+The data is encoded as follows. Each .raw file consists of a whole number of repetitions of the following structure. All data is written as big-endian 32-bit IEEE floats.
+<code>
+struct block {
+  float sample[SUBBANDS][CHANNELS];
+};
+</code>
+The constants used can be derived from the parset:
+<code>
+  SUBBANDS = len(parset["Observation.subbandList"])
+  if (complex voltages || coherent stokes) {
+    CHANNELS = parset["OLAP.CNProc_CoherentStokes.channelsPerSubband"]
+    if (CHANNELS == 0) CHANNELS = parset["Observation.channelsPerSubband"]
+  } elif (incoherent stokes) {
+    CHANNELS = parset["OLAP.CNProc_IncoherentStokes.channelsPerSubband"]
+    if (CHANNELS == 0) CHANNELS = parset["Observation.channelsPerSubband"]
+  }
+</code>
+The sampling rate can be derived as follows:
+<code>
+  # clock frequency (e.g. 200 MHz)
+  clock_hz = parset["Observation.sampleClock"] * 1.0e6
+  # subband frequency (e.g. 195 kHz)
+  base_subband_hz = clock_hz / 1024
+  # channel frequency (e.g. 763 Hz)
+  base_nrchannels = parset["Observation.channelsPerSubband"]
+  base_channel_hz = base_subband_hz / base_nrchannels
+  if(complex voltages || coherent stokes) {
+    cs_temporalintegration = parset["OLAP.CNProc_CoherentStokes.timeIntegrationFactor"]
+    sample_hz = base_channel_hz / cs_temporalintegration
+  } elif(incoherent stokes) {
+    is_temporalintegration = parset["OLAP.CNProc_IncoherentStokes.timeIntegrationFactor"]
+    sample_hz = base_channel_hz / is_temporalintegration
+  }
+</code>
+===== Before 2011-10-24 =====
 Data can be recorded as either complex voltages (yielding X and Y polarisations) or one or more Stokes parameters. In either case, a sequence of blocks will be stored, each of which consists of a header and data. The header is defined as:
@@ Line 18: / Line 95: @@
 |Lxxxxx_Byyy_S0_bf.raw|X polarisations of beam yyy of observation xxxxx|
 |Lxxxxx_Byyy_S1_bf.raw|Y polarisations of beam yyy of observation xxxxx|
+Proposed is the following scheme:
+|Lxxxxx_Byyy_S0_bf.raw|X polarisation (real part) of beam yyy of observation xxxxx|
+|Lxxxxx_Byyy_S1_bf.raw|X polarisation (imaginary part) of beam yyy of observation xxxxx|
+|Lxxxxx_Byyy_S2_bf.raw|Y polarisation (real part) of beam yyy of observation xxxxx|
+|Lxxxxx_Byyy_S3_bf.raw|Y polarisation (imaginary part) of beam yyy of observation xxxxx|
 Each file is a sequence of blocks of the following structure:
@@ Line 24: / Line 108: @@
 struct block {
   struct header header;
+  /* each block contains SAMPLES samples. The data structure is two samples larger (|2) for
+     technical reasons, but those two samples do not actually exist, and thus should be read
+     and immediately discarded. Time should just be incremented SAMPLES samples per block. */
   /* big endian */
@@ Line 32: / Line 120: @@
   // 2010-06-29 release and earlier stored data per subband instead of per beam:
   fcomplex voltages[BEAMS][CHANNELS][SAMPLES|2][POLARIZATIONS];
   */
 };
 </code>
@@ Line 52: / Line 140: @@
 <code C>
+// Since 2011-10-24, Stokes are just a continuous stream of samples:
+struct block {
+  float stokes[SAMPLES][SUBBANDS][CHANNELS];
+};
+// Before 2011-10-24:
 struct block {
   struct header header;
+  /* each block contains SAMPLES samples. The data structure is two samples larger (|2) for
+     technical reasons, but those two samples do not actually exist, and thus should be read
+     and immediately discarded. Time should just be incremented SAMPLES samples per block. */
   /* big endian */
@@ Line 82: / Line 180: @@
 struct block {
   struct header header;
+  /* each block contains SAMPLES samples. The data structure is two samples larger (|2) for
+     technical reasons, but those two samples do not actually exist, and thus should be read
+     and immediately discarded. Time should just be incremented SAMPLES samples per block. */
   /* big endian */
@@ Line 107: / Line 209: @@
 ==== BFRaw format ====
-Raw station data can be stored in a format called BFRaw. This format is used for debugging purposes and is not a regular observation mode, it takes more manpower to record it. The BFRaw format is recorded below for those who need to access it:
+Raw station data can be stored in a format called BFRaw. This format is used for debugging purposes and is not a regular observation mode; it takes more manpower to record. The BFRaw format is documented below for those who need to access it.
-To be cleaned up.
+A BFRaw file starts with a file header containing the configuration:
-<code>
+<code C>
-  /* format */
+struct file_header
-class BFRawFormat
 {
+  // 0x3F8304EC, also determines endianness
+  uint32_t    magic;
+  // The number of bits per sample (16)
+  uint8_t     bitsPerSample;
+  // The number of polarizations (2)
+  uint8_t     nrPolarizations;
+  // Number of subbands, maximum of 62
+  uint16_t    nrSubbands;
+  // 155648 (160 MHz) or 196608 (200 MHz)
+  uint32_t    nrSamplesPerSubband;
+  // Name of the station
+  char      station[20];
+  // The sample rate: 156250.0 or 195312.5 .. double (number of samples per second for each subband)
+  double   sampleRate;
+  // The frequencies within a subband
+  double   subbandFrequencies[62];
+  // The beam pointing directions (RA, DEC in J2000)
+  double   beamDirections[8][2];
+  // mapping from subbands to beams (SAPs)
+  int16_t     subbandToSAPmapping[62];
+  // Padding to circumvent 8-byte alignment
+  uint32_t    padding;
+};
+</code>
-    public:
+After the file header, there is a series of blocks until the end of file, configured using values from the file header:
-        static const short maxNrSubbands = 62;
+<code C>
+struct block
+{
+  // 0x2913D852
+  uint32_t      magic;
-            //! Components of the BFRaw header
+  // per-SAP information (up to 8 SAPs can be defined, but typically only 1 is used)
-        struct BFRaw_Header
-        {
-                    //! 0x3F8304EC, also determines endianness
-            uint32_t    magic;
-                    //! The number of bits per sample
-            uint8_t     bitsPerSample;
-                    //! The number of polarizations
-            uint8_t     nrPolarizations;
-                    //! Number of subbands
-            uint16_t    nrSubbands;
-                    //! 155648 (160Mhz) or 196608 (200Mhz)
-            uint32_t    nrSamplesPerSubband;
-                    //! Name of the station
-            char      station[20];
-                    //! The sample rate: 156250.0 or 195312.5 .. double (number of samples per second for each subband)
-            double   sampleRate;
-                    //! The frequencies within a subband
-            double   subbandFrequencies[maxNrSubbands];
-                    //! The beam pointing directions
-            double   beamDirections[8][2];
-                    //! mapping from subbands to beams
-            int16_t     subbandToSAPmapping[maxNrSubbands];
-                    //! Padding to circumvent 8-byte alignment
-            uint32_t    padding;
-        } header;
-            //! Components of the header of a single block of raw data
+  // number of samples the signal is shifted to align the station beam to the reference
-        struct BlockHeader
+  // phase center (=Observation.referencePhaseCenter in the parset)
-        {
+  int32_t       coarseDelayApplied[8];
-                    //! 0x2913D852
+  // Padding to circumvent 8-byte alignment
-            uint32_t      magic;
+  uint8_t       padding[4];
-            int32_t       coarseDelayApplied[8];
-                    //! Padding to circumvent 8-byte alignment
-            uint8_t       padding[4];
-            double     fineDelayRemainingAtBegin[8];
-            double     fineDelayRemainingAfterEnd[8];
-                    //! Compatible with TimeStamp class.
-            int64_t      time[8];
-                    //      uint32_t      nrFlagsRanges[8];
+  // the sub-sample delay which still has to be compensated for (in seconds),
-                    /*
+  // at the beginning and at the end of the block
-            struct range
+  double     fineDelayRemainingAtBegin[8];
-            {
+  double     fineDelayRemainingAfterEnd[8];
-            uint32_t    begin; // inclusive
+  // Compatible with TimeStamp class (see below)
-            uint32_t    end;   // exclusive
+  int64_t      time[8];
-        } flagsRanges[8][16];
-                    */
-            struct marshalledFlags
+  struct marshalledFlags
-            {
+  {
-                uint32_t      nrFlagsRanges;
+    // up to 16 ranges of flagged samples within this block
-                struct range
+    uint32_t      nrFlagsRanges;
-                {
+    struct range
-                    uint32_t    begin; // inclusive
+    {
-                    uint32_t    end;   // exclusive
+      uint32_t    begin; // inclusive
-                } flagsRanges[16];
+      uint32_t    end;   // exclusive
-            } flags[8];
+    } flagsRanges[16];
-         } block_header;
+  } flags[8];
+  std::complex<int16_t>
-            // dataStruct is 8 bytes
+    samples[fileHeader.nrSubbands][fileHeader.nrSamplesPerSubband][fileHeader.nrPolarizations];
-        struct Sample
-        {
-            std::complex<int16_t> xx;
-            std::complex<int16_t> yy;
-        };
 };
+</code>
-  /* write routine */
+To convert a TimeStamp-compatible int64_t to a C-readable timestamp, use
-  std::string stationName = itsPS->getStationNamesAndRSPboardNumbers(itsPsetNumber)[0].station; // TODO: support more than one station
+<code C>
+/* clockspeed is in Hz */
-  vector<unsigned> subbandToSAPmapping     = itsPS->subbandToSAPmapping();
+int64_t nanoseconds = (int64_t) (timestamp * 1024 * 1e9 / clockspeed);
-  vector<unsigned> subbandToRSPboardMapping = itsPS->subbandToRSPboardMapping(stationName);
-  vector<unsigned> subbandToRSPslotMapping  = itsPS->subbandToRSPslotMapping(stationName);
-  unsigned         nrSubbands               = itsPS->nrSubbands();
-  BFRawFormat      bfraw_data;
-  if (!itsFileHeaderWritten) {
-    if (nrSubbands > 62)
-      THROW(IONProcException, "too many subbands for raw data format");
-    memset(&bfraw_data.header, 0, sizeof bfraw_data.header);
-    bfraw_data.header.magic               = 0x3F8304EC;
-    bfraw_data.header.bitsPerSample       = 16;
-    bfraw_data.header.nrPolarizations     = 2;
-    bfraw_data.header.nrSubbands          = nrSubbands;
-    bfraw_data.header.nrSamplesPerSubband = itsNrSamplesPerSubband;
-    bfraw_data.header.sampleRate          = itsSampleRate;
-    strncpy(bfraw_data.header.station, itsPS->getStationNamesAndRSPboardNumbers(itsPsetNumber)[0].station.c_str(), sizeof bfraw_data.header.station);
-    memcpy(bfraw_data.header.subbandFrequencies, &itsPS->subbandToFrequencyMapping()[0], nrSubbands * sizeof(double));
-    for (unsigned beam = 0; beam < itsNrBeams; beam ++)
-      memcpy(bfraw_data.header.beamDirections[beam], &itsPS->getBeamDirection(beam)[0], sizeof bfraw_data.header.beamDirections[beam]);
-    itsRawDataStream->write(&bfraw_data.header, sizeof bfraw_data.header);
-    itsFileHeaderWritten = true;
-  }
-  memset(&bfraw_data.block_header, 0, sizeof bfraw_data.block_header);
-  bfraw_data.block_header.magic = 0x2913D852;
-  for (unsigned beam = 0; beam < itsNrBeams; beam ++) {
-    bfraw_data.block_header.coarseDelayApplied[beam]     = itsSamplesDelay[beam];
-    bfraw_data.block_header.fineDelayRemainingAtBegin[beam]      = itsFineDelaysAtBegin[beam][0];
-    bfraw_data.block_header.fineDelayRemainingAfterEnd[beam] = itsFineDelaysAfterEnd[beam][0];
-    bfraw_data.block_header.time[beam]                   = itsDelayedStamps[beam];
-    // FIXME: the current BlockHeader format does not provide space for
-    // the flags from multiple RSP boards --- use the flags of RSP board 0
-    itsFlags[0][beam].marshall(reinterpret_cast<char *>(&bfraw_data.block_header.flags[beam]), sizeof(BFRawFormat::BlockHeader::marshalledFlags));
-  }
-  itsRawDataStream->write(&bfraw_data.block_header, sizeof bfraw_data.block_header);
-  for (unsigned subband = 0; subband < nrSubbands; subband ++)
+struct timespec ts;
-    itsBeamletBuffers[subbandToRSPboardMapping[subband]]->sendUnalignedSubband(itsRawDataStream, subbandToRSPslotMapping[subband], subbandToSAPmapping[subband]);
+ts.tv_sec  = nanoseconds / 1000000000ULL;
+ts.tv_nsec = nanoseconds % 1000000000ULL;
 </code>