A bundled STM32F10x Std Periph and CMSIS library
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

406 lines
12 KiB

  1. /* ----------------------------------------------------------------------
  2. * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
  3. *
  4. * $Date: 12. March 2014
  5. * $Revision: V1.4.4
  6. *
  7. * Project: CMSIS DSP Library
  8. * Title: arm_biquad_cascade_df1_q31.c
  9. *
  10. * Description: Processing function for the
  11. * Q31 Biquad cascade filter
  12. *
  13. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  14. *
  15. * Redistribution and use in source and binary forms, with or without
  16. * modification, are permitted provided that the following conditions
  17. * are met:
  18. * - Redistributions of source code must retain the above copyright
  19. * notice, this list of conditions and the following disclaimer.
  20. * - Redistributions in binary form must reproduce the above copyright
  21. * notice, this list of conditions and the following disclaimer in
  22. * the documentation and/or other materials provided with the
  23. * distribution.
  24. * - Neither the name of ARM LIMITED nor the names of its contributors
  25. * may be used to endorse or promote products derived from this
  26. * software without specific prior written permission.
  27. *
  28. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  31. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  32. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  33. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  34. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  35. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  36. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  37. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  38. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  39. * POSSIBILITY OF SUCH DAMAGE.
  40. * -------------------------------------------------------------------- */
  41. #include "arm_math.h"
  42. /**
  43. * @ingroup groupFilters
  44. */
  45. /**
  46. * @addtogroup BiquadCascadeDF1
  47. * @{
  48. */
  49. /**
  50. * @brief Processing function for the Q31 Biquad cascade filter.
  51. * @param[in] *S points to an instance of the Q31 Biquad cascade structure.
  52. * @param[in] *pSrc points to the block of input data.
  53. * @param[out] *pDst points to the block of output data.
  54. * @param[in] blockSize number of samples to process per call.
  55. * @return none.
  56. *
  57. * <b>Scaling and Overflow Behavior:</b>
  58. * \par
  59. * The function is implemented using an internal 64-bit accumulator.
  60. * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
  61. * Thus, if the accumulator result overflows it wraps around rather than clip.
  62. * In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25).
  63. * After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to
  64. * 1.31 format by discarding the low 32 bits.
  65. *
  66. * \par
  67. * Refer to the function <code>arm_biquad_cascade_df1_fast_q31()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.
  68. */
  69. void arm_biquad_cascade_df1_q31(
  70. const arm_biquad_casd_df1_inst_q31 * S,
  71. q31_t * pSrc,
  72. q31_t * pDst,
  73. uint32_t blockSize)
  74. {
  75. q63_t acc; /* accumulator */
  76. uint32_t uShift = ((uint32_t) S->postShift + 1u);
  77. uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */
  78. q31_t *pIn = pSrc; /* input pointer initialization */
  79. q31_t *pOut = pDst; /* output pointer initialization */
  80. q31_t *pState = S->pState; /* pState pointer initialization */
  81. q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */
  82. q31_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */
  83. q31_t b0, b1, b2, a1, a2; /* Filter coefficients */
  84. q31_t Xn; /* temporary input */
  85. uint32_t sample, stage = S->numStages; /* loop counters */
  86. #ifndef ARM_MATH_CM0_FAMILY_FAMILY
  87. q31_t acc_l, acc_h; /* temporary output variables */
  88. /* Run the below code for Cortex-M4 and Cortex-M3 */
  89. do
  90. {
  91. /* Reading the coefficients */
  92. b0 = *pCoeffs++;
  93. b1 = *pCoeffs++;
  94. b2 = *pCoeffs++;
  95. a1 = *pCoeffs++;
  96. a2 = *pCoeffs++;
  97. /* Reading the state values */
  98. Xn1 = pState[0];
  99. Xn2 = pState[1];
  100. Yn1 = pState[2];
  101. Yn2 = pState[3];
  102. /* Apply loop unrolling and compute 4 output values simultaneously. */
  103. /* The variable acc hold output values that are being computed:
  104. *
  105. * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
  106. */
  107. sample = blockSize >> 2u;
  108. /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
  109. ** a second loop below computes the remaining 1 to 3 samples. */
  110. while(sample > 0u)
  111. {
  112. /* Read the input */
  113. Xn = *pIn++;
  114. /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
  115. /* acc = b0 * x[n] */
  116. acc = (q63_t) b0 *Xn;
  117. /* acc += b1 * x[n-1] */
  118. acc += (q63_t) b1 *Xn1;
  119. /* acc += b[2] * x[n-2] */
  120. acc += (q63_t) b2 *Xn2;
  121. /* acc += a1 * y[n-1] */
  122. acc += (q63_t) a1 *Yn1;
  123. /* acc += a2 * y[n-2] */
  124. acc += (q63_t) a2 *Yn2;
  125. /* The result is converted to 1.31 , Yn2 variable is reused */
  126. /* Calc lower part of acc */
  127. acc_l = acc & 0xffffffff;
  128. /* Calc upper part of acc */
  129. acc_h = (acc >> 32) & 0xffffffff;
  130. /* Apply shift for lower part of acc and upper part of acc */
  131. Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
  132. /* Store the output in the destination buffer. */
  133. *pOut++ = Yn2;
  134. /* Read the second input */
  135. Xn2 = *pIn++;
  136. /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
  137. /* acc = b0 * x[n] */
  138. acc = (q63_t) b0 *Xn2;
  139. /* acc += b1 * x[n-1] */
  140. acc += (q63_t) b1 *Xn;
  141. /* acc += b[2] * x[n-2] */
  142. acc += (q63_t) b2 *Xn1;
  143. /* acc += a1 * y[n-1] */
  144. acc += (q63_t) a1 *Yn2;
  145. /* acc += a2 * y[n-2] */
  146. acc += (q63_t) a2 *Yn1;
  147. /* The result is converted to 1.31, Yn1 variable is reused */
  148. /* Calc lower part of acc */
  149. acc_l = acc & 0xffffffff;
  150. /* Calc upper part of acc */
  151. acc_h = (acc >> 32) & 0xffffffff;
  152. /* Apply shift for lower part of acc and upper part of acc */
  153. Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
  154. /* Store the output in the destination buffer. */
  155. *pOut++ = Yn1;
  156. /* Read the third input */
  157. Xn1 = *pIn++;
  158. /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
  159. /* acc = b0 * x[n] */
  160. acc = (q63_t) b0 *Xn1;
  161. /* acc += b1 * x[n-1] */
  162. acc += (q63_t) b1 *Xn2;
  163. /* acc += b[2] * x[n-2] */
  164. acc += (q63_t) b2 *Xn;
  165. /* acc += a1 * y[n-1] */
  166. acc += (q63_t) a1 *Yn1;
  167. /* acc += a2 * y[n-2] */
  168. acc += (q63_t) a2 *Yn2;
  169. /* The result is converted to 1.31, Yn2 variable is reused */
  170. /* Calc lower part of acc */
  171. acc_l = acc & 0xffffffff;
  172. /* Calc upper part of acc */
  173. acc_h = (acc >> 32) & 0xffffffff;
  174. /* Apply shift for lower part of acc and upper part of acc */
  175. Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
  176. /* Store the output in the destination buffer. */
  177. *pOut++ = Yn2;
  178. /* Read the forth input */
  179. Xn = *pIn++;
  180. /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
  181. /* acc = b0 * x[n] */
  182. acc = (q63_t) b0 *Xn;
  183. /* acc += b1 * x[n-1] */
  184. acc += (q63_t) b1 *Xn1;
  185. /* acc += b[2] * x[n-2] */
  186. acc += (q63_t) b2 *Xn2;
  187. /* acc += a1 * y[n-1] */
  188. acc += (q63_t) a1 *Yn2;
  189. /* acc += a2 * y[n-2] */
  190. acc += (q63_t) a2 *Yn1;
  191. /* The result is converted to 1.31, Yn1 variable is reused */
  192. /* Calc lower part of acc */
  193. acc_l = acc & 0xffffffff;
  194. /* Calc upper part of acc */
  195. acc_h = (acc >> 32) & 0xffffffff;
  196. /* Apply shift for lower part of acc and upper part of acc */
  197. Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
  198. /* Every time after the output is computed state should be updated. */
  199. /* The states should be updated as: */
  200. /* Xn2 = Xn1 */
  201. /* Xn1 = Xn */
  202. /* Yn2 = Yn1 */
  203. /* Yn1 = acc */
  204. Xn2 = Xn1;
  205. Xn1 = Xn;
  206. /* Store the output in the destination buffer. */
  207. *pOut++ = Yn1;
  208. /* decrement the loop counter */
  209. sample--;
  210. }
  211. /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
  212. ** No loop unrolling is used. */
  213. sample = (blockSize & 0x3u);
  214. while(sample > 0u)
  215. {
  216. /* Read the input */
  217. Xn = *pIn++;
  218. /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
  219. /* acc = b0 * x[n] */
  220. acc = (q63_t) b0 *Xn;
  221. /* acc += b1 * x[n-1] */
  222. acc += (q63_t) b1 *Xn1;
  223. /* acc += b[2] * x[n-2] */
  224. acc += (q63_t) b2 *Xn2;
  225. /* acc += a1 * y[n-1] */
  226. acc += (q63_t) a1 *Yn1;
  227. /* acc += a2 * y[n-2] */
  228. acc += (q63_t) a2 *Yn2;
  229. /* The result is converted to 1.31 */
  230. acc = acc >> lShift;
  231. /* Every time after the output is computed state should be updated. */
  232. /* The states should be updated as: */
  233. /* Xn2 = Xn1 */
  234. /* Xn1 = Xn */
  235. /* Yn2 = Yn1 */
  236. /* Yn1 = acc */
  237. Xn2 = Xn1;
  238. Xn1 = Xn;
  239. Yn2 = Yn1;
  240. Yn1 = (q31_t) acc;
  241. /* Store the output in the destination buffer. */
  242. *pOut++ = (q31_t) acc;
  243. /* decrement the loop counter */
  244. sample--;
  245. }
  246. /* The first stage goes from the input buffer to the output buffer. */
  247. /* Subsequent stages occur in-place in the output buffer */
  248. pIn = pDst;
  249. /* Reset to destination pointer */
  250. pOut = pDst;
  251. /* Store the updated state variables back into the pState array */
  252. *pState++ = Xn1;
  253. *pState++ = Xn2;
  254. *pState++ = Yn1;
  255. *pState++ = Yn2;
  256. } while(--stage);
  257. #else
  258. /* Run the below code for Cortex-M0 */
  259. do
  260. {
  261. /* Reading the coefficients */
  262. b0 = *pCoeffs++;
  263. b1 = *pCoeffs++;
  264. b2 = *pCoeffs++;
  265. a1 = *pCoeffs++;
  266. a2 = *pCoeffs++;
  267. /* Reading the state values */
  268. Xn1 = pState[0];
  269. Xn2 = pState[1];
  270. Yn1 = pState[2];
  271. Yn2 = pState[3];
  272. /* The variables acc holds the output value that is computed:
  273. * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
  274. */
  275. sample = blockSize;
  276. while(sample > 0u)
  277. {
  278. /* Read the input */
  279. Xn = *pIn++;
  280. /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
  281. /* acc = b0 * x[n] */
  282. acc = (q63_t) b0 *Xn;
  283. /* acc += b1 * x[n-1] */
  284. acc += (q63_t) b1 *Xn1;
  285. /* acc += b[2] * x[n-2] */
  286. acc += (q63_t) b2 *Xn2;
  287. /* acc += a1 * y[n-1] */
  288. acc += (q63_t) a1 *Yn1;
  289. /* acc += a2 * y[n-2] */
  290. acc += (q63_t) a2 *Yn2;
  291. /* The result is converted to 1.31 */
  292. acc = acc >> lShift;
  293. /* Every time after the output is computed state should be updated. */
  294. /* The states should be updated as: */
  295. /* Xn2 = Xn1 */
  296. /* Xn1 = Xn */
  297. /* Yn2 = Yn1 */
  298. /* Yn1 = acc */
  299. Xn2 = Xn1;
  300. Xn1 = Xn;
  301. Yn2 = Yn1;
  302. Yn1 = (q31_t) acc;
  303. /* Store the output in the destination buffer. */
  304. *pOut++ = (q31_t) acc;
  305. /* decrement the loop counter */
  306. sample--;
  307. }
  308. /* The first stage goes from the input buffer to the output buffer. */
  309. /* Subsequent stages occur in-place in the output buffer */
  310. pIn = pDst;
  311. /* Reset to destination pointer */
  312. pOut = pDst;
  313. /* Store the updated state variables back into the pState array */
  314. *pState++ = Xn1;
  315. *pState++ = Xn2;
  316. *pState++ = Yn1;
  317. *pState++ = Yn2;
  318. } while(--stage);
  319. #endif /* #ifndef ARM_MATH_CM0_FAMILY_FAMILY */
  320. }
  321. /**
  322. * @} end of BiquadCascadeDF1 group
  323. */