/* ftoa_engine.S */

/* Copyright (c) 2005, Dmitry Xmelkov
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

   * Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in
     the documentation and/or other materials provided with the
     distribution.
   * Neither the name of the copyright holders nor the names of
     contributors may be used to endorse or promote products derived
     from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE. */

/* $Id$ */

#if !defined(__AVR_TINY__)

#ifndef __DOXYGEN__

#include "macros.inc"
#include "ftoa_engine.h"

/* AVR_ENH_LPM is 1 when __AVR_HAVE_LPMX__ is defined and non-zero, which
   selects the 'lpm r0, Z+' code paths below; otherwise it is 0 and the
   plain 'lpm' + 'adiw' sequences are assembled.  */
#define AVR_ENH_LPM   0
#if  defined(__AVR_HAVE_LPMX__)
#  if  __AVR_HAVE_LPMX__
#    undef  AVR_ENH_LPM
#    define AVR_ENH_LPM   1
#  endif
#endif

/*
   int __ftoa_engine (double val, char *buf,
                      unsigned char prec, unsigned char maxdgs)
 Input:
    val    - value to convert
    buf    - output buffer address
    prec   - precision: number of decimal digits is 'prec + 1'
    maxdgs - (0 if unused) precision restriction for "%f" specification

 Output:
    return     - decimal exponent of first digit
    buf[0]     - flags (FTOA_***)
    buf[1],... - decimal digits
    Number of digits:
    maxdgs == 0 ? prec+1 :
    (buf[0] & FTOA_CARRY) == 0 || buf[1] != '1' ?
        aver(1, maxdgs+exp, prec+1) :
        aver(1, maxdgs+exp-1, prec+1)

 Notes:
    * Output string is not 0-terminated, so that the caller's own buffer
    can be used in any case.
    * If used, 'maxdgs' is a number of digits for value with zero exponent.
*/

    /* Input: the registers in which the routine reads its four arguments
       (val, buf, prec, maxdgs) on entry.  */
#define maxdgs  r16
#define prec    r18
#define buf_lo  r20
#define buf_hi  r21
#define val_lo  r22
#define val_hi  r23
#define val_hlo r24
#define val_hhi r25

    /* Float value parse: accumulates the FTOA_* bits that are stored as the
       first buffer byte.  r19 is reused below (mlt_1, pwr_4) once the flags
       byte has been written.  */
#define flag    r19

    /* Multiplication of mantissas.
       exp_sv keeps a copy of the binary exponent while val_hhi is reused.
       mlt_4/mlt_5 occupy r20/r21 (buf_lo/buf_hi); the buffer address has
       been copied to X before they are written.  */
#define exp_sv  r17
#define mlt_1   r19 /* lowest result byte   */
#define mlt_2   r14
#define mlt_3   r15
#define mlt_4   r20
#define mlt_5   r21
#define mlt_6   r28
#define mlt_7   r29

    /* Conversion to string.
       pwr_7..pwr_2 hold one 6-byte '.L_powr10' element.  They overlay
       registers that are free at that stage: r17 (exp_sv), r19 (mlt_1),
       r22/r25 (val_lo/val_hhi).  pwr_2 is r1, the same register as 'zero',
       so r1 is cleared again before the routine returns.
       digit and exp10 overlay r23/r24 (val_hi/val_hlo).  */
#define pwr_2   r1  /* lowest byte of 'powr10' element  */
#define pwr_3   r17
#define pwr_4   r19
#define pwr_5   r22
#define pwr_6   r25
#define pwr_7   r0
#define digit   r23
#define exp10   r24

    /* Fixed: r1 is used as a constant-zero operand in the parse and
       multiplication phases.  */
#define zero    r1

    ASSEMBLY_CLIB_SECTION
    
    .global __ftoa_engine
    .type   __ftoa_engine, "function"
/* --------------------------------------------------------------------
   __ftoa_engine
   Converts the value in val_hhi..val_lo to decimal digits.  A flags byte
   (FTOA_***) is stored at the buffer start, ASCII digits follow it, and
   the decimal exponent of the first digit is returned in r25:r24.
   Three phases: float value parse, scaling of the mantissa with a
   '.L_base10' entry, digit extraction with '.L_powr10' plus rounding.
   r29, r28, r17..r14 are saved on the stack and restored at .L_rest;
   r1 is used as scratch (pwr_2) and cleared again before returning.
*/
__ftoa_engine:

/* --------------------------------------------------------------------
   Float value parse.
   Moves the sign into 'flag', the 8-bit exponent into val_hhi, and
   handles zero, infinity and NaN.
*/
  ; limit 'prec' to 7 (the number of digits is 'prec + 1')
    cpi prec, 8
    brlo    1f
    ldi prec, 7
1:
  ; init: no flags set yet; X := buf (X is the output pointer from here on;
  ; X_movw comes from macros.inc)
    clr flag
    X_movw  XL, buf_lo
  ; val_hhi := exponent, sign test and remove
  ; 'lsl' moves the sign bit into C and 'adc' adds it to flag as bit 0,
  ; which is why FTOA_MINUS has to be 1.
#if  FTOA_MINUS != 1
#  error  FTOA_MINUS must be 1:  add with carry used
#endif
    lsl val_hhi
    adc flag, zero      ; FTOA_MINUS
  ; bit 7 of val_hlo is copied into bit 0 of val_hhi (lowest exponent bit)
    sbrc    val_hlo, 7
    ori val_hhi, 1
  ; zero test: 'adiw ..., 0' tests val_hhi:val_hlo without changing them,
  ; the 'cpc' chain keeps Z set only if the two low bytes are 0 as well
    adiw    val_hlo, 0
    cpc val_lo, zero
    cpc val_hi, zero
    brne    3f
  ; return 0: store the flags byte (with FTOA_ZERO) followed by 'prec + 1'
  ; characters '0'.  Nothing has been pushed yet, so a plain 'ret' is used.
    ori flag, FTOA_ZERO
    subi    prec, -2
2:  st  X+, flag
    ldi flag, '0'
    dec prec
    brne    2b
    ret             ; r24,r25 == 0 (tested above): return value 0
3:
  ; infinity, NaN ?
  ; Exponent 0xff: if the remaining mantissa bits are all 0 only FTOA_INF is
  ; added; otherwise FTOA_INF is added twice, which yields FTOA_NAN.
  ; There is no early exit here: execution continues with the conversion
  ; below for Inf and NaN too.
#if  FTOA_NAN != 2 * FTOA_INF
#  error  Must: FTOA_NAN == 2*FTOA_INF: 'rjmp' is absent
#endif
    cpi val_hhi, 0xff
    brlo    6f
  ; val_hlo still carries the lowest exponent bit in bit 7, hence 0x80
    cpi val_hlo, 0x80
    cpc val_hi, zero
    cpc val_lo, zero
    breq    5f
    subi    flag, -FTOA_INF     ; FTOA_NAN (together with the next line)
5:  subi    flag, -FTOA_INF
6:
  ; write flags byte (first byte of the buffer)
    st  X+, flag
  ; hidden bit
  ; Exponent >= 1: 'cpi' clears C, bit 7 of val_hlo is set and the 'adc'
  ; adds nothing.  Exponent 0: 'cpi' sets C, the 'ori' is skipped and the
  ; 'adc' raises the exponent to 1.
    cpi val_hhi, 1
    brlo    7f          ; if subnormal value
    ori val_hlo, 0x80
7:  adc val_hhi, zero
  ; pushes: registers overwritten below (mlt_7/mlt_6, exp_sv/pwr_3, maxdgs,
  ; mlt_3/mlt_2); popped in reverse order at .L_rest
    push    r29
    push    r28
    push    r17
    push    r16
    push    r15
    push    r14

/* --------------------------------------------------------------------
   Multiplication of mantissas (val and table).
   At the beginning:
    val_hlo .. val_lo  - input value mantissa
    val_hhi            - input value exponent
    X                  - second byte address (string begin)
   At the end:
    mlt_7 .. mlt_2     - multiplication result
    exp10              - decimal exponent

   The four table bytes are processed one after another, lowest first,
   by shift-and-add.  Each bit loop shifts the argument left 8 times, so
   after every table byte the argument has rotated by one register:
   the register named in the 'add' line is its lowest byte at that stage.
*/

  ; save the exponent: its low 3 bits are needed for the final shift
    mov exp_sv, val_hhi
  ; Z := & base10[exp / 8]  (sizeof(base10[0]) == 5)
    andi    val_hhi, ~7
    lsr val_hhi         ; (exp/8) * 4
    mov ZL, val_hhi
    lsr val_hhi
    lsr val_hhi         ; exp/8
    add ZL, val_hhi     ; (exp/8) * 5
    clr ZH
  ; subtracting the negated address adds '.L_base10' to Z
    subi    ZL, lo8(-(.L_base10))
    sbci    ZH, hi8(-(.L_base10))
  ; highest mantissa byte  (mult. shifting prepare): the argument becomes
  ; the 32-bit shift register val_hhi:val_hlo:val_hi:val_lo
    clr val_hhi
  ; result initialization: mlt_1..mlt_3 are cleared, then the zero pair
  ; mlt_3:mlt_2 is copied into mlt_5:mlt_4 and mlt_7:mlt_6
    clr mlt_1
    clr mlt_2
    clr mlt_3
    X_movw  mlt_4, mlt_2
    X_movw  mlt_6, mlt_2

  ; multiply by 1-st table byte
#if  AVR_ENH_LPM
    lpm r0, Z+
#else
    lpm
    adiw    ZL, 1
#endif
  ; 'sec' + 'ror' put a marker 1 into bit 7 of r0 and the first multiplier
  ; bit into C.  The loop ends when 'lsr' shifts the marker out: r0 is then
  ; 0 and C is 1, i.e. after exactly 8 passes.
    sec         ; for loop end control
    ror r0
  ; addition: result += argument if the current multiplier bit (C) is 1
10: brcc    11f
    add mlt_1, val_lo
    adc mlt_2, val_hi
    adc mlt_3, val_hlo
    adc mlt_4, val_hhi
    adc mlt_5, zero
  ; arg shift
11: lsl val_lo
    rol val_hi
    rol val_hlo
    rol val_hhi
  ; next bit
    lsr r0
    brne    10b

  ; second table byte
  ; C is 1 on loop exit (the marker bit).  'lpm' leaves it alone, so it
  ; serves as the next marker; 'adiw' changes C, hence the explicit 'sec'
  ; in the non-enhanced variant.
#if  AVR_ENH_LPM
    lpm r0, Z+      ; C flag is still 1
#else
    lpm
    adiw    ZL, 1
    sec
#endif
    ror r0
  ; addition
12: brcc    13f
    add mlt_2, val_hi       ; val_hi is the least byte now
    adc mlt_3, val_hlo
    adc mlt_4, val_hhi
    adc mlt_5, val_lo
    adc mlt_6, zero
  ; arg shift
13: lsl val_hi
    rol val_hlo
    rol val_hhi
    rol val_lo
  ; next bit
    lsr r0
    brne    12b

  ; 3-rd table byte
#if  AVR_ENH_LPM
    lpm r0, Z+      ; C flag is still 1
#else
    lpm
    adiw    ZL, 1
    sec
#endif
    ror r0
  ; addition
14: brcc    15f
    add mlt_3, val_hlo      ; val_hlo is the least byte now
    adc mlt_4, val_hhi
    adc mlt_5, val_lo
    adc mlt_6, val_hi
    adc mlt_7, zero
  ; arg shift
15: lsl val_hlo
    rol val_hhi
    rol val_lo
    rol val_hi
  ; next bit
    lsr r0
    brne    14b

  ; 4-th table byte
  ; The non-enhanced variant has no 'adiw' in front of the 'ror' here, so
  ; C is still 1 from the previous loop and no 'sec' is needed; Z is
  ; advanced later, just before the exponent byte is read.
#if  AVR_ENH_LPM
    lpm r0, Z+      ; C flag is still 1
#else
    lpm
#endif
    ror r0
  ; addition (mlt_7 is the top byte: no further carry is propagated)
16: brcc    17f
    add mlt_4, val_hhi      ; val_hhi is the least byte now
    adc mlt_5, val_lo
    adc mlt_6, val_hi
    adc mlt_7, val_hlo
  ; arg shift
17: lsl val_hhi
    rol val_lo
    rol val_hi
    rol val_hlo
  ; next bit
    lsr r0
    brne    16b

  ; decimal exponent: fifth byte of the table element
#if  AVR_ENH_LPM
    lpm exp10, Z
#else
    adiw    ZL, 1
    lpm
    mov exp10, r0
#endif

  ; result shift:  mlt_7..2 >>= (~exp & 7)
    com exp_sv
    andi    exp_sv, 7
    breq    19f
18: lsr mlt_7
    ror mlt_6
    ror mlt_5
    ror mlt_4
    ror mlt_3
    ror mlt_2
    dec exp_sv
    brne    18b
19:

/* --------------------------------------------------------------------
   Conversion to string.

   Registers usage:
      mlt_7 .. mlt_2    - new mantissa (multiplication result)
      pwr_7 .. pwr_2    - 'powr10' table element
      Z                 - 'powr10' table pointer
      X                 - output string pointer
      maxdgs            - number of digits
      prec              - number of digits still to output
      exp10             - decimal exponent
      digit             - conversion process
      T flag            - set while the first nonzero digit is not found

   At the end:
      X                 - end of buffer (nonfilled byte)
      exp10             - corrected dec. exponent
      mlt_7 .. mlt_2    - remainder
      pwr_7 .. pwr_2    - last powr10[] element

   Notes:
     * It is possible to run past the end of the powr10 table with a
      subnormal value.
      Result: accuracy decrease in the rounding phase.  No matter: high
      precision with subnormals is not needed. (Now 0x00000001 is converted
      exactly on prec = 5, i.e. 6 digits.)
*/

  ; to find first digit
    ldi ZL, lo8(.L_powr10)
    ldi ZH, hi8(.L_powr10)
    set
  ; 'pwr10' element reading (6 bytes, lowest first).  pwr_2 is r1, so the
  ; 'zero' register does not hold 0 from here until .L_rest.
.L_digit:
    X_lpm   pwr_2, Z+
    X_lpm   pwr_3, Z+
    X_lpm   pwr_4, Z+
    X_lpm   pwr_5, Z+
    X_lpm   pwr_6, Z+
    X_lpm   pwr_7, Z+
  ; 'digit' init.
    ldi digit, '0' - 1
  ; subtraction loop: count how often the power fits into the mantissa;
  ; the loop stops on the first borrow
20: inc digit
    sub mlt_2, pwr_2
    sbc mlt_3, pwr_3
    sbc mlt_4, pwr_4
    sbc mlt_5, pwr_5
    sbc mlt_6, pwr_6
    sbc mlt_7, pwr_7
    brsh    20b
  ; restore mult: undo the last subtraction (the one that borrowed)
    add mlt_2, pwr_2
    adc mlt_3, pwr_3
    adc mlt_4, pwr_4
    adc mlt_5, pwr_5
    adc mlt_6, pwr_6
    adc mlt_7, pwr_7
  ; analysis: while T is set leading zeros are not stored, each of them
  ; lowers the decimal exponent instead
    brtc    25f
    cpi digit, '0'
    brne    21f     ; this is the first digit found
    dec exp10
    rjmp    .L_digit
  ; now is the first digit
21: clt
  ; number of digits:
  ;   maxdgs == 0:  prec + 1
  ;   otherwise:    min(prec, max(0, maxdgs - 1 + exp10)) + 1
  ; The count is kept in maxdgs as well; the rounding code compares
  ; against it.
    subi    maxdgs, 1
    brlo    23f         ; maxdgs was 0
    add maxdgs, exp10
    brpl    22f
    clr maxdgs
22: cp  maxdgs, prec
    brsh    23f
    mov prec, maxdgs
23: inc prec
    mov maxdgs, prec    
  ; operate digit
25: cpi digit, '0' + 10
    brlo    27f
  ; overflow, digit > '9': fill the remaining positions with '9' and
  ; continue with the round-up code
    ldi digit, '9'
26: st  X+, digit
    dec prec
    brne    26b
    rjmp    .L_up
  ; write digit
27: st  X+, digit
    dec prec
    brne    .L_digit

/* --------------------------------------------------------------------
    Rounding.
    The remainder is compared with half of the last power of ten used.
    On entry prec is 0; in .L_up it counts the digits walked back.
*/
.L_round:
  ; pwr10 /= 2
    lsr pwr_7
    ror pwr_6
    ror pwr_5
    ror pwr_4
    ror pwr_3
    ror pwr_2
  ; mult -= pwr10  (half of last 'pwr10' value)
    sub mlt_2, pwr_2
    sbc mlt_3, pwr_3
    sbc mlt_4, pwr_4
    sbc mlt_5, pwr_5
    sbc mlt_6, pwr_6
    sbc mlt_7, pwr_7
  ; rounding direction?  A borrow means remainder < half: digits stay
    brlo    .L_rest
  ; round up: increment the digit in front of X; a digit that passes '9'
  ; wraps to '0' and the carry moves on to the previous digit
.L_up:
    inc prec
    ld  digit, -X
    inc digit
    cpi digit, '9' + 1
    brlo    31f
    ldi digit, '0'
  ; C from the 'cpi' above stays valid through the following instructions:
  ; C = 1 means the digit did not wrap.  The 'brsh' (continue while the
  ; digit wrapped) is skipped by 'cpse' once the first digit was handled.
31: st  X, digit
    cpse    prec, maxdgs
    brsh    .L_up
  ; it was a carry to master digit
  ; NOTE(review): this point is also reached when the carry stopped at a
  ; later digit (C = 1, prec != maxdgs).  X-1 is then a digit rather than
  ; the flags byte, so the 'ori' has to leave it unchanged.  That holds only
  ; if FTOA_CARRY is a bit already set in every ASCII digit (0x30..0x39);
  ; the value is defined in ftoa_engine.h -- confirm.
    ld  digit, -X       ; flags
    ori digit, FTOA_CARRY   ; 'C' is not changed
    st  X+, digit
    brlo    .L_rest         ; above comparison: digit did not wrap, done
  ; overflow: every digit wrapped to '0'.  Raise the exponent and rewrite
  ; the string as '1' followed by '0' characters, 'prec' bytes in total.
    inc exp10
    ldi digit, '1'
32: st  X+, digit
    ldi digit, '0'
    dec prec
    brne    32b
  ; restore: r1 was used as pwr_2 and is made zero again; the saved
  ; registers are popped in reverse push order
.L_rest:
    clr zero
    pop r14
    pop r15
    pop r16
    pop r17
    pop r28
    pop r29
  ; return: exp10 already is r24; r25 becomes its sign extension
    clr r25
    sbrc    exp10, 7        ; high byte
    com r25
    ret

    .size  __ftoa_engine, . - __ftoa_engine

/* --------------------------------------------------------------------
    Tables.  '.L_powr10' is placed first -- for subnormals stability.
*/
    .section .progmem.data,"a",@progbits

    /* Powers of ten from 10^14 down to 10^0, one 6-byte element each,
       least significant byte first.  The digit loop of __ftoa_engine reads
       one element per output position (pwr_2 = first byte ... pwr_7 = last).
       The trailing comment gives the decimal value of each element.  */
    .type .L_powr10, "object"
.L_powr10:
    .byte   0x00, 0x40, 0x7A, 0x10, 0xF3, 0x5A  ; 100000000000000
    .byte   0x00, 0xA0, 0x72, 0x4E, 0x18, 0x09  ; 10000000000000
    .byte   0x00, 0x10, 0xA5, 0xD4, 0xE8, 0x00  ; 1000000000000
    .byte   0x00, 0xE8, 0x76, 0x48, 0x17, 0x00  ; 100000000000
    .byte   0x00, 0xE4, 0x0B, 0x54, 0x02, 0x00  ; 10000000000
    .byte   0x00, 0xCA, 0x9A, 0x3B, 0x00, 0x00  ; 1000000000
    .byte   0x00, 0xE1, 0xF5, 0x05, 0x00, 0x00  ; 100000000
    .byte   0x80, 0x96, 0x98, 0x00, 0x00, 0x00  ; 10000000
    .byte   0x40, 0x42, 0x0F, 0x00, 0x00, 0x00  ; 1000000
    .byte   0xA0, 0x86, 0x01, 0x00, 0x00, 0x00  ; 100000
    .byte   0x10, 0x27, 0x00, 0x00, 0x00, 0x00  ; 10000
    .byte   0xE8, 0x03, 0x00, 0x00, 0x00, 0x00  ; 1000
    .byte   0x64, 0x00, 0x00, 0x00, 0x00, 0x00  ; 100
    .byte   0x0A, 0x00, 0x00, 0x00, 0x00, 0x00  ; 10
    .byte   0x01, 0x00, 0x00, 0x00, 0x00, 0x00  ; 1
    .size .L_powr10, . - .L_powr10

    /* Scaling table, 32 elements of 5 bytes, indexed in __ftoa_engine by
       (binary exponent / 8).  Bytes 0..3 are a 32-bit multiplier, least
       significant byte first (decimal value in the trailing comment);
       byte 4 is the signed decimal exponent that is loaded into exp10.  */
    .type   .L_base10, "object"
.L_base10:
    .byte   0x2C, 0x76, 0xD8, 0x88, -36     ; 2295887404
    .byte   0x67, 0x4F, 0x08, 0x23, -33     ; 587747175
    .byte   0xC1, 0xDF, 0xAE, 0x59, -31     ; 1504632769
    .byte   0xB1, 0xB7, 0x96, 0xE5, -29     ; 3851859889
    .byte   0xE4, 0x53, 0xC6, 0x3A, -26     ; 986076132
    .byte   0x51, 0x99, 0x76, 0x96, -24     ; 2524354897
    .byte   0xE6, 0xC2, 0x84, 0x26, -21     ; 646234854
    .byte   0x89, 0x8C, 0x9B, 0x62, -19     ; 1654361225
    .byte   0x40, 0x7C, 0x6F, 0xFC, -17     ; 4235164736
    .byte   0xBC, 0x9C, 0x9F, 0x40, -14     ; 1084202172
    .byte   0xBA, 0xA5, 0x6F, 0xA5, -12     ; 2775557562
    .byte   0x90, 0x05, 0x5A, 0x2A, -9      ; 710542736
    .byte   0x5C, 0x93, 0x6B, 0x6C, -7      ; 1818989404
    .byte   0x67, 0x6D, 0xC1, 0x1B, -4      ; 465661287
    .byte   0xE0, 0xE4, 0x0D, 0x47, -2      ; 1192092896
    .byte   0xF5, 0x20, 0xE6, 0xB5, 0       ; 3051757813
    .byte   0xD0, 0xED, 0x90, 0x2E, 3       ; 781250000
    .byte   0x00, 0x94, 0x35, 0x77, 5       ; 2000000000
    .byte   0x00, 0x80, 0x84, 0x1E, 8       ; 512000000
    .byte   0x00, 0x00, 0x20, 0x4E, 10      ; 1310720000
    .byte   0x00, 0x00, 0x00, 0xC8, 12      ; 3355443200
    .byte   0x33, 0x33, 0x33, 0x33, 15      ; 858993459
    .byte   0x98, 0x6E, 0x12, 0x83, 17      ; 2199023256
    .byte   0x41, 0xEF, 0x8D, 0x21, 20      ; 562949953
    .byte   0x89, 0x3B, 0xE6, 0x55, 22      ; 1441151881
    .byte   0xCF, 0xFE, 0xE6, 0xDB, 24      ; 3689348815
    .byte   0xD1, 0x84, 0x4B, 0x38, 27      ; 944473297
    .byte   0xF7, 0x7C, 0x1D, 0x90, 29      ; 2417851639
    .byte   0xA4, 0xBB, 0xE4, 0x24, 32      ; 618970020
    .byte   0x32, 0x84, 0x72, 0x5E, 34      ; 1584563250
    .byte   0x81, 0x00, 0xC9, 0xF1, 36      ; 4056481921
    .byte   0xEC, 0xA1, 0xE5, 0x3D, 39      ; 1038459372
    .size .L_base10, . - .L_base10

    .end
#endif  /* !__DOXYGEN__ */

#endif /* !defined(__AVR_TINY__) */