https://github.com/VegaASM/SquareRootHex-PPCASM
Another small function I wrote in my free time. It takes any 32-bit unsigned (hex) value in r3 and returns its square root in r3. The caller sets r4 to select the rounding mode: r4 = 1 rounds to the nearest whole number, any other value rounds down (truncates). Licensed under the Apache License, Version 2.0.
Code:
/*
Copyright 2020 VegaASM
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#~~~~~~~~~~~~~~~~#
# START ASSEMBLY #
#~~~~~~~~~~~~~~~~#
hex_square_root:
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# hex_square_root: unsigned 32-bit integer square root                           #
#                                                                                #
# Method: long-hand (digit-by-digit) square root done in base 16. The input is   #
# consumed one byte (two hex digits) at a time, starting with its most           #
# significant non-zero byte, and every byte yields one hex digit of the result.  #
# After the last real byte, one extra pass with a zero byte produces the first   #
# fractional hex digit, which is only used for the optional rounding.            #
#                                                                                #
# In:   r3 = value to take the square root of (treated as unsigned)              #
#       r4 = rounding option: 1 = round to nearest, anything else = round down   #
# Out:  r3 = square root (inputs 0 and 1 are returned unchanged)                 #
#                                                                                #
# Overwrites r0, r5, r6, r7, r8, r9, r10 and cr0. Leaf routine: it makes no      #
# calls and does not touch the stack.                                            #
#                                                                                #
# Register roles inside the routine:                                             #
#   r0  = saved copy of the input                                                #
#   r3  = result being built, one hex digit per pass                             #
#   r5  = passes of the big loop still to run (last pass = fractional digit)     #
#   r6  = shift amount, then next input byte, then trial/accepted digit          #
#   r7  = working remainder                                                      #
#   r8  = twice the root found so far                                            #
#   r9  = trial divisor (r8 shifted left one hex digit, with r6 in the low digit)#
#   r10 = trial product r9 * r6                                                  #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# sqrt(0) = 0 and sqrt(1) = 1, so return r3 untouched for those inputs. #
# Otherwise keep a copy of the input in r0; r3 is reused below.         #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
cmplwi r3, 1
blelr-
mr r0, r3
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Count leading zero bits to learn how many bytes of the input are in use #
# (Word / 24-bit / Halfword / Byte)                                       #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
cntlzw r3, r0
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Pick r5 = number of big-loop passes and r6 = right-shift that brings the     #
# most significant used byte down to bits 0-7.                                 #
# li does not change the condition register, so each blt below still tests     #
# the cmplwi that sits two instructions above it.                              #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
cmplwi r3, 8
li r5, 4
li r6, 24
blt- start_with_msbyte #Fewer than 8 leading zeros: full word value
cmplwi r3, 16
li r5, 3
li r6, 16
blt- start_with_msbyte #Fewer than 16 leading zeros: 24-bit value
cmplwi r3, 24
li r5, 2
li r6, 8
blt- start_with_msbyte #Fewer than 24 leading zeros: halfword value
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Only remaining case: the value fits in a single byte      #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
li r5, 1
li r6, 0
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# r7 = most significant used byte of the input (first "group")   #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
start_with_msbyte:
srw r7, r0, r6
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Start one above the largest hex digit; the loop decrements first, so #
# the first candidate actually tried is 0xF.                           #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
li r8, 0x10
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# First loop type, entered exactly once per call.                              #
# Count r8 down from 0xF until r8 * r8 is no longer greater than r7, i.e. r8   #
# becomes the largest digit whose square fits in the leading byte.             #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
first_type_loop:
subi r8, r8, 1
mullw r9, r8, r8
cmplw r9, r7
bgt- first_type_loop
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Loop done: r7 = leading byte minus the square (the remainder), and the digit   #
# in r8 becomes the first hex digit of the result in r3.                         #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
sub r7, r7, r9
mr r3, r8
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Add r8 to itself (multiply by 2): from here on r8 holds twice the root     #
# found so far.                                                              #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
add r8, r8, r8
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Big (secondary type) loop: one pass per remaining result digit.               #
# r5 selects which byte of the saved input is brought down next:                #
#   r5 = 4 -> (r0 >> 16) & 0xFF                                                 #
#   r5 = 3 -> (r0 >> 8) & 0xFF                                                  #
#   r5 = 2 -> r0 & 0xFF                                                         #
#   r5 = 1 -> 0 (no input left; this pass produces the fractional digit)        #
# rlwinm/clrlwi are the non-recording forms, so they leave cr0 alone and each   #
# beq tests the cmpwi two instructions above it. When the branch is not taken   #
# the value just placed in r6 is overwritten by the next case.                  #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
big_secondary_loop:
cmpwi r5, 4
rlwinm r6, r0, 16, 0x000000FF
beq- build_into_leftovers
cmpwi r5, 3
rlwinm r6, r0, 24, 0x000000FF
beq- build_into_leftovers
cmpwi r5, 2
clrlwi r6, r0, 24
beq- build_into_leftovers
li r6, 0 #r5 = 1: bring down a zero byte standing in for the digits after the hex point
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Shift the remainder in r7 left by 8 bits and bring 'down' the next group  #
# (byte) into the freed low byte.                                           #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
build_into_leftovers:
slwi r7, r7, 8
or r7, r7, r6
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Shift r8 left by 4 bits to open an empty low hex digit (the "space") that   #
# the trial digit is placed into.                                             #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
slwi r8, r8, 4
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Start one above the largest hex digit; the loop decrements first, so #
# the first trial digit is 0xF.                                        #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
li r6, 0x10
small_secondary_loop:
subi r6, r6, 1
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Place trial digit r6 into r8's empty low digit (r9), then multiply r9 by the     #
# same digit (r10). Keep lowering r6 while r10 is greater than the remainder r7.   #
# The low 4 bits of r8 are zero after the shift above, so the OR acts as an add.   #
# With r6 = 0 the product is 0, so the loop always ends.                           #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
or r9, r8, r6
mullw r10, r9, r6
cmplw r10, r7 #Proceed at the first digit whose product r10 is not above the remainder r7
bgt- small_secondary_loop
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Digit accepted.                                                              #
# New remainder: r7 = r7 - r10.                                                #
# New doubled root: r8 = r9 + r6, which is the shifted r8 plus twice the       #
# accepted digit.                                                              #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
sub r7, r7, r10
add r8, r9, r6
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# r5 = 1 means this pass produced the fractional digit rather than a digit of   #
# the whole-number result. In that case skip the slwi/or below so the digit is  #
# not appended to r3; it stays in r6 for the rounding check after the loop.     #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
cmpwi r5, 1
beq- decrement_loop
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Whole-number digit: shift the result in r3 left by one hex digit (4 bits)     #
# and OR the new digit (r6) into the low position.                              #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
slwi r3, r3, 4
or r3, r3, r6
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Decrement the big-loop counter; subic. records the result in cr0, so #
# the bne repeats the loop until r5 reaches 0.                         #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
decrement_loop:
subic. r5, r5, 1
bne+ big_secondary_loop
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Check the caller's rounding option in r4                             #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
cmpwi r4, 1
bnelr- #r4 is not 1: caller wants the result rounded down, r3 is already final, END function
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Round to nearest: r6 still holds the fractional hex digit from the last      #
# pass. A digit of 8 or more means the fractional part is at least one half.   #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
cmpwi r6, 8
bltlr- #Fractional digit is below 8: keep the rounded-down result, END function
addi r3, r3, 1 #Fractional digit is 8 or more: round the result up by one
#~~~~~~~~~~~~~~#
# END Function #
#~~~~~~~~~~~~~~#
blr
#~~~~~~~~~~~~~~#
# END ASSEMBLY #
#~~~~~~~~~~~~~~#